"""Collect bookable appointment slots from Bokadirekt salon pages.

Slot extraction is still a simulation: only the salon name is read from the
live page, while dates, times and the service are randomly generated
placeholders.
"""

import datetime
import os
import random
import time

# Keys present in every appointment dict returned by this module:
#   salon_name, date (ISO date), start_time / end_time (ISO HH:MM:SS),
#   service, book_link.


def _build_appointment(salon_name, start, end, service, book_link):
    """Return one appointment dict from a start/end ``datetime`` pair."""
    return {
        "salon_name": salon_name,
        "date": start.date().isoformat(),
        "start_time": start.time().isoformat(),
        "end_time": end.time().isoformat(),
        "service": service,
        "book_link": book_link,
    }


def _simulate_appointments(salon_name, url):
    """Return 1-3 randomly generated appointments for a single salon."""
    today = datetime.date.today()
    simulated = []
    for _ in range(random.randint(1, 3)):
        day = today + datetime.timedelta(days=random.randint(0, 7))
        start = datetime.datetime.combine(
            day,
            datetime.time(random.randint(9, 17), random.choice([0, 15, 30, 45])),
        )
        # Latest possible end is 17:45 + 120 min = 19:45, so the slot never
        # crosses midnight and "date" stays valid for both times.
        end = start + datetime.timedelta(minutes=random.randint(45, 120))
        simulated.append(
            _build_appointment(
                salon_name,
                start,
                end,
                "Klippning (Simulerad)",
                # In a real scraper this would be a direct booking link for
                # the specific time slot.
                url,
            )
        )
    return simulated


def _mock_appointments():
    """Return two hard-coded salons with random but consistent time slots."""
    today = datetime.date.today()
    salons = (
        (
            "Klipphuset",
            "Herrklippning",
            "https://www.bokadirekt.se/places/klipphuset-41113",
        ),
        (
            "Studio Siss",
            "Damklippning",
            # Placeholder ID, not a real salon page.
            "https://www.bokadirekt.se/places/studio-siss-XXXXX",
        ),
    )
    mocked = []
    for salon_name, service, book_link in salons:
        day = today + datetime.timedelta(days=random.randint(1, 7))
        start = datetime.datetime.combine(
            day,
            datetime.time(random.randint(9, 17), random.choice([0, 30])),
        )
        # Derive the end from the start so it can never precede it.
        end = start + datetime.timedelta(minutes=random.choice([60, 90, 120]))
        mocked.append(
            _build_appointment(salon_name, start, end, service, book_link)
        )
    return mocked


def scrape_bokadirekt_appointments(salon_urls):
    """Scrape Bokadirekt for available appointments from salon URLs.

    This is a simplified example. A real scraper would need to target the
    actual date pickers, time slots and service names on Bokadirekt's
    dynamically loaded pages; here only the salon name comes from the page
    and the appointments themselves are simulated.

    Args:
        salon_urls: Iterable of Bokadirekt salon page URLs.

    Returns:
        A list of dicts with the keys ``salon_name``, ``date``,
        ``start_time``, ``end_time``, ``service`` and ``book_link``.
        An empty (or ``None``) ``salon_urls`` yields ``[]`` without starting
        a browser. If Playwright is missing or fails at runtime, mock data
        is returned instead of raising.

    NOTE: Playwright needs its browser binaries installed
    (e.g. ``playwright install chromium``).
    """
    if not salon_urls:
        return []

    appointments = []

    try:
        # Imported here rather than at module level so that a missing
        # Playwright installation triggers the mock-data fallback below
        # instead of making the whole module unimportable.
        from playwright.sync_api import sync_playwright

        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()

                for url in salon_urls:
                    try:
                        print(f"Scraping {url}...")
                        page.goto(url, wait_until="domcontentloaded", timeout=60000)

                        # Highly dependent on Bokadirekt's current DOM
                        # structure, so failure here is treated as non-fatal.
                        try:
                            page.wait_for_selector('h1.placeName', timeout=10000)
                            salon_name = (
                                page.locator("h1.placeName").first.inner_text().strip()
                            )
                        except Exception:
                            salon_name = "Unknown Salon"
                            print(f"Could not find salon name for {url}, using default.")

                        found = _simulate_appointments(salon_name, url)
                        appointments.extend(found)
                        # Report this salon's count, not the running total.
                        print(f"Simulated {len(found)} appointments for {salon_name}")

                    except Exception as e:
                        # One bad salon page must not abort the remaining URLs.
                        print(f"Error scraping {url}: {e}")
            finally:
                browser.close()

    except Exception as e:
        print(f"Playwright initialization/runtime error: {e}. Returning mock data.")
        # Fallback to mock data if Playwright setup fails or is not available.
        appointments = _mock_appointments()

    return appointments


if __name__ == "__main__":
    salon_urls_to_scrape = [
        "https://www.bokadirekt.se/places/klipphuset-41113",
        # Add more Bokadirekt salon URLs here if available, e.g.
        # "https://www.bokadirekt.se/places/studio-siss-XXXXX"
        # (replace XXXXX with the actual ID).
    ]

    print("Running scraper directly:")
    result = scrape_bokadirekt_appointments(salon_urls_to_scrape)
    for appt in result:
        print(appt)