# 6e0dfdb2-eb3a-44fc-8761-744…/scraper.py

from playwright.sync_api import sync_playwright
import datetime
import time
import random
import os

def _end_time_after(day, start_time, minutes):
    """Return the clock time that lies `minutes` after `start_time` on `day`."""
    start = datetime.datetime.combine(day, start_time)
    return (start + datetime.timedelta(minutes=minutes)).time()


def _simulate_appointments(salon_name, url):
    """Build 1-3 randomly generated placeholder appointments for one salon.

    This stands in for real slot extraction: in a real scenario you would
    inspect the DOM to find actual date pickers, time slots and service names.
    """
    today = datetime.date.today()
    simulated = []
    for _ in range(random.randint(1, 3)):
        future_date = today + datetime.timedelta(days=random.randint(0, 7))
        start_time = datetime.time(random.randint(9, 17), random.choice([0, 15, 30, 45]))
        end_time = _end_time_after(future_date, start_time, random.randint(45, 120))
        simulated.append({
            "salon_name": salon_name,
            "date": future_date.isoformat(),
            "start_time": start_time.isoformat(),
            "end_time": end_time.isoformat(),
            "service": "Klippning (Simulerad)",
            # In a real scenario, this would be a direct booking link for the
            # specific time slot.
            "book_link": url,
        })
    return simulated


def _mock_appointments():
    """Return one mock appointment per hard-coded salon (Playwright fallback)."""
    mock_salons = [
        ("Klipphuset", "Herrklippning", "https://www.bokadirekt.se/places/klipphuset-41113"),
        # Placeholder link: XXXXX is not a real salon ID.
        ("Studio Siss", "Damklippning", "https://www.bokadirekt.se/places/studio-siss-XXXXX"),
    ]
    mocked = []
    for salon_name, service, book_link in mock_salons:
        day = datetime.date.today() + datetime.timedelta(days=random.randint(1, 7))
        start_time = datetime.time(random.randint(9, 17), random.choice([0, 30]))
        # Derive the end from the start so a slot can never end before it
        # begins; the latest possible end (17:30 + 120 min) is 19:30.
        end_time = _end_time_after(day, start_time, random.choice([30, 60, 90, 120]))
        mocked.append({
            "salon_name": salon_name,
            "date": day.isoformat(),
            "start_time": start_time.isoformat(),
            "end_time": end_time.isoformat(),
            "service": service,
            "book_link": book_link,
        })
    return mocked


def scrape_bokadirekt_appointments(salon_urls):
    """
    Scrapes Bokadirekt for available appointments from a list of salon URLs.
    This is a simplified example. A real scraper would need to precisely
    target elements on Bokadirekt's dynamically loaded pages: only the salon
    name is read from the page, the appointment slots themselves are simulated.

    NOTE: For local development/testing without a full browser environment,
    you might need to mock this function's output or ensure Playwright
    dependencies are correctly set up (e.g., `playwright install chromium`).

    Args:
        salon_urls: Iterable of Bokadirekt salon page URLs to visit.

    Returns:
        A list of dicts with the string keys "salon_name", "date",
        "start_time", "end_time", "service" and "book_link". If Playwright
        cannot be started (or fails outside the per-URL handling), mock data
        for two hard-coded salons is returned instead. Errors for a single
        URL are printed and that URL is skipped.
    """
    appointments = []

    try:
        with sync_playwright() as p:
            # Ensure browsers are installed, e.g., 'playwright install chromium'
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()

                for url in salon_urls:
                    try:
                        print(f"Scraping {url}...")
                        page.goto(url, wait_until="domcontentloaded", timeout=60000)

                        # Wait for specific selectors to appear for robustness
                        # (Highly dependent on Bokadirekt's current DOM structure)
                        try:
                            page.wait_for_selector('h1.placeName', timeout=10000)
                            salon_name = page.locator("h1.placeName").first.inner_text().strip()
                        except Exception:
                            salon_name = "Unknown Salon"
                            print(f"Could not find salon name for {url}, using default.")

                        found = _simulate_appointments(salon_name, url)
                        appointments.extend(found)
                        # Report this salon's count, not the running total.
                        print(f"Simulated {len(found)} appointments for {salon_name}")

                    except Exception as e:
                        # Best effort: one failing salon must not abort the rest.
                        print(f"Error scraping {url}: {e}")
            finally:
                # Close the browser even if something above raised.
                browser.close()

    except Exception as e:
        print(f"Playwright initialization/runtime error: {e}. Returning mock data.")
        # Fallback to mock data if Playwright setup fails or is not available
        appointments = _mock_appointments()
    return appointments

if __name__ == "__main__":
    # Manual run: scrape the salons listed below and print every appointment
    # dict that comes back, one per line.
    # Add more Bokadirekt salon URLs to this list if available, e.g.
    # "https://www.bokadirekt.se/places/studio-siss-XXXXX" (replace XXXXX with
    # the actual salon ID).
    target_urls = ["https://www.bokadirekt.se/places/klipphuset-41113"]

    print("Running scraper directly:")
    for appointment in scrape_bokadirekt_appointments(target_urls):
        print(appointment)