Add scraper.py

2026-06-10 20:11:59 +00:00
parent b9a8e2591c
commit 1b3160958c
1 changed files with 85 additions and 0 deletions
--- a/scraper.py
+++ b/scraper.py
@@ -0,0 +1,85 @@
 from playwright.sync_api import sync_playwright
 import datetime
 import time
 import random
 import os
 def _simulate_appointments(salon_name, url):
    """Return 1-3 randomly generated placeholder appointments for one salon."""
    today = datetime.date.today()
    simulated = []
    for _ in range(random.randint(1, 3)):
        future_date = today + datetime.timedelta(days=random.randint(0, 7))
        start_time = datetime.time(random.randint(9, 17), random.choice([0, 15, 30, 45]))
        # Latest start (17:45) plus the longest duration (120 min) stays on
        # the same day, so taking .time() never wraps past midnight.
        end_time = (
            datetime.datetime.combine(future_date, start_time)
            + datetime.timedelta(minutes=random.randint(45, 120))
        ).time()
        simulated.append({
            "salon_name": salon_name,
            "date": future_date.isoformat(),
            "start_time": start_time.isoformat(),
            "end_time": end_time.isoformat(),
            "service": "Klippning (Simulerad)",
            # In a real scenario, this would be a direct booking link for
            # the specific time slot.
            "book_link": url,
        })
    return simulated


 def _mock_appointments():
    """Return two mock appointments used when Playwright is unavailable."""
    today = datetime.date.today()
    mock_salons = [
        ("Klipphuset", "Herrklippning",
         "https://www.bokadirekt.se/places/klipphuset-41113"),
        # The trailing XXXXX is a placeholder for the real salon ID.
        ("Studio Siss", "Damklippning",
         "https://www.bokadirekt.se/places/studio-siss-XXXXX"),
    ]
    mocked = []
    for salon_name, service, book_link in mock_salons:
        day = today + datetime.timedelta(days=random.randint(1, 7))
        start = datetime.datetime.combine(
            day, datetime.time(random.randint(9, 17), random.choice([0, 30]))
        )
        # Derive the end from the start so a mock slot can never end before
        # it begins (latest start 17:30 + 120 min stays on the same day).
        end = start + datetime.timedelta(minutes=random.choice([30, 60, 90, 120]))
        mocked.append({
            "salon_name": salon_name,
            "date": day.isoformat(),
            "start_time": start.time().isoformat(),
            "end_time": end.time().isoformat(),
            "service": service,
            "book_link": book_link,
        })
    return mocked


 def scrape_bokadirekt_appointments(salon_urls):
    """
    Scrape Bokadirekt salon pages and return a list of appointment dicts.

    This is a simplified example: only the salon name is read from each page
    (via the ``h1.placeName`` selector); the appointment slots themselves are
    randomly generated placeholders. A real scraper would need to precisely
    target the date pickers, time slots and service names on Bokadirekt's
    dynamically loaded pages.

    NOTE: For local development/testing without a full browser environment,
    you might need to mock this function's output or ensure Playwright
    dependencies are correctly set up (e.g., `playwright install chromium`).

    Args:
        salon_urls: Iterable of Bokadirekt salon page URLs to visit.

    Returns:
        A list of dicts with the keys ``salon_name``, ``date``,
        ``start_time``, ``end_time``, ``service`` and ``book_link``; dates
        and times are ISO-formatted strings. A URL that fails to load is
        logged and skipped. If Playwright itself fails before anything was
        collected, two mock appointments are returned instead.
    """
    appointments = []
    try:
        with sync_playwright() as p:
            # Ensure browsers are installed, e.g., 'playwright install chromium'
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()
                for url in salon_urls:
                    try:
                        print(f"Scraping {url}...")
                        page.goto(url, wait_until="domcontentloaded", timeout=60000)
                        # Wait for specific selectors to appear for robustness
                        # (highly dependent on Bokadirekt's current DOM structure).
                        try:
                            page.wait_for_selector('h1.placeName', timeout=10000)
                            salon_name = page.locator("h1.placeName").first.inner_text().strip()
                        except Exception:
                            salon_name = "Unknown Salon"
                            print(f"Could not find salon name for {url}, using default.")
                        # This part is a simulation. In a real scenario, you'd
                        # inspect the DOM to find the actual slots.
                        salon_appointments = _simulate_appointments(salon_name, url)
                        appointments.extend(salon_appointments)
                        # Report this salon's count, not the running total.
                        print(f"Simulated {len(salon_appointments)} appointments for {salon_name}")
                    except Exception as e:
                        # Best effort: one failing salon must not abort the rest.
                        print(f"Error scraping {url}: {e}")
            finally:
                # Close the browser even if creating the page failed.
                browser.close()
    except Exception as e:
        if appointments:
            # A late failure (e.g. while closing the browser) must not throw
            # away results that were already collected.
            print(f"Playwright runtime error: {e}. Keeping {len(appointments)} scraped appointments.")
        else:
            print(f"Playwright initialization/runtime error: {e}. Returning mock data.")
            # Fallback to mock data if Playwright setup fails or is not available
            appointments = _mock_appointments()
    return appointments
 if __name__ == "__main__":
    # Manual smoke run: scrape the salons listed below and print every
    # returned appointment dict, one per line.
    # Add more Bokadirekt salon URLs to the list if available, e.g.
    # "https://www.bokadirekt.se/places/studio-siss-XXXXX" (replace XXXXX
    # with the actual salon ID).
    salon_urls_to_scrape = ["https://www.bokadirekt.se/places/klipphuset-41113"]

    print("Running scraper directly:")
    result = scrape_bokadirekt_appointments(salon_urls_to_scrape)
    for appointment in result:
        print(appointment)