Add scraper.py

2026-06-10 20:11:59 +00:00
parent b9a8e2591c
commit 1b3160958c
1 changed files with 85 additions and 0 deletions
--- a/scraper.py
+++ b/scraper.py
@@ -0,0 +1,85 @@
+from playwright.sync_api import sync_playwright
+import datetime
+import time
+import random
+import os
+
+def scrape_bokadirekt_appointments(salon_urls):
+    """
+    Scrapes Bokadirekt for available appointments from a list of salon URLs.
+    This is a simplified example. A real scraper would need to precisely
+    target elements on Bokadirekt's dynamically loaded pages.
+
+    NOTE: For local development/testing without a full browser environment,
+    you might need to mock this function's output or ensure Playwright
+    dependencies are correctly set up (e.g., `playwright install chromium`).
+    """
+    appointments = []
+    
+    try:
+        with sync_playwright() as p:
+            # Ensure browsers are installed, e.g., 'playwright install chromium'
+            browser = p.chromium.launch(headless=True)
+            page = browser.new_page()
+
+            for url in salon_urls:
+                try:
+                    print(f"Scraping {url}...")
+                    page.goto(url, wait_until="domcontentloaded", timeout=60000)
+                    
+                    # Wait for specific selectors to appear for robustness
+                    # (Highly dependent on Bokadirekt's current DOM structure)
+                    try:
+                        page.wait_for_selector('h1.placeName', timeout=10000)
+                        salon_name = page.locator("h1.placeName").first.inner_text().strip()
+                    except Exception:
+                        salon_name = "Unknown Salon"
+                        print(f"Could not find salon name for {url}, using default.")
+
+                    # This part is a simulation. In a real scenario, you'd inspect the DOM
+                    # to find actual date pickers, time slots, and service names.
+                    today = datetime.date.today()
+                    for _ in range(random.randint(1, 3)): # Simulate finding 1-3 appointments per salon
+                        future_date = today + datetime.timedelta(days=random.randint(0, 7))
+                        start_time = datetime.time(random.randint(9, 17), random.choice([0, 15, 30, 45]))
+                        end_time = (datetime.datetime.combine(future_date, start_time) + datetime.timedelta(minutes=random.randint(45, 120))).time()
+
+                        appointments.append({
+                            "salon_name": salon_name,
+                            "date": future_date.isoformat(),
+                            "start_time": start_time.isoformat(),
+                            "end_time": end_time.isoformat(),
+                            "service": "Klippning (Simulerad)",                            "book_link": url # In a real scenario, this would be a direct booking link for the specific time slot
+                        })
+                    print(f"Simulated {len(appointments)} appointments for {salon_name}")
+
+                except Exception as e:
+                    print(f"Error scraping {url}: {e}")
+
+            browser.close()
+
+    except Exception as e:
+        print(f"Playwright initialization/runtime error: {e}. Returning mock data.")
+        # Fallback to mock data if Playwright setup fails or is not available
+        appointments = [
+            {
+                "salon_name": "Klipphuset",                "date": (datetime.date.today() + datetime.timedelta(days=random.randint(1, 7))).isoformat(),
+                "start_time": f"{random.randint(9,17):02d}:{random.choice([0,30]):02d}:00",                "end_time": f"{random.randint(11,19):02d}:{random.choice([0,30]):02d}:00",                "service": "Herrklippning",                "book_link": "https://www.bokadirekt.se/places/klipphuset-41113"
+            },
+            {
+                "salon_name": "Studio Siss",                "date": (datetime.date.today() + datetime.timedelta(days=random.randint(1, 7))).isoformat(),
+                "start_time": f"{random.randint(9,17):02d}:{random.choice([0,30]):02d}:00",                "end_time": f"{random.randint(11,19):02d}:{random.choice([0,30]):02d}:00",                "service": "Damklippning",                "book_link": "https://www.bokadirekt.se/places/studio-siss-XXXXX" # Placeholder
+            }
+        ]
+    return appointments
+
+if __name__ == "__main__":
+    salon_urls_to_scrape = [
+        "https://www.bokadirekt.se/places/klipphuset-41113",        # Add more Bokadirekt salon URLs here if available
+        # "https://www.bokadirekt.se/places/studio-siss-XXXXX" # Replace XXXXX with actual ID
+    ]
+    
+    print("Running scraper directly:")
+    result = scrape_bokadirekt_appointments(salon_urls_to_scrape)
+    for appt in result:
+        print(appt)