Files
6e0dfdb2-eb3a-44fc-8761-744…/scraper.py
2026-06-10 20:11:59 +00:00

86 lines
4.5 KiB
Python

from playwright.sync_api import sync_playwright
import datetime
import time
import random
import os
def _read_salon_name(page, url):
    """Return the salon name from the loaded page, or "Unknown Salon" on failure."""
    try:
        # Wait for the heading first; the selector depends on Bokadirekt's
        # current DOM structure and may stop matching if the site changes.
        page.wait_for_selector('h1.placeName', timeout=10000)
        return page.locator("h1.placeName").first.inner_text().strip()
    except Exception:
        # Best effort: a missing heading must not abort the whole salon.
        print(f"Could not find salon name for {url}, using default.")
        return "Unknown Salon"


def _simulate_appointments(salon_name, url):
    """Build 1-3 randomly generated placeholder appointments for one salon."""
    today = datetime.date.today()
    simulated = []
    for _ in range(random.randint(1, 3)):
        future_date = today + datetime.timedelta(days=random.randint(0, 7))
        start_time = datetime.time(random.randint(9, 17), random.choice([0, 15, 30, 45]))
        duration = datetime.timedelta(minutes=random.randint(45, 120))
        # Latest start is 17:45 and the longest duration is 120 minutes, so
        # the end time never wraps past midnight.
        end_time = (datetime.datetime.combine(future_date, start_time) + duration).time()
        simulated.append({
            "salon_name": salon_name,
            "date": future_date.isoformat(),
            "start_time": start_time.isoformat(),
            "end_time": end_time.isoformat(),
            "service": "Klippning (Simulerad)",
            # In a real scenario this would be a direct booking link for the
            # specific time slot rather than the salon page.
            "book_link": url,
        })
    return simulated


def _mock_appointments():
    """Return two fallback appointments whose end time always follows the start."""
    mock_salons = [
        ("Klipphuset", "Herrklippning", "https://www.bokadirekt.se/places/klipphuset-41113"),
        # Placeholder: the Studio Siss ID is not known yet.
        ("Studio Siss", "Damklippning", "https://www.bokadirekt.se/places/studio-siss-XXXXX"),
    ]
    today = datetime.date.today()
    mocks = []
    for salon_name, service, link in mock_salons:
        day = today + datetime.timedelta(days=random.randint(1, 7))
        start = datetime.datetime.combine(
            day, datetime.time(random.randint(9, 17), random.choice([0, 30]))
        )
        # Derive the end from the start so it can never precede it; the latest
        # possible end is 19:30, which stays within the same day.
        end = start + datetime.timedelta(minutes=random.choice([30, 60, 90, 120]))
        mocks.append({
            "salon_name": salon_name,
            "date": day.isoformat(),
            "start_time": start.time().isoformat(),
            "end_time": end.time().isoformat(),
            "service": service,
            "book_link": link,
        })
    return mocks


def scrape_bokadirekt_appointments(salon_urls):
    """
    Collect appointments for each Bokadirekt salon URL in ``salon_urls``.

    This is a simplified example: each page is loaded with Playwright and the
    salon name is read from it, but the appointment slots themselves are
    randomly simulated. A real scraper would need to precisely target the
    date pickers, time slots and service names on Bokadirekt's dynamically
    loaded pages.

    NOTE: Playwright needs its browser binaries installed
    (e.g. ``playwright install chromium``).

    Args:
        salon_urls: Iterable of salon page URLs to visit.

    Returns:
        A list of dicts with the keys ``salon_name``, ``date``,
        ``start_time``, ``end_time``, ``service`` and ``book_link``. Dates and
        times are ISO-formatted strings. A failure on a single URL is printed
        and that URL is skipped. If Playwright itself cannot start or fails
        outside the per-URL handling, everything collected so far is discarded
        and two mock appointments are returned instead.
    """
    appointments = []
    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()
                for url in salon_urls:
                    try:
                        print(f"Scraping {url}...")
                        page.goto(url, wait_until="domcontentloaded", timeout=60000)
                        salon_name = _read_salon_name(page, url)
                        found = _simulate_appointments(salon_name, url)
                        appointments.extend(found)
                        # Report this salon's count, not the running total.
                        print(f"Simulated {len(found)} appointments for {salon_name}")
                    except Exception as e:
                        # Keep going: one bad salon page should not stop the rest.
                        print(f"Error scraping {url}: {e}")
            finally:
                # Close the browser even if creating the page or the loop fails.
                browser.close()
    except Exception as e:
        print(f"Playwright initialization/runtime error: {e}. Returning mock data.")
        # Fallback to mock data if Playwright setup fails at runtime.
        appointments = _mock_appointments()
    return appointments
if __name__ == "__main__":
    # Manual smoke run: scrape the salons listed here and dump each result.
    # Add more Bokadirekt salon URLs to this list if available.
    target_urls = [
        "https://www.bokadirekt.se/places/klipphuset-41113",
        # "https://www.bokadirekt.se/places/studio-siss-XXXXX"  # Replace XXXXX with actual ID
    ]
    print("Running scraper directly:")
    for entry in scrape_bokadirekt_appointments(target_urls):
        print(entry)