Add scraper.py
This commit is contained in:
85
scraper.py
Normal file
85
scraper.py
Normal file
@@ -0,0 +1,85 @@
|
||||
from playwright.sync_api import sync_playwright
|
||||
import datetime
|
||||
import time
|
||||
import random
|
||||
import os
|
||||
|
||||
def _simulate_appointments(salon_name, url):
    """Return 1-3 randomly generated placeholder appointments for one salon.

    Nothing here is read from the page: dates fall within the next 0-7 days,
    start times are on quarter hours between 09:00 and 17:45, and each slot
    lasts 45-120 minutes.
    """
    today = datetime.date.today()
    simulated = []
    for _ in range(random.randint(1, 3)):
        future_date = today + datetime.timedelta(days=random.randint(0, 7))
        start_time = datetime.time(random.randint(9, 17), random.choice([0, 15, 30, 45]))
        # Latest start (17:45) plus the longest duration (120 min) stays on the
        # same day, so taking .time() of the sum never wraps past midnight.
        duration = datetime.timedelta(minutes=random.randint(45, 120))
        end_time = (datetime.datetime.combine(future_date, start_time) + duration).time()
        simulated.append({
            "salon_name": salon_name,
            "date": future_date.isoformat(),
            "start_time": start_time.isoformat(),
            "end_time": end_time.isoformat(),
            "service": "Klippning (Simulerad)",
            # In a real scenario this would be a direct booking link for the
            # specific time slot rather than the salon page.
            "book_link": url,
        })
    return simulated


def _mock_appointment(salon_name, service, book_link):
    """Return one mock appointment whose end time is always after its start."""
    date = datetime.date.today() + datetime.timedelta(days=random.randint(1, 7))
    start = datetime.datetime.combine(
        date, datetime.time(random.randint(9, 17), random.choice([0, 30]))
    )
    # Derive the end from the start instead of drawing it independently, so a
    # slot can never end before (or at the moment) it begins.
    end = start + datetime.timedelta(minutes=random.choice([60, 90, 120]))
    return {
        "salon_name": salon_name,
        "date": date.isoformat(),
        "start_time": start.time().isoformat(),
        "end_time": end.time().isoformat(),
        "service": service,
        "book_link": book_link,
    }


def scrape_bokadirekt_appointments(salon_urls):
    """
    Scrapes Bokadirekt for available appointments from a list of salon URLs.

    This is a simplified example: each salon page is opened and its name is
    read from the ``h1.placeName`` element, but the appointments themselves
    are randomly simulated. A real scraper would need to precisely target
    elements on Bokadirekt's dynamically loaded pages.

    NOTE: For local development/testing without a full browser environment,
    you might need to mock this function's output or ensure Playwright
    dependencies are correctly set up (e.g., `playwright install chromium`).

    Args:
        salon_urls: Iterable of salon page URLs to visit.

    Returns:
        A list of dicts with the keys ``salon_name``, ``date``, ``start_time``,
        ``end_time``, ``service`` and ``book_link``; dates and times are
        ISO-formatted strings. If Playwright cannot be started or fails
        outside the per-URL handling, two hard-coded mock appointments are
        returned instead of whatever had been collected.
    """
    appointments = []

    try:
        with sync_playwright() as p:
            # Ensure browsers are installed, e.g., 'playwright install chromium'
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()

                for url in salon_urls:
                    # A failure on one salon must not abort the remaining ones.
                    try:
                        print(f"Scraping {url}...")
                        page.goto(url, wait_until="domcontentloaded", timeout=60000)

                        # Wait for specific selectors to appear for robustness
                        # (highly dependent on Bokadirekt's current DOM structure).
                        try:
                            page.wait_for_selector('h1.placeName', timeout=10000)
                            salon_name = page.locator("h1.placeName").first.inner_text().strip()
                        except Exception:
                            salon_name = "Unknown Salon"
                            print(f"Could not find salon name for {url}, using default.")

                        # This part is a simulation. In a real scenario, you'd
                        # inspect the DOM to find actual date pickers, time
                        # slots, and service names.
                        found = _simulate_appointments(salon_name, url)
                        appointments.extend(found)
                        # Report this salon's count, not the running total.
                        print(f"Simulated {len(found)} appointments for {salon_name}")

                    except Exception as e:
                        print(f"Error scraping {url}: {e}")
            finally:
                # Close the browser even if page creation or the loop raised.
                browser.close()

    except Exception as e:
        print(f"Playwright initialization/runtime error: {e}. Returning mock data.")
        # Fallback to mock data if Playwright setup fails or is not available
        appointments = [
            _mock_appointment(
                "Klipphuset",
                "Herrklippning",
                "https://www.bokadirekt.se/places/klipphuset-41113",
            ),
            _mock_appointment(
                "Studio Siss",
                "Damklippning",
                "https://www.bokadirekt.se/places/studio-siss-XXXXX",  # Placeholder
            ),
        ]
    return appointments
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke run: scrape the salons listed below and dump each
    # appointment dict to stdout.
    # Add more Bokadirekt salon URLs here if available, e.g.
    # "https://www.bokadirekt.se/places/studio-siss-XXXXX"
    # (replace XXXXX with the actual ID).
    salon_urls_to_scrape = ["https://www.bokadirekt.se/places/klipphuset-41113"]

    print("Running scraper directly:")

    scraped = scrape_bokadirekt_appointments(salon_urls_to_scrape)
    for appointment in scraped:
        print(appointment)
|
||||
Reference in New Issue
Block a user