Add scraper.py
This commit is contained in:
85
scraper.py
Normal file
85
scraper.py
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
from playwright.sync_api import sync_playwright
|
||||||
|
import datetime
|
||||||
|
import time
|
||||||
|
import random
|
||||||
|
import os
|
||||||
|
|
||||||
|
def scrape_bokadirekt_appointments(salon_urls):
    """
    Scrapes Bokadirekt for available appointments from a list of salon URLs.

    This is a simplified example: each salon page is opened with Playwright
    and only the salon name is read from it. The appointment slots themselves
    are simulated with random dates and times. A real scraper would need to
    precisely target elements on Bokadirekt's dynamically loaded pages.

    NOTE: For local development/testing without a full browser environment,
    you might need to mock this function's output or ensure Playwright
    dependencies are correctly set up (e.g., `playwright install chromium`).

    Args:
        salon_urls: Iterable of Bokadirekt salon page URLs to visit.

    Returns:
        A list of dicts with the keys "salon_name", "date", "start_time",
        "end_time", "service" and "book_link". Dates and times are ISO
        formatted strings. If Playwright cannot be started, or fails outside
        the per-URL error handling, two mock appointments are returned
        instead of the collected ones.
    """
    appointments = []

    try:
        with sync_playwright() as p:
            # Ensure browsers are installed, e.g., 'playwright install chromium'
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()

                for url in salon_urls:
                    # A failure on one salon must not abort the remaining ones.
                    try:
                        print(f"Scraping {url}...")
                        page.goto(url, wait_until="domcontentloaded", timeout=60000)

                        salon_name = _read_salon_name(page, url)
                        simulated = _simulate_appointments(salon_name, url)
                        appointments.extend(simulated)
                        # Report the count for this salon only, not the
                        # running total across all salons.
                        print(f"Simulated {len(simulated)} appointments for {salon_name}")

                    except Exception as e:
                        print(f"Error scraping {url}: {e}")
            finally:
                # Close the browser even if page creation or the loop fails.
                browser.close()

    except Exception as e:
        print(f"Playwright initialization/runtime error: {e}. Returning mock data.")
        # Fallback to mock data if Playwright setup fails or is not available
        appointments = _mock_appointments()

    return appointments


def _read_salon_name(page, url):
    """Return the salon name shown on the loaded page, or "Unknown Salon"."""
    # Wait for specific selectors to appear for robustness
    # (Highly dependent on Bokadirekt's current DOM structure)
    try:
        page.wait_for_selector('h1.placeName', timeout=10000)
        return page.locator("h1.placeName").first.inner_text().strip()
    except Exception:
        print(f"Could not find salon name for {url}, using default.")
        return "Unknown Salon"


def _simulate_appointments(salon_name, url):
    """Build 1-3 random appointment dicts for one salon within the next week."""
    # This part is a simulation. In a real scenario, you'd inspect the DOM
    # to find actual date pickers, time slots, and service names.
    today = datetime.date.today()
    simulated = []
    for _ in range(random.randint(1, 3)):
        future_date = today + datetime.timedelta(days=random.randint(0, 7))
        start_time = datetime.time(random.randint(9, 17), random.choice([0, 15, 30, 45]))
        # Latest start is 17:45 and the longest slot is 120 minutes, so the
        # end time never wraps past midnight.
        duration = datetime.timedelta(minutes=random.randint(45, 120))
        end_time = (datetime.datetime.combine(future_date, start_time) + duration).time()

        simulated.append({
            "salon_name": salon_name,
            "date": future_date.isoformat(),
            "start_time": start_time.isoformat(),
            "end_time": end_time.isoformat(),
            "service": "Klippning (Simulerad)",
            # In a real scenario, this would be a direct booking link for
            # the specific time slot.
            "book_link": url,
        })
    return simulated


def _mock_appointments():
    """Build the two fallback appointments used when Playwright is unusable."""
    mock_salons = [
        ("Klipphuset", "Herrklippning", "https://www.bokadirekt.se/places/klipphuset-41113"),
        # Placeholder URL: XXXXX stands in for the real salon ID.
        ("Studio Siss", "Damklippning", "https://www.bokadirekt.se/places/studio-siss-XXXXX"),
    ]
    today = datetime.date.today()
    mocks = []
    for salon_name, service, book_link in mock_salons:
        day = today + datetime.timedelta(days=random.randint(1, 7))
        start = datetime.datetime.combine(
            day, datetime.time(random.randint(9, 17), random.choice([0, 30]))
        )
        # Derive the end from the start so it is always later than the start;
        # latest possible end is 17:30 + 120 min = 19:30, still the same day.
        end = start + datetime.timedelta(minutes=random.choice([30, 60, 90, 120]))
        mocks.append({
            "salon_name": salon_name,
            "date": day.isoformat(),
            "start_time": start.time().isoformat(),
            "end_time": end.time().isoformat(),
            "service": service,
            "book_link": book_link,
        })
    return mocks
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke run: scrape the salons listed below and dump every
    # appointment dict to stdout.
    #
    # Add more Bokadirekt salon URLs to the list if available, e.g.
    # "https://www.bokadirekt.se/places/studio-siss-XXXXX"
    # (replace XXXXX with the actual ID).
    urls = ["https://www.bokadirekt.se/places/klipphuset-41113"]

    print("Running scraper directly:")
    for appointment in scrape_bokadirekt_appointments(urls):
        print(appointment)
|
||||||
Reference in New Issue
Block a user