97 lines
4.5 KiB
Python
97 lines
4.5 KiB
Python
from flask import Flask, jsonify
|
|
from flask_cors import CORS
|
|
from scraper import scrape_bokadirekt_appointments
|
|
import datetime
|
|
import threading
|
|
import os
|
|
|
|
# WSGI application object; the route decorators in this module register on it.
app = Flask(__name__)

# Enable CORS for all origins; adjust for production
CORS(app)
|
|
|
|
# --- Configuration ---
|
|
# Salon URLs to scrape from Bokadirekt. These should be actual salon profile pages.
|
|
# Replace with real URLs as identified from the frontend 'faq' section.
|
|
# Bokadirekt salon profile pages handed to the scraper.
SALON_URLS = [
    "https://www.bokadirekt.se/places/klipphuset-41113",
    # Add other Bokadirekt salon URLs in Kungsbacka here, e.g.:
    # "https://www.bokadirekt.se/places/klippa-kungsbacka-XXXXX",
    # "https://www.bokadirekt.se/places/studio-siss-XXXXX",
    # "https://www.bokadirekt.se/places/det-hander-XXXXX",
    # "https://www.bokadirekt.se/places/by-u.s.-XXXXX",
]
|
|
|
|
# Cache for scraped data
|
|
# Minutes between background scrapes (as mentioned in the frontend description).
SCRAPE_INTERVAL_MINUTES = 30

# Most recent non-empty scrape result; served as-is by the API endpoint.
cached_appointments = []

# Set to a datetime when a scrape last succeeded; None until then.
last_scraped_time = None
|
|
|
|
# --- Scraper Functions ---
|
|
# This function runs in a background thread to update the cache periodically.
|
|
def update_cached_appointments():
    """Refresh the appointment cache, then schedule the next refresh.

    Calls ``scrape_bokadirekt_appointments(SALON_URLS)``. A truthy result
    replaces ``cached_appointments`` and updates ``last_scraped_time``; a
    falsy result leaves the existing cache untouched. Any ``Exception``
    raised by the scrape is logged and swallowed so that the periodic
    schedule keeps running.

    The function always re-arms itself with a ``threading.Timer`` that fires
    after ``SCRAPE_INTERVAL_MINUTES`` minutes.
    """
    global cached_appointments, last_scraped_time
    print(f"[{datetime.datetime.now()}] Starting periodic scrape...")
    try:
        scraped_data = scrape_bokadirekt_appointments(SALON_URLS)
        if scraped_data:
            cached_appointments = scraped_data
            last_scraped_time = datetime.datetime.now()
            print(f"[{datetime.datetime.now()}] Scrape successful. {len(cached_appointments)} appointments cached.")
        else:
            print(f"[{datetime.datetime.now()}] Scraper returned no data. Keeping previous cache if any.")
    except Exception as e:
        # Best-effort: a failed scrape must not break the refresh cycle.
        print(f"[{datetime.datetime.now()}] Error during periodic scrape: {e}")

    # Schedule the next scrape. Timer threads are non-daemon by default, which
    # would keep the interpreter alive (and rescheduling forever) after the
    # web server has stopped; mark it daemon so the process can exit cleanly.
    next_run = threading.Timer(SCRAPE_INTERVAL_MINUTES * 60, update_cached_appointments)
    next_run.daemon = True
    next_run.start()
|
|
|
|
# Synchronous update for initial load or immediate refresh if needed.
|
|
def update_cached_appointments_sync():
    """Scrape once, in the calling thread, and store the result in the cache.

    A truthy result from ``scrape_bokadirekt_appointments(SALON_URLS)``
    replaces ``cached_appointments`` and stamps ``last_scraped_time``. A
    falsy result or a raised ``Exception`` is logged and leaves both
    globals unchanged. Nothing is rescheduled.
    """
    global cached_appointments, last_scraped_time
    print(f"[{datetime.datetime.now()}] Performing synchronous scrape...")
    try:
        fresh = scrape_bokadirekt_appointments(SALON_URLS)
        if not fresh:
            print(f"[{datetime.datetime.now()}] Synchronous scraper returned no data.")
            return
        cached_appointments = fresh
        last_scraped_time = datetime.datetime.now()
        print(f"[{datetime.datetime.now()}] Synchronous scrape successful. {len(cached_appointments)} appointments cached.")
    except Exception as exc:
        print(f"[{datetime.datetime.now()}] Error during synchronous scrape: {exc}")
|
|
|
|
|
|
# --- API Endpoints ---
|
|
@app.route('/api/appointments', methods=['GET'])
def get_appointments():
    """Serve the currently cached appointments as JSON.

    The response carries the cached list under ``data``, the ISO-8601 time
    of the last successful scrape (or the string ``"Never"``) under
    ``last_updated``, and a fixed ``message``. No scrape is triggered here;
    the handler only reads the module-level cache.
    """
    # Staleness is not checked: whatever the scraper last stored is returned.
    if last_scraped_time:
        updated_at = last_scraped_time.isoformat()
    else:
        updated_at = "Never"

    payload = {
        "data": cached_appointments,
        "last_updated": updated_at,
        "message": "Appointments data from Klipptider backend.",
    }
    return jsonify(payload)
|
|
|
|
@app.route('/', methods=['GET'])
def health_check():
    """Return a fixed plain-text message confirming the service is up."""
    status_message = "Klipptider Backend is running!"
    return status_message
|
|
|
|
# --- Main execution ---
|
|
if __name__ == '__main__':
    # Perform an initial scrape to populate the cache immediately on startup.
    print("Performing initial scrape...")
    update_cached_appointments_sync()
    print(f"Initial scrape complete. Cache size: {len(cached_appointments)}")

    # Start the periodic scraper in a background thread if there are URLs to scrape.
    # Note: For production, consider using a dedicated task queue (e.g., Celery) or
    # a cron job for robust background task management, instead of in-process timers.
    if SALON_URLS:
        print(f"Starting background scraper to update every {SCRAPE_INTERVAL_MINUTES} minutes.")
        # The timer calls update_cached_appointments, which reschedules itself.
        # Timer threads are non-daemon by default and would keep the process
        # alive after the server stops (e.g. on Ctrl+C); mark it daemon so
        # shutdown is not blocked by a pending scrape.
        scrape_timer = threading.Timer(SCRAPE_INTERVAL_MINUTES * 60, update_cached_appointments)
        scrape_timer.daemon = True
        scrape_timer.start()
    else:
        print("No salon URLs configured, background scraper not started.")

    # Run the Flask app.
    # Keep debug=False: the debug reloader re-executes this module in a child
    # process, which would run the initial scrape and start the timer twice.
    app.run(host='0.0.0.0', port=5000, debug=False)
|