#!/usr/bin/env node
|
|
|
|
/**
|
|
* Scheduled event scraper with node-cron
|
|
* Runs continuously and checks for new events on a schedule
|
|
*
|
|
* Usage:
|
|
* node scripts/scheduled-scraper.js
|
|
*
|
|
* Default schedule: Every 15 minutes
|
|
* Set SCRAPER_CRON_SCHEDULE environment variable to override
|
|
*/
|
|
|
|
import cron from 'node-cron';
|
|
import { runEventScraper, initializeScraperOrganization } from '../src/lib/eventScraper.js';
|
|
import { fileURLToPath } from 'url';
|
|
import { dirname, join } from 'path';
|
|
import { config } from 'dotenv';
|
|
|
|
// Get the directory of this script
|
|
const __filename = fileURLToPath(import.meta.url);
|
|
const __dirname = dirname(__filename);
|
|
|
|
// Load environment variables
|
|
config({ path: join(__dirname, '..', '.env') });
|
|
config({ path: join(__dirname, '..', '.env.local') });
|
|
|
|
// Configuration — both values are overridable via environment variables.
const CRON_SCHEDULE = process.env.SCRAPER_CRON_SCHEDULE || '*/15 * * * *'; // Every 15 minutes
const TIMEZONE = process.env.SCRAPER_TIMEZONE || 'America/Denver'; // Mountain Time

// Mutable run-state shared by the scraper cycle, the status printer,
// and the process-level error handlers.
let isRunning = false;   // re-entrancy guard: true while a scrape cycle is in flight
let successCount = 0;    // number of cycles where runEventScraper reported success
let errorCount = 0;      // failed cycles + unhandled rejections/exceptions
let lastRunTime = null;  // Date when the most recent cycle started, or null if never run
let lastNewEvent = null; // most recent newEvent object returned by runEventScraper, if any
|
|
|
|
/**
|
|
* The scheduled scraper function
|
|
*/
|
|
/**
 * Execute one scrape cycle, guarded against overlapping runs.
 *
 * Updates the module-level counters (`successCount`/`errorCount`) and the
 * `lastRunTime`/`lastNewEvent` bookkeeping; never throws — all failures are
 * caught, counted, and logged.
 */
async function scheduledScraper() {
  // Guard clause: a previous cycle is still in flight, so skip this tick
  // rather than run two scrapes concurrently.
  if (isRunning) {
    console.log('⏳ Scraper already running, skipping this cycle');
    return;
  }

  isRunning = true;
  lastRunTime = new Date();

  try {
    console.log(`🔍 [${lastRunTime.toISOString()}] Running scheduled event scraper...`);

    const result = await runEventScraper();

    if (!result.success) {
      errorCount++;
      console.error(`❌ [${new Date().toISOString()}] ${result.message}`);
      return;
    }

    successCount++;

    if (!result.newEvent) {
      console.log(`✅ [${new Date().toISOString()}] ${result.message}`);
      return;
    }

    lastNewEvent = result.newEvent;
    const { title, venue, category } = result.newEvent;
    console.log(`🎉 [${new Date().toISOString()}] NEW FEATURED EVENT ADDED!`);
    console.log(`   ${title} at ${venue}`);
    console.log(`   Category: ${category}`);

    // Hook point for future webhook notifications:
    // await notifyWebhook(result.newEvent);
  } catch (error) {
    errorCount++;
    console.error(`💥 [${new Date().toISOString()}] Scheduled scraper error:`, error);
  } finally {
    // Always release the re-entrancy guard, even on failure.
    isRunning = false;
  }
}
|
|
|
|
/**
|
|
* Print status information
|
|
*/
|
|
/**
 * Dump the scheduler's current statistics to stdout, one line per field.
 * Reads only the module-level state; has no side effects beyond logging.
 */
function printStatus() {
  const rows = [
    '\n📊 SCRAPER STATUS',
    '==================',
    `Schedule: ${CRON_SCHEDULE} (${TIMEZONE})`,
    `Running: ${isRunning ? 'Yes' : 'No'}`,
    `Success Count: ${successCount}`,
    `Error Count: ${errorCount}`,
    `Last Run: ${lastRunTime ? lastRunTime.toISOString() : 'Never'}`,
  ];

  // Only mention the last new event once one has actually been recorded.
  if (lastNewEvent) {
    rows.push(`Last New Event: ${lastNewEvent.title} (${lastNewEvent.category})`);
  }

  rows.push(`Uptime: ${Math.floor(process.uptime())} seconds`, '==================\n');

  for (const row of rows) {
    console.log(row);
  }
}
|
|
|
|
/**
|
|
* Initialize and start the scheduler
|
|
*/
|
|
/**
 * Initialize the scraper organization, run one immediate scrape, and start
 * the recurring cron job.
 *
 * Exits the process (code 1) if organization initialization fails.
 *
 * @returns {Promise<import('node-cron').ScheduledTask>} the started cron task
 */
async function start() {
  console.log('🚀 Black Canyon Tickets Scheduled Event Scraper');
  console.log('================================================');
  console.log(`Schedule: ${CRON_SCHEDULE}`);
  console.log(`Timezone: ${TIMEZONE}`);
  console.log('Press Ctrl+C to stop\n');

  // Initialize scraper organization if needed
  try {
    console.log('🔧 Checking scraper organization...');
    await initializeScraperOrganization();
    console.log('✅ Scraper organization ready\n');
  } catch (error) {
    console.error('❌ Failed to initialize scraper organization:', error);
    process.exit(1);
  }

  // Run once immediately so a fresh deploy doesn't wait a full cron interval.
  console.log('🏃 Running initial scrape...');
  await scheduledScraper();

  // Create the job paused (`scheduled: false`) so we control the exact start.
  const task = cron.schedule(CRON_SCHEDULE, scheduledScraper, {
    scheduled: false,
    timezone: TIMEZONE,
  });

  task.start();

  // BUG FIX: node-cron's ScheduledTask does not expose nextDates() (that API
  // belongs to the `cron`/node-schedule packages), so calling it here threw a
  // TypeError and crashed startup right after the first scrape. Guard for the
  // method and fall back to describing the schedule instead.
  const nextRun =
    typeof task.nextDates === 'function'
      ? task.nextDates()
      : `per schedule "${CRON_SCHEDULE}"`;
  console.log(`⏰ Scheduler started. Next run: ${nextRun}`);

  // Print status every 5 minutes
  setInterval(printStatus, 5 * 60 * 1000);

  return task;
}
|
|
|
|
/**
|
|
* Graceful shutdown
|
|
*/
|
|
/**
 * Install SIGINT/SIGTERM handlers that stop the cron task, print a final
 * status report, and exit cleanly with code 0.
 *
 * @param {object|null} task - the cron task to stop on shutdown (may be null)
 */
function setupGracefulShutdown(task) {
  const shutdown = (signal) => {
    console.log(`\n🛑 Received ${signal}, shutting down gracefully...`);

    // Stop the scheduler first so no new cycle starts mid-shutdown.
    if (task) {
      task.stop();
      console.log('✅ Scheduler stopped');
    }

    printStatus();
    console.log('👋 Goodbye!');
    process.exit(0);
  };

  // Register one listener per termination signal we care about.
  for (const signal of ['SIGINT', 'SIGTERM']) {
    process.on(signal, () => shutdown(signal));
  }
}
|
|
|
|
/**
|
|
* Handle uncaught errors
|
|
*/
|
|
/**
 * Register process-wide handlers for unhandled promise rejections and
 * uncaught exceptions. Each occurrence is logged and counted in
 * `errorCount`; the process is deliberately kept alive.
 */
function setupErrorHandlers() {
  const recordFailure = (label) => (...details) => {
    console.error(...details);
    errorCount++;
    void label; // label kept for readability at the call sites below
  };

  const onRejection = recordFailure('rejection');
  const onException = recordFailure('exception');

  process.on('unhandledRejection', (reason, promise) =>
    onRejection('Unhandled Rejection at:', promise, 'reason:', reason)
  );

  process.on('uncaughtException', (error) =>
    onException('Uncaught Exception:', error)
  );
}
|
|
|
|
/**
|
|
* Main function
|
|
*/
|
|
/**
 * Entry point: wire up error handlers, start the scheduler, install the
 * graceful-shutdown hooks, and keep the event loop alive indefinitely.
 */
async function main() {
  setupErrorHandlers();

  const task = await start();
  setupGracefulShutdown(task);

  // A no-op interval pins the event loop open until a signal handler exits.
  setInterval(() => {}, 1000);
}
|
|
|
|
// Boot the scheduled scraper; a startup failure is fatal (exit code 1).
main().catch((err) => {
  console.error('Failed to start scheduled scraper:', err);
  process.exit(1);
});