/**
 * Shattered Void MMO - Health Monitoring System
 *
 * This module provides comprehensive health monitoring for all game services,
 * including real-time status checks, performance metrics, and alerting.
 */

const http = require('http');
const { EventEmitter } = require('events');
const os = require('os');

/**
 * Periodically probes a set of named services and samples process/system
 * metrics, keeping the latest status, a bounded history, and a summary.
 *
 * Events emitted:
 *  - 'started' / 'stopped'  — lifecycle.
 *  - 'alert' (source, alert) — `source` is a service name or 'system';
 *    `alert` is `{ type, message, severity }`.
 *  - 'healthChange' (serviceName, newStatus, previousStatus) — a service's
 *    status differs from the previous check.
 */
class HealthMonitor extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {Object<string, object>} [options.services] - Map of service name
   *   to service info. The 'backend' and 'frontend' probes read `info.port`.
   * @param {number} [options.interval=30000] - Milliseconds between check rounds.
   * @param {number} [options.timeout=5000] - HTTP probe timeout in milliseconds.
   * @param {function(string, string): void} [options.onHealthChange] - Called
   *   with (serviceName, newStatus) whenever a service's status changes.
   */
  constructor(options = {}) {
    super();
    this.services = options.services || {};
    this.interval = options.interval || 30000; // 30 seconds
    this.onHealthChange = options.onHealthChange || null;
    this.timeout = options.timeout || 5000; // 5 seconds
    this.healthStatus = {};
    this.metrics = {};
    // NOTE(review): only responseTime and memoryUsage are read by this class;
    // cpuUsage and errorRate are currently unused.
    this.alertThresholds = {
      responseTime: 5000, // 5 seconds
      memoryUsage: 80, // 80%
      cpuUsage: 90, // 90%
      errorRate: 10, // 10%
    };
    this.monitoringInterval = null;
    // Handle of the system-metrics timer, kept so stop() can clear it.
    this.systemMetricsInterval = null;
    this.isRunning = false;
    this.healthHistory = {};

    // Initialize health status for all services
    this.initializeHealthStatus();
  }

  /**
   * Initialize health status tracking for every configured service.
   */
  initializeHealthStatus() {
    Object.keys(this.services).forEach((serviceName) => {
      this.trackService(serviceName);
    });
  }

  /**
   * Create (or reset) the status record and history list for one service.
   *
   * @param {string} serviceName
   */
  trackService(serviceName) {
    this.healthStatus[serviceName] = {
      status: 'unknown',
      lastCheck: null,
      responseTime: null,
      consecutiveFailures: 0,
      // NOTE(review): never updated by this class; use getServiceUptime().
      uptime: 0,
      lastError: null,
    };
    this.healthHistory[serviceName] = [];
  }

  /**
   * Start health monitoring: run one round of checks immediately, then
   * schedule periodic checks and system-metrics sampling.
   *
   * @returns {Promise<void>}
   * @throws {Error} If the monitor is already running.
   */
  async start() {
    if (this.isRunning) {
      throw new Error('Health monitor is already running');
    }

    this.isRunning = true;
    console.log(`🏥 Health monitoring started (interval: ${this.interval}ms)`);

    // Initial health check
    await this.performHealthChecks();

    // stop() may have been called while the initial check was in flight;
    // installing timers now would leave them running with nobody to clear them.
    if (!this.isRunning) {
      return;
    }

    // Start periodic monitoring (replacing any stale timer defensively)
    if (this.monitoringInterval) {
      clearInterval(this.monitoringInterval);
    }
    this.monitoringInterval = setInterval(async () => {
      try {
        await this.performHealthChecks();
      } catch (error) {
        console.error('Health check error:', error);
      }
    }, this.interval);

    // Start system metrics monitoring
    this.startSystemMetricsMonitoring();

    this.emit('started');
  }

  /**
   * Stop health monitoring and release every timer this monitor created.
   * Emits 'stopped' only if the monitor was running.
   */
  stop() {
    // Always release timers, even if metrics sampling was started directly.
    this.clearTimers();

    if (!this.isRunning) {
      return;
    }

    this.isRunning = false;
    console.log('🏥 Health monitoring stopped');
    this.emit('stopped');
  }

  /**
   * Clear the periodic-check timer and the system-metrics timer, if set.
   */
  clearTimers() {
    if (this.monitoringInterval) {
      clearInterval(this.monitoringInterval);
      this.monitoringInterval = null;
    }
    if (this.systemMetricsInterval) {
      clearInterval(this.systemMetricsInterval);
      this.systemMetricsInterval = null;
    }
  }

  /**
   * Perform health checks on all services concurrently, then refresh the
   * overall summary.
   *
   * @returns {Promise<void>}
   */
  async performHealthChecks() {
    const checkPromises = Object.entries(this.services).map(
      ([serviceName, serviceInfo]) => this.checkServiceHealth(serviceName, serviceInfo),
    );

    // allSettled: one failing check must not prevent the others from completing.
    await Promise.allSettled(checkPromises);
    this.updateHealthSummary();
  }

  /**
   * Check health of a specific service, record the outcome, and notify
   * listeners if the status changed.
   *
   * @param {string} serviceName
   * @param {object} serviceInfo - Entry from `this.services`.
   * @returns {Promise<void>}
   */
  async checkServiceHealth(serviceName, serviceInfo) {
    // Services added to `this.services` after construction have no record yet.
    if (!this.healthStatus[serviceName]) {
      this.trackService(serviceName);
    }

    const startTime = Date.now();
    const previousStatus = this.healthStatus[serviceName].status;

    let isHealthy = false;
    let probeError = null;
    try {
      isHealthy = await this.runHealthProbe(serviceName, serviceInfo);
    } catch (error) {
      probeError = error;
    }

    const newStatus = isHealthy ? 'healthy' : 'unhealthy';
    this.updateServiceStatus(serviceName, {
      status: newStatus,
      lastCheck: new Date(),
      responseTime: Date.now() - startTime,
      consecutiveFailures: isHealthy
        ? 0
        : this.healthStatus[serviceName].consecutiveFailures + 1,
      lastError: probeError ? probeError.message : null,
    });

    // Notify outside the probe's try/catch so a faulty listener cannot be
    // mistaken for a failed health check.
    if (previousStatus !== newStatus) {
      this.notifyHealthChange(serviceName, newStatus, previousStatus);
    }

    if (probeError) {
      console.error(`Health check failed for ${serviceName}:`, probeError.message);
    }
  }

  /**
   * Run the probe that matches the service name.
   *
   * @param {string} serviceName
   * @param {object} serviceInfo
   * @returns {Promise<boolean>|boolean} Whether the service is healthy.
   */
  runHealthProbe(serviceName, serviceInfo) {
    switch (serviceName) {
      case 'backend':
        return this.checkHttpService(serviceInfo.port, '/health');
      case 'frontend':
        return this.checkHttpService(serviceInfo.port);
      case 'database':
        return this.checkDatabaseHealth();
      case 'redis':
        return this.checkRedisHealth();
      default:
        // For other services, assume healthy if they exist
        return true;
    }
  }

  /**
   * Invoke the `onHealthChange` callback and emit 'healthChange'. Listener
   * failures (thrown or rejected) are logged and never propagate.
   *
   * @param {string} serviceName
   * @param {string} newStatus
   * @param {string} previousStatus
   */
  notifyHealthChange(serviceName, newStatus, previousStatus) {
    const logFailure = (error) => {
      console.error(`Health change listener failed for ${serviceName}:`, error);
    };

    if (this.onHealthChange) {
      try {
        const result = this.onHealthChange(serviceName, newStatus);
        // An async callback returns a promise; avoid an unhandled rejection.
        if (result && typeof result.catch === 'function') {
          result.catch(logFailure);
        }
      } catch (error) {
        logFailure(error);
      }
    }

    try {
      this.emit('healthChange', serviceName, newStatus, previousStatus);
    } catch (error) {
      logFailure(error);
    }
  }

  /**
   * Check HTTP service health with a GET request to localhost.
   *
   * @param {number} port
   * @param {string} [path='/']
   * @returns {Promise<boolean>} Resolves true for 2xx/3xx responses, false for
   *   other status codes; rejects on connection error or timeout.
   */
  checkHttpService(port, path = '/') {
    return new Promise((resolve, reject) => {
      const options = {
        hostname: 'localhost',
        port,
        path,
        method: 'GET',
        timeout: this.timeout,
      };

      const req = http.request(options, (res) => {
        // Discard the body so the socket is released instead of waiting on
        // unread data.
        res.resume();
        // Consider 2xx and 3xx status codes as healthy
        resolve(res.statusCode >= 200 && res.statusCode < 400);
      });

      req.on('error', reject);

      req.on('timeout', () => {
        // 'timeout' only signals idleness; the request must be torn down
        // explicitly. The error passed here surfaces through 'error' above.
        req.destroy(new Error('Request timeout'));
      });

      req.end();
    });
  }

  /**
   * Check database health by running `SELECT 1` through the app's connection.
   *
   * @returns {Promise<boolean>} false if the module cannot be loaded or the
   *   query fails (the underlying error is intentionally not propagated).
   */
  async checkDatabaseHealth() {
    try {
      // Try to get database connection from the app
      const db = require('../src/database/connection');

      // Simple query to check database connectivity
      await db.raw('SELECT 1');
      return true;
    } catch (error) {
      return false;
    }
  }

  /**
   * Check Redis health by pinging the app's Redis client.
   *
   * @returns {Promise<boolean>} true when DISABLE_REDIS is 'true' or the ping
   *   succeeds; false if there is no client or anything throws.
   */
  async checkRedisHealth() {
    try {
      // Skip if Redis is disabled
      if (process.env.DISABLE_REDIS === 'true') {
        return true;
      }

      // Try to get Redis client from the app
      const redisConfig = require('../src/config/redis');
      if (!redisConfig.client) {
        return false;
      }

      // Simple ping to check Redis connectivity
      await redisConfig.client.ping();
      return true;
    } catch (error) {
      return false;
    }
  }

  /**
   * Merge a status update into the service's record, append it to the
   * history, and evaluate alert conditions.
   *
   * @param {string} serviceName
   * @param {object} statusUpdate - Partial status record.
   */
  updateServiceStatus(serviceName, statusUpdate) {
    this.healthStatus[serviceName] = {
      ...this.healthStatus[serviceName],
      ...statusUpdate,
    };

    // Add to health history
    this.addToHealthHistory(serviceName, statusUpdate);

    // Check for alerts
    this.checkForAlerts(serviceName);
  }

  /**
   * Add health data to history, keeping only the 100 most recent entries.
   *
   * @param {string} serviceName
   * @param {object} statusData - Reads `status`, `responseTime`, `lastError`.
   */
  addToHealthHistory(serviceName, statusData) {
    if (!this.healthHistory[serviceName]) {
      this.healthHistory[serviceName] = [];
    }

    this.healthHistory[serviceName].push({
      timestamp: Date.now(),
      status: statusData.status,
      responseTime: statusData.responseTime,
      error: statusData.lastError,
    });

    // Keep only last 100 entries
    if (this.healthHistory[serviceName].length > 100) {
      this.healthHistory[serviceName] = this.healthHistory[serviceName].slice(-100);
    }
  }

  /**
   * Check for health alerts on one service and emit an 'alert' event for each.
   *
   * @param {string} serviceName
   */
  checkForAlerts(serviceName) {
    const health = this.healthStatus[serviceName];
    const alerts = [];

    // Check consecutive failures
    if (health.consecutiveFailures >= 3) {
      alerts.push({
        type: 'consecutive_failures',
        message: `Service ${serviceName} has failed ${health.consecutiveFailures} consecutive times`,
        severity: 'critical',
      });
    }

    // Check response time
    if (health.responseTime && health.responseTime > this.alertThresholds.responseTime) {
      alerts.push({
        type: 'slow_response',
        message: `Service ${serviceName} response time: ${health.responseTime}ms (threshold: ${this.alertThresholds.responseTime}ms)`,
        severity: 'warning',
      });
    }

    // Emit alerts
    alerts.forEach((alert) => {
      this.emit('alert', serviceName, alert);
    });
  }

  /**
   * Start system metrics monitoring: sample once immediately, then every
   * 10 seconds until stop() is called.
   */
  startSystemMetricsMonitoring() {
    const updateSystemMetrics = () => {
      const memUsage = process.memoryUsage();
      const cpuUsage = process.cpuUsage();
      const totalMem = os.totalmem();
      const freeMem = os.freemem();
      const usedMem = totalMem - freeMem;

      this.metrics.system = {
        timestamp: Date.now(),
        memory: {
          rss: memUsage.rss,
          heapTotal: memUsage.heapTotal,
          heapUsed: memUsage.heapUsed,
          external: memUsage.external,
          // NOTE(review): this is used / currently-allocated heap, not
          // used / heap limit — confirm this is the intended measure.
          usage: Math.round((memUsage.heapUsed / memUsage.heapTotal) * 100),
        },
        cpu: {
          user: cpuUsage.user,
          system: cpuUsage.system,
        },
        systemMemory: {
          total: totalMem,
          free: freeMem,
          used: usedMem,
          usage: Math.round((usedMem / totalMem) * 100),
        },
        uptime: process.uptime(),
        loadAverage: os.loadavg(),
      };

      // Check for system alerts
      this.checkSystemAlerts();
    };

    // Never stack a second timer on top of an existing one.
    if (this.systemMetricsInterval) {
      clearInterval(this.systemMetricsInterval);
    }

    // Update immediately
    updateSystemMetrics();

    // Update every 10 seconds
    this.systemMetricsInterval = setInterval(updateSystemMetrics, 10000);
  }

  /**
   * Check for system-level alerts against the memoryUsage threshold and emit
   * 'alert' events with source 'system'.
   */
  checkSystemAlerts() {
    const metrics = this.metrics.system;
    if (!metrics) return;

    // Memory usage alert
    if (metrics.memory.usage > this.alertThresholds.memoryUsage) {
      this.emit('alert', 'system', {
        type: 'high_memory_usage',
        message: `High memory usage: ${metrics.memory.usage}% (threshold: ${this.alertThresholds.memoryUsage}%)`,
        severity: 'warning',
      });
    }

    // System memory alert
    if (metrics.systemMemory.usage > this.alertThresholds.memoryUsage) {
      this.emit('alert', 'system', {
        type: 'high_system_memory',
        message: `High system memory usage: ${metrics.systemMemory.usage}% (threshold: ${this.alertThresholds.memoryUsage}%)`,
        severity: 'critical',
      });
    }
  }

  /**
   * Update overall health summary in `this.metrics.summary`. Overall health
   * is 'healthy' when no service is 'unhealthy', otherwise 'degraded'.
   */
  updateHealthSummary() {
    const statuses = Object.values(this.healthStatus).map((entry) => entry.status);
    const healthyCount = statuses.filter((status) => status === 'healthy').length;
    const unhealthyCount = statuses.filter((status) => status === 'unhealthy').length;

    this.metrics.summary = {
      timestamp: Date.now(),
      totalServices: statuses.length,
      healthyServices: healthyCount,
      unhealthyServices: unhealthyCount,
      overallHealth: unhealthyCount === 0 ? 'healthy' : 'degraded',
    };
  }

  /**
   * Get current health status.
   *
   * @returns {{services: object, metrics: object, summary: object, isRunning: boolean}}
   *   References to the monitor's live state (not copies).
   */
  getHealthStatus() {
    return {
      services: this.healthStatus,
      metrics: this.metrics,
      summary: this.metrics.summary,
      isRunning: this.isRunning,
    };
  }

  /**
   * Get health history for a service.
   *
   * @param {string} serviceName
   * @returns {object[]} History entries, or an empty array if unknown.
   */
  getHealthHistory(serviceName) {
    return this.healthHistory[serviceName] || [];
  }

  /**
   * Get service uptime as the percentage of 'healthy' entries among the
   * history entries recorded in the last 24 hours.
   *
   * @param {string} serviceName
   * @returns {number} Integer percentage 0-100; 0 when there is no recent history.
   */
  getServiceUptime(serviceName) {
    const history = this.healthHistory[serviceName];
    if (!history || history.length === 0) {
      return 0;
    }

    const oneDayAgo = Date.now() - 24 * 60 * 60 * 1000;
    const recentHistory = history.filter((entry) => entry.timestamp > oneDayAgo);
    if (recentHistory.length === 0) {
      return 0;
    }

    const healthyCount = recentHistory.filter((entry) => entry.status === 'healthy').length;
    return Math.round((healthyCount / recentHistory.length) * 100);
  }

  /**
   * Generate health report.
   *
   * @returns {object} Report with timestamp, summary, per-service details,
   *   system metrics, and an `alerts` array (always empty as built here).
   */
  generateHealthReport() {
    const report = {
      timestamp: new Date().toISOString(),
      summary: this.metrics.summary,
      services: {},
      systemMetrics: this.metrics.system,
      alerts: [],
    };

    Object.keys(this.healthStatus).forEach((serviceName) => {
      const health = this.healthStatus[serviceName];
      const uptime = this.getServiceUptime(serviceName);

      report.services[serviceName] = {
        status: health.status,
        lastCheck: health.lastCheck,
        responseTime: health.responseTime,
        consecutiveFailures: health.consecutiveFailures,
        uptime: `${uptime}%`,
        lastError: health.lastError,
      };
    });

    return report;
  }

  /**
   * Export health data for monitoring systems.
   *
   * @returns {object} Snapshot with service statuses, system metrics, summary,
   *   and a per-service uptime percentage map.
   */
  exportMetrics() {
    return {
      timestamp: Date.now(),
      services: this.healthStatus,
      system: this.metrics.system,
      summary: this.metrics.summary,
      uptime: Object.keys(this.healthStatus).reduce((acc, serviceName) => {
        acc[serviceName] = this.getServiceUptime(serviceName);
        return acc;
      }, {}),
    };
  }
}

module.exports = HealthMonitor;