# Container Health & Metrics Endpoint Implementation

## Creative Container Monitoring Solutions

### 1. Internal Health/Metrics Endpoints

Add these endpoints to each container for self-reporting metrics (a sketch of the matching management-side poller follows section 3).

#### Backend Container (Node.js Example)

```javascript
const express = require('express');
const os = require('os');
const { execSync } = require('child_process');

const app = express();

// Health & Metrics endpoint
app.get('/health/metrics', (req, res) => {
  const memUsage = process.memoryUsage();
  const cpuUsage = process.cpuUsage();

  res.json({
    container: process.env.CONTAINER_NAME || 'backend',
    timestamp: new Date().toISOString(),
    uptime: process.uptime(),
    memory: {
      usage: `${Math.round(memUsage.heapUsed / 1024 / 1024)}MB`,
      total: `${Math.round(memUsage.heapTotal / 1024 / 1024)}MB`,
      percentage: `${Math.round((memUsage.heapUsed / memUsage.heapTotal) * 100)}%`
    },
    cpu: {
      user: cpuUsage.user,
      system: cpuUsage.system,
      load: os.loadavg()[0].toFixed(2) // 1-minute load average, not a percentage
    },
    network: {
      connections: getActiveConnections(),
      requests_per_minute: getRequestRate()
    },
    disk: {
      logs: getDiskUsage('/var/log'),
      temp: getDiskUsage('/tmp')
    },
    health: 'healthy',
    version: process.env.APP_VERSION || '1.0.0'
  });
});

function getActiveConnections() {
  try {
    const netstat = execSync('netstat -an | grep ESTABLISHED | wc -l', { encoding: 'utf8' });
    return parseInt(netstat.trim(), 10);
  } catch (e) {
    return 'N/A';
  }
}

function getRequestRate() {
  // Implement request counter logic
  return global.requestCounter || 0;
}

function getDiskUsage(dirPath) {
  try {
    // `du -sm` reports the directory's total size in MB; fs.statSync on a
    // directory only returns the inode size, not the size of its contents
    const output = execSync(`du -sm ${dirPath}`, { encoding: 'utf8' });
    return `${parseInt(output.split('\t')[0], 10)}MB`;
  } catch (e) {
    return 'N/A';
  }
}
```

#### Frontend Container (Nginx Example)

```nginx
# Add to nginx.conf
# $connections_active requires the stub_status module (included in standard builds)
location /health/metrics {
    access_log off;
    default_type application/json;
    return 200 '{"container":"frontend","status":"healthy","nginx_version":"$nginx_version","connections":"$connections_active","timestamp":"$time_iso8601"}';
}
```

### 2. Prometheus-style Metrics Scraping

```javascript
// In management.js
// Node 18+ provides a global fetch; older versions need node-fetch
const scrapePrometheusMetrics = async (containerUrl) => {
  try {
    const response = await fetch(`${containerUrl}/metrics`);
    const metricsText = await response.text();

    // Parse the Prometheus text exposition format (metric names assume a cAdvisor-style exporter)
    const metrics = {};
    metricsText.split('\n').forEach(line => {
      if (line.startsWith('container_cpu_usage')) {
        metrics.cpu = line.split(' ')[1] + '%';
      }
      if (line.startsWith('container_memory_usage_bytes')) {
        const bytes = parseInt(line.split(' ')[1], 10);
        metrics.memory = Math.round(bytes / 1024 / 1024) + 'MB';
      }
    });

    return metrics;
  } catch (error) {
    return { error: 'Prometheus metrics unavailable' };
  }
};
```

### 3. Socket.IO Real-time Metrics Broadcasting

```javascript
// Each container broadcasts its metrics via Socket.IO
const io = require('socket.io-client');
const socket = io('http://management-backend:3000');

setInterval(() => {
  const metrics = {
    container: process.env.CONTAINER_NAME,
    cpu: getCurrentCPU(),
    memory: getCurrentMemory(),
    timestamp: Date.now()
  };
  socket.emit('container_metrics', metrics);
}, 10000); // Every 10 seconds

// Management backend (Socket.IO server) collects these
const { Server } = require('socket.io');
const ioServer = new Server(3000);
const containerMetricsCache = {};

ioServer.on('connection', (clientSocket) => {
  clientSocket.on('container_metrics', (metrics) => {
    containerMetricsCache[metrics.container] = metrics;
  });
});
```
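Section 1 above shows only the container side of the health-endpoint approach. Below is a minimal sketch of how the management backend might poll those endpoints, assuming Node 18+ (global `fetch` and `AbortSignal.timeout`); the container URLs and the `fetchHealthMetrics` helper name are illustrative, not part of the original design.

```javascript
// In management.js — poll each container's /health/metrics endpoint
// Hostnames and ports are illustrative; replace with your compose service names
const CONTAINERS = ['http://backend:3001', 'http://frontend:80'];

const fetchHealthMetrics = async (containerUrl) => {
  try {
    const response = await fetch(`${containerUrl}/health/metrics`, {
      signal: AbortSignal.timeout(3000) // don't hang on an unresponsive container
    });
    if (!response.ok) throw new Error(`HTTP ${response.status}`);
    return await response.json();
  } catch (error) {
    return { error: 'Health endpoint unavailable' };
  }
};

const collectAllHealthMetrics = async () => {
  const results = await Promise.all(CONTAINERS.map(fetchHealthMetrics));
  return Object.fromEntries(
    results.map((metrics, i) => [metrics.container || CONTAINERS[i], metrics])
  );
};
```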
### 4. Log File Tailing Approach

```javascript
// Parse container logs for metrics
const { promisify } = require('util');
const execAsync = promisify(require('child_process').exec);

const tailContainerLogs = async (containerName) => {
  try {
    const { stdout } = await execAsync(`docker logs --tail 50 ${containerName} | grep "METRICS:"`);
    const logLines = stdout.split('\n').filter(line => line.includes('METRICS:'));

    if (logLines.length > 0) {
      const lastMetric = logLines[logLines.length - 1];
      const metricsJson = lastMetric.split('METRICS:')[1];
      return JSON.parse(metricsJson);
    }
  } catch (error) {
    return { error: 'Log metrics unavailable' };
  }
};

// Containers log metrics in a structured format
console.log(`METRICS: ${JSON.stringify({
  cpu: getCurrentCPU(),
  memory: getCurrentMemory(),
  timestamp: new Date().toISOString()
})}`);
```

### 5. Shared Volume Metrics Files

```javascript
const fs = require('fs');
const path = require('path');

// Each container writes metrics to a shared volume
const writeMetricsToFile = () => {
  const metrics = {
    container: process.env.CONTAINER_NAME,
    cpu: getCurrentCPU(),
    memory: getCurrentMemory(),
    timestamp: Date.now()
  };

  fs.writeFileSync(
    `/shared/metrics/${process.env.CONTAINER_NAME}.json`,
    JSON.stringify(metrics)
  );
};

// Management reads from the shared volume
const readSharedMetrics = () => {
  const metricsDir = '/shared/metrics';
  const files = fs.readdirSync(metricsDir);

  return files.reduce((acc, file) => {
    if (file.endsWith('.json')) {
      const metrics = JSON.parse(fs.readFileSync(path.join(metricsDir, file)));
      acc[file.replace('.json', '')] = metrics;
    }
    return acc;
  }, {});
};
```

### 6. Database-based Metrics Collection

```javascript
// Containers insert metrics into a shared database (MySQL-style placeholders assumed)
const recordMetrics = async () => {
  await db.query(`
    INSERT INTO container_metrics (container_name, cpu_usage, memory_usage, timestamp)
    VALUES (?, ?, ?, ?)
  `, [process.env.CONTAINER_NAME, getCurrentCPU(), getCurrentMemory(), new Date()]);
};

// Management queries the latest metrics
const getLatestMetrics = async () => {
  const result = await db.query(`
    SELECT container_name, cpu_usage, memory_usage, timestamp
    FROM container_metrics
    WHERE timestamp > NOW() - INTERVAL 1 MINUTE
    ORDER BY timestamp DESC
  `);

  return result.reduce((acc, row) => {
    acc[row.container_name] = {
      cpu: row.cpu_usage,
      memory: row.memory_usage,
      lastUpdate: row.timestamp
    };
    return acc;
  }, {});
};
```

## Implementation Priority

1. **Health Endpoints** - Most reliable, direct communication
2. **Socket.IO Broadcasting** - Real-time, low overhead
3. **Prometheus Metrics** - Industry standard, rich data
4. **Shared Volume Files** - Simple, filesystem-based
5. **Log Tailing** - Works with existing logging
6. **Database Collection** - Persistent, queryable history

## Benefits

- **Fallback Chain**: Multiple methods ensure metrics are always available (see the sketch after this list)
- **Self-Reporting**: Containers know their own state best
- **Real-time**: Direct communication provides immediate updates
- **Standardized**: Each method can provide a consistent metric format
- **Resilient**: If one method fails, others still work
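A minimal sketch of how such a fallback chain might be wired together in management.js, assuming the helpers from the sections above (`fetchHealthMetrics`, `scrapePrometheusMetrics`, `readSharedMetrics`, `tailContainerLogs`) are in scope; the ordering and the `getMetricsWithFallback` name are illustrative.

```javascript
// Try each collection method in priority order until one returns usable data
const getMetricsWithFallback = async (containerName, containerUrl) => {
  const methods = [
    () => fetchHealthMetrics(containerUrl),                     // 1. health endpoint
    () => scrapePrometheusMetrics(containerUrl),                // 3. Prometheus scrape
    () => Promise.resolve(readSharedMetrics()[containerName]),  // 4. shared volume
    () => tailContainerLogs(containerName)                      // 5. log tailing
  ];

  for (const method of methods) {
    try {
      const metrics = await method();
      if (metrics && !metrics.error) return metrics;
    } catch (e) {
      // fall through to the next method in the chain
    }
  }
  return { container: containerName, error: 'All metric sources unavailable' };
};
```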