From 047bae2acf8fd02d1233967658212695eb20668e Mon Sep 17 00:00:00 2001 From: Alexander Borg Date: Sun, 14 Sep 2025 18:14:42 +0200 Subject: [PATCH] Fix jwt-token --- docs/container-health-endpoint.md | 236 ++++++++++++++++++++++++++++++ management/src/pages/System.jsx | 33 ++++- server/routes/management.js | 94 +++++++++--- server/utils/health-endpoint.js | 170 +++++++++++++++++++++ 4 files changed, 507 insertions(+), 26 deletions(-) create mode 100644 docs/container-health-endpoint.md create mode 100644 server/utils/health-endpoint.js diff --git a/docs/container-health-endpoint.md b/docs/container-health-endpoint.md new file mode 100644 index 0000000..551f719 --- /dev/null +++ b/docs/container-health-endpoint.md @@ -0,0 +1,236 @@ +# Container Health & Metrics Endpoint Implementation + +## Creative Container Monitoring Solutions + +### 1. Internal Health/Metrics Endpoints + +Add these endpoints to each container for self-reporting metrics: + +#### Backend Container (Node.js Example) +```javascript +const express = require('express'); +const os = require('os'); +const fs = require('fs'); + +// Health & Metrics endpoint +app.get('/health/metrics', (req, res) => { + const memUsage = process.memoryUsage(); + const cpuUsage = process.cpuUsage(); + + res.json({ + container: process.env.CONTAINER_NAME || 'backend', + timestamp: new Date().toISOString(), + uptime: process.uptime(), + memory: { + usage: `${Math.round(memUsage.heapUsed / 1024 / 1024)}MB`, + total: `${Math.round(memUsage.heapTotal / 1024 / 1024)}MB`, + percentage: `${Math.round((memUsage.heapUsed / memUsage.heapTotal) * 100)}%` + }, + cpu: { + user: cpuUsage.user, + system: cpuUsage.system, + load: os.loadavg()[0].toFixed(2) + '%' + }, + network: { + connections: getActiveConnections(), + requests_per_minute: getRequestRate() + }, + disk: { + logs: getDiskUsage('/var/log'), + temp: getDiskUsage('/tmp') + }, + health: 'healthy', + version: process.env.APP_VERSION || '1.0.0' + }); +}); + +function 
getActiveConnections() { + try { + const netstat = require('child_process').execSync('netstat -an | grep ESTABLISHED | wc -l', { encoding: 'utf8' }); + return parseInt(netstat.trim()); + } catch (e) { + return 'N/A'; + } +} + +function getRequestRate() { + // Implement request counter logic + return global.requestCounter || 0; +} + +function getDiskUsage(path) { + try { + const stats = fs.statSync(path); + return `${Math.round(stats.size / 1024 / 1024)}MB`; + } catch (e) { + return 'N/A'; + } +} +``` + +#### Frontend Container (Nginx + JS Example) +```nginx +# Add to nginx.conf +location /health/metrics { + access_log off; + return 200 '{"container":"frontend","status":"healthy","nginx_version":"$nginx_version","connections":"$connections_active","timestamp":"$time_iso8601"}'; + add_header Content-Type application/json; +} +``` + +### 2. Prometheus-style Metrics Scraping + +```javascript +// In management.js +const scrapePrometheusMetrics = async (containerUrl) => { + try { + const response = await fetch(`${containerUrl}/metrics`); + const metricsText = await response.text(); + + // Parse Prometheus format + const metrics = {}; + metricsText.split('\n').forEach(line => { + if (line.startsWith('container_cpu_usage')) { + metrics.cpu = line.split(' ')[1] + '%'; + } + if (line.startsWith('container_memory_usage_bytes')) { + const bytes = parseInt(line.split(' ')[1]); + metrics.memory = Math.round(bytes / 1024 / 1024) + 'MB'; + } + }); + + return metrics; + } catch (error) { + return { error: 'Prometheus metrics unavailable' }; + } +}; +``` + +### 3. 
Socket.IO Real-time Metrics Broadcasting + +```javascript +// Each container broadcasts its metrics via Socket.IO +const io = require('socket.io-client'); +const socket = io('http://management-backend:3000'); + +setInterval(() => { + const metrics = { + container: process.env.CONTAINER_NAME, + cpu: getCurrentCPU(), + memory: getCurrentMemory(), + timestamp: Date.now() + }; + + socket.emit('container_metrics', metrics); +}, 10000); // Every 10 seconds + +// Management backend collects these +io.on('container_metrics', (metrics) => { + containerMetricsCache[metrics.container] = metrics; +}); +``` + +### 4. Log File Tailing Approach + +```javascript +// Parse container logs for metrics +const tailContainerLogs = async (containerName) => { + try { + const { stdout } = await execAsync(`docker logs --tail 50 ${containerName} | grep "METRICS:"`); + const logLines = stdout.split('\n').filter(line => line.includes('METRICS:')); + + if (logLines.length > 0) { + const lastMetric = logLines[logLines.length - 1]; + const metricsJson = lastMetric.split('METRICS:')[1]; + return JSON.parse(metricsJson); + } + } catch (error) { + return { error: 'Log metrics unavailable' }; + } +}; + +// Containers log metrics in structured format +console.log(`METRICS: ${JSON.stringify({ + cpu: getCurrentCPU(), + memory: getCurrentMemory(), + timestamp: new Date().toISOString() +})}`); +``` + +### 5. 
Shared Volume Metrics Files + +```javascript +// Each container writes metrics to shared volume +const writeMetricsToFile = () => { + const metrics = { + container: process.env.CONTAINER_NAME, + cpu: getCurrentCPU(), + memory: getCurrentMemory(), + timestamp: Date.now() + }; + + fs.writeFileSync(`/shared/metrics/${process.env.CONTAINER_NAME}.json`, JSON.stringify(metrics)); +}; + +// Management reads from shared volume +const readSharedMetrics = () => { + const metricsDir = '/shared/metrics'; + const files = fs.readdirSync(metricsDir); + + return files.reduce((acc, file) => { + if (file.endsWith('.json')) { + const metrics = JSON.parse(fs.readFileSync(path.join(metricsDir, file))); + acc[file.replace('.json', '')] = metrics; + } + return acc; + }, {}); +}; +``` + +### 6. Database-based Metrics Collection + +```javascript +// Containers insert metrics into shared database +const recordMetrics = async () => { + await db.query(` + INSERT INTO container_metrics (container_name, cpu_usage, memory_usage, timestamp) + VALUES (?, ?, ?, ?) + `, [process.env.CONTAINER_NAME, getCurrentCPU(), getCurrentMemory(), new Date()]); +}; + +// Management queries latest metrics +const getLatestMetrics = async () => { + const result = await db.query(` + SELECT container_name, cpu_usage, memory_usage, timestamp + FROM container_metrics + WHERE timestamp > NOW() - INTERVAL 1 MINUTE + ORDER BY timestamp DESC + `); + + return result.reduce((acc, row) => { + acc[row.container_name] = { + cpu: row.cpu_usage, + memory: row.memory_usage, + lastUpdate: row.timestamp + }; + return acc; + }, {}); +}; +``` + +## Implementation Priority + +1. **Health Endpoints** - Most reliable, direct communication +2. **Socket.IO Broadcasting** - Real-time, low overhead +3. **Prometheus Metrics** - Industry standard, rich data +4. **Shared Volume Files** - Simple, filesystem-based +5. **Log Tailing** - Works with existing logging +6. 
**Database Collection** - Persistent, queryable history + +## Benefits + +- **Fallback Chain**: Multiple methods ensure metrics are always available +- **Self-Reporting**: Containers know their own state best +- **Real-time**: Direct communication provides immediate updates +- **Standardized**: Each method can provide consistent metric format +- **Resilient**: If one method fails, others still work diff --git a/management/src/pages/System.jsx b/management/src/pages/System.jsx index d457d70..abe6363 100644 --- a/management/src/pages/System.jsx +++ b/management/src/pages/System.jsx @@ -103,10 +103,30 @@ const System = () => {

{name.replace('drone-detection-', '').replace('uamils-', '')}

- {metrics.error ? ( + {metrics.error || metrics.status === 'health_check_failed' ? (
-
Not Available
-
{metrics.message}
+
+ {metrics.status === 'health_check_failed' ? 'Health Check Failed' : 'Not Available'} +
+
{metrics.error || metrics.message}
+
+ ) : metrics.source === 'docker_compose' ? ( +
+
+ Status + + {metrics.status} + +
+
+ Health + {metrics.health} +
+
+ Ports + {metrics.ports || 'N/A'} +
+
Source: Docker Compose
) : (
@@ -116,7 +136,7 @@ const System = () => {
Memory - {metrics.memory.percentage} + {metrics.memory?.percentage || metrics.memory}
Network I/O @@ -126,6 +146,11 @@ const System = () => { Disk I/O {metrics.disk}
+ {metrics.source && ( +
+ Source: {metrics.source.replace('_', ' ').toUpperCase()} +
+ )} )} diff --git a/server/routes/management.js b/server/routes/management.js index e054f3e..c83a468 100644 --- a/server/routes/management.js +++ b/server/routes/management.js @@ -128,33 +128,83 @@ router.get('/system-info', async (req, res) => { const tenantCount = await Tenant.count(); const userCount = await User.count(); - // Get container metrics using Docker stats + // Get container metrics using internal health endpoints let containerMetrics = {}; + + const containerEndpoints = [ + { name: 'drone-detection-backend', url: 'http://drone-detection-backend:3000/health/metrics' }, + { name: 'drone-detection-frontend', url: 'http://drone-detection-frontend:80/health/metrics' }, + { name: 'drone-detection-management', url: 'http://drone-detection-management:3001/health/metrics' } + ]; + + // Try internal container health endpoints first try { - const { stdout } = await execAsync('docker stats --no-stream --format "table {{.Container}}\\t{{.CPUPerc}}\\t{{.MemUsage}}\\t{{.MemPerc}}\\t{{.NetIO}}\\t{{.BlockIO}}"'); - const lines = stdout.trim().split('\n').slice(1); // Remove header + const fetch = require('node-fetch'); + const healthChecks = await Promise.allSettled( + containerEndpoints.map(async ({ name, url }) => { + const response = await fetch(url, { timeout: 3000 }); + const metrics = await response.json(); + return { name, metrics }; + }) + ); - containerMetrics = lines.reduce((acc, line) => { - const [container, cpu, memUsage, memPerc, netIO, blockIO] = line.split('\t'); - if (container.includes('drone-detection') || container.includes('uamils')) { - acc[container] = { - cpu: cpu, - memory: { - usage: memUsage, - percentage: memPerc - }, - network: netIO, - disk: blockIO + healthChecks.forEach((result, index) => { + const containerName = containerEndpoints[index].name; + if (result.status === 'fulfilled') { + containerMetrics[containerName] = result.value.metrics; + } else { + containerMetrics[containerName] = { + status: 'health_check_failed', + error: 
result.reason?.message || 'Health endpoint unavailable' }; } - return acc; - }, {}); - } catch (dockerError) { - console.log('Docker stats not available:', dockerError.message); - containerMetrics = { - error: 'Docker not available or containers not running', - message: dockerError.message - }; + }); + } catch (healthError) { + console.log('Container health checks failed, trying Docker stats...'); + + // Fallback to Docker stats if health endpoints fail + try { + const { stdout } = await execAsync('docker stats --no-stream --format "table {{.Container}}\\t{{.CPUPerc}}\\t{{.MemUsage}}\\t{{.MemPerc}}\\t{{.NetIO}}\\t{{.BlockIO}}"'); + const lines = stdout.trim().split('\n').slice(1); + + containerMetrics = lines.reduce((acc, line) => { + const [container, cpu, memUsage, memPerc, netIO, blockIO] = line.split('\t'); + if (container.includes('drone-detection') || container.includes('uamils')) { + acc[container] = { + cpu: cpu, + memory: { usage: memUsage, percentage: memPerc }, + network: netIO, + disk: blockIO, + source: 'docker_stats' + }; + } + return acc; + }, {}); + } catch (dockerError) { + // Try container inspection via docker compose + try { + const { stdout: composeStatus } = await execAsync('docker-compose ps --format json'); + const containers = JSON.parse(`[${composeStatus.split('\n').filter(line => line.trim()).join(',')}]`); + + containerMetrics = containers.reduce((acc, container) => { + if (container.Name && (container.Name.includes('drone-detection') || container.Name.includes('uamils'))) { + acc[container.Name] = { + status: container.State, + health: container.Health || 'unknown', + ports: container.Ports, + source: 'docker_compose' + }; + } + return acc; + }, {}); + } catch (composeError) { + containerMetrics = { + error: 'All container monitoring methods failed', + attempts: ['health_endpoints', 'docker_stats', 'docker_compose'], + lastError: composeError.message + }; + } + } } // Get system memory and CPU info diff --git 
a/server/utils/health-endpoint.js b/server/utils/health-endpoint.js
new file mode 100644
index 0000000..2229dd4
--- /dev/null
+++ b/server/utils/health-endpoint.js
@@ -0,0 +1,211 @@
+/**
+ * Container Health & Metrics Endpoint
+ * Add this to your main application containers for self-reporting
+ */
+
+const express = require('express');
+const os = require('os');
+const fs = require('fs');
+const { exec } = require('child_process');
+const { promisify } = require('util');
+const execAsync = promisify(exec);
+
+const BYTES_PER_MB = 1024 * 1024;
+
+/** Format a byte count as whole megabytes, e.g. "12MB". */
+const toMB = (bytes) => `${Math.round(bytes / BYTES_PER_MB)}MB`;
+
+/**
+ * Count ESTABLISHED connections reported by netstat.
+ * Best effort: resolves to 'N/A' when the command fails or its output is
+ * not a number, so a NaN never leaks into the JSON response.
+ * @returns {Promise<number|string>}
+ */
+const countEstablishedConnections = async () => {
+  try {
+    const { stdout } = await execAsync('netstat -an | grep ESTABLISHED | wc -l');
+    const count = Number.parseInt(stdout.trim(), 10);
+    return Number.isNaN(count) ? 'N/A' : count;
+  } catch (e) {
+    // Network info not available
+    return 'N/A';
+  }
+};
+
+/**
+ * Report used space on the filesystem that holds each existing path.
+ * fs.statSync(dir).size is only the size of the directory entry itself, so
+ * statfs is used instead. Paths are probed independently so that one failure
+ * does not discard the others; fs.statfsSync is feature-detected because
+ * older Node.js releases do not provide it.
+ * @param {string[]} paths
+ * @returns {Object<string, string>} e.g. { '/tmp': '512MB' }
+ */
+const collectDiskUsage = (paths) => {
+  const usage = {};
+  for (const dir of paths) {
+    if (!fs.existsSync(dir)) {
+      continue;
+    }
+    try {
+      if (typeof fs.statfsSync !== 'function') {
+        usage[dir] = 'N/A';
+        continue;
+      }
+      const { bsize, blocks, bfree } = fs.statfsSync(dir);
+      usage[dir] = toMB((blocks - bfree) * bsize);
+    } catch (e) {
+      usage[dir] = 'N/A';
+    }
+  }
+  return usage;
+};
+
+/**
+ * Health & Metrics endpoint implementation
+ * Add this to your Express app in each container
+ *
+ * Registers GET /health/metrics (detailed process metrics) and GET /health
+ * (lightweight liveness probe) on the given app.
+ * @param {object} app - Express application (anything exposing .get(path, handler))
+ */
+const createHealthEndpoint = (app) => {
+  app.get('/health/metrics', async (req, res) => {
+    try {
+      const containerName = process.env.CONTAINER_NAME || 'unknown-container';
+      const memUsage = process.memoryUsage();
+      const cpuUsage = process.cpuUsage();
+      const uptimeSeconds = process.uptime();
+
+      // Get system load average
+      const loadAvg = os.loadavg();
+
+      // Get network connections (if available)
+      const networkConnections = await countEstablishedConnections();
+
+      // Get disk usage for common paths
+      const diskUsage = collectDiskUsage(['/tmp', '/var/log', '/app']);
+
+      // Approximate CPU percentage: cumulative CPU time (microseconds) over
+      // process uptime, i.e. an average since start rather than a live value
+      const cpuSeconds = (cpuUsage.user + cpuUsage.system) / 1000000;
+      const cpuPercent = (cpuSeconds / uptimeSeconds * 100).toFixed(1);
+
+      const metrics = {
+        container: containerName,
+        timestamp: new Date().toISOString(),
+        uptime: Math.round(uptimeSeconds),
+        health: 'healthy',
+        version: process.env.APP_VERSION || '1.0.0',
+
+        // Memory metrics
+        memory: {
+          usage: `${toMB(memUsage.heapUsed)} / ${toMB(memUsage.heapTotal)}`,
+          percentage: `${Math.round((memUsage.heapUsed / memUsage.heapTotal) * 100)}%`,
+          rss: toMB(memUsage.rss),
+          external: toMB(memUsage.external)
+        },
+
+        // CPU metrics
+        cpu: `${cpuPercent}%`,
+
+        // System metrics
+        system: {
+          platform: os.platform(),
+          arch: os.arch(),
+          nodeVersion: process.version,
+          loadAverage: loadAvg[0].toFixed(2)
+        },
+
+        // Network metrics
+        network: {
+          connections: networkConnections,
+          requests_handled: global.requestCounter || 0
+        },
+
+        // Disk metrics
+        disk: diskUsage,
+
+        // Additional app-specific metrics
+        application: {
+          environment: process.env.NODE_ENV || 'development',
+          pid: process.pid,
+          ppid: process.ppid
+        }
+      };
+
+      res.json({
+        success: true,
+        data: metrics,
+        source: 'internal_health_endpoint'
+      });
+
+    } catch (error) {
+      res.status(500).json({
+        success: false,
+        error: 'Failed to collect metrics',
+        message: error.message,
+        container: process.env.CONTAINER_NAME || 'unknown-container'
+      });
+    }
+  });
+
+  // Simple health check endpoint
+  app.get('/health', (req, res) => {
+    res.json({
+      status: 'healthy',
+      timestamp: new Date().toISOString(),
+      uptime: process.uptime(),
+      container: process.env.CONTAINER_NAME || 'unknown-container'
+    });
+  });
+};
+
+/**
+ * Middleware to count requests (for metrics)
+ */
+const requestCounter = (req, res, next) => {
+  global.requestCounter = (global.requestCounter || 0) + 1;
+  next();
+};
+
+/**
+ * Enhanced logging for metrics
+ */
+const logMetrics = () => {
+  const memUsage = process.memoryUsage();
+  const metrics = {
+    timestamp: new Date().toISOString(),
+    container: process.env.CONTAINER_NAME || 'unknown-container',
+    memory_mb: Math.round(memUsage.heapUsed / 1024 / 1024),
+    uptime_seconds: Math.round(process.uptime()),
+    requests_handled: global.requestCounter || 0
+  };
+
+  console.log(`METRICS: ${JSON.stringify(metrics)}`);
+};
+
+// Example usage in your main app:
+/*
+const app = express();
+
+// Add request counter middleware
+app.use(requestCounter);
+
+// Add health endpoints
+createHealthEndpoint(app);
+
+// Log metrics every 30 seconds
+setInterval(logMetrics, 30000);
+
+// Your other routes...
+app.listen(3000, () => {
+  console.log('Server started with health endpoints');
+});
+*/
+
+module.exports = {
+  createHealthEndpoint,
+  requestCounter,
+  logMetrics
+};