diff --git a/docs/container-health-endpoint.md b/docs/container-health-endpoint.md
new file mode 100644
index 0000000..551f719
--- /dev/null
+++ b/docs/container-health-endpoint.md
@@ -0,0 +1,236 @@
+# Container Health & Metrics Endpoint Implementation
+
+## Creative Container Monitoring Solutions
+
+### 1. Internal Health/Metrics Endpoints
+
+Add these endpoints to each container for self-reporting metrics:
+
+#### Backend Container (Node.js Example)
+```javascript
+const express = require('express');
+const os = require('os');
+const fs = require('fs');
+
+// Health & Metrics endpoint
+app.get('/health/metrics', (req, res) => {
+ const memUsage = process.memoryUsage();
+ const cpuUsage = process.cpuUsage();
+
+ res.json({
+ container: process.env.CONTAINER_NAME || 'backend',
+ timestamp: new Date().toISOString(),
+ uptime: process.uptime(),
+ memory: {
+ usage: `${Math.round(memUsage.heapUsed / 1024 / 1024)}MB`,
+ total: `${Math.round(memUsage.heapTotal / 1024 / 1024)}MB`,
+ percentage: `${Math.round((memUsage.heapUsed / memUsage.heapTotal) * 100)}%`
+ },
+ cpu: {
+ user: cpuUsage.user,
+ system: cpuUsage.system,
+ load: os.loadavg()[0].toFixed(2) + '%'
+ },
+ network: {
+ connections: getActiveConnections(),
+ requests_per_minute: getRequestRate()
+ },
+ disk: {
+ logs: getDiskUsage('/var/log'),
+ temp: getDiskUsage('/tmp')
+ },
+ health: 'healthy',
+ version: process.env.APP_VERSION || '1.0.0'
+ });
+});
+
+function getActiveConnections() {
+ try {
+ const netstat = require('child_process').execSync('netstat -an | grep ESTABLISHED | wc -l', { encoding: 'utf8' });
+ return parseInt(netstat.trim());
+ } catch (e) {
+ return 'N/A';
+ }
+}
+
+function getRequestRate() {
+ // Implement request counter logic
+ return global.requestCounter || 0;
+}
+
+function getDiskUsage(path) {
+ try {
+ const stats = fs.statSync(path);
+ return `${Math.round(stats.size / 1024 / 1024)}MB`;
+ } catch (e) {
+ return 'N/A';
+ }
+}
+```
+
+#### Frontend Container (Nginx + JS Example)
+```nginx
+# Add to nginx.conf
+# NOTE: $connections_active is only defined when nginx is built with
+# ngx_http_stub_status_module.
+location /health/metrics {
+  access_log off;
+  # A `return` body takes its Content-Type from default_type; add_header would
+  # append a second Content-Type header instead of replacing it.
+  default_type application/json;
+  return 200 '{"container":"frontend","status":"healthy","nginx_version":"$nginx_version","connections":"$connections_active","timestamp":"$time_iso8601"}';
+}
+```
+
+### 2. Prometheus-style Metrics Scraping
+
+```javascript
+// In management.js
+const scrapePrometheusMetrics = async (containerUrl) => {
+ try {
+ const response = await fetch(`${containerUrl}/metrics`);
+ const metricsText = await response.text();
+
+ // Parse Prometheus format
+ const metrics = {};
+ metricsText.split('\n').forEach(line => {
+ if (line.startsWith('container_cpu_usage')) {
+ metrics.cpu = line.split(' ')[1] + '%';
+ }
+ if (line.startsWith('container_memory_usage_bytes')) {
+ const bytes = parseInt(line.split(' ')[1]);
+ metrics.memory = Math.round(bytes / 1024 / 1024) + 'MB';
+ }
+ });
+
+ return metrics;
+ } catch (error) {
+ return { error: 'Prometheus metrics unavailable' };
+ }
+};
+```
+
+### 3. Socket.IO Real-time Metrics Broadcasting
+
+```javascript
+// Each container broadcasts its metrics via Socket.IO
+const io = require('socket.io-client');
+const socket = io('http://management-backend:3000');
+
+setInterval(() => {
+ const metrics = {
+ container: process.env.CONTAINER_NAME,
+ cpu: getCurrentCPU(),
+ memory: getCurrentMemory(),
+ timestamp: Date.now()
+ };
+
+ socket.emit('container_metrics', metrics);
+}, 10000); // Every 10 seconds
+
+// Management backend collects these
+io.on('container_metrics', (metrics) => {
+ containerMetricsCache[metrics.container] = metrics;
+});
+```
+
+### 4. Log File Tailing Approach
+
+```javascript
+// Parse container logs for metrics
+const tailContainerLogs = async (containerName) => {
+ try {
+ const { stdout } = await execAsync(`docker logs --tail 50 ${containerName} | grep "METRICS:"`);
+ const logLines = stdout.split('\n').filter(line => line.includes('METRICS:'));
+
+ if (logLines.length > 0) {
+ const lastMetric = logLines[logLines.length - 1];
+ const metricsJson = lastMetric.split('METRICS:')[1];
+ return JSON.parse(metricsJson);
+ }
+ } catch (error) {
+ return { error: 'Log metrics unavailable' };
+ }
+};
+
+// Containers log metrics in structured format
+console.log(`METRICS: ${JSON.stringify({
+ cpu: getCurrentCPU(),
+ memory: getCurrentMemory(),
+ timestamp: new Date().toISOString()
+})}`);
+```
+
+### 5. Shared Volume Metrics Files
+
+```javascript
+// Each container writes metrics to shared volume
+const writeMetricsToFile = () => {
+ const metrics = {
+ container: process.env.CONTAINER_NAME,
+ cpu: getCurrentCPU(),
+ memory: getCurrentMemory(),
+ timestamp: Date.now()
+ };
+
+ fs.writeFileSync(`/shared/metrics/${process.env.CONTAINER_NAME}.json`, JSON.stringify(metrics));
+};
+
+// Management reads from shared volume
+const readSharedMetrics = () => {
+ const metricsDir = '/shared/metrics';
+ const files = fs.readdirSync(metricsDir);
+
+ return files.reduce((acc, file) => {
+ if (file.endsWith('.json')) {
+ const metrics = JSON.parse(fs.readFileSync(path.join(metricsDir, file)));
+ acc[file.replace('.json', '')] = metrics;
+ }
+ return acc;
+ }, {});
+};
+```
+
+### 6. Database-based Metrics Collection
+
+```javascript
+// Containers insert metrics into shared database
+const recordMetrics = async () => {
+ await db.query(`
+ INSERT INTO container_metrics (container_name, cpu_usage, memory_usage, timestamp)
+ VALUES (?, ?, ?, ?)
+ `, [process.env.CONTAINER_NAME, getCurrentCPU(), getCurrentMemory(), new Date()]);
+};
+
+// Management queries latest metrics
+const getLatestMetrics = async () => {
+ const result = await db.query(`
+ SELECT container_name, cpu_usage, memory_usage, timestamp
+ FROM container_metrics
+ WHERE timestamp > NOW() - INTERVAL 1 MINUTE
+ ORDER BY timestamp DESC
+ `);
+
+ return result.reduce((acc, row) => {
+ acc[row.container_name] = {
+ cpu: row.cpu_usage,
+ memory: row.memory_usage,
+ lastUpdate: row.timestamp
+ };
+ return acc;
+ }, {});
+};
+```
+
+## Implementation Priority
+
+1. **Health Endpoints** - Most reliable, direct communication
+2. **Socket.IO Broadcasting** - Real-time, low overhead
+3. **Prometheus Metrics** - Industry standard, rich data
+4. **Shared Volume Files** - Simple, filesystem-based
+5. **Log Tailing** - Works with existing logging
+6. **Database Collection** - Persistent, queryable history
+
+## Benefits
+
+- **Fallback Chain**: If the preferred method fails, the next one in the priority list is tried, so metrics stay available in most failure scenarios
+- **Self-Reporting**: Containers know their own state best
+- **Real-time**: Direct communication provides immediate updates
+- **Standardized**: Each method can provide consistent metric format
+- **Resilient**: If one method fails, others still work
diff --git a/management/src/pages/System.jsx b/management/src/pages/System.jsx
index d457d70..abe6363 100644
--- a/management/src/pages/System.jsx
+++ b/management/src/pages/System.jsx
@@ -103,10 +103,30 @@ const System = () => {
{name.replace('drone-detection-', '').replace('uamils-', '')}
- {metrics.error ? (
+ {metrics.error || metrics.status === 'health_check_failed' ? (
-
Not Available
-
{metrics.message}
+
+ {metrics.status === 'health_check_failed' ? 'Health Check Failed' : 'Not Available'}
+
+
{metrics.error || metrics.message}
+
+ ) : metrics.source === 'docker_compose' ? (
+
+
+ Status
+
+ {metrics.status}
+
+
+
+ Health
+ {metrics.health}
+
+
+ Ports
+ {metrics.ports || 'N/A'}
+
+
Source: Docker Compose
) : (
@@ -116,7 +136,7 @@ const System = () => {
Memory
- {metrics.memory.percentage}
+ {metrics.memory?.percentage || metrics.memory}
Network I/O
@@ -126,6 +146,11 @@ const System = () => {
Disk I/O
{metrics.disk}
+ {metrics.source && (
+
+ Source: {metrics.source.replace(/_/g, ' ').toUpperCase()}
+
+ )}
)}
diff --git a/server/routes/management.js b/server/routes/management.js
index e054f3e..c83a468 100644
--- a/server/routes/management.js
+++ b/server/routes/management.js
@@ -128,33 +128,83 @@ router.get('/system-info', async (req, res) => {
const tenantCount = await Tenant.count();
const userCount = await User.count();
- // Get container metrics using Docker stats
+ // Get container metrics using internal health endpoints
let containerMetrics = {};
+
+ const containerEndpoints = [
+ { name: 'drone-detection-backend', url: 'http://drone-detection-backend:3000/health/metrics' },
+ { name: 'drone-detection-frontend', url: 'http://drone-detection-frontend:80/health/metrics' },
+ { name: 'drone-detection-management', url: 'http://drone-detection-management:3001/health/metrics' }
+ ];
+
+ // Try internal container health endpoints first
try {
- const { stdout } = await execAsync('docker stats --no-stream --format "table {{.Container}}\\t{{.CPUPerc}}\\t{{.MemUsage}}\\t{{.MemPerc}}\\t{{.NetIO}}\\t{{.BlockIO}}"');
- const lines = stdout.trim().split('\n').slice(1); // Remove header
+ const fetch = require('node-fetch');
+ const healthChecks = await Promise.allSettled(
+ containerEndpoints.map(async ({ name, url }) => {
+ const response = await fetch(url, { timeout: 3000 });
+ const metrics = await response.json();
+ return { name, metrics };
+ })
+ );
- containerMetrics = lines.reduce((acc, line) => {
- const [container, cpu, memUsage, memPerc, netIO, blockIO] = line.split('\t');
- if (container.includes('drone-detection') || container.includes('uamils')) {
- acc[container] = {
- cpu: cpu,
- memory: {
- usage: memUsage,
- percentage: memPerc
- },
- network: netIO,
- disk: blockIO
+ healthChecks.forEach((result, index) => {
+ const containerName = containerEndpoints[index].name;
+ if (result.status === 'fulfilled') {
+ containerMetrics[containerName] = result.value.metrics;
+ } else {
+ containerMetrics[containerName] = {
+ status: 'health_check_failed',
+ error: result.reason?.message || 'Health endpoint unavailable'
};
}
- return acc;
- }, {});
- } catch (dockerError) {
- console.log('Docker stats not available:', dockerError.message);
- containerMetrics = {
- error: 'Docker not available or containers not running',
- message: dockerError.message
- };
+ });
+ } catch (healthError) {
+ console.log('Container health checks failed, trying Docker stats...');
+
+ // Fallback to Docker stats if health endpoints fail
+ try {
+ const { stdout } = await execAsync('docker stats --no-stream --format "table {{.Container}}\\t{{.CPUPerc}}\\t{{.MemUsage}}\\t{{.MemPerc}}\\t{{.NetIO}}\\t{{.BlockIO}}"');
+ const lines = stdout.trim().split('\n').slice(1);
+
+ containerMetrics = lines.reduce((acc, line) => {
+ const [container, cpu, memUsage, memPerc, netIO, blockIO] = line.split('\t');
+ if (container.includes('drone-detection') || container.includes('uamils')) {
+ acc[container] = {
+ cpu: cpu,
+ memory: { usage: memUsage, percentage: memPerc },
+ network: netIO,
+ disk: blockIO,
+ source: 'docker_stats'
+ };
+ }
+ return acc;
+ }, {});
+ } catch (dockerError) {
+ // Try container inspection via docker compose
+ try {
+ const { stdout: composeStatus } = await execAsync('docker-compose ps --format json');
+ const containers = JSON.parse(`[${composeStatus.split('\n').filter(line => line.trim()).join(',')}]`);
+
+ containerMetrics = containers.reduce((acc, container) => {
+ if (container.Name && (container.Name.includes('drone-detection') || container.Name.includes('uamils'))) {
+ acc[container.Name] = {
+ status: container.State,
+ health: container.Health || 'unknown',
+ ports: container.Ports,
+ source: 'docker_compose'
+ };
+ }
+ return acc;
+ }, {});
+ } catch (composeError) {
+ containerMetrics = {
+ error: 'All container monitoring methods failed',
+ attempts: ['health_endpoints', 'docker_stats', 'docker_compose'],
+ lastError: composeError.message
+ };
+ }
+ }
}
// Get system memory and CPU info
diff --git a/server/utils/health-endpoint.js b/server/utils/health-endpoint.js
new file mode 100644
index 0000000..2229dd4
--- /dev/null
+++ b/server/utils/health-endpoint.js
@@ -0,0 +1,170 @@
+/**
+ * Container Health & Metrics Endpoint
+ * Add this to your main application containers for self-reporting
+ */
+
+const express = require('express');
+const os = require('os');
+const fs = require('fs');
+const { exec } = require('child_process');
+const { promisify } = require('util');
+const execAsync = promisify(exec);
+
+/**
+ * Health & Metrics endpoint implementation
+ * Add this to your Express app in each container
+ */
+const createHealthEndpoint = (app) => {
+ app.get('/health/metrics', async (req, res) => {
+ try {
+ const containerName = process.env.CONTAINER_NAME || 'unknown-container';
+ const memUsage = process.memoryUsage();
+ const cpuUsage = process.cpuUsage();
+
+ // Get system load average
+ const loadAvg = os.loadavg();
+
+ // Get network connections (if available)
+ let networkConnections = 'N/A';
+ try {
+ const { stdout } = await execAsync('netstat -an | grep ESTABLISHED | wc -l');
+ networkConnections = parseInt(stdout.trim());
+ } catch (e) {
+ // Network info not available
+ }
+
+ // Get disk usage for common paths
+ let diskUsage = {};
+ try {
+ const paths = ['/tmp', '/var/log', '/app'];
+ for (const path of paths) {
+ if (fs.existsSync(path)) {
+ const stats = fs.statSync(path);
+ diskUsage[path] = `${Math.round(stats.size / 1024 / 1024)}MB`;
+ }
+ }
+ } catch (e) {
+ diskUsage = { error: 'Disk info unavailable' };
+ }
+
+ // Calculate CPU percentage (approximate)
+ const cpuPercent = ((cpuUsage.user + cpuUsage.system) / 1000000 / process.uptime() * 100).toFixed(1);
+
+ const metrics = {
+ container: containerName,
+ timestamp: new Date().toISOString(),
+ uptime: Math.round(process.uptime()),
+ health: 'healthy',
+ version: process.env.APP_VERSION || '1.0.0',
+
+ // Memory metrics
+ memory: {
+ usage: `${Math.round(memUsage.heapUsed / 1024 / 1024)}MB / ${Math.round(memUsage.heapTotal / 1024 / 1024)}MB`,
+ percentage: `${Math.round((memUsage.heapUsed / memUsage.heapTotal) * 100)}%`,
+ rss: `${Math.round(memUsage.rss / 1024 / 1024)}MB`,
+ external: `${Math.round(memUsage.external / 1024 / 1024)}MB`
+ },
+
+ // CPU metrics
+ cpu: `${cpuPercent}%`,
+
+ // System metrics
+ system: {
+ platform: os.platform(),
+ arch: os.arch(),
+ nodeVersion: process.version,
+ loadAverage: loadAvg[0].toFixed(2)
+ },
+
+ // Network metrics
+ network: {
+ connections: networkConnections,
+ requests_handled: global.requestCounter || 0
+ },
+
+ // Disk metrics
+ disk: diskUsage,
+
+ // Additional app-specific metrics
+ application: {
+ environment: process.env.NODE_ENV || 'development',
+ pid: process.pid,
+ ppid: process.ppid
+ }
+ };
+
+ res.json({
+ success: true,
+ data: metrics,
+ source: 'internal_health_endpoint'
+ });
+
+ } catch (error) {
+ res.status(500).json({
+ success: false,
+ error: 'Failed to collect metrics',
+ message: error.message,
+ container: process.env.CONTAINER_NAME || 'unknown-container'
+ });
+ }
+ });
+
+ // Simple health check endpoint
+ app.get('/health', (req, res) => {
+ res.json({
+ status: 'healthy',
+ timestamp: new Date().toISOString(),
+ uptime: process.uptime(),
+ container: process.env.CONTAINER_NAME || 'unknown-container'
+ });
+ });
+};
+
+/**
+ * Middleware to count requests (for metrics)
+ */
+const requestCounter = (req, res, next) => {
+ global.requestCounter = (global.requestCounter || 0) + 1;
+ next();
+};
+
+/**
+ * Enhanced logging for metrics
+ */
+const logMetrics = () => {
+ const memUsage = process.memoryUsage();
+ const metrics = {
+ timestamp: new Date().toISOString(),
+ container: process.env.CONTAINER_NAME || 'unknown-container',
+ memory_mb: Math.round(memUsage.heapUsed / 1024 / 1024),
+ uptime_seconds: Math.round(process.uptime()),
+ requests_handled: global.requestCounter || 0
+ };
+
+ console.log(`METRICS: ${JSON.stringify(metrics)}`);
+};
+
+// Example usage in your main app:
+/*
+const app = express();
+
+// Add request counter middleware
+app.use(requestCounter);
+
+// Add health endpoints
+createHealthEndpoint(app);
+
+// Log metrics every 30 seconds
+setInterval(logMetrics, 30000);
+
+// Your other routes...
+app.listen(3000, () => {
+ console.log('Server started with health endpoints');
+});
+*/
+
+module.exports = {
+ createHealthEndpoint,
+ requestCounter,
+ logMetrics
+};