Fix jwt-token
This commit is contained in:
236
docs/container-health-endpoint.md
Normal file
236
docs/container-health-endpoint.md
Normal file
@@ -0,0 +1,236 @@
|
||||
# Container Health & Metrics Endpoint Implementation
|
||||
|
||||
## Creative Container Monitoring Solutions
|
||||
|
||||
### 1. Internal Health/Metrics Endpoints
|
||||
|
||||
Add these endpoints to each container for self-reporting metrics:
|
||||
|
||||
#### Backend Container (Node.js Example)
|
||||
```javascript
|
||||
const express = require('express');
|
||||
const os = require('os');
|
||||
const fs = require('fs');
|
||||
|
||||
// Health & metrics endpoint: self-reports this process's resource usage as JSON.
app.get('/health/metrics', (req, res) => {
  const memUsage = process.memoryUsage();
  const cpuUsage = process.cpuUsage();
  // Format a byte count as whole megabytes, e.g. "42MB".
  const toMegabytes = (bytes) => `${Math.round(bytes / 1024 / 1024)}MB`;

  res.json({
    container: process.env.CONTAINER_NAME || 'backend',
    timestamp: new Date().toISOString(),
    uptime: process.uptime(),
    memory: {
      // Heap figures only; percentage is heapUsed relative to heapTotal.
      usage: toMegabytes(memUsage.heapUsed),
      total: toMegabytes(memUsage.heapTotal),
      percentage: `${Math.round((memUsage.heapUsed / memUsage.heapTotal) * 100)}%`
    },
    cpu: {
      user: cpuUsage.user,
      system: cpuUsage.system,
      // The 1-minute load average is a run-queue figure, not a percentage,
      // so it is reported as a plain number without a '%' suffix.
      load: os.loadavg()[0].toFixed(2)
    },
    network: {
      connections: getActiveConnections(),
      requests_per_minute: getRequestRate()
    },
    disk: {
      logs: getDiskUsage('/var/log'),
      temp: getDiskUsage('/tmp')
    },
    health: 'healthy',
    version: process.env.APP_VERSION || '1.0.0'
  });
});
|
||||
|
||||
/**
 * Count the sockets that `netstat -an` reports as ESTABLISHED.
 *
 * The filtering is done in JavaScript instead of a `| grep | wc -l` shell
 * pipeline: a pipeline exits with the status of its last command, so a
 * missing `netstat` used to be reported as 0 connections instead of 'N/A'.
 *
 * @returns {number|string} Number of matching lines, or 'N/A' when netstat
 *   cannot be run.
 */
function getActiveConnections() {
  try {
    const output = require('child_process').execSync('netstat -an', {
      encoding: 'utf8',
      // Keep netstat's own error text out of the application's stderr.
      stdio: ['ignore', 'pipe', 'ignore'],
      // Busy hosts can print more than the default output buffer allows.
      maxBuffer: 16 * 1024 * 1024
    });
    return output.split('\n').filter((line) => line.includes('ESTABLISHED')).length;
  } catch (e) {
    return 'N/A';
  }
}
|
||||
|
||||
/**
 * Return the value currently stored in `global.requestCounter`.
 *
 * TODO: implement request counter logic.
 * NOTE(review): the caller publishes this under `requests_per_minute`, but
 * nothing here converts the stored value into a rate - confirm intent.
 *
 * @returns {number} The stored counter, or 0 when it is unset or falsy.
 */
function getRequestRate() {
  const counted = global.requestCounter;
  return counted ? counted : 0;
}
|
||||
|
||||
/**
 * Report the `stat` size of a filesystem entry in whole megabytes.
 *
 * NOTE(review): for a directory, `stat().size` is the size of the directory
 * entry itself, not the total of the files inside it - confirm this is the
 * figure wanted for paths such as /var/log.
 *
 * @param {string} path - Entry to inspect.
 * @returns {string} Size formatted like "12MB", or 'N/A' when stat fails.
 */
function getDiskUsage(path) {
  let sizeInBytes;
  try {
    sizeInBytes = fs.statSync(path).size;
  } catch (e) {
    return 'N/A';
  }
  return `${Math.round(sizeInBytes / 1024 / 1024)}MB`;
}
|
||||
```
|
||||
|
||||
#### Frontend Container (Nginx + JS Example)
|
||||
```nginx
|
||||
# Add to nginx.conf
|
||||
location /health/metrics {
    access_log off;
    # A `return` with a body is served with the location's default MIME type.
    # Set it here; `add_header Content-Type` would add a second Content-Type
    # header instead of replacing the default one.
    default_type application/json;
    # $connections_active is provided by ngx_http_stub_status_module.
    return 200 '{"container":"frontend","status":"healthy","nginx_version":"$nginx_version","connections":"$connections_active","timestamp":"$time_iso8601"}';
}
|
||||
```
|
||||
|
||||
### 2. Prometheus-style Metrics Scraping
|
||||
|
||||
```javascript
|
||||
// In management.js
|
||||
/**
 * Fetch `${containerUrl}/metrics` and pull CPU and memory figures out of the
 * Prometheus text exposition format.
 *
 * @param {string} containerUrl - Base URL of the container (no trailing slash).
 * @returns {Promise<{cpu?: string, memory?: string} | {error: string}>}
 *   `cpu` as "<value>%", `memory` as "<n>MB"; an `error` object when the
 *   endpoint cannot be fetched or answers with a non-2xx status.
 */
const scrapePrometheusMetrics = async (containerUrl) => {
  // A sample line is `name[{labels}] value [timestamp]`. Label values may
  // contain spaces, so skip past the closing brace before reading the value.
  const readSampleValue = (line) => {
    const labelsEnd = line.lastIndexOf('}');
    const nameEnd = labelsEnd === -1 ? line.search(/\s/) : labelsEnd + 1;
    if (nameEnd === -1) {
      return undefined;
    }
    const [value] = line.slice(nameEnd).trim().split(/\s+/);
    return value === '' ? undefined : value;
  };

  try {
    const response = await fetch(`${containerUrl}/metrics`);
    if (!response.ok) {
      // An error page is not metrics; report it like any other failure.
      throw new Error(`Unexpected HTTP status ${response.status}`);
    }
    const metricsText = await response.text();

    const metrics = {};
    for (const line of metricsText.split('\n')) {
      const value = readSampleValue(line);
      if (value === undefined) {
        continue;
      }
      if (line.startsWith('container_cpu_usage')) {
        metrics.cpu = `${value}%`;
      }
      if (line.startsWith('container_memory_usage_bytes')) {
        // Number() rather than parseInt(): exporters commonly print large
        // values in scientific notation (1.048576e+07), which parseInt
        // would truncate to 1.
        const bytes = Number(value);
        if (Number.isFinite(bytes)) {
          metrics.memory = `${Math.round(bytes / 1024 / 1024)}MB`;
        }
      }
    }

    return metrics;
  } catch (error) {
    return { error: 'Prometheus metrics unavailable' };
  }
};
|
||||
```
|
||||
|
||||
### 3. Socket.IO Real-time Metrics Broadcasting
|
||||
|
||||
```javascript
|
||||
// Each container broadcasts its metrics via Socket.IO
|
||||
const io = require('socket.io-client');
|
||||
const socket = io('http://management-backend:3000');
|
||||
|
||||
setInterval(() => {
|
||||
const metrics = {
|
||||
container: process.env.CONTAINER_NAME,
|
||||
cpu: getCurrentCPU(),
|
||||
memory: getCurrentMemory(),
|
||||
timestamp: Date.now()
|
||||
};
|
||||
|
||||
socket.emit('container_metrics', metrics);
|
||||
}, 10000); // Every 10 seconds
|
||||
|
||||
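// Management backend collects these. NOTE: on the management side `io` must be the Socket.IO *server* instance and the listener must be registered per connected socket; the `io` imported above is the client factory.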
|
||||
io.on('container_metrics', (metrics) => {
|
||||
containerMetricsCache[metrics.container] = metrics;
|
||||
});
|
||||
```
|
||||
|
||||
### 4. Log File Tailing Approach
|
||||
|
||||
```javascript
|
||||
// Parse container logs for metrics
|
||||
/**
 * Read the most recent `METRICS:` line from a container's last 50 log lines
 * and return its JSON payload.
 *
 * @param {string} containerName - Docker container name or ID.
 * @returns {Promise<object>} The parsed metrics, or
 *   `{ error: 'Log metrics unavailable' }` when the name is not a plain
 *   container identifier, the command fails, no metrics line exists, or the
 *   payload is not valid JSON.
 */
const tailContainerLogs = async (containerName) => {
  const marker = 'METRICS:';
  const unavailable = () => ({ error: 'Log metrics unavailable' });

  // The name is interpolated into a shell command, so accept only the
  // characters Docker itself allows in container names and IDs.
  if (!/^[a-zA-Z0-9][a-zA-Z0-9_.-]*$/.test(String(containerName))) {
    return unavailable();
  }

  try {
    // Lines are filtered below, so no `| grep` is needed in the shell.
    const { stdout } = await execAsync(`docker logs --tail 50 ${containerName}`);
    const metricLines = stdout.split('\n').filter((line) => line.includes(marker));
    if (metricLines.length === 0) {
      return unavailable();
    }

    const lastMetric = metricLines[metricLines.length - 1];
    // Take everything after the first marker so a payload that itself
    // contains "METRICS:" is not cut short.
    const metricsJson = lastMetric.slice(lastMetric.indexOf(marker) + marker.length);
    return JSON.parse(metricsJson);
  } catch (error) {
    return unavailable();
  }
};
|
||||
|
||||
// Containers log metrics in structured format
|
||||
console.log(`METRICS: ${JSON.stringify({
|
||||
cpu: getCurrentCPU(),
|
||||
memory: getCurrentMemory(),
|
||||
timestamp: new Date().toISOString()
|
||||
})}`);
|
||||
```
|
||||
|
||||
### 5. Shared Volume Metrics Files
|
||||
|
||||
```javascript
|
||||
// Each container writes metrics to shared volume
|
||||
/**
 * Serialize this container's current metrics and write them to
 * `/shared/metrics/<CONTAINER_NAME>.json`, replacing any previous content.
 */
const writeMetricsToFile = () => {
  const containerName = process.env.CONTAINER_NAME;
  const payload = JSON.stringify({
    container: containerName,
    cpu: getCurrentCPU(),
    memory: getCurrentMemory(),
    timestamp: Date.now()
  });

  fs.writeFileSync(`/shared/metrics/${containerName}.json`, payload);
};
|
||||
|
||||
// Management reads from shared volume
|
||||
/**
 * Read every `*.json` file in `/shared/metrics` into one object keyed by the
 * file name without its `.json` suffix.
 *
 * A file that cannot be read or parsed (for example one that is being
 * rewritten at that moment) is reported as an `{ error }` entry instead of
 * aborting the whole read.
 *
 * @returns {Object<string, object>} Metrics per container.
 * @throws {Error} When the metrics directory itself cannot be listed.
 */
const readSharedMetrics = () => {
  const metricsDir = '/shared/metrics';
  const suffix = '.json';
  const collected = {};

  for (const file of fs.readdirSync(metricsDir)) {
    if (!file.endsWith(suffix)) {
      continue;
    }
    // Strip only the trailing suffix; replace() would remove the first
    // ".json" found anywhere in the name.
    const containerName = file.slice(0, -suffix.length);
    try {
      // Plain join keeps this snippet free of the `path` module, which it
      // never declared.
      collected[containerName] = JSON.parse(fs.readFileSync(`${metricsDir}/${file}`, 'utf8'));
    } catch (error) {
      collected[containerName] = { error: 'Metrics file unreadable' };
    }
  }

  return collected;
};
|
||||
```
|
||||
|
||||
### 6. Database-based Metrics Collection
|
||||
|
||||
```javascript
|
||||
// Containers insert metrics into shared database
|
||||
/**
 * Insert one row with this container's current CPU and memory readings into
 * the `container_metrics` table, stamped with the current time.
 *
 * @returns {Promise<void>} Resolves once the insert has completed.
 */
const recordMetrics = async () => {
  const insertSql = `
    INSERT INTO container_metrics (container_name, cpu_usage, memory_usage, timestamp)
    VALUES (?, ?, ?, ?)
  `;
  const row = [process.env.CONTAINER_NAME, getCurrentCPU(), getCurrentMemory(), new Date()];

  await db.query(insertSql, row);
};
|
||||
|
||||
// Management queries latest metrics
|
||||
/**
 * Fetch the samples recorded during the last minute and keep the newest one
 * per container.
 *
 * @returns {Promise<Object<string, {cpu: *, memory: *, lastUpdate: *}>>}
 *   Latest sample keyed by container name.
 */
const getLatestMetrics = async () => {
  const rows = await db.query(`
    SELECT container_name, cpu_usage, memory_usage, timestamp
    FROM container_metrics
    WHERE timestamp > NOW() - INTERVAL 1 MINUTE
    ORDER BY timestamp DESC
  `);

  // Rows arrive newest-first, so only the first row seen for a container is
  // its latest sample; letting later rows overwrite it would return the
  // oldest sample of the window instead.
  const seen = new Set();
  const latest = {};
  for (const row of rows) {
    if (seen.has(row.container_name)) {
      continue;
    }
    seen.add(row.container_name);
    latest[row.container_name] = {
      cpu: row.cpu_usage,
      memory: row.memory_usage,
      lastUpdate: row.timestamp
    };
  }
  return latest;
};
|
||||
```
|
||||
|
||||
## Implementation Priority
|
||||
|
||||
1. **Health Endpoints** - Most reliable, direct communication
|
||||
2. **Socket.IO Broadcasting** - Real-time, low overhead
|
||||
3. **Prometheus Metrics** - Industry standard, rich data
|
||||
4. **Shared Volume Files** - Simple, filesystem-based
|
||||
5. **Log Tailing** - Works with existing logging
|
||||
6. **Database Collection** - Persistent, queryable history
|
||||
|
||||
## Benefits
|
||||
|
||||
- **Fallback Chain**: Multiple collection methods keep metrics available when an individual source is unavailable
|
||||
- **Self-Reporting**: Containers know their own state best
|
||||
- **Real-time**: Direct communication provides immediate updates
|
||||
- **Standardized**: Each method can provide consistent metric format
|
||||
- **Resilient**: If one method fails, others still work
|
||||
@@ -103,10 +103,30 @@ const System = () => {
|
||||
<h4 className="font-medium text-gray-900 mb-3 truncate" title={name}>
|
||||
{name.replace('drone-detection-', '').replace('uamils-', '')}
|
||||
</h4>
|
||||
{metrics.error ? (
|
||||
{metrics.error || metrics.status === 'health_check_failed' ? (
|
||||
<div className="text-sm text-red-600">
|
||||
<div className="font-medium">Not Available</div>
|
||||
<div className="text-xs mt-1">{metrics.message}</div>
|
||||
<div className="font-medium">
|
||||
{metrics.status === 'health_check_failed' ? 'Health Check Failed' : 'Not Available'}
|
||||
</div>
|
||||
<div className="text-xs mt-1">{metrics.error || metrics.message}</div>
|
||||
</div>
|
||||
) : metrics.source === 'docker_compose' ? (
|
||||
<div className="space-y-2 text-sm">
|
||||
<div className="flex justify-between">
|
||||
<span className="text-gray-500">Status</span>
|
||||
<span className={`font-medium ${metrics.status === 'running' ? 'text-green-600' : 'text-red-600'}`}>
|
||||
{metrics.status}
|
||||
</span>
|
||||
</div>
|
||||
<div className="flex justify-between">
|
||||
<span className="text-gray-500">Health</span>
|
||||
<span className="font-medium">{metrics.health}</span>
|
||||
</div>
|
||||
<div className="flex justify-between">
|
||||
<span className="text-gray-500">Ports</span>
|
||||
<span className="font-medium text-xs">{metrics.ports || 'N/A'}</span>
|
||||
</div>
|
||||
<div className="text-xs text-blue-600 mt-2">Source: Docker Compose</div>
|
||||
</div>
|
||||
) : (
|
||||
<div className="space-y-2 text-sm">
|
||||
@@ -116,7 +136,7 @@ const System = () => {
|
||||
</div>
|
||||
<div className="flex justify-between">
|
||||
<span className="text-gray-500">Memory</span>
|
||||
<span className="font-medium">{metrics.memory.percentage}</span>
|
||||
<span className="font-medium">{metrics.memory?.percentage || metrics.memory}</span>
|
||||
</div>
|
||||
<div className="flex justify-between">
|
||||
<span className="text-gray-500">Network I/O</span>
|
||||
@@ -126,6 +146,11 @@ const System = () => {
|
||||
<span className="text-gray-500">Disk I/O</span>
|
||||
<span className="font-medium text-xs">{metrics.disk}</span>
|
||||
</div>
|
||||
{metrics.source && (
|
||||
<div className="text-xs text-blue-600 mt-2">
|
||||
Source: {metrics.source.replace('_', ' ').toUpperCase()}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -128,33 +128,83 @@ router.get('/system-info', async (req, res) => {
|
||||
const tenantCount = await Tenant.count();
|
||||
const userCount = await User.count();
|
||||
|
||||
// Get container metrics using Docker stats
|
||||
// Get container metrics using internal health endpoints
|
||||
let containerMetrics = {};
|
||||
try {
|
||||
const { stdout } = await execAsync('docker stats --no-stream --format "table {{.Container}}\\t{{.CPUPerc}}\\t{{.MemUsage}}\\t{{.MemPerc}}\\t{{.NetIO}}\\t{{.BlockIO}}"');
|
||||
const lines = stdout.trim().split('\n').slice(1); // Remove header
|
||||
|
||||
containerMetrics = lines.reduce((acc, line) => {
|
||||
const [container, cpu, memUsage, memPerc, netIO, blockIO] = line.split('\t');
|
||||
if (container.includes('drone-detection') || container.includes('uamils')) {
|
||||
acc[container] = {
|
||||
cpu: cpu,
|
||||
memory: {
|
||||
usage: memUsage,
|
||||
percentage: memPerc
|
||||
},
|
||||
network: netIO,
|
||||
disk: blockIO
|
||||
const containerEndpoints = [
|
||||
{ name: 'drone-detection-backend', url: 'http://drone-detection-backend:3000/health/metrics' },
|
||||
{ name: 'drone-detection-frontend', url: 'http://drone-detection-frontend:80/health/metrics' },
|
||||
{ name: 'drone-detection-management', url: 'http://drone-detection-management:3001/health/metrics' }
|
||||
];
|
||||
|
||||
// Try internal container health endpoints first
|
||||
try {
|
||||
const fetch = require('node-fetch');
|
||||
const healthChecks = await Promise.allSettled(
|
||||
containerEndpoints.map(async ({ name, url }) => {
|
||||
const response = await fetch(url, { timeout: 3000 });
|
||||
const metrics = await response.json();
|
||||
return { name, metrics };
|
||||
})
|
||||
);
|
||||
|
||||
healthChecks.forEach((result, index) => {
|
||||
const containerName = containerEndpoints[index].name;
|
||||
if (result.status === 'fulfilled') {
|
||||
containerMetrics[containerName] = result.value.metrics;
|
||||
} else {
|
||||
containerMetrics[containerName] = {
|
||||
status: 'health_check_failed',
|
||||
error: result.reason?.message || 'Health endpoint unavailable'
|
||||
};
|
||||
}
|
||||
return acc;
|
||||
}, {});
|
||||
} catch (dockerError) {
|
||||
console.log('Docker stats not available:', dockerError.message);
|
||||
containerMetrics = {
|
||||
error: 'Docker not available or containers not running',
|
||||
message: dockerError.message
|
||||
};
|
||||
});
|
||||
} catch (healthError) {
|
||||
console.log('Container health checks failed, trying Docker stats...');
|
||||
|
||||
// Fallback to Docker stats if health endpoints fail
|
||||
try {
|
||||
const { stdout } = await execAsync('docker stats --no-stream --format "table {{.Container}}\\t{{.CPUPerc}}\\t{{.MemUsage}}\\t{{.MemPerc}}\\t{{.NetIO}}\\t{{.BlockIO}}"');
|
||||
const lines = stdout.trim().split('\n').slice(1);
|
||||
|
||||
containerMetrics = lines.reduce((acc, line) => {
|
||||
const [container, cpu, memUsage, memPerc, netIO, blockIO] = line.split('\t');
|
||||
if (container.includes('drone-detection') || container.includes('uamils')) {
|
||||
acc[container] = {
|
||||
cpu: cpu,
|
||||
memory: { usage: memUsage, percentage: memPerc },
|
||||
network: netIO,
|
||||
disk: blockIO,
|
||||
source: 'docker_stats'
|
||||
};
|
||||
}
|
||||
return acc;
|
||||
}, {});
|
||||
} catch (dockerError) {
|
||||
// Try container inspection via docker compose
|
||||
try {
|
||||
const { stdout: composeStatus } = await execAsync('docker-compose ps --format json');
|
||||
const containers = JSON.parse(`[${composeStatus.split('\n').filter(line => line.trim()).join(',')}]`);
|
||||
|
||||
containerMetrics = containers.reduce((acc, container) => {
|
||||
if (container.Name && (container.Name.includes('drone-detection') || container.Name.includes('uamils'))) {
|
||||
acc[container.Name] = {
|
||||
status: container.State,
|
||||
health: container.Health || 'unknown',
|
||||
ports: container.Ports,
|
||||
source: 'docker_compose'
|
||||
};
|
||||
}
|
||||
return acc;
|
||||
}, {});
|
||||
} catch (composeError) {
|
||||
containerMetrics = {
|
||||
error: 'All container monitoring methods failed',
|
||||
attempts: ['health_endpoints', 'docker_stats', 'docker_compose'],
|
||||
lastError: composeError.message
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get system memory and CPU info
|
||||
|
||||
170
server/utils/health-endpoint.js
Normal file
170
server/utils/health-endpoint.js
Normal file
@@ -0,0 +1,170 @@
|
||||
/**
|
||||
* Container Health & Metrics Endpoint
|
||||
* Add this to your main application containers for self-reporting
|
||||
*/
|
||||
|
||||
const express = require('express');
|
||||
const os = require('os');
|
||||
const fs = require('fs');
|
||||
const { exec } = require('child_process');
|
||||
const { promisify } = require('util');
|
||||
const execAsync = promisify(exec);
|
||||
|
||||
/**
 * Count the sockets that `netstat -an` reports as ESTABLISHED.
 *
 * Filtering happens here rather than in a `| grep | wc -l` pipeline: a
 * pipeline exits with the status of its last command, so a missing `netstat`
 * used to be reported as 0 connections instead of 'N/A'.
 *
 * @returns {Promise<number|string>} Matching line count, or 'N/A' when
 *   netstat cannot be run.
 */
const countEstablishedConnections = async () => {
  try {
    // Busy hosts can print more than exec's default output buffer allows.
    const { stdout } = await execAsync('netstat -an', { maxBuffer: 16 * 1024 * 1024 });
    return stdout.split('\n').filter((line) => line.includes('ESTABLISHED')).length;
  } catch (e) {
    return 'N/A';
  }
};

/**
 * Report the `stat` size, in whole megabytes, of each common path that exists.
 *
 * NOTE(review): for directories `stat().size` is the size of the directory
 * entry itself, not of its contents - confirm this is the intended figure.
 *
 * @returns {Object<string, string>} Size per existing path, or
 *   `{ error: 'Disk info unavailable' }` if any lookup throws.
 */
const collectDiskUsage = () => {
  const usage = {};
  try {
    for (const path of ['/tmp', '/var/log', '/app']) {
      if (fs.existsSync(path)) {
        usage[path] = `${Math.round(fs.statSync(path).size / 1024 / 1024)}MB`;
      }
    }
    return usage;
  } catch (e) {
    return { error: 'Disk info unavailable' };
  }
};

/**
 * Register the self-reporting health routes on an Express app:
 *
 * - `GET /health/metrics` responds with `{ success, data, source }`; the
 *   metrics themselves are nested under `data`. A failure while collecting
 *   them yields HTTP 500 with `{ success: false, error, message, container }`.
 * - `GET /health` responds with a minimal liveness payload.
 *
 * @param {object} app - Express application (anything exposing `get(path, handler)`).
 */
const createHealthEndpoint = (app) => {
  app.get('/health/metrics', async (req, res) => {
    try {
      const containerName = process.env.CONTAINER_NAME || 'unknown-container';
      const memUsage = process.memoryUsage();
      const cpuUsage = process.cpuUsage();

      // Get system load average
      const loadAvg = os.loadavg();

      // Get network connections (if available)
      const networkConnections = await countEstablishedConnections();

      // Get disk usage for common paths
      const diskUsage = collectDiskUsage();

      // Approximate CPU percentage: total CPU time consumed (microseconds)
      // relative to wall-clock uptime, i.e. an average since process start.
      const cpuPercent = ((cpuUsage.user + cpuUsage.system) / 1000000 / process.uptime() * 100).toFixed(1);

      const metrics = {
        container: containerName,
        timestamp: new Date().toISOString(),
        uptime: Math.round(process.uptime()),
        health: 'healthy',
        version: process.env.APP_VERSION || '1.0.0',

        // Memory metrics
        memory: {
          usage: `${Math.round(memUsage.heapUsed / 1024 / 1024)}MB / ${Math.round(memUsage.heapTotal / 1024 / 1024)}MB`,
          percentage: `${Math.round((memUsage.heapUsed / memUsage.heapTotal) * 100)}%`,
          rss: `${Math.round(memUsage.rss / 1024 / 1024)}MB`,
          external: `${Math.round(memUsage.external / 1024 / 1024)}MB`
        },

        // CPU metrics
        cpu: `${cpuPercent}%`,

        // System metrics
        system: {
          platform: os.platform(),
          arch: os.arch(),
          nodeVersion: process.version,
          loadAverage: loadAvg[0].toFixed(2)
        },

        // Network metrics
        network: {
          connections: networkConnections,
          requests_handled: global.requestCounter || 0
        },

        // Disk metrics
        disk: diskUsage,

        // Additional app-specific metrics
        application: {
          environment: process.env.NODE_ENV || 'development',
          pid: process.pid,
          ppid: process.ppid
        }
      };

      res.json({
        success: true,
        data: metrics,
        source: 'internal_health_endpoint'
      });
    } catch (error) {
      res.status(500).json({
        success: false,
        error: 'Failed to collect metrics',
        message: error.message,
        container: process.env.CONTAINER_NAME || 'unknown-container'
      });
    }
  });

  // Simple health check endpoint
  app.get('/health', (req, res) => {
    res.json({
      status: 'healthy',
      timestamp: new Date().toISOString(),
      uptime: process.uptime(),
      container: process.env.CONTAINER_NAME || 'unknown-container'
    });
  });
};
|
||||
|
||||
/**
 * Express middleware that increments the process-wide `global.requestCounter`
 * once per request and then hands control to the next handler.
 *
 * @param {object} req - Incoming request (unused).
 * @param {object} res - Outgoing response (unused).
 * @param {Function} next - Continues the middleware chain.
 */
const requestCounter = (req, res, next) => {
  const handledSoFar = global.requestCounter || 0;
  global.requestCounter = handledSoFar + 1;
  next();
};
|
||||
|
||||
/**
 * Write one structured metrics line to stdout in the form
 * `METRICS: <json>`, carrying the timestamp, container name, heap usage in
 * MB, uptime in seconds and the number of requests counted so far.
 */
const logMetrics = () => {
  const { heapUsed } = process.memoryUsage();
  const snapshot = JSON.stringify({
    timestamp: new Date().toISOString(),
    container: process.env.CONTAINER_NAME || 'unknown-container',
    memory_mb: Math.round(heapUsed / 1024 / 1024),
    uptime_seconds: Math.round(process.uptime()),
    requests_handled: global.requestCounter || 0
  });

  console.log(`METRICS: ${snapshot}`);
};
|
||||
|
||||
// Example usage in your main app:
|
||||
/*
|
||||
const app = express();
|
||||
|
||||
// Add request counter middleware
|
||||
app.use(requestCounter);
|
||||
|
||||
// Add health endpoints
|
||||
createHealthEndpoint(app);
|
||||
|
||||
// Log metrics every 30 seconds
|
||||
setInterval(logMetrics, 30000);
|
||||
|
||||
// Your other routes...
|
||||
app.listen(3000, () => {
|
||||
console.log('Server started with health endpoints');
|
||||
});
|
||||
*/
|
||||
|
||||
module.exports = {
|
||||
createHealthEndpoint,
|
||||
requestCounter,
|
||||
logMetrics
|
||||
};
|
||||
Reference in New Issue
Block a user