Fix jwt-token
This commit is contained in:
@@ -1,83 +1,188 @@
|
||||
# Container Health & Metrics Endpoint Implementation
|
||||
|
||||
## Creative Container Monitoring Solutions
|
||||
## How It Works - Different Approaches Explained
|
||||
|
||||
### 1. Internal Health/Metrics Endpoints
|
||||
### 🎯 **Current Implementation: Multi-Layered Detection**
|
||||
|
||||
Add these endpoints to each container for self-reporting metrics:
|
||||
The system I just implemented uses a **fallback chain** approach - NO agents required! Here's how:
|
||||
|
||||
#### Backend Container (Node.js Example)
|
||||
#### **Method 1: Built-in Health Endpoints (Recommended)**
|
||||
```javascript
|
||||
// Add to your existing Express.js containers
|
||||
const express = require('express');
|
||||
const os = require('os');
|
||||
const fs = require('fs');
|
||||
const app = express();
|
||||
|
||||
// Health & Metrics endpoint
|
||||
// Simple addition to existing code - no agent needed!
|
||||
// Express route: answers GET /health/metrics with a JSON snapshot of this
// process (memory, CPU, network, disk, health).
// NOTE(review): this span appears to interleave the removed and the added lines
// of a diff (the bare `|` / `||||` lines are rendering residue), so as written it
// is not valid JavaScript -- confirm which variant is intended before reusing it.
app.get('/health/metrics', (req, res) => {
|
||||
// Memory counters of the current Node.js process (heapUsed / heapTotal are read below).
const memUsage = process.memoryUsage();
|
||||
// User and system CPU time consumed by the current process.
const cpuUsage = process.cpuUsage();
|
||||

||||
res.json({
|
||||
// Container label: CONTAINER_NAME from the environment, defaulting to 'backend'.
container: process.env.CONTAINER_NAME || 'backend',
|
||||
timestamp: new Date().toISOString(),
|
||||
// Seconds this process has been running.
uptime: process.uptime(),
|
||||
// NOTE(review): getCurrentCPU is not defined anywhere in the visible snippet, and
// the `cpu` key is assigned again further down -- in an object literal the later
// value would win.
cpu: getCurrentCPU(),
|
||||
memory: {
|
||||
// Heap figures are converted from bytes to whole megabytes.
usage: `${Math.round(memUsage.heapUsed / 1024 / 1024)}MB`,
|
||||
total: `${Math.round(memUsage.heapTotal / 1024 / 1024)}MB`,
|
||||
// Share of the currently allocated V8 heap that is in use (heapUsed / heapTotal),
// not a share of the container's memory limit.
percentage: `${Math.round((memUsage.heapUsed / memUsage.heapTotal) * 100)}%`
|
||||
},
|
||||
cpu: {
|
||||
user: cpuUsage.user,
|
||||
system: cpuUsage.system,
|
||||
// NOTE(review): os.loadavg()[0] is the 1-minute load average, not a percentage;
// the '%' suffix looks misleading -- confirm the intended unit.
load: os.loadavg()[0].toFixed(2) + '%'
|
||||
},
|
||||
network: {
|
||||
connections: getActiveConnections(),
|
||||
// NOTE(review): getRequestRate() returns global.requestCounter as-is; whether that
// is really a per-minute figure depends on code not shown here.
requests_per_minute: getRequestRate()
|
||||
},
|
||||
disk: {
|
||||
logs: getDiskUsage('/var/log'),
|
||||
temp: getDiskUsage('/tmp')
|
||||
},
|
||||
// Health is hard-coded; no check is performed in this handler.
health: 'healthy',
|
||||
// NOTE(review): no trailing comma here although more properties follow -- a
// syntax error as rendered.
version: process.env.APP_VERSION || '1.0.0'
|
||||
// NOTE(review): `uptime` and `health` below repeat keys already set above.
uptime: process.uptime(),
|
||||
health: 'healthy'
|
||||
});
|
||||
});
|
||||
|
||||
/**
 * Count established connections by shelling out to `netstat`.
 *
 * Best-effort: any failure (netstat missing, non-zero exit, timeout,
 * unparsable output) yields the placeholder 'N/A' instead of throwing.
 *
 * @returns {number|string} Non-negative integer count, or 'N/A' when it
 *   cannot be determined.
 */
function getActiveConnections() {
  try {
    const output = require('child_process').execSync(
      'netstat -an | grep ESTABLISHED | wc -l',
      // execSync blocks the event loop, so bound how long a stuck netstat can
      // hold up the request; a timeout throws and is reported as 'N/A'.
      { encoding: 'utf8', timeout: 2000 }
    );
    const count = Number.parseInt(output.trim(), 10);
    // Unexpected output parses to NaN, which JSON.stringify would emit as
    // null; report the same 'N/A' placeholder used for command failures.
    return Number.isNaN(count) ? 'N/A' : count;
  } catch (e) {
    return 'N/A';
  }
}
|
||||
|
||||
/**
 * Report the request counter published on the global object.
 *
 * TODO: implement real request-rate logic. This only echoes
 * `globalThis.requestCounter`; nothing here converts it into a per-minute rate.
 *
 * @returns {number} The counter as a finite, non-negative number; 0 when the
 *   counter is unset or not numeric.
 */
function getRequestRate() {
  const value = Number(globalThis.requestCounter ?? 0);
  // Guard against non-numeric or negative garbage ending up in the metrics payload.
  return Number.isFinite(value) && value >= 0 ? value : 0;
}
|
||||
|
||||
/**
 * Report how much space the files under `path` occupy, in whole megabytes.
 *
 * For a directory, the sizes of all files beneath it are summed recursively
 * (stat-ing the directory alone only yields the size of the directory entry,
 * not of its contents). For a regular file, its own size is reported.
 * Symbolic links inside the tree are not followed, which avoids cycles and
 * double counting. Entries that vanish or cannot be read mid-walk are skipped.
 *
 * The walk is synchronous, so very large trees will block the event loop.
 *
 * @param {string} path - File or directory to measure.
 * @returns {string} Size formatted like '12MB', or 'N/A' when `path` itself
 *   cannot be read.
 */
function getDiskUsage(path) {
  // Total bytes for `target`, given its already-fetched stats.
  const sumBytes = (target, stats) => {
    if (!stats.isDirectory()) {
      return stats.size;
    }
    let total = 0;
    for (const entry of fs.readdirSync(target)) {
      const child = `${target}/${entry}`;
      try {
        // lstat: count the link itself rather than whatever it points to.
        total += sumBytes(child, fs.lstatSync(child));
      } catch (e) {
        // Best-effort: the entry was removed or is unreadable; leave it out.
      }
    }
    return total;
  };

  try {
    const bytes = sumBytes(path, fs.statSync(path));
    return `${Math.round(bytes / 1024 / 1024)}MB`;
  } catch (e) {
    return 'N/A';
  }
}
|
||||
```
|
||||
|
||||
#### Frontend Container (Nginx + JS Example)
|
||||
```nginx
|
||||
# Add to nginx.conf
|
||||
# Static JSON health endpoint answered by nginx itself (no upstream involved).
# $connections_active is provided by ngx_http_stub_status_module, so that
# module must be available in the nginx build.
location /health/metrics {
    # Keep health probes out of the access log.
    access_log off;
    # Set the response type with default_type: `add_header Content-Type ...`
    # would append a second Content-Type header next to nginx's default one
    # instead of replacing it.
    default_type application/json;
    return 200 '{"container":"frontend","status":"healthy","nginx_version":"$nginx_version","connections":"$connections_active","timestamp":"$time_iso8601"}';
}
|
||||
**✅ Pros**: Direct from container, accurate, real-time
|
||||
**❌ Cons**: Requires code changes in each container
|
||||
|
||||
#### **Method 2: Docker Stats API (Current Fallback)**
|
||||
```javascript
|
||||
// From management container - queries Docker daemon
|
||||
const { stdout } = await execAsync('docker stats --no-stream --format "table {{.Container}}\\t{{.CPUPerc}}\\t{{.MemUsage}}"');
|
||||
```
|
||||
|
||||
**✅ Pros**: Works with ANY container, no code changes needed
|
||||
**❌ Cons**: Requires Docker daemon access
|
||||
|
||||
#### **Method 3: Docker Compose Status**
|
||||
```javascript
|
||||
// Queries docker-compose for container states
|
||||
const { stdout } = await execAsync('docker-compose ps --format json');
|
||||
```
|
||||
|
||||
**✅ Pros**: Basic status info, works everywhere
|
||||
**❌ Cons**: Limited metrics, just status/health
|
||||
|
||||
---
|
||||
|
||||
## 🤖 **Alternative: Agent-Based Approaches**
|
||||
|
||||
### **Option A: Sidecar Container Pattern**
|
||||
```yaml
|
||||
# docker-compose.yml
|
||||
services:
|
||||
app:
|
||||
image: my-app:latest
|
||||
|
||||
metrics-agent:
|
||||
image: metrics-agent:latest
|
||||
volumes:
|
||||
- /proc:/host/proc:ro
|
||||
- /sys:/host/sys:ro
|
||||
environment:
|
||||
- TARGET_CONTAINER=app
|
||||
```
|
||||
|
||||
**How it works**: Deploy a metrics agent container alongside each service
|
||||
**✅ Pros**: No code changes, detailed system metrics
|
||||
**❌ Cons**: Extra containers, more complex deployment
|
||||
|
||||
### **Option B: In-Container Agent Process**
|
||||
```dockerfile
|
||||
# Add to existing Dockerfile
|
||||
FROM node:18
|
||||
COPY . /app
|
||||
COPY metrics-agent /usr/local/bin/
|
||||
RUN chmod +x /usr/local/bin/metrics-agent
|
||||
|
||||
# Start both app and agent
|
||||
CMD ["sh", "-c", "metrics-agent & npm start"]
|
||||
```
|
||||
|
||||
**How it works**: Runs a metrics collection process inside each container
|
||||
**✅ Pros**: Single container, detailed metrics
|
||||
**❌ Cons**: Modifies container, uses more resources
|
||||
|
||||
### **Option C: External Monitoring Tools**
|
||||
|
||||
#### **Prometheus + Node Exporter**
|
||||
```yaml
|
||||
services:
|
||||
node-exporter:
|
||||
image: prom/node-exporter
|
||||
ports:
|
||||
- "9100:9100"
|
||||
volumes:
|
||||
- /proc:/host/proc:ro
|
||||
- /sys:/host/sys:ro
|
||||
- /:/rootfs:ro
|
||||
```
|
||||
|
||||
#### **cAdvisor (Container Advisor)**
|
||||
```yaml
|
||||
services:
|
||||
cadvisor:
|
||||
image: gcr.io/cadvisor/cadvisor:latest
|
||||
ports:
|
||||
- "8080:8080"
|
||||
volumes:
|
||||
- /:/rootfs:ro
|
||||
- /var/run:/var/run:rw
|
||||
- /sys:/sys:ro
|
||||
- /var/lib/docker/:/var/lib/docker:ro
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔧 **Recommended Implementation Strategy**
|
||||
|
||||
### **Phase 1: Docker Stats (Current)**
|
||||
- ✅ **Already implemented**
|
||||
- Works immediately with existing containers
|
||||
- No code changes required
|
||||
- Provides CPU, Memory, Network, Disk I/O
|
||||
|
||||
### **Phase 2: Add Health Endpoints**
|
||||
```javascript
|
||||
// Add these two lines to each container's main file
|
||||
const { createHealthEndpoint } = require('./utils/health-endpoint');
|
||||
createHealthEndpoint(app); // app is your Express instance
|
||||
```
|
||||
|
||||
### **Phase 3: Enhanced Monitoring (Optional)**
|
||||
- Add Prometheus metrics
|
||||
- Implement custom business metrics
|
||||
- Add alerting and dashboards
|
||||
|
||||
---
|
||||
|
||||
## 🎯 **Current System Architecture**
|
||||
|
||||
```
|
||||
Management Container
|
||||
↓
|
||||
1. Try HTTP health endpoints (app containers)
|
||||
↓ (if fails)
|
||||
2. Query Docker daemon (all containers)
|
||||
↓ (if fails)
|
||||
3. Check docker-compose status
|
||||
↓ (if fails)
|
||||
4. Scan system processes
|
||||
```
|
||||
|
||||
**No agents required!** The management container does all the work:
|
||||
|
||||
1. **Health Endpoints**: Makes HTTP calls to containers that support it
|
||||
2. **Docker Stats**: Queries Docker daemon for ALL container metrics
|
||||
3. **Process Detection**: Scans system for running services
|
||||
4. **Smart Fallback**: Always tries to get SOME information
|
||||
|
||||
---
|
||||
|
||||
## 🚀 **Why This Approach is Great**
|
||||
|
||||
### **For Existing Systems**
|
||||
- **Zero downtime**: Works immediately
|
||||
- **No refactoring**: Containers don't need changes
|
||||
- **Comprehensive**: Sees ALL containers (yours + infrastructure)
|
||||
|
||||
### **For Future Development**
|
||||
- **Gradual enhancement**: Add health endpoints when convenient
|
||||
- **Flexible**: Can switch to any monitoring approach later
|
||||
- **Standards compliant**: Uses Docker APIs and HTTP standards
|
||||
|
||||
### **Production Ready**
|
||||
- **Reliable fallbacks**: Always gets some data
|
||||
- **Error handling**: Graceful degradation
|
||||
- **Performance**: Lightweight HTTP calls
|
||||
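- **Security**: No privileged containers needed for the HTTP health endpoints; the Docker stats fallback still requires the management container to reach the Docker daemon (e.g. a mounted Docker socket)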
|
||||
|
||||
### 2. Prometheus-style Metrics Scraping
|
||||
|
||||
```javascript
|
||||
|
||||
61
docs/monitoring-architecture.txt
Normal file
61
docs/monitoring-architecture.txt
Normal file
@@ -0,0 +1,61 @@
|
||||
```
|
||||
Container Monitoring Architecture - No Agents Required!
|
||||
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ Management Container │
|
||||
│ │
|
||||
│ ┌─────────────────────────────────────────────────────────┐ │
|
||||
│ │ Monitoring Controller │ │
|
||||
│ │ │ │
|
||||
│ │ 1. HTTP Health Endpoints ──┐ │ │
|
||||
│ │ 2. Docker Stats API ────────┼──── Fallback Chain │ │
|
||||
│ │ 3. Docker Compose Status ───┤ │ │
|
||||
│ │ 4. Process List Scanning ───┘ │ │
|
||||
│ └─────────────────────────────────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ Target Containers │
|
||||
├─────────────────┬─────────────────┬─────────────────┬───────────┤
|
||||
│ App Containers│ Infrastructure │ Cache Layer │ Database │
|
||||
│ │ │ │ │
|
||||
│ ┌─────────────┐ │ ┌─────────────┐ │ ┌─────────────┐ │ ┌───────┐ │
|
||||
│ │ Backend │ │ │ Nginx │ │ │ Redis │ │ │ Postgres│
|
||||
│ │ │ │ │ │ │ │ │ │ │ │ │
|
||||
│ │ /health ←───┼─┼─┼─HTTP calls──┼─┼─┼─Basic ping──┼─┼─┼─Port │ │
|
||||
│ │ /metrics │ │ │ /nginx_stat │ │ │ │ │ │ check │ │
|
||||
│ └─────────────┘ │ └─────────────┘ │ └─────────────┘ │ └───────┘ │
|
||||
├─────────────────┼─────────────────┼─────────────────┼───────────┤
|
||||
│ │ │ │ │
|
||||
│ Enhanced Data │ Basic Status │ Connectivity │ Status │
|
||||
│ • CPU usage │ • Up/Down │ • Responsive │ • Running│
|
||||
│ • Memory % │ • Port info │ • Timeout │ • Health │
|
||||
│ • Custom │ • Health │ │ │
|
||||
│ metrics │ check │ │ │
|
||||
└─────────────────┴─────────────────┴─────────────────┴───────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ Docker Daemon │
|
||||
│ │
|
||||
│ If HTTP calls fail, query Docker directly: │
|
||||
│ • docker stats --no-stream (CPU, Memory, Network, Disk) │
|
||||
│ • docker-compose ps (Status, Health, Ports) │
|
||||
│ • ps aux | grep (Process detection as final fallback) │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
|
||||
Data Flow:
|
||||
1. Management container tries HTTP health endpoints first
|
||||
2. Falls back to Docker daemon APIs if containers don't respond
|
||||
3. Uses docker-compose for basic status if the `docker stats` query fails
|
||||
4. Scans processes as absolute last resort
|
||||
|
||||
Benefits:
|
||||
✅ No agents to install or maintain
|
||||
✅ Works with existing containers immediately
|
||||
✅ Gradual enhancement possible (add health endpoints when convenient)
|
||||
✅ Comprehensive coverage (all containers, not just yours)
|
||||
✅ Multiple fallbacks ensure data is always available
|
||||
✅ Standard HTTP + Docker APIs (no proprietary protocols)
|
||||
```
|
||||
@@ -243,11 +243,20 @@ const System = () => {
|
||||
Issuer: {ssl.issuer}
|
||||
</div>
|
||||
)}
|
||||
{ssl.fingerprint && (
|
||||
<div className="text-xs text-gray-400 truncate" title={ssl.fingerprint}>
|
||||
Fingerprint: {ssl.fingerprint.substring(0, 20)}...
|
||||
</div>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
{ssl.error && (
|
||||
<div className="text-xs text-red-600">
|
||||
{ssl.error}
|
||||
<div className="text-xs text-red-600 bg-red-50 p-2 rounded border">
|
||||
<div className="font-medium">Error:</div>
|
||||
<div>{ssl.error}</div>
|
||||
{ssl.errorCode && (
|
||||
<div className="text-gray-500 mt-1">Code: {ssl.errorCode}</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
@@ -388,8 +397,36 @@ const System = () => {
|
||||
{systemInfo.containers.error ? (
|
||||
<div className="text-center text-red-600 py-8">
|
||||
<XCircleIcon className="mx-auto h-12 w-12 text-red-400 mb-2" />
|
||||
<div className="font-medium">Docker containers not available</div>
|
||||
<div className="text-sm mt-1">{systemInfo.containers.message}</div>
|
||||
<div className="font-medium mb-2">Container monitoring unavailable</div>
|
||||
<div className="text-sm mb-4">{systemInfo.containers.lastError}</div>
|
||||
|
||||
{systemInfo.containers.troubleshooting && (
|
||||
<div className="bg-blue-50 border border-blue-200 rounded-lg p-4 text-left text-sm">
|
||||
<h5 className="font-medium text-blue-900 mb-2">💡 Troubleshooting Tips:</h5>
|
||||
<ul className="space-y-1 text-blue-800">
|
||||
<li>• {systemInfo.containers.troubleshooting.docker_access}</li>
|
||||
<li>• {systemInfo.containers.troubleshooting.permissions}</li>
|
||||
<li>• {systemInfo.containers.troubleshooting.environment}</li>
|
||||
</ul>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{systemInfo.containers.suggestions && (
|
||||
<div className="bg-yellow-50 border border-yellow-200 rounded-lg p-4 text-left text-sm mt-3">
|
||||
<h5 className="font-medium text-yellow-900 mb-2">🔧 Quick Fixes:</h5>
|
||||
<ul className="space-y-1 text-yellow-800">
|
||||
{systemInfo.containers.suggestions.map((suggestion, index) => (
|
||||
<li key={index}>• {suggestion}</li>
|
||||
))}
|
||||
</ul>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
) : systemInfo.containers.info ? (
|
||||
<div className="text-center text-gray-600 py-8">
|
||||
<ServerIcon className="mx-auto h-12 w-12 text-gray-400 mb-2" />
|
||||
<div className="font-medium mb-2">{systemInfo.containers.info}</div>
|
||||
<div className="text-sm">{systemInfo.containers.message}</div>
|
||||
</div>
|
||||
) : (
|
||||
<div className="space-y-6">
|
||||
|
||||
@@ -203,6 +203,7 @@ router.get('/system-info', async (req, res) => {
|
||||
}
|
||||
|
||||
// Fallback to Docker stats for ALL containers (not just our apps)
|
||||
if (Object.keys(containerMetrics).length === 0 || Object.values(containerMetrics).every(m => m.status === 'unreachable')) {
|
||||
try {
|
||||
const { stdout } = await execAsync('docker stats --no-stream --format "table {{.Container}}\\t{{.CPUPerc}}\\t{{.MemUsage}}\\t{{.MemPerc}}\\t{{.NetIO}}\\t{{.BlockIO}}"');
|
||||
const lines = stdout.trim().split('\n').slice(1);
|
||||
@@ -212,15 +213,14 @@ router.get('/system-info', async (req, res) => {
|
||||
if (container && cpu) {
|
||||
// Determine container type
|
||||
let type = 'unknown';
|
||||
if (container.includes('postgres') || container.includes('mysql') || container.includes('mongo')) type = 'database';
|
||||
else if (container.includes('redis') || container.includes('memcached')) type = 'cache';
|
||||
else if (container.includes('nginx') || container.includes('proxy') || container.includes('traefik')) type = 'proxy';
|
||||
else if (container.includes('drone-detection') || container.includes('uamils')) type = 'application';
|
||||
else if (container.includes('elasticsearch') || container.includes('kibana') || container.includes('logstash')) type = 'logging';
|
||||
else if (container.includes('prometheus') || container.includes('grafana')) type = 'monitoring';
|
||||
const name = container.toLowerCase();
|
||||
if (name.includes('postgres') || name.includes('mysql') || name.includes('mongo')) type = 'database';
|
||||
else if (name.includes('redis') || name.includes('memcached')) type = 'cache';
|
||||
else if (name.includes('nginx') || name.includes('proxy') || name.includes('traefik')) type = 'proxy';
|
||||
else if (name.includes('drone-detection') || name.includes('uamils')) type = 'application';
|
||||
else if (name.includes('elasticsearch') || name.includes('kibana') || name.includes('logstash')) type = 'logging';
|
||||
else if (name.includes('prometheus') || name.includes('grafana')) type = 'monitoring';
|
||||
|
||||
// If we don't have health endpoint data, use docker stats
|
||||
if (!containerMetrics[container]) {
|
||||
containerMetrics[container] = {
|
||||
cpu: cpu,
|
||||
memory: { usage: memUsage, percentage: memPerc },
|
||||
@@ -229,101 +229,151 @@ router.get('/system-info', async (req, res) => {
|
||||
type: type,
|
||||
source: 'docker_stats'
|
||||
};
|
||||
} else {
|
||||
// Enhance existing health data with docker stats
|
||||
containerMetrics[container] = {
|
||||
...containerMetrics[container],
|
||||
cpu: cpu,
|
||||
memory: { usage: memUsage, percentage: memPerc },
|
||||
network: netIO,
|
||||
disk: blockIO
|
||||
};
|
||||
}
|
||||
}
|
||||
});
|
||||
} catch (dockerError) {
|
||||
console.log('Docker stats failed, trying docker compose...');
|
||||
console.log('Docker stats failed, trying compose and processes...');
|
||||
|
||||
// Try container inspection via docker compose
|
||||
try {
|
||||
const { stdout: composeStatus } = await execAsync('docker-compose ps --format json');
|
||||
const containers = JSON.parse(`[${composeStatus.split('\n').filter(line => line.trim()).join(',')}]`);
|
||||
const { stdout: composeStatus } = await execAsync('docker-compose ps --services 2>/dev/null || docker compose ps --services 2>/dev/null');
|
||||
const services = composeStatus.trim().split('\n').filter(s => s.trim());
|
||||
|
||||
containers.forEach(container => {
|
||||
if (container.Name && !containerMetrics[container.Name]) {
|
||||
if (services.length > 0) {
|
||||
for (const service of services) {
|
||||
let type = 'unknown';
|
||||
const name = container.Name.toLowerCase();
|
||||
if (name.includes('postgres') || name.includes('mysql') || name.includes('mongo')) type = 'database';
|
||||
else if (name.includes('redis') || name.includes('memcached')) type = 'cache';
|
||||
const name = service.toLowerCase();
|
||||
if (name.includes('postgres') || name.includes('mysql') || name.includes('mongo') || name.includes('db')) type = 'database';
|
||||
else if (name.includes('redis') || name.includes('cache')) type = 'cache';
|
||||
else if (name.includes('nginx') || name.includes('proxy')) type = 'proxy';
|
||||
else if (name.includes('drone-detection') || name.includes('uamils')) type = 'application';
|
||||
else if (name.includes('drone-detection') || name.includes('uamils') || name.includes('app') || name.includes('backend') || name.includes('frontend')) type = 'application';
|
||||
|
||||
containerMetrics[container.Name] = {
|
||||
status: container.State,
|
||||
health: container.Health || 'unknown',
|
||||
ports: container.Ports,
|
||||
containerMetrics[service] = {
|
||||
status: 'detected',
|
||||
health: 'unknown',
|
||||
type: type,
|
||||
source: 'docker_compose'
|
||||
source: 'docker_compose_services'
|
||||
};
|
||||
}
|
||||
});
|
||||
}
|
||||
} catch (composeError) {
|
||||
// Final fallback - try to detect containers via process list
|
||||
// Final fallback - try to detect running services via different methods
|
||||
try {
|
||||
const { stdout: processes } = await execAsync('ps aux | grep -E "(postgres|redis|nginx|docker)" | grep -v grep');
|
||||
const processLines = processes.split('\n').filter(line => line.trim());
|
||||
// Check for common database ports
|
||||
const portChecks = [
|
||||
{ port: 5432, name: 'postgresql', type: 'database' },
|
||||
{ port: 3306, name: 'mysql', type: 'database' },
|
||||
{ port: 6379, name: 'redis', type: 'cache' },
|
||||
{ port: 80, name: 'nginx', type: 'proxy' },
|
||||
{ port: 443, name: 'nginx-ssl', type: 'proxy' }
|
||||
];
|
||||
|
||||
const detectedServices = {};
|
||||
processLines.forEach(line => {
|
||||
if (line.includes('postgres')) detectedServices['postgres-process'] = { status: 'running', type: 'database', source: 'process_list' };
|
||||
if (line.includes('redis')) detectedServices['redis-process'] = { status: 'running', type: 'cache', source: 'process_list' };
|
||||
if (line.includes('nginx')) detectedServices['nginx-process'] = { status: 'running', type: 'proxy', source: 'process_list' };
|
||||
});
|
||||
const { stdout: netstatOutput } = await execAsync('netstat -tlnp 2>/dev/null || ss -tlnp 2>/dev/null || echo "no netstat"');
|
||||
|
||||
if (Object.keys(detectedServices).length > 0) {
|
||||
containerMetrics = { ...containerMetrics, ...detectedServices };
|
||||
} else {
|
||||
containerMetrics = {
|
||||
error: 'All container monitoring methods failed',
|
||||
attempts: ['health_endpoints', 'docker_stats', 'docker_compose', 'process_list'],
|
||||
lastError: composeError.message
|
||||
for (const { port, name, type } of portChecks) {
|
||||
if (netstatOutput.includes(`:${port} `)) {
|
||||
containerMetrics[`${name}-service`] = {
|
||||
status: 'port_listening',
|
||||
port: port,
|
||||
type: type,
|
||||
source: 'port_detection'
|
||||
};
|
||||
}
|
||||
} catch (processError) {
|
||||
}
|
||||
|
||||
// If still no containers found, show a helpful message
|
||||
if (Object.keys(containerMetrics).length === 0) {
|
||||
containerMetrics = {
|
||||
info: 'No containers detected',
|
||||
message: 'This could mean Docker is not running, no containers are active, or the monitoring system needs Docker access',
|
||||
suggestions: [
|
||||
'Check if Docker is running: docker ps',
|
||||
'Ensure management container has Docker socket access',
|
||||
'Try: docker run --rm -v /var/run/docker.sock:/var/run/docker.sock ...'
|
||||
]
|
||||
};
|
||||
}
|
||||
} catch (finalError) {
|
||||
containerMetrics = {
|
||||
error: 'All container monitoring methods failed',
|
||||
attempts: ['health_endpoints', 'docker_stats', 'docker_compose', 'process_list'],
|
||||
lastError: processError.message
|
||||
attempts: ['health_endpoints', 'docker_stats', 'docker_compose', 'port_detection'],
|
||||
lastError: finalError.message,
|
||||
troubleshooting: {
|
||||
docker_access: 'Ensure management container can access Docker daemon',
|
||||
permissions: 'Container may need privileged access or Docker socket mount',
|
||||
environment: 'Check if running in Docker environment vs local development'
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get system memory and CPU info
|
||||
let systemMetrics = {};
|
||||
try {
|
||||
// Try Linux commands first
|
||||
try {
|
||||
const { stdout: memInfo } = await execAsync('free -m');
|
||||
const memLines = memInfo.split('\n')[1].split(/\s+/);
|
||||
const totalMem = parseInt(memLines[1]);
|
||||
const usedMem = parseInt(memLines[2]);
|
||||
|
||||
const { stdout: cpuInfo } = await execAsync('top -bn1 | grep "Cpu(s)" | sed "s/.*, *\\([0-9.]*\\)%* id.*/\\1/" | awk \'{print 100 - $1}\'');
|
||||
const cpuUsage = parseFloat(cpuInfo.trim());
|
||||
|
||||
const { stdout: diskInfo } = await execAsync('df -h / | awk \'NR==2{print $3 " / " $2 " (" $5 ")"}\'');
|
||||
|
||||
systemMetrics = {
|
||||
memory: {
|
||||
systemMetrics.memory = {
|
||||
used: `${usedMem}MB`,
|
||||
total: `${totalMem}MB`,
|
||||
percentage: Math.round((usedMem / totalMem) * 100)
|
||||
},
|
||||
cpu: {
|
||||
};
|
||||
} catch (memError) {
|
||||
// Fallback for Windows or other systems
|
||||
const totalMem = Math.round(require('os').totalmem() / 1024 / 1024);
|
||||
const freeMem = Math.round(require('os').freemem() / 1024 / 1024);
|
||||
const usedMem = totalMem - freeMem;
|
||||
|
||||
systemMetrics.memory = {
|
||||
used: `${usedMem}MB`,
|
||||
total: `${totalMem}MB`,
|
||||
percentage: Math.round((usedMem / totalMem) * 100)
|
||||
};
|
||||
}
|
||||
|
||||
// CPU usage - fix negative values
|
||||
try {
|
||||
const { stdout: cpuInfo } = await execAsync('top -bn1 | grep "Cpu(s)" | sed "s/.*, *\\([0-9.]*\\)%* id.*/\\1/" | awk \'{print 100 - $1}\'');
|
||||
let cpuUsage = parseFloat(cpuInfo.trim());
|
||||
|
||||
// Fix negative or invalid CPU values
|
||||
if (isNaN(cpuUsage) || cpuUsage < 0 || cpuUsage > 100) {
|
||||
// Fallback to load average calculation
|
||||
const loadAvg = require('os').loadavg()[0];
|
||||
const cpuCount = require('os').cpus().length;
|
||||
cpuUsage = Math.min((loadAvg / cpuCount) * 100, 100);
|
||||
}
|
||||
|
||||
systemMetrics.cpu = {
|
||||
usage: `${cpuUsage.toFixed(1)}%`,
|
||||
percentage: cpuUsage
|
||||
},
|
||||
disk: diskInfo.trim()
|
||||
};
|
||||
} catch (cpuError) {
|
||||
// Ultimate fallback
|
||||
const loadAvg = require('os').loadavg()[0];
|
||||
const cpuCount = require('os').cpus().length;
|
||||
const cpuUsage = Math.min((loadAvg / cpuCount) * 100, 100);
|
||||
|
||||
systemMetrics.cpu = {
|
||||
usage: `${cpuUsage.toFixed(1)}%`,
|
||||
percentage: cpuUsage
|
||||
};
|
||||
}
|
||||
|
||||
// Disk usage
|
||||
try {
|
||||
const { stdout: diskInfo } = await execAsync('df -h / | awk \'NR==2{print $3 " / " $2 " (" $5 ")"}\'');
|
||||
systemMetrics.disk = diskInfo.trim();
|
||||
} catch (diskError) {
|
||||
systemMetrics.disk = 'N/A';
|
||||
}
|
||||
|
||||
} catch (sysError) {
|
||||
console.log('System metrics not available:', sysError.message);
|
||||
systemMetrics = {
|
||||
@@ -341,37 +391,54 @@ router.get('/system-info', async (req, res) => {
|
||||
const options = {
|
||||
hostname: hostname,
|
||||
port: 443,
|
||||
method: 'GET',
|
||||
timeout: 5000
|
||||
method: 'HEAD',
|
||||
timeout: 5000,
|
||||
// Allow self-signed certificates for development
|
||||
rejectUnauthorized: false
|
||||
};
|
||||
|
||||
const req = https.request(options, (res) => {
|
||||
const cert = res.connection.getPeerCertificate();
|
||||
if (cert && cert.valid_to) {
|
||||
const expiryDate = new Date(cert.valid_to);
|
||||
const daysUntilExpiry = Math.ceil((expiryDate - new Date()) / (1000 * 60 * 60 * 24));
|
||||
const now = new Date();
|
||||
const daysUntilExpiry = Math.ceil((expiryDate - now) / (1000 * 60 * 60 * 24));
|
||||
|
||||
resolve({
|
||||
status: daysUntilExpiry > 30 ? 'valid' : daysUntilExpiry > 7 ? 'warning' : 'critical',
|
||||
expiresAt: expiryDate.toISOString(),
|
||||
daysUntilExpiry: daysUntilExpiry,
|
||||
issuer: cert.issuer?.O || 'Unknown',
|
||||
subject: cert.subject?.CN || hostname
|
||||
issuer: cert.issuer?.O || cert.issuer?.CN || 'Unknown',
|
||||
subject: cert.subject?.CN || hostname,
|
||||
fingerprint: cert.fingerprint || 'N/A'
|
||||
});
|
||||
} else {
|
||||
resolve({
|
||||
status: 'error',
|
||||
expiresAt: null,
|
||||
error: 'Certificate not found'
|
||||
error: 'Certificate information not available'
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
req.on('error', () => {
|
||||
req.on('error', (error) => {
|
||||
// Try to determine the type of error
|
||||
let errorMessage = error.message;
|
||||
if (error.code === 'ENOTFOUND') {
|
||||
errorMessage = 'Domain not found (DNS resolution failed)';
|
||||
} else if (error.code === 'ECONNREFUSED') {
|
||||
errorMessage = 'Connection refused (service not running on port 443)';
|
||||
} else if (error.code === 'ETIMEDOUT') {
|
||||
errorMessage = 'Connection timeout';
|
||||
} else if (error.code === 'CERT_HAS_EXPIRED') {
|
||||
errorMessage = 'Certificate has expired';
|
||||
}
|
||||
|
||||
resolve({
|
||||
status: 'error',
|
||||
expiresAt: null,
|
||||
error: 'Connection failed'
|
||||
error: errorMessage,
|
||||
errorCode: error.code
|
||||
});
|
||||
});
|
||||
|
||||
@@ -380,7 +447,8 @@ router.get('/system-info', async (req, res) => {
|
||||
resolve({
|
||||
status: 'error',
|
||||
expiresAt: null,
|
||||
error: 'Timeout'
|
||||
error: 'Connection timeout (5 seconds)',
|
||||
errorCode: 'TIMEOUT'
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user