Initial commit: DashCaddy v1.0
Full codebase including API server (32 modules + routes), dashboard frontend, DashCA certificate distribution, installer script, and deployment skills.
This commit is contained in:
494
dashcaddy-api/resource-monitor.js
Normal file
494
dashcaddy-api/resource-monitor.js
Normal file
@@ -0,0 +1,494 @@
|
||||
/**
|
||||
* Container Resource Monitoring Module
|
||||
* Tracks CPU, memory, disk, and network usage for Docker containers
|
||||
* Provides alerts and historical data
|
||||
*/
|
||||
|
||||
const Docker = require('dockerode');
|
||||
const EventEmitter = require('events');
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// Single dockerode client talking to the local Docker daemon (default socket).
const docker = new Docker();

// Configuration
// Where per-container stats history is persisted between restarts.
const STATS_FILE = process.env.STATS_FILE || path.join(__dirname, 'container-stats.json');
// Where per-container alert threshold configuration is persisted.
const ALERT_CONFIG_FILE = process.env.ALERT_CONFIG_FILE || path.join(__dirname, 'alert-config.json');
// How long collected samples are kept before being pruned.
const STATS_RETENTION_HOURS = parseInt(process.env.STATS_RETENTION_HOURS || '168', 10); // 7 days default
// Polling period for the stats-collection loop, in milliseconds.
const MONITORING_INTERVAL = parseInt(process.env.MONITORING_INTERVAL || '10000', 10); // 10 seconds
|
||||
|
||||
class ResourceMonitor extends EventEmitter {
|
||||
constructor() {
|
||||
super();
|
||||
this.monitoring = false;
|
||||
this.monitoringInterval = null;
|
||||
this.stats = new Map(); // containerId -> array of stats
|
||||
this.alerts = new Map(); // containerId -> alert config
|
||||
this.lastAlerts = new Map(); // containerId -> last alert timestamp
|
||||
|
||||
this.loadStats();
|
||||
this.loadAlertConfig();
|
||||
}
|
||||
|
||||
/**
|
||||
* Start monitoring all containers
|
||||
*/
|
||||
start() {
|
||||
if (this.monitoring) {
|
||||
console.log('[ResourceMonitor] Already monitoring');
|
||||
return;
|
||||
}
|
||||
|
||||
console.log('[ResourceMonitor] Starting container monitoring');
|
||||
this.monitoring = true;
|
||||
this.monitoringInterval = setInterval(() => this.collectStats(), MONITORING_INTERVAL);
|
||||
|
||||
// Initial collection
|
||||
this.collectStats();
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop monitoring
|
||||
*/
|
||||
stop() {
|
||||
if (!this.monitoring) return;
|
||||
|
||||
console.log('[ResourceMonitor] Stopping container monitoring');
|
||||
this.monitoring = false;
|
||||
|
||||
if (this.monitoringInterval) {
|
||||
clearInterval(this.monitoringInterval);
|
||||
this.monitoringInterval = null;
|
||||
}
|
||||
|
||||
this.saveStats();
|
||||
}
|
||||
|
||||
/**
|
||||
* Collect stats from all running containers
|
||||
*/
|
||||
async collectStats() {
|
||||
try {
|
||||
const containers = await docker.listContainers({ all: false });
|
||||
|
||||
for (const containerInfo of containers) {
|
||||
try {
|
||||
const container = docker.getContainer(containerInfo.Id);
|
||||
const stats = await this.getContainerStats(container);
|
||||
|
||||
if (stats) {
|
||||
this.recordStats(containerInfo.Id, containerInfo.Names[0], stats);
|
||||
this.checkAlerts(containerInfo.Id, containerInfo.Names[0], stats);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(`[ResourceMonitor] Error collecting stats for ${containerInfo.Names[0]}:`, error.message);
|
||||
}
|
||||
}
|
||||
|
||||
// Cleanup old stats
|
||||
this.cleanupOldStats();
|
||||
|
||||
// Persist stats periodically
|
||||
if (Math.random() < 0.1) { // 10% chance to save (every ~100 seconds)
|
||||
this.saveStats();
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('[ResourceMonitor] Error collecting container stats:', error.message);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get stats for a single container
|
||||
*/
|
||||
async getContainerStats(container) {
|
||||
return new Promise((resolve, reject) => {
|
||||
container.stats({ stream: false }, (err, stats) => {
|
||||
if (err) {
|
||||
reject(err);
|
||||
return;
|
||||
}
|
||||
|
||||
// Calculate CPU percentage
|
||||
const cpuDelta = stats.cpu_stats.cpu_usage.total_usage -
|
||||
(stats.precpu_stats.cpu_usage?.total_usage || 0);
|
||||
const systemDelta = stats.cpu_stats.system_cpu_usage -
|
||||
(stats.precpu_stats.system_cpu_usage || 0);
|
||||
const cpuPercent = systemDelta > 0 ? (cpuDelta / systemDelta) * 100 : 0;
|
||||
|
||||
// Calculate memory usage
|
||||
const memoryUsage = stats.memory_stats.usage || 0;
|
||||
const memoryLimit = stats.memory_stats.limit || 0;
|
||||
const memoryPercent = memoryLimit > 0 ? (memoryUsage / memoryLimit) * 100 : 0;
|
||||
|
||||
// Calculate network I/O
|
||||
let networkRx = 0;
|
||||
let networkTx = 0;
|
||||
if (stats.networks) {
|
||||
Object.values(stats.networks).forEach(net => {
|
||||
networkRx += net.rx_bytes || 0;
|
||||
networkTx += net.tx_bytes || 0;
|
||||
});
|
||||
}
|
||||
|
||||
// Calculate block I/O
|
||||
let blockRead = 0;
|
||||
let blockWrite = 0;
|
||||
if (stats.blkio_stats?.io_service_bytes_recursive) {
|
||||
stats.blkio_stats.io_service_bytes_recursive.forEach(io => {
|
||||
if (io.op === 'Read') blockRead += io.value;
|
||||
if (io.op === 'Write') blockWrite += io.value;
|
||||
});
|
||||
}
|
||||
|
||||
resolve({
|
||||
timestamp: new Date().toISOString(),
|
||||
cpu: {
|
||||
percent: Math.round(cpuPercent * 100) / 100,
|
||||
usage: stats.cpu_stats.cpu_usage.total_usage
|
||||
},
|
||||
memory: {
|
||||
usage: memoryUsage,
|
||||
limit: memoryLimit,
|
||||
percent: Math.round(memoryPercent * 100) / 100,
|
||||
usageMB: Math.round(memoryUsage / 1024 / 1024),
|
||||
limitMB: Math.round(memoryLimit / 1024 / 1024)
|
||||
},
|
||||
network: {
|
||||
rxBytes: networkRx,
|
||||
txBytes: networkTx,
|
||||
rxMB: Math.round(networkRx / 1024 / 1024 * 100) / 100,
|
||||
txMB: Math.round(networkTx / 1024 / 1024 * 100) / 100
|
||||
},
|
||||
disk: {
|
||||
readBytes: blockRead,
|
||||
writeBytes: blockWrite,
|
||||
readMB: Math.round(blockRead / 1024 / 1024 * 100) / 100,
|
||||
writeMB: Math.round(blockWrite / 1024 / 1024 * 100) / 100
|
||||
},
|
||||
pids: stats.pids_stats?.current || 0
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Record stats for a container
|
||||
*/
|
||||
recordStats(containerId, containerName, stats) {
|
||||
if (!this.stats.has(containerId)) {
|
||||
this.stats.set(containerId, {
|
||||
name: containerName,
|
||||
history: []
|
||||
});
|
||||
}
|
||||
|
||||
const containerStats = this.stats.get(containerId);
|
||||
containerStats.name = containerName; // Update name in case it changed
|
||||
containerStats.history.push(stats);
|
||||
|
||||
// Keep only recent stats (based on retention policy)
|
||||
const cutoffTime = Date.now() - (STATS_RETENTION_HOURS * 60 * 60 * 1000);
|
||||
containerStats.history = containerStats.history.filter(s =>
|
||||
new Date(s.timestamp).getTime() > cutoffTime
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if any alerts should be triggered
|
||||
*/
|
||||
checkAlerts(containerId, containerName, stats) {
|
||||
const alertConfig = this.alerts.get(containerId);
|
||||
if (!alertConfig || !alertConfig.enabled) return;
|
||||
|
||||
const now = Date.now();
|
||||
const lastAlert = this.lastAlerts.get(containerId) || 0;
|
||||
const cooldown = (alertConfig.cooldownMinutes || 15) * 60 * 1000;
|
||||
|
||||
// Don't spam alerts - respect cooldown period
|
||||
if (now - lastAlert < cooldown) return;
|
||||
|
||||
const alerts = [];
|
||||
|
||||
// Check CPU threshold
|
||||
if (alertConfig.cpuThreshold && stats.cpu.percent > alertConfig.cpuThreshold) {
|
||||
alerts.push({
|
||||
type: 'cpu',
|
||||
severity: 'warning',
|
||||
message: `CPU usage ${stats.cpu.percent.toFixed(1)}% exceeds threshold ${alertConfig.cpuThreshold}%`,
|
||||
value: stats.cpu.percent,
|
||||
threshold: alertConfig.cpuThreshold
|
||||
});
|
||||
}
|
||||
|
||||
// Check memory threshold
|
||||
if (alertConfig.memoryThreshold && stats.memory.percent > alertConfig.memoryThreshold) {
|
||||
alerts.push({
|
||||
type: 'memory',
|
||||
severity: 'warning',
|
||||
message: `Memory usage ${stats.memory.percent.toFixed(1)}% exceeds threshold ${alertConfig.memoryThreshold}%`,
|
||||
value: stats.memory.percent,
|
||||
threshold: alertConfig.memoryThreshold
|
||||
});
|
||||
}
|
||||
|
||||
// Check disk I/O threshold (MB/s)
|
||||
if (alertConfig.diskIOThreshold) {
|
||||
const diskIO = stats.disk.readMB + stats.disk.writeMB;
|
||||
if (diskIO > alertConfig.diskIOThreshold) {
|
||||
alerts.push({
|
||||
type: 'disk',
|
||||
severity: 'warning',
|
||||
message: `Disk I/O ${diskIO.toFixed(1)} MB/s exceeds threshold ${alertConfig.diskIOThreshold} MB/s`,
|
||||
value: diskIO,
|
||||
threshold: alertConfig.diskIOThreshold
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (alerts.length > 0) {
|
||||
this.lastAlerts.set(containerId, now);
|
||||
|
||||
this.emit('alert', {
|
||||
containerId,
|
||||
containerName,
|
||||
timestamp: new Date().toISOString(),
|
||||
alerts,
|
||||
stats,
|
||||
config: alertConfig
|
||||
});
|
||||
|
||||
// Auto-restart if configured
|
||||
if (alertConfig.autoRestart) {
|
||||
this.restartContainer(containerId, containerName, alerts);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Restart a container due to resource alerts
|
||||
*/
|
||||
async restartContainer(containerId, containerName, alerts) {
|
||||
try {
|
||||
console.log(`[ResourceMonitor] Auto-restarting ${containerName} due to alerts:`, alerts.map(a => a.type).join(', '));
|
||||
|
||||
const container = docker.getContainer(containerId);
|
||||
await container.restart();
|
||||
|
||||
this.emit('auto-restart', {
|
||||
containerId,
|
||||
containerName,
|
||||
timestamp: new Date().toISOString(),
|
||||
reason: alerts
|
||||
});
|
||||
} catch (error) {
|
||||
console.error(`[ResourceMonitor] Failed to restart ${containerName}:`, error.message);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current stats for a container
|
||||
*/
|
||||
getCurrentStats(containerId) {
|
||||
const containerStats = this.stats.get(containerId);
|
||||
if (!containerStats || containerStats.history.length === 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return containerStats.history[containerStats.history.length - 1];
|
||||
}
|
||||
|
||||
/**
|
||||
* Get historical stats for a container
|
||||
*/
|
||||
getHistoricalStats(containerId, hours = 24) {
|
||||
const containerStats = this.stats.get(containerId);
|
||||
if (!containerStats) return [];
|
||||
|
||||
const cutoffTime = Date.now() - (hours * 60 * 60 * 1000);
|
||||
return containerStats.history.filter(s =>
|
||||
new Date(s.timestamp).getTime() > cutoffTime
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get aggregated stats for a container
|
||||
*/
|
||||
getAggregatedStats(containerId, hours = 24) {
|
||||
const history = this.getHistoricalStats(containerId, hours);
|
||||
if (history.length === 0) return null;
|
||||
|
||||
const cpuValues = history.map(s => s.cpu.percent);
|
||||
const memoryValues = history.map(s => s.memory.percent);
|
||||
|
||||
return {
|
||||
cpu: {
|
||||
current: cpuValues[cpuValues.length - 1],
|
||||
avg: cpuValues.reduce((a, b) => a + b, 0) / cpuValues.length,
|
||||
max: Math.max(...cpuValues),
|
||||
min: Math.min(...cpuValues)
|
||||
},
|
||||
memory: {
|
||||
current: memoryValues[memoryValues.length - 1],
|
||||
avg: memoryValues.reduce((a, b) => a + b, 0) / memoryValues.length,
|
||||
max: Math.max(...memoryValues),
|
||||
min: Math.min(...memoryValues)
|
||||
},
|
||||
dataPoints: history.length,
|
||||
timeRange: hours
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Get stats for all containers
|
||||
*/
|
||||
getAllStats() {
|
||||
const result = {};
|
||||
|
||||
for (const [containerId, data] of this.stats.entries()) {
|
||||
const current = this.getCurrentStats(containerId);
|
||||
const aggregated = this.getAggregatedStats(containerId, 24);
|
||||
|
||||
result[containerId] = {
|
||||
name: data.name,
|
||||
current,
|
||||
aggregated,
|
||||
alertConfig: this.alerts.get(containerId)
|
||||
};
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Configure alerts for a container
|
||||
*/
|
||||
setAlertConfig(containerId, config) {
|
||||
this.alerts.set(containerId, {
|
||||
enabled: config.enabled !== false,
|
||||
cpuThreshold: config.cpuThreshold || null,
|
||||
memoryThreshold: config.memoryThreshold || null,
|
||||
diskIOThreshold: config.diskIOThreshold || null,
|
||||
cooldownMinutes: config.cooldownMinutes || 15,
|
||||
autoRestart: config.autoRestart || false,
|
||||
notificationChannels: config.notificationChannels || []
|
||||
});
|
||||
|
||||
this.saveAlertConfig();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get alert configuration for a container
|
||||
*/
|
||||
getAlertConfig(containerId) {
|
||||
return this.alerts.get(containerId) || null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove alert configuration
|
||||
*/
|
||||
removeAlertConfig(containerId) {
|
||||
this.alerts.delete(containerId);
|
||||
this.lastAlerts.delete(containerId);
|
||||
this.saveAlertConfig();
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleanup old stats beyond retention period
|
||||
*/
|
||||
cleanupOldStats() {
|
||||
const cutoffTime = Date.now() - (STATS_RETENTION_HOURS * 60 * 60 * 1000);
|
||||
|
||||
for (const [containerId, data] of this.stats.entries()) {
|
||||
data.history = data.history.filter(s =>
|
||||
new Date(s.timestamp).getTime() > cutoffTime
|
||||
);
|
||||
|
||||
// Remove container stats if no recent data
|
||||
if (data.history.length === 0) {
|
||||
this.stats.delete(containerId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Load stats from disk
|
||||
*/
|
||||
loadStats() {
|
||||
try {
|
||||
if (fs.existsSync(STATS_FILE)) {
|
||||
const data = JSON.parse(fs.readFileSync(STATS_FILE, 'utf8'));
|
||||
this.stats = new Map(Object.entries(data));
|
||||
console.log(`[ResourceMonitor] Loaded stats for ${this.stats.size} containers`);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('[ResourceMonitor] Error loading stats:', error.message);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Save stats to disk
|
||||
*/
|
||||
saveStats() {
|
||||
try {
|
||||
const data = Object.fromEntries(this.stats);
|
||||
fs.writeFileSync(STATS_FILE, JSON.stringify(data, null, 2));
|
||||
} catch (error) {
|
||||
console.error('[ResourceMonitor] Error saving stats:', error.message);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Load alert configuration from disk
|
||||
*/
|
||||
loadAlertConfig() {
|
||||
try {
|
||||
if (fs.existsSync(ALERT_CONFIG_FILE)) {
|
||||
const data = JSON.parse(fs.readFileSync(ALERT_CONFIG_FILE, 'utf8'));
|
||||
this.alerts = new Map(Object.entries(data));
|
||||
console.log(`[ResourceMonitor] Loaded alert config for ${this.alerts.size} containers`);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('[ResourceMonitor] Error loading alert config:', error.message);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Save alert configuration to disk
|
||||
*/
|
||||
saveAlertConfig() {
|
||||
try {
|
||||
const data = Object.fromEntries(this.alerts);
|
||||
fs.writeFileSync(ALERT_CONFIG_FILE, JSON.stringify(data, null, 2));
|
||||
} catch (error) {
|
||||
console.error('[ResourceMonitor] Error saving alert config:', error.message);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Export stats for backup
|
||||
*/
|
||||
exportStats() {
|
||||
return {
|
||||
stats: Object.fromEntries(this.stats),
|
||||
alerts: Object.fromEntries(this.alerts),
|
||||
exportedAt: new Date().toISOString()
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Import stats from backup
|
||||
*/
|
||||
importStats(data) {
|
||||
if (data.stats) {
|
||||
this.stats = new Map(Object.entries(data.stats));
|
||||
}
|
||||
if (data.alerts) {
|
||||
this.alerts = new Map(Object.entries(data.alerts));
|
||||
}
|
||||
this.saveStats();
|
||||
this.saveAlertConfig();
|
||||
}
|
||||
}
|
||||
|
||||
// Export singleton instance
// The monitor is stateful (in-memory history, timers, event listeners);
// Node's module cache ensures every require() shares this one instance.
module.exports = new ResourceMonitor();
|
||||
Reference in New Issue
Block a user