Cloud backups (Dropbox / WebDAV / SFTP):
- backup-manager.js: save + load handlers per provider, credential
resolution via credentialManager, destination probe.
- routes/backups.js: /credentials/{provider} (masked GET, POST, DELETE),
/test-destination, scheduling endpoints.
- status/js/backup-restore.js: destination picker, provider-specific
credential forms, test button wired to backend probe.
- npm deps already present (dropbox 10.34.0, webdav 5.7.1,
ssh2-sftp-client 11.0.0).
Resource history:
- resource-monitor.js: three-tier rollup storage — raw 10s samples
(7-day retention), hourly rollups (30-day), daily rollups
(365-day). getHistoryByRange() auto-selects the appropriate tier.
- routes/monitoring.js: /monitoring/history/:containerId now supports
startTime/endTime range mode (legacy ?hours=N still works).
- status/js/resource-monitor.js + dashboard.css: "History" tab with
range buttons (1h/24h/7d/30d/1y), SVG sparklines for
CPU / memory / network. Renderer handles raw and rolled-up shapes.
status/dist/features.js rebuilt from source via build.js.
Lifted out of wip/cloud-backups-and-history; the half-finished
app-deps feature from that branch (frontend calls /api/v1/apps/
check-dependencies but the endpoint doesn't exist) is preserved
separately on wip/app-deps for later.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
839 lines
27 KiB
JavaScript
839 lines
27 KiB
JavaScript
/**
|
|
* Container Resource Monitoring Module
|
|
* Tracks CPU, memory, disk, and network usage for Docker containers
|
|
* Provides alerts and historical data
|
|
*/
|
|
|
|
const Docker = require('dockerode');
|
|
const EventEmitter = require('events');
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
|
|
const docker = new Docker();
|
|
|
|
// Configuration
// Every value below is overridable via environment variables. History is kept
// in three tiers, each with its own JSON file and retention window:
//   raw 10s samples  -> STATS_FILE        (STATS_RETENTION_HOURS, default 7d)
//   hourly rollups   -> STATS_HOURLY_FILE (STATS_HOURLY_RETENTION_DAYS, 30d)
//   daily rollups    -> STATS_DAILY_FILE  (STATS_DAILY_RETENTION_DAYS, 365d)
const STATS_FILE = process.env.STATS_FILE || path.join(__dirname, 'container-stats.json');
const STATS_HOURLY_FILE = process.env.STATS_HOURLY_FILE || path.join(__dirname, 'container-stats-hourly.json');
const STATS_DAILY_FILE = process.env.STATS_DAILY_FILE || path.join(__dirname, 'container-stats-daily.json');
const ALERT_CONFIG_FILE = process.env.ALERT_CONFIG_FILE || path.join(__dirname, 'alert-config.json');
const STATS_RETENTION_HOURS = parseInt(process.env.STATS_RETENTION_HOURS || '168', 10); // 7 days raw
const STATS_HOURLY_RETENTION_DAYS = parseInt(process.env.STATS_HOURLY_RETENTION_DAYS || '30', 10); // 30 days hourly
const STATS_DAILY_RETENTION_DAYS = parseInt(process.env.STATS_DAILY_RETENTION_DAYS || '365', 10); // 365 days daily
const MONITORING_INTERVAL = parseInt(process.env.MONITORING_INTERVAL || '10000', 10); // 10 seconds
const ROLLUP_HOURLY_INTERVAL = parseInt(process.env.ROLLUP_HOURLY_INTERVAL || String(60 * 60 * 1000), 10); // 1h
const ROLLUP_DAILY_INTERVAL = parseInt(process.env.ROLLUP_DAILY_INTERVAL || String(24 * 60 * 60 * 1000), 10); // 24h
|
|
|
|
class ResourceMonitor extends EventEmitter {
|
|
constructor() {
|
|
super();
|
|
this.monitoring = false;
|
|
this.monitoringInterval = null;
|
|
this.hourlyRollupTimer = null;
|
|
this.dailyRollupTimer = null;
|
|
this.stats = new Map(); // containerId -> { name, history: [...] } (raw 10s samples, 7d)
|
|
this.hourlyHistory = new Map(); // containerId -> { name, samples: [...] } (hourly avg, 30d)
|
|
this.dailyHistory = new Map(); // containerId -> { name, samples: [...] } (daily avg, 365d)
|
|
this.alerts = new Map(); // containerId -> alert config
|
|
this.lastAlerts = new Map(); // containerId -> last alert timestamp
|
|
|
|
this.loadStats();
|
|
this.loadHourlyStats();
|
|
this.loadDailyStats();
|
|
this.loadAlertConfig();
|
|
}
|
|
|
|
/**
|
|
* Start monitoring all containers
|
|
*/
|
|
start() {
|
|
if (this.monitoring) {
|
|
console.log('[ResourceMonitor] Already monitoring');
|
|
return;
|
|
}
|
|
|
|
console.log('[ResourceMonitor] Starting container monitoring');
|
|
this.monitoring = true;
|
|
this.monitoringInterval = setInterval(() => this.collectStats(), MONITORING_INTERVAL);
|
|
|
|
// Hourly rollup — fires once an hour, computes the previous full hour
|
|
this.hourlyRollupTimer = setInterval(() => {
|
|
try { this.rollupHourly(); } catch (e) { console.error('[ResourceMonitor] hourly rollup error:', e.message); }
|
|
}, ROLLUP_HOURLY_INTERVAL);
|
|
|
|
// Daily rollup — schedule first run at the next midnight, then fire every 24h
|
|
const now = new Date();
|
|
const nextMidnight = new Date(now.getFullYear(), now.getMonth(), now.getDate() + 1, 0, 0, 5);
|
|
const msUntilMidnight = nextMidnight.getTime() - now.getTime();
|
|
setTimeout(() => {
|
|
try { this.rollupDaily(); } catch (e) { console.error('[ResourceMonitor] daily rollup error:', e.message); }
|
|
this.dailyRollupTimer = setInterval(() => {
|
|
try { this.rollupDaily(); } catch (e) { console.error('[ResourceMonitor] daily rollup error:', e.message); }
|
|
}, ROLLUP_DAILY_INTERVAL);
|
|
}, msUntilMidnight);
|
|
|
|
// Initial collection
|
|
this.collectStats();
|
|
}
|
|
|
|
/**
|
|
* Stop monitoring
|
|
*/
|
|
stop() {
|
|
if (!this.monitoring) return;
|
|
|
|
console.log('[ResourceMonitor] Stopping container monitoring');
|
|
this.monitoring = false;
|
|
|
|
if (this.monitoringInterval) {
|
|
clearInterval(this.monitoringInterval);
|
|
this.monitoringInterval = null;
|
|
}
|
|
if (this.hourlyRollupTimer) {
|
|
clearInterval(this.hourlyRollupTimer);
|
|
this.hourlyRollupTimer = null;
|
|
}
|
|
if (this.dailyRollupTimer) {
|
|
clearInterval(this.dailyRollupTimer);
|
|
this.dailyRollupTimer = null;
|
|
}
|
|
|
|
this.saveStats();
|
|
this.saveHourlyStats();
|
|
this.saveDailyStats();
|
|
}
|
|
|
|
/**
|
|
* Collect stats from all running containers
|
|
*/
|
|
async collectStats() {
|
|
try {
|
|
const containers = await docker.listContainers({ all: false });
|
|
|
|
for (const containerInfo of containers) {
|
|
try {
|
|
const container = docker.getContainer(containerInfo.Id);
|
|
const stats = await this.getContainerStats(container);
|
|
|
|
if (stats) {
|
|
this.recordStats(containerInfo.Id, containerInfo.Names[0], stats);
|
|
this.checkAlerts(containerInfo.Id, containerInfo.Names[0], stats);
|
|
}
|
|
} catch (error) {
|
|
console.error(`[ResourceMonitor] Error collecting stats for ${containerInfo.Names[0]}:`, error.message);
|
|
}
|
|
}
|
|
|
|
// Cleanup old stats
|
|
this.cleanupOldStats();
|
|
|
|
// Persist stats periodically
|
|
if (Math.random() < 0.1) { // 10% chance to save (every ~100 seconds)
|
|
this.saveStats();
|
|
}
|
|
} catch (error) {
|
|
console.error('[ResourceMonitor] Error collecting container stats:', error.message);
|
|
}
|
|
}
|
|
|
|
/**
 * Get a single one-shot stats sample for one container.
 * Wraps dockerode's callback-style `container.stats({ stream: false })` in a
 * Promise and normalizes the raw Docker stats payload into the sample shape
 * stored in history (cpu / memory / network / disk / pids).
 * @param {Object} container - dockerode container handle
 * @returns {Promise<Object>} normalized sample; rejects on Docker API error
 */
async getContainerStats(container) {
  return new Promise((resolve, reject) => {
    container.stats({ stream: false }, (err, stats) => {
      if (err) {
        reject(err);
        return;
      }

      // Calculate CPU percentage from the delta between this snapshot and the
      // daemon-provided previous snapshot (precpu_stats).
      // NOTE(review): unlike the `docker stats` CLI formula, this does not
      // multiply by online_cpus, so the value is normalized to total host
      // capacity rather than per-core — confirm alert thresholds assume this.
      const cpuDelta = stats.cpu_stats.cpu_usage.total_usage -
        (stats.precpu_stats.cpu_usage?.total_usage || 0);
      const systemDelta = stats.cpu_stats.system_cpu_usage -
        (stats.precpu_stats.system_cpu_usage || 0);
      const cpuPercent = systemDelta > 0 ? (cpuDelta / systemDelta) * 100 : 0;

      // Calculate memory usage; fields may be absent on some platforms, so
      // default to 0 and guard the division.
      const memoryUsage = stats.memory_stats.usage || 0;
      const memoryLimit = stats.memory_stats.limit || 0;
      const memoryPercent = memoryLimit > 0 ? (memoryUsage / memoryLimit) * 100 : 0;

      // Calculate network I/O — sum cumulative byte counters over all networks.
      let networkRx = 0;
      let networkTx = 0;
      if (stats.networks) {
        Object.values(stats.networks).forEach(net => {
          networkRx += net.rx_bytes || 0;
          networkTx += net.tx_bytes || 0;
        });
      }

      // Calculate block I/O — cumulative read/write bytes across devices.
      let blockRead = 0;
      let blockWrite = 0;
      if (stats.blkio_stats?.io_service_bytes_recursive) {
        stats.blkio_stats.io_service_bytes_recursive.forEach(io => {
          if (io.op === 'Read') blockRead += io.value;
          if (io.op === 'Write') blockWrite += io.value;
        });
      }

      // Normalized sample. Percent/MB values are rounded to two decimals;
      // network/disk counters are cumulative-since-container-start, not rates.
      resolve({
        timestamp: new Date().toISOString(),
        cpu: {
          percent: Math.round(cpuPercent * 100) / 100,
          usage: stats.cpu_stats.cpu_usage.total_usage
        },
        memory: {
          usage: memoryUsage,
          limit: memoryLimit,
          percent: Math.round(memoryPercent * 100) / 100,
          usageMB: Math.round(memoryUsage / 1024 / 1024),
          limitMB: Math.round(memoryLimit / 1024 / 1024)
        },
        network: {
          rxBytes: networkRx,
          txBytes: networkTx,
          rxMB: Math.round(networkRx / 1024 / 1024 * 100) / 100,
          txMB: Math.round(networkTx / 1024 / 1024 * 100) / 100
        },
        disk: {
          readBytes: blockRead,
          writeBytes: blockWrite,
          readMB: Math.round(blockRead / 1024 / 1024 * 100) / 100,
          writeMB: Math.round(blockWrite / 1024 / 1024 * 100) / 100
        },
        pids: stats.pids_stats?.current || 0
      });
    });
  });
}
|
|
|
|
/**
|
|
* Record stats for a container
|
|
*/
|
|
recordStats(containerId, containerName, stats) {
|
|
if (!this.stats.has(containerId)) {
|
|
this.stats.set(containerId, {
|
|
name: containerName,
|
|
history: []
|
|
});
|
|
}
|
|
|
|
const containerStats = this.stats.get(containerId);
|
|
containerStats.name = containerName; // Update name in case it changed
|
|
containerStats.history.push(stats);
|
|
|
|
// Keep only recent stats (based on retention policy)
|
|
const cutoffTime = Date.now() - (STATS_RETENTION_HOURS * 60 * 60 * 1000);
|
|
containerStats.history = containerStats.history.filter(s =>
|
|
new Date(s.timestamp).getTime() > cutoffTime
|
|
);
|
|
}
|
|
|
|
/**
|
|
* Check if any alerts should be triggered
|
|
*/
|
|
checkAlerts(containerId, containerName, stats) {
|
|
const alertConfig = this.alerts.get(containerId);
|
|
if (!alertConfig || !alertConfig.enabled) return;
|
|
|
|
const now = Date.now();
|
|
const lastAlert = this.lastAlerts.get(containerId) || 0;
|
|
const cooldown = (alertConfig.cooldownMinutes || 15) * 60 * 1000;
|
|
|
|
// Don't spam alerts - respect cooldown period
|
|
if (now - lastAlert < cooldown) return;
|
|
|
|
const alerts = [];
|
|
|
|
// Check CPU threshold
|
|
if (alertConfig.cpuThreshold && stats.cpu.percent > alertConfig.cpuThreshold) {
|
|
alerts.push({
|
|
type: 'cpu',
|
|
severity: 'warning',
|
|
message: `CPU usage ${stats.cpu.percent.toFixed(1)}% exceeds threshold ${alertConfig.cpuThreshold}%`,
|
|
value: stats.cpu.percent,
|
|
threshold: alertConfig.cpuThreshold
|
|
});
|
|
}
|
|
|
|
// Check memory threshold
|
|
if (alertConfig.memoryThreshold && stats.memory.percent > alertConfig.memoryThreshold) {
|
|
alerts.push({
|
|
type: 'memory',
|
|
severity: 'warning',
|
|
message: `Memory usage ${stats.memory.percent.toFixed(1)}% exceeds threshold ${alertConfig.memoryThreshold}%`,
|
|
value: stats.memory.percent,
|
|
threshold: alertConfig.memoryThreshold
|
|
});
|
|
}
|
|
|
|
// Check disk I/O threshold (MB/s)
|
|
if (alertConfig.diskIOThreshold) {
|
|
const diskIO = stats.disk.readMB + stats.disk.writeMB;
|
|
if (diskIO > alertConfig.diskIOThreshold) {
|
|
alerts.push({
|
|
type: 'disk',
|
|
severity: 'warning',
|
|
message: `Disk I/O ${diskIO.toFixed(1)} MB/s exceeds threshold ${alertConfig.diskIOThreshold} MB/s`,
|
|
value: diskIO,
|
|
threshold: alertConfig.diskIOThreshold
|
|
});
|
|
}
|
|
}
|
|
|
|
if (alerts.length > 0) {
|
|
this.lastAlerts.set(containerId, now);
|
|
|
|
this.emit('alert', {
|
|
containerId,
|
|
containerName,
|
|
timestamp: new Date().toISOString(),
|
|
alerts,
|
|
stats,
|
|
config: alertConfig
|
|
});
|
|
|
|
// Auto-restart if configured
|
|
if (alertConfig.autoRestart) {
|
|
this.restartContainer(containerId, containerName, alerts);
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Restart a container due to resource alerts
|
|
*/
|
|
async restartContainer(containerId, containerName, alerts) {
|
|
try {
|
|
console.log(`[ResourceMonitor] Auto-restarting ${containerName} due to alerts:`, alerts.map(a => a.type).join(', '));
|
|
|
|
const container = docker.getContainer(containerId);
|
|
await container.restart();
|
|
|
|
this.emit('auto-restart', {
|
|
containerId,
|
|
containerName,
|
|
timestamp: new Date().toISOString(),
|
|
reason: alerts
|
|
});
|
|
} catch (error) {
|
|
console.error(`[ResourceMonitor] Failed to restart ${containerName}:`, error.message);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Get current stats for a container
|
|
*/
|
|
getCurrentStats(containerId) {
|
|
const containerStats = this.stats.get(containerId);
|
|
if (!containerStats || containerStats.history.length === 0) {
|
|
return null;
|
|
}
|
|
|
|
return containerStats.history[containerStats.history.length - 1];
|
|
}
|
|
|
|
/**
|
|
* Get historical stats for a container
|
|
*/
|
|
getHistoricalStats(containerId, hours = 24) {
|
|
const containerStats = this.stats.get(containerId);
|
|
if (!containerStats) return [];
|
|
|
|
const cutoffTime = Date.now() - (hours * 60 * 60 * 1000);
|
|
return containerStats.history.filter(s =>
|
|
new Date(s.timestamp).getTime() > cutoffTime
|
|
);
|
|
}
|
|
|
|
/**
|
|
* Get aggregated stats for a container
|
|
*/
|
|
getAggregatedStats(containerId, hours = 24) {
|
|
const history = this.getHistoricalStats(containerId, hours);
|
|
if (history.length === 0) return null;
|
|
|
|
const cpuValues = history.map(s => s.cpu.percent);
|
|
const memoryValues = history.map(s => s.memory.percent);
|
|
|
|
return {
|
|
cpu: {
|
|
current: cpuValues[cpuValues.length - 1],
|
|
avg: cpuValues.reduce((a, b) => a + b, 0) / cpuValues.length,
|
|
max: Math.max(...cpuValues),
|
|
min: Math.min(...cpuValues)
|
|
},
|
|
memory: {
|
|
current: memoryValues[memoryValues.length - 1],
|
|
avg: memoryValues.reduce((a, b) => a + b, 0) / memoryValues.length,
|
|
max: Math.max(...memoryValues),
|
|
min: Math.min(...memoryValues)
|
|
},
|
|
dataPoints: history.length,
|
|
timeRange: hours
|
|
};
|
|
}
|
|
|
|
/**
|
|
* Get stats for all containers
|
|
*/
|
|
getAllStats() {
|
|
const result = {};
|
|
|
|
for (const [containerId, data] of this.stats.entries()) {
|
|
const current = this.getCurrentStats(containerId);
|
|
const aggregated = this.getAggregatedStats(containerId, 24);
|
|
|
|
result[containerId] = {
|
|
name: data.name,
|
|
current,
|
|
aggregated,
|
|
alertConfig: this.alerts.get(containerId)
|
|
};
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Configure alerts for a container
|
|
*/
|
|
setAlertConfig(containerId, config) {
|
|
this.alerts.set(containerId, {
|
|
enabled: config.enabled !== false,
|
|
cpuThreshold: config.cpuThreshold || null,
|
|
memoryThreshold: config.memoryThreshold || null,
|
|
diskIOThreshold: config.diskIOThreshold || null,
|
|
cooldownMinutes: config.cooldownMinutes || 15,
|
|
autoRestart: config.autoRestart || false,
|
|
notificationChannels: config.notificationChannels || []
|
|
});
|
|
|
|
this.saveAlertConfig();
|
|
}
|
|
|
|
/**
|
|
* Get alert configuration for a container
|
|
*/
|
|
getAlertConfig(containerId) {
|
|
return this.alerts.get(containerId) || null;
|
|
}
|
|
|
|
/**
|
|
* Remove alert configuration
|
|
*/
|
|
removeAlertConfig(containerId) {
|
|
this.alerts.delete(containerId);
|
|
this.lastAlerts.delete(containerId);
|
|
this.saveAlertConfig();
|
|
}
|
|
|
|
/**
|
|
* Cleanup old stats beyond retention period
|
|
*/
|
|
cleanupOldStats() {
|
|
const cutoffTime = Date.now() - (STATS_RETENTION_HOURS * 60 * 60 * 1000);
|
|
|
|
for (const [containerId, data] of this.stats.entries()) {
|
|
data.history = data.history.filter(s =>
|
|
new Date(s.timestamp).getTime() > cutoffTime
|
|
);
|
|
|
|
// Remove container stats if no recent data
|
|
if (data.history.length === 0) {
|
|
this.stats.delete(containerId);
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Load stats from disk
|
|
*/
|
|
loadStats() {
|
|
try {
|
|
if (fs.existsSync(STATS_FILE)) {
|
|
const data = JSON.parse(fs.readFileSync(STATS_FILE, 'utf8'));
|
|
this.stats = new Map(Object.entries(data));
|
|
console.log(`[ResourceMonitor] Loaded stats for ${this.stats.size} containers`);
|
|
}
|
|
} catch (error) {
|
|
console.error('[ResourceMonitor] Error loading stats:', error.message);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Save stats to disk
|
|
*/
|
|
saveStats() {
|
|
try {
|
|
const data = Object.fromEntries(this.stats);
|
|
fs.writeFileSync(STATS_FILE, JSON.stringify(data, null, 2));
|
|
} catch (error) {
|
|
console.error('[ResourceMonitor] Error saving stats:', error.message);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Load alert configuration from disk
|
|
*/
|
|
loadAlertConfig() {
|
|
try {
|
|
if (fs.existsSync(ALERT_CONFIG_FILE)) {
|
|
const data = JSON.parse(fs.readFileSync(ALERT_CONFIG_FILE, 'utf8'));
|
|
this.alerts = new Map(Object.entries(data));
|
|
console.log(`[ResourceMonitor] Loaded alert config for ${this.alerts.size} containers`);
|
|
}
|
|
} catch (error) {
|
|
console.error('[ResourceMonitor] Error loading alert config:', error.message);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Save alert configuration to disk
|
|
*/
|
|
saveAlertConfig() {
|
|
try {
|
|
const data = Object.fromEntries(this.alerts);
|
|
fs.writeFileSync(ALERT_CONFIG_FILE, JSON.stringify(data, null, 2));
|
|
} catch (error) {
|
|
console.error('[ResourceMonitor] Error saving alert config:', error.message);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Aggregate a list of raw samples into a single rollup sample
|
|
* @param {Array} samples - Raw stats samples
|
|
* @param {string} timestamp - ISO timestamp to use for the rollup bucket
|
|
* @returns {Object|null} Aggregated sample, or null if input is empty
|
|
*/
|
|
_aggregateSamples(samples, timestamp) {
|
|
if (!samples || samples.length === 0) return null;
|
|
|
|
let cpuSum = 0, cpuMax = 0;
|
|
let memSum = 0, memMax = 0;
|
|
let memPctSum = 0, memPctMax = 0;
|
|
let netRxSum = 0, netTxSum = 0;
|
|
let diskRSum = 0, diskWSum = 0;
|
|
|
|
for (const s of samples) {
|
|
const cpu = s.cpu?.percent || 0;
|
|
const memUsage = s.memory?.usage || 0;
|
|
const memPct = s.memory?.percent || 0;
|
|
cpuSum += cpu; if (cpu > cpuMax) cpuMax = cpu;
|
|
memSum += memUsage; if (memUsage > memMax) memMax = memUsage;
|
|
memPctSum += memPct; if (memPct > memPctMax) memPctMax = memPct;
|
|
netRxSum += s.network?.rxBytes || 0;
|
|
netTxSum += s.network?.txBytes || 0;
|
|
diskRSum += s.disk?.readBytes || 0;
|
|
diskWSum += s.disk?.writeBytes || 0;
|
|
}
|
|
|
|
const n = samples.length;
|
|
return {
|
|
timestamp,
|
|
sampleCount: n,
|
|
cpu: {
|
|
avg: Math.round((cpuSum / n) * 100) / 100,
|
|
max: Math.round(cpuMax * 100) / 100,
|
|
},
|
|
memory: {
|
|
avgUsage: Math.round(memSum / n),
|
|
maxUsage: memMax,
|
|
avgPercent: Math.round((memPctSum / n) * 100) / 100,
|
|
maxPercent: Math.round(memPctMax * 100) / 100,
|
|
avgUsageMB: Math.round(memSum / n / 1024 / 1024),
|
|
maxUsageMB: Math.round(memMax / 1024 / 1024),
|
|
},
|
|
network: {
|
|
rxBytes: netRxSum,
|
|
txBytes: netTxSum,
|
|
rxMB: Math.round(netRxSum / 1024 / 1024 * 100) / 100,
|
|
txMB: Math.round(netTxSum / 1024 / 1024 * 100) / 100,
|
|
},
|
|
disk: {
|
|
readBytes: diskRSum,
|
|
writeBytes: diskWSum,
|
|
readMB: Math.round(diskRSum / 1024 / 1024 * 100) / 100,
|
|
writeMB: Math.round(diskWSum / 1024 / 1024 * 100) / 100,
|
|
},
|
|
};
|
|
}
|
|
|
|
/**
|
|
* Combine already-aggregated samples (e.g. hourly buckets) into a single coarser bucket
|
|
* @param {Array} samples - Aggregated samples (output of _aggregateSamples)
|
|
* @param {string} timestamp - ISO timestamp to use for the rollup bucket
|
|
* @returns {Object|null}
|
|
*/
|
|
_combineAggregated(samples, timestamp) {
|
|
if (!samples || samples.length === 0) return null;
|
|
|
|
let totalCount = 0;
|
|
let cpuWeightedSum = 0, cpuMax = 0;
|
|
let memWeightedSum = 0, memMax = 0;
|
|
let memPctWeightedSum = 0, memPctMax = 0;
|
|
let netRxSum = 0, netTxSum = 0;
|
|
let diskRSum = 0, diskWSum = 0;
|
|
|
|
for (const s of samples) {
|
|
const w = s.sampleCount || 1;
|
|
totalCount += w;
|
|
cpuWeightedSum += (s.cpu?.avg || 0) * w;
|
|
if ((s.cpu?.max || 0) > cpuMax) cpuMax = s.cpu.max;
|
|
memWeightedSum += (s.memory?.avgUsage || 0) * w;
|
|
if ((s.memory?.maxUsage || 0) > memMax) memMax = s.memory.maxUsage;
|
|
memPctWeightedSum += (s.memory?.avgPercent || 0) * w;
|
|
if ((s.memory?.maxPercent || 0) > memPctMax) memPctMax = s.memory.maxPercent;
|
|
netRxSum += s.network?.rxBytes || 0;
|
|
netTxSum += s.network?.txBytes || 0;
|
|
diskRSum += s.disk?.readBytes || 0;
|
|
diskWSum += s.disk?.writeBytes || 0;
|
|
}
|
|
|
|
return {
|
|
timestamp,
|
|
sampleCount: totalCount,
|
|
cpu: {
|
|
avg: Math.round((cpuWeightedSum / totalCount) * 100) / 100,
|
|
max: Math.round(cpuMax * 100) / 100,
|
|
},
|
|
memory: {
|
|
avgUsage: Math.round(memWeightedSum / totalCount),
|
|
maxUsage: memMax,
|
|
avgPercent: Math.round((memPctWeightedSum / totalCount) * 100) / 100,
|
|
maxPercent: Math.round(memPctMax * 100) / 100,
|
|
avgUsageMB: Math.round(memWeightedSum / totalCount / 1024 / 1024),
|
|
maxUsageMB: Math.round(memMax / 1024 / 1024),
|
|
},
|
|
network: {
|
|
rxBytes: netRxSum,
|
|
txBytes: netTxSum,
|
|
rxMB: Math.round(netRxSum / 1024 / 1024 * 100) / 100,
|
|
txMB: Math.round(netTxSum / 1024 / 1024 * 100) / 100,
|
|
},
|
|
disk: {
|
|
readBytes: diskRSum,
|
|
writeBytes: diskWSum,
|
|
readMB: Math.round(diskRSum / 1024 / 1024 * 100) / 100,
|
|
writeMB: Math.round(diskWSum / 1024 / 1024 * 100) / 100,
|
|
},
|
|
};
|
|
}
|
|
|
|
/**
|
|
* Roll up the previous complete hour of raw samples into a single hourly point.
|
|
* Trims hourlyHistory entries older than STATS_HOURLY_RETENTION_DAYS.
|
|
*/
|
|
rollupHourly() {
|
|
const now = new Date();
|
|
// The "previous complete hour" — bucket starts at top of (current_hour - 1)
|
|
const bucketStart = new Date(now.getFullYear(), now.getMonth(), now.getDate(), now.getHours() - 1, 0, 0);
|
|
const bucketEnd = new Date(bucketStart.getTime() + 60 * 60 * 1000);
|
|
const bucketStartMs = bucketStart.getTime();
|
|
const bucketEndMs = bucketEnd.getTime();
|
|
const bucketTimestamp = bucketStart.toISOString();
|
|
|
|
for (const [containerId, data] of this.stats.entries()) {
|
|
const samples = data.history.filter(s => {
|
|
const t = new Date(s.timestamp).getTime();
|
|
return t >= bucketStartMs && t < bucketEndMs;
|
|
});
|
|
if (samples.length === 0) continue;
|
|
|
|
const rollup = this._aggregateSamples(samples, bucketTimestamp);
|
|
if (!rollup) continue;
|
|
|
|
if (!this.hourlyHistory.has(containerId)) {
|
|
this.hourlyHistory.set(containerId, { name: data.name, samples: [] });
|
|
}
|
|
const entry = this.hourlyHistory.get(containerId);
|
|
entry.name = data.name;
|
|
// Avoid duplicate buckets if rollup ran twice
|
|
if (!entry.samples.find(s => s.timestamp === bucketTimestamp)) {
|
|
entry.samples.push(rollup);
|
|
}
|
|
|
|
// Trim old entries
|
|
const cutoff = Date.now() - (STATS_HOURLY_RETENTION_DAYS * 24 * 60 * 60 * 1000);
|
|
entry.samples = entry.samples.filter(s => new Date(s.timestamp).getTime() > cutoff);
|
|
}
|
|
|
|
this.saveHourlyStats();
|
|
}
|
|
|
|
/**
|
|
* Roll up the previous complete day of hourly samples into a single daily point.
|
|
* Trims dailyHistory entries older than STATS_DAILY_RETENTION_DAYS.
|
|
*/
|
|
rollupDaily() {
|
|
const now = new Date();
|
|
// Previous calendar day, midnight to midnight
|
|
const bucketStart = new Date(now.getFullYear(), now.getMonth(), now.getDate() - 1, 0, 0, 0);
|
|
const bucketEnd = new Date(now.getFullYear(), now.getMonth(), now.getDate(), 0, 0, 0);
|
|
const bucketStartMs = bucketStart.getTime();
|
|
const bucketEndMs = bucketEnd.getTime();
|
|
const bucketTimestamp = bucketStart.toISOString();
|
|
|
|
for (const [containerId, data] of this.hourlyHistory.entries()) {
|
|
const samples = data.samples.filter(s => {
|
|
const t = new Date(s.timestamp).getTime();
|
|
return t >= bucketStartMs && t < bucketEndMs;
|
|
});
|
|
if (samples.length === 0) continue;
|
|
|
|
const rollup = this._combineAggregated(samples, bucketTimestamp);
|
|
if (!rollup) continue;
|
|
|
|
if (!this.dailyHistory.has(containerId)) {
|
|
this.dailyHistory.set(containerId, { name: data.name, samples: [] });
|
|
}
|
|
const entry = this.dailyHistory.get(containerId);
|
|
entry.name = data.name;
|
|
if (!entry.samples.find(s => s.timestamp === bucketTimestamp)) {
|
|
entry.samples.push(rollup);
|
|
}
|
|
|
|
const cutoff = Date.now() - (STATS_DAILY_RETENTION_DAYS * 24 * 60 * 60 * 1000);
|
|
entry.samples = entry.samples.filter(s => new Date(s.timestamp).getTime() > cutoff);
|
|
}
|
|
|
|
this.saveDailyStats();
|
|
}
|
|
|
|
/**
|
|
* Get history for a container by time range, auto-selecting the appropriate tier.
|
|
* - <= 24h → raw 10s samples
|
|
* - 1-30 days → hourly rollups
|
|
* - > 30 days → daily rollups
|
|
* @param {string} containerId
|
|
* @param {number} startTime - epoch ms
|
|
* @param {number} endTime - epoch ms
|
|
* @returns {{ tier: 'raw'|'hourly'|'daily', samples: Array, unit: string }}
|
|
*/
|
|
getHistoryByRange(containerId, startTime, endTime) {
|
|
const rangeMs = endTime - startTime;
|
|
const oneDay = 24 * 60 * 60 * 1000;
|
|
const thirtyDays = 30 * oneDay;
|
|
|
|
let tier, samples;
|
|
if (rangeMs <= oneDay) {
|
|
tier = 'raw';
|
|
const data = this.stats.get(containerId);
|
|
samples = data ? data.history.filter(s => {
|
|
const t = new Date(s.timestamp).getTime();
|
|
return t >= startTime && t <= endTime;
|
|
}) : [];
|
|
} else if (rangeMs <= thirtyDays) {
|
|
tier = 'hourly';
|
|
const data = this.hourlyHistory.get(containerId);
|
|
samples = data ? data.samples.filter(s => {
|
|
const t = new Date(s.timestamp).getTime();
|
|
return t >= startTime && t <= endTime;
|
|
}) : [];
|
|
} else {
|
|
tier = 'daily';
|
|
const data = this.dailyHistory.get(containerId);
|
|
samples = data ? data.samples.filter(s => {
|
|
const t = new Date(s.timestamp).getTime();
|
|
return t >= startTime && t <= endTime;
|
|
}) : [];
|
|
}
|
|
|
|
return { tier, samples, unit: tier === 'raw' ? '10s' : tier === 'hourly' ? '1h' : '1d' };
|
|
}
|
|
|
|
/**
|
|
* Load hourly rollups from disk
|
|
*/
|
|
loadHourlyStats() {
|
|
try {
|
|
if (fs.existsSync(STATS_HOURLY_FILE)) {
|
|
const data = JSON.parse(fs.readFileSync(STATS_HOURLY_FILE, 'utf8'));
|
|
this.hourlyHistory = new Map(Object.entries(data));
|
|
console.log(`[ResourceMonitor] Loaded hourly rollups for ${this.hourlyHistory.size} containers`);
|
|
}
|
|
} catch (error) {
|
|
console.error('[ResourceMonitor] Error loading hourly stats:', error.message);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Save hourly rollups to disk
|
|
*/
|
|
saveHourlyStats() {
|
|
try {
|
|
const data = Object.fromEntries(this.hourlyHistory);
|
|
fs.writeFileSync(STATS_HOURLY_FILE, JSON.stringify(data, null, 2));
|
|
} catch (error) {
|
|
console.error('[ResourceMonitor] Error saving hourly stats:', error.message);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Load daily rollups from disk
|
|
*/
|
|
loadDailyStats() {
|
|
try {
|
|
if (fs.existsSync(STATS_DAILY_FILE)) {
|
|
const data = JSON.parse(fs.readFileSync(STATS_DAILY_FILE, 'utf8'));
|
|
this.dailyHistory = new Map(Object.entries(data));
|
|
console.log(`[ResourceMonitor] Loaded daily rollups for ${this.dailyHistory.size} containers`);
|
|
}
|
|
} catch (error) {
|
|
console.error('[ResourceMonitor] Error loading daily stats:', error.message);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Save daily rollups to disk
|
|
*/
|
|
saveDailyStats() {
|
|
try {
|
|
const data = Object.fromEntries(this.dailyHistory);
|
|
fs.writeFileSync(STATS_DAILY_FILE, JSON.stringify(data, null, 2));
|
|
} catch (error) {
|
|
console.error('[ResourceMonitor] Error saving daily stats:', error.message);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Export stats for backup
|
|
*/
|
|
exportStats() {
|
|
return {
|
|
stats: Object.fromEntries(this.stats),
|
|
hourlyHistory: Object.fromEntries(this.hourlyHistory),
|
|
dailyHistory: Object.fromEntries(this.dailyHistory),
|
|
alerts: Object.fromEntries(this.alerts),
|
|
exportedAt: new Date().toISOString()
|
|
};
|
|
}
|
|
|
|
/**
|
|
* Import stats from backup
|
|
*/
|
|
importStats(data) {
|
|
if (data.stats) {
|
|
this.stats = new Map(Object.entries(data.stats));
|
|
}
|
|
if (data.hourlyHistory) {
|
|
this.hourlyHistory = new Map(Object.entries(data.hourlyHistory));
|
|
}
|
|
if (data.dailyHistory) {
|
|
this.dailyHistory = new Map(Object.entries(data.dailyHistory));
|
|
}
|
|
if (data.alerts) {
|
|
this.alerts = new Map(Object.entries(data.alerts));
|
|
}
|
|
this.saveStats();
|
|
this.saveHourlyStats();
|
|
this.saveDailyStats();
|
|
this.saveAlertConfig();
|
|
}
|
|
}
|
|
|
|
// Export singleton instance
// Constructed at require time, so persisted state is loaded from disk on the
// first require; monitoring does not begin until start() is called.
module.exports = new ResourceMonitor();
|