Add Docker hygiene, deployment manifests, and daily log digest
Prevents Docker disk bloat by adding log rotation (10MB max, 3 files) to all container creation and update paths, auto-pruning dangling images after deploy/remove/update, and a daily maintenance module that cleans build cache and warns on disk thresholds. Saves a deployment manifest in services.json at deploy time so users can restore all their apps after a Docker purge. Adds restore-all and restore-single endpoints that recreate containers, Caddy config, and DNS records from the saved manifests. Adds an hourly log collector and daily digest generator that summarizes errors, warnings, and events across all services into a single human-readable report with guidance on where to investigate. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
212
dashcaddy-api/docker-maintenance.js
Normal file
212
dashcaddy-api/docker-maintenance.js
Normal file
@@ -0,0 +1,212 @@
|
||||
/**
|
||||
* Docker Maintenance Module
|
||||
* Scheduled cleanup to prevent Docker disk bloat:
|
||||
* - Prunes dangling images
|
||||
* - Prunes stopped non-managed containers
|
||||
* - Clears build cache
|
||||
* - Monitors disk usage and warns when thresholds exceeded
|
||||
*/
|
||||
|
||||
const Docker = require('dockerode');
|
||||
const EventEmitter = require('events');
|
||||
const { DOCKER } = require('./constants');
|
||||
|
||||
// Single dockerode client shared by every maintenance step in this module.
// It is constructed with no options, so dockerode's own connection defaults apply.
const docker = new Docker();
|
||||
|
||||
/** Delay before the first maintenance pass after start(), to let everything settle. */
const STARTUP_DELAY_MS = 5 * 60 * 1000;

/** How long an unmanaged container must have been stopped before it is pruned. */
const STOPPED_GRACE_SECONDS = 24 * 60 * 60;

/** Status code carried by Docker API errors when the container no longer exists. */
const HTTP_NOT_FOUND = 404;

/**
 * Sum a numeric field over a list, ignoring missing and negative values.
 * Negative sizes are treated as "unknown" rather than subtracted from the total.
 * @param {Array<object>} items
 * @param {(item: object) => (number|undefined)} pick
 * @returns {number}
 */
function sumSizes(items, pick) {
  return items.reduce((sum, item) => {
    const size = pick(item);
    return sum + (size > 0 ? size : 0);
  }, 0);
}

/**
 * Convert a `docker.df()` response into the per-category usage summary used by
 * both the scheduled maintenance run and the on-demand disk usage query.
 * @param {object} df - Raw response from `docker.df()`.
 * @returns {{images: object, containers: object, volumes: object, buildCache: object, totalBytes: number, totalGB: number}}
 */
function summarizeDiskUsage(df) {
  const images = df.Images || [];
  const containers = df.Containers || [];
  const volumes = df.Volumes?.Volumes || [];
  const buildCache = df.BuildCache || [];

  const usage = {
    images: { count: images.length, sizeBytes: sumSizes(images, (i) => i.Size) },
    containers: { count: containers.length, sizeBytes: sumSizes(containers, (c) => c.SizeRw) },
    volumes: { count: volumes.length, sizeBytes: sumSizes(volumes, (v) => v.UsageData?.Size) },
    buildCache: { count: buildCache.length, sizeBytes: sumSizes(buildCache, (b) => b.Size) }
  };
  usage.totalBytes =
    usage.images.sizeBytes +
    usage.containers.sizeBytes +
    usage.volumes.sizeBytes +
    usage.buildCache.sizeBytes;
  usage.totalGB = +(usage.totalBytes / (1024 ** 3)).toFixed(2);
  return usage;
}

/**
 * Seconds since a container stopped, based on its inspect data.
 * Falls back to the creation time from the list entry when the finish time is
 * missing, unparseable, or a pre-epoch placeholder (e.g. year 0001).
 * @param {object} info - Result of `container.inspect()`.
 * @param {object} listEntry - Matching entry from `docker.listContainers()`.
 * @returns {number}
 */
function stoppedAgeSeconds(info, listEntry) {
  const nowSeconds = Date.now() / 1000;
  const finishedMs = Date.parse(info.State?.FinishedAt);
  if (Number.isFinite(finishedMs) && finishedMs > 0) {
    return nowSeconds - finishedMs / 1000;
  }
  return nowSeconds - listEntry.Created;
}

/**
 * Scheduled Docker cleanup and disk monitoring.
 *
 * Events:
 *  - 'maintenance-complete' (result) after a run finishes
 *  - 'maintenance-failed' (result) when a run throws unexpectedly
 */
class DockerMaintenance extends EventEmitter {
  constructor() {
    super();
    this.interval = null;
    this.startupTimer = null;
    this.running = false;
    this.lastRun = null;
    this.lastResult = null;
  }

  /**
   * Begin scheduled maintenance: one run after a startup delay, then one run per
   * DOCKER.MAINTENANCE.INTERVAL. Calling start() while already running is a no-op.
   */
  start() {
    if (this.running) return;
    this.running = true;

    // Keep the handle so stop() can cancel the pending first run. Otherwise the
    // timer keeps firing after stop(), and a quick stop()/start() runs it twice.
    this.startupTimer = setTimeout(() => {
      this.startupTimer = null;
      if (!this.running) return;
      // Failures are recorded in lastResult and emitted as 'maintenance-failed';
      // the catch only prevents an unhandled rejection.
      this.runMaintenance().catch(() => {});
    }, STARTUP_DELAY_MS);

    this.interval = setInterval(() => {
      this.runMaintenance().catch(() => {});
    }, DOCKER.MAINTENANCE.INTERVAL);
  }

  /** Cancel the pending startup run and the recurring schedule. No-op when not running. */
  stop() {
    if (!this.running) return;
    this.running = false;
    if (this.startupTimer) {
      clearTimeout(this.startupTimer);
      this.startupTimer = null;
    }
    if (this.interval) {
      clearInterval(this.interval);
      this.interval = null;
    }
  }

  /**
   * Run one maintenance pass. Each step is best-effort: a failing step adds an
   * entry to `result.warnings` and the remaining steps still run.
   * @returns {Promise<object>} The result, also stored in `lastResult` and emitted
   *   with 'maintenance-complete'.
   * @throws Rethrows any unexpected error after emitting 'maintenance-failed'.
   */
  async runMaintenance() {
    const startTime = Date.now();
    const result = {
      timestamp: new Date().toISOString(),
      pruned: { images: 0, containers: 0, buildCache: 0 },
      // spaceReclaimed.containers stays 0: container removal reports no size.
      spaceReclaimed: { images: 0, containers: 0, buildCache: 0, total: 0 },
      diskUsage: null,
      warnings: [],
      containersWithoutLogLimits: []
    };

    try {
      // 1. Prune dangling images
      try {
        const imgResult = await docker.pruneImages({ filters: { dangling: { true: true } } });
        result.pruned.images = (imgResult.ImagesDeleted || []).length;
        result.spaceReclaimed.images = imgResult.SpaceReclaimed || 0;
      } catch (e) {
        result.warnings.push(`Image prune failed: ${e.message}`);
      }

      // 2. Prune stopped containers (only non-managed ones)
      try {
        const stopped = await docker.listContainers({
          all: true,
          filters: { status: ['exited', 'dead'] }
        });
        for (const c of stopped) {
          // Skip DashCaddy-managed containers — user may want to restart them
          if (c.Labels?.['sami.managed'] === 'true') continue;
          try {
            const container = docker.getContainer(c.Id);
            const info = await container.inspect();
            // The listing may be stale: never force-remove a container that has
            // been started again since it was listed.
            if (info.State?.Running) continue;
            // Skip containers stopped less than 24h ago (measured from when the
            // container stopped, not from when it was created).
            if (stoppedAgeSeconds(info, c) < STOPPED_GRACE_SECONDS) continue;
            await container.remove({ force: true });
            result.pruned.containers++;
          } catch (e) {
            // A 404 means the container vanished between list and remove, which
            // is harmless. Anything else is a real failure worth reporting.
            if (e.statusCode !== HTTP_NOT_FOUND) {
              result.warnings.push(`Failed to remove container ${c.Id.slice(0, 12)}: ${e.message}`);
            }
          }
        }
      } catch (e) {
        result.warnings.push(`Container prune failed: ${e.message}`);
      }

      // 3. Prune build cache
      try {
        const cacheResult = await docker.pruneBuilder();
        result.spaceReclaimed.buildCache = cacheResult.SpaceReclaimed || 0;
        result.pruned.buildCache = (cacheResult.CachesDeleted || []).length;
      } catch (e) {
        // Build cache prune may not be available on all Docker versions
        result.warnings.push(`Build cache prune failed: ${e.message}`);
      }

      // 4. Get disk usage
      try {
        result.diskUsage = summarizeDiskUsage(await docker.df());
        if (result.diskUsage.totalGB > DOCKER.MAINTENANCE.DISK_WARN_GB) {
          result.warnings.push(`Docker disk usage is ${result.diskUsage.totalGB}GB (threshold: ${DOCKER.MAINTENANCE.DISK_WARN_GB}GB)`);
        }
      } catch (e) {
        result.warnings.push(`Disk usage check failed: ${e.message}`);
      }

      // 5. Check for managed, running containers without log rotation
      try {
        const running = await docker.listContainers({ all: false });
        for (const c of running) {
          if (c.Labels?.['sami.managed'] !== 'true') continue;
          try {
            const info = await docker.getContainer(c.Id).inspect();
            const logConfig = info.HostConfig?.LogConfig;
            if (!logConfig?.Config?.['max-size']) {
              result.containersWithoutLogLimits.push({
                name: c.Names?.[0]?.replace(/^\//, '') || c.Id.slice(0, 12),
                id: c.Id.slice(0, 12)
              });
            }
          } catch (e) {
            // A 404 means the container went away between list and inspect.
            if (e.statusCode !== HTTP_NOT_FOUND) {
              result.warnings.push(`Failed to inspect container ${c.Id.slice(0, 12)}: ${e.message}`);
            }
          }
        }
        if (result.containersWithoutLogLimits.length > 0) {
          result.warnings.push(
            `${result.containersWithoutLogLimits.length} container(s) have no log rotation — restart or update them to apply log limits: ${result.containersWithoutLogLimits.map(c => c.name).join(', ')}`
          );
        }
      } catch (e) {
        result.warnings.push(`Log config check failed: ${e.message}`);
      }

      result.spaceReclaimed.total =
        result.spaceReclaimed.images +
        result.spaceReclaimed.containers +
        result.spaceReclaimed.buildCache;

      result.duration = Date.now() - startTime;
      this.lastRun = new Date().toISOString();
      this.lastResult = result;

      this.emit('maintenance-complete', result);
      return result;
    } catch (error) {
      result.error = error.message;
      result.duration = Date.now() - startTime;
      this.lastResult = result;
      this.emit('maintenance-failed', result);
      throw error;
    }
  }

  /**
   * Get Docker disk usage snapshot (callable on demand).
   * @returns {Promise<object|null>} Usage summary, or null when the query fails.
   */
  async getDiskUsage() {
    try {
      return summarizeDiskUsage(await docker.df());
    } catch (e) {
      // Callers treat null as "usage unavailable".
      return null;
    }
  }

  /** @returns {{running: boolean, lastRun: (string|null), lastResult: (object|null)}} */
  getStatus() {
    return {
      running: this.running,
      lastRun: this.lastRun,
      lastResult: this.lastResult
    };
  }
}
|
||||
|
||||
// Export one shared instance rather than the class, so every module that
// requires this file sees the same schedule state and lastResult.
module.exports = new DockerMaintenance();
|
||||
Reference in New Issue
Block a user