Files
dashcaddy/dashcaddy-api/routes/health.js
Sami fc6275a96b feat: add Pylon health relay for remote service health checks
DashCaddy Pylon is a lightweight probe agent that runs on remote
networks to relay health checks for services the main DashCaddy
instance can't reach directly (e.g., .sami domains, LAN IPs).

- Standalone zero-dependency Node.js script (pylon/dashcaddy-pylon.js)
- Optional API key auth, HEAD→GET fallback, batch probe support
- Health routes now try direct check first, fall back to pylon relay
- New endpoints: /health/probe (act as pylon), /health/pylon (status)
- Config: add "pylon": { "url": "...", "key": "..." } to config.json

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-23 15:52:43 -07:00

351 lines
12 KiB
JavaScript

const express = require('express');
const fs = require('fs');
const path = require('path');
const { execSync } = require('child_process');
const { TIMEOUTS } = require('../constants');
const { exists } = require('../fs-helpers');
const { paginate, parsePaginationParams } = require('../pagination');
const platformPaths = require('../platform-paths');
const { resolveServiceUrl } = require('../url-resolver');
module.exports = function(ctx) {
const router = express.Router();
// In-memory cache for health results (local to this router)
let serviceHealthCache = {};
let lastHealthCheck = null;
/**
 * Probe a URL directly from this host.
 *
 * A HEAD request is tried first; if that attempt throws (network error,
 * abort, ...) a single GET request is tried instead. A response counts as
 * healthy when `response.ok` is set or its status is below 500, so 4xx
 * answers are still reported as healthy.
 *
 * @param {string} url - URL to probe.
 * @returns {Promise<{status: string, statusCode: number, url: string, checkedAt: string}|null>}
 *   Health record for the first attempt that produced a response, or null
 *   when both attempts threw.
 */
async function checkDirect(url) {
  for (const method of ['HEAD', 'GET']) {
    const controller = new AbortController();
    // Abort an attempt that has not answered within five seconds.
    const timer = setTimeout(() => controller.abort(), 5000);
    try {
      const response = await ctx.fetchT(url, { method, signal: controller.signal, redirect: 'follow' });
      return {
        status: response.ok || response.status < 500 ? 'healthy' : 'unhealthy',
        statusCode: response.status,
        url,
        checkedAt: new Date().toISOString()
      };
    } catch (_) {
      // Deliberate best-effort: a failed attempt falls through to the next
      // method, and to the `null` result once every method has failed.
    } finally {
      // Runs on success and on failure, so a rejected fetch no longer leaves
      // a pending abort timer behind.
      clearTimeout(timer);
    }
  }
  return null; // Direct check completely failed
}
/**
 * Probe a URL through a Pylon relay.
 *
 * Sends `GET <pylon url>/probe?url=<encoded target>` and, when a key is
 * configured, an `x-pylon-key` header. The relay's JSON answer is mapped
 * onto a health record tagged with `via: 'pylon'`.
 *
 * @param {{url?: string, key?: string}|undefined|null} pylonConfig - Relay
 *   settings; a missing config or missing `url` yields null immediately.
 * @param {string} url - Target URL the relay should probe.
 * @returns {Promise<object|null>} Health record, or null when the relay is
 *   not configured, answers with a non-OK status, or the request fails.
 */
async function checkViaPylon(pylonConfig, url) {
  if (!pylonConfig?.url) return null;
  let timer;
  try {
    // Drop trailing slashes so "http://host/" does not produce "//probe".
    const base = String(pylonConfig.url).replace(/\/+$/, '');
    const probeUrl = `${base}/probe?url=${encodeURIComponent(url)}`;
    const headers = {};
    if (pylonConfig.key) headers['x-pylon-key'] = pylonConfig.key;
    const controller = new AbortController();
    // 12 s budget for the relay round trip.
    timer = setTimeout(() => controller.abort(), 12000);
    const response = await ctx.fetchT(probeUrl, { method: 'GET', signal: controller.signal, headers });
    if (!response.ok) return null;
    // The timer is still armed here, so reading the body is also bounded.
    const data = await response.json();
    return {
      status: data.status || 'unhealthy',
      statusCode: data.statusCode,
      responseTime: data.responseTime,
      reason: data.reason,
      url,
      via: 'pylon',
      checkedAt: data.checkedAt || new Date().toISOString()
    };
  } catch (_) {
    // Best-effort relay: any failure is reported to the caller as "no result".
    return null;
  } finally {
    // Clears the timer on every path (success, non-OK, exception).
    clearTimeout(timer);
  }
}
// ===== HEALTH / SERVICES =====
// Live health check of every configured service.
// Each service is probed directly first and, when that yields nothing and a
// pylon is configured, through the pylon relay. The complete result map is
// stored in the router-local cache; the response carries the (optionally
// paginated) map plus the time of the check.
router.get('/health/services', ctx.asyncHandler(async (req, res) => {
  const hasServicesFile = await exists(ctx.SERVICES_FILE);
  if (!hasServicesFile) {
    return res.json({ success: true, health: {} });
  }

  const stored = await ctx.servicesStateManager.read();
  // The state is either a bare array or an object wrapping a `services` array.
  const serviceList = Array.isArray(stored) ? stored : stored.services || [];
  const health = {};
  const pylonConfig = ctx.siteConfig?.pylon;

  // Probes one service and records its outcome under its id.
  const probeService = async (service) => {
    const serviceId = service.id || service.name?.toLowerCase();
    if (!serviceId) return;

    try {
      const url = resolveServiceUrl(serviceId, service, ctx.siteConfig, ctx.buildServiceUrl);
      if (!url) {
        health[serviceId] = { status: 'unknown', reason: 'No URL configured' };
        return;
      }

      // Direct probe first; the relay is only consulted when it produced nothing.
      const outcome = (await checkDirect(url))
        || (pylonConfig ? await checkViaPylon(pylonConfig, url) : null);

      health[serviceId] = outcome || {
        status: 'unhealthy',
        reason: pylonConfig ? 'Unreachable (direct + pylon)' : 'fetch failed',
        url,
        checkedAt: new Date().toISOString()
      };
    } catch (e) {
      health[serviceId] = {
        status: 'error',
        reason: e.message,
        checkedAt: new Date().toISOString()
      };
    }
  };

  // All services are probed concurrently.
  await Promise.all(serviceList.map(probeService));

  // Remember the full (unpaginated) result for /health/cached.
  serviceHealthCache = health;
  lastHealthCheck = new Date().toISOString();

  const page = paginate(Object.entries(health), parsePaginationParams(req.query));
  const payload = {
    success: true,
    health: Object.fromEntries(page.data),
    checkedAt: lastHealthCheck
  };
  if (page.pagination) {
    payload.pagination = page.pagination;
  }
  res.json(payload);
}, 'health-services'));
// Return the result of the most recent /health/services run without
// re-checking anything. `cacheAge` is the age of that result in
// milliseconds, or null when no check has run yet.
router.get('/health/cached', ctx.asyncHandler(async (req, res) => {
  let cacheAge = null;
  if (lastHealthCheck) {
    cacheAge = Date.now() - new Date(lastHealthCheck).getTime();
  }

  res.json({
    success: true,
    health: serviceHealthCache,
    lastCheck: lastHealthCheck,
    cacheAge
  });
}, 'health-cached'));
// Live health check of a single service, looked up by `:id`
// (matched against `service.id`, or the lower-cased `service.name`).
// Throws NotFoundError when the services file or the service is missing;
// how that is turned into an HTTP response is up to ctx.asyncHandler.
router.get('/health/service/:id', ctx.asyncHandler(async (req, res) => {
  const { NotFoundError } = require('../errors');
  const serviceId = req.params.id;

  // Load service config
  if (!await exists(ctx.SERVICES_FILE)) {
    throw new NotFoundError('Services file');
  }
  const servicesData = await ctx.servicesStateManager.read();
  const services = Array.isArray(servicesData) ? servicesData : servicesData.services || [];
  const service = services.find(s => (s.id || s.name?.toLowerCase()) === serviceId);
  if (!service) {
    throw new NotFoundError('Service');
  }

  // Determine URL
  const url = resolveServiceUrl(serviceId, service, ctx.siteConfig, ctx.buildServiceUrl);
  if (!url) {
    // Same answer the bulk /health/services route gives. Without this guard
    // an empty URL was probed as-is and even relayed to the pylon.
    return res.json({
      success: true,
      serviceId,
      health: { status: 'unknown', reason: 'No URL configured' }
    });
  }

  const pylonConfig = ctx.siteConfig?.pylon;

  // Try direct, then pylon relay
  let result = await checkDirect(url);
  if (!result && pylonConfig) {
    result = await checkViaPylon(pylonConfig, url);
  }
  if (!result) {
    result = {
      status: 'unhealthy',
      reason: pylonConfig ? 'Unreachable (direct + pylon)' : 'fetch failed',
      url,
      checkedAt: new Date().toISOString()
    };
  }

  res.json({ success: true, serviceId, health: result });
}, 'health-service'));
// ===== HEALTH / PROBE (Pylon-compatible) =====
// Probe endpoint — lets this DashCaddy act as a pylon for other instances.
// Performs a direct check of `?url=` and returns the bare health record
// (no `success` wrapper).
// NOTE(review): this fetches a caller-supplied URL from inside this host's
// network; confirm the route is only reachable by authenticated callers.
router.get('/health/probe', ctx.asyncHandler(async (req, res) => {
  const targetUrl = req.query.url;
  if (!targetUrl) {
    return ctx.errorResponse(res, 400, 'Missing ?url= parameter');
  }
  // Repeated or bracketed query parameters can arrive as arrays/objects.
  if (typeof targetUrl !== 'string') {
    return ctx.errorResponse(res, 400, 'Invalid ?url= parameter');
  }
  // Only absolute http(s) targets make sense for a health probe; reject
  // other schemes (file:, data:, ...) and unparseable input up front.
  let parsed = null;
  try {
    parsed = new URL(targetUrl);
  } catch (_) {
    parsed = null;
  }
  if (!parsed || (parsed.protocol !== 'http:' && parsed.protocol !== 'https:')) {
    return ctx.errorResponse(res, 400, '?url= must be an absolute http(s) URL');
  }

  const result = await checkDirect(targetUrl);
  res.json(result || {
    status: 'unhealthy',
    reason: 'fetch failed',
    url: targetUrl,
    checkedAt: new Date().toISOString()
  });
}, 'health-probe'));
// Report whether a pylon relay is configured and whether its `/health`
// endpoint answers. Any failure while contacting the relay or decoding its
// JSON answer is reported as `reachable: false` with the error message.
router.get('/health/pylon', ctx.asyncHandler(async (req, res) => {
  const pylonConfig = ctx.siteConfig?.pylon;
  const isConfigured = Boolean(pylonConfig?.url);
  if (!isConfigured) {
    return res.json({ success: true, configured: false });
  }

  try {
    // Only send the key header when a key is configured.
    const headers = pylonConfig.key ? { 'x-pylon-key': pylonConfig.key } : {};
    const requestOptions = { method: 'GET', headers };
    // Third argument is passed through to ctx.fetchT — presumably a timeout
    // in milliseconds; confirm against fetchT's definition.
    const response = await ctx.fetchT(`${pylonConfig.url}/health`, requestOptions, 5000);
    const pylon = await response.json();
    res.json({ success: true, configured: true, reachable: true, pylon });
  } catch (e) {
    res.json({ success: true, configured: true, reachable: false, error: e.message });
  }
}, 'health-pylon'));
// ===== HEALTH / CA =====
// Get CA certificate health status.
// Reads the root certificate's notAfter date via the `openssl` CLI and maps
// the remaining lifetime onto a status:
//   expired or < 30 days -> critical, < 90 days -> warning, else healthy.
// Failures are logged and reported as `status: 'error'` in the JSON body.
router.get('/health/ca', ctx.asyncHandler(async (req, res) => {
  // Local require, matching the file's other in-handler requires.
  // execFileSync passes the path as an argument instead of through a shell.
  const { execFileSync } = require('child_process');
  const MS_PER_DAY = 1000 * 60 * 60 * 24;

  // Try deployed location first, then Caddy PKI location
  const deployedCertPath = path.join(platformPaths.caCertDir, 'root.crt');
  const pkiCertPath = platformPaths.pkiRootCert;
  const rootCertPath = await exists(deployedCertPath) ? deployedCertPath : pkiCertPath;

  try {
    // Check if certificate exists
    if (!await exists(rootCertPath)) {
      return res.json({
        status: 'error',
        message: 'Root CA certificate not found',
        daysUntilExpiration: null
      });
    }

    const dates = execFileSync('openssl', ['x509', '-in', rootCertPath, '-noout', '-dates']).toString();
    const notAfterMatch = dates.match(/notAfter=(.*)/);
    if (!notAfterMatch) {
      throw new Error('openssl output did not contain a notAfter date');
    }
    const notAfter = notAfterMatch[1].trim();

    const expirationDate = new Date(notAfter);
    if (Number.isNaN(expirationDate.getTime())) {
      // Without this guard every threshold comparison below is false for
      // NaN and the certificate would be reported as healthy.
      throw new Error(`Could not parse CA certificate expiration date: ${notAfter}`);
    }
    const daysUntilExpiration = Math.floor((expirationDate.getTime() - Date.now()) / MS_PER_DAY);

    // Alert thresholds
    let status = 'healthy';
    let message = `CA certificate valid for ${daysUntilExpiration} days`;
    if (daysUntilExpiration < 0) {
      status = 'critical';
      message = `CA certificate EXPIRED ${Math.abs(daysUntilExpiration)} days ago!`;
    } else if (daysUntilExpiration < 30) {
      status = 'critical';
      message = `CA certificate expires in ${daysUntilExpiration} days!`;
    } else if (daysUntilExpiration < 90) {
      status = 'warning';
      message = `CA certificate expires in ${daysUntilExpiration} days`;
    }

    res.json({
      status,
      message,
      daysUntilExpiration,
      expiresAt: notAfter
    });
  } catch (error) {
    await ctx.logError('GET /api/health/ca', error);
    res.json({
      status: 'error',
      message: error.message,
      daysUntilExpiration: null
    });
  }
}, 'health-ca'));
// ===== HEALTH CHECK (health-checker module) =====
// Current status as reported by ctx.healthChecker.getCurrentStatus().
router.get('/health-checks/status', ctx.asyncHandler(async (req, res) => {
  res.json({
    success: true,
    status: ctx.healthChecker.getCurrentStatus()
  });
}, 'health-check-status'));
// Get service statistics for the last `?hours=` hours (default 24).
// Throws NotFoundError when the health checker has no stats for the service.
router.get('/health-checks/:serviceId/stats', ctx.asyncHandler(async (req, res) => {
  // Explicit radix; missing, non-numeric, zero or negative values all fall
  // back to the 24 hour default instead of reaching the health checker.
  const requestedHours = Number.parseInt(req.query.hours, 10);
  const hours = requestedHours > 0 ? requestedHours : 24;

  const stats = ctx.healthChecker.getServiceStats(req.params.serviceId, hours);
  if (!stats) {
    const { NotFoundError } = require('../errors');
    throw new NotFoundError('Service');
  }
  res.json({ success: true, stats });
}, 'health-check-stats'));
// Hand the request body to the health checker as the configuration for
// the service named in the path.
router.post('/health-checks/:serviceId/configure', ctx.asyncHandler(async (req, res) => {
  const { params: { serviceId }, body } = req;
  ctx.healthChecker.configureService(serviceId, body);
  res.json({
    success: true,
    message: 'Health check configured'
  });
}, 'health-check-configure'));
// Ask the health checker to remove the service named in the path.
router.delete('/health-checks/:serviceId/configure', ctx.asyncHandler(async (req, res) => {
  const { serviceId } = req.params;
  ctx.healthChecker.removeService(serviceId);
  res.json({
    success: true,
    message: 'Health check removed'
  });
}, 'health-check-remove'));
// List the health checker's open incidents, paginated when the query
// asks for it.
router.get('/health-checks/incidents', ctx.asyncHandler(async (req, res) => {
  const openIncidents = ctx.healthChecker.getOpenIncidents();
  const page = paginate(openIncidents, parsePaginationParams(req.query));

  const payload = { success: true, incidents: page.data };
  if (page.pagination) {
    payload.pagination = page.pagination;
  }
  res.json(payload);
}, 'health-check-incidents'));
// Incident history. Without pagination parameters the newest `?limit=`
// entries (default 50) are requested from the health checker.
router.get('/health-checks/incidents/history', ctx.asyncHandler(async (req, res) => {
  const paginationParams = parsePaginationParams(req.query);

  let fetchLimit;
  if (paginationParams) {
    // When paginating, fetch all history so pagination can slice correctly
    fetchLimit = Number.MAX_SAFE_INTEGER;
  } else {
    fetchLimit = parseInt(req.query.limit) || 50;
  }

  const page = paginate(ctx.healthChecker.getIncidentHistory(fetchLimit), paginationParams);

  const payload = { success: true, history: page.data };
  if (page.pagination) {
    payload.pagination = page.pagination;
  }
  res.json(payload);
}, 'health-check-incidents-history'));
return router;
};