Files
FEDEO/backend/src/modules/system-status.service.ts

175 lines
6.5 KiB
TypeScript

import { FastifyInstance } from "fastify"
import { matrixService } from "./matrix.service"
/** One parsed sample of a Prometheus metric: its label set plus its numeric value. */
type MetricSample = {
// Label name → label value, as returned by `parseLabels`.
labels: Record<string, string>
// Sample value; `parsePrometheusMetrics` only stores finite numbers.
value: number
}
/**
 * Matches one sample line of the Prometheus text exposition format.
 *
 * Capture groups:
 *   1. metric name
 *   2. raw label list without the surrounding braces (undefined when absent)
 *   3. sample value as written (number, `Inf` or `NaN`, optionally signed)
 *
 * Quoted label values are consumed as a unit so a `}` inside a value does not
 * end the label list early. An optional trailing integer timestamp and
 * trailing whitespace (e.g. a stray `\r`) are accepted but not captured.
 */
const metricLinePattern =
  /^([a-zA-Z_:][a-zA-Z0-9_:]*)(?:\{((?:[^"}]|"(?:\\.|[^"\\])*")*)\})?\s+([+-]?(?:\d+(?:\.\d+)?|\.\d+)(?:e[+-]?\d+)?|[+-]?Inf|NaN)(?:\s+-?\d+)?\s*$/i
/**
 * Base URL of the node exporter without trailing slashes.
 * Reads `NODE_EXPORTER_URL`; an unset or empty variable selects the default host.
 */
const nodeExporterUrl = () => {
  const configured = process.env.NODE_EXPORTER_URL
  const base = configured ? configured : "http://node-exporter:9100"
  return base.replace(/\/+$/, "")
}
/**
 * S3 endpoint taken from `S3_ENDPOINT` with trailing slashes removed.
 * Returns an empty string when the variable is unset or empty.
 */
const s3EndpointUrl = () => {
  const endpoint = process.env.S3_ENDPOINT
  return endpoint ? endpoint.replace(/\/+$/, "") : ""
}
/**
 * Parses the label list of a Prometheus sample (the text between `{` and `}`)
 * into a name → value map.
 *
 * Label values are unescaped according to the text exposition format:
 * `\\` → backslash, `\"` → double quote, `\n` → line feed. Any other
 * backslash sequence is kept verbatim.
 *
 * @param value Raw label list, e.g. `mountpoint="/",fstype="ext4"`.
 * @returns Map of label names to unescaped values; empty for empty input.
 */
const parseLabels = (value = "") => {
  const labels: Record<string, string> = {}
  // `\\.` consumes every escape as a pair, so an escaped backslash directly
  // before the closing quote (`"C:\\"`) can no longer be misread as `\"`.
  const labelPattern = /(\w+)="((?:\\.|[^"\\])*)"/g
  let match: RegExpExecArray | null
  while ((match = labelPattern.exec(value)) !== null) {
    labels[match[1]] = match[2].replace(/\\(.)/g, (sequence: string, escaped: string) => {
      if (escaped === "n") return "\n"
      if (escaped === "\\" || escaped === "\"") return escaped
      return sequence
    })
  }
  return labels
}
/**
 * Parses a Prometheus text exposition payload into samples grouped by metric name.
 *
 * Empty lines, lines starting with `#`, lines that do not match
 * `metricLinePattern` and samples whose value is not a finite number are skipped.
 *
 * @param text Raw `/metrics` response body.
 * @returns Map from metric name to its samples in order of appearance.
 */
const parsePrometheusMetrics = (text: string) => {
  const samplesByName = new Map<string, MetricSample[]>()

  text.split("\n").forEach((rawLine) => {
    if (rawLine === "" || rawLine.startsWith("#")) return

    const parsed = metricLinePattern.exec(rawLine)
    if (parsed === null) return

    const [, metricName, labelText, valueText] = parsed
    const numeric = Number(valueText)
    if (!Number.isFinite(numeric)) return

    const sample: MetricSample = { labels: parseLabels(labelText), value: numeric }
    const existing = samplesByName.get(metricName)
    if (existing) {
      existing.push(sample)
    } else {
      samplesByName.set(metricName, [sample])
    }
  })

  return samplesByName
}
/**
 * Value of the first sample recorded for `name`.
 *
 * @returns The value, or `null` when the metric is missing or has no samples.
 */
const firstMetricValue = (metrics: Map<string, MetricSample[]>, name: string) => {
  const samples = metrics.get(name)
  const first = samples === undefined ? undefined : samples[0]
  return first?.value ?? null
}
/**
 * Value of the first sample of `name` accepted by `predicate`.
 *
 * @returns The matching sample's value, or `null` when the metric is missing
 *          or no sample satisfies the predicate.
 */
const findMetricValue = (
  metrics: Map<string, MetricSample[]>,
  name: string,
  predicate: (sample: MetricSample) => boolean
) => {
  for (const candidate of metrics.get(name) ?? []) {
    if (predicate(candidate)) return candidate.value
  }
  return null
}
/**
 * Builds the status record for a single service.
 *
 * `status` is `"ok"` or `"error"` depending on `ok`. The entries of `detail`
 * are spread last, so a `detail` key named `ok` or `status` takes precedence.
 */
const serviceState = (ok: boolean, detail?: Record<string, any>) => {
  const status: string = ok ? "ok" : "error"
  return { ok, status, ...detail }
}
/**
 * Probes `url` with a GET request and reports the outcome as a service state.
 *
 * The request is aborted after `timeoutMs` milliseconds. Network errors and
 * timeouts are reported as a failed state instead of being thrown.
 *
 * @param url Address to probe.
 * @param timeoutMs Maximum time to wait for the response headers.
 * @returns State containing `url` plus `httpStatus` (on a response) or `error` (on failure).
 */
const checkHttp = async (url: string, timeoutMs = 3000) => {
  const controller = new AbortController()
  const timeout = setTimeout(() => controller.abort(), timeoutMs)
  try {
    const response = await fetch(url, { signal: controller.signal })
    // Only the status matters. Discard the body so the underlying connection is
    // released immediately rather than whenever the response gets garbage-collected.
    await response.body?.cancel().catch(() => undefined)
    return serviceState(response.ok, {
      httpStatus: response.status,
      url,
    })
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : ""
    return serviceState(false, {
      url,
      error: message || "HTTP-Abfrage fehlgeschlagen",
    })
  } finally {
    clearTimeout(timeout)
  }
}
/** Upper bound for the node exporter scrape so a hung exporter cannot stall the report. */
const nodeExporterTimeoutMs = 3000

/** Returns the `message` of a thrown value when it is a non-empty string, else `fallback`. */
const errorMessageOr = (err: unknown, fallback: string): string => {
  const message = (err as { message?: unknown } | null | undefined)?.message
  return typeof message === "string" && message !== "" ? message : fallback
}

/** Scrapes and parses the node exporter; failures are returned as `error`, never thrown. */
const scrapeNodeExporter = async (
  url: string,
  timeoutMs = nodeExporterTimeoutMs
): Promise<{ metrics: Map<string, MetricSample[]> | null; error: string | null }> => {
  const controller = new AbortController()
  const timer = setTimeout(() => controller.abort(), timeoutMs)
  try {
    const response = await fetch(url, { signal: controller.signal })
    if (!response.ok) {
      throw new Error(`Node Exporter antwortet mit ${response.status}`)
    }
    return { metrics: parsePrometheusMetrics(await response.text()), error: null }
  } catch (err: unknown) {
    return { metrics: null, error: errorMessageOr(err, "Node Exporter nicht erreichbar") }
  } finally {
    clearTimeout(timer)
  }
}

/** Runs a trivial query against the database and reports the result as a service state. */
const checkDatabase = async (server: FastifyInstance, fallbackCheckedAt: string) => {
  try {
    const result = await server.db.execute("SELECT NOW() as now")
    const now: unknown = result.rows?.[0]?.now
    // A Date would otherwise be stringified in the locale-style `Date#toString`
    // format, unlike the ISO fallback; normalise it to ISO 8601.
    const checkedAt =
      now instanceof Date && !Number.isNaN(now.getTime())
        ? now.toISOString()
        : String(now || fallbackCheckedAt)
    return serviceState(true, { checkedAt })
  } catch (err: unknown) {
    return serviceState(false, { error: errorMessageOr(err, "Datenbank nicht erreichbar") })
  }
}

/** Bytes in use, or `null` when either input metric is unavailable. */
const usedBytes = (total: number | null, available: number | null): number | null =>
  total !== null && available !== null ? total - available : null

/** Usage in percent with one decimal, or `null` when it cannot be computed. */
const usedPercent = (total: number | null, available: number | null): number | null => {
  // Without both figures (or with a zero total) any percentage would be made up.
  if (total === null || available === null || total === 0) return null
  return Math.round(((total - available) / total) * 1000) / 10
}

/**
 * Collects a status report for the backend process, the host and the
 * services the backend depends on.
 *
 * All probes (node exporter, database, Matrix, MinIO) run concurrently and
 * none of them makes this function reject: a failing probe is reported with
 * `ok: false` and an `error` message in the corresponding section. Host
 * figures are `null` whenever the underlying node exporter metric is missing.
 *
 * @param server Fastify instance providing `db` and the Matrix service context.
 * @returns Report with the sections `backend`, `server` and `services`, stamped with `checkedAt`.
 */
export const buildSystemStatus = async (server: FastifyInstance) => {
  const checkedAt = new Date()
  const nodeExporterMetricsUrl = `${nodeExporterUrl()}/metrics`
  const minioUrl = s3EndpointUrl()

  // The probes are independent of each other, so run them in parallel.
  const [nodeExporter, database, matrixStatus, minio] = await Promise.all([
    scrapeNodeExporter(nodeExporterMetricsUrl),
    checkDatabase(server, checkedAt.toISOString()),
    matrixService(server)
      .getStatus()
      .catch((err: unknown) => ({
        reachable: false,
        error: errorMessageOr(err, "Matrix-Status nicht verfügbar"),
      })),
    minioUrl
      ? checkHttp(`${minioUrl}/minio/health/live`)
      : serviceState(false, {
          error: "S3_ENDPOINT ist nicht gesetzt",
        }),
  ])

  const nodeMetrics = nodeExporter.metrics
  const nodeExporterError = nodeExporter.error

  const metric = (name: string) => (nodeMetrics ? firstMetricValue(nodeMetrics, name) : null)
  const rootFilesystemMetric = (name: string) =>
    nodeMetrics
      ? findMetricValue(nodeMetrics, name, (sample) => sample.labels.mountpoint === "/")
      : null

  const memoryTotal = metric("node_memory_MemTotal_bytes")
  const memoryAvailable = metric("node_memory_MemAvailable_bytes")
  const rootSize = rootFilesystemMetric("node_filesystem_size_bytes")
  const rootAvailable = rootFilesystemMetric("node_filesystem_avail_bytes")
  const bootTime = metric("node_boot_time_seconds")

  // One "idle" series exists per logical CPU, so the distinct `cpu` labels give the CPU count.
  const cpuCount = nodeMetrics
    ? new Set(
        (nodeMetrics.get("node_cpu_seconds_total") || [])
          .filter((sample) => sample.labels.mode === "idle")
          .map((sample) => sample.labels.cpu)
      ).size
    : null
  const uname = nodeMetrics?.get("node_uname_info")?.[0]?.labels || null

  return {
    checkedAt: checkedAt.toISOString(),
    backend: {
      status: "ok",
      uptimeSeconds: Math.round(process.uptime()),
      nodeVersion: process.version,
      environment: process.env.NODE_ENV || "development",
    },
    server: {
      status: nodeMetrics ? "ok" : "unavailable",
      nodeExporterUrl: nodeExporterMetricsUrl,
      error: nodeExporterError,
      hostname: uname?.nodename || null,
      kernel: uname?.release || null,
      cpuCount,
      load: {
        one: metric("node_load1"),
        five: metric("node_load5"),
        fifteen: metric("node_load15"),
      },
      memory: {
        totalBytes: memoryTotal,
        availableBytes: memoryAvailable,
        usedBytes: usedBytes(memoryTotal, memoryAvailable),
        usedPercent: usedPercent(memoryTotal, memoryAvailable),
      },
      disk: {
        rootTotalBytes: rootSize,
        rootAvailableBytes: rootAvailable,
        rootUsedBytes: usedBytes(rootSize, rootAvailable),
        rootUsedPercent: usedPercent(rootSize, rootAvailable),
      },
      uptimeSeconds: bootTime ? Math.max(0, Math.round(Date.now() / 1000 - bootTime)) : null,
    },
    services: {
      database,
      nodeExporter: serviceState(Boolean(nodeMetrics), {
        url: nodeExporterMetricsUrl,
        error: nodeExporterError,
      }),
      matrix: serviceState(Boolean((matrixStatus as any).reachable), matrixStatus as Record<string, any>),
      minio,
    },
  }
}