// 175 lines, 6.5 KiB, TypeScript
import { FastifyInstance } from "fastify"
|
|
import { matrixService } from "./matrix.service"
|
|
|
|
/** One sample of a Prometheus metric: its label set plus the numeric value. */
type MetricSample = {
  /** Label name to label value, as produced by `parseLabels`. */
  labels: Record<string, string>;
  /** Numeric sample value. */
  value: number;
}
|
|
|
|
/**
 * Matches one sample line of the Prometheus text exposition format.
 *
 * Capture groups:
 *   1. metric name
 *   2. raw label body between `{` and `}` (undefined when the line has no label block)
 *   3. sample value (decimal/exponent number, `Inf` with optional sign, or `NaN`)
 *
 * Quoted label values are consumed as a unit, so a `}` inside a value does not end
 * the label block. An optional integer timestamp after the value is accepted and
 * ignored, as is leading/trailing whitespace (including the `\r` of CRLF input).
 */
const metricLinePattern =
  /^\s*([a-zA-Z_:][a-zA-Z0-9_:]*)(?:\{((?:[^"}]|"(?:\\.|[^"\\])*")*)\})?\s+([+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:e[+-]?\d+)?|[+-]?Inf|NaN)(?:\s+-?\d+)?\s*$/i
|
|
|
|
/**
 * Base URL of the node exporter without trailing slashes.
 * Reads `NODE_EXPORTER_URL`; an unset or empty variable falls back to the default host.
 */
const nodeExporterUrl = (): string => {
  const configured = process.env.NODE_EXPORTER_URL
  let base = configured ? configured : "http://node-exporter:9100"

  // Strip every trailing "/" so callers can append a path with a single slash.
  while (base.endsWith("/")) {
    base = base.slice(0, -1)
  }

  return base
}
|
|
|
|
/**
 * S3 endpoint taken from `S3_ENDPOINT` without trailing slashes.
 * Returns an empty string when the variable is unset or empty.
 */
const s3EndpointUrl = (): string => {
  let endpoint = process.env.S3_ENDPOINT || ""

  // Strip every trailing "/" so callers can append a path with a single slash.
  while (endpoint.endsWith("/")) {
    endpoint = endpoint.slice(0, -1)
  }

  return endpoint
}
|
|
|
|
/**
 * Parses the raw label body of a metric line (the text between `{` and `}`)
 * into a name -> value record.
 *
 * Label values are unescaped following the Prometheus text format: `\\` becomes a
 * backslash, `\"` a double quote and `\n` a line feed. Any other backslash sequence
 * is kept verbatim.
 *
 * @param value Raw label body, e.g. `mountpoint="/",device="/dev/sda1"`.
 * @returns Record of label names to unescaped values; empty for an empty body.
 */
const parseLabels = (value = ""): Record<string, string> => {
  const labels: Record<string, string> = {}
  // A value is a run of escape pairs (backslash + any char) or plain characters.
  // Consuming escapes pairwise keeps an escaped backslash directly before the
  // closing quote (`"C:\\"`) from being misread as an escaped quote.
  const labelPattern = /(\w+)="((?:\\.|[^"\\])*)"/g
  let match: RegExpExecArray | null

  while ((match = labelPattern.exec(value)) !== null) {
    labels[match[1]] = match[2].replace(/\\(.)/g, (sequence: string, escaped: string) => {
      if (escaped === "n") return "\n"
      if (escaped === "\\" || escaped === "\"") return escaped
      return sequence
    })
  }

  return labels
}
|
|
|
|
/**
 * Parses a Prometheus text exposition payload into samples grouped by metric name.
 *
 * Blank lines, comment lines (starting with `#`) and lines that do not match
 * `metricLinePattern` are skipped, as are samples whose value is not a finite
 * number (`NaN`, `Inf`).
 *
 * @param text Raw body of a `/metrics` response.
 * @returns Map from metric name to its samples in order of appearance.
 */
const parsePrometheusMetrics = (text: string): Map<string, MetricSample[]> => {
  const metrics = new Map<string, MetricSample[]>()

  for (const rawLine of text.split("\n")) {
    // The exposition format ignores leading/trailing whitespace; trimming also
    // removes the "\r" left behind when the payload uses CRLF line endings.
    const line = rawLine.trim()
    if (!line || line.startsWith("#")) continue

    const match = line.match(metricLinePattern)
    if (!match) continue

    const value = Number(match[3])
    if (!Number.isFinite(value)) continue

    const sample: MetricSample = { labels: parseLabels(match[2]), value }
    const samples = metrics.get(match[1])
    if (samples) {
      samples.push(sample)
    } else {
      metrics.set(match[1], [sample])
    }
  }

  return metrics
}
|
|
|
|
/**
 * Returns the value of the first sample recorded for `name`,
 * or null when the metric is absent or has no samples.
 */
const firstMetricValue = (metrics: Map<string, MetricSample[]>, name: string) => {
  const first = metrics.get(name)?.[0]
  if (first == null) {
    return null
  }
  return first.value ?? null
}
|
|
|
|
/**
 * Returns the value of the first sample of `name` that satisfies `predicate`,
 * or null when the metric is absent or no sample matches.
 */
const findMetricValue = (
  metrics: Map<string, MetricSample[]>,
  name: string,
  predicate: (sample: MetricSample) => boolean
) => {
  const samples = metrics.get(name)
  if (samples == null) {
    return null
  }

  const hit = samples.find(predicate)
  return hit == null ? null : hit.value ?? null
}
|
|
|
|
/**
 * Builds the status record for one service: the `ok` flag, a matching
 * `status` string ("ok" or "error") and every field of `detail`.
 * Fields in `detail` are spread last and therefore win over `ok`/`status`
 * when they use the same keys.
 */
const serviceState = (ok: boolean, detail?: Record<string, any>) => {
  let status = "error"
  if (ok) {
    status = "ok"
  }

  return { ok, status, ...detail }
}
|
|
|
|
/**
 * Probes `url` with a GET request and reports the outcome as a service state.
 *
 * The request is aborted after `timeoutMs` milliseconds. This function does not
 * reject: network errors and timeouts are returned as an error state.
 *
 * @param url Address to probe.
 * @param timeoutMs Time budget for the request in milliseconds (default 3000).
 * @returns Service state with `url` and either `httpStatus` or `error`.
 */
const checkHttp = async (url: string, timeoutMs = 3000) => {
  const controller = new AbortController()
  const timeout = setTimeout(() => controller.abort(), timeoutMs)

  try {
    const response = await fetch(url, { signal: controller.signal })

    // Only the status line is needed. Cancel the unread body so the underlying
    // connection is released instead of lingering until garbage collection.
    await response.body?.cancel().catch(() => undefined)

    return serviceState(response.ok, {
      httpStatus: response.status,
      url,
    })
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : ""
    return serviceState(false, {
      url,
      error: message || "HTTP-Abfrage fehlgeschlagen",
    })
  } finally {
    clearTimeout(timeout)
  }
}
|
|
|
|
/** Result of one node exporter scrape: parsed metrics on success, otherwise an error message. */
type NodeMetricsResult = {
  metrics: Map<string, MetricSample[]> | null
  error: string | null
}

/**
 * Fetches and parses the node exporter metrics page.
 * Does not reject: HTTP errors, network errors and timeouts end up in `error`.
 */
const fetchNodeMetrics = async (url: string, timeoutMs = 5000): Promise<NodeMetricsResult> => {
  const controller = new AbortController()
  // Without a time limit an unresponsive exporter would stall the whole status report.
  const timeout = setTimeout(() => controller.abort(), timeoutMs)

  try {
    const response = await fetch(url, { signal: controller.signal })
    if (!response.ok) {
      throw new Error(`Node Exporter antwortet mit ${response.status}`)
    }
    return { metrics: parsePrometheusMetrics(await response.text()), error: null }
  } catch (err: any) {
    return { metrics: null, error: err?.message || "Node Exporter nicht erreichbar" }
  } finally {
    clearTimeout(timeout)
  }
}

/** Runs `SELECT NOW()` and reports the result as a service state instead of throwing. */
const checkDatabase = async (server: FastifyInstance, fallbackCheckedAt: string) => {
  try {
    const result = await server.db.execute("SELECT NOW() as now")
    return serviceState(true, {
      checkedAt: String(result.rows?.[0]?.now || fallbackCheckedAt),
    })
  } catch (err: any) {
    return serviceState(false, {
      checkedAt: fallbackCheckedAt,
      error: err?.message || "Datenbank nicht erreichbar",
    })
  }
}

/** Resolves the Matrix status; any failure (sync or async) becomes an unreachable record. */
const checkMatrix = async (server: FastifyInstance) => {
  try {
    return await matrixService(server).getStatus()
  } catch (err: any) {
    return {
      reachable: false,
      error: err?.message || "Matrix-Status nicht verfügbar",
    }
  }
}

/** Used amount (`total - available`), or null when either input is missing. */
const bytesUsed = (total: number | null, available: number | null): number | null =>
  total !== null && available !== null ? total - available : null

/** Used share in percent with one decimal; null when an input is missing or `total` is 0. */
const percentUsed = (total: number | null, available: number | null): number | null =>
  total && available !== null ? Math.round(((total - available) / total) * 1000) / 10 : null

/**
 * Collects the system status report: backend process info, host metrics scraped
 * from the node exporter and the state of the dependent services (database,
 * node exporter, Matrix, MinIO).
 *
 * The individual checks are independent of each other and run concurrently.
 * Each check reports its own failure inside the returned object rather than
 * rejecting the whole report.
 *
 * @param server Fastify instance; its `db` decorator is used for the database check
 *   and it is passed on to `matrixService`.
 * @returns The status report with `checkedAt`, `backend`, `server` and `services`.
 */
export const buildSystemStatus = async (server: FastifyInstance) => {
  const checkedAt = new Date()
  const checkedAtIso = checkedAt.toISOString()
  const nodeExporterMetricsUrl = `${nodeExporterUrl()}/metrics`
  const minioUrl = s3EndpointUrl()

  const [nodeExporter, database, matrixStatus, minio] = await Promise.all([
    fetchNodeMetrics(nodeExporterMetricsUrl),
    checkDatabase(server, checkedAtIso),
    checkMatrix(server),
    minioUrl
      ? checkHttp(`${minioUrl}/minio/health/live`)
      : serviceState(false, {
          error: "S3_ENDPOINT ist nicht gesetzt",
        }),
  ])

  const nodeMetrics = nodeExporter.metrics
  const nodeExporterError = nodeExporter.error

  // Every host value is null while no metrics are available.
  const metric = (name: string) => (nodeMetrics ? firstMetricValue(nodeMetrics, name) : null)
  const rootMetric = (name: string) =>
    nodeMetrics
      ? findMetricValue(nodeMetrics, name, (sample) => sample.labels.mountpoint === "/")
      : null

  const memoryTotal = metric("node_memory_MemTotal_bytes")
  const memoryAvailable = metric("node_memory_MemAvailable_bytes")
  const rootSize = rootMetric("node_filesystem_size_bytes")
  const rootAvailable = rootMetric("node_filesystem_avail_bytes")
  const bootTime = metric("node_boot_time_seconds")

  // Count the distinct `cpu` labels among the samples with mode "idle".
  const cpuCount = nodeMetrics
    ? new Set(
        (nodeMetrics.get("node_cpu_seconds_total") || [])
          .filter((sample) => sample.labels.mode === "idle")
          .map((sample) => sample.labels.cpu)
      ).size
    : null
  const uname = nodeMetrics?.get("node_uname_info")?.[0]?.labels || null

  return {
    checkedAt: checkedAtIso,
    backend: {
      status: "ok",
      uptimeSeconds: Math.round(process.uptime()),
      nodeVersion: process.version,
      environment: process.env.NODE_ENV || "development",
    },
    server: {
      status: nodeMetrics ? "ok" : "unavailable",
      nodeExporterUrl: nodeExporterMetricsUrl,
      error: nodeExporterError,
      hostname: uname?.nodename || null,
      kernel: uname?.release || null,
      cpuCount,
      load: {
        one: metric("node_load1"),
        five: metric("node_load5"),
        fifteen: metric("node_load15"),
      },
      memory: {
        totalBytes: memoryTotal,
        availableBytes: memoryAvailable,
        usedBytes: bytesUsed(memoryTotal, memoryAvailable),
        usedPercent: percentUsed(memoryTotal, memoryAvailable),
      },
      disk: {
        rootTotalBytes: rootSize,
        rootAvailableBytes: rootAvailable,
        rootUsedBytes: bytesUsed(rootSize, rootAvailable),
        rootUsedPercent: percentUsed(rootSize, rootAvailable),
      },
      uptimeSeconds: bootTime ? Math.max(0, Math.round(Date.now() / 1000 - bootTime)) : null,
    },
    services: {
      database,
      nodeExporter: serviceState(Boolean(nodeMetrics), {
        url: nodeExporterMetricsUrl,
        error: nodeExporterError,
      }),
      matrix: serviceState(Boolean((matrixStatus as any).reachable), matrixStatus as Record<string, any>),
      minio,
    },
  }
}
|