KI-AGENT: Systemstatus und Node Exporter ergänzen
This commit is contained in:
174
backend/src/modules/system-status.service.ts
Normal file
174
backend/src/modules/system-status.service.ts
Normal file
@@ -0,0 +1,174 @@
|
||||
import { FastifyInstance } from "fastify"
|
||||
import { matrixService } from "./matrix.service"
|
||||
|
||||
/**
 * A single sample of a metric as produced by `parsePrometheusMetrics`:
 * the label set attached to the sample and its numeric value.
 */
type MetricSample = {
  /** Label name → label value pairs taken from the `{...}` part of the line. */
  labels: Record<string, string>
  /** Numeric value of the sample. */
  value: number
}
|
||||
|
||||
// Matches one metric sample line, anchored to the whole line and case-insensitive.
// Capture groups: 1 = metric name, 2 = raw text between `{` and `}` (undefined when the
// line has no label block), 3 = value (decimal with optional exponent, `Inf`/`-Inf`, or `NaN`).
// Nothing may follow the value, so a line carrying a trailing timestamp does not match.
const metricLinePattern = /^([a-zA-Z_:][a-zA-Z0-9_:]*)(?:\{([^}]*)\})?\s+(-?(?:\d+(?:\.\d+)?|\.\d+)(?:e[+-]?\d+)?|-?Inf|NaN)$/i
|
||||
|
||||
/**
 * Resolves the Node Exporter base URL.
 *
 * Reads `NODE_EXPORTER_URL`, falls back to `http://node-exporter:9100` when the
 * variable is unset or empty, and strips any trailing slashes.
 */
const nodeExporterUrl = () => {
  const configured = process.env.NODE_EXPORTER_URL || "http://node-exporter:9100"
  return configured.replace(/\/+$/, "")
}
|
||||
|
||||
/**
 * Resolves the S3 endpoint base URL from `S3_ENDPOINT`.
 *
 * Returns an empty string when the variable is unset or empty; otherwise the
 * configured value with trailing slashes removed.
 */
const s3EndpointUrl = () => {
  const configured = process.env.S3_ENDPOINT || ""
  return configured.replace(/\/+$/, "")
}
|
||||
|
||||
/**
 * Parses the text between `{` and `}` of a metric line into a label map.
 *
 * Label values follow the Prometheus text-format escaping rules: `\\` is a
 * backslash, `\"` is a double quote and `\n` is a line feed. Any other
 * backslash sequence is kept as written.
 *
 * @param value Raw label text, e.g. `mountpoint="/",fstype="ext4"`. Defaults to "".
 * @returns Map of label name to unescaped label value; empty when nothing matches.
 */
const parseLabels = (value = "") => {
  const labels: Record<string, string> = {}
  // A value is a run of "backslash + any char" pairs or characters that are neither
  // a quote nor a backslash. Consuming escapes pairwise is what keeps an escaped
  // backslash right before the closing quote (`"C:\\"`) from being misread as `\"`
  // and swallowing the labels that follow.
  const labelPattern = /(\w+)="((?:\\.|[^"\\])*)"/g
  let match: RegExpExecArray | null

  while ((match = labelPattern.exec(value)) !== null) {
    labels[match[1]] = match[2].replace(/\\([\\"n])/g, (_sequence, escaped: string) =>
      escaped === "n" ? "\n" : escaped
    )
  }

  return labels
}
|
||||
|
||||
/**
 * Parses a metrics text page into samples grouped by metric name.
 *
 * Empty lines, lines starting with `#`, lines that do not match
 * `metricLinePattern` and samples whose value is not a finite number are
 * skipped. Samples keep the order in which they appear in the text.
 *
 * @param text Full response body, split on "\n".
 * @returns Map from metric name to the samples found for it.
 */
const parsePrometheusMetrics = (text: string) => {
  const samplesByName = new Map<string, MetricSample[]>()

  text.split("\n").forEach((rawLine) => {
    if (rawLine === "" || rawLine.startsWith("#")) return

    const parts = rawLine.match(metricLinePattern)
    if (parts === null) return

    const [, name, rawLabels, rawValue] = parts
    const numeric = Number(rawValue)
    // `Inf` / `NaN` convert to a non-finite number and are dropped here.
    if (!Number.isFinite(numeric)) return

    const sample: MetricSample = { labels: parseLabels(rawLabels), value: numeric }
    const existing = samplesByName.get(name)
    if (existing) {
      existing.push(sample)
    } else {
      samplesByName.set(name, [sample])
    }
  })

  return samplesByName
}
|
||||
|
||||
/**
 * Returns the value of the first sample recorded for `name`.
 *
 * @returns The sample value, or null when the metric is absent or has no samples.
 */
const firstMetricValue = (metrics: Map<string, MetricSample[]>, name: string) => {
  const [first] = metrics.get(name) ?? []
  return first?.value ?? null
}
|
||||
|
||||
/**
 * Returns the value of the first sample of `name` accepted by `predicate`.
 *
 * @returns The matching sample's value, or null when the metric is absent or
 *          no sample satisfies the predicate.
 */
const findMetricValue = (
  metrics: Map<string, MetricSample[]>,
  name: string,
  predicate: (sample: MetricSample) => boolean
) => {
  const samples = metrics.get(name)
  if (samples === undefined) return null

  const hit = samples.find(predicate)
  return hit === undefined ? null : hit.value ?? null
}
|
||||
|
||||
/**
 * Builds the status record for one service.
 *
 * @param ok     Whether the service check succeeded.
 * @param detail Extra fields merged into the record. They are spread last, so a
 *               `detail` key named `ok` or `status` replaces the computed one.
 * @returns `{ ok, status: "ok" | "error", ...detail }`.
 */
const serviceState = (ok: boolean, detail?: Record<string, any>) => {
  const status: string = ok ? "ok" : "error"
  return { ok, status, ...detail }
}
|
||||
|
||||
/**
 * Probes `url` with a GET request and reports the outcome as a service state.
 *
 * The request is aborted once `timeoutMs` milliseconds have passed. The
 * function does not reject: any failure (network error, abort) is turned into
 * a failed service state carrying the error message.
 *
 * @param url       Address to probe.
 * @param timeoutMs Abort deadline in milliseconds (default 3000).
 * @returns Service state with `url` and either `httpStatus` or `error`.
 */
const checkHttp = async (url: string, timeoutMs = 3000) => {
  const abort = new AbortController()
  const timer = setTimeout(() => abort.abort(), timeoutMs)

  try {
    const { ok, status } = await fetch(url, { signal: abort.signal })
    return serviceState(ok, { httpStatus: status, url })
  } catch (err: any) {
    const message = err?.message || "HTTP-Abfrage fehlgeschlagen"
    return serviceState(false, { url, error: message })
  } finally {
    // Always release the timer, whether the request finished or failed.
    clearTimeout(timer)
  }
}
|
||||
|
||||
/** Time budget in milliseconds for scraping the Node Exporter (same as checkHttp's default). */
const NODE_EXPORTER_TIMEOUT_MS = 3000

/**
 * Fetches and parses the Node Exporter metrics page without ever rejecting.
 *
 * The request, including reading the body, is aborted after `timeoutMs` so a
 * hanging exporter cannot stall the whole status report.
 *
 * @returns `metrics` on success (with `error: null`), otherwise `metrics: null`
 *          and a human-readable `error`.
 */
const scrapeNodeMetrics = async (
  url: string,
  timeoutMs = NODE_EXPORTER_TIMEOUT_MS
): Promise<{ metrics: Map<string, MetricSample[]> | null; error: string | null }> => {
  const controller = new AbortController()
  const timer = setTimeout(() => controller.abort(), timeoutMs)

  try {
    const response = await fetch(url, { signal: controller.signal })
    if (!response.ok) {
      throw new Error(`Node Exporter antwortet mit ${response.status}`)
    }
    return { metrics: parsePrometheusMetrics(await response.text()), error: null }
  } catch (err: any) {
    return { metrics: null, error: err?.message || "Node Exporter nicht erreichbar" }
  } finally {
    clearTimeout(timer)
  }
}

/**
 * Share of `total` that is in use, in percent with one decimal place.
 * Returns null when either figure is unknown or `total` is 0, so a missing
 * "available" metric is not reported as 100 % usage.
 */
const usedPercentOf = (total: number | null, available: number | null) =>
  total && available !== null ? Math.round(((total - available) / total) * 1000) / 10 : null

/**
 * Collects a status report covering the backend process, the host (via Node
 * Exporter metrics) and the dependent services (database, Node Exporter,
 * Matrix, MinIO).
 *
 * The four probes run concurrently and each one reports its own failure in the
 * result instead of rejecting, so one unavailable dependency does not prevent
 * the rest of the report from being returned.
 *
 * @param server Fastify instance; its `db` decoration is used for the database probe
 *               and it is handed to `matrixService`.
 * @returns Report with `checkedAt`, `backend`, `server` and `services` sections.
 */
export const buildSystemStatus = async (server: FastifyInstance) => {
  const checkedAt = new Date()
  const nodeExporterMetricsUrl = `${nodeExporterUrl()}/metrics`
  const minioUrl = s3EndpointUrl()

  // A failing database is reported as a failed service instead of aborting the report.
  const checkDatabase = async () => {
    try {
      const databaseCheck = await server.db.execute("SELECT NOW() as now")
      return serviceState(true, {
        checkedAt: String(databaseCheck.rows?.[0]?.now || checkedAt.toISOString()),
      })
    } catch (err: any) {
      return serviceState(false, {
        error: err?.message || "Datenbank nicht erreichbar",
      })
    }
  }

  const [nodeExporter, database, matrixStatus, minio] = await Promise.all([
    scrapeNodeMetrics(nodeExporterMetricsUrl),
    checkDatabase(),
    matrixService(server).getStatus().catch((err: any) => ({
      reachable: false,
      error: err?.message || "Matrix-Status nicht verfügbar",
    })),
    minioUrl
      ? checkHttp(`${minioUrl}/minio/health/live`)
      : serviceState(false, {
          error: "S3_ENDPOINT ist nicht gesetzt",
        }),
  ])

  const nodeMetrics = nodeExporter.metrics
  const nodeExporterError = nodeExporter.error

  const memoryTotal = nodeMetrics ? firstMetricValue(nodeMetrics, "node_memory_MemTotal_bytes") : null
  const memoryAvailable = nodeMetrics ? firstMetricValue(nodeMetrics, "node_memory_MemAvailable_bytes") : null
  const rootSize = nodeMetrics
    ? findMetricValue(nodeMetrics, "node_filesystem_size_bytes", (sample) => sample.labels.mountpoint === "/")
    : null
  const rootAvailable = nodeMetrics
    ? findMetricValue(nodeMetrics, "node_filesystem_avail_bytes", (sample) => sample.labels.mountpoint === "/")
    : null
  const bootTime = nodeMetrics ? firstMetricValue(nodeMetrics, "node_boot_time_seconds") : null
  // Count distinct `cpu` labels among the idle-mode samples of node_cpu_seconds_total.
  const cpuCount = nodeMetrics
    ? new Set(
        (nodeMetrics.get("node_cpu_seconds_total") || [])
          .filter((sample) => sample.labels.mode === "idle")
          .map((sample) => sample.labels.cpu)
      ).size
    : null
  const uname = nodeMetrics?.get("node_uname_info")?.[0]?.labels || null

  return {
    checkedAt: checkedAt.toISOString(),
    backend: {
      status: "ok",
      uptimeSeconds: Math.round(process.uptime()),
      nodeVersion: process.version,
      environment: process.env.NODE_ENV || "development",
    },
    server: {
      status: nodeMetrics ? "ok" : "unavailable",
      nodeExporterUrl: nodeExporterMetricsUrl,
      error: nodeExporterError,
      hostname: uname?.nodename || null,
      kernel: uname?.release || null,
      cpuCount,
      load: {
        one: nodeMetrics ? firstMetricValue(nodeMetrics, "node_load1") : null,
        five: nodeMetrics ? firstMetricValue(nodeMetrics, "node_load5") : null,
        fifteen: nodeMetrics ? firstMetricValue(nodeMetrics, "node_load15") : null,
      },
      memory: {
        totalBytes: memoryTotal,
        availableBytes: memoryAvailable,
        usedBytes: memoryTotal !== null && memoryAvailable !== null ? memoryTotal - memoryAvailable : null,
        usedPercent: usedPercentOf(memoryTotal, memoryAvailable),
      },
      disk: {
        rootTotalBytes: rootSize,
        rootAvailableBytes: rootAvailable,
        rootUsedBytes: rootSize !== null && rootAvailable !== null ? rootSize - rootAvailable : null,
        rootUsedPercent: usedPercentOf(rootSize, rootAvailable),
      },
      // Host uptime derived from the boot timestamp; clamped so clock skew never yields a negative value.
      uptimeSeconds: bootTime ? Math.max(0, Math.round(Date.now() / 1000 - bootTime)) : null,
    },
    services: {
      database,
      nodeExporter: serviceState(Boolean(nodeMetrics), {
        url: nodeExporterMetricsUrl,
        error: nodeExporterError,
      }),
      matrix: serviceState(Boolean((matrixStatus as any).reachable), matrixStatus as Record<string, any>),
      minio,
    },
  }
}
|
||||
@@ -17,6 +17,7 @@ import { sendMail } from "../utils/mailer";
|
||||
import { ensureTenantBaseData } from "../modules/bootstrap.service";
|
||||
import { buildTenantFullExport, importTenantFullExport } from "../utils/tenantFullExport";
|
||||
import type { TenantFullExport } from "../utils/tenantFullExport";
|
||||
import { buildSystemStatus } from "../modules/system-status.service";
|
||||
|
||||
export default async function adminRoutes(server: FastifyInstance) {
|
||||
const deriveNameFromEmail = (email: string) => {
|
||||
@@ -393,6 +394,21 @@ export default async function adminRoutes(server: FastifyInstance) {
|
||||
}
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------
|
||||
// GET /admin/system-status
|
||||
// -------------------------------------------------------------
|
||||
server.get("/admin/system-status", async (req, reply) => {
  try {
    // requireAdmin yields a falsy value when the caller may not proceed;
    // presumably it has already written the response in that case — verify against its definition.
    const admin = await requireAdmin(req, reply);
    if (admin) {
      return await buildSystemStatus(server);
    }
    return;
  } catch (error) {
    // Log the failure and answer with a generic 500.
    console.error("ERROR /admin/system-status:", error);
    return reply.code(500).send({ error: "Internal Server Error" });
  }
});
|
||||
|
||||
// -------------------------------------------------------------
|
||||
// POST /admin/users
|
||||
// -------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user