KI-AGENT: Systemstatus und Node Exporter ergänzen

This commit is contained in:
2026-05-20 20:41:48 +02:00
parent 3796bc2953
commit 8df587f9e2
7 changed files with 494 additions and 4 deletions

View File

@@ -0,0 +1,174 @@
import { FastifyInstance } from "fastify"
import { matrixService } from "./matrix.service"
/** One parsed sample of a Prometheus metric: its label set plus the numeric reading. */
type MetricSample = {
  /** Label name/value pairs attached to the sample (empty when the line had no labels). */
  labels: Record<string, string>;
  /** Numeric value of the sample. */
  value: number;
}
/**
 * Matches one sample line of the Prometheus text exposition format.
 *
 * Capture groups:
 *   1 - metric name
 *   2 - raw label list between the braces (undefined when the line has no braces)
 *   3 - sample value as text
 *
 * The label list is scanned quote-aware so that a `}` inside a quoted label value
 * does not terminate the label set early. An optional trailing integer timestamp
 * and trailing whitespace (e.g. a `\r` from CRLF input) are accepted and ignored.
 */
const metricLinePattern = /^([a-zA-Z_:][a-zA-Z0-9_:]*)(?:\{((?:[^"}]|"(?:\\.|[^"\\])*")*)\})?\s+(-?(?:\d+(?:\.\d+)?|\.\d+)(?:e[+-]?\d+)?|-?Inf|NaN)(?:\s+-?\d+)?\s*$/i
/**
 * Base URL of the Node Exporter without trailing slashes.
 * Reads NODE_EXPORTER_URL on every call; an unset or empty variable falls back
 * to the default address.
 */
const nodeExporterUrl = () => {
  const configured = process.env.NODE_EXPORTER_URL || "http://node-exporter:9100"
  return configured.replace(/\/+$/, "")
}
/**
 * S3 endpoint taken from S3_ENDPOINT with trailing slashes removed.
 * Returns an empty string when the variable is unset or empty.
 */
const s3EndpointUrl = () => {
  const configured = process.env.S3_ENDPOINT || ""
  return configured.replace(/\/+$/, "")
}
/**
 * Parses the raw label list of a Prometheus sample line (the text between the braces)
 * into a name -> value record.
 *
 * Label values are unescaped according to the text exposition format:
 * `\\` becomes a backslash, `\"` a double quote and `\n` a line feed.
 * Any other backslash sequence is kept verbatim.
 *
 * @param value Raw label list, e.g. `mountpoint="/",fstype="ext4"`. Defaults to "".
 * @returns Record of label names to unescaped values; empty when nothing matches.
 */
const parseLabels = (value = "") => {
  const labels: Record<string, string> = {}
  // `\\.` consumes a backslash together with the character it escapes, so an escaped
  // backslash directly before the closing quote can no longer swallow that quote.
  const labelPattern = /(\w+)="((?:\\.|[^"\\])*)"/g
  for (const match of value.matchAll(labelPattern)) {
    labels[match[1]] = match[2].replace(/\\(.)/g, (escape: string, char: string) => {
      if (char === "n") return "\n"
      if (char === "\\" || char === "\"") return char
      return escape
    })
  }
  return labels
}
/**
 * Turns a Prometheus text exposition payload into a map of metric name -> samples.
 *
 * Empty lines, comment lines (starting with `#`), lines that do not match
 * `metricLinePattern` and samples whose value is not a finite number are skipped.
 * Samples keep the order in which they appear in the payload.
 */
const parsePrometheusMetrics = (text: string) => {
  const samplesByName = new Map<string, MetricSample[]>()
  text.split("\n").forEach((rawLine) => {
    if (rawLine === "" || rawLine.startsWith("#")) return

    const parts = rawLine.match(metricLinePattern)
    if (parts === null) return

    const numeric = Number(parts[3])
    if (!Number.isFinite(numeric)) return

    const sample = { labels: parseLabels(parts[2]), value: numeric }
    const bucket = samplesByName.get(parts[1])
    if (bucket) {
      bucket.push(sample)
    } else {
      samplesByName.set(parts[1], [sample])
    }
  })
  return samplesByName
}
/**
 * Value of the first sample recorded for `name`, or null when the metric is
 * absent or has no samples.
 */
const firstMetricValue = (metrics: Map<string, MetricSample[]>, name: string) => {
  const samples = metrics.get(name)
  if (samples == null) return null
  const head = samples[0]
  if (head == null) return null
  return head.value ?? null
}
/**
 * Value of the first sample of `name` accepted by `predicate`, or null when the
 * metric is absent or no sample qualifies.
 */
const findMetricValue = (
  metrics: Map<string, MetricSample[]>,
  name: string,
  predicate: (sample: MetricSample) => boolean
) => {
  const samples = metrics.get(name)
  if (samples == null) return null
  for (const sample of samples) {
    if (predicate(sample)) return sample?.value ?? null
  }
  return null
}
/**
 * Builds the status record for one service: the boolean `ok`, a matching
 * `status` string ("ok" / "error") and any extra detail fields.
 * Detail fields are spread last, so they win over `ok` / `status` on a name clash.
 */
const serviceState = (ok: boolean, detail?: Record<string, any>) => {
  const status = ok ? "ok" : "error"
  return { ok, status, ...detail }
}
/**
 * Probes an HTTP endpoint with a GET request and reports the outcome as a service state.
 *
 * @param url       Endpoint to request.
 * @param timeoutMs Time after which the request is aborted (default 3000 ms).
 * @returns `serviceState` with `httpStatus` and `url` when a response arrived
 *          (ok mirrors `response.ok`), or a failed state with `url` and `error`
 *          when the request threw or timed out. Never rejects.
 */
const checkHttp = async (url: string, timeoutMs = 3000) => {
  const controller = new AbortController()
  const timeout = setTimeout(() => controller.abort(), timeoutMs)
  try {
    const response = await fetch(url, { signal: controller.signal })
    // Only the status line is of interest. Cancel the unread body so the underlying
    // connection is released right away; a failing cancel must not turn a successful
    // probe into an error, hence the deliberate best-effort catch.
    void response.body?.cancel().catch(() => undefined)
    return serviceState(response.ok, {
      httpStatus: response.status,
      url,
    })
  } catch (err: unknown) {
    // An abort triggered by our own timer is reported as a timeout instead of the
    // generic abort message of the runtime.
    const message = controller.signal.aborted
      ? `Zeitüberschreitung nach ${timeoutMs} ms`
      : err instanceof Error
        ? err.message
        : ""
    return serviceState(false, {
      url,
      error: message || "HTTP-Abfrage fehlgeschlagen",
    })
  } finally {
    clearTimeout(timeout)
  }
}
/**
 * Fetches and parses the Node Exporter metrics page.
 * The whole scrape (headers and body) is bounded by `timeoutMs` so an unresponsive
 * exporter cannot stall the status endpoint. Never rejects: failures are returned
 * as `error` with `metrics` set to null.
 */
const scrapeNodeExporter = async (
  url: string,
  timeoutMs = 3000
): Promise<{ metrics: Map<string, MetricSample[]> | null; error: string | null }> => {
  const controller = new AbortController()
  const timeout = setTimeout(() => controller.abort(), timeoutMs)
  try {
    const response = await fetch(url, { signal: controller.signal })
    if (!response.ok) {
      // The body of an error response is not needed; release it before bailing out.
      void response.body?.cancel().catch(() => undefined)
      throw new Error(`Node Exporter antwortet mit ${response.status}`)
    }
    return { metrics: parsePrometheusMetrics(await response.text()), error: null }
  } catch (err: any) {
    return { metrics: null, error: err?.message || "Node Exporter nicht erreichbar" }
  } finally {
    clearTimeout(timeout)
  }
}

/**
 * Runs a trivial query against the database and reports the result as a service state.
 * A failing query yields a failed state instead of an exception, so a database outage
 * shows up in the status report rather than breaking it.
 */
const checkDatabase = async (server: FastifyInstance, fallbackCheckedAt: string) => {
  try {
    const result = await server.db.execute("SELECT NOW() as now")
    return serviceState(true, {
      checkedAt: String(result.rows?.[0]?.now || fallbackCheckedAt),
    })
  } catch (err: any) {
    return serviceState(false, {
      error: err?.message || "Datenbank nicht erreichbar",
    })
  }
}

/**
 * Used share in percent, rounded to one decimal.
 * Returns null unless both figures are known and the total is non-zero, so a missing
 * "available" metric is not misreported as 100 % usage.
 */
const usedPercentOf = (total: number | null, available: number | null): number | null => {
  if (!total || available === null) return null
  return Math.round(((total - available) / total) * 1000) / 10
}

/**
 * Collects the system status report for the administration UI.
 *
 * Gathers, in parallel: host metrics from the Node Exporter, a database round trip,
 * the Matrix service status and a MinIO liveness probe. Each probe reports its own
 * failure inside the result; none of them rejects the overall call.
 *
 * @param server Fastify instance providing `db` and used to build the Matrix service.
 * @returns Report with `checkedAt`, `backend`, `server` (host metrics, null where
 *          unknown) and `services` (one state per probed service).
 */
export const buildSystemStatus = async (server: FastifyInstance) => {
  const checkedAt = new Date()
  const nodeExporterMetricsUrl = `${nodeExporterUrl()}/metrics`
  const minioUrl = s3EndpointUrl()

  // The probes are independent of each other; running them concurrently bounds the
  // response time by the slowest probe instead of the sum of all of them.
  const [nodeExporter, database, matrixStatus, minio] = await Promise.all([
    scrapeNodeExporter(nodeExporterMetricsUrl),
    checkDatabase(server, checkedAt.toISOString()),
    matrixService(server).getStatus().catch((err: any) => ({
      reachable: false,
      error: err?.message || "Matrix-Status nicht verfügbar",
    })),
    minioUrl
      ? checkHttp(`${minioUrl}/minio/health/live`)
      : serviceState(false, {
        error: "S3_ENDPOINT ist nicht gesetzt",
      }),
  ])

  const nodeMetrics = nodeExporter.metrics
  const nodeExporterError = nodeExporter.error

  const isRootMount = (sample: MetricSample) => sample.labels.mountpoint === "/"
  const memoryTotal = nodeMetrics ? firstMetricValue(nodeMetrics, "node_memory_MemTotal_bytes") : null
  const memoryAvailable = nodeMetrics ? firstMetricValue(nodeMetrics, "node_memory_MemAvailable_bytes") : null
  const rootSize = nodeMetrics ? findMetricValue(nodeMetrics, "node_filesystem_size_bytes", isRootMount) : null
  const rootAvailable = nodeMetrics ? findMetricValue(nodeMetrics, "node_filesystem_avail_bytes", isRootMount) : null
  const bootTime = nodeMetrics ? firstMetricValue(nodeMetrics, "node_boot_time_seconds") : null
  // One "idle" series exists per logical CPU, so the distinct `cpu` labels give the count.
  const cpuCount = nodeMetrics
    ? new Set((nodeMetrics.get("node_cpu_seconds_total") || [])
      .filter((sample) => sample.labels.mode === "idle")
      .map((sample) => sample.labels.cpu)).size
    : null
  const uname = nodeMetrics?.get("node_uname_info")?.[0]?.labels || null

  return {
    checkedAt: checkedAt.toISOString(),
    backend: {
      status: "ok",
      uptimeSeconds: Math.round(process.uptime()),
      nodeVersion: process.version,
      environment: process.env.NODE_ENV || "development",
    },
    server: {
      status: nodeMetrics ? "ok" : "unavailable",
      nodeExporterUrl: nodeExporterMetricsUrl,
      error: nodeExporterError,
      hostname: uname?.nodename || null,
      kernel: uname?.release || null,
      cpuCount,
      load: {
        one: nodeMetrics ? firstMetricValue(nodeMetrics, "node_load1") : null,
        five: nodeMetrics ? firstMetricValue(nodeMetrics, "node_load5") : null,
        fifteen: nodeMetrics ? firstMetricValue(nodeMetrics, "node_load15") : null,
      },
      memory: {
        totalBytes: memoryTotal,
        availableBytes: memoryAvailable,
        usedBytes: memoryTotal !== null && memoryAvailable !== null ? memoryTotal - memoryAvailable : null,
        usedPercent: usedPercentOf(memoryTotal, memoryAvailable),
      },
      disk: {
        rootTotalBytes: rootSize,
        rootAvailableBytes: rootAvailable,
        rootUsedBytes: rootSize !== null && rootAvailable !== null ? rootSize - rootAvailable : null,
        rootUsedPercent: usedPercentOf(rootSize, rootAvailable),
      },
      uptimeSeconds: bootTime ? Math.max(0, Math.round(Date.now() / 1000 - bootTime)) : null,
    },
    services: {
      database,
      nodeExporter: serviceState(Boolean(nodeMetrics), {
        url: nodeExporterMetricsUrl,
        error: nodeExporterError,
      }),
      matrix: serviceState(Boolean((matrixStatus as any).reachable), matrixStatus as Record<string, any>),
      minio,
    },
  }
}

View File

@@ -17,6 +17,7 @@ import { sendMail } from "../utils/mailer";
import { ensureTenantBaseData } from "../modules/bootstrap.service";
import { buildTenantFullExport, importTenantFullExport } from "../utils/tenantFullExport";
import type { TenantFullExport } from "../utils/tenantFullExport";
import { buildSystemStatus } from "../modules/system-status.service";
export default async function adminRoutes(server: FastifyInstance) {
const deriveNameFromEmail = (email: string) => {
@@ -393,6 +394,21 @@ export default async function adminRoutes(server: FastifyInstance) {
}
});
// -------------------------------------------------------------
// GET /admin/system-status
// -------------------------------------------------------------
server.get("/admin/system-status", async (req, reply) => {
  // Admin-only diagnostics: the report is built only after requireAdmin yields a user.
  try {
    const adminUser = await requireAdmin(req, reply);
    if (adminUser) {
      const report = await buildSystemStatus(server);
      return report;
    }
    // No user returned: stop here without a payload (presumably requireAdmin has
    // already answered the request - verify against its implementation).
    return;
  } catch (failure) {
    console.error("ERROR /admin/system-status:", failure);
    return reply.code(500).send({ error: "Internal Server Error" });
  }
});
// -------------------------------------------------------------
// POST /admin/users
// -------------------------------------------------------------

View File

@@ -81,8 +81,7 @@ services:
- internal
backend:
build:
context: ./backend
image: git.federspiel.tech/flfeders/fedeo/backend:dev
container_name: fedeo-backend
restart: unless-stopped
depends_on:
@@ -139,6 +138,7 @@ services:
MATRIX_SERVICE_USER_LOCALPART: ${MATRIX_SERVICE_USER_LOCALPART:-fedeo_service}
LIVEKIT_KEY: ${LIVEKIT_KEY:-fedeo-livekit}
LIVEKIT_SECRET: ${LIVEKIT_SECRET:-change-this-livekit-secret-please-replace}
NODE_EXPORTER_URL: ${NODE_EXPORTER_URL:-http://node-exporter:9100}
labels:
- traefik.enable=true
- traefik.http.routers.fedeo-backend.rule=Host(`${DOMAIN}`) && PathPrefix(`/backend`)
@@ -152,9 +152,25 @@ services:
- web
- internal
# Prometheus Node Exporter: serves host metrics on port 9100 of the "internal" network;
# the backend's NODE_EXPORTER_URL default in this file points at this service name.
node-exporter:
image: prom/node-exporter:v1.8.2
container_name: fedeo-node-exporter
restart: unless-stopped
command:
# Read host data from the read-only bind mounts declared under "volumes" below.
- --path.procfs=/host/proc
- --path.sysfs=/host/sys
- --path.rootfs=/rootfs
# "$$" is Compose escaping for a literal "$" (end-of-string anchor in the regex).
- --collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)
# Share the host PID namespace with the exporter.
pid: host
volumes:
- /proc:/host/proc:ro
- /sys:/host/sys:ro
- /:/rootfs:ro,rslave
networks:
- internal
frontend:
build:
context: ./frontend
image: git.federspiel.tech/flfeders/fedeo/frontend:dev
container_name: fedeo-frontend
restart: unless-stopped
depends_on:

View File

@@ -56,6 +56,7 @@ services:
- WEB_PUSH_PUBLIC_KEY=${WEB_PUSH_PUBLIC_KEY:-}
- WEB_PUSH_PRIVATE_KEY=${WEB_PUSH_PRIVATE_KEY:-}
- WEB_PUSH_SUBJECT=${WEB_PUSH_SUBJECT:-mailto:admin@example.com}
- NODE_EXPORTER_URL=${NODE_EXPORTER_URL:-http://node-exporter:9100}
networks:
- traefik
labels:
@@ -74,6 +75,23 @@ services:
- "traefik.http.routers.fedeo-backend-secure.entrypoints=web-secured" #
- "traefik.http.routers.fedeo-backend-secure.tls.certresolver=mytlschallenge"
- "traefik.http.routers.fedeo-backend-secure.middlewares=fedeo-backend-strip"
# Prometheus Node Exporter: serves host metrics that the backend reads through
# NODE_EXPORTER_URL (default http://node-exporter:9100, set in this file).
node-exporter:
image: prom/node-exporter:v1.8.2
restart: unless-stopped
command:
# Read host data from the read-only bind mounts declared under "volumes" below.
- --path.procfs=/host/proc
- --path.sysfs=/host/sys
- --path.rootfs=/rootfs
# "$$" is Compose escaping for a literal "$" (end-of-string anchor in the regex).
- --collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)
# Share the host PID namespace with the exporter.
pid: host
volumes:
- /proc:/host/proc:ro
- /sys:/host/sys:ro
- /:/rootfs:ro,rslave
networks:
# NOTE(review): the exporter joins the same "traefik" network as the routed services;
# its metrics endpoint is reachable by every container on that network - confirm this
# is acceptable or move it to a dedicated network.
- traefik
matrix-db:
image: postgres:16-alpine
restart: unless-stopped

View File

@@ -355,6 +355,11 @@ const links = computed(() => {
to: "/administration/tenants",
icon: "i-heroicons-building-office-2",
},
{
label: "Systemstatus",
to: "/administration/system",
icon: "i-heroicons-server-stack",
},
] : []
const visibleOrganisationChildren = visibleItems(organisationChildren)

View File

@@ -55,6 +55,34 @@ export type TenantImportResult = {
files: { restored: number; skipped: number }
}
/** Shape of the report returned by the admin system-status endpoint. */
export type SystemStatus = {
  /** Timestamp of the check as a string. */
  checkedAt: string
  /** Facts about the backend process itself. */
  backend: {
    status: string
    uptimeSeconds: number
    nodeVersion: string
    environment: string
  }
  /** Host-level figures; individual values are null/absent when unknown. */
  server: {
    status: string
    nodeExporterUrl: string
    error?: string | null
    hostname?: string | null
    kernel?: string | null
    cpuCount?: number | null
    uptimeSeconds?: number | null
    /** Load averages over one, five and fifteen minutes. */
    load: {
      one?: number | null
      five?: number | null
      fifteen?: number | null
    }
    /** Main memory figures. */
    memory: {
      totalBytes?: number | null
      availableBytes?: number | null
      usedBytes?: number | null
      usedPercent?: number | null
    }
    /** Root filesystem figures. */
    disk: {
      rootTotalBytes?: number | null
      rootAvailableBytes?: number | null
      rootUsedBytes?: number | null
      rootUsedPercent?: number | null
    }
  }
  /** One entry per probed service, keyed by service id; extra detail fields are allowed. */
  services: Record<string, {
    ok: boolean
    status: string
    error?: string | null
    [key: string]: any
  }>
}
export const useAdmin = () => {
const { $api } = useNuxtApp()
@@ -130,8 +158,13 @@ export const useAdmin = () => {
})
}
// Loads the system status report from the admin API.
const getSystemStatus = async (): Promise<SystemStatus> => {
  const report = await $api("/api/admin/system-status")
  return report
}
return {
getOverview,
getSystemStatus,
createUser,
createUserForProfile,
updateUser,

View File

@@ -0,0 +1,228 @@
<script setup lang="ts">
import type { SystemStatus } from "~/composables/useAdmin"
// Auth store; read in the mount hook to restrict the page to administrators.
const auth = useAuthStore()
// Toast API; used to report a failed status load.
const toast = useToast()
// Router; used to send non-admin users back to the start page.
const router = useRouter()
// Admin composable providing getSystemStatus().
const admin = useAdmin()
// True while a status request is in flight (drives the refresh button spinner).
const loading = ref(true)
// Last successfully loaded report; null until the first load succeeds.
const status = ref<SystemStatus | null>(null)
// Display names for the service keys of the report; unknown keys fall back to the key itself.
const serviceLabels: Record<string, string> = {
backend: "Backend",
database: "Datenbank",
nodeExporter: "Node Exporter",
matrix: "Matrix",
minio: "Dateispeicher",
}
/**
 * Formats a byte count for display using 1024-based steps (B, KB, MB, GB, TB, PB).
 * Values below 1024 are shown unscaled; larger values get one decimal place.
 *
 * @param value Byte count; null, undefined, 0 and NaN are rendered as "-".
 * @returns Human-readable size, e.g. "1.5 KB" or "2.0 TB".
 */
const formatBytes = (value?: number | null) => {
  const bytes = Number(value || 0)
  if (!bytes) return "-"
  if (bytes < 1024) return `${bytes} B`

  // Scale step by step so sizes beyond the gigabyte range get their own unit
  // instead of being printed as thousands of GB.
  const units = ["KB", "MB", "GB", "TB", "PB"]
  let scaled = bytes / 1024
  let unitIndex = 0
  while (scaled >= 1024 && unitIndex < units.length - 1) {
    scaled /= 1024
    unitIndex += 1
  }
  return `${scaled.toFixed(1)} ${units[unitIndex]}`
}
/**
 * Formats a duration given in seconds using the two largest applicable units:
 * "D d H h", "H h M min" or "M min". Missing or zero input is rendered as "-".
 */
const formatDuration = (seconds?: number | null) => {
  const total = Number(seconds || 0)
  if (!total) {
    return "-"
  }

  const SECONDS_PER_DAY = 86400
  const SECONDS_PER_HOUR = 3600
  const wholeDays = Math.floor(total / SECONDS_PER_DAY)
  const wholeHours = Math.floor((total % SECONDS_PER_DAY) / SECONDS_PER_HOUR)
  const wholeMinutes = Math.floor((total % SECONDS_PER_HOUR) / 60)

  if (wholeDays) {
    return `${wholeDays} d ${wholeHours} h`
  }
  return wholeHours ? `${wholeHours} h ${wholeMinutes} min` : `${wholeMinutes} min`
}
/**
 * Loads the system status report into `status`.
 * `loading` is true for the duration of the request. On failure the error is logged,
 * an error toast is shown and the previously loaded report (if any) stays in place.
 */
const loadStatus = async () => {
  loading.value = true
  try {
    status.value = await admin.getSystemStatus()
  } catch (err: any) {
    console.error("[administration/system]", err)
    toast.add({
      title: "Systemstatus konnte nicht geladen werden",
      description: err?.data?.error || err?.message || "Unbekannter Fehler",
      // Semantic colour name, matching the colours used by the components of this page
      // ("neutral", "success", "warning", "error").
      color: "error",
    })
  } finally {
    loading.value = false
  }
}
// One display row per service of the loaded report: the service key, its display label
// (falling back to the key) and all fields of the service state itself.
const serviceRows = computed(() => {
  const rows = []
  for (const [key, service] of Object.entries(status.value?.services || {})) {
    rows.push({
      key,
      label: serviceLabels[key] || key,
      ...service,
    })
  }
  return rows
})
// Overall verdict: "unavailable" without a report, "warning" as soon as one service
// is not ok, otherwise "ok".
const overallStatus = computed(() => {
  if (status.value === null) {
    return "unavailable"
  }
  const anyFailing = serviceRows.value.some((row) => !row.ok)
  return anyFailing ? "warning" : "ok"
})
// On mount: administrators get the report loaded, everyone else is sent to the start page.
onMounted(async () => {
  const isAdmin = Boolean(auth.user?.is_admin)
  if (isAdmin) {
    await loadStatus()
  } else {
    await router.push("/")
  }
})
</script>
<template>
<!-- Page header with a manual refresh button that re-runs loadStatus -->
<UDashboardNavbar title="Administration: Systemstatus">
<template #right>
<UButton
icon="i-heroicons-arrow-path"
color="neutral"
variant="outline"
:loading="loading"
@click="loadStatus"
>
Aktualisieren
</UButton>
</template>
</UDashboardNavbar>
<UDashboardPanelContent>
<div class="space-y-6">
<!-- Overall verdict banner: success styling only when overallStatus is 'ok' -->
<UAlert
:icon="overallStatus === 'ok' ? 'i-heroicons-check-circle' : 'i-heroicons-exclamation-triangle'"
:color="overallStatus === 'ok' ? 'success' : 'warning'"
variant="soft"
:title="overallStatus === 'ok' ? 'System läuft' : 'System prüfen'"
:description="status?.checkedAt ? `Letzte Prüfung: ${new Date(status.checkedAt).toLocaleString('de-DE')}` : 'Noch keine Prüfung geladen.'"
/>
<div class="grid gap-4 xl:grid-cols-3">
<!-- Card 1: host facts (hostname, CPU count, load averages, uptime) -->
<UCard :ui="{ root: 'rounded-lg' }">
<template #header>
<div class="flex items-center gap-2">
<UIcon name="i-heroicons-cpu-chip" class="size-5 text-primary" />
<h2 class="text-base font-semibold text-highlighted">Server</h2>
</div>
</template>
<div class="space-y-3 text-sm">
<div class="flex justify-between gap-3">
<span class="text-muted">Host</span>
<span class="truncate text-highlighted">{{ status?.server.hostname || "-" }}</span>
</div>
<div class="flex justify-between gap-3">
<span class="text-muted">CPU</span>
<span class="text-highlighted">{{ status?.server.cpuCount || "-" }} Kerne</span>
</div>
<div class="flex justify-between gap-3">
<span class="text-muted">Load</span>
<span class="text-highlighted">
{{ status?.server.load.one ?? "-" }} · {{ status?.server.load.five ?? "-" }} · {{ status?.server.load.fifteen ?? "-" }}
</span>
</div>
<div class="flex justify-between gap-3">
<span class="text-muted">Uptime</span>
<span class="text-highlighted">{{ formatDuration(status?.server.uptimeSeconds) }}</span>
</div>
</div>
</UCard>
<!-- Card 2: memory and root filesystem usage; progress bars fall back to 0 when unknown -->
<UCard :ui="{ root: 'rounded-lg' }">
<template #header>
<div class="flex items-center gap-2">
<UIcon name="i-heroicons-circle-stack" class="size-5 text-primary" />
<h2 class="text-base font-semibold text-highlighted">Speicher</h2>
</div>
</template>
<div class="space-y-4 text-sm">
<div>
<div class="mb-1 flex justify-between">
<span class="text-muted">RAM</span>
<span class="text-highlighted">{{ status?.server.memory.usedPercent ?? "-" }}%</span>
</div>
<UProgress :model-value="status?.server.memory.usedPercent || 0" />
<p class="mt-1 text-xs text-muted">
{{ formatBytes(status?.server.memory.usedBytes) }} von {{ formatBytes(status?.server.memory.totalBytes) }}
</p>
</div>
<div>
<div class="mb-1 flex justify-between">
<span class="text-muted">Root-Dateisystem</span>
<span class="text-highlighted">{{ status?.server.disk.rootUsedPercent ?? "-" }}%</span>
</div>
<UProgress :model-value="status?.server.disk.rootUsedPercent || 0" />
<p class="mt-1 text-xs text-muted">
{{ formatBytes(status?.server.disk.rootUsedBytes) }} von {{ formatBytes(status?.server.disk.rootTotalBytes) }}
</p>
</div>
</div>
</UCard>
<!-- Card 3: backend process facts (status, uptime, Node.js version, environment) -->
<UCard :ui="{ root: 'rounded-lg' }">
<template #header>
<div class="flex items-center gap-2">
<UIcon name="i-heroicons-server-stack" class="size-5 text-primary" />
<h2 class="text-base font-semibold text-highlighted">Backend</h2>
</div>
</template>
<div class="space-y-3 text-sm">
<div class="flex justify-between gap-3">
<span class="text-muted">Status</span>
<!-- NOTE(review): badge colour is fixed to "success" regardless of the reported status -->
<UBadge color="success" variant="soft">{{ status?.backend.status || "-" }}</UBadge>
</div>
<div class="flex justify-between gap-3">
<span class="text-muted">Laufzeit</span>
<span class="text-highlighted">{{ formatDuration(status?.backend.uptimeSeconds) }}</span>
</div>
<div class="flex justify-between gap-3">
<span class="text-muted">Node.js</span>
<span class="text-highlighted">{{ status?.backend.nodeVersion || "-" }}</span>
</div>
<div class="flex justify-between gap-3">
<span class="text-muted">Umgebung</span>
<span class="text-highlighted">{{ status?.backend.environment || "-" }}</span>
</div>
</div>
</UCard>
</div>
<!-- Service list: one row per entry of serviceRows with an OK / error badge -->
<UCard :ui="{ root: 'rounded-lg' }">
<template #header>
<div class="flex items-center gap-2">
<UIcon name="i-heroicons-signal" class="size-5 text-primary" />
<h2 class="text-base font-semibold text-highlighted">Dienste</h2>
</div>
</template>
<div class="divide-y divide-default">
<div
v-for="service in serviceRows"
:key="service.key"
class="flex items-center justify-between gap-4 py-3"
>
<div class="min-w-0">
<p class="font-medium text-highlighted">{{ service.label }}</p>
<!-- Secondary line: first available of error, url, publicBaseUrl, status -->
<p class="truncate text-xs text-muted">
{{ service.error || service.url || service.publicBaseUrl || service.status }}
</p>
</div>
<UBadge
:color="service.ok ? 'success' : 'error'"
variant="soft"
>
{{ service.ok ? "OK" : "Fehler" }}
</UBadge>
</div>
</div>
</UCard>
</div>
</UDashboardPanelContent>
</template>