OpenCV Pipeline für Scan Korrekturen ergänzen

2026-06-02 16:37:38 +02:00
parent 0ea4efdc43
commit 0ecdff4d7d
12 changed files with 429 additions and 5 deletions
--- a/agents/fedeo-device-agent/.env.example
+++ b/agents/fedeo-device-agent/.env.example
@@ -8,3 +8,6 @@ FEDEO_SCAN_FORMAT=pdf
 FEDEO_SCAN_RESOLUTION=300
 FEDEO_SCAN_MODE=Color
 FEDEO_SCAN_SOURCE=
 FEDEO_SCAN_POSTPROCESS=false
 FEDEO_SCAN_POSTPROCESS_PROFILE=document
 FEDEO_SCAN_POSTPROCESS_PYTHON=python3
--- a/agents/fedeo-device-agent/README.md
+++ b/agents/fedeo-device-agent/README.md
@@ -53,6 +53,30 @@ npm install
 npm run dev
 ```
 ## OpenCV-Nachbearbeitung
 Für automatischen Zuschnitt, leichte Entzerrung, Rotation und Kontrastkorrektur kann die OpenCV-Pipeline aktiviert werden.
 ```bash
 python3 -m venv .venv-opencv
 . .venv-opencv/bin/activate
 pip install -r requirements-opencv.txt
 ```
 Konfiguration:
 ```env
 FEDEO_SCAN_POSTPROCESS=true
 FEDEO_SCAN_POSTPROCESS_PROFILE=receipt
 FEDEO_SCAN_POSTPROCESS_PYTHON=/pfad/zum/agent/.venv-opencv/bin/python
 ```
 Profile:
 - `receipt`: Bons und schmale Belege werden bevorzugt hochkant zugeschnitten und kontrastiert.
 - `document`: allgemeine Dokumente mit Farberhalt und moderater Verbesserung.
 - `raw`: Zuschnitt/Entzerrung ohne Kontrastkorrektur.
 ## Build
 ```bash
--- a/agents/fedeo-device-agent/requirements-opencv.txt
+++ b/agents/fedeo-device-agent/requirements-opencv.txt
@@ -0,0 +1,3 @@
 opencv-python-headless>=4.9
 Pillow>=10.0
 numpy>=1.26
--- a/agents/fedeo-device-agent/scripts/opencv_postprocess.py
+++ b/agents/fedeo-device-agent/scripts/opencv_postprocess.py
@@ -0,0 +1,219 @@
 #!/usr/bin/env python3
 import argparse
 import math
 from pathlib import Path
 import cv2
 import numpy as np
 from PIL import Image
 def order_points(points):
    """Arrange corner points as top-left, top-right, bottom-right, bottom-left.

    For rows holding (x, y) pairs, the row with the smallest row sum is taken
    as top-left and the one with the largest row sum as bottom-right; the
    smallest and largest "second column minus first column" differences pick
    top-right and bottom-left.

    Returns a new float32 array with one row per corner.
    """
    coordinate_sums = points.sum(axis=1)
    coordinate_diffs = np.diff(points, axis=1).ravel()
    corner_indices = [
        np.argmin(coordinate_sums),   # top-left
        np.argmin(coordinate_diffs),  # top-right
        np.argmax(coordinate_sums),   # bottom-right
        np.argmax(coordinate_diffs),  # bottom-left
    ]
    return points[corner_indices].astype("float32")
 def four_point_transform(image, points):
    """Warp the quadrilateral given by ``points`` onto an upright rectangle.

    The output width and height are the longer of each pair of opposite
    edges, truncated to whole pixels. Pixels without a source are filled with
    the border value (255, 255, 255).
    """
    corners = order_points(points)
    top_left, top_right, bottom_right, bottom_left = corners

    # Output width: the longer of the bottom and top edges.
    bottom_edge = np.linalg.norm(bottom_right - bottom_left)
    top_edge = np.linalg.norm(top_right - top_left)
    target_width = int(max(bottom_edge, top_edge))

    # Output height: the longer of the right and left edges.
    right_edge = np.linalg.norm(top_right - bottom_right)
    left_edge = np.linalg.norm(top_left - bottom_left)
    target_height = int(max(right_edge, left_edge))

    last_column = target_width - 1
    last_row = target_height - 1
    target_corners = np.array(
        [
            [0, 0],
            [last_column, 0],
            [last_column, last_row],
            [0, last_row],
        ],
        dtype="float32",
    )
    homography = cv2.getPerspectiveTransform(corners, target_corners)
    return cv2.warpPerspective(
        image,
        homography,
        (target_width, target_height),
        borderValue=(255, 255, 255),
    )
 def rotate_bound(image, angle):
    """Rotate ``image`` by ``angle`` degrees onto a canvas sized for the result.

    The canvas dimensions are derived from the absolute sine/cosine entries of
    the rotation matrix, and the translation is shifted so the rotated content
    stays centred. Uncovered pixels use the border value (255, 255, 255).
    """
    height, width = image.shape[:2]
    center_x, center_y = width / 2, height / 2
    rotation = cv2.getRotationMatrix2D((center_x, center_y), angle, 1.0)

    abs_cos = abs(rotation[0, 0])
    abs_sin = abs(rotation[0, 1])
    bound_width = int((height * abs_sin) + (width * abs_cos))
    bound_height = int((height * abs_cos) + (width * abs_sin))

    # Move the rotation centre to the centre of the enlarged canvas.
    rotation[0, 2] += (bound_width / 2) - center_x
    rotation[1, 2] += (bound_height / 2) - center_y
    return cv2.warpAffine(
        image,
        rotation,
        (bound_width, bound_height),
        borderValue=(255, 255, 255),
    )
 def deskew_by_text_angle(image):
    """Straighten a slightly skewed page based on the layout of its dark pixels.

    The image is inverted and Otsu-thresholded, the minimum-area rectangle
    around all foreground pixels is computed, and its angle is used as the
    skew estimate.

    The image is returned unchanged when fewer than 500 foreground pixels are
    found, or when the estimated skew is below 0.2 degrees or above 8 degrees.
    Otherwise the rotated image from ``rotate_bound`` is returned.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    inverted = cv2.bitwise_not(gray)
    threshold = cv2.threshold(inverted, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    coordinates = np.column_stack(np.where(threshold > 0))
    if len(coordinates) < 500:
        return image
    rect_angle = cv2.minAreaRect(coordinates)[-1]
    # minAreaRect reports its angle in [-90, 0) on older OpenCV releases and in
    # (0, 90] since 4.5.1; the two conventions differ by a multiple of 90
    # degrees. Folding the value into [-45, 45) before negating yields the same
    # correction on both, so a nearly upright page reported as ~89 degrees is
    # no longer rejected by the 8 degree limit below.
    angle = -(((rect_angle + 45.0) % 90.0) - 45.0)
    if abs(angle) < 0.2 or abs(angle) > 8:
        return image
    return rotate_bound(image, angle)
 def find_document_contour(image, profile):
    """Look for a four-cornered outline of the scanned document.

    Detection runs on a copy resized to a height of 900 pixels: blur, Canny
    edges, morphological closing, then the eight largest external contours are
    inspected in descending area order. A contour qualifies when it covers at
    least 3 % (profile ``"receipt"``) or 12 % (any other profile) of the
    resized image and its polygon approximation has exactly four points.

    Returns the four points scaled back to the coordinates of ``image``, or
    ``None`` when no contour qualifies.
    """
    scale = image.shape[0] / 900.0
    preview = cv2.resize(image, (int(image.shape[1] / scale), 900))

    blurred = cv2.GaussianBlur(cv2.cvtColor(preview, cv2.COLOR_BGR2GRAY), (5, 5), 0)
    edge_map = cv2.Canny(blurred, 45, 140)
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
    edge_map = cv2.morphologyEx(edge_map, cv2.MORPH_CLOSE, closing_kernel)

    candidates, _ = cv2.findContours(edge_map, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    largest = sorted(candidates, key=cv2.contourArea, reverse=True)[:8]

    area_fraction = 0.03 if profile == "receipt" else 0.12
    min_area = preview.shape[0] * preview.shape[1] * area_fraction

    for candidate in largest:
        if cv2.contourArea(candidate) >= min_area:
            outline_length = cv2.arcLength(candidate, True)
            polygon = cv2.approxPolyDP(candidate, 0.025 * outline_length, True)
            if len(polygon) == 4:
                return polygon.reshape(4, 2) * scale
    return None
 def trim_light_border(image):
    """Crop away the near-white margin around the largest non-white region.

    Pixels with a gray value above 245 count as background. The crop is the
    bounding box of the largest remaining region, grown by a padding of at
    least 12 pixels (2.5 % of the box's shorter side if that is larger) and
    clipped to the image. The image is returned unchanged when no region is
    found or the largest one covers less than 2 % of the image.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    content_mask = cv2.threshold(gray, 245, 255, cv2.THRESH_BINARY_INV)[1]
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9))
    content_mask = cv2.morphologyEx(content_mask, cv2.MORPH_CLOSE, closing_kernel)

    regions, _ = cv2.findContours(content_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not regions:
        return image
    largest_region = max(regions, key=cv2.contourArea)
    image_height, image_width = image.shape[0], image.shape[1]
    if cv2.contourArea(largest_region) < image_height * image_width * 0.02:
        return image

    box_x, box_y, box_width, box_height = cv2.boundingRect(largest_region)
    margin = max(12, int(min(box_width, box_height) * 0.025))
    left = max(0, box_x - margin)
    top = max(0, box_y - margin)
    crop_width = min(image_width - left, box_width + margin * 2)
    crop_height = min(image_height - top, box_height + margin * 2)
    return image[top:top + crop_height, left:left + crop_width]
 def enhance_receipt(image):
    """Return a contrast-boosted grayscale rendering of ``image`` as BGR.

    Steps: grayscale conversion, CLAHE (clip limit 2.0, 8x8 tiles), non-local
    means denoising, min-max normalisation to 0..255, then conversion back to
    a three-channel BGR image.
    """
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    luminance = equalizer.apply(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))
    denoised = cv2.fastNlMeansDenoising(luminance, None, 8, 7, 21)
    stretched = cv2.normalize(denoised, None, 0, 255, cv2.NORM_MINMAX)
    return cv2.cvtColor(stretched, cv2.COLOR_GRAY2BGR)
 def enhance_document(image):
    """Apply CLAHE to the lightness channel only and return a BGR image.

    The image is converted to LAB, the L channel is equalised (clip limit 1.6,
    8x8 tiles) and merged back with the untouched a/b channels.
    """
    lightness, channel_a, channel_b = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2LAB))
    equalizer = cv2.createCLAHE(clipLimit=1.6, tileGridSize=(8, 8))
    merged = cv2.merge((equalizer.apply(lightness), channel_a, channel_b))
    return cv2.cvtColor(merged, cv2.COLOR_LAB2BGR)
 def auto_rotate_profile(image, profile):
    """Turn landscape images upright for the ``"receipt"`` profile.

    An image that is wider than tall is rotated 90 degrees clockwise when the
    profile is ``"receipt"``; in every other case the input is returned as is.
    """
    height, width = image.shape[:2]
    is_landscape = width > height
    if profile != "receipt" or not is_landscape:
        return image
    return cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
 def postprocess(input_path, output_path, profile):
    """Run the full correction pipeline on one scan and write the result.

    Order of operations: perspective crop when a document outline is found
    (otherwise a light-border trim), deskew, another light-border trim,
    profile-dependent rotation, profile-dependent enhancement (``"receipt"``
    and every profile other than ``"raw"``), then saving.

    Raises:
        RuntimeError: if OpenCV cannot read ``input_path``.
    """
    source = cv2.imread(str(input_path), cv2.IMREAD_COLOR)
    if source is None:
        raise RuntimeError(f"OpenCV konnte {input_path} nicht lesen")

    corners = find_document_contour(source, profile)
    result = (
        trim_light_border(source)
        if corners is None
        else four_point_transform(source, corners.astype("float32"))
    )

    for step in (deskew_by_text_angle, trim_light_border):
        result = step(result)
    result = auto_rotate_profile(result, profile)

    if profile == "receipt":
        result = enhance_receipt(result)
    elif profile != "raw":
        result = enhance_document(result)

    save_output(result, output_path)
 def save_output(image, output_path):
    """Write ``image`` (BGR) to ``output_path`` in the format given by its suffix.

    Supported suffixes: ``.pdf`` (via Pillow, written with a fixed resolution
    of 300), ``.jpg``/``.jpeg`` (quality 92), ``.png`` (compression 3) and
    ``.tif``/``.tiff`` (OpenCV defaults).

    Args:
        image: BGR image array to store.
        output_path: ``pathlib.Path`` of the target file.

    Raises:
        RuntimeError: if the suffix is not supported, or if OpenCV reports
            that the file could not be written.
    """
    suffix = output_path.suffix.lower()
    if suffix == ".pdf":
        rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        pil_image = Image.fromarray(rgb)
        if pil_image.mode != "RGB":
            pil_image = pil_image.convert("RGB")
        pil_image.save(output_path, "PDF", resolution=300.0)
        return

    if suffix in {".jpg", ".jpeg"}:
        params = [cv2.IMWRITE_JPEG_QUALITY, 92]
    elif suffix == ".png":
        params = [cv2.IMWRITE_PNG_COMPRESSION, 3]
    elif suffix in {".tif", ".tiff"}:
        params = []
    else:
        raise RuntimeError(f"Nicht unterstütztes Ausgabeformat: {suffix}")

    # cv2.imwrite signals failure through its return value instead of raising;
    # without this check the script would exit with status 0 and no file.
    if not cv2.imwrite(str(output_path), image, params):
        raise RuntimeError(f"OpenCV konnte {output_path} nicht schreiben")
 def main():
    """Parse the command line and run the post-processing pipeline once.

    ``--input`` and ``--output`` are required; ``--profile`` defaults to
    ``document`` and accepts ``document``, ``receipt`` or ``raw``.
    """
    cli = argparse.ArgumentParser(description="FEDEO Scan-Nachbearbeitung mit OpenCV")
    for required_flag in ("--input", "--output"):
        cli.add_argument(required_flag, required=True)
    cli.add_argument("--profile", default="document", choices=["document", "receipt", "raw"])
    options = cli.parse_args()
    postprocess(Path(options.input), Path(options.output), options.profile)
 if __name__ == "__main__":
    main()
--- a/agents/fedeo-device-agent/src/config.ts
+++ b/agents/fedeo-device-agent/src/config.ts
@@ -20,6 +20,16 @@ const scanFormatFromEnv = (value: string | undefined): AgentConfig["scanFormat"]
    return "pdf"
 }
 const booleanFromEnv = (value: string | undefined, fallback: boolean) => {
    // Unset or empty variables defer to the caller-supplied fallback.
    if (value === undefined || value === "") return fallback
    // Any other value is true only if it matches one of the accepted words.
    const truthyWords = ["1", "true", "yes", "ja", "on"]
    const normalized = value.trim().toLowerCase()
    return truthyWords.includes(normalized)
 }
 // Maps the raw environment value to a known post-processing profile.
 // Surrounding whitespace and letter case are ignored, matching how
 // booleanFromEnv treats its input; unset or unknown values yield "document".
 const postprocessProfileFromEnv = (value: string | undefined): AgentConfig["postprocessProfile"] => {
    const normalized = value?.trim().toLowerCase()
    if (normalized === "document" || normalized === "receipt" || normalized === "raw") return normalized
    return "document"
 }
 export const loadConfig = (): AgentConfig => {
    loadDotEnv(process.env.FEDEO_AGENT_ENV || ".env")
@@ -40,5 +50,8 @@ export const loadConfig = (): AgentConfig => {
        scanResolution: numberFromEnv(process.env.FEDEO_SCAN_RESOLUTION, 300),
        scanMode: optional(process.env.FEDEO_SCAN_MODE) || "Color",
        scanSource: optional(process.env.FEDEO_SCAN_SOURCE),
        scanPostprocess: booleanFromEnv(process.env.FEDEO_SCAN_POSTPROCESS, false),
        postprocessProfile: postprocessProfileFromEnv(process.env.FEDEO_SCAN_POSTPROCESS_PROFILE),
        postprocessPython: optional(process.env.FEDEO_SCAN_POSTPROCESS_PYTHON) || "python3",
    }
 }
--- a/agents/fedeo-device-agent/src/scan/postprocess.ts
+++ b/agents/fedeo-device-agent/src/scan/postprocess.ts
@@ -0,0 +1,66 @@
 import path from "node:path"
 import { fileURLToPath } from "node:url"
 import { AgentConfig, ScanResult } from "../types.js"
 import { commandExists, runCommand } from "../commands.js"
 const currentFile = fileURLToPath(import.meta.url)
 const agentRoot = path.resolve(path.dirname(currentFile), "../..")
 const postprocessScript = path.join(agentRoot, "scripts/opencv_postprocess.py")
 const extensionMimeTypes: Record<string, string> = {
    ".pdf": "application/pdf",
    ".png": "image/png",
    ".tif": "image/tiff",
    ".tiff": "image/tiff",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
 }
 // Appends ".<format>" only when the filename has no extension at all;
 // an existing extension is kept untouched, whatever it is.
 const ensureOutputExtension = (filename: string, format: AgentConfig["scanFormat"]) =>
    path.extname(filename) ? filename : `${filename}.${format}`
 // Resolves to true when the configured Python interpreter exists and can
 // import cv2, PIL and numpy within ten seconds.
 export const hasOpenCvPostprocessRuntime = async (config: AgentConfig) => {
    const interpreter = config.postprocessPython
    const interpreterFound = await commandExists(interpreter)
    if (!interpreterFound) return false

    const importCheck = ["-c", "import cv2, PIL, numpy"]
    const { code } = await runCommand(interpreter, importCheck, { timeoutMs: 10_000 })
    return code === 0
 }
 // Runs the OpenCV post-processing script on inputPath and describes the
 // file it wrote into config.workDir. Throws when the script exits non-zero,
 // using its stderr output as the message when available.
 export const postprocessScan = async (
    config: AgentConfig,
    inputPath: string,
    outputFilename: string,
    outputFormat: AgentConfig["scanFormat"],
    profile: AgentConfig["postprocessProfile"]
 ): Promise<ScanResult> => {
    const filename = ensureOutputExtension(outputFilename, outputFormat)
    const outputPath = path.join(config.workDir, filename)

    const scriptArgs = [
        postprocessScript,
        "--input", inputPath,
        "--output", outputPath,
        "--profile", profile,
    ]
    const { code, stderr } = await runCommand(
        config.postprocessPython,
        scriptArgs,
        { timeoutMs: 5 * 60 * 1000 }
    )
    if (code !== 0) {
        throw new Error(stderr || `OpenCV-Nachbearbeitung wurde mit Code ${code} beendet`)
    }

    // Unknown extensions fall back to a generic binary MIME type.
    const knownMimeType = extensionMimeTypes[path.extname(outputPath).toLowerCase()]
    return {
        path: outputPath,
        filename,
        mimeType: knownMimeType || "application/octet-stream",
    }
 }
--- a/agents/fedeo-device-agent/src/scan/sane.ts
+++ b/agents/fedeo-device-agent/src/scan/sane.ts
@@ -2,6 +2,7 @@ import { mkdirSync } from "node:fs"
 import path from "node:path"
 import { AgentConfig, ScanJob, ScanResult } from "../types.js"
 import { commandExists, runCommand } from "../commands.js"
 import { hasOpenCvPostprocessRuntime, postprocessScan } from "./postprocess.js"
 const mimeTypes = {
    pdf: "application/pdf",
@@ -25,6 +26,31 @@ const numberSetting = (settings: Record<string, unknown> | undefined, key: strin
    return undefined
 }
 // Reads a boolean job setting. Real booleans are used directly, strings are
 // matched against the accepted truthy words, anything else yields the fallback.
 const booleanSetting = (settings: Record<string, unknown> | undefined, key: string, fallback: boolean) => {
    const raw = settings?.[key]
    if (typeof raw === "string") {
        const truthyWords = ["1", "true", "yes", "ja", "on"]
        return truthyWords.includes(raw.trim().toLowerCase())
    }
    return typeof raw === "boolean" ? raw : fallback
 }
 // Returns the job's postprocessProfile when it is one of the known profile
 // names, otherwise the supplied fallback.
 const profileSetting = (
    settings: Record<string, unknown> | undefined,
    fallback: AgentConfig["postprocessProfile"]
 ): AgentConfig["postprocessProfile"] => {
    const candidate = settings?.postprocessProfile
    return candidate === "document" || candidate === "receipt" || candidate === "raw"
        ? candidate
        : fallback
 }
 // Makes the filename end in ".<format>": appends it when there is no
 // extension, keeps a case-insensitive match as is, and replaces any other
 // extension.
 const ensureFilenameExtension = (filename: string, format: AgentConfig["scanFormat"]) => {
    const wantedExt = `.${format}`
    const currentExt = path.extname(filename)
    if (currentExt === "") return `${filename}${wantedExt}`
    if (currentExt.toLowerCase() === wantedExt) return filename
    const stem = filename.slice(0, filename.length - currentExt.length)
    return `${stem}${wantedExt}`
 }
 export const hasSane = () => commandExists("scanimage")
 export const listScanners = async () => {
@@ -54,18 +80,24 @@ export const runScan = async (config: AgentConfig, job: ScanJob): Promise<ScanRe
    const mode = stringSetting(settings, "mode") || config.scanMode
    const source = stringSetting(settings, "source") || config.scanSource
    const scannerName = job.scannerName || config.scannerName
-    const filename = job.requestedFilename || `${job.id}.${format}`
+    const filename = ensureFilenameExtension(job.requestedFilename || `${job.id}.${format}`, format)
    const outputPath = path.join(config.workDir, filename)
    const shouldPostprocess = booleanSetting(settings, "postprocess", config.scanPostprocess)
    const postprocessProfile = profileSetting(settings, config.postprocessProfile)
    const scanFormat = shouldPostprocess ? "png" : format
    const scanOutputPath = shouldPostprocess
        ? path.join(config.workDir, `${job.id}.raw.png`)
        : outputPath
    const args = [
        "--format",
-        format,
+        scanFormat,
        "--resolution",
        String(resolution),
        "--mode",
        mode,
        "--output-file",
-        outputPath,
+        scanOutputPath,
    ]
    if (source) args.push("--source", source)
@@ -77,6 +109,14 @@ export const runScan = async (config: AgentConfig, job: ScanJob): Promise<ScanRe
        throw new Error(result.stderr || `scanimage wurde mit Code ${result.code} beendet`)
    }
    if (shouldPostprocess) {
        if (!await hasOpenCvPostprocessRuntime(config)) {
            throw new Error("OpenCV-Nachbearbeitung ist aktiviert, aber python3 mit cv2, Pillow und numpy ist nicht verfügbar")
        }
        return await postprocessScan(config, scanOutputPath, filename, format, postprocessProfile)
    }
    return {
        path: outputPath,
        filename,
--- a/agents/fedeo-device-agent/src/types.ts
+++ b/agents/fedeo-device-agent/src/types.ts
@@ -9,6 +9,9 @@ export type AgentConfig = {
    scanResolution: number
    scanMode: string
    scanSource?: string
    scanPostprocess: boolean
    postprocessProfile: "document" | "receipt" | "raw"
    postprocessPython: string
 }
 export type AgentHeartbeat = {
--- a/backend/db/migrations/0053_instance_agent_postprocess_defaults.sql
+++ b/backend/db/migrations/0053_instance_agent_postprocess_defaults.sql
@@ -0,0 +1 @@
 ALTER TABLE "instance_agents" ALTER COLUMN "scan_defaults" SET DEFAULT '{"format":"pdf","resolution":300,"mode":"Color","source":null,"postprocess":false,"postprocessProfile":"document"}'::jsonb;
--- a/backend/db/schema/instance_agents.ts
+++ b/backend/db/schema/instance_agents.ts
@@ -35,6 +35,8 @@ export const instanceAgents = pgTable("instance_agents", {
        resolution: 300,
        mode: "Color",
        source: null,
        postprocess: false,
        postprocessProfile: "document",
    }),
    lastSeenAt: timestamp("last_seen_at", { withTimezone: true }),
--- a/frontend/components/FileScanModal.vue
+++ b/frontend/components/FileScanModal.vue
@@ -29,7 +29,9 @@ const scanForm = reactive({
  format: "pdf",
  resolution: 300,
  mode: "Color",
-  source: "ADF Duplex"
+  source: "ADF Duplex",
  postprocess: true,
  postprocessProfile: "receipt"
 })
 const activeAgents = computed(() =>
@@ -60,6 +62,8 @@ const applyAgentDefaults = (agent) => {
  scanForm.resolution = Number(agent.scanDefaults?.resolution || 300)
  scanForm.mode = agent.scanDefaults?.mode || "Color"
  scanForm.source = agent.scanDefaults?.source || "ADF Duplex"
  scanForm.postprocess = agent.scanDefaults?.postprocess !== false
  scanForm.postprocessProfile = agent.scanDefaults?.postprocessProfile || "receipt"
  if (!scanForm.filename || scanForm.filename.startsWith("scan-")) {
    scanForm.filename = `scan-${new Date().toISOString().slice(0, 10)}.${scanForm.format || "pdf"}`
@@ -128,7 +132,9 @@ const startScan = async () => {
          format: scanForm.format || "pdf",
          resolution: Number(scanForm.resolution || 300),
          mode: scanForm.mode || "Color",
-          source: scanForm.source || null
+          source: scanForm.source || null,
          postprocess: scanForm.postprocess,
          postprocessProfile: scanForm.postprocessProfile
        },
        target: {
          folder: props.scanData.folder || null,
@@ -268,6 +274,27 @@ loadAgents()
            </UFormField>
          </div>
          <div class="grid gap-3 sm:grid-cols-[auto_minmax(0,1fr)]">
            <UCheckbox
                v-model="scanForm.postprocess"
                label="OpenCV-Korrektur"
                :disabled="scanInProgress"
            />
            <UFormField label="Profil">
              <USelectMenu
                  v-model="scanForm.postprocessProfile"
                  :items="[
                    { label: 'Bon', value: 'receipt' },
                    { label: 'Dokument', value: 'document' },
                    { label: 'Rohscan', value: 'raw' }
                  ]"
                  value-key="value"
                  label-key="label"
                  :disabled="scanInProgress || !scanForm.postprocess"
              />
            </UFormField>
          </div>
          <UAlert
              v-if="statusMessage"
              color="info"
--- a/frontend/pages/administration/scanners.vue
+++ b/frontend/pages/administration/scanners.vue
@@ -27,6 +27,8 @@ const editState = reactive({
  resolution: 300,
  mode: "Color",
  source: "",
  postprocess: false,
  postprocessProfile: "document",
 })
 const selectedAgent = computed(() =>
@@ -62,6 +64,8 @@ const applyAgentToForm = (agent: InstanceAgent | null) => {
  editState.resolution = Number(agent.scanDefaults?.resolution || 300)
  editState.mode = agent.scanDefaults?.mode || "Color"
  editState.source = agent.scanDefaults?.source || ""
  editState.postprocess = Boolean(agent.scanDefaults?.postprocess)
  editState.postprocessProfile = agent.scanDefaults?.postprocessProfile || "document"
 }
 watch(selectedAgent, (agent) => applyAgentToForm(agent), { immediate: true })
@@ -140,6 +144,8 @@ const saveAgent = async () => {
        resolution: Number(editState.resolution || 300),
        mode: editState.mode,
        source: editState.source || null,
        postprocess: editState.postprocess,
        postprocessProfile: editState.postprocessProfile,
      },
    })
@@ -335,6 +341,23 @@ onMounted(async () => {
                  <UInput v-model="editState.source" placeholder="ADF Duplex" />
                </UFormField>
              </div>
              <div class="grid gap-3 sm:grid-cols-[auto_minmax(0,1fr)]">
                <UCheckbox v-model="editState.postprocess" label="OpenCV-Nachbearbeitung" />
                <UFormField label="Profil">
                  <USelectMenu
                      v-model="editState.postprocessProfile"
                      :items="[
                        { label: 'Dokument', value: 'document' },
                        { label: 'Bon', value: 'receipt' },
                        { label: 'Rohscan', value: 'raw' },
                      ]"
                      value-key="value"
                      label-key="label"
                      :disabled="!editState.postprocess"
                  />
                </UFormField>
              </div>
            </div>
          </div>
		`@@ -0,0 +1 @@`
							`ALTER TABLE "instance_agents" ALTER COLUMN "scan_defaults" SET DEFAULT '{"format":"pdf","resolution":300,"mode":"Color","source":null,"postprocess":false,"postprocessProfile":"document"}'::jsonb;`