diff --git a/agents/fedeo-device-agent/.env.example b/agents/fedeo-device-agent/.env.example index 9ed335f..80b0fb4 100644 --- a/agents/fedeo-device-agent/.env.example +++ b/agents/fedeo-device-agent/.env.example @@ -8,3 +8,6 @@ FEDEO_SCAN_FORMAT=pdf FEDEO_SCAN_RESOLUTION=300 FEDEO_SCAN_MODE=Color FEDEO_SCAN_SOURCE= +FEDEO_SCAN_POSTPROCESS=false +FEDEO_SCAN_POSTPROCESS_PROFILE=document +FEDEO_SCAN_POSTPROCESS_PYTHON=python3 diff --git a/agents/fedeo-device-agent/README.md b/agents/fedeo-device-agent/README.md index 5d6490b..3592b4d 100644 --- a/agents/fedeo-device-agent/README.md +++ b/agents/fedeo-device-agent/README.md @@ -53,6 +53,30 @@ npm install npm run dev ``` +## OpenCV-Nachbearbeitung + +Für automatischen Zuschnitt, leichte Entzerrung, Rotation und Kontrastkorrektur kann die OpenCV-Pipeline aktiviert werden. + +```bash +python3 -m venv .venv-opencv +. .venv-opencv/bin/activate +pip install -r requirements-opencv.txt +``` + +Konfiguration: + +```env +FEDEO_SCAN_POSTPROCESS=true +FEDEO_SCAN_POSTPROCESS_PROFILE=receipt +FEDEO_SCAN_POSTPROCESS_PYTHON=/pfad/zum/agent/.venv-opencv/bin/python +``` + +Profile: + +- `receipt`: Bons und schmale Belege werden bevorzugt hochkant zugeschnitten und kontrastiert. +- `document`: allgemeine Dokumente mit Farberhalt und moderater Verbesserung. +- `raw`: Zuschnitt/Entzerrung ohne starke Kontrastkorrektur. 
+
 ## Build
 
 ```bash
diff --git a/agents/fedeo-device-agent/requirements-opencv.txt b/agents/fedeo-device-agent/requirements-opencv.txt
new file mode 100644
index 0000000..37753f3
--- /dev/null
+++ b/agents/fedeo-device-agent/requirements-opencv.txt
@@ -0,0 +1,3 @@
+opencv-python-headless>=4.9
+Pillow>=10.0
+numpy>=1.26
diff --git a/agents/fedeo-device-agent/scripts/opencv_postprocess.py b/agents/fedeo-device-agent/scripts/opencv_postprocess.py
new file mode 100644
index 0000000..127c61d
--- /dev/null
+++ b/agents/fedeo-device-agent/scripts/opencv_postprocess.py
@@ -0,0 +1,273 @@
+#!/usr/bin/env python3
+"""FEDEO scan post-processing: crop, deskew, rotate and enhance with OpenCV."""
+import argparse
+import math
+from pathlib import Path
+
+import cv2
+import numpy as np
+from PIL import Image
+
+
+def order_points(points):
+    """Order four (x, y) corners as top-left, top-right, bottom-right, bottom-left."""
+    rect = np.zeros((4, 2), dtype="float32")
+    point_sum = points.sum(axis=1)
+    point_diff = np.diff(points, axis=1)
+
+    # x + y is smallest at the top-left corner and largest at the bottom-right;
+    # y - x is smallest at the top-right corner and largest at the bottom-left.
+    rect[0] = points[np.argmin(point_sum)]
+    rect[2] = points[np.argmax(point_sum)]
+    rect[1] = points[np.argmin(point_diff)]
+    rect[3] = points[np.argmax(point_diff)]
+    return rect
+
+
+def four_point_transform(image, points):
+    """Warp the quadrilateral given by ``points`` onto an upright rectangle.
+
+    Areas not covered by the source image are filled with white.
+    """
+    rect = order_points(points)
+    top_left, top_right, bottom_right, bottom_left = rect
+
+    width_a = np.linalg.norm(bottom_right - bottom_left)
+    width_b = np.linalg.norm(top_right - top_left)
+    max_width = int(max(width_a, width_b))
+
+    height_a = np.linalg.norm(top_right - bottom_right)
+    height_b = np.linalg.norm(top_left - bottom_left)
+    max_height = int(max(height_a, height_b))
+
+    destination = np.array([
+        [0, 0],
+        [max_width - 1, 0],
+        [max_width - 1, max_height - 1],
+        [0, max_height - 1],
+    ], dtype="float32")
+
+    matrix = cv2.getPerspectiveTransform(rect, destination)
+    return cv2.warpPerspective(image, matrix, (max_width, max_height), borderValue=(255, 255, 255))
+
+
+def rotate_bound(image, angle):
+    """Rotate ``image`` by ``angle`` degrees on a canvas enlarged to fit the result."""
+    height, width = image.shape[:2]
+    center = (width / 2, height / 2)
+    matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
+    cos = abs(matrix[0, 0])
+    sin = abs(matrix[0, 1])
+
+    new_width = int((height * sin) + (width * cos))
+    new_height = int((height * cos) + (width * sin))
+
+    # Shift the rotation so the old center lands on the center of the new canvas.
+    matrix[0, 2] += (new_width / 2) - center[0]
+    matrix[1, 2] += (new_height / 2) - center[1]
+
+    return cv2.warpAffine(image, matrix, (new_width, new_height), borderValue=(255, 255, 255))
+
+
+def deskew_by_text_angle(image):
+    """Rotate ``image`` to undo a small skew estimated from its dark pixels.
+
+    The image is returned unchanged when fewer than 500 dark pixels are found
+    or when the estimated correction is below 0.2 or above 8 degrees.
+    """
+    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    inverted = cv2.bitwise_not(gray)
+    threshold = cv2.threshold(inverted, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
+    coordinates = np.column_stack(np.where(threshold > 0))
+
+    if len(coordinates) < 500:
+        return image
+
+    angle = cv2.minAreaRect(coordinates)[-1]
+    # minAreaRect reports the angle in [-90, 0) on older OpenCV builds and in
+    # (0, 90] on newer ones (changed around 4.5.1 - confirm for the pinned
+    # version). Fold both into a correction within +/-45 degrees; otherwise a
+    # slight skew reported as e.g. 88 degrees is rejected by the limit below.
+    if angle < -45:
+        angle = -(90 + angle)
+    elif angle > 45:
+        angle = 90 - angle
+    else:
+        angle = -angle
+
+    if abs(angle) < 0.2 or abs(angle) > 8:
+        return image
+
+    return rotate_bound(image, angle)
+
+
+def find_document_contour(image, profile):
+    """Find the largest four-cornered outline in ``image``.
+
+    Returns a 4x2 array of corner coordinates scaled back to the original
+    image size, or ``None`` when no sufficiently large quadrilateral is found.
+    """
+    ratio = image.shape[0] / 900.0
+    # Keep at least one pixel of width so cv2.resize does not fail on
+    # extremely tall and narrow inputs.
+    resized_width = max(1, int(image.shape[1] / ratio))
+    resized = cv2.resize(image, (resized_width, 900))
+    gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
+    gray = cv2.GaussianBlur(gray, (5, 5), 0)
+
+    edges = cv2.Canny(gray, 45, 140)
+    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
+    edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel)
+
+    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    contours = sorted(contours, key=cv2.contourArea, reverse=True)[:8]
+
+    # The receipt profile accepts much smaller outlines (3 % instead of 12 %).
+    min_area = resized.shape[0] * resized.shape[1] * (0.03 if profile == "receipt" else 0.12)
+
+    for contour in contours:
+        if cv2.contourArea(contour) < min_area:
+            continue
+
+        perimeter = cv2.arcLength(contour, True)
+        approx = cv2.approxPolyDP(contour, 0.025 * perimeter, True)
+        if len(approx) == 4:
+            return approx.reshape(4, 2) * ratio
+
+    return None
+
+
+def trim_light_border(image):
+    """Crop near-white margins around the largest darker region, keeping some padding.
+
+    The image is returned unchanged when no pixel at or below gray level 245
+    is found or the largest such region covers less than 2 % of the image.
+    """
+    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    mask = cv2.threshold(gray, 245, 255, cv2.THRESH_BINARY_INV)[1]
+    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9))
+    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
+
+    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    if not contours:
+        return image
+
+    contour = max(contours, key=cv2.contourArea)
+    if cv2.contourArea(contour) < image.shape[0] * image.shape[1] * 0.02:
+        return image
+
+    x, y, width, height = cv2.boundingRect(contour)
+    padding = max(12, int(min(width, height) * 0.025))
+    # Derive every edge from the unpadded box so that clamping the left/top
+    # edge at 0 does not push the right/bottom edge further out.
+    left = max(0, x - padding)
+    top = max(0, y - padding)
+    right = min(image.shape[1], x + width + padding)
+    bottom = min(image.shape[0], y + height + padding)
+    return image[top:bottom, left:right]
+
+
+def enhance_receipt(image):
+    """Convert to gray, apply CLAHE, denoise and stretch contrast; returns a BGR image."""
+    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
+    gray = clahe.apply(gray)
+    gray = cv2.fastNlMeansDenoising(gray, None, 8, 7, 21)
+    gray = cv2.normalize(gray, None, 0, 255, cv2.NORM_MINMAX)
+    return cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
+
+
+def enhance_document(image):
+    """Apply mild CLAHE to the lightness channel only, leaving the color channels as is."""
+    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
+    l_channel, a_channel, b_channel = cv2.split(lab)
+    clahe = cv2.createCLAHE(clipLimit=1.6, tileGridSize=(8, 8))
+    l_channel = clahe.apply(l_channel)
+    return cv2.cvtColor(cv2.merge((l_channel, a_channel, b_channel)), cv2.COLOR_LAB2BGR)
+
+
+def auto_rotate_profile(image, profile):
+    """Turn landscape images by 90 degrees clockwise when the profile is ``receipt``."""
+    height, width = image.shape[:2]
+
+    if profile == "receipt" and width > height:
+        return cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
+
+    return image
+
+
+def postprocess(input_path, output_path, profile):
+    """Run the full pipeline on ``input_path`` and write the result to ``output_path``.
+
+    Raises RuntimeError when the input cannot be read or the output cannot be written.
+    """
+    image = cv2.imread(str(input_path), cv2.IMREAD_COLOR)
+    if image is None:
+        raise RuntimeError(f"OpenCV konnte {input_path} nicht lesen")
+
+    contour = find_document_contour(image, profile)
+    if contour is not None:
+        processed = four_point_transform(image, contour.astype("float32"))
+    else:
+        processed = trim_light_border(image)
+
+    processed = deskew_by_text_angle(processed)
+    processed = trim_light_border(processed)
+    processed = auto_rotate_profile(processed, profile)
+
+    if profile == "receipt":
+        processed = enhance_receipt(processed)
+    elif profile != "raw":
+        processed = enhance_document(processed)
+
+    save_output(processed, output_path)
+
+
+def save_output(image, output_path):
+    """Write ``image`` (BGR) to ``output_path`` in the format given by its suffix.
+
+    Raises RuntimeError for unsupported suffixes and when OpenCV reports that
+    the file could not be written.
+    """
+    suffix = output_path.suffix.lower()
+
+    if suffix == ".pdf":
+        rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        pil_image = Image.fromarray(rgb)
+        if pil_image.mode != "RGB":
+            pil_image = pil_image.convert("RGB")
+        pil_image.save(output_path, "PDF", resolution=300.0)
+        return
+
+    if suffix in {".jpg", ".jpeg"}:
+        parameters = [cv2.IMWRITE_JPEG_QUALITY, 92]
+    elif suffix == ".png":
+        parameters = [cv2.IMWRITE_PNG_COMPRESSION, 3]
+    elif suffix in {".tif", ".tiff"}:
+        parameters = []
+    else:
+        raise RuntimeError(f"Nicht unterstütztes Ausgabeformat: {suffix}")
+
+    # cv2.imwrite signals failure through its return value instead of raising.
+    if not cv2.imwrite(str(output_path), image, parameters):
+        raise RuntimeError(f"OpenCV konnte {output_path} nicht schreiben")
+
+
+def main():
+    """Parse the command line and run the post-processing pipeline."""
+    parser = argparse.ArgumentParser(description="FEDEO Scan-Nachbearbeitung mit OpenCV")
+    parser.add_argument("--input", required=True)
+    parser.add_argument("--output", required=True)
+    parser.add_argument("--profile", default="document", choices=["document", "receipt", "raw"])
+    args = parser.parse_args()
+
+    try:
+        postprocess(Path(args.input), Path(args.output), args.profile)
+    except RuntimeError as error:
+        # The script's own errors go to stderr as a plain message with exit
+        # status 1; unexpected errors keep their traceback for diagnosis.
+        raise SystemExit(str(error)) from error
+
+
+if __name__ == "__main__":
+    main()
diff --git a/agents/fedeo-device-agent/src/config.ts b/agents/fedeo-device-agent/src/config.ts
index fcb11e4..7c23b64 100644
--- a/agents/fedeo-device-agent/src/config.ts
+++ b/agents/fedeo-device-agent/src/config.ts
@@ -20,6 +20,16 @@ const scanFormatFromEnv = (value: string | undefined): AgentConfig["scanFormat"]
   return "pdf"
 }
 
+const booleanFromEnv = (value: string | undefined, fallback: boolean) => {
+  if (!value) return fallback
+  return ["1", "true", "yes", "ja", "on"].includes(value.trim().toLowerCase())
+}
+
+const postprocessProfileFromEnv = (value: string | undefined): AgentConfig["postprocessProfile"] => {
+  if (value === "document" || value === "receipt" || value === "raw") return value
+  return "document"
+}
+
export const loadConfig = (): AgentConfig => { loadDotEnv(process.env.FEDEO_AGENT_ENV || ".env") @@ -40,5 +50,8 @@ export const loadConfig = (): AgentConfig => { scanResolution: numberFromEnv(process.env.FEDEO_SCAN_RESOLUTION, 300), scanMode: optional(process.env.FEDEO_SCAN_MODE) || "Color", scanSource: optional(process.env.FEDEO_SCAN_SOURCE), + scanPostprocess: booleanFromEnv(process.env.FEDEO_SCAN_POSTPROCESS, false), + postprocessProfile: postprocessProfileFromEnv(process.env.FEDEO_SCAN_POSTPROCESS_PROFILE), + postprocessPython: optional(process.env.FEDEO_SCAN_POSTPROCESS_PYTHON) || "python3", } } diff --git a/agents/fedeo-device-agent/src/scan/postprocess.ts b/agents/fedeo-device-agent/src/scan/postprocess.ts new file mode 100644 index 0000000..9b99df2 --- /dev/null +++ b/agents/fedeo-device-agent/src/scan/postprocess.ts @@ -0,0 +1,66 @@ +import path from "node:path" +import { fileURLToPath } from "node:url" +import { AgentConfig, ScanResult } from "../types.js" +import { commandExists, runCommand } from "../commands.js" + +const currentFile = fileURLToPath(import.meta.url) +const agentRoot = path.resolve(path.dirname(currentFile), "../..") +const postprocessScript = path.join(agentRoot, "scripts/opencv_postprocess.py") + +const extensionMimeTypes: Record = { + ".pdf": "application/pdf", + ".png": "image/png", + ".tif": "image/tiff", + ".tiff": "image/tiff", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", +} + +const ensureOutputExtension = (filename: string, format: AgentConfig["scanFormat"]) => { + const ext = path.extname(filename) + if (ext) return filename + return `${filename}.${format}` +} + +export const hasOpenCvPostprocessRuntime = async (config: AgentConfig) => { + if (!await commandExists(config.postprocessPython)) return false + + const result = await runCommand(config.postprocessPython, [ + "-c", + "import cv2, PIL, numpy", + ], { timeoutMs: 10_000 }) + + return result.code === 0 +} + +export const postprocessScan = async ( + config: AgentConfig, 
+ inputPath: string, + outputFilename: string, + outputFormat: AgentConfig["scanFormat"], + profile: AgentConfig["postprocessProfile"] +): Promise => { + const filename = ensureOutputExtension(outputFilename, outputFormat) + const outputPath = path.join(config.workDir, filename) + + const result = await runCommand(config.postprocessPython, [ + postprocessScript, + "--input", + inputPath, + "--output", + outputPath, + "--profile", + profile, + ], { timeoutMs: 5 * 60 * 1000 }) + + if (result.code !== 0) { + throw new Error(result.stderr || `OpenCV-Nachbearbeitung wurde mit Code ${result.code} beendet`) + } + + const extension = path.extname(outputPath).toLowerCase() + return { + path: outputPath, + filename, + mimeType: extensionMimeTypes[extension] || "application/octet-stream", + } +} diff --git a/agents/fedeo-device-agent/src/scan/sane.ts b/agents/fedeo-device-agent/src/scan/sane.ts index ef1576e..94290a6 100644 --- a/agents/fedeo-device-agent/src/scan/sane.ts +++ b/agents/fedeo-device-agent/src/scan/sane.ts @@ -2,6 +2,7 @@ import { mkdirSync } from "node:fs" import path from "node:path" import { AgentConfig, ScanJob, ScanResult } from "../types.js" import { commandExists, runCommand } from "../commands.js" +import { hasOpenCvPostprocessRuntime, postprocessScan } from "./postprocess.js" const mimeTypes = { pdf: "application/pdf", @@ -25,6 +26,31 @@ const numberSetting = (settings: Record | undefined, key: strin return undefined } +const booleanSetting = (settings: Record | undefined, key: string, fallback: boolean) => { + const value = settings?.[key] + if (typeof value === "boolean") return value + if (typeof value === "string") return ["1", "true", "yes", "ja", "on"].includes(value.trim().toLowerCase()) + return fallback +} + +const profileSetting = ( + settings: Record | undefined, + fallback: AgentConfig["postprocessProfile"] +): AgentConfig["postprocessProfile"] => { + const value = settings?.postprocessProfile + if (value === "document" || value === 
"receipt" || value === "raw") return value + return fallback +} + +const ensureFilenameExtension = (filename: string, format: AgentConfig["scanFormat"]) => { + const ext = path.extname(filename) + if (!ext) return `${filename}.${format}` + + const expectedExt = `.${format}` + if (ext.toLowerCase() === expectedExt) return filename + return `${filename.slice(0, -ext.length)}${expectedExt}` +} + export const hasSane = () => commandExists("scanimage") export const listScanners = async () => { @@ -54,18 +80,24 @@ export const runScan = async (config: AgentConfig, job: ScanJob): Promise @@ -60,6 +62,8 @@ const applyAgentDefaults = (agent) => { scanForm.resolution = Number(agent.scanDefaults?.resolution || 300) scanForm.mode = agent.scanDefaults?.mode || "Color" scanForm.source = agent.scanDefaults?.source || "ADF Duplex" + scanForm.postprocess = agent.scanDefaults?.postprocess !== false + scanForm.postprocessProfile = agent.scanDefaults?.postprocessProfile || "receipt" if (!scanForm.filename || scanForm.filename.startsWith("scan-")) { scanForm.filename = `scan-${new Date().toISOString().slice(0, 10)}.${scanForm.format || "pdf"}` @@ -128,7 +132,9 @@ const startScan = async () => { format: scanForm.format || "pdf", resolution: Number(scanForm.resolution || 300), mode: scanForm.mode || "Color", - source: scanForm.source || null + source: scanForm.source || null, + postprocess: scanForm.postprocess, + postprocessProfile: scanForm.postprocessProfile }, target: { folder: props.scanData.folder || null, @@ -268,6 +274,27 @@ loadAgents() +
+ + + + +
+ @@ -62,6 +64,8 @@ const applyAgentToForm = (agent: InstanceAgent | null) => { editState.resolution = Number(agent.scanDefaults?.resolution || 300) editState.mode = agent.scanDefaults?.mode || "Color" editState.source = agent.scanDefaults?.source || "" + editState.postprocess = Boolean(agent.scanDefaults?.postprocess) + editState.postprocessProfile = agent.scanDefaults?.postprocessProfile || "document" } watch(selectedAgent, (agent) => applyAgentToForm(agent), { immediate: true }) @@ -140,6 +144,8 @@ const saveAgent = async () => { resolution: Number(editState.resolution || 300), mode: editState.mode, source: editState.source || null, + postprocess: editState.postprocess, + postprocessProfile: editState.postprocessProfile, }, }) @@ -335,6 +341,23 @@ onMounted(async () => { + +
+ + + + +