OpenCV Pipeline für Scan Korrekturen ergänzen

This commit is contained in:
2026-06-02 16:37:38 +02:00
parent 0ea4efdc43
commit 0ecdff4d7d
12 changed files with 429 additions and 5 deletions

View File

@@ -8,3 +8,6 @@ FEDEO_SCAN_FORMAT=pdf
FEDEO_SCAN_RESOLUTION=300
FEDEO_SCAN_MODE=Color
FEDEO_SCAN_SOURCE=
FEDEO_SCAN_POSTPROCESS=false
FEDEO_SCAN_POSTPROCESS_PROFILE=document
FEDEO_SCAN_POSTPROCESS_PYTHON=python3

View File

@@ -53,6 +53,30 @@ npm install
npm run dev
```
## OpenCV-Nachbearbeitung
Für automatischen Zuschnitt, leichte Entzerrung, Rotation und Kontrastkorrektur kann die OpenCV-Pipeline aktiviert werden.
```bash
python3 -m venv .venv-opencv
. .venv-opencv/bin/activate
pip install -r requirements-opencv.txt
```
Konfiguration:
```env
FEDEO_SCAN_POSTPROCESS=true
FEDEO_SCAN_POSTPROCESS_PROFILE=receipt
FEDEO_SCAN_POSTPROCESS_PYTHON=/pfad/zum/agent/.venv-opencv/bin/python
```
Profile:
- `receipt`: Bons und schmale Belege werden zugeschnitten, hochkant ausgerichtet, in Graustufen umgewandelt und im Kontrast verstärkt.
- `document`: allgemeine Dokumente mit Farberhalt und moderater Verbesserung.
- `raw`: Zuschnitt/Entzerrung ohne Kontrastkorrektur.
## Build
```bash

View File

@@ -0,0 +1,3 @@
opencv-python-headless>=4.9
Pillow>=10.0
numpy>=1.26

View File

@@ -0,0 +1,219 @@
#!/usr/bin/env python3
import argparse
import math
from pathlib import Path
import cv2
import numpy as np
from PIL import Image
def order_points(points):
    """Arrange four corner points in a fixed order.

    The point with the smallest coordinate sum becomes the first entry
    (top-left) and the one with the largest sum the third (bottom-right).
    The smallest and largest difference between the second and first
    coordinate pick the second (top-right) and fourth (bottom-left) entry.

    Args:
        points: Array of corner coordinates, expected to have shape (4, 2).

    Returns:
        A float32 array of shape (4, 2) ordered top-left, top-right,
        bottom-right, bottom-left.
    """
    sums = points.sum(axis=1)
    diffs = np.diff(points, axis=1)
    # Index of the source point for each output slot, in output order.
    corner_indices = (
        np.argmin(sums),
        np.argmin(diffs),
        np.argmax(sums),
        np.argmax(diffs),
    )
    ordered = np.zeros((4, 2), dtype="float32")
    for slot, index in enumerate(corner_indices):
        ordered[slot] = points[index]
    return ordered
def four_point_transform(image, points):
    """Warp the quadrilateral described by ``points`` onto an upright rectangle.

    The output size is taken from the longer of each pair of opposite edges.
    Pixels that fall outside the source image are filled with white.

    Args:
        points: Four corner coordinates; they are sorted via ``order_points``.

    Returns:
        The perspective-corrected image.
    """
    corners = order_points(points)
    tl, tr, br, bl = corners

    target_width = int(max(np.linalg.norm(br - bl), np.linalg.norm(tr - tl)))
    target_height = int(max(np.linalg.norm(tr - br), np.linalg.norm(tl - bl)))

    right = target_width - 1
    bottom = target_height - 1
    target = np.array(
        [[0, 0], [right, 0], [right, bottom], [0, bottom]],
        dtype="float32",
    )

    homography = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(
        image,
        homography,
        (target_width, target_height),
        borderValue=(255, 255, 255),
    )
def rotate_bound(image, angle):
    """Rotate ``image`` by ``angle`` degrees around its centre without clipping.

    The output canvas is enlarged to the bounding box of the rotated image
    and the uncovered area is filled with white.

    Args:
        image: Image array whose first two dimensions are height and width.
        angle: Rotation angle in degrees, passed to ``cv2.getRotationMatrix2D``.

    Returns:
        The rotated image on the enlarged canvas.
    """
    rows, cols = image.shape[:2]
    pivot_x, pivot_y = cols / 2, rows / 2
    rotation = cv2.getRotationMatrix2D((pivot_x, pivot_y), angle, 1.0)

    abs_cos, abs_sin = abs(rotation[0, 0]), abs(rotation[0, 1])
    out_cols = int((rows * abs_sin) + (cols * abs_cos))
    out_rows = int((rows * abs_cos) + (cols * abs_sin))

    # Shift the rotation so the old centre lands on the new canvas centre.
    rotation[0, 2] += (out_cols / 2) - pivot_x
    rotation[1, 2] += (out_rows / 2) - pivot_y

    return cv2.warpAffine(image, rotation, (out_cols, out_rows), borderValue=(255, 255, 255))
def deskew_by_text_angle(image):
    """Straighten a slightly skewed scan based on the orientation of its dark pixels.

    The image is inverted and Otsu-thresholded so that ink becomes foreground.
    The minimum-area rectangle around all foreground pixels yields the skew
    angle, which is then undone with ``rotate_bound``.

    The image is returned unchanged when there are fewer than 500 foreground
    pixels, or when the detected skew is below 0.2 degrees or above 8 degrees
    (treated as "already straight" and "not a plausible skew" respectively).

    Args:
        image: BGR image array.

    Returns:
        The deskewed image, or ``image`` itself when no correction is applied.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    inverted = cv2.bitwise_not(gray)
    threshold = cv2.threshold(inverted, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    coordinates = np.column_stack(np.where(threshold > 0))
    if len(coordinates) < 500:
        return image

    angle = cv2.minAreaRect(coordinates)[-1]
    # cv2.minAreaRect reports the angle in [-90, 0) on older OpenCV releases
    # and in (0, 90] on newer ones (the pinned requirement is >= 4.9).
    # Fold both ranges into a correction within [-45, 45]; without the
    # "> 45" branch a small skew reported near 90 degrees would be rejected
    # by the plausibility check below and never corrected.
    if angle < -45:
        angle = -(90 + angle)
    elif angle > 45:
        angle = 90 - angle
    else:
        angle = -angle

    if abs(angle) < 0.2 or abs(angle) > 8:
        return image
    return rotate_bound(image, angle)
def find_document_contour(image, profile):
    """Look for a four-cornered outline that likely marks the document.

    Detection runs on a copy scaled to a height of 900 pixels. The eight
    largest external contours of the closed Canny edge map are examined
    from largest to smallest; the first one that is large enough and
    simplifies to exactly four vertices wins.

    Args:
        image: BGR image array.
        profile: Processing profile; ``"receipt"`` lowers the minimum area
            to 3 % of the preview, every other value uses 12 %.

    Returns:
        A (4, 2) array of corner coordinates scaled back to the input
        image, or ``None`` when no suitable outline is found.
    """
    scale = image.shape[0] / 900.0
    preview = cv2.resize(image, (int(image.shape[1] / scale), 900))

    blurred = cv2.GaussianBlur(cv2.cvtColor(preview, cv2.COLOR_BGR2GRAY), (5, 5), 0)
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
    edge_map = cv2.morphologyEx(cv2.Canny(blurred, 45, 140), cv2.MORPH_CLOSE, closing_kernel)

    outlines, _ = cv2.findContours(edge_map, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    candidates = sorted(outlines, key=cv2.contourArea, reverse=True)[:8]

    area_fraction = 0.03 if profile == "receipt" else 0.12
    area_floor = preview.shape[0] * preview.shape[1] * area_fraction

    for outline in candidates:
        if cv2.contourArea(outline) >= area_floor:
            tolerance = 0.025 * cv2.arcLength(outline, True)
            polygon = cv2.approxPolyDP(outline, tolerance, True)
            if len(polygon) == 4:
                return polygon.reshape(4, 2) * scale
    return None
def trim_light_border(image):
    """Crop away near-white margins around the main content.

    Pixels with a gray value of 245 or less count as content. The bounding
    box of the largest connected content region is kept, extended by a
    padding of at least 12 pixels (2.5 % of the box's shorter side if that
    is larger) and limited to the image bounds.

    The image is returned unchanged when no content region exists or the
    largest one covers less than 2 % of the image area.

    Args:
        image: BGR image array.

    Returns:
        The cropped view of ``image``, or ``image`` itself.
    """
    rows, cols = image.shape[:2]

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, content_mask = cv2.threshold(gray, 245, 255, cv2.THRESH_BINARY_INV)
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9))
    content_mask = cv2.morphologyEx(content_mask, cv2.MORPH_CLOSE, closing_kernel)

    regions, _ = cv2.findContours(content_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not regions:
        return image

    largest = max(regions, key=cv2.contourArea)
    if cv2.contourArea(largest) < rows * cols * 0.02:
        return image

    left, top, box_width, box_height = cv2.boundingRect(largest)
    margin = max(12, int(min(box_width, box_height) * 0.025))

    left = max(0, left - margin)
    top = max(0, top - margin)
    right = left + min(cols - left, box_width + margin * 2)
    bottom = top + min(rows - top, box_height + margin * 2)
    return image[top:bottom, left:right]
def enhance_receipt(image):
    """Produce a high-contrast grayscale rendition for receipts.

    Steps: grayscale conversion, CLAHE (clip limit 2.0, 8x8 tiles),
    non-local-means denoising and min/max normalisation to 0..255.

    Args:
        image: BGR image array.

    Returns:
        The enhanced image, converted back to three BGR channels.
    """
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    luminance = equalizer.apply(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))
    denoised = cv2.fastNlMeansDenoising(luminance, None, 8, 7, 21)
    stretched = cv2.normalize(denoised, None, 0, 255, cv2.NORM_MINMAX)
    return cv2.cvtColor(stretched, cv2.COLOR_GRAY2BGR)
def enhance_document(image):
    """Apply a moderate contrast boost while keeping colour information.

    CLAHE (clip limit 1.6, 8x8 tiles) is applied to the L channel of the
    LAB representation only; the two colour channels are left untouched.

    Args:
        image: BGR image array.

    Returns:
        The enhanced BGR image.
    """
    lightness, channel_a, channel_b = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2LAB))
    equalizer = cv2.createCLAHE(clipLimit=1.6, tileGridSize=(8, 8))
    boosted = equalizer.apply(lightness)
    merged = cv2.merge((boosted, channel_a, channel_b))
    return cv2.cvtColor(merged, cv2.COLOR_LAB2BGR)
def auto_rotate_profile(image, profile):
    """Rotate landscape receipts into portrait orientation.

    Only the ``"receipt"`` profile triggers a rotation, and only when the
    image is wider than it is tall; the rotation is 90 degrees clockwise.

    Args:
        image: Image array whose first two dimensions are height and width.
        profile: Processing profile name.

    Returns:
        The rotated image, or ``image`` itself when nothing changes.
    """
    rows, cols = image.shape[:2]
    if profile != "receipt":
        return image
    if cols <= rows:
        return image
    return cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
def postprocess(input_path, output_path, profile):
    """Run the full correction pipeline on one scan and write the result.

    Pipeline: perspective correction when a document outline is found
    (otherwise a light-border trim), deskew, another border trim,
    profile-dependent rotation, profile-dependent enhancement, save.

    Args:
        input_path: Path of the image to read.
        output_path: Path of the file to write; its suffix selects the format.
        profile: ``"receipt"`` uses the receipt enhancement, ``"raw"`` skips
            enhancement, any other value uses the document enhancement.

    Raises:
        RuntimeError: If OpenCV cannot read ``input_path``.
    """
    source = cv2.imread(str(input_path), cv2.IMREAD_COLOR)
    if source is None:
        raise RuntimeError(f"OpenCV konnte {input_path} nicht lesen")

    quad = find_document_contour(source, profile)
    if quad is None:
        result = trim_light_border(source)
    else:
        result = four_point_transform(source, quad.astype("float32"))

    result = deskew_by_text_angle(result)
    result = trim_light_border(result)
    result = auto_rotate_profile(result, profile)

    if profile == "receipt":
        result = enhance_receipt(result)
    elif profile != "raw":
        result = enhance_document(result)

    save_output(result, output_path)
def save_output(image, output_path):
    """Write ``image`` to ``output_path`` in the format implied by its suffix.

    Supported suffixes (case-insensitive): ``.pdf`` (written through Pillow
    with a resolution of 300), ``.jpg``/``.jpeg`` (quality 92), ``.png``
    (compression level 3) and ``.tif``/``.tiff``.

    Args:
        image: BGR image array.
        output_path: ``pathlib.Path`` of the target file.

    Raises:
        RuntimeError: If the suffix is not one of the supported formats.
    """
    extension = output_path.suffix.lower()

    if extension == ".pdf":
        # Pillow expects RGB channel order, OpenCV delivers BGR.
        page = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        if page.mode != "RGB":
            page = page.convert("RGB")
        page.save(output_path, "PDF", resolution=300.0)
    elif extension in {".jpg", ".jpeg"}:
        cv2.imwrite(str(output_path), image, [cv2.IMWRITE_JPEG_QUALITY, 92])
    elif extension == ".png":
        cv2.imwrite(str(output_path), image, [cv2.IMWRITE_PNG_COMPRESSION, 3])
    elif extension in {".tif", ".tiff"}:
        cv2.imwrite(str(output_path), image)
    else:
        raise RuntimeError(f"Nicht unterstütztes Ausgabeformat: {extension}")
def main():
    """Parse the command line and run the post-processing pipeline.

    Options: ``--input`` and ``--output`` (both required) and ``--profile``
    (one of ``document``, ``receipt``, ``raw``; defaults to ``document``).
    """
    cli = argparse.ArgumentParser(description="FEDEO Scan-Nachbearbeitung mit OpenCV")
    for flag in ("--input", "--output"):
        cli.add_argument(flag, required=True)
    cli.add_argument("--profile", choices=["document", "receipt", "raw"], default="document")

    options = cli.parse_args()
    postprocess(Path(options.input), Path(options.output), options.profile)


if __name__ == "__main__":
    main()

View File

@@ -20,6 +20,16 @@ const scanFormatFromEnv = (value: string | undefined): AgentConfig["scanFormat"]
return "pdf"
}
// Reads a boolean flag from an environment value.
// A missing or empty value yields `fallback`; otherwise the trimmed,
// lower-cased value is true only for "1", "true", "yes", "ja" or "on".
const booleanFromEnv = (value: string | undefined, fallback: boolean) => {
  if (value === undefined || value === "") return fallback
  switch (value.trim().toLowerCase()) {
    case "1":
    case "true":
    case "yes":
    case "ja":
    case "on":
      return true
    default:
      return false
  }
}
// Resolves the post-processing profile from an environment value.
// The value is trimmed and lower-cased before matching (the same
// normalisation booleanFromEnv applies), so "Receipt" or " raw " are
// accepted instead of silently falling back. Anything that is not
// "document", "receipt" or "raw" yields "document".
const postprocessProfileFromEnv = (value: string | undefined): AgentConfig["postprocessProfile"] => {
  const normalized = value?.trim().toLowerCase()
  if (normalized === "document" || normalized === "receipt" || normalized === "raw") return normalized
  return "document"
}
export const loadConfig = (): AgentConfig => {
loadDotEnv(process.env.FEDEO_AGENT_ENV || ".env")
@@ -40,5 +50,8 @@ export const loadConfig = (): AgentConfig => {
scanResolution: numberFromEnv(process.env.FEDEO_SCAN_RESOLUTION, 300),
scanMode: optional(process.env.FEDEO_SCAN_MODE) || "Color",
scanSource: optional(process.env.FEDEO_SCAN_SOURCE),
scanPostprocess: booleanFromEnv(process.env.FEDEO_SCAN_POSTPROCESS, false),
postprocessProfile: postprocessProfileFromEnv(process.env.FEDEO_SCAN_POSTPROCESS_PROFILE),
postprocessPython: optional(process.env.FEDEO_SCAN_POSTPROCESS_PYTHON) || "python3",
}
}

View File

@@ -0,0 +1,66 @@
import path from "node:path"
import { fileURLToPath } from "node:url"
import { AgentConfig, ScanResult } from "../types.js"
import { commandExists, runCommand } from "../commands.js"
// Filesystem path of this module, derived from its import.meta URL.
const currentFile = fileURLToPath(import.meta.url)
// Directory two levels above the folder containing this module; used as the base for bundled scripts.
const agentRoot = path.resolve(path.dirname(currentFile), "../..")
// Python script that is invoked for the OpenCV post-processing step.
const postprocessScript = path.join(agentRoot, "scripts/opencv_postprocess.py")
// Maps a lower-case file extension (including the leading dot) to the MIME type
// reported for the post-processed file. Extensions not listed here are reported
// as "application/octet-stream" by postprocessScan.
const extensionMimeTypes: Record<string, string> = {
".pdf": "application/pdf",
".png": "image/png",
".tif": "image/tiff",
".tiff": "image/tiff",
".jpg": "image/jpeg",
".jpeg": "image/jpeg",
}
// Appends `.${format}` when `filename` has no extension at all.
// A filename that already carries any extension is returned unchanged,
// even if that extension differs from `format`.
const ensureOutputExtension = (filename: string, format: AgentConfig["scanFormat"]) => {
  const hasExtension = path.extname(filename) !== ""
  return hasExtension ? filename : `${filename}.${format}`
}
// Checks whether the configured Python interpreter can run the OpenCV pipeline.
// Returns false when the interpreter command is not found; otherwise runs
// `import cv2, PIL, numpy` with a 10 second timeout and returns true only
// when that command exits with code 0.
export const hasOpenCvPostprocessRuntime = async (config: AgentConfig) => {
  const interpreter = config.postprocessPython
  const interpreterFound = await commandExists(interpreter)
  if (!interpreterFound) return false

  const importCheck = ["-c", "import cv2, PIL, numpy"]
  const { code } = await runCommand(interpreter, importCheck, { timeoutMs: 10_000 })
  return code === 0
}
// Runs the OpenCV post-processing script on `inputPath` and describes the result.
//
// The output file is placed in `config.workDir`; its name is `outputFilename`,
// with `.${outputFormat}` appended when the name has no extension. The script
// is started through `config.postprocessPython` with a five minute timeout.
//
// Throws an Error carrying the script's stderr (or a message with the exit
// code when stderr is empty) if the script exits with a non-zero code.
// Returns the output path, the final filename and the MIME type derived from
// the output extension ("application/octet-stream" for unknown extensions).
export const postprocessScan = async (
  config: AgentConfig,
  inputPath: string,
  outputFilename: string,
  outputFormat: AgentConfig["scanFormat"],
  profile: AgentConfig["postprocessProfile"]
): Promise<ScanResult> => {
  const filename = ensureOutputExtension(outputFilename, outputFormat)
  const outputPath = path.join(config.workDir, filename)

  const scriptArgs = [postprocessScript, "--input", inputPath, "--output", outputPath, "--profile", profile]
  const { code, stderr } = await runCommand(config.postprocessPython, scriptArgs, { timeoutMs: 5 * 60 * 1000 })

  if (code !== 0) {
    throw new Error(stderr || `OpenCV-Nachbearbeitung wurde mit Code ${code} beendet`)
  }

  const mimeType = extensionMimeTypes[path.extname(outputPath).toLowerCase()] || "application/octet-stream"
  return { path: outputPath, filename, mimeType }
}

View File

@@ -2,6 +2,7 @@ import { mkdirSync } from "node:fs"
import path from "node:path"
import { AgentConfig, ScanJob, ScanResult } from "../types.js"
import { commandExists, runCommand } from "../commands.js"
import { hasOpenCvPostprocessRuntime, postprocessScan } from "./postprocess.js"
const mimeTypes = {
pdf: "application/pdf",
@@ -25,6 +26,31 @@ const numberSetting = (settings: Record<string, unknown> | undefined, key: strin
return undefined
}
// Reads a boolean job setting.
// A boolean value is returned as is; a string is true only when its trimmed,
// lower-cased form is "1", "true", "yes", "ja" or "on"; any other type
// (including a missing key or missing settings) yields `fallback`.
const booleanSetting = (settings: Record<string, unknown> | undefined, key: string, fallback: boolean) => {
  const raw = settings?.[key]
  switch (typeof raw) {
    case "boolean":
      return raw
    case "string":
      return ["1", "true", "yes", "ja", "on"].includes(raw.trim().toLowerCase())
    default:
      return fallback
  }
}
// Reads the `postprocessProfile` job setting.
// Returns the setting when it is exactly "document", "receipt" or "raw";
// every other value (or missing settings) yields `fallback`.
const profileSetting = (
  settings: Record<string, unknown> | undefined,
  fallback: AgentConfig["postprocessProfile"]
): AgentConfig["postprocessProfile"] => {
  const requested = settings?.postprocessProfile
  if (requested !== "document" && requested !== "receipt" && requested !== "raw") {
    return fallback
  }
  return requested
}
// Makes sure `filename` ends with the extension for `format`.
// - no extension: `.${format}` is appended
// - extension equal to `.${format}` ignoring case: filename is kept as is
// - any other extension: it is replaced by `.${format}`
const ensureFilenameExtension = (filename: string, format: AgentConfig["scanFormat"]) => {
  const wanted = `.${format}`
  const current = path.extname(filename)
  if (current === "") return filename + wanted
  if (current.toLowerCase() === wanted) return filename
  const stem = filename.slice(0, filename.length - current.length)
  return stem + wanted
}
// Reports whether the `scanimage` command is available, via commandExists.
export const hasSane = () => commandExists("scanimage")
export const listScanners = async () => {
@@ -54,18 +80,24 @@ export const runScan = async (config: AgentConfig, job: ScanJob): Promise<ScanRe
const mode = stringSetting(settings, "mode") || config.scanMode
const source = stringSetting(settings, "source") || config.scanSource
const scannerName = job.scannerName || config.scannerName
const filename = job.requestedFilename || `${job.id}.${format}`
const filename = ensureFilenameExtension(job.requestedFilename || `${job.id}.${format}`, format)
const outputPath = path.join(config.workDir, filename)
const shouldPostprocess = booleanSetting(settings, "postprocess", config.scanPostprocess)
const postprocessProfile = profileSetting(settings, config.postprocessProfile)
const scanFormat = shouldPostprocess ? "png" : format
const scanOutputPath = shouldPostprocess
? path.join(config.workDir, `${job.id}.raw.png`)
: outputPath
const args = [
"--format",
format,
scanFormat,
"--resolution",
String(resolution),
"--mode",
mode,
"--output-file",
outputPath,
scanOutputPath,
]
if (source) args.push("--source", source)
@@ -77,6 +109,14 @@ export const runScan = async (config: AgentConfig, job: ScanJob): Promise<ScanRe
throw new Error(result.stderr || `scanimage wurde mit Code ${result.code} beendet`)
}
if (shouldPostprocess) {
if (!await hasOpenCvPostprocessRuntime(config)) {
throw new Error("OpenCV-Nachbearbeitung ist aktiviert, aber python3 mit cv2, Pillow und numpy ist nicht verfügbar")
}
return await postprocessScan(config, scanOutputPath, filename, format, postprocessProfile)
}
return {
path: outputPath,
filename,

View File

@@ -9,6 +9,9 @@ export type AgentConfig = {
scanResolution: number
scanMode: string
scanSource?: string
scanPostprocess: boolean
postprocessProfile: "document" | "receipt" | "raw"
postprocessPython: string
}
export type AgentHeartbeat = {