#!/usr/bin/env python3
"""Post-process a scanned image with OpenCV: crop, deskew, enhance, save.

Command-line usage:
    --input PATH --output PATH [--profile {document,receipt,raw}]
"""
import argparse
import math
from pathlib import Path

import cv2
import numpy as np
from PIL import Image


def order_points(points):
    """Return the four corner points in a fixed order.

    The result is a 4x2 float32 array laid out as
    [top-left, top-right, bottom-right, bottom-left], which is the order
    ``four_point_transform`` unpacks it in.

    Args:
        points: array of four (x, y) points in any order.
    """
    rect = np.zeros((4, 2), dtype="float32")
    point_sum = points.sum(axis=1)
    point_diff = np.diff(points, axis=1)  # y - x for every point

    # Smallest x + y -> top-left, largest x + y -> bottom-right.
    rect[0] = points[np.argmin(point_sum)]
    rect[2] = points[np.argmax(point_sum)]
    # Smallest y - x -> top-right, largest y - x -> bottom-left.
    rect[1] = points[np.argmin(point_diff)]
    rect[3] = points[np.argmax(point_diff)]
    return rect


def four_point_transform(image, points):
    """Warp the quadrilateral described by *points* to an upright rectangle.

    The output size is the longer of the two opposing edges in each
    direction. Areas outside the source image are filled with white.

    Args:
        image: source image as read by OpenCV.
        points: four (x, y) corner points in any order (float32).

    Returns:
        The perspective-corrected image.
    """
    rect = order_points(points)
    top_left, top_right, bottom_right, bottom_left = rect

    width_a = np.linalg.norm(bottom_right - bottom_left)
    width_b = np.linalg.norm(top_right - top_left)
    max_width = int(max(width_a, width_b))

    height_a = np.linalg.norm(top_right - bottom_right)
    height_b = np.linalg.norm(top_left - bottom_left)
    max_height = int(max(height_a, height_b))

    destination = np.array(
        [
            [0, 0],
            [max_width - 1, 0],
            [max_width - 1, max_height - 1],
            [0, max_height - 1],
        ],
        dtype="float32",
    )
    matrix = cv2.getPerspectiveTransform(rect, destination)
    return cv2.warpPerspective(
        image,
        matrix,
        (max_width, max_height),
        borderValue=(255, 255, 255),
    )


def rotate_bound(image, angle):
    """Rotate *image* by *angle* degrees, enlarging the canvas to fit.

    The canvas grows to the bounding box of the rotated image so that no
    content is cut off; newly exposed areas are filled with white.
    """
    height, width = image.shape[:2]
    center = (width / 2, height / 2)
    matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
    cos = abs(matrix[0, 0])
    sin = abs(matrix[0, 1])

    new_width = int((height * sin) + (width * cos))
    new_height = int((height * cos) + (width * sin))

    # Shift the translation so the rotated content is centered on the
    # enlarged canvas.
    matrix[0, 2] += (new_width / 2) - center[0]
    matrix[1, 2] += (new_height / 2) - center[1]
    return cv2.warpAffine(
        image,
        matrix,
        (new_width, new_height),
        borderValue=(255, 255, 255),
    )


def _normalize_skew_angle(angle):
    """Convert a ``cv2.minAreaRect`` angle into a rotation in [-45, 45].

    OpenCV releases differ in the range they report for the rectangle
    angle ([-90, 0) in older releases, (0, 90] in newer ones). Folding the
    value into [-45, 45] from both sides yields the same correction for
    either convention; for the older range the result equals the previous
    ``-(90 + angle)`` / ``-angle`` logic.
    """
    if angle < -45:
        angle += 90
    elif angle > 45:
        angle -= 90
    return -angle


def deskew_by_text_angle(image):
    """Straighten *image* based on the orientation of its dark content.

    The image is returned unchanged when fewer than 500 foreground pixels
    are found, or when the detected skew is below 0.2 or above 8 degrees.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    inverted = cv2.bitwise_not(gray)
    threshold = cv2.threshold(
        inverted, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
    )[1]

    coordinates = np.column_stack(np.where(threshold > 0))
    if len(coordinates) < 500:
        return image

    angle = _normalize_skew_angle(cv2.minAreaRect(coordinates)[-1])
    # Skip negligible corrections and implausibly large ones.
    if abs(angle) < 0.2 or abs(angle) > 8:
        return image
    return rotate_bound(image, angle)


def find_document_contour(image, profile):
    """Look for a four-cornered outline of the document in *image*.

    Detection runs on a copy scaled to a height of 900 pixels. Only the
    eight largest external contours are examined, and a contour must cover
    at least 3 % (profile ``"receipt"``) or 12 % (any other profile) of the
    scaled image.

    Returns:
        A 4x2 array of corner coordinates scaled back to the original
        image, or ``None`` if no suitable contour approximates to four
        points.
    """
    ratio = image.shape[0] / 900.0
    # Clamp to one pixel so extremely narrow inputs do not request a
    # zero-width resize.
    resized_width = max(1, int(image.shape[1] / ratio))
    resized = cv2.resize(image, (resized_width, 900))

    gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    edges = cv2.Canny(gray, 45, 140)
    # Close small gaps so the page outline forms one connected contour.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
    edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel)

    contours, _ = cv2.findContours(
        edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    contours = sorted(contours, key=cv2.contourArea, reverse=True)[:8]

    min_fraction = 0.03 if profile == "receipt" else 0.12
    min_area = resized.shape[0] * resized.shape[1] * min_fraction

    for contour in contours:
        if cv2.contourArea(contour) < min_area:
            continue
        perimeter = cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, 0.025 * perimeter, True)
        if len(approx) == 4:
            return approx.reshape(4, 2) * ratio
    return None


def trim_light_border(image):
    """Crop away near-white margins around the main content.

    Pixels with a gray value above 245 count as background. The image is
    returned unchanged when no content is found or when the largest
    content region covers less than 2 % of the image. Otherwise the crop
    is the bounding box of that region plus a padding of at least 12
    pixels (2.5 % of the box's shorter side), clipped to the image.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    mask = cv2.threshold(gray, 245, 255, cv2.THRESH_BINARY_INV)[1]
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9))
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)

    contours, _ = cv2.findContours(
        mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    if not contours:
        return image

    contour = max(contours, key=cv2.contourArea)
    if cv2.contourArea(contour) < image.shape[0] * image.shape[1] * 0.02:
        return image

    x, y, width, height = cv2.boundingRect(contour)
    padding = max(12, int(min(width, height) * 0.025))
    x = max(0, x - padding)
    y = max(0, y - padding)
    width = min(image.shape[1] - x, width + padding * 2)
    height = min(image.shape[0] - y, height + padding * 2)
    return image[y:y + height, x:x + width]


def enhance_receipt(image):
    """Return a contrast-enhanced, denoised grayscale rendition of *image*.

    The result is converted back to three channels so downstream code can
    treat it like any other BGR image.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    gray = clahe.apply(gray)
    gray = cv2.fastNlMeansDenoising(gray, None, 8, 7, 21)
    gray = cv2.normalize(gray, None, 0, 255, cv2.NORM_MINMAX)
    return cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)


def enhance_document(image):
    """Boost local contrast of *image* while keeping its colors.

    CLAHE is applied to the lightness channel in LAB space only; the two
    color channels are passed through untouched.
    """
    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    l_channel, a_channel, b_channel = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=1.6, tileGridSize=(8, 8))
    l_channel = clahe.apply(l_channel)
    merged = cv2.merge((l_channel, a_channel, b_channel))
    return cv2.cvtColor(merged, cv2.COLOR_LAB2BGR)


def auto_rotate_profile(image, profile):
    """Rotate landscape receipts 90 degrees clockwise into portrait.

    Images of any other profile, and receipts that are already at least
    as tall as they are wide, are returned unchanged.
    """
    height, width = image.shape[:2]
    if profile == "receipt" and width > height:
        return cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
    return image


def postprocess(input_path, output_path, profile):
    """Run the full clean-up pipeline on one image file.

    Steps: perspective-correct to the detected document outline (or trim
    the light border if none is found), deskew, trim again, rotate
    receipts to portrait, apply the profile-specific enhancement, save.

    Args:
        input_path: path of the image to read.
        output_path: ``Path`` of the file to write; its suffix selects the
            output format.
        profile: ``"document"``, ``"receipt"`` or ``"raw"``. ``"raw"``
            skips the enhancement step.

    Raises:
        RuntimeError: if the input cannot be read, the output format is
            unsupported, or OpenCV fails to write the output.
    """
    image = cv2.imread(str(input_path), cv2.IMREAD_COLOR)
    if image is None:
        raise RuntimeError(f"OpenCV konnte {input_path} nicht lesen")

    contour = find_document_contour(image, profile)
    if contour is not None:
        processed = four_point_transform(image, contour.astype("float32"))
    else:
        processed = trim_light_border(image)

    processed = deskew_by_text_angle(processed)
    # Deskewing may add a white canvas border, so trim once more.
    processed = trim_light_border(processed)
    processed = auto_rotate_profile(processed, profile)

    if profile == "receipt":
        processed = enhance_receipt(processed)
    elif profile != "raw":
        processed = enhance_document(processed)

    save_output(processed, output_path)


def _write_image(image, output_path, params=()):
    """Write *image* with ``cv2.imwrite`` and fail loudly on error.

    ``cv2.imwrite`` signals failure through its boolean return value
    rather than an exception, so the result has to be checked explicitly.
    """
    if not cv2.imwrite(str(output_path), image, list(params)):
        raise RuntimeError(f"OpenCV konnte {output_path} nicht schreiben")


def save_output(image, output_path):
    """Save *image* in the format implied by the suffix of *output_path*.

    Supported suffixes (case-insensitive): ``.pdf`` (written through
    Pillow at 300 dpi), ``.jpg``/``.jpeg`` (quality 92), ``.png``
    (compression level 3) and ``.tif``/``.tiff``.

    Raises:
        RuntimeError: for any other suffix, or if OpenCV reports that the
            file could not be written.
    """
    suffix = output_path.suffix.lower()

    if suffix == ".pdf":
        # Pillow expects RGB channel order, OpenCV images are BGR.
        rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        pil_image = Image.fromarray(rgb)
        if pil_image.mode != "RGB":
            pil_image = pil_image.convert("RGB")
        pil_image.save(output_path, "PDF", resolution=300.0)
        return

    if suffix in {".jpg", ".jpeg"}:
        _write_image(image, output_path, [cv2.IMWRITE_JPEG_QUALITY, 92])
        return

    if suffix == ".png":
        _write_image(image, output_path, [cv2.IMWRITE_PNG_COMPRESSION, 3])
        return

    if suffix in {".tif", ".tiff"}:
        _write_image(image, output_path)
        return

    raise RuntimeError(f"Nicht unterstütztes Ausgabeformat: {suffix}")


def main():
    """Parse the command-line arguments and process a single image."""
    parser = argparse.ArgumentParser(
        description="FEDEO Scan-Nachbearbeitung mit OpenCV"
    )
    parser.add_argument("--input", required=True)
    parser.add_argument("--output", required=True)
    parser.add_argument(
        "--profile",
        default="document",
        choices=["document", "receipt", "raw"],
    )
    args = parser.parse_args()
    postprocess(Path(args.input), Path(args.output), args.profile)


if __name__ == "__main__":
    main()