220 lines
6.8 KiB
Python
220 lines
6.8 KiB
Python
#!/usr/bin/env python3
|
|
import argparse
|
|
import math
|
|
from pathlib import Path
|
|
|
|
import cv2
|
|
import numpy as np
|
|
from PIL import Image
|
|
|
|
|
|
def order_points(points):
    """Order four corner points as top-left, top-right, bottom-right, bottom-left.

    The primary heuristic picks the corners by coordinate sum and difference:
    smallest ``x + y`` is top-left, largest ``x + y`` is bottom-right,
    smallest ``y - x`` is top-right and largest ``y - x`` is bottom-left.

    That heuristic can select the same input point for two different corners
    (for example when the quadrilateral is rotated by roughly 45 degrees),
    which would yield duplicate corners and a singular perspective transform.
    In that case the four points are instead ordered by their angle around
    the centroid, starting at the point with the smallest ``x + y``.

    Args:
        points: Array-like of shape ``(N, 2)`` holding ``(x, y)`` pairs;
            callers in this file pass exactly four points.

    Returns:
        A ``float32`` array of shape ``(4, 2)``.
    """
    points = np.asarray(points)
    point_sum = points.sum(axis=1)
    point_diff = np.diff(points, axis=1).ravel()

    # Row indices in output order: top-left, top-right, bottom-right, bottom-left.
    picks = [
        int(np.argmin(point_sum)),
        int(np.argmin(point_diff)),
        int(np.argmax(point_sum)),
        int(np.argmax(point_diff)),
    ]

    if len(points) == 4 and len(set(picks)) < 4:
        # Degenerate pick: at least one input corner was used twice. Sorting
        # by angle around the centroid always uses every point exactly once.
        # Assuming image coordinates (y grows downward), ascending atan2
        # walks the corners clockwise as seen on screen.
        centre = points.mean(axis=0)
        angles = np.arctan2(points[:, 1] - centre[1], points[:, 0] - centre[0])
        clockwise = np.argsort(angles, kind="stable")
        start = int(np.argmin(point_sum[clockwise]))
        picks = np.roll(clockwise, -start).tolist()

    return points[picks].astype("float32")
|
|
|
|
|
|
def four_point_transform(image, points):
    """Warp the quadrilateral given by ``points`` onto an upright rectangle.

    Args:
        image: Source image passed to ``cv2.warpPerspective``.
        points: Four ``(x, y)`` corners in any order; they are sorted with
            ``order_points`` before use.

    Returns:
        The warped image. Its width and height are the longer of each pair
        of opposite edges, truncated to ``int``; pixels without a source
        are filled with white.
    """
    corners = order_points(points)
    tl, tr, br, bl = corners

    # Output size: longest bottom/top edge and longest right/left edge.
    out_width = int(max(np.linalg.norm(br - bl), np.linalg.norm(tr - tl)))
    out_height = int(max(np.linalg.norm(tr - br), np.linalg.norm(tl - bl)))

    target = np.array(
        [
            [0, 0],
            [out_width - 1, 0],
            [out_width - 1, out_height - 1],
            [0, out_height - 1],
        ],
        dtype="float32",
    )

    homography = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(
        image,
        homography,
        (out_width, out_height),
        borderValue=(255, 255, 255),
    )
|
|
|
|
|
|
def rotate_bound(image, angle):
    """Rotate ``image`` about its centre on a canvas enlarged to fit the result.

    Args:
        image: Array whose first two dimensions are height and width.
        angle: Rotation angle in degrees, passed unchanged to
            ``cv2.getRotationMatrix2D``.

    Returns:
        The rotated image; uncovered canvas areas are filled with white.
    """
    rows, cols = image.shape[:2]
    pivot = (cols / 2, rows / 2)

    affine = cv2.getRotationMatrix2D(pivot, angle, 1.0)
    abs_cos, abs_sin = abs(affine[0, 0]), abs(affine[0, 1])

    # Size of the axis-aligned bounding box of the rotated image.
    canvas_w = int((rows * abs_sin) + (cols * abs_cos))
    canvas_h = int((rows * abs_cos) + (cols * abs_sin))

    # Move the rotation centre to the centre of the enlarged canvas.
    affine[0, 2] += (canvas_w / 2) - pivot[0]
    affine[1, 2] += (canvas_h / 2) - pivot[1]

    return cv2.warpAffine(
        image,
        affine,
        (canvas_w, canvas_h),
        borderValue=(255, 255, 255),
    )
|
|
|
|
|
|
def deskew_by_text_angle(image):
    """Straighten small skews using the minimum-area box around dark pixels.

    The image is converted to grayscale, inverted and Otsu-thresholded so
    that dark content becomes foreground. The angle of the minimum-area
    rectangle around all foreground pixels is used as the skew estimate.

    Args:
        image: BGR image.

    Returns:
        The input image unchanged when fewer than 500 foreground pixels are
        found, or when the estimated skew is below 0.2 degrees or above
        8 degrees; otherwise the image rotated by ``rotate_bound``.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    inverted = cv2.bitwise_not(gray)
    threshold = cv2.threshold(inverted, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    coordinates = np.column_stack(np.where(threshold > 0))

    if len(coordinates) < 500:
        return image

    # np.where yields platform-sized integers; cast to 32-bit explicitly
    # rather than relying on an implicit conversion inside the bindings.
    raw_angle = cv2.minAreaRect(coordinates.astype("int32"))[-1]

    # minAreaRect reported angles in [-90, 0) before OpenCV 4.5.1 and in
    # (0, 90] afterwards. A rectangle's orientation is only defined modulo
    # 90 degrees, so fold the raw value into [0, 90) first and then map it
    # to the signed correction in (-45, 45]. For the legacy range this gives
    # exactly the same result as the previous "< -45" branch logic.
    folded = raw_angle % 90.0
    angle = -folded if folded < 45.0 else 90.0 - folded

    # Ignore negligible skews and implausibly large estimates.
    if abs(angle) < 0.2 or abs(angle) > 8:
        return image

    return rotate_bound(image, angle)
|
|
|
|
|
|
def find_document_contour(image, profile):
    """Look for a four-cornered outline that could be the scanned sheet.

    Detection runs on a copy resized to a height of 900 pixels; the corners
    found there are scaled back to the coordinates of ``image``.

    Args:
        image: BGR image.
        profile: Profile name. ``"receipt"`` lowers the minimum contour area
            to 3 % of the resized image; any other value uses 12 %.

    Returns:
        A ``(4, 2)`` array of corner coordinates in the original image
        scale, or ``None`` when none of the eight largest external contours
        is large enough and simplifies to four vertices.
    """
    scale = image.shape[0] / 900.0
    preview = cv2.resize(image, (int(image.shape[1] / scale), 900))
    blurred = cv2.GaussianBlur(
        cv2.cvtColor(preview, cv2.COLOR_BGR2GRAY), (5, 5), 0
    )

    # Close small gaps in the edge map so the sheet outline forms one contour.
    edge_map = cv2.morphologyEx(
        cv2.Canny(blurred, 45, 140),
        cv2.MORPH_CLOSE,
        cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7)),
    )

    found, _ = cv2.findContours(edge_map, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    candidates = sorted(found, key=cv2.contourArea, reverse=True)[:8]

    area_fraction = 0.03 if profile == "receipt" else 0.12
    min_area = preview.shape[0] * preview.shape[1] * area_fraction

    for candidate in candidates:
        if cv2.contourArea(candidate) >= min_area:
            tolerance = 0.025 * cv2.arcLength(candidate, True)
            polygon = cv2.approxPolyDP(candidate, tolerance, True)
            if len(polygon) == 4:
                return polygon.reshape(4, 2) * scale

    return None
|
|
|
|
|
|
def trim_light_border(image):
    """Crop away near-white margins around the largest non-white region.

    Pixels with a gray value above 245 count as background. The crop is the
    bounding box of the largest remaining region, padded by at least
    12 pixels (or 2.5 % of the box's shorter side) and clamped to the image.

    Args:
        image: BGR image.

    Returns:
        The cropped view of ``image``, or ``image`` itself when no region is
        found or the largest one covers less than 2 % of the image area.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, content_mask = cv2.threshold(gray, 245, 255, cv2.THRESH_BINARY_INV)
    content_mask = cv2.morphologyEx(
        content_mask,
        cv2.MORPH_CLOSE,
        cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9)),
    )

    blobs, _ = cv2.findContours(content_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not blobs:
        return image

    largest = max(blobs, key=cv2.contourArea)
    img_h, img_w = image.shape[0], image.shape[1]
    if cv2.contourArea(largest) < img_h * img_w * 0.02:
        return image

    left, top, box_w, box_h = cv2.boundingRect(largest)
    margin = max(12, int(min(box_w, box_h) * 0.025))

    # Grow the box by the margin on every side without leaving the image.
    left = max(0, left - margin)
    top = max(0, top - margin)
    box_w = min(img_w - left, box_w + margin * 2)
    box_h = min(img_h - top, box_h + margin * 2)

    return image[top:top + box_h, left:left + box_w]
|
|
|
|
|
|
def enhance_receipt(image):
    """Produce a contrast-boosted, denoised grayscale rendition of a receipt.

    Steps: grayscale conversion, CLAHE (clip limit 2.0, 8x8 tiles),
    non-local-means denoising, min-max normalisation to 0..255.

    Args:
        image: BGR image.

    Returns:
        The enhanced image converted back to three BGR channels.
    """
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    mono = equalizer.apply(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))
    # Positional arguments: dst=None, h=8, template window 7, search window 21.
    mono = cv2.fastNlMeansDenoising(mono, None, 8, 7, 21)
    stretched = cv2.normalize(mono, None, 0, 255, cv2.NORM_MINMAX)
    return cv2.cvtColor(stretched, cv2.COLOR_GRAY2BGR)
|
|
|
|
|
|
def enhance_document(image):
    """Boost local contrast of a colour document while keeping its colours.

    CLAHE (clip limit 1.6, 8x8 tiles) is applied to the L channel of the
    LAB representation only; the a and b channels pass through unchanged.

    Args:
        image: BGR image.

    Returns:
        The enhanced BGR image.
    """
    lightness, chroma_a, chroma_b = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2LAB))
    equalizer = cv2.createCLAHE(clipLimit=1.6, tileGridSize=(8, 8))
    boosted = cv2.merge((equalizer.apply(lightness), chroma_a, chroma_b))
    return cv2.cvtColor(boosted, cv2.COLOR_LAB2BGR)
|
|
|
|
|
|
def auto_rotate_profile(image, profile):
    """Turn landscape receipts upright; leave everything else untouched.

    Args:
        image: Array whose first two dimensions are height and width.
        profile: Profile name; only ``"receipt"`` triggers a rotation.

    Returns:
        ``image`` rotated 90 degrees clockwise when the profile is
        ``"receipt"`` and the image is wider than tall, otherwise ``image``
        itself.
    """
    rows, cols = image.shape[:2]
    is_landscape_receipt = profile == "receipt" and cols > rows

    if not is_landscape_receipt:
        return image

    return cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
|
|
|
|
|
|
def postprocess(input_path, output_path, profile):
    """Run the full scan clean-up pipeline on one file.

    Pipeline: read the image, rectify the detected sheet outline (or trim
    the light border when no outline is found), deskew, trim again, rotate
    landscape receipts, apply the profile-specific enhancement, and save.

    Args:
        input_path: Path of the image to read.
        output_path: Destination path; its suffix selects the output format.
        profile: ``"receipt"`` uses the receipt enhancement, ``"raw"`` skips
            enhancement, any other value uses the document enhancement.

    Raises:
        RuntimeError: If OpenCV cannot read ``input_path``.
    """
    source = cv2.imread(str(input_path), cv2.IMREAD_COLOR)
    if source is None:
        raise RuntimeError(f"OpenCV konnte {input_path} nicht lesen")

    corners = find_document_contour(source, profile)
    if corners is None:
        result = trim_light_border(source)
    else:
        result = four_point_transform(source, corners.astype("float32"))

    for step in (deskew_by_text_angle, trim_light_border):
        result = step(result)
    result = auto_rotate_profile(result, profile)

    # "raw" maps to no enhancer; unknown profiles fall back to the document one.
    enhancers = {"receipt": enhance_receipt, "raw": None}
    enhancer = enhancers.get(profile, enhance_document)
    if enhancer is not None:
        result = enhancer(result)

    save_output(result, output_path)
|
|
|
|
|
|
def save_output(image, output_path):
    """Write ``image`` to ``output_path`` in the format implied by its suffix.

    Supported suffixes (case-insensitive): ``.pdf`` (via PIL at 300 dpi),
    ``.jpg``/``.jpeg`` (quality 92), ``.png`` (compression level 3) and
    ``.tif``/``.tiff``.

    Args:
        image: BGR image.
        output_path: ``pathlib.Path`` of the destination file.

    Raises:
        RuntimeError: If the suffix is not supported, or if OpenCV reports
            that the file could not be written.
    """
    suffix = output_path.suffix.lower()

    if suffix == ".pdf":
        rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        pil_image = Image.fromarray(rgb)
        if pil_image.mode != "RGB":
            pil_image = pil_image.convert("RGB")
        pil_image.save(output_path, "PDF", resolution=300.0)
        return

    if suffix in {".jpg", ".jpeg"}:
        params = [cv2.IMWRITE_JPEG_QUALITY, 92]
    elif suffix == ".png":
        params = [cv2.IMWRITE_PNG_COMPRESSION, 3]
    elif suffix in {".tif", ".tiff"}:
        params = []
    else:
        raise RuntimeError(f"Nicht unterstütztes Ausgabeformat: {suffix}")

    # cv2.imwrite signals failure through its boolean return value instead
    # of raising, so an unchecked call would lose the output silently.
    if not cv2.imwrite(str(output_path), image, params):
        raise RuntimeError(f"OpenCV konnte {output_path} nicht schreiben")
|
|
|
|
|
|
def main():
    """Parse the command line and post-process a single scan.

    Options: ``--input`` and ``--output`` (both required) and ``--profile``
    (one of ``document``, ``receipt``, ``raw``; defaults to ``document``).
    """
    cli = argparse.ArgumentParser(description="FEDEO Scan-Nachbearbeitung mit OpenCV")
    for flag in ("--input", "--output"):
        cli.add_argument(flag, required=True)
    cli.add_argument(
        "--profile",
        choices=["document", "receipt", "raw"],
        default="document",
    )
    options = cli.parse_args()

    postprocess(Path(options.input), Path(options.output), options.profile)
|
|
|
|
|
|
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|