// ---------------------------------------------------------
// Provenance (from the git patch this file was delivered in):
//   Commit:  e8fe6940c28a0d88d4feefc464408c0c38564acb
//   Author:  florianfederspiel
//   Date:    Fri, 2 Jan 2026 12:45:14 +0100
//   Subject: [PATCH] Added Prepare Service
//   Path:    src/utils/gpt.ts (new file)
// ---------------------------------------------------------
import dayjs from "dayjs";
import axios from "axios";
import OpenAI from "openai";
import { z } from "zod";
import { zodResponseFormat } from "openai/helpers/zod";
import { GetObjectCommand } from "@aws-sdk/client-s3";
import { Blob } from "buffer";
import { FastifyInstance } from "fastify";

import { s3 } from "./s3";
import { secrets } from "./secrets";

// Drizzle schema
import { vendors, accounts } from "../../db/schema";
import { eq } from "drizzle-orm";

/** Module-wide OpenAI client; stays `null` until `initOpenAi()` has run. */
let openai: OpenAI | null = null;

/**
 * Endpoint of the PDF -> text conversion service.
 *
 * NOTE(review): this is a hard-coded IP address reached over plain HTTP, and
 * the request carries a bearer token. Consider moving the URL into
 * configuration and serving it over HTTPS.
 */
const PDF_TO_TEXT_URL = "http://23.88.52.85:8080/api/v1/convert/pdf/text";

// ---------------------------------------------------------
// INITIALIZE OPENAI
// ---------------------------------------------------------
/**
 * Creates the module-wide OpenAI client from `secrets.OPENAI_API_KEY`.
 *
 * Calling it again replaces the existing client.
 */
export const initOpenAi = async (): Promise<void> => {
  openai = new OpenAI({
    apiKey: secrets.OPENAI_API_KEY,
  });
};

// ---------------------------------------------------------
// STREAM → BUFFER
// ---------------------------------------------------------
/**
 * Collects every chunk emitted by a readable stream into a single Buffer.
 *
 * @param stream - Event-emitting readable stream (`data` / `end` / `error`).
 * @returns The concatenated stream contents.
 */
async function streamToBuffer(stream: NodeJS.ReadableStream): Promise<Buffer> {
  return new Promise<Buffer>((resolve, reject) => {
    const chunks: Buffer[] = [];
    stream.on("data", (chunk: Buffer | string) => {
      // A stream with an encoding set emits strings; normalise so that
      // Buffer.concat never receives a non-Buffer element.
      chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
    });
    stream.on("error", reject);
    stream.on("end", () => resolve(Buffer.concat(chunks)));
  });
}

// ---------------------------------------------------------
// GPT RESPONSE FORMAT (Zod Schema)
// ---------------------------------------------------------
/** Shape the model is asked to return for one invoice. */
const InstructionFormat = z.object({
  invoice_number: z.string(),
  invoice_date: z.string(),
  invoice_duedate: z.string(),
  invoice_type: z.string(),
  delivery_type: z.string(),
  delivery_note_number: z.string(),
  reference: z.string(),
  issuer: z.object({
    id: z.number().nullable().optional(),
    name: z.string(),
    address: z.string(),
    phone: z.string(),
    email: z.string(),
    bank: z.string(),
    bic: z.string(),
    iban: z.string(),
  }),
  recipient: z.object({
    name: z.string(),
    address: z.string(),
    phone: z.string(),
    email: z.string(),
  }),
  invoice_items: z.array(
    z.object({
      description: z.string(),
      unit: z.string(),
      quantity: z.number(),
      total: z.number(),
      total_without_tax: z.number(),
      tax_rate: z.number(),
      ean: z.number().nullable().optional(),
      article_number: z.number().nullable().optional(),
      account_number: z.number().nullable().optional(),
      account_id: z.number().nullable().optional(),
    })
  ),
  subtotal: z.number(),
  tax_rate: z.number(),
  tax: z.number(),
  total: z.number(),
  terms: z.string(),
});

// ---------------------------------------------------------
// MAIN FUNCTION – REPLACES SUPABASE VERSION
// ---------------------------------------------------------
/**
 * Downloads an invoice PDF from S3, converts it to text via the external
 * conversion service, and asks GPT to extract structured invoice data.
 *
 * @param server - Fastify instance; its `db` and `log` members are used.
 * @param file - File record; `file.id` and `file.path` (the S3 key) are read.
 * @param tenantId - Tenant whose vendors are offered to the model for matching.
 * @returns The parsed invoice object, or `null` when the file is not a PDF,
 *   the download or text extraction fails, no text was extracted, or the
 *   model returned no parsed result.
 * @throws If the OpenAI client cannot be initialised, or if the database
 *   queries or the OpenAI request reject (these are not caught here).
 */
export const getInvoiceDataFromGPT = async function (
  server: FastifyInstance,
  file: any,
  tenantId: number
) {
  // Create the client lazily instead of rebuilding it on every call.
  if (!openai) {
    await initOpenAi();
  }

  const client = openai;
  if (!client) {
    throw new Error("OpenAI not initialized. Call initOpenAi() first.");
  }

  server.log.info(`📄 Reading invoice file ${file.id}`);

  // Only process PDFs. Checked before the download so that other file types
  // do not cost an S3 round trip.
  const filePath: string = typeof file?.path === "string" ? file.path : "";
  if (!filePath.toLowerCase().endsWith(".pdf")) {
    server.log.warn(`Skipping non-PDF file ${file.id}`);
    return null;
  }

  // ---------------------------------------------------------
  // 1) DOWNLOAD PDF FROM S3
  // ---------------------------------------------------------
  let fileData: Buffer;

  try {
    const command = new GetObjectCommand({
      Bucket: secrets.S3_BUCKET,
      Key: filePath,
    });

    const { Body } = await s3.send(command);
    if (!Body) {
      throw new Error("S3 response contained no body");
    }
    // Under Node the SDK returns the body as a readable stream.
    fileData = await streamToBuffer(Body as NodeJS.ReadableStream);
  } catch (err: unknown) {
    server.log.error({ err }, `❌ S3 Download failed for file ${file.id}`);
    return null;
  }

  const fileBlob = new Blob([fileData], { type: "application/pdf" });

  // ---------------------------------------------------------
  // 2) SEND FILE TO PDF → TEXT API
  // ---------------------------------------------------------
  const form = new FormData();
  form.append("fileInput", fileBlob, filePath.split("/").pop());
  form.append("outputFormat", "txt");

  let extractedText: string;

  try {
    const res = await axios.post(PDF_TO_TEXT_URL, form, {
      headers: {
        "Content-Type": "multipart/form-data",
        Authorization: `Bearer ${secrets.STIRLING_API_KEY}`,
      },
    });

    extractedText = res.data;
  } catch (err: unknown) {
    server.log.error({ err }, "❌ PDF OCR API failed");
    return null;
  }

  // Without usable text the model has nothing to extract from, so stop here
  // rather than pay for a request that can only produce invented values.
  if (typeof extractedText !== "string" || extractedText.trim() === "") {
    server.log.warn(`No text could be extracted from file ${file.id}`);
    return null;
  }

  // ---------------------------------------------------------
  // 3) LOAD VENDORS + ACCOUNTS (DRIZZLE)
  // ---------------------------------------------------------
  // The two queries are independent, so they run concurrently.
  // NOTE(review): vendors are filtered by tenant but accounts are not —
  // confirm that accounts are meant to be shared across tenants.
  const [vendorList, accountList] = await Promise.all([
    server.db
      .select({ id: vendors.id, name: vendors.name })
      .from(vendors)
      .where(eq(vendors.tenant, tenantId)),
    server.db
      .select({
        id: accounts.id,
        label: accounts.label,
        number: accounts.number,
      })
      .from(accounts),
  ]);

  // ---------------------------------------------------------
  // 4) GPT ANALYSIS
  // ---------------------------------------------------------
  const completion = await client.chat.completions.parse({
    model: "gpt-4o",
    store: true,
    response_format: zodResponseFormat(InstructionFormat, "instruction"),
    messages: [
      { role: "user", content: extractedText },
      {
        role: "user",
        content:
          "You extract structured invoice data.\n\n" +
          `VENDORS: ${JSON.stringify(vendorList)}\n` +
          `ACCOUNTS: ${JSON.stringify(accountList)}\n\n` +
          "Match issuer by name to vendor.id.\n" +
          "Match invoice items to account id based on label/number.\n" +
          "Convert dates to YYYY-MM-DD.\n" +
          "Keep invoice items in original order.\n",
      },
    ],
  });

  // `choices` can be empty and `parsed` can be null; report both as `null`
  // instead of throwing a TypeError.
  const parsed = completion.choices[0]?.message.parsed ?? null;

  if (parsed === null) {
    server.log.warn(`GPT returned no parsed invoice data for file ${file.id}`);
    return null;
  }

  server.log.info(`🧾 Extracted invoice data for file ${file.id}`);

  return parsed;
};