import dayjs from "dayjs";
import axios from "axios";
import OpenAI from "openai";
import { z } from "zod";
import { zodResponseFormat } from "openai/helpers/zod";
import { GetObjectCommand } from "@aws-sdk/client-s3";
import { Blob } from "buffer";
import { FastifyInstance } from "fastify";

import { s3 } from "./s3";
import { secrets } from "./secrets";

// Drizzle schema
import { vendors, accounts } from "../../db/schema";
import { eq } from "drizzle-orm";

/** Module-level OpenAI client; stays `null` until `initOpenAi()` has run. */
let openai: OpenAI | null = null;

/**
 * Endpoint of the PDF → text conversion service.
 *
 * NOTE(review): this is a plain-HTTP URL on a fixed IP address, and the
 * request to it carries a bearer token. Consider moving it to configuration
 * and serving it over TLS.
 */
const PDF_TO_TEXT_URL = "http://23.88.52.85:8080/api/v1/convert/pdf/text";

// ---------------------------------------------------------
// INITIALIZE OPENAI
// ---------------------------------------------------------

/**
 * Creates the module-level OpenAI client from `secrets.OPENAI_API_KEY`.
 *
 * Every call replaces the previously stored client with a new instance.
 */
export const initOpenAi = async (): Promise<void> => {
  openai = new OpenAI({
    apiKey: secrets.OPENAI_API_KEY,
  });
};

// ---------------------------------------------------------
// STREAM → BUFFER
// ---------------------------------------------------------

/**
 * Collects everything a Node-style readable stream emits into one Buffer.
 *
 * @param stream - Object exposing `on("data" | "error" | "end", ...)`.
 * @returns Promise resolving to the concatenated chunks once "end" fires;
 *          rejects with the stream's "error" payload.
 */
async function streamToBuffer(stream: any): Promise<Buffer> {
  return new Promise<Buffer>((resolve, reject) => {
    const chunks: Uint8Array[] = [];

    stream.on("data", (chunk: Uint8Array | string) => {
      // Buffer.concat only accepts byte arrays, so convert string chunks
      // (emitted when the stream has an encoding set) before storing them.
      chunks.push(typeof chunk === "string" ? Buffer.from(chunk) : chunk);
    });
    stream.on("error", reject);
    stream.on("end", () => resolve(Buffer.concat(chunks)));
  });
}

// ---------------------------------------------------------
// GPT RESPONSE FORMAT (Zod Schema)
// ---------------------------------------------------------

/** Shape of the structured invoice data requested from the model. */
const InstructionFormat = z.object({
  invoice_number: z.string(),
  invoice_date: z.string(),
  invoice_duedate: z.string(),
  invoice_type: z.string(),
  delivery_type: z.string(),
  delivery_note_number: z.string(),
  reference: z.string(),
  issuer: z.object({
    id: z.number().nullable().optional(),
    name: z.string(),
    address: z.string(),
    phone: z.string(),
    email: z.string(),
    bank: z.string(),
    bic: z.string(),
    iban: z.string(),
  }),
  recipient: z.object({
    name: z.string(),
    address: z.string(),
    phone: z.string(),
    email: z.string(),
  }),
  invoice_items: z.array(
    z.object({
      description: z.string(),
      unit: z.string(),
      quantity: z.number(),
      total: z.number(),
      total_without_tax: z.number(),
      tax_rate: z.number(),
      ean: z.number().nullable().optional(),
      article_number: z.number().nullable().optional(),
      account_number: z.number().nullable().optional(),
      account_id: z.number().nullable().optional(),
    })
  ),
  subtotal: z.number(),
  tax_rate: z.number(),
  tax: z.number(),
  total: z.number(),
  terms: z.string(),
});

// ---------------------------------------------------------
// MAIN FUNCTION
// ---------------------------------------------------------

/**
 * Downloads an invoice PDF from S3, converts it to text through the external
 * conversion service, and asks GPT to turn that text into structured invoice
 * data matching `InstructionFormat`.
 *
 * @param server - Fastify instance; provides `server.db` and `server.log`.
 * @param file - File record; `file.id` is used in log messages and
 *               `file.path` is the S3 object key.
 * @param tenantId - Tenant whose vendors are offered to the model for matching.
 * @param learningContext - Optional text added to the prompt under
 *                          `HISTORICAL_PATTERNS`.
 * @returns The parsed invoice object, or `null` when the file is not a PDF,
 *          the S3 download or the text conversion fails, the conversion
 *          yields no text, or the model response has no parsed payload.
 * @throws Errors from the database queries and the OpenAI request are not
 *         caught here and propagate to the caller.
 */
export const getInvoiceDataFromGPT = async function (
  server: FastifyInstance,
  file: any,
  tenantId: number,
  learningContext?: string
) {
  await initOpenAi();

  if (!openai) {
    throw new Error("OpenAI not initialized. Call initOpenAi() first.");
  }
  // Keep a local reference so the non-null narrowing holds across the awaits.
  const client = openai;

  console.log(`📄 Reading invoice file ${file.id}`);

  // Only process PDFs. Checked before touching S3 so that non-PDF files are
  // not downloaded just to be discarded.
  if (
    typeof file.path !== "string" ||
    !file.path.toLowerCase().endsWith(".pdf")
  ) {
    server.log.warn(`Skipping non-PDF file ${file.id}`);
    return null;
  }

  // ---------------------------------------------------------
  // 1) DOWNLOAD PDF FROM S3
  // ---------------------------------------------------------
  let fileData: Buffer;

  try {
    const command = new GetObjectCommand({
      Bucket: secrets.S3_BUCKET,
      Key: file.path,
    });

    const response: any = await s3.send(command);
    fileData = await streamToBuffer(response.Body);
  } catch (err) {
    console.log(`❌ S3 Download failed for file ${file.id}`, err);
    return null;
  }

  const fileBlob = new Blob([fileData], { type: "application/pdf" });

  // ---------------------------------------------------------
  // 2) SEND FILE TO PDF → TEXT API
  // ---------------------------------------------------------
  const form = new FormData();
  form.append("fileInput", fileBlob, file.path.split("/").pop());
  form.append("outputFormat", "txt");

  let extractedText: string;

  try {
    const res = await axios.post(PDF_TO_TEXT_URL, form, {
      headers: {
        "Content-Type": "multipart/form-data",
        Authorization: `Bearer ${secrets.STIRLING_API_KEY}`,
      },
    });

    const body: unknown = res.data;
    if (typeof body !== "string" || body.trim() === "") {
      // Without any text there is nothing for the model to extract from.
      server.log.warn(`No text extracted from file ${file.id}`);
      return null;
    }
    extractedText = body;
  } catch (err) {
    console.log("❌ PDF OCR API failed", err);
    return null;
  }

  // ---------------------------------------------------------
  // 3) LOAD VENDORS + ACCOUNTS (DRIZZLE)
  // ---------------------------------------------------------
  // The two queries do not depend on each other, so run them together.
  const [vendorList, accountList] = await Promise.all([
    server.db
      .select({ id: vendors.id, name: vendors.name })
      .from(vendors)
      .where(eq(vendors.tenant, tenantId)),
    server.db
      .select({
        id: accounts.id,
        label: accounts.label,
        number: accounts.number,
      })
      .from(accounts),
  ]);

  // ---------------------------------------------------------
  // 4) GPT ANALYSIS
  // ---------------------------------------------------------
  const completion = await client.chat.completions.parse({
    model: "gpt-4o",
    store: true,
    response_format: zodResponseFormat(InstructionFormat as any, "instruction"),
    messages: [
      { role: "user", content: extractedText },
      {
        role: "user",
        content:
          "You extract structured invoice data.\n\n" +
          `VENDORS: ${JSON.stringify(vendorList)}\n` +
          `ACCOUNTS: ${JSON.stringify(accountList)}\n\n` +
          (learningContext
            ? `HISTORICAL_PATTERNS: ${learningContext}\n\n`
            : "") +
          "Match issuer by name to vendor.id.\n" +
          "Match invoice items to account id based on label/number.\n" +
          "Use historical patterns as soft hints for vendor/account/payment mapping.\n" +
          "Do not invent values when the invoice text contradicts the hints.\n" +
          "Convert dates to YYYY-MM-DD.\n" +
          "Keep invoice items in original order.\n",
      },
    ],
  });

  // Guard the index access: a response without choices yields null, not a TypeError.
  const parsed = completion.choices[0]?.message.parsed ?? null;

  console.log(`🧾 Extracted invoice data for file ${file.id}`);

  return parsed;
};