Files
FEDEO/backend/src/utils/gpt.ts
2026-01-06 12:07:43 +01:00

205 lines
6.3 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import dayjs from "dayjs";
import axios from "axios";
import OpenAI from "openai";
import { z } from "zod";
import { zodResponseFormat } from "openai/helpers/zod";
import { GetObjectCommand } from "@aws-sdk/client-s3";
import { Blob } from "buffer";
import { FastifyInstance } from "fastify";
import { s3 } from "./s3";
import { secrets } from "./secrets";
// Drizzle schema
import { vendors, accounts } from "../../db/schema";
import {eq} from "drizzle-orm";
// Module-level OpenAI client. Starts as null and is assigned by initOpenAi().
let openai: OpenAI | null = null;
// ---------------------------------------------------------
// INITIALIZE OPENAI
// ---------------------------------------------------------
/**
 * Builds a new OpenAI client from `secrets.OPENAI_API_KEY` and stores it in
 * the module-level `openai` variable, replacing whatever was there before.
 */
export const initOpenAi = async (): Promise<void> => {
  const apiKey = secrets.OPENAI_API_KEY;
  openai = new OpenAI({ apiKey });
};
// ---------------------------------------------------------
// STREAM → BUFFER
// ---------------------------------------------------------
/**
 * Collects every chunk emitted by an event-style readable stream into a
 * single Buffer.
 *
 * Chunks may arrive as Buffer, Uint8Array or string; each one is normalised
 * to a Buffer as it arrives (strings are encoded as UTF-8). Without this, a
 * string chunk would make `Buffer.concat` throw inside the "end" handler,
 * which surfaces as an uncaught exception instead of a rejected promise.
 *
 * @param stream - Object exposing `on("data" | "error" | "end", listener)`.
 * @returns Promise resolving to the concatenation of all chunks (an empty
 *          Buffer when the stream ends without emitting data).
 *          Rejects when the stream emits "error" or a chunk of an
 *          unsupported type.
 */
async function streamToBuffer(stream: any): Promise<Buffer> {
  return new Promise<Buffer>((resolve, reject) => {
    const chunks: Buffer[] = [];

    stream.on("data", (chunk: unknown) => {
      if (Buffer.isBuffer(chunk)) {
        chunks.push(chunk);
      } else if (typeof chunk === "string") {
        chunks.push(Buffer.from(chunk, "utf8"));
      } else if (chunk instanceof Uint8Array) {
        chunks.push(Buffer.from(chunk));
      } else {
        // Reject instead of throwing: an exception thrown from an event
        // listener would escape the promise entirely.
        reject(
          new TypeError(
            `streamToBuffer: unsupported chunk type ${typeof chunk}`
          )
        );
      }
    });

    stream.on("error", reject);
    stream.on("end", () => resolve(Buffer.concat(chunks)));
  });
}
// ---------------------------------------------------------
// GPT RESPONSE FORMAT (Zod Schema)
// ---------------------------------------------------------
// Factory for an optional, nullable numeric field. A function is used so that
// each field receives its own schema instance, exactly as when the chain is
// written out inline.
const nullableNumber = () => z.number().nullable().optional();

// Party that issued the invoice, including its bank details.
const IssuerFormat = z.object({
  id: nullableNumber(),
  name: z.string(),
  address: z.string(),
  phone: z.string(),
  email: z.string(),
  bank: z.string(),
  bic: z.string(),
  iban: z.string(),
});

// Party the invoice is addressed to.
const RecipientFormat = z.object({
  name: z.string(),
  address: z.string(),
  phone: z.string(),
  email: z.string(),
});

// One line item of the invoice.
const InvoiceItemFormat = z.object({
  description: z.string(),
  unit: z.string(),
  quantity: z.number(),
  total: z.number(),
  total_without_tax: z.number(),
  tax_rate: z.number(),
  ean: nullableNumber(),
  article_number: nullableNumber(),
  account_number: nullableNumber(),
  account_id: nullableNumber(),
});

/**
 * Shape of the structured invoice data requested from the model.
 * Passed to `zodResponseFormat` as the response format of the completion.
 */
const InstructionFormat = z.object({
  invoice_number: z.string(),
  invoice_date: z.string(),
  invoice_duedate: z.string(),
  invoice_type: z.string(),
  delivery_type: z.string(),
  delivery_note_number: z.string(),
  reference: z.string(),
  issuer: IssuerFormat,
  recipient: RecipientFormat,
  invoice_items: z.array(InvoiceItemFormat),
  subtotal: z.number(),
  tax_rate: z.number(),
  tax: z.number(),
  total: z.number(),
  terms: z.string(),
});
// ---------------------------------------------------------
// MAIN FUNCTION (REPLACES SUPABASE VERSION)
// ---------------------------------------------------------
/**
 * Extracts structured invoice data from a PDF stored in S3.
 *
 * Steps: download the object at `file.path` from `secrets.S3_BUCKET`, convert
 * it to plain text through the PDF → text HTTP API, load the tenant's vendors
 * and the account list from the database, then ask the OpenAI chat
 * completions API for a response shaped like `InstructionFormat`.
 *
 * @param server   Fastify instance; `server.db` runs the queries and
 *                 `server.log` receives the "skipping" warning.
 * @param file     File record; `file.id` (log messages) and `file.path`
 *                 (S3 object key) are read.
 * @param tenantId Tenant whose vendors are offered to the model for issuer
 *                 matching.
 * @returns The parsed completion, or `null` when the file is not a PDF, the
 *          S3 download fails, the text conversion fails, or the completion
 *          carries no parsed message.
 * @throws Error when the OpenAI client is still unset after `initOpenAi()`.
 *         Errors from the database queries and from the OpenAI request are
 *         not caught here and propagate to the caller.
 */
export const getInvoiceDataFromGPT = async function (
  server: FastifyInstance,
  file: any,
  tenantId: number
) {
  await initOpenAi();
  const client = openai;
  if (!client) {
    throw new Error("OpenAI not initialized. Call initOpenAi() first.");
  }

  console.log(`📄 Reading invoice file ${file.id}`);

  // Only process PDFs. This is checked BEFORE the download so that non-PDF
  // objects are never fetched from S3 just to be thrown away. A missing or
  // non-string path is treated the same way and yields null.
  const filePath: unknown = file.path;
  if (typeof filePath !== "string" || !filePath.toLowerCase().endsWith(".pdf")) {
    server.log.warn(`Skipping non-PDF file ${file.id}`);
    return null;
  }

  // ---------------------------------------------------------
  // 1) DOWNLOAD PDF FROM S3
  // ---------------------------------------------------------
  let fileData: Buffer;
  try {
    const command = new GetObjectCommand({
      Bucket: secrets.S3_BUCKET,
      Key: filePath,
    });
    const response: any = await s3.send(command);
    fileData = await streamToBuffer(response.Body);
  } catch (err) {
    console.log(`❌ S3 Download failed for file ${file.id}`, err);
    return null;
  }

  const fileBlob = new Blob([fileData], { type: "application/pdf" });

  // ---------------------------------------------------------
  // 2) SEND FILE TO PDF → TEXT API
  // ---------------------------------------------------------
  const form = new FormData();
  // The upload is named after the last path segment of the S3 key.
  form.append("fileInput", fileBlob, filePath.split("/").pop());
  form.append("outputFormat", "txt");

  let extractedText: string;
  try {
    // NOTE(review): the endpoint is a hard-coded IP reached over plain HTTP
    // while a bearer token is sent along — consider moving the URL into
    // configuration and using HTTPS.
    const res = await axios.post(
      "http://23.88.52.85:8080/api/v1/convert/pdf/text",
      form,
      {
        headers: {
          "Content-Type": "multipart/form-data",
          Authorization: `Bearer ${secrets.STIRLING_API_KEY}`,
        },
      }
    );
    extractedText = res.data;
  } catch (err) {
    console.log("❌ PDF OCR API failed", err);
    return null;
  }

  // ---------------------------------------------------------
  // 3) LOAD VENDORS + ACCOUNTS (DRIZZLE)
  // ---------------------------------------------------------
  const vendorList = await server.db
    .select({ id: vendors.id, name: vendors.name })
    .from(vendors)
    .where(eq(vendors.tenant, tenantId));

  // NOTE(review): unlike vendors, accounts are loaded without a tenant
  // filter — confirm that accounts are meant to be shared across tenants.
  const accountList = await server.db
    .select({
      id: accounts.id,
      label: accounts.label,
      number: accounts.number,
    })
    .from(accounts);

  // ---------------------------------------------------------
  // 4) GPT ANALYSIS
  // ---------------------------------------------------------
  const completion = await client.chat.completions.parse({
    model: "gpt-4o",
    store: true,
    response_format: zodResponseFormat(InstructionFormat as any, "instruction"),
    messages: [
      // First message: the raw text extracted from the PDF.
      { role: "user", content: extractedText },
      // Second message: extraction instructions plus the lookup lists.
      {
        role: "user",
        content:
          "You extract structured invoice data.\n\n" +
          `VENDORS: ${JSON.stringify(vendorList)}\n` +
          `ACCOUNTS: ${JSON.stringify(accountList)}\n\n` +
          "Match issuer by name to vendor.id.\n" +
          "Match invoice items to account id based on label/number.\n" +
          "Convert dates to YYYY-MM-DD.\n" +
          "Keep invoice items in original order.\n",
      },
    ],
  });

  // Guard against an empty `choices` array instead of throwing a TypeError.
  const parsed = completion.choices[0]?.message.parsed ?? null;
  console.log(`🧾 Extracted invoice data for file ${file.id}`);
  return parsed;
};