Added Prepare Service

This commit is contained in:
2026-01-02 12:45:14 +01:00
parent 8a87113275
commit e8fe6940c2

204
src/utils/gpt.ts Normal file
View File

@@ -0,0 +1,204 @@
import dayjs from "dayjs";
import axios from "axios";
import OpenAI from "openai";
import { z } from "zod";
import { zodResponseFormat } from "openai/helpers/zod";
import { GetObjectCommand } from "@aws-sdk/client-s3";
import { Blob } from "buffer";
import { FastifyInstance } from "fastify";
import { s3 } from "./s3";
import { secrets } from "./secrets";
// Drizzle schema
import { vendors, accounts } from "../../db/schema";
import {eq} from "drizzle-orm";
/** Module-wide OpenAI client; stays `null` until `initOpenAi()` has been called. */
let openai: OpenAI | null = null;

// ---------------------------------------------------------
// INITIALIZE OPENAI
// ---------------------------------------------------------
/**
 * Builds a new OpenAI client from `secrets.OPENAI_API_KEY` and stores it in the
 * module-level `openai` variable, replacing any client created earlier.
 */
export const initOpenAi = async (): Promise<void> => {
  const apiKey = secrets.OPENAI_API_KEY;
  openai = new OpenAI({ apiKey });
};
// ---------------------------------------------------------
// STREAM → BUFFER
// ---------------------------------------------------------
/**
 * Collects every chunk emitted by an event-style readable stream into a single
 * Buffer.
 *
 * Chunks are normalised to Buffers as they arrive. A stream with an encoding
 * set (or an object-mode stream) emits strings, and `Buffer.concat` throws on
 * those — inside the "end" listener, where the throw would escape as an
 * uncaught exception instead of rejecting the returned promise.
 *
 * @param stream - Object exposing `on("data" | "error" | "end", ...)`. Typed as
 *   `any` because the caller in this file passes an untyped S3 response body.
 * @returns The concatenated bytes; an empty Buffer when the stream emits nothing.
 * @throws Rejects with an Error when `stream` is missing or has no `on` method,
 *   and with the stream's own error when it emits "error".
 */
async function streamToBuffer(stream: any): Promise<Buffer> {
  if (stream == null || typeof stream.on !== "function") {
    throw new Error("streamToBuffer: expected a readable stream");
  }

  return new Promise<Buffer>((resolve, reject) => {
    const chunks: Buffer[] = [];

    stream.on("data", (chunk: Buffer | Uint8Array | string) => {
      if (typeof chunk === "string") {
        // String chunks are re-encoded with Buffer.from's default (UTF-8).
        chunks.push(Buffer.from(chunk));
      } else {
        chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
      }
    });
    stream.on("error", reject);
    stream.on("end", () => resolve(Buffer.concat(chunks)));
  });
}
// ---------------------------------------------------------
// GPT RESPONSE FORMAT (Zod Schema)
// ---------------------------------------------------------
/**
 * Creates a fresh schema for a numeric field that may be a number, `null`, or
 * absent. A factory rather than a shared constant, so every field receives its
 * own schema instance exactly as if it had been written inline.
 */
const optionalNumber = () => z.number().nullable().optional();

/** Sender of the invoice. `id` is optional/nullable; the contact and bank fields are plain strings. */
const IssuerFormat = z.object({
  id: optionalNumber(),
  name: z.string(),
  address: z.string(),
  phone: z.string(),
  email: z.string(),
  bank: z.string(),
  bic: z.string(),
  iban: z.string(),
});

/** Addressee of the invoice. */
const RecipientFormat = z.object({
  name: z.string(),
  address: z.string(),
  phone: z.string(),
  email: z.string(),
});

/** One invoice line; the identifier fields (EAN, article, account) are optional/nullable numbers. */
const InvoiceItemFormat = z.object({
  description: z.string(),
  unit: z.string(),
  quantity: z.number(),
  total: z.number(),
  total_without_tax: z.number(),
  tax_rate: z.number(),
  ean: optionalNumber(),
  article_number: optionalNumber(),
  account_number: optionalNumber(),
  account_id: optionalNumber(),
});

/**
 * Zod schema describing the structured invoice object requested from the model.
 * Dates are carried as strings; amounts and rates are numbers.
 */
const InstructionFormat = z.object({
  invoice_number: z.string(),
  invoice_date: z.string(),
  invoice_duedate: z.string(),
  invoice_type: z.string(),
  delivery_type: z.string(),
  delivery_note_number: z.string(),
  reference: z.string(),
  issuer: IssuerFormat,
  recipient: RecipientFormat,
  invoice_items: z.array(InvoiceItemFormat),
  subtotal: z.number(),
  tax_rate: z.number(),
  tax: z.number(),
  total: z.number(),
  terms: z.string(),
});
// ---------------------------------------------------------
// MAIN FUNCTION (REPLACES THE SUPABASE VERSION)
// ---------------------------------------------------------
/**
 * Extracts structured invoice data from a PDF stored in S3.
 *
 * Steps: verify the file is a PDF, download it from S3, convert it to text via
 * the PDF-to-text HTTP API, load the tenant's vendors plus the account list,
 * and ask the OpenAI model for an object matching `InstructionFormat`.
 *
 * @param server - Fastify instance; `server.db` is used for the queries and
 *   `server.log` for warnings.
 * @param file - File record; only `file.id` (for logging) and `file.path`
 *   (S3 key) are read.
 * @param tenantId - Tenant whose vendors are offered to the model for matching.
 * @returns The parsed invoice object, or `null` when the file is not a PDF, the
 *   S3 download fails, the text conversion fails or yields no text, or the
 *   model returns no parsed result.
 * @throws Error when the OpenAI client is not initialised. Errors from the
 *   database queries and from the OpenAI request are not caught here and
 *   propagate to the caller.
 */
export const getInvoiceDataFromGPT = async function (
  server: FastifyInstance,
  file: any,
  tenantId: number
) {
  await initOpenAi();
  const client = openai;
  if (!client) {
    throw new Error("OpenAI not initialized. Call initOpenAi() first.");
  }

  console.log(`📄 Reading invoice file ${file.id}`);

  // Only process PDFs. Checked before the download so that non-PDF files (or a
  // record without a usable path) do not cost an S3 round trip.
  const filePath: unknown = file.path;
  if (typeof filePath !== "string" || !filePath.toLowerCase().endsWith(".pdf")) {
    server.log.warn(`Skipping non-PDF file ${file.id}`);
    return null;
  }

  // ---------------------------------------------------------
  // 1) DOWNLOAD PDF FROM S3
  // ---------------------------------------------------------
  let fileData: Buffer;
  try {
    const command = new GetObjectCommand({
      Bucket: secrets.S3_BUCKET,
      Key: filePath,
    });
    const response: any = await s3.send(command);
    fileData = await streamToBuffer(response.Body);
  } catch (err) {
    console.log(`❌ S3 Download failed for file ${file.id}`, err);
    return null;
  }

  const fileBlob = new Blob([fileData], { type: "application/pdf" });

  // ---------------------------------------------------------
  // 2) SEND FILE TO PDF → TEXT API
  // ---------------------------------------------------------
  // NOTE(review): the endpoint is a hard-coded IP reached over plain HTTP, so
  // the bearer token and the invoice content travel unencrypted — consider
  // moving the URL into configuration and using HTTPS.
  const form = new FormData();
  form.append("fileInput", fileBlob, filePath.split("/").pop());
  form.append("outputFormat", "txt");

  let ocrResult: unknown;
  try {
    const res = await axios.post(
      "http://23.88.52.85:8080/api/v1/convert/pdf/text",
      form,
      {
        headers: {
          "Content-Type": "multipart/form-data",
          Authorization: `Bearer ${secrets.STIRLING_API_KEY}`,
        },
      }
    );
    ocrResult = res.data;
  } catch (err) {
    console.log("❌ PDF OCR API failed", err);
    return null;
  }

  // Without any text there is nothing for the model to extract from; bail out
  // instead of sending an empty (or non-string) message.
  if (typeof ocrResult !== "string" || ocrResult.trim() === "") {
    server.log.warn(`No text extracted from file ${file.id}`);
    return null;
  }
  const extractedText = ocrResult;

  // ---------------------------------------------------------
  // 3) LOAD VENDORS + ACCOUNTS (DRIZZLE)
  // ---------------------------------------------------------
  // The two queries are independent, so they run concurrently.
  // NOTE(review): accounts are loaded without a tenant filter, unlike vendors —
  // confirm that accounts are meant to be shared across tenants.
  const [vendorList, accountList] = await Promise.all([
    server.db
      .select({ id: vendors.id, name: vendors.name })
      .from(vendors)
      .where(eq(vendors.tenant, tenantId)),
    server.db
      .select({
        id: accounts.id,
        label: accounts.label,
        number: accounts.number,
      })
      .from(accounts),
  ]);

  // ---------------------------------------------------------
  // 4) GPT ANALYSIS
  // ---------------------------------------------------------
  // NOTE(review): the instructions are sent as a second user message after the
  // untrusted invoice text; a leading system message may be more robust.
  const completion = await client.chat.completions.parse({
    model: "gpt-4o",
    store: true,
    response_format: zodResponseFormat(InstructionFormat, "instruction"),
    messages: [
      { role: "user", content: extractedText },
      {
        role: "user",
        content:
          "You extract structured invoice data.\n\n" +
          `VENDORS: ${JSON.stringify(vendorList)}\n` +
          `ACCOUNTS: ${JSON.stringify(accountList)}\n\n` +
          "Match issuer by name to vendor.id.\n" +
          "Match invoice items to account id based on label/number.\n" +
          "Convert dates to YYYY-MM-DD.\n" +
          "Keep invoice items in original order.\n",
      },
    ],
  });

  // Guard against a response with no choices or no parsed payload, so success
  // is only logged when there is data to return.
  const parsed = completion.choices[0]?.message.parsed ?? null;
  if (parsed === null) {
    server.log.warn(`No structured invoice data returned for file ${file.id}`);
    return null;
  }

  console.log(`🧾 Extracted invoice data for file ${file.id}`);
  return parsed;
};