205 lines
6.3 KiB
TypeScript
205 lines
6.3 KiB
TypeScript
import dayjs from "dayjs";
|
||
import axios from "axios";
|
||
import OpenAI from "openai";
|
||
import { z } from "zod";
|
||
import { zodResponseFormat } from "openai/helpers/zod";
|
||
import { GetObjectCommand } from "@aws-sdk/client-s3";
|
||
import { Blob } from "buffer";
|
||
import { FastifyInstance } from "fastify";
|
||
|
||
import { s3 } from "./s3";
|
||
import { secrets } from "./secrets";
|
||
|
||
// Drizzle schema
|
||
import { vendors, accounts } from "../../db/schema";
|
||
import {eq} from "drizzle-orm";
|
||
|
||
let openai: OpenAI | null = null;
|
||
|
||
// ---------------------------------------------------------
|
||
// INITIALIZE OPENAI
|
||
// ---------------------------------------------------------
|
||
/**
 * Builds the module-level OpenAI client from `secrets.OPENAI_API_KEY`.
 *
 * Every call constructs a new client and replaces whatever was stored in the
 * module-level `openai` variable before.
 */
export const initOpenAi = async (): Promise<void> => {
  const apiKey = secrets.OPENAI_API_KEY;
  openai = new OpenAI({ apiKey });
};
|
||
|
||
// ---------------------------------------------------------
|
||
// STREAM → BUFFER
|
||
// ---------------------------------------------------------
|
||
/**
 * Collects everything an event-emitter style readable stream produces into a
 * single Buffer.
 *
 * @param stream - Object exposing `on(event, listener)` that emits "data",
 *   "end" and "error" events (e.g. a Node.js `Readable`).
 * @returns Promise resolving to the concatenation of all received chunks, in
 *   arrival order. An empty stream resolves to an empty Buffer.
 * @throws TypeError (as a rejection) when `stream` is missing or has no `on`
 *   method. The promise also rejects with whatever the stream emits on "error".
 */
async function streamToBuffer(stream: any): Promise<Buffer> {
  if (stream == null || typeof stream.on !== "function") {
    throw new TypeError(
      "streamToBuffer: expected a readable stream exposing an on() method"
    );
  }

  // Streams with an encoding set (or object-mode streams) may emit strings or
  // plain Uint8Arrays; normalise so Buffer.concat never throws inside the
  // "end" listener, where the exception would escape the promise entirely.
  const toBuffer = (chunk: Buffer | Uint8Array | string): Buffer => {
    if (typeof chunk === "string") {
      return Buffer.from(chunk, "utf8");
    }
    return Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
  };

  return new Promise<Buffer>((resolve, reject) => {
    const chunks: Buffer[] = [];
    stream.on("data", (chunk: Buffer | Uint8Array | string) => {
      chunks.push(toBuffer(chunk));
    });
    stream.on("error", reject);
    stream.on("end", () => resolve(Buffer.concat(chunks)));
  });
}
|
||
|
||
// ---------------------------------------------------------
|
||
// GPT RESPONSE FORMAT (Zod Schema)
|
||
// ---------------------------------------------------------
|
||
/** Numeric field that may be a number, `null`, or left out entirely. */
const optionalNumber = () => z.number().nullable().optional();

/** Party that issued the invoice, including its bank details. */
const IssuerFormat = z.object({
  id: optionalNumber(),
  name: z.string(),
  address: z.string(),
  phone: z.string(),
  email: z.string(),
  bank: z.string(),
  bic: z.string(),
  iban: z.string(),
});

/** Party the invoice is addressed to. */
const RecipientFormat = z.object({
  name: z.string(),
  address: z.string(),
  phone: z.string(),
  email: z.string(),
});

/** One invoice line item. */
const InvoiceItemFormat = z.object({
  description: z.string(),
  unit: z.string(),
  quantity: z.number(),
  total: z.number(),
  total_without_tax: z.number(),
  tax_rate: z.number(),
  ean: optionalNumber(),
  article_number: optionalNumber(),
  account_number: optionalNumber(),
  account_id: optionalNumber(),
});

/**
 * Zod schema describing the structured invoice object requested from the
 * model. It is passed to `zodResponseFormat` as the response format of the
 * chat completion.
 */
const InstructionFormat = z.object({
  invoice_number: z.string(),
  invoice_date: z.string(),
  invoice_duedate: z.string(),
  invoice_type: z.string(),
  delivery_type: z.string(),
  delivery_note_number: z.string(),
  reference: z.string(),
  issuer: IssuerFormat,
  recipient: RecipientFormat,
  invoice_items: z.array(InvoiceItemFormat),
  subtotal: z.number(),
  tax_rate: z.number(),
  tax: z.number(),
  total: z.number(),
  terms: z.string(),
});
|
||
|
||
// ---------------------------------------------------------
|
||
// MAIN FUNCTION – REPLACES SUPABASE VERSION
|
||
// ---------------------------------------------------------
|
||
/**
 * Extracts structured invoice data from a PDF stored in S3.
 *
 * Steps:
 *  1. Skip anything whose path does not end in ".pdf" (before downloading).
 *  2. Download the object `file.path` from the bucket `secrets.S3_BUCKET`.
 *  3. Send the PDF to the PDF-to-text conversion endpoint.
 *  4. Load the tenant's vendors and the accounts table via Drizzle.
 *  5. Ask the model for an object matching `InstructionFormat`, giving it the
 *     extracted text plus the vendor and account lists for matching.
 *
 * @param server - Fastify instance; `server.db` is used for the queries and
 *   `server.log` for logging.
 * @param file - File record; `file.id` is used in log messages and
 *   `file.path` as the S3 object key.
 * @param tenantId - Tenant whose vendors are offered to the model.
 * @returns The parsed object returned by the model, or `null` when the file is
 *   not a PDF, the download or text conversion fails, no text was extracted,
 *   or the model response contains no parsed content.
 * @throws If the OpenAI client cannot be created, or if the database queries
 *   or the OpenAI request fail (these are not caught here).
 */
export const getInvoiceDataFromGPT = async function (
  server: FastifyInstance,
  file: any,
  tenantId: number
) {
  // Create the client lazily and reuse it on subsequent calls instead of
  // constructing a new one for every invoice.
  if (!openai) {
    await initOpenAi();
  }
  if (!openai) {
    throw new Error("OpenAI not initialized. Call initOpenAi() first.");
  }
  const client = openai;

  server.log.info(`📄 Reading invoice file ${file.id}`);

  // Only process PDFs. Checked up front so non-PDF objects are never
  // downloaded just to be thrown away.
  const filePath: unknown = file.path;
  if (typeof filePath !== "string" || !filePath.toLowerCase().endsWith(".pdf")) {
    server.log.warn(`Skipping non-PDF file ${file.id}`);
    return null;
  }

  // ---------------------------------------------------------
  // 1) DOWNLOAD PDF FROM S3
  // ---------------------------------------------------------
  let fileData: Buffer;

  try {
    const command = new GetObjectCommand({
      Bucket: secrets.S3_BUCKET,
      Key: filePath,
    });

    const response: any = await s3.send(command);
    fileData = await streamToBuffer(response.Body);
  } catch (err) {
    server.log.error({ err }, `❌ S3 Download failed for file ${file.id}`);
    return null;
  }

  const fileBlob = new Blob([fileData], { type: "application/pdf" });

  // ---------------------------------------------------------
  // 2) SEND FILE TO PDF → TEXT API
  // ---------------------------------------------------------
  const form = new FormData();
  form.append("fileInput", fileBlob, filePath.split("/").pop());
  form.append("outputFormat", "txt");

  let extractedText: string;

  try {
    // NOTE(review): hard-coded host reached over plain HTTP while a bearer
    // token is sent along — consider moving the URL to configuration and
    // using HTTPS.
    const res = await axios.post(
      "http://23.88.52.85:8080/api/v1/convert/pdf/text",
      form,
      {
        headers: {
          "Content-Type": "multipart/form-data",
          Authorization: `Bearer ${secrets.STIRLING_API_KEY}`,
        },
        // Keep the body as raw text; without this axios attempts to
        // JSON-parse string responses.
        responseType: "text",
      }
    );

    extractedText = typeof res.data === "string" ? res.data : "";
  } catch (err) {
    server.log.error({ err }, "❌ PDF OCR API failed");
    return null;
  }

  // Without any text the model has nothing to extract from.
  if (extractedText.trim().length === 0) {
    server.log.warn(`No text could be extracted from file ${file.id}`);
    return null;
  }

  // ---------------------------------------------------------
  // 3) LOAD VENDORS + ACCOUNTS (DRIZZLE)
  // ---------------------------------------------------------
  // The two queries are independent, so run them concurrently.
  // NOTE(review): accounts are loaded without a tenant filter, unlike
  // vendors — confirm that accounts are meant to be shared across tenants.
  const [vendorList, accountList] = await Promise.all([
    server.db
      .select({ id: vendors.id, name: vendors.name })
      .from(vendors)
      .where(eq(vendors.tenant, tenantId)),
    server.db
      .select({
        id: accounts.id,
        label: accounts.label,
        number: accounts.number,
      })
      .from(accounts),
  ]);

  // ---------------------------------------------------------
  // 4) GPT ANALYSIS
  // ---------------------------------------------------------
  // NOTE(review): the instructions are sent with role "user" after the
  // invoice text; a leading "system" message may be more appropriate.
  const completion = await client.chat.completions.parse({
    model: "gpt-4o",
    store: true,
    response_format: zodResponseFormat(InstructionFormat as any, "instruction"),
    messages: [
      { role: "user", content: extractedText },
      {
        role: "user",
        content:
          "You extract structured invoice data.\n\n" +
          `VENDORS: ${JSON.stringify(vendorList)}\n` +
          `ACCOUNTS: ${JSON.stringify(accountList)}\n\n` +
          "Match issuer by name to vendor.id.\n" +
          "Match invoice items to account id based on label/number.\n" +
          "Convert dates to YYYY-MM-DD.\n" +
          "Keep invoice items in original order.\n",
      },
    ],
  });

  // An empty choices array or a message without parsed content yields null
  // instead of a TypeError.
  const parsed = completion.choices[0]?.message.parsed ?? null;
  if (parsed === null) {
    server.log.warn(`Model returned no parsed invoice data for file ${file.id}`);
    return null;
  }

  server.log.info(`🧾 Extracted invoice data for file ${file.id}`);

  return parsed;
};
|