Fixes
This commit is contained in:
@@ -1,14 +1,13 @@
|
||||
import dayjs from "dayjs";
|
||||
import axios from "axios";
|
||||
import OpenAI from "openai";
|
||||
import { z } from "zod";
|
||||
import { zodResponseFormat } from "openai/helpers/zod";
|
||||
import { GetObjectCommand } from "@aws-sdk/client-s3";
|
||||
import { Blob } from "buffer";
|
||||
import { FastifyInstance } from "fastify";
|
||||
|
||||
import { s3 } from "./s3";
|
||||
import { secrets } from "./secrets";
|
||||
import { storeExtractedTextForFile } from "./documentText";
|
||||
|
||||
// Drizzle schema
|
||||
import { vendors, accounts, tenants } from "../../db/schema";
|
||||
@@ -16,6 +15,9 @@ import {eq} from "drizzle-orm";
|
||||
|
||||
// Module-level OpenAI client handle; starts out as null.
// NOTE(review): presumably assigned by initOpenAi() before first use — the
// assignment itself is not visible in this excerpt, confirm there.
let openai: OpenAI | null = null;
|
||||
|
||||
// Shared zod field schema: a string (trimmed by zod on parse) or null.
const nullableString = z.string().trim().nullable();
|
||||
// Shared zod field schema: a number or null.
const nullableNumber = z.number().nullable();
|
||||
|
||||
// ---------------------------------------------------------
|
||||
// INITIALIZE OPENAI
|
||||
// ---------------------------------------------------------
|
||||
@@ -41,48 +43,48 @@ async function streamToBuffer(stream: any): Promise<Buffer> {
|
||||
// GPT RESPONSE FORMAT (Zod Schema)
|
||||
// ---------------------------------------------------------
|
||||
const InstructionFormat = z.object({
|
||||
invoice_number: z.string(),
|
||||
invoice_date: z.string(),
|
||||
invoice_duedate: z.string(),
|
||||
invoice_type: z.string(),
|
||||
delivery_type: z.string(),
|
||||
delivery_note_number: z.string(),
|
||||
reference: z.string(),
|
||||
invoice_number: nullableString,
|
||||
invoice_date: nullableString,
|
||||
invoice_duedate: nullableString,
|
||||
invoice_type: nullableString,
|
||||
delivery_type: nullableString,
|
||||
delivery_note_number: nullableString,
|
||||
reference: nullableString,
|
||||
issuer: z.object({
|
||||
id: z.number().nullable().optional(),
|
||||
name: z.string(),
|
||||
address: z.string(),
|
||||
phone: z.string(),
|
||||
email: z.string(),
|
||||
bank: z.string(),
|
||||
bic: z.string(),
|
||||
iban: z.string(),
|
||||
id: nullableNumber.optional(),
|
||||
name: nullableString,
|
||||
address: nullableString,
|
||||
phone: nullableString,
|
||||
email: nullableString,
|
||||
bank: nullableString,
|
||||
bic: nullableString,
|
||||
iban: nullableString,
|
||||
}),
|
||||
recipient: z.object({
|
||||
name: z.string(),
|
||||
address: z.string(),
|
||||
phone: z.string(),
|
||||
email: z.string(),
|
||||
name: nullableString,
|
||||
address: nullableString,
|
||||
phone: nullableString,
|
||||
email: nullableString,
|
||||
}),
|
||||
invoice_items: z.array(
|
||||
z.object({
|
||||
description: z.string(),
|
||||
unit: z.string(),
|
||||
quantity: z.number(),
|
||||
total: z.number(),
|
||||
total_without_tax: z.number(),
|
||||
tax_rate: z.number(),
|
||||
ean: z.number().nullable().optional(),
|
||||
article_number: z.number().nullable().optional(),
|
||||
account_number: z.number().nullable().optional(),
|
||||
account_id: z.number().nullable().optional(),
|
||||
description: nullableString,
|
||||
unit: nullableString,
|
||||
quantity: nullableNumber,
|
||||
total: nullableNumber,
|
||||
total_without_tax: nullableNumber,
|
||||
tax_rate: nullableNumber,
|
||||
ean: nullableNumber.optional(),
|
||||
article_number: nullableNumber.optional(),
|
||||
account_number: nullableNumber.optional(),
|
||||
account_id: nullableNumber.optional(),
|
||||
})
|
||||
),
|
||||
subtotal: z.number(),
|
||||
tax_rate: z.number(),
|
||||
tax: z.number(),
|
||||
total: z.number(),
|
||||
terms: z.string(),
|
||||
subtotal: nullableNumber,
|
||||
tax_rate: nullableNumber,
|
||||
tax: nullableNumber,
|
||||
total: nullableNumber,
|
||||
terms: nullableString,
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------
|
||||
@@ -91,8 +93,7 @@ const InstructionFormat = z.object({
|
||||
export const getInvoiceDataFromGPT = async function (
|
||||
server: FastifyInstance,
|
||||
file: any,
|
||||
tenantId: number,
|
||||
learningContext?: string
|
||||
tenantId: number
|
||||
) {
|
||||
await initOpenAi();
|
||||
|
||||
@@ -126,32 +127,27 @@ export const getInvoiceDataFromGPT = async function (
|
||||
return null;
|
||||
}
|
||||
|
||||
const fileBlob = new Blob([fileData], { type: "application/pdf" });
|
||||
let extractedText = file.extractedText;
|
||||
|
||||
// ---------------------------------------------------------
|
||||
// 2) EXTRACT TEXT FROM THE PDF
|
||||
// ---------------------------------------------------------
|
||||
const form = new FormData();
|
||||
form.append("fileInput", fileBlob, file.path.split("/").pop());
|
||||
form.append("outputFormat", "txt");
|
||||
if (!extractedText?.trim()) {
|
||||
try {
|
||||
const result = await storeExtractedTextForFile(
|
||||
server,
|
||||
file.id,
|
||||
fileData,
|
||||
file.mimeType,
|
||||
file.name || file.path?.split("/").pop()
|
||||
);
|
||||
extractedText = result.text;
|
||||
server.log.info(`Invoice text extraction for file ${file.id} used method: ${result.method}`)
|
||||
} catch (err) {
|
||||
console.log("❌ Local PDF text extraction failed", err);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
let extractedText: string;
|
||||
|
||||
try {
|
||||
const res = await axios.post(
|
||||
"http://23.88.52.85:8080/api/v1/convert/pdf/text",
|
||||
form,
|
||||
{
|
||||
headers: {
|
||||
"Content-Type": "multipart/form-data",
|
||||
Authorization: `Bearer ${secrets.STIRLING_API_KEY}`,
|
||||
},
|
||||
}
|
||||
);
|
||||
|
||||
extractedText = res.data;
|
||||
} catch (err) {
|
||||
console.log("❌ PDF OCR API failed", err);
|
||||
if (!extractedText?.trim()) {
|
||||
server.log.warn(`No extractable PDF text found for file ${file.id}. Scanned PDFs require OCR.`);
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -198,13 +194,16 @@ export const getInvoiceDataFromGPT = async function (
|
||||
"You extract structured invoice data.\n\n" +
|
||||
`VENDORS: ${JSON.stringify(vendorList)}\n` +
|
||||
`ACCOUNTS: ${JSON.stringify(accountList)}\n\n` +
|
||||
(learningContext
|
||||
? `HISTORICAL_PATTERNS: ${learningContext}\n\n`
|
||||
: "") +
|
||||
"Use only values that are explicitly present in the invoice text.\n" +
|
||||
"If a field is missing or unclear, return null. If line items are missing or unclear, return an empty array.\n" +
|
||||
"Do not guess invoice numbers, dates, totals, payment terms, bank data, or references.\n" +
|
||||
"Do not derive values from vendor defaults or likely patterns.\n" +
|
||||
"Only set issuer.id when the issuer name clearly matches a vendor name from VENDORS.\n" +
|
||||
"Only set account_id when the invoice line clearly matches an account label or number from ACCOUNTS.\n" +
|
||||
"If multiple accounts are plausible, set account_id to null.\n" +
|
||||
"Do not merge summary totals into fabricated invoice_items.\n" +
|
||||
"Match issuer by name to vendor.id.\n" +
|
||||
"Match invoice items to account id based on label/number.\n" +
|
||||
"Use historical patterns as soft hints for vendor/account/payment mapping.\n" +
|
||||
"Do not invent values when the invoice text contradicts the hints.\n" +
|
||||
"Convert dates to YYYY-MM-DD.\n" +
|
||||
"Keep invoice items in original order.\n",
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user