import fs from "node:fs"
import path from "node:path"
import zlib from "node:zlib"

/** One account entry extracted from the PDF text. */
type ParsedAccount = {
    /** Account number, kept as the digit string captured from the PDF text (not converted to a number). */
    number: string
    /** Account label after whitespace normalization. */
    label: string
}

// Fallback PDF location used when no path is given on the command line.
// NOTE(review): this is an absolute path inside one user's home directory; on any
// other machine the PDF path has to be passed as a positional argument.
const DEFAULT_PDF_PATH = "/Users/florianfederspiel/Downloads/12901_DATEV-Kontenrahmen SKR 42 Vereine, Stiftungen, gGmbH (Bilanz).pdf"
// Value used for the `accountChart` column, both when filtering existing rows and when inserting.
const ACCOUNT_CHART = "skr42"

// Command-line arguments without the node executable and script path.
const args = process.argv.slice(2)
// --dry-run: compute and report what would be inserted, but do not insert.
const dryRun = args.includes("--dry-run")
// --parse-only: parse and print the PDF contents without importing the database module.
const parseOnly = args.includes("--parse-only")
// The first argument that is not a `--flag` is taken as the PDF path.
const pdfArg = args.find((arg) => !arg.startsWith("--"))
// An empty or missing positional argument falls back to the default path.
const pdfPath = path.resolve(pdfArg || DEFAULT_PDF_PATH)

/**
 * Decodes the escape sequences of a PDF literal string.
 *
 * Handled sequences (after a backslash):
 * - `n`, `r`, `t`, `b`, `f`: the corresponding control character
 * - `(`, `)`, `\`: the character itself
 * - one to three octal digits: the byte with that value; high-order overflow
 *   (values above 0o377) is ignored, so the result is always a single byte
 * - an end-of-line (LF, CR or CRLF): line continuation, produces no output
 * - any other character: the backslash is dropped and the character is kept
 *
 * A lone backslash at the very end of the input is dropped.
 *
 * @param raw Content of the literal string without the enclosing parentheses.
 * @returns The decoded text, one UTF-16 code unit per decoded byte.
 */
function decodePdfString(raw: string) {
    let out = ""

    for (let i = 0; i < raw.length; i += 1) {
        const ch = raw[i]

        if (ch !== "\\") {
            out += ch
            continue
        }

        const next = raw[i + 1]
        // Trailing backslash with nothing left to escape.
        if (next === undefined) break

        if (next >= "0" && next <= "7") {
            // Collect up to three octal digits in total.
            let digits = next
            while (digits.length < 3) {
                const candidate = raw[i + 1 + digits.length]
                if (candidate === undefined || candidate < "0" || candidate > "7") break
                digits += candidate
            }

            // Mask to one byte: overflow beyond 0o377 must be ignored.
            out += String.fromCharCode(parseInt(digits, 8) & 0xff)
            i += digits.length
            continue
        }

        // Consume the escaped character; `i` now points at `next`.
        i += 1

        switch (next) {
            case "n":
                out += "\n"
                break
            case "r":
                out += "\r"
                break
            case "t":
                out += "\t"
                break
            case "b":
                out += "\b"
                break
            case "f":
                out += "\f"
                break
            case "\n":
                // Line continuation: backslash + LF contributes nothing.
                break
            case "\r":
                // Line continuation: backslash + CR or CRLF contributes nothing.
                if (raw[i + 1] === "\n") i += 1
                break
            default:
                // Covers "(", ")", "\\" and unknown escapes: keep the character as-is.
                out += next
        }
    }

    return out
}

/**
 * Collects the text of all parenthesised literal strings in a `Tj`/`TJ`
 * operator segment and concatenates them.
 *
 * Everything outside the literal strings (for example the numbers between the
 * strings of a `TJ` array) is ignored, and the decoded pieces are joined
 * without a separator.
 *
 * @param segment Source text of a single text-showing operator.
 * @returns The decoded, concatenated string contents, or "" if there are none.
 */
function extractTextFromTjOperator(segment: string) {
    // `\\[\s\S]` (instead of `\\.`) lets an escaped CR/LF count as part of the
    // string; with `.` such a string would not match and its text would be lost.
    const literals = segment.match(/\((?:\\[\s\S]|[^\\)])*\)/g)
    if (!literals) return ""

    return literals
        // Strip the enclosing parentheses before decoding the escapes.
        .map((literal) => decodePdfString(literal.slice(1, -1)))
        .join("")
}

/**
 * Finds every `stream` … `endstream` section in the raw PDF bytes and returns
 * the contents of those that can be inflated with zlib.
 *
 * @param pdfBuffer Complete PDF file contents.
 * @returns The inflated stream bodies, each decoded as latin1, in file order.
 *          Sections that fail to inflate are skipped.
 */
function extractPdfTextStreams(pdfBuffer: Buffer) {
    const CR = 0x0d
    const LF = 0x0a
    const OPEN_KEYWORD = "stream"
    const CLOSE_KEYWORD = "endstream"

    const inflatedBodies: string[] = []
    let searchFrom = 0

    for (;;) {
        const openAt = pdfBuffer.indexOf(OPEN_KEYWORD, searchFrom)
        if (openAt === -1) break

        // Skip a single CRLF or LF directly after the keyword.
        let bodyStart = openAt + OPEN_KEYWORD.length
        if (pdfBuffer[bodyStart] === CR && pdfBuffer[bodyStart + 1] === LF) {
            bodyStart += 2
        } else if (pdfBuffer[bodyStart] === LF) {
            bodyStart += 1
        }

        const closeAt = pdfBuffer.indexOf(CLOSE_KEYWORD, bodyStart)
        if (closeAt === -1) break

        // Drop one CR that sits immediately in front of the closing keyword.
        let bodyEnd = closeAt
        if (closeAt > bodyStart && pdfBuffer[closeAt - 1] === CR) {
            bodyEnd = closeAt - 1
        }

        try {
            const body = pdfBuffer.subarray(bodyStart, bodyEnd)
            inflatedBodies.push(zlib.inflateSync(body).toString("latin1"))
        } catch {
            // ignore non-flate streams
        }

        searchFrom = closeAt + CLOSE_KEYWORD.length
    }

    return inflatedBodies
}

/**
 * Normalizes label text: every run of whitespace becomes a single space,
 * a hyphen surrounded by whitespace is tightened to a bare hyphen, and
 * leading/trailing whitespace is removed.
 *
 * @param value Raw text.
 * @returns The normalized text.
 */
function normalizeLabel(value: string) {
    const singleSpaced = value.replace(/\s+/g, " ")
    // After collapsing, whitespace around a hyphen can only be one plain space on each side.
    const hyphensTightened = singleSpaced.replace(/ - /g, "-")
    return hyphensTightened.trim()
}

/**
 * Heuristic check that a candidate label contains real text: it must hold at
 * least three letters (ASCII letters or German umlauts/ß).
 *
 * @param value Candidate label.
 * @returns `true` when three or more matching letters are present.
 */
function looksLikeAccountLabel(value: string) {
    const singleLetter = /^[A-Za-zÄÖÜäöüß]$/
    let remaining = 3

    for (const ch of value) {
        if (!singleLetter.test(ch)) continue
        remaining -= 1
        // Stop as soon as the threshold is reached.
        if (remaining === 0) return true
    }

    return false
}

/**
 * Extracts account numbers and labels from the text-showing operators of a PDF.
 *
 * Each `Tj`/`TJ` operator found in an inflated stream is reduced to its text;
 * text of the form `[A-Z]? <3-5 digits> 0 <label>` counts as an account line.
 * When a number occurs more than once, the longest label is kept.
 *
 * @param pdfBuffer Complete PDF file contents.
 * @returns One entry per account number, sorted by numeric value of the number.
 */
function parseAccountsFromPdf(pdfBuffer: Buffer): ParsedAccount[] {
    const textOperatorPattern = /\[(?:.|\r|\n)*?\]TJ|\((?:\\.|[^\\)])*\)Tj/g
    const accountLinePattern = /^\s*([A-Z])?\s*(\d{3,5})\s+0\s+(.+)$/

    const labelByNumber = new Map<string, string>()

    for (const content of extractPdfTextStreams(pdfBuffer)) {
        const textOperators = content.match(textOperatorPattern) ?? []

        for (const textOperator of textOperators) {
            const line = normalizeLabel(extractTextFromTjOperator(textOperator))
            if (line === "") continue

            const hit = accountLinePattern.exec(line)
            if (hit === null) continue

            // Group 1 (optional leading capital letter) is not used.
            const [, , number, rawLabel] = hit
            const label = normalizeLabel(rawLabel)
            if (!looksLikeAccountLabel(label)) continue

            // Keep an already recorded label unless the new one is strictly longer.
            const known = labelByNumber.get(number)
            if (known && known.length >= label.length) continue

            labelByNumber.set(number, label)
        }
    }

    const accounts = Array.from(labelByNumber, ([number, label]) => ({ number, label }))
    accounts.sort((left, right) => Number(left.number) - Number(right.number))
    return accounts
}

/** Prints the first 15 parsed accounts (fewer if the list is shorter), followed by a blank line. */
function printSampleAccounts(parsed: ParsedAccount[]) {
    console.log("[SKR42 IMPORT] Beispiel (erste 15):")
    for (const item of parsed.slice(0, 15)) {
        console.log(`  ${item.number} ${item.label}`)
    }
    console.log("")
}

/**
 * Script entry point: parses the PDF at `pdfPath` and, unless `--parse-only`
 * is set, inserts every account whose number is not yet stored for the
 * `ACCOUNT_CHART` chart.
 *
 * - `--parse-only`: prints a summary and sample, then returns without loading
 *   the database modules.
 * - `--dry-run`: queries the existing accounts and reports what would be
 *   inserted, but performs no insert.
 *
 * @throws Error when the PDF file does not exist or no account could be parsed.
 */
async function main() {
    if (!fs.existsSync(pdfPath)) {
        throw new Error(`PDF nicht gefunden: ${pdfPath}`)
    }

    const pdfBuffer = fs.readFileSync(pdfPath)
    const parsed = parseAccountsFromPdf(pdfBuffer)

    // From here on `parsed` is guaranteed to be non-empty.
    if (!parsed.length) {
        throw new Error("Keine Konten aus PDF extrahiert.")
    }

    if (parseOnly) {
        console.log("")
        console.log(`[SKR42 IMPORT] PDF: ${pdfPath}`)
        console.log(`[SKR42 IMPORT] Gefundene Konten: ${parsed.length}`)
        console.log(`[SKR42 IMPORT] Parse-Only: JA`)
        console.log("")
        printSampleAccounts(parsed)
        return
    }

    // Database modules are imported lazily so that --parse-only never loads them.
    const { eq } = await import("drizzle-orm")
    const { db } = await import("../db")
    const { accounts } = await import("../db/schema")

    const existing = await db
        .select({ number: accounts.number })
        .from(accounts)
        .where(eq(accounts.accountChart, ACCOUNT_CHART))

    // Compare as strings because parsed account numbers are strings.
    const existingSet = new Set(existing.map((r) => String(r.number)))

    const toInsert = parsed
        .filter((a) => !existingSet.has(a.number))
        .map((a) => ({
            number: a.number,
            label: a.label,
            accountChart: ACCOUNT_CHART,
            description: "DATEV SKR42 Import",
        }))

    if (!dryRun && toInsert.length > 0) {
        // Insert in chunks of at most 500 rows per statement.
        const batchSize = 500
        for (let i = 0; i < toInsert.length; i += batchSize) {
            const batch = toInsert.slice(i, i + batchSize)
            await db.insert(accounts).values(batch)
        }
    }

    console.log("")
    console.log(`[SKR42 IMPORT] PDF: ${pdfPath}`)
    console.log(`[SKR42 IMPORT] Gefundene Konten: ${parsed.length}`)
    console.log(`[SKR42 IMPORT] Bereits vorhanden (skr42): ${existing.length}`)
    console.log(`[SKR42 IMPORT] Neu einzufuegen: ${toInsert.length}`)
    console.log(`[SKR42 IMPORT] Dry-Run: ${dryRun ? "JA" : "NEIN"}`)
    console.log("")

    printSampleAccounts(parsed)
}

// Run the import. Failures are logged and turned into a non-zero exit code
// instead of an uncaught rejection.
main()
    .catch((err) => {
        console.error("[SKR42 IMPORT] Fehler:", err)
        process.exitCode = 1
    })
    .finally(async () => {
        // --parse-only never loads the database module, so there is nothing to close.
        if (parseOnly) return

        try {
            const { pool } = await import("../db")
            await pool.end()
        } catch (err) {
            // A failure here would otherwise surface as an unhandled rejection,
            // because nothing is chained after this handler.
            console.error("[SKR42 IMPORT] Fehler beim Schliessen der DB-Verbindung:", err)
            process.exitCode = 1
        }
    })