// Gist robiiinos/5ba47f02563847d077b89f2bdec30785 — created July 7, 2025 13:17.
// Note: GitHub flagged this file as containing hidden or bidirectional Unicode text
// that may be interpreted or compiled differently than it appears. Review it in an
// editor that reveals hidden Unicode characters.
import { env, fetch } from "bun";
import { z } from "zod/v4";
import dedent from "dedent";
/**
 * One of the two shapes `ErrorResponse.error.metadata` may take.
 *
 * NOTE(review): judging by the name, this variant accompanies errors raised
 * because the input was flagged by moderation — confirm the exact field
 * semantics against the OpenRouter error documentation.
 */
export type ModerationErrorMetadata = {
  /** Reasons reported for the error. */
  reasons: string[];
  /** The input that was flagged (presumably a possibly truncated excerpt — verify). */
  flagged_input: string;
  /** Name of the provider associated with the error. */
  provider_name: string;
  /** Slug of the model associated with the error. */
  model_slug: string;
};
/**
 * One of the two shapes `ErrorResponse.error.metadata` may take; it is the
 * variant distinguished by the presence of a `raw` field.
 */
export type ProviderErrorMetadata = {
  /** Name of the provider associated with the error. */
  provider_name: string;
  /**
   * Untyped provider payload. It is `unknown` on purpose: narrow it
   * (e.g. `typeof raw === "string"`) before using it.
   */
  raw: unknown;
};
/**
 * JSON body expected when a request fails: a single `error` envelope.
 */
export type ErrorResponse = {
  error: {
    /** Numeric error code (presumably mirrors the HTTP status — verify). */
    code: number;
    /** Human-readable description of the failure. */
    message: string;
    /**
     * Optional extra details. Use `"raw" in metadata` to tell the provider
     * variant apart from the moderation variant.
     */
    metadata?: ModerationErrorMetadata | ProviderErrorMetadata;
  };
};
/**
 * JSON body expected from a successful chat-completion request.
 *
 * Only the fields this script relies on or logs are declared here.
 */
export type OpenRouterResponse = {
  id: string;
  provider: string;
  model: string;
  object: string;
  /** Creation time as a number (presumably a Unix timestamp — verify). */
  created: number;
  /** Generated completions; this script only reads the first entry. */
  choices: {
    // NOTE(review): typed as `string | null` here; confirm against the API,
    // which may return a structured object when log probabilities are requested.
    logprobs: string | null;
    finish_reason: string;
    native_finish_reason: string;
    index: number;
    message: {
      role: string;
      /** Message text; with a JSON-schema response format this is a JSON document. */
      content: string;
      refusal: string | null;
      reasoning: string | null;
    };
  }[];
  /** Token accounting for the request. */
  usage: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
};
/**
 * Sends a hosted receipt image to the OpenRouter chat-completions endpoint,
 * asks for output constrained to a JSON schema derived from the Zod schema
 * below, validates the answer against that same schema and logs it.
 *
 * @throws {Error} When `OPENROUTER_API_KEY` is unset, when the HTTP request
 *   is rejected (any non-2xx status), or when the response carries no message
 *   content.
 * @throws {SyntaxError} When the message content is not valid JSON.
 * @throws When the parsed content does not satisfy the receipt schema
 *   (error raised by `schema.parse`).
 */
const main = async (): Promise<void> => {
  // Fail fast with a clear message instead of sending "Bearer undefined".
  const apiKey = env.OPENROUTER_API_KEY;
  if (!apiKey) {
    throw new Error("OPENROUTER_API_KEY environment variable is not set");
  }

  const receiptURL = "https://n4huoot950.ufs.sh/f/XIImkH5Dh5qvd9xRdruUYp9gnvmWuAS0jBz8I1wbEJGZFVTH";

  const prompt = dedent`
    You are a multilingual document parser specialized in extracting structured data from retail receipts and point-of-sale documents.
    Focus on identifying transaction details, itemized purchases, payment information, and store details.
  `;

  // Single source of truth: sent to the model as JSON Schema and reused below
  // to validate what the model actually returned.
  const schema = z.object({
    invoice_number: z.string().nullable().describe("Unique identifier for the invoice"),
    invoice_date: z.string().nullable().describe("Date of invoice in ISO 8601 format (YYYY-MM-DD)"),
    currency: z.string().describe("Three-letter ISO 4217 currency code (e.g., USD, EUR, SEK)"),
    total_amount: z.number().describe("Total amount for the invoice"),
    tax_amount: z.number().nullable().describe("Tax amount for the invoice"),
    tax_rate: z.number().nullable().describe("Tax rate as a percentage value (e.g., 20 for 20%)"),
    tax_type: z
      .enum([
        "vat",
        "sales_tax",
        "gst",
        "withholding_tax",
        "service_tax",
        "excise_tax",
        "reverse_charge",
        "custom_tax",
      ])
      .nullable()
      .describe(
        "The type of tax applied to the invoice, such as VAT, Sales Tax, GST, Withholding Tax, Service Tax, Excise Tax, Reverse Charge, or Custom Tax. This field should reflect the tax regime or system referenced on the invoice, and is important for correct accounting and compliance. If the document does not specify a tax type, infer it based on the country or context if possible.",
      ),
    vendor_id: z.string().nullable().describe("Store ID of the vendor/seller"),
    vendor_name: z
      .string()
      .nullable()
      .describe(
        "The legal registered business name of the company issuing the invoice. Look for names that include entity types like 'Inc.', 'Ltd', 'AB', 'GmbH', 'LLC', etc. This name is typically found in the letterhead, header, or footer of the invoice. Do not extract brands, divisions, or 'Trading as' names unless no legal name is visible. If multiple company names appear, prioritize the one that appears to be issuing the invoice rather than subsidiaries or parent companies.",
      ),
    vendor_address: z.string().nullable().describe("Complete address of the vendor/seller"),
    items: z
      .array(
        z.object({
          description: z.string().nullable().describe("Description of the item"),
          quantity: z.number().nullable().describe("Quantity of items"),
          unit_price: z.number().nullable().describe("Price per unit"),
          total_price: z.number().nullable().describe("Total price for this line item"),
        }),
      )
      .describe("Array of items listed in the document"),
    payment_instructions: z.string().nullable().describe("Payment terms or instructions"),
    notes: z.string().nullable().describe("Additional notes or comments"),
    language: z
      .string()
      .nullable()
      .describe(
        "The language of the document as a PostgreSQL text search configuration name (e.g., 'english', 'swedish', 'german', 'french')",
      ),
  });

  // Note: We use OpenRouter API directly to have better control over the workflow.
  // This prevents us from using 3rd-party SDKs and libraries that could have bugs or issues.
  const response = await fetch("https://openrouter.ai/api/v1/chat/completions", {
    method: "POST",
    headers: {
      Accept: "application/json",
      Authorization: `Bearer ${apiKey}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      // Note: MistralAI provides slightly better results, but is 3x more expensive, and 6x slower.
      // model: "mistralai/mistral-medium-3",
      model: "meta-llama/llama-4-scout",
      messages: [
        {
          role: "system",
          content: prompt,
        },
        {
          role: "user",
          content: [{ type: "image_url", image_url: { url: receiptURL } }],
        },
      ],
      response_format: {
        type: "json_schema",
        json_schema: {
          name: "receipt",
          strict: true,
          schema: z.toJSONSchema(schema, { target: "draft-2020-12" }),
        },
      },
    }),
  });

  // Handle error if any; invalid request, upstream issues, etc...
  // Every non-2xx response must throw here: the body can be read only once,
  // so falling through to the success path would fail with an unrelated error.
  if (!response.ok) {
    const summary = `OpenRouter request failed (${response.status} ${response.statusText})`;

    let failure: Partial<ErrorResponse> | null = null;
    try {
      failure = await response.json();
    } catch {
      // Body is not JSON; only the status line is available.
    }

    const details = failure?.error;
    if (!details) {
      throw new Error(summary);
    }

    const metadata = details.metadata;
    if (typeof metadata === "object" && metadata !== null && "raw" in metadata) {
      // `raw` is untyped: keep strings verbatim and serialise anything else,
      // so the message never degrades to "[object Object]".
      const raw = metadata.raw;
      const rendered = typeof raw === "string" ? raw : (JSON.stringify(raw) ?? String(raw));
      throw new Error(`${summary}: ${details.message} [${metadata.provider_name}] ${rendered}`);
    }

    throw new Error(`${summary}: ${details.message}`);
  }

  // Parse response and output the result
  const data: Partial<OpenRouterResponse> & Partial<ErrorResponse> = await response.json();

  // Guard the lookup: a successful status does not guarantee that a choice
  // with textual content is present.
  const content = data.choices?.[0]?.message?.content;
  if (typeof content !== "string") {
    const reason = data.error?.message;
    throw new Error(
      reason
        ? `OpenRouter response contained no message content: ${reason}`
        : "OpenRouter response contained no message content",
    );
  }

  // Validate instead of trusting the model to have honoured the schema.
  const receipt = schema.parse(JSON.parse(content));
  console.info(receipt);
};
// Script entry point: run `main` and report any failure.
main().catch((error: unknown) => {
  console.error(error);
  // Signal failure to the caller (shell, CI) instead of exiting with status 0;
  // setting exitCode rather than calling exit() lets pending output flush.
  process.exitCode = 1;
});
// GitHub page footer (not part of the script): sign up for free, or sign in if you
// already have an account, to join the conversation and comment on this gist.