I have published this as an open source website, please check it out here:
-
Web Site URL: https://alshdavid.github.io/commbank-statement-converter
-
Source Code: https://alshdavid.github.io/commbank-statement-converter
I have published this as an open source website, please check it out here:
Web Site URL: https://alshdavid.github.io/commbank-statement-converter
Source Code: https://alshdavid.github.io/commbank-statement-converter
"use strict";var t=this&&this.__importDefault||function(t){return t&&t.__esModule?t:{default:t}};Object.defineProperty(exports,"__esModule",{value:!0}),exports.parsePDF=void 0;const e=t(require("fs")),n=t(require("pdf-parse")),s=require("json2csv");async function r(t,r){const l=e.default.readFileSync(t),o=(await n.default(l)).text.split("TransactionDebitCreditBalance");o.shift();let a=[],c=0;for(let t of o){const{results:e,finalBalance:n}=i(t,c);c=n,a=[...a,...e]}if(r&&r.endsWith(".json"))e.default.writeFileSync(r,JSON.stringify(a,null,2));else if(r&&r.endsWith(".csv")){const t=s.parse(a);e.default.writeFileSync(r,t)}else console.log(a)}function i(t,e=0){const{openingBalance:n,firstPassResults:s}=function(t){const e=t.split("\n");let n;e.shift();const s=[];if(function(t){return t.includes("OPENING BALANCE")}(e[0])){n=l((e.shift()||"").split("$")[1])}let r=!1;for(const t of e)if(!0!==r){if(""===t)break;if(t.includes("CLOSING BALANCE"))break;t.includes("CREDIT INTEREST EARNED")?r=!0:s.push(t)}else r=!1;return{firstPassResults:s,openingBalance:n}}(t);return function(t,e){const n=[];let s=e;for(const e of t){const t=e.substring(0,6);let[r,...i]=e.substring(6,e.length).split("$");const c=o(i[0]),u=l(i[1]),f=u-s;let g=0;if(0===c){let e=a(f);e.startsWith("-")&&(e=e.substring(1,e.length));e!==r.substring(r.length-e.length,r.length)&&(console.log("WrongDebitCalc"),console.log({newBalance:u-Math.abs(f),diff:Math.abs(f),ds:e,date:t,label:r,debit:g,credit:c,balance:u})),g=Math.abs(f),r=r.substring(0,r.length-e.length)}n.push({date:t,label:r,debit:g,credit:c,balance:u}),s=u}return{results:n,finalBalance:s}}(function(t){const e=[];let n="";for(const s of t){1===s.split("$").length?n+=s:(e.push(n+s),n="")}return e}(s),n||e)}function l(t){if(t.includes("CR"))return o(t.split("CR")[0]);if(t.includes("DR"))return-Math.abs(o(t.split("DR")[0]));throw"No Balance"}function o(t){return t?parseInt(t.replace(/,/g,"").replace(/\./g,"")):0}function a(t){const e=t.toString().startsWith("-")?"-":"",n=t.toString().replace("-","");if(1===n.length)return e+"0.0"+n;if(2===n.length)return e+"0."+n;const s=n.substring(0,n.length-2),r=n.substring(n.length-2,n.length);return parseInt(s).toLocaleString()+"."+r}process.argv[2]&&r(process.argv[2],process.argv[3]),exports.parsePDF=r |
Usage | |
Contact [email protected] for help | |
Download and install node from https://nodejs.org/en/download/ | |
Copy the "index.js" script below to you computer. | |
Open up your terminal, cmd, or powershell. | |
run the following once: | |
npm install pdf-parse json2csv | |
Navigate to the directory then run the following for each statement: | |
node index.js ./statement.pdf ./output.csv | |
If you want to output a JSON file | |
node index.js ./statement.pdf ./output.json |
import fs from 'fs' | |
import pdf from 'pdf-parse' | |
import { parse } from 'json2csv'; | |
export type Cents = number | |
export type Record = { | |
date: string, | |
label: string, | |
debit: Cents, | |
credit: Cents, | |
balance: Cents | |
}; | |
if (process.argv[2]) { | |
parsePDF(process.argv[2], process.argv[3]) | |
} | |
export async function parsePDF(file: string, outputFile?: string){ | |
const dataBuffer = fs.readFileSync(file); | |
const data = await pdf(dataBuffer) | |
const vs = data.text.split('TransactionDebitCreditBalance') | |
vs.shift() | |
let output: Record[] = [] | |
let previousBalance = 0 | |
for (let v of vs) { | |
const { results, finalBalance } = processTable(v, previousBalance) | |
previousBalance = finalBalance | |
output = [ ...output, ...results ] | |
} | |
if (outputFile && outputFile.endsWith('.json')) { | |
fs.writeFileSync(outputFile, JSON.stringify(output, null, 2)) | |
} else if (outputFile && outputFile.endsWith('.csv')) { | |
const csv = parse(output); | |
fs.writeFileSync(outputFile, csv) | |
} else { | |
console.log(output) | |
} | |
} | |
function isOpeningBalance(target: string) { | |
return target.includes('OPENING BALANCE') | |
} | |
function getTabletLines(target: string): { firstPassResults: string[], openingBalance: number | undefined } { | |
const l = target.split('\n') | |
l.shift() | |
let openingBalance: number | undefined | |
const p = [] | |
if (isOpeningBalance(l[0])) { | |
const openingLine = l.shift() || '' | |
openingBalance = parseBalance(openingLine.split('$')[1]) | |
} | |
let skip = false | |
for (const line of l) { | |
if (skip === true) { | |
skip = false | |
continue | |
} | |
if (line === '') { | |
break | |
} | |
if (line.includes('CLOSING BALANCE')) { | |
break | |
} | |
if (line.includes('CREDIT INTEREST EARNED')) { | |
skip = true | |
continue | |
} | |
p.push(line) | |
} | |
return { | |
firstPassResults: p, | |
openingBalance | |
} | |
} | |
function squashLines(target: string[]): string[] { | |
const p2: string[] = [] | |
let temp = '' | |
for (const line of target) { | |
const v = line.split('$') | |
if (v.length === 1) { | |
temp += line | |
} else { | |
p2.push(temp + line) | |
temp = '' | |
} | |
} | |
return p2 | |
} | |
function generateRecords(target: string[], initialBalance: number): { results: Record[], finalBalance: Cents } { | |
const p3: Record[] = [] | |
let previousBalance = initialBalance | |
for (const line of target) { | |
const date = line.substring(0, 6) | |
let [ label, ...data ] = line.substring(6, line.length).split('$') | |
const credit = moneyToCents(data[0]) | |
const balance = parseBalance(data[1]) | |
const diff = balance - previousBalance | |
let debit = 0 | |
if (credit === 0) { | |
let ds = centsToMoney(diff) | |
if (ds.startsWith('-')) { | |
ds = ds.substring(1, ds.length) | |
} | |
const diffInLabel = label.substring(label.length - ds.length, label.length) | |
if (ds !== diffInLabel) { | |
console.log('WrongDebitCalc') | |
console.log({ | |
newBalance: balance - Math.abs(diff), | |
diff: Math.abs(diff), | |
ds, | |
date, | |
label, | |
debit, | |
credit, | |
balance | |
}) | |
} | |
debit = Math.abs(diff) | |
label = label.substring(0, label.length - ds.length) | |
} | |
p3.push({ | |
date, | |
label, | |
debit, | |
credit, | |
balance | |
}) | |
previousBalance = balance | |
} | |
return { | |
results: p3, | |
finalBalance: previousBalance | |
} | |
} | |
function processTable(t: string, initialBalance: Cents = 0) { | |
const { openingBalance, firstPassResults } = getTabletLines(t) | |
const squashedLines = squashLines(firstPassResults) | |
return generateRecords(squashedLines, openingBalance || initialBalance) | |
} | |
function parseBalance(balance: string): Cents { | |
if (balance.includes('CR')) { | |
return moneyToCents(balance.split('CR')[0]) | |
} if (balance.includes('DR')) { | |
return -Math.abs(moneyToCents(balance.split('DR')[0])) | |
} | |
throw 'No Balance' | |
} | |
function moneyToCents(money: string): Cents { | |
return money ? parseInt(money.replace(/,/g, '').replace(/\./g, '')) : 0 | |
} | |
function centsToMoney(input: Cents): string { | |
const negative = input.toString().startsWith('-') ? '-' : '' | |
const iS = input.toString().replace('-', '') | |
if (iS.length === 1) { | |
return negative + '0.0'+ iS | |
} | |
if (iS.length === 2) { | |
return negative + '0.'+ iS | |
} | |
const dollars = iS.substring(0, iS.length - 2) | |
const cents = iS.substring(iS.length - 2, iS.length) | |
const localDollars = parseInt(dollars).toLocaleString() | |
return localDollars + '.' + cents | |
} |
You are a legend mate!
Updated this to work from in the browser:
URL: https://alshdavid.github.io/commbank-statement-converter
Repo: https://github.com/alshdavid/commbank-statement-converter
Thank you David, this works perfectly.