Created
September 2, 2022 14:11
-
-
Save oseiskar/b353dd94bd84f06e9eb8858927f057b7 to your computer and use it in GitHub Desktop.
Convert Nordea corporate electronic account statement NDA format to CSV
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Parses the electronic account statements in the data format used by the | |
Finnish bank Nordea (.nda / "TITO") and converts it to CSV. | |
Usage: | |
python parse_nordea_nda.py < Konekielinen-tiliote\(YY-MM-DD\).nda > result.csv | |
OR parse multiple: | |
cat Konekielinen-tiliote\(*\).nda | python parse_nordea_nda.py > result.csv | |
All field are not included in the output. Modify this script to add more fields | |
if needed. | |
""" | |
# Reference for the data format | |
# https://www.finanssiala.fi/wp-content/uploads/2021/03/Konekielinen_tiliote_palvelukuvaus.pdf | |
# Did not immediately find out what (uncommon) 8-bit encoding format is | |
# used for the text fields. This decoding table is incomplete. | |
DECODING_TABLE = [ | |
'{ä', | |
'|ö', | |
'[Ä', | |
'\\Ö', | |
# TODO: letters å and Å should be added | |
] | |
import io | |
from collections import OrderedDict | |
def parse_message(f): | |
letter = f.read(1) | |
if len(letter) < 1: raise EOFError() | |
msg_type_id = f.read(2) | |
msg_length = int(f.read(3)) - 4 # not sure where 4 comes from | |
payload = f.read(msg_length) | |
return letter, msg_type_id, payload | |
def parse_and_concat(f): | |
cur_msg = None | |
cur_extras = [] | |
while True: | |
try: | |
_, msg_type_id, payload = parse_message(f) | |
if msg_type_id == '10': | |
if cur_msg is not None: | |
yield(cur_msg, cur_extras) | |
cur_msg = payload | |
cur_extras = [] | |
elif msg_type_id == '11': | |
assert(cur_msg is not None) | |
cur_extras.append(payload) | |
except EOFError: | |
break | |
if cur_msg is not None: | |
yield(cur_msg, cur_extras) | |
def parse_string(s): | |
s = s.rstrip() | |
for entry in DECODING_TABLE: | |
s = s.replace(entry[0], entry[1]) | |
return s | |
def parse_11_msg(s): | |
head = s[:2] | |
return parse_string(s[2:]) | |
def parse_date(d): | |
return '20' + d[:2] + '-' + d[2:4] + '-' + d[4:6] | |
def parse_amount(sign, amount): | |
if sign == '+': sign = '' | |
return sign + str(int(amount[:-2])) + '.' + amount[-2:] | |
def parse_10_msg(payload): | |
f = io.StringIO(payload) | |
d = OrderedDict({}) | |
d['index'] = f.read(6) | |
d['id'] = f.read(18) | |
d['event_date'] = f.read(6) | |
d['value_date'] = f.read(6) | |
d['paid_date'] = f.read(6) | |
d['type'] = f.read(1) | |
d['code'] = f.read(3) | |
d['code_description'] = f.read(35) | |
d['sign'] = f.read(1) | |
d['amount'] = f.read(18) | |
d['receipt_code'] = f.read(1) | |
d['transfer_type'] = f.read(1) | |
d['name'] = f.read(35) | |
d['name_source'] = f.read(1) | |
d['account_number'] = f.read(14) | |
d['account_changed'] = f.read(1) | |
d['reference'] = f.read(20) | |
d['form_number'] = f.read(8) | |
d['level_id'] = f.read(1) | |
return { | |
'raw': d, | |
'id': d['id'], | |
'date': parse_date(d['event_date']), | |
'name': parse_string(d['name']), | |
'amount': parse_amount(d['sign'], d['amount']) | |
} | |
def parse(f): | |
for msg, extras in parse_and_concat(f): | |
parsed = parse_10_msg(msg) | |
parsed['extras'] = [parse_11_msg(e) for e in extras] | |
yield(parsed) | |
if __name__ == '__main__': | |
import sys, csv | |
w = csv.writer(sys.stdout) | |
w.writerow(['id','date','amount','name','message']) | |
for m in parse(sys.stdin): | |
w.writerow([m['id'], m['date'], m['amount'], m['name'], '|'.join(m['extras'])]) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment