oseiskar · September 2, 2022 14:11
diff --git a/parse_nordea_nda.py b/parse_nordea_nda.py
 """
 Parses the electronic account statements in the data format used by the
 Finnish bank Nordea (.nda / "TITO") and converts it to CSV.

 Usage:

    python parse_nordea_nda.py < Konekielinen-tiliote\(YY-MM-DD\).nda > result.csv
    
 OR parse multiple:

    cat Konekielinen-tiliote\(*\).nda | python parse_nordea_nda.py > result.csv

 All field are not included in the output. Modify this script to add more fields
 if needed.
 """
 # Reference for the data format
 # https://www.finanssiala.fi/wp-content/uploads/2021/03/Konekielinen_tiliote_palvelukuvaus.pdf

 # Did not immediately find out what (uncommon) 8-bit encoding format is
 # used for the text fields. This decoding table is incomplete.
 DECODING_TABLE = [
    '{ä',
    '|ö',
    '[Ä',
    '\\Ö',
    # TODO: letters å and Å should be added
 ]

 import io
 from collections import OrderedDict

 def parse_message(f):
    letter = f.read(1)
    if len(letter) < 1: raise EOFError()
    msg_type_id = f.read(2)
    msg_length = int(f.read(3)) - 4 # not sure where 4 comes from
    payload = f.read(msg_length)
    return letter, msg_type_id, payload

 def parse_and_concat(f):
    cur_msg = None
    cur_extras = []
    while True:
        try:
            _, msg_type_id, payload = parse_message(f)
            if msg_type_id == '10':
                if cur_msg is not None:
                    yield(cur_msg, cur_extras)
                cur_msg = payload
                cur_extras = []
            elif msg_type_id == '11':
                assert(cur_msg is not None)
                cur_extras.append(payload)
        except EOFError:
            break
    if cur_msg is not None:
        yield(cur_msg, cur_extras)
           
 def parse_string(s):
    s = s.rstrip()
    for entry in DECODING_TABLE:
        s = s.replace(entry[0], entry[1])
    return s
     
 def parse_11_msg(s):
    head = s[:2]
    return parse_string(s[2:])

 def parse_date(d):
    return '20' + d[:2] + '-' + d[2:4] + '-' + d[4:6]

 def parse_amount(sign, amount):
    if sign == '+': sign = ''
    return sign + str(int(amount[:-2])) + '.' + amount[-2:]

 def parse_10_msg(payload):
    f = io.StringIO(payload)
    d = OrderedDict({})
    d['index'] = f.read(6)
    d['id'] = f.read(18)
    d['event_date'] = f.read(6)
    d['value_date'] = f.read(6)
    d['paid_date'] = f.read(6)
    d['type'] = f.read(1)
    d['code'] = f.read(3)
    d['code_description'] = f.read(35)
    d['sign'] = f.read(1)
    d['amount'] = f.read(18)
    d['receipt_code'] = f.read(1)
    d['transfer_type'] = f.read(1)
    d['name'] = f.read(35)
    d['name_source'] = f.read(1)
    d['account_number'] = f.read(14)
    d['account_changed'] = f.read(1)
    d['reference'] = f.read(20)
    d['form_number'] = f.read(8)
    d['level_id'] = f.read(1)
    return {
        'raw': d,
        'id': d['id'],
        'date': parse_date(d['event_date']),
        'name': parse_string(d['name']),
        'amount': parse_amount(d['sign'], d['amount'])
    }
    
 def parse(f):
    for msg, extras in parse_and_concat(f):
        parsed = parse_10_msg(msg)
        parsed['extras'] = [parse_11_msg(e) for e in extras]
        yield(parsed)
    
 if __name__ == '__main__':
    import sys, csv
    w = csv.writer(sys.stdout)
    w.writerow(['id','date','amount','name','message'])
    for m in parse(sys.stdin):
        w.writerow([m['id'], m['date'], m['amount'], m['name'], '|'.join(m['extras'])])
	"""
	Parses the electronic account statements in the data format used by the
	Finnish bank Nordea (.nda / "TITO") and converts it to CSV.

	Usage:

	python parse_nordea_nda.py < Konekielinen-tiliote\(YY-MM-DD\).nda > result.csv

	OR parse multiple:

	cat Konekielinen-tiliote\(*\).nda \| python parse_nordea_nda.py > result.csv

	All field are not included in the output. Modify this script to add more fields
	if needed.
	"""
	# Reference for the data format
	# https://www.finanssiala.fi/wp-content/uploads/2021/03/Konekielinen_tiliote_palvelukuvaus.pdf

	# Did not immediately find out what (uncommon) 8-bit encoding format is
	# used for the text fields. This decoding table is incomplete.
	DECODING_TABLE = [
	'{ä',
	'\|ö',
	'[Ä',
	'\\Ö',
	# TODO: letters å and Å should be added
	]

	import io
	from collections import OrderedDict

	def parse_message(f):
	letter = f.read(1)
	if len(letter) < 1: raise EOFError()
	msg_type_id = f.read(2)
	msg_length = int(f.read(3)) - 4 # not sure where 4 comes from
	payload = f.read(msg_length)
	return letter, msg_type_id, payload

	def parse_and_concat(f):
	cur_msg = None
	cur_extras = []
	while True:
	try:
	_, msg_type_id, payload = parse_message(f)
	if msg_type_id == '10':
	if cur_msg is not None:
	yield(cur_msg, cur_extras)
	cur_msg = payload
	cur_extras = []
	elif msg_type_id == '11':
	assert(cur_msg is not None)
	cur_extras.append(payload)
	except EOFError:
	break
	if cur_msg is not None:
	yield(cur_msg, cur_extras)

	def parse_string(s):
	s = s.rstrip()
	for entry in DECODING_TABLE:
	s = s.replace(entry[0], entry[1])
	return s

	def parse_11_msg(s):
	head = s[:2]
	return parse_string(s[2:])

	def parse_date(d):
	return '20' + d[:2] + '-' + d[2:4] + '-' + d[4:6]

	def parse_amount(sign, amount):
	if sign == '+': sign = ''
	return sign + str(int(amount[:-2])) + '.' + amount[-2:]

	def parse_10_msg(payload):
	f = io.StringIO(payload)
	d = OrderedDict({})
	d['index'] = f.read(6)
	d['id'] = f.read(18)
	d['event_date'] = f.read(6)
	d['value_date'] = f.read(6)
	d['paid_date'] = f.read(6)
	d['type'] = f.read(1)
	d['code'] = f.read(3)
	d['code_description'] = f.read(35)
	d['sign'] = f.read(1)
	d['amount'] = f.read(18)
	d['receipt_code'] = f.read(1)
	d['transfer_type'] = f.read(1)
	d['name'] = f.read(35)
	d['name_source'] = f.read(1)
	d['account_number'] = f.read(14)
	d['account_changed'] = f.read(1)
	d['reference'] = f.read(20)
	d['form_number'] = f.read(8)
	d['level_id'] = f.read(1)
	return {
	'raw': d,
	'id': d['id'],
	'date': parse_date(d['event_date']),
	'name': parse_string(d['name']),
	'amount': parse_amount(d['sign'], d['amount'])
	}

	def parse(f):
	for msg, extras in parse_and_concat(f):
	parsed = parse_10_msg(msg)
	parsed['extras'] = [parse_11_msg(e) for e in extras]
	yield(parsed)

	if __name__ == '__main__':
	import sys, csv
	w = csv.writer(sys.stdout)
	w.writerow(['id','date','amount','name','message'])
	for m in parse(sys.stdin):
	w.writerow([m['id'], m['date'], m['amount'], m['name'], '\|'.join(m['extras'])])
No results found