Last active
January 30, 2019 17:29
-
-
Save DavidEdwards1/6a7e61bb4399248ce370a80befade359 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'csv'
require 'digest'
# Normalises free text and replaces every word with its SHA-256 hex digest.
#
# The input is coerced with +to_s+ and lower-cased; every character outside
# a-z / 0-9 is treated as a word separator, so punctuation and non-ASCII
# characters never reach the output.
#
# @param string_to_tokenise [#to_s] raw text (nil is treated as "")
# @return [String] space-separated hex digests, one per word, in input order;
#   "" when the input contains no a-z / 0-9 characters
def split_and_tokenise(string_to_tokenise)
  string_to_tokenise.to_s.downcase
    .tr('^a-z0-9', ' ') # char-for-char replacement, no regex needed
    .split
    .map { |token| Digest::SHA256.hexdigest(token) }
    .join(' ')
end
# Tokenises a value only when it has content.
#
# @param string_to_tokenise [Object] value to anonymise
# @return [String, nil] the result of split_and_tokenise, or nil when
#   +present?+ is false for the value
def tokenise(string_to_tokenise)
  return nil unless string_to_tokenise.present?

  split_and_tokenise(string_to_tokenise)
end
# Builds the bank-transaction part of one export row.
#
# Name and memo are passed through tokenise instead of being emitted raw.
# Key order is significant: the caller writes +keys+ as the CSV header and
# +values+ as the row.
#
# @param transaction [#id, #dated_on, #name, #memo, #amount, #deleted_at,
#   #bank_account_id, #bank_account] the transaction being exported
# @return [Hash{Symbol => Object}] transaction columns followed by the
#   columns read from its bank account
def transaction_attrs(transaction)
  account = transaction.bank_account

  transaction_columns = {
    bank_transaction_id: transaction.id,
    bank_transaction_dated_on: transaction.dated_on,
    bank_transaction_name: tokenise(transaction.name),
    bank_transaction_memo: tokenise(transaction.memo),
    bank_transaction_amount: transaction.amount,
    bank_transaction_deleted: transaction.deleted_at.present?,
    bank_account_id: transaction.bank_account_id,
  }

  account_columns = {
    bank_account_is_personal: account.is_personal,
    bank_account_type: account.type,
    company_id: account.company_id,
  }

  transaction_columns.merge(account_columns)
end
# Builds the explanation / general-ledger part of one export row.
#
# The same keys are returned, in the same order, whether or not an
# explanation is given, so every CSV row has the same columns. Any link that
# is missing (no explanation, no ledger account, no account type) yields nil
# for the columns that depend on it instead of raising.
#
# @param explanation [#id, #general_ledger_account, nil] the explanation to
#   export, or nil for a transaction without one
# @return [Hash{Symbol => Object, nil}] explanation and ledger columns
def explanation_attrs(explanation)
  account = explanation && explanation.general_ledger_account
  account_type = account && account.account_type

  {
    bank_account_entry_id: explanation && explanation.id,
    general_ledger_account_id: account && account.id,
    general_ledger_account_nominal_code_base: account && account.base_code,
    general_ledger_account_nominal_code_sub: account && account.sub_code,
    general_ledger_account_type: account_type && account_type.name,
    # area is not exposed with an accessor, so read the ivar directly
    general_ledger_account_area: account_type && account_type.instance_variable_get(:@area),
  }
end
# Associations loaded alongside every exported transaction.
models_to_include = [
  :bank_account,
  bank_account_entries: {general_ledger_account: :chart_of_accounts},
]

# In the first instance we only want companies with the tracking feature,
# restricted to those that already existed in September 2017.
company_ids = []
Company.where("created_at < '2017-10-01'").find_each do |company|
  # Keep only matching ids; a nil placeholder for non-matching companies
  # would turn into an unwanted "company_id IS NULL" condition below.
  company_ids << company.id if company.has_feature?(:dw_banking_events)
end

# Only want a sample of all data
#number_of_samples = 10
#Random.srand(666)
sampling = {
  # Filter on the foreign key directly. Company.find(ids) returns an Array of
  # records, which is not a valid scope to hand to Relation#merge.
  bank_account_id: BankAccount.where(company_id: company_ids).pluck(:id),
}
FILE_PREFIX = 'banquo_training_data_on'
writer = Metrics::CsvFileWriter.new(FILE_PREFIX)

# Writes one CSV row per (transaction, explanation) pair for transactions
# created in September 2017 on the sampled bank accounts. A transaction with
# no explanations still gets a single row whose explanation columns are nil.
writer.open_csv_for_write do |csv|
  # No LEFT OUTER JOIN here: nothing filters on the associated tables, and
  # joining the to-many bank_account_entries association would return a
  # transaction once per entry, repeating its rows in the output.
  BankTransaction
    .where("bank_transactions.created_at < '2017-10-01' and bank_transactions.created_at > '2017-09-01'")
    .where(sampling)
    .includes(models_to_include)
    .find_each do |transaction|
      # Computed once per transaction; it hashes the name and memo.
      base_attrs = transaction_attrs(transaction)

      explanations = transaction.bank_account_entries.to_a
      explanations = [nil] if explanations.empty?

      explanations.each do |explanation|
        attrs = base_attrs.merge(explanation_attrs(explanation))
        # The header comes from the first row's keys, before any data line.
        csv << attrs.keys if csv.lineno.zero?
        csv << attrs.values
      end
    end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment