Skip to content

Instantly share code, notes, and snippets.

@DavidEdwards1
Last active January 30, 2019 17:29
Show Gist options
  • Save DavidEdwards1/6a7e61bb4399248ce370a80befade359 to your computer and use it in GitHub Desktop.
Save DavidEdwards1/6a7e61bb4399248ce370a80befade359 to your computer and use it in GitHub Desktop.
require 'csv'
# Anonymises free text word by word.
#
# The argument is converted with +to_s+ and lower-cased, every character
# outside a-z / 0-9 is treated as a separator, and each remaining word is
# replaced by its SHA256 hex digest.
#
# @param string_to_tokenise [#to_s] the text to anonymise
# @return [String] the word digests joined by single spaces ("" when the
#   input contains no alphanumeric characters)
def split_and_tokenise(string_to_tokenise)
  normalised = string_to_tokenise.to_s.downcase.gsub(/[^a-z0-9]/, ' ')
  words = normalised.split(" ")
  digests = words.map { |word| Digest::SHA256.hexdigest(word) }
  digests.join(" ")
end
# Tokenises a value via split_and_tokenise, passing through "nothing there"
# as nil.
#
# @param string_to_tokenise [Object] value to tokenise; must respond to
#   +present?+ (presumably ActiveSupport's Object#present? - confirm)
# @return [String, nil] the tokenised string, or nil when the value is not
#   +present?+
def tokenise(string_to_tokenise)
  if string_to_tokenise.present?
    split_and_tokenise(string_to_tokenise)
  else
    nil
  end
end
# Builds the transaction-level columns of one export row.
#
# The name and memo are passed through tokenise; every other value is read
# straight from the transaction or its bank account. The key order is the
# column order of the export.
#
# @param transaction [Object] a bank transaction responding to id, dated_on,
#   name, memo, amount, deleted_at, bank_account_id and bank_account
# @return [Hash{Symbol => Object}] column name => value
def transaction_attrs(transaction)
  transaction_fields = {
    bank_transaction_id: transaction.id,
    bank_transaction_dated_on: transaction.dated_on,
    bank_transaction_name: tokenise(transaction.name),
    bank_transaction_memo: tokenise(transaction.memo),
    bank_transaction_amount: transaction.amount,
    bank_transaction_deleted: transaction.deleted_at.present?,
    bank_account_id: transaction.bank_account_id,
  }

  account = transaction.bank_account
  # Hash#merge appends the new keys, so the column order is unchanged.
  transaction_fields.merge(
    bank_account_is_personal: account.is_personal,
    bank_account_type: account.type,
    company_id: account.company_id,
  )
end
# Builds the explanation-level columns of one export row.
#
# @param explanation [Object, nil] an object responding to id and
#   general_ledger_account, or nil for a transaction with no explanation
# @return [Hash{Symbol => Object}] column name => value; the same keys in
#   the same order for both cases, with every value nil when +explanation+
#   is nil
def explanation_attrs(explanation)
  if explanation.nil?
    return {
      bank_account_entry_id: nil,
      general_ledger_account_id: nil,
      general_ledger_account_nominal_code_base: nil,
      general_ledger_account_nominal_code_sub: nil,
      general_ledger_account_type: nil,
      general_ledger_account_area: nil,
    }
  end

  ledger_account = explanation.general_ledger_account
  account_type = ledger_account.account_type
  {
    bank_account_entry_id: explanation.id,
    general_ledger_account_id: ledger_account.id,
    general_ledger_account_nominal_code_base: ledger_account.base_code,
    general_ledger_account_nominal_code_sub: ledger_account.sub_code,
    general_ledger_account_type: account_type.name,
    # The account type has no accessor for its area, so the instance
    # variable is read directly.
    general_ledger_account_area: account_type.instance_variable_get(:@area),
  }
end
# Association tree handed to the BankTransaction query further down: the
# bank account, plus each bank account entry with its general ledger account
# and that account's chart of accounts.
models_to_include = [
  :bank_account,
  { bank_account_entries: { general_ledger_account: :chart_of_accounts } },
]
# In the initial instance we just want companies with the tracking feature
# and those that would have had it in September 2017.
#
# Only the ids of companies that actually have the feature are collected;
# companies without it are skipped rather than recorded as nil, so the list
# can be used directly as a set of ids.
company_ids = []
Company.where("created_at < '2017-10-01'").find_each do |company|
  company_ids << company.id if company.has_feature?(:dw_banking_events)
end
# Only want a sample of all data
#number_of_samples = 10
#Random.srand(666)
#
# Restrict the export to bank accounts that belong to the selected companies.
sampling = {
  # Relation#merge only combines query conditions when it is given a
  # relation. Company.find(ids) returns an Array, which makes merge intersect
  # loaded records instead, so a Company.where relation is passed here.
  # compact keeps stray nil ids out of the IN (...) condition.
  bank_account_id: BankAccount.
    joins(:company).
    merge(Company.where(id: company_ids.compact)).
    pluck("bank_accounts.id"),
}
# Write one CSV row per (transaction, explanation) pair for transactions
# created in September 2017 on the sampled bank accounts. A transaction with
# no explanation still produces a single row whose explanation columns are
# all nil. The header row is emitted once, before the first data row.
FILE_PREFIX = 'banquo_training_data_on'
writer = Metrics::CsvFileWriter.new(FILE_PREFIX)
writer.open_csv_for_write do |csv|
  # NOTE(review): the original also chained left_outer_joins(models_to_include).
  # A LEFT OUTER JOIN filters nothing here, but joining the has-many
  # bank_account_entries can repeat a transaction row once per entry, which
  # would make find_each yield that transaction (and write its rows) several
  # times. includes alone loads the same associations - confirm the row
  # counts against a known month.
  BankTransaction.
    where("bank_transactions.created_at < '2017-10-01' and bank_transactions.created_at > '2017-09-01'").
    where(sampling).
    includes(models_to_include).
    find_each do |transaction|
      # The transaction columns are identical for every explanation row.
      base_attrs = transaction_attrs(transaction)

      explanations = transaction.bank_account_entries.to_a
      # nil stands in for "no explanation" so the row is still written.
      explanations = [nil] if explanations.empty?

      explanations.each do |explanation|
        attrs = base_attrs.merge(explanation_attrs(explanation))
        csv << attrs.keys if csv.lineno.zero?
        csv << attrs.values
      end
    end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment