Last active
April 9, 2024 07:42
-
-
Save prettyirrelevant/5e554d033e0c34079c8eb0b3f95600ae to your computer and use it in GitHub Desktop.
Python module to extract information from Kuda's bank statement
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from decimal import Decimal | |
def clean_currency(value):
    """Convert a currency cell to `Decimal`.

    Strings have the naira sign (``₦``) and thousands separators (``,``)
    removed before conversion. `Decimal` inputs are returned unchanged.
    Any other non-string value (for example an ``int`` or ``float`` cell)
    is converted through its ``str()`` form, so ``12.5`` becomes
    ``Decimal("12.5")`` rather than the float's exact binary expansion.

    Note that a float NaN therefore becomes ``Decimal("NaN")``.

    Raises:
        decimal.InvalidOperation: if the cleaned text is not a valid number.
    """
    if isinstance(value, Decimal):
        return value
    if not isinstance(value, str):
        # Numeric cells have no `.replace`; go through str() instead of
        # failing with AttributeError.
        return Decimal(str(value))
    return Decimal(value.replace("₦", "").replace(",", ""))
def clean_spreadsheet(filename: str, skiprows: int = 15) -> pd.DataFrame:
    """Load a statement workbook and normalise its money columns.

    Args:
        filename: Path of the Excel file to read.
        skiprows: Number of leading rows to skip before the header row.
            Defaults to 15, the value this module has always used
            (presumably the statement's preamble — confirm against a
            real export).

    Returns:
        The sheet as a DataFrame with every column whose name starts with
        ``Unnamed`` removed, and with "Money In", "Money out" and "Balance"
        converted cell by cell with `clean_currency`.
    """
    dataframe = pd.read_excel(filename, skiprows=skiprows)
    unnamed_columns = dataframe.columns[dataframe.columns.str.contains("^Unnamed")]
    dataframe = dataframe.drop(unnamed_columns, axis=1)

    # Empty money cells count as zero; "Balance" is deliberately not filled.
    flow_columns = ["Money In", "Money out"]
    dataframe[flow_columns] = dataframe[flow_columns].fillna(Decimal(0))

    amount_columns = ["Money In", "Money out", "Balance"]
    dataframe[amount_columns] = dataframe[amount_columns].map(clean_currency)
    return dataframe
def get_top_debitors(dataframe: pd.DataFrame, n: int) -> pd.Series:
    """Return the accounts that received the most money from you.

    Args:
        dataframe: Transactions with "To / From" and "Money out" columns.
        n: Maximum number of accounts to return.

    Returns:
        A Series indexed by "To / From" holding the summed "Money out"
        per account, largest first, truncated to `n` entries.
    """
    totals = dataframe.groupby("To / From")["Money out"].sum()
    return totals.sort_values(ascending=False).head(n)
def get_top_creditors(dataframe: pd.DataFrame, n: int) -> pd.Series:
    """Return the accounts that sent you the most money.

    Args:
        dataframe: Transactions with "To / From" and "Money In" columns.
        n: Maximum number of accounts to return.

    Returns:
        A Series indexed by "To / From" holding the summed "Money In"
        per account, largest first, truncated to `n` entries.
    """
    totals = dataframe.groupby("To / From")["Money In"].sum()
    return totals.sort_values(ascending=False).head(n)
def get_top_debit_descriptions(dataframe: pd.DataFrame, n: int) -> pd.DataFrame:
    """Return the descriptions with the highest outgoing volume.

    Rows are grouped by "Description". Each group reports its summed
    "Money out" (column ``Money_out``) and the list of "To / From" values
    it contains (column ``To_From_List``). The result is ordered by
    ``Money_out`` descending and limited to `n` rows.
    """
    by_description = dataframe.groupby("Description")
    summary = by_description.agg(
        Money_out=("Money out", "sum"),
        To_From_List=("To / From", list),
    )
    ranked = summary.sort_values(by="Money_out", ascending=False)
    return ranked.head(n)
def get_top_debitors_by_description(dataframe: pd.DataFrame, query: str, n: int) -> pd.DataFrame:
    """Search "Description" with `query` and rank the matches by volume.

    `query` is passed to ``Series.str.contains``, so it is interpreted as
    a case-sensitive regular expression. Rows whose "Description" is
    missing never match.

    Returns:
        One row per matching description, with its summed "Money out"
        (``Money_out``) and the list of "To / From" accounts involved
        (``To_From_List``), ordered by ``Money_out`` descending and
        limited to `n` rows.
    """
    # na=False: a missing description would otherwise put NaN in the mask,
    # and boolean indexing with NaN raises ValueError.
    matches = dataframe["Description"].str.contains(query, na=False)
    filtered_df = dataframe[matches]
    return (
        filtered_df.groupby("Description")
        .agg(Money_out=("Money out", "sum"), To_From_List=("To / From", list))
        .sort_values(by="Money_out", ascending=False)
        .head(n)
    )
def get_top_debitors_by_category(dataframe: pd.DataFrame, query: str, n: int) -> pd.DataFrame:
    """Search "Category" with `query` and rank the matching categories.

    Rows without a "Category" are discarded first. The remaining rows are
    kept when ``Series.str.contains(query)`` matches their category. The
    result has one row per category with its summed "Money out"
    (``Money_out``) and the list of "To / From" accounts
    (``To_From_List``), ordered by ``Money_out`` descending and limited
    to `n` rows.
    """
    categorised = dataframe[dataframe["Category"].notna()]
    is_match = categorised["Category"].str.contains(query)
    selected = categorised[is_match]

    summary = selected.groupby("Category").agg(
        Money_out=("Money out", "sum"),
        To_From_List=("To / From", list),
    )
    return summary.sort_values(by='Money_out', ascending=False).head(n)
def get_top_debitors_by_account(dataframe: pd.DataFrame, query: str, n: int) -> pd.Series:
    """Search "To / From" with `query` and rank the matching accounts.

    Rows without a "To / From" value are discarded first. `query` is
    passed to ``Series.str.contains``, so it is interpreted as a
    case-sensitive regular expression.

    Returns:
        A Series indexed by "To / From" holding the summed "Money out"
        per matching account, largest first, truncated to `n` entries.
    """
    filtered_df = dataframe.dropna(subset=["To / From"])
    filtered_df = filtered_df[filtered_df["To / From"].str.contains(query)]
    totals = filtered_df.groupby("To / From")["Money out"].sum()
    return totals.sort_values(ascending=False).head(n)
def get_hottest_transactions_days(dataframe: pd.DataFrame, n: int) -> pd.DataFrame:
    """Return the days on which you moved the most money out.

    "Date/Time" is parsed with the format ``%d/%m/%y %H:%M:%S`` and
    reduced to a calendar date used as the grouping key.

    Returns:
        One row per date (index ``Date``) with the summed "Money out"
        (``Money_out``), the list of "To / From" accounts
        (``To_From_List``) and the list of descriptions
        (``Description_list``), ordered by ``Money_out`` descending and
        limited to `n` rows.
    """
    dates = pd.to_datetime(dataframe["Date/Time"], format="%d/%m/%y %H:%M:%S").dt.date
    # assign() works on a copy, so the caller's frame does not silently
    # gain or lose a "Date" column.
    dated = dataframe.assign(Date=dates)
    return (
        dated.groupby("Date")
        .agg(
            Money_out=("Money out", "sum"),
            To_From_List=("To / From", list),
            Description_list=("Description", list),
        )
        .sort_values(by="Money_out", ascending=False)
        .head(n)
    )
def get_transactions_by_date(dataframe: pd.DataFrame, date: str) -> pd.DataFrame:
    """Return all transactions for a given date.

    Args:
        dataframe: Transactions with a "Date/Time" column in the format
            ``%d/%m/%y %H:%M:%S``.
        date: The day to select, written as DD-MM-YYYY.

    Returns:
        The rows whose "Date/Time" falls on `date`, with an extra "Date"
        column holding the parsed timestamp.
    """
    target_date = pd.to_datetime(date, format="%d-%m-%Y").date()
    parsed = pd.to_datetime(dataframe["Date/Time"], format="%d/%m/%y %H:%M:%S")
    # assign() works on a copy, so the caller's frame is left untouched.
    dated = dataframe.assign(Date=parsed)
    return dated[dated["Date"].dt.date == target_date]
# Example usage: load and clean a statement workbook. The path below looks
# like a placeholder; point it at a real export.
df = clean_spreadsheet("path/to/excel/file.xlsx")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment