Skip to content

Instantly share code, notes, and snippets.

@prettyirrelevant
Last active April 9, 2024 07:42
Show Gist options
  • Save prettyirrelevant/5e554d033e0c34079c8eb0b3f95600ae to your computer and use it in GitHub Desktop.
Save prettyirrelevant/5e554d033e0c34079c8eb0b3f95600ae to your computer and use it in GitHub Desktop.
Python module to extract information from Kuda's bank statement
import pandas as pd
from decimal import Decimal
def clean_currency(value):
    """Convert a statement amount to ``Decimal``.

    The naira sign (``₦``) and thousands separators (``,``) are removed
    before parsing. A ``Decimal`` input is returned unchanged. Any other
    non-string value (for example a cell that was read as a number) is
    converted with ``str`` first instead of failing on the missing
    ``.replace`` method.

    Raises:
        decimal.InvalidOperation: if the cleaned text is not a valid number.
    """
    if isinstance(value, Decimal):
        return value
    # Go through str() rather than Decimal(float) so 0.1 parses as "0.1"
    # and not as its binary floating-point expansion.
    text = value if isinstance(value, str) else str(value)
    return Decimal(text.replace("₦", "").replace(",", ""))
def clean_spreadsheet(filename: str) -> pd.DataFrame:
    """Load an Excel statement and normalise its amount columns.

    Reads ``filename`` with the first 15 rows skipped, drops every column
    whose name matches ``^Unnamed``, fills missing ``Money In`` /
    ``Money out`` cells with ``Decimal(0)`` and passes ``Money In``,
    ``Money out`` and ``Balance`` through ``clean_currency``.
    """
    flow_columns = ["Money In", "Money out"]
    amount_columns = [*flow_columns, "Balance"]

    sheet = pd.read_excel(filename, skiprows=15)

    # Columns pandas labelled "Unnamed..." carry no header in the sheet.
    unnamed = sheet.columns[sheet.columns.str.contains("^Unnamed")]
    sheet = sheet.drop(columns=unnamed)

    # Only the in/out columns are filled; Balance is converted as-is.
    sheet[flow_columns] = sheet[flow_columns].fillna(Decimal(0))
    sheet[amount_columns] = sheet[amount_columns].map(clean_currency)
    return sheet
def get_top_debitors(dataframe: pd.DataFrame, n: int) -> pd.Series:
    """Return the ``n`` accounts that received the most money from you.

    Rows are grouped by ``To / From`` and their ``Money out`` values summed.
    The result is a Series indexed by account, sorted from the largest
    total to the smallest and truncated to the first ``n`` entries.
    """
    totals = dataframe.groupby("To / From")["Money out"].sum()
    return totals.sort_values(ascending=False).head(n)
def get_top_creditors(dataframe: pd.DataFrame, n: int) -> pd.Series:
    """Return the ``n`` accounts that sent you the most money.

    Rows are grouped by ``To / From`` and their ``Money In`` values summed.
    The result is a Series indexed by account, sorted from the largest
    total to the smallest and truncated to the first ``n`` entries.
    """
    totals = dataframe.groupby("To / From")["Money In"].sum()
    return totals.sort_values(ascending=False).head(n)
def get_top_debit_descriptions(dataframe: pd.DataFrame, n: int) -> pd.DataFrame:
    """Return the ``n`` descriptions with the largest total ``Money out``.

    The result has one row per distinct ``Description``: ``Money_out`` is
    the summed ``Money out`` of that group and ``To_From_List`` lists the
    ``To / From`` values of its rows. Rows are ordered by ``Money_out``,
    largest first.
    """
    per_description = dataframe.groupby("Description").agg(
        Money_out=("Money out", "sum"),
        To_From_List=("To / From", list),
    )
    ranked = per_description.sort_values(by="Money_out", ascending=False)
    return ranked.head(n)
def get_top_debitors_by_description(dataframe: pd.DataFrame, query: str, n: int) -> pd.DataFrame:
    """Summarise money sent out for descriptions matching ``query``.

    ``query`` is matched against ``Description`` with ``Series.str.contains``
    (which treats the pattern as a regular expression by default). Matching
    rows are grouped by ``Description``; each group reports its summed
    ``Money out`` as ``Money_out`` and its ``To / From`` values as
    ``To_From_List``. The first ``n`` groups, ordered by ``Money_out``
    descending, are returned.
    """
    # Rows without a description cannot match. Dropping them first also keeps
    # missing values out of the boolean mask, which pandas refuses to index
    # with, and mirrors the category/account variants of this search.
    described = dataframe.dropna(subset=["Description"])
    matches = described[described["Description"].str.contains(query)]
    return (
        matches.groupby("Description")
        .agg(Money_out=("Money out", "sum"), To_From_List=("To / From", list))
        .sort_values(by="Money_out", ascending=False)
        .head(n)
    )
def get_top_debitors_by_category(dataframe: pd.DataFrame, query: str, n: int) -> pd.DataFrame:
    """Summarise money sent out for categories matching ``query``.

    Rows with a missing ``Category`` are discarded, then ``query`` is matched
    against ``Category`` with ``Series.str.contains``. Matching rows are
    grouped by ``Category``; each group reports its summed ``Money out`` as
    ``Money_out`` and its ``To / From`` values as ``To_From_List``. The first
    ``n`` groups, ordered by ``Money_out`` descending, are returned.
    """
    categorised = dataframe.dropna(subset=["Category"])
    is_match = categorised["Category"].str.contains(query)
    totals = categorised[is_match].groupby("Category").agg(
        Money_out=("Money out", "sum"),
        To_From_List=("To / From", list),
    )
    return totals.sort_values(by='Money_out', ascending=False).head(n)
def get_top_debitors_by_account(dataframe: pd.DataFrame, query: str, n: int) -> pd.Series:
    """Total the money sent to accounts whose name matches ``query``.

    Rows with a missing ``To / From`` are discarded, then ``query`` is
    matched against ``To / From`` with ``Series.str.contains``. The matching
    rows are grouped by ``To / From`` and their ``Money out`` summed. The
    result is a Series indexed by account, sorted from the largest total to
    the smallest and truncated to the first ``n`` entries.
    """
    named = dataframe.dropna(subset=["To / From"])
    matches = named[named["To / From"].str.contains(query)]
    totals = matches.groupby("To / From")["Money out"].sum()
    return totals.sort_values(ascending=False).head(n)
def get_hottest_transactions_days(dataframe: pd.DataFrame, n: int) -> pd.DataFrame:
    """Return the ``n`` calendar days with the largest total ``Money out``.

    ``Date/Time`` is parsed with the format ``%d/%m/%y %H:%M:%S`` and reduced
    to its date. Rows are grouped by that date; each group reports its summed
    ``Money out`` as ``Money_out``, its ``To / From`` values as
    ``To_From_List`` and its ``Description`` values as ``Description_list``.
    Groups are ordered by ``Money_out`` descending.

    The input frame is left untouched: the helper ``Date`` column only
    exists on an internal copy.
    """
    # assign() returns a new frame, so the caller's DataFrame does not gain
    # (or have overwritten) a "Date" column as a side effect of this query.
    dated = dataframe.assign(
        Date=pd.to_datetime(dataframe["Date/Time"], format="%d/%m/%y %H:%M:%S").dt.date
    )
    return (
        dated.groupby("Date")
        .agg(
            Money_out=("Money out", "sum"),
            To_From_List=("To / From", list),
            Description_list=("Description", list),
        )
        .sort_values(by="Money_out", ascending=False)
        .head(n)
    )
def get_transactions_by_date(dataframe: pd.DataFrame, date: str) -> pd.DataFrame:
    """Return all transactions for a given date.

    ``date`` must be formatted as DD-MM-YYYY. ``Date/Time`` is parsed with
    the format ``%d/%m/%y %H:%M:%S`` and rows whose calendar date equals
    ``date`` are returned. The returned frame carries the parsed timestamps
    in a ``Date`` column; the input frame itself is not modified.
    """
    target_day = pd.to_datetime(date, format='%d-%m-%Y').date()
    # Work on a copy so the caller's frame does not gain (or have
    # overwritten) a "Date" column as a side effect of this lookup.
    stamped = dataframe.assign(
        Date=pd.to_datetime(dataframe["Date/Time"], format="%d/%m/%y %H:%M:%S")
    )
    return stamped[stamped["Date"].dt.date == target_day]
# Example usage: load and clean a statement at module level. The path below
# looks like a placeholder — replace it with the real statement file.
df = clean_spreadsheet("path/to/excel/file.xlsx")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment