# analyze_money_manager.py
# Gist crrmacarse/74fd0e52859d1fe8b3d8d8f0b10b1311 (last active May 4, 2025 14:18).
# Note: GitHub flagged this file as containing hidden or bidirectional Unicode
# text that may be interpreted or compiled differently than it appears; review
# it in an editor that reveals hidden Unicode characters.
import re
import sys
from datetime import datetime
from pprint import pprint

from openpyxl import load_workbook

from utils.helpers import write_to_file
# Adjust this list to match your own coffee shop configuration.
_DEFAULT_COFFEE_SHOPS = (
    "Starbucks", "Dunkin Donuts", "Pickup Coffee", "Coffee Project",
    "Tim Hortons", "Dean & Deluca", "Cafe Amazon", "Highlands", "Bo's",
    # Iloilo-based shops
    "Coffeebreak", "Cafe Brewtherhood", "Teepee", "Tiring",
)


def process_coffee_shops(food_data, coffee_shops=None, limit=10):
    """Return the most frequently visited coffee shops found in *food_data*.

    An entry qualifies when its coffee-shop flag (``data[3]``) is truthy and
    its name contains one of the coffee shop names as a whole word,
    compared case-insensitively.

    Args:
        food_data: Mapping of establishment name to a tuple whose index 0 is
            the number of entries and whose index 3 is the coffee-shop flag.
        coffee_shops: Optional iterable of shop names to match. Defaults to
            the built-in list above.
        limit: Maximum number of results to return (default 10). ``None``
            returns every match.

    Returns:
        A list of ``(name, data)`` pairs sorted by number of entries,
        highest first. Ties keep the iteration order of *food_data*.
    """
    shops = _DEFAULT_COFFEE_SHOPS if coffee_shops is None else tuple(coffee_shops)
    if not shops:
        # An empty alternation would match at every word boundary.
        return []

    # One compiled alternation instead of one regex search per shop per name.
    shop_pattern = re.compile(
        r"\b(?:" + "|".join(re.escape(shop) for shop in shops) + r")\b",
        re.IGNORECASE,
    )

    matches = [
        (name, data)
        for name, data in food_data.items()
        # str() guards against non-string names (e.g. a numeric Excel cell),
        # which would otherwise make the regex search raise TypeError.
        if data[3] and shop_pattern.search(str(name))
    ]
    matches.sort(key=lambda item: item[1][0], reverse=True)
    return matches[:limit]
# Display format used for "First Instance" dates in the report.
_PERIOD_FORMAT = "%B %d, %Y"

# Column titles that must be present in the sheet's header row.
_REQUIRED_COLUMNS = (
    "Period", "Accounts", "Category", "Subcategory",
    "Amount", "Income/Expense", "Note",
)

# Food subcategories treated as coffee-shop visits (Grab and Foodpanda are
# included to cover delivery orders).
_COFFEE_SUBCATEGORIES = ("Cafe Hopping", "Grab", "Foodpanda")

_FAST_FOODS = ("Jollibee", "Mcdo", "KFC", "Chowking", "Mang Inasal", "Burger King", "Pizza Hut")
_FAST_FOOD_RE = re.compile(
    r"\b(?:" + "|".join(re.escape(name) for name in _FAST_FOODS) + r")\b",
    re.IGNORECASE,
)

_SHOPEE_RE = re.compile(r"Shopee$", re.IGNORECASE)
_LAZADA_RE = re.compile(r"Lazada$", re.IGNORECASE)
# The lookbehind keeps the "Cafe Amazon" coffee shop from being counted as an
# Amazon order.
_AMAZON_RE = re.compile(r"(?<!Cafe )Amazon$", re.IGNORECASE)
_SEVEN_ELEVEN_RE = re.compile(r"^711", re.IGNORECASE)
_GRAB_RE = re.compile(r"^Grab", re.IGNORECASE)


def _column_positions(header_row):
    """Map each required column title to its 0-based position in the header."""
    positions = {}
    for title in _REQUIRED_COLUMNS:
        try:
            positions[title] = header_row.index(title)
        except ValueError:
            raise ValueError(f"Missing required column: {title!r}") from None
    return positions


def _earliest(current, candidate):
    """Return the earlier of two periods, ignoring non-datetime candidates."""
    if not isinstance(candidate, datetime):
        return current
    if current is None or candidate < current:
        return candidate
    return current


def _format_period(period):
    """Format a period for display, or "N/A" when no valid date was seen."""
    return period.strftime(_PERIOD_FORMAT) if period is not None else "N/A"


def _tally(table, key, amount, period):
    """Add one entry to a (count, total amount, earliest period) table."""
    count, total, first_seen = table.get(key, (0, 0, None))
    table[key] = (count + 1, round(total + amount, 2), _earliest(first_seen, period))


def _with_formatted_periods(table):
    """Return a copy of *table* whose index-2 period is a display string."""
    return {
        key: (*values[:2], _format_period(values[2]), *values[3:])
        for key, values in table.items()
    }


def analyze_money_manager(file_path):
    """Build a Markdown report from a Money Manager ``.xlsx`` export.

    Reads the active worksheet of the workbook at *file_path*. The first row
    must contain the column titles "Period", "Accounts", "Category",
    "Subcategory", "Amount", "Income/Expense" and "Note". Rows whose amount
    is not numeric are skipped. Rows marked "Exp." feed the expense
    statistics and rows marked "Income" feed the income statistics.

    Args:
        file_path: Path to the ``.xlsx`` file, passed to ``load_workbook``.

    Returns:
        A list of Markdown lines; join them with a newline to get the report.

    Raises:
        ValueError: If the sheet has no header row or a required column is
            missing.
    """
    workbook = load_workbook(file_path)
    sheet = workbook.active

    header_row = next(sheet.iter_rows(values_only=True), None)
    if header_row is None:
        raise ValueError("Worksheet is empty: no header row found")
    columns = _column_positions(header_row)
    period_col = columns["Period"]
    account_col = columns["Accounts"]
    category_col = columns["Category"]
    subcategory_col = columns["Subcategory"]
    amount_col = columns["Amount"]
    income_expense_col = columns["Income/Expense"]
    note_col = columns["Note"]

    # totals
    total_expense = 0
    total_expense_count = 0
    total_income = 0
    total_income_count = 0

    # data
    expense_account_data = {}
    income_from_data = {}
    purchase_from_data = {}
    food_data = {}

    # special cases
    total_shopee_count = 0
    total_lazada_count = 0
    total_amazon_count = 0
    total_grab_food_count = 0
    total_grab_car_count = 0
    total_foodpanda_count = 0
    total_711 = 0

    for row in sheet.iter_rows(min_row=2, values_only=True):
        amount_value = row[amount_col]
        if not isinstance(amount_value, (int, float)):
            continue

        period_value = row[period_col]
        income_expense_value = row[income_expense_col]
        note_value = row[note_col]
        # Notes are used as regex subjects and table keys; a numeric cell
        # (e.g. 711) would otherwise make the regex searches raise TypeError.
        note = str(note_value) if note_value else None

        if income_expense_value == "Exp.":
            total_expense_count += 1
            total_expense += amount_value

            # count expense accounts
            account_value = row[account_col]
            if account_value:
                expense_account_data[account_value] = expense_account_data.get(account_value, 0) + 1

            # NOTE(review): the scraped source lost its indentation. Everything
            # below is assumed to sit inside the note guard because it keys on
            # or regex-searches the note -- confirm against the original file.
            if note is None:
                continue

            # count purchase_from entry, total amount, and first instance
            _tally(purchase_from_data, note, amount_value, period_value)

            category_value = row[category_col]
            subcategory_value = row[subcategory_col]

            if category_value == "Food":
                is_coffee_shop = subcategory_value in _COFFEE_SUBCATEGORIES
                count, total, first_seen, was_coffee_shop = food_data.get(note, (0, 0, None, False))
                food_data[note] = (
                    count + 1,
                    round(total + amount_value, 2),
                    _earliest(first_seen, period_value),
                    # Sticky flag: one qualifying entry marks the
                    # establishment, so the result does not depend on which
                    # row happens to be processed last.
                    was_coffee_shop or is_coffee_shop,
                )

                # catch GrabFood
                if subcategory_value == "Grab":
                    total_grab_food_count += 1

                # catch Foodpanda
                if subcategory_value == "Foodpanda":
                    total_foodpanda_count += 1

            # catch shopee orders
            if _SHOPEE_RE.search(note):
                total_shopee_count += 1

            # catch lazada orders
            if _LAZADA_RE.search(note):
                total_lazada_count += 1

            # catch amazon orders
            if _AMAZON_RE.search(note):
                total_amazon_count += 1

            # catch 711 orders
            if _SEVEN_ELEVEN_RE.search(note):
                total_711 += 1

            # catch grabcar and grabtaxi
            if category_value == "Transportation" and _GRAB_RE.search(note):
                total_grab_car_count += 1

        elif income_expense_value == "Income":
            total_income_count += 1
            total_income += amount_value

            if note is not None:
                # count income_from entry, total amount, and first instance
                _tally(income_from_data, note, amount_value, period_value)

    # Dates were kept as datetimes while accumulating; turn them into display
    # strings once, here, instead of formatting and re-parsing on every row.
    income_from_data = _with_formatted_periods(income_from_data)
    purchase_from_data = _with_formatted_periods(purchase_from_data)
    food_data = _with_formatted_periods(food_data)

    output = []
    output.append("# Money Manager Analysis")
    output.append("@crrmacarse")

    output.append("\n## Summary")
    output.append(f"- Total Income: PHP {total_income:,.2f}")
    output.append(f"- Total Expense: PHP {total_expense:,.2f}")
    balance = total_income - total_expense
    output.append(f"- Balance: PHP {balance:,.2f}")
    output.append(f"- Total Income Entry: {total_income_count}")
    output.append(f"- Total Expense Entry: {total_expense_count}")

    # TODO: Most common category, subcategory

    output.append("\n## Expense Accounts")
    output.append("| Account | Number of Entries ↓ |")
    output.append("|-------------------|-------------------|")
    sorted_expense_accounts = sorted(expense_account_data.items(), key=lambda item: item[1], reverse=True)
    for account, count in sorted_expense_accounts:
        output.append(f"| {account} | {count} |")

    output.append("\n## Top 10 Income From")
    output.append("| Income from | Number of Entries | Total Amount ↓ | First Instance |")
    output.append("|-------------------|-------------------|--------------|------------------|")
    top_10_income_from_data = sorted(income_from_data.items(), key=lambda item: item[1][1], reverse=True)[:10]
    for income_from, (count, total, first_instance) in top_10_income_from_data:
        output.append(f"| {income_from} | {count} | PHP {total:,.2f} | {first_instance} |")

    # TODO: Top 5 Highest income earned with date

    output.append("\n## Top 30 Expense From")
    top_30_purchase_from_data = sorted(purchase_from_data.items(), key=lambda item: item[1][0], reverse=True)[:30]
    output.append("| Expense from | Number of Entries ↓ | Total Amount | First Instance |")
    output.append("|-------------------|-------------------|--------------|------------------|")
    for purchase_from, (count, total, first_instance) in top_30_purchase_from_data:
        output.append(f"| {purchase_from} | {count} | PHP {total:,.2f} | {first_instance} |")

    output.append("\n## Top 10 Expense From by Amount")
    top_10_purchase_by_amount = sorted(purchase_from_data.items(), key=lambda item: item[1][1], reverse=True)[:10]
    output.append("| Expense from | Total Amount ↓ |")
    output.append("|-------------------|-------------------|")
    for purchase_from, (_, total, _) in top_10_purchase_by_amount:
        output.append(f"| {purchase_from} | PHP {total:,.2f} |")

    # TODO: Top 10 Most expensive expense with date

    output.append("\n## Top 30 Food")
    top_30_food_data = sorted(food_data.items(), key=lambda item: item[1][0], reverse=True)[:30]
    output.append("| Food Establishments | Number of Entries ↓ | Total Amount | First Instance |")
    output.append("|-------------------|-------------------|--------------|------------------|")
    for food_establishment, (count, total, first_instance, _) in top_30_food_data:
        output.append(f"| {food_establishment} | {count} | PHP {total:,.2f} | {first_instance} |")

    output.append("\n## Top 10 Fast Foods")
    food_data_fast_foods_filtered = [
        (name, data) for name, data in food_data.items()
        if _FAST_FOOD_RE.search(name)
    ]
    top_10_fast_foods = sorted(food_data_fast_foods_filtered, key=lambda item: item[1][0], reverse=True)[:10]
    output.append("| Fast Food | Number of Entries ↓ | Total Amount |")
    output.append("|-------------------|-------------------|-------------------|")
    for fast_food, (count, total, _, _) in top_10_fast_foods:
        output.append(f"| {fast_food} | {count} | PHP {total:,.2f} |")

    output.append("\n## Top 10 Coffee Shops")
    top_10_coffee_shops = process_coffee_shops(food_data)
    output.append("| Coffee Shop | Number of Entries ↓ | Total Amount |")
    output.append("|-------------------|-------------------|-------------------|")
    for coffee_shop, (count, total, _, _) in top_10_coffee_shops:
        output.append(f"| {coffee_shop} | {count} | PHP {total:,.2f} |")

    output.append("\n## Special Cases")
    output.append(f"- Total Shopee Order Count: {total_shopee_count}")
    output.append(f"- Total Lazada Order Count: {total_lazada_count}")
    output.append(f"- Total Amazon Order Count: {total_amazon_count}")
    output.append(f"- Total Foodpanda Count: {total_foodpanda_count}")
    output.append(f"- Total GrabFood Count: {total_grab_food_count}")
    output.append(f"- Total GrabCar Count: {total_grab_car_count}")
    output.append(f"- Total 711 Count: {total_711}")

    return output
if __name__ == "__main__":
    # Script entry point: prompt for the input/output paths, run the analysis
    # and write the Markdown report to disk.
    try:
        # temporarily added a default file path
        file_path = input("Enter the path to the .xlsx file: ") or "./dump/mm.xlsx"
        output_file_path = input("Enter the path to the output .md file (default: dump/output.md): ") or "dump/output.md"
        result = analyze_money_manager(file_path)
        write_to_file(output_file_path, "\n".join(result))
    except Exception as e:
        # Top-level boundary: report on stderr and exit non-zero so shells and
        # calling scripts can detect the failure instead of seeing status 0.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
# GitHub page footer: sign up for free to join this conversation on GitHub,
# or sign in to comment if you already have an account.