Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save codedeep79/386c8699e4b15fd1460e35065cd849aa to your computer and use it in GitHub Desktop.
Save codedeep79/386c8699e4b15fd1460e35065cd849aa to your computer and use it in GitHub Desktop.
Separate Excel Data into Workbooks by Column Values

Example 1

from pathlib import Path

import pandas as pd  # pip install pandas

# Define & create output directory
output_dir = Path(__file__).parent / "output"
output_dir.mkdir(parents=True, exist_ok=True)

# Define Excel file path
excel_file = Path(__file__).parent / "Data sample 1.xlsx"

df = pd.read_excel(excel_file)
column_name = "Segment"
unique_values = df[column_name].unique()

for unique_value in unique_values:
    df_output = df[df[column_name].str.fullmatch(unique_value)]
    output_path = output_dir / f"{unique_value}.xlsx"
    df_output.to_excel(output_path, sheet_name=unique_value[:31], index=False)

Data sample: https://docs.google.com/spreadsheets/d/1CPpcxYVzAZtlNZnJ-_4sldZku-xB1jpN

Example 2

from pathlib import Path

import pandas as pd  # pip install pandas


# Define & create output directory
output_dir = Path(__file__).parent / "output"
output_dir.mkdir(parents=True, exist_ok=True)

# Define Excel file path
excel_file = Path(__file__).parent / "Data sample 2.xlsx"

df = pd.read_excel(excel_file)
column_name = "Country"
df[column_name] = df[column_name].str.strip().str.title()
unique_values = df[column_name].unique()

for unique_value in unique_values:
    df_output = df[df[column_name].str.fullmatch(unique_value)]
    output_path = output_dir / f"{unique_value}.xlsx"
    df_output.to_excel(output_path, sheet_name=unique_value[:31], index=False)

Data sample: https://docs.google.com/spreadsheets/d/1wC7kDFZ9-umysOnRkl3bDgSbBZUUEg9h

Example 3

from pathlib import Path

import pandas as pd  # pip install pandas

# Define & create output directory
output_dir = Path(__file__).parent / "output"
output_dir.mkdir(parents=True, exist_ok=True)

# Define Excel file path
excel_file = Path(__file__).parent / "Data sample 3.xlsx"

df = pd.read_excel(excel_file)
column_name = "Country"
replace_symbols = ['>', '<', ':', '"', '/', '\\\\', '\|', '\?', '\*']
df[column_name] = (
    df[column_name].replace(replace_symbols, '', regex=True).str.strip().str.title()
)
unique_values = df[column_name].unique()

for unique_value in unique_values:
    df_output = df[df[column_name].str.fullmatch(unique_value)]
    output_path = output_dir / f"{unique_value}.xlsx"
    df_output.to_excel(output_path, sheet_name=unique_value[:31], index=False)

Data sample: https://docs.google.com/spreadsheets/d/14OwiSeX3t2jY5gysOjK3Un0tAEzjpTiL

Example 4: Split data with numeric column

from pathlib import Path

import pandas as pd  

output_dir = Path(__file__).parent / "output"
output_dir.mkdir(parents=True, exist_ok=True)

excel_file = Path(__file__).parent / "Result1_20230519.xlsx"

df = pd.read_excel(excel_file, sheet_name='Result1_20230519')
column_name = "CTKD"
df[column_name] = (
    df[column_name].apply(str).str.replace(',', '')
)

unique_values = df[column_name].unique()

for unique_value in unique_values:
    df_output = df[df[column_name].str.fullmatch(unique_value)]
    output_path = output_dir / f"{unique_value}.xlsx"
    df_output.to_excel(output_path, sheet_name=unique_value[:31], index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment