Skip to content

Instantly share code, notes, and snippets.

@patrickfuller
Last active August 28, 2025 14:47
Show Gist options
  • Save patrickfuller/73b369de9537343432b9e871367a1f8b to your computer and use it in GitHub Desktop.
Save patrickfuller/73b369de9537343432b9e871367a1f8b to your computer and use it in GitHub Desktop.
LinkedIn Connections CSV Combiner
# /// script
# requires-python = ">=3.13"
# dependencies = [
# "openpyxl",
# "pandas",
# ]
# ///
"""
Given a folder of Connections.csv files downloaded through LinkedIn,
create a single Excel file with all the connections.
"""
import os
import pandas as pd
# Get folder of connections.csv files
base_path = os.path.expanduser('~/Desktop/connections')
# os.listdir returns entries in arbitrary order; sorting makes the processing
# order (and therefore the order of rows in the combined frame) reproducible.
csv_paths = sorted(
    os.path.join(base_path, file)
    for file in os.listdir(base_path)
    if file.endswith('.csv')
)

# Combine all the csv files into a single dataframe.
# Frames are collected first and concatenated once: concatenating inside the
# loop re-copies everything read so far on every iteration, and seeding the
# concat with an empty DataFrame relies on deprecated pandas behavior.
frames = []
for csv_path in csv_paths:
    print("Processing", csv_path)
    # skiprows=3 drops the three lines that precede the header row
    # (presumably the notes preamble of a LinkedIn export - confirm).
    single_df = pd.read_csv(csv_path, skiprows=3)
    # Tag every row with the file it came from (file name minus '.csv').
    single_df['Connector'] = os.path.basename(csv_path).removesuffix('.csv')
    frames.append(single_df)

# pd.concat raises on an empty list, so keep the original empty-frame result
# when the folder contains no csv files.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
def merge_connectors(group):
    """
    Collapse a group of duplicate connection rows into a single row.

    The first row of ``group`` supplies every field except 'Connector', which
    becomes a comma-separated list of the distinct, non-null connector names
    found in the group. Names are ordered by their last whitespace-separated
    word, with the full name as a tie-breaker.
    """
    merged_row = group.iloc[0].copy()
    connectors = set(group['Connector'].dropna())

    def sort_key(name):
        words = name.split()
        # A blank name has no last word; sort it first instead of raising
        # IndexError. The full name breaks ties so the result does not depend
        # on set iteration order.
        return (words[-1] if words else '', name)

    merged_row['Connector'] = ', '.join(sorted(connectors, key=sort_key))
    return merged_row
# Clean, sort, and merge connectors
# 1. Drop rows that have no first name.
named_rows = df.dropna(subset=['First Name'])
# 2. Collapse rows that share a URL into one row via merge_connectors.
# NOTE(review): dropna=False keeps rows whose URL is missing, but they all
# fall into a single group and so collapse to one row - confirm intended.
merged_rows = (
    named_rows
    .groupby('URL', dropna=False, as_index=False)
    .apply(merge_connectors, include_groups=False)
)
# 3. Order by company, then last name. ignore_index=True already renumbers
# the rows 0..n-1, so no further index reset is needed.
df = merged_rows.sort_values(
    by=['Company', 'Last Name'],
    ascending=[True, True],
    ignore_index=True,
)
# Remake a dataframe with just the columns we need
def get_full_name(row):
    """
    Build a display name from a row's 'First Name' and 'Last Name' fields.

    Missing keys and missing values (None/NaN, as pandas produces for empty
    CSV cells) are treated as empty strings, so a row without a last name
    yields just the first name rather than e.g. "Ann nan".
    """
    parts = []
    for key in ('First Name', 'Last Name'):
        value = row.get(key, '')
        parts.append('' if pd.isna(value) else str(value))
    # strip() removes the separator left over when either part is empty.
    return ' '.join(parts).strip()
# Build the export frame: 'Name' is computed per row, the other columns are
# copied from df (falling back to '' when df lacks the column).
full_names = df.apply(get_full_name, axis=1)
export_df = pd.DataFrame({
    column: full_names if column == 'Name' else df.get(column, '')
    for column in ('Company', 'Name', 'Position', 'Connector')
})
# Export to excel
output_path = os.path.join(base_path, 'connections.xlsx')
with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
    export_df.to_excel(writer, sheet_name='Connections', index=False)
    worksheet = writer.sheets['Connections']

    # Add hyperlinks to the Name column for LinkedIn URLs.
    # Worksheet rows and columns are 1-based and row 1 holds the header,
    # so the first data row is row 2. export_df was built from df, so the
    # n-th URL in df belongs to the n-th exported row.
    name_column_index = export_df.columns.get_loc('Name') + 1
    for row_index, url in enumerate(df['URL'], start=2):
        # A missing URL shows up as NaN, which is not a valid hyperlink
        # target; leave that name as plain text.
        if pd.isna(url):
            continue
        name_cell = worksheet.cell(row=row_index, column=name_column_index)
        name_cell.hyperlink = url
        name_cell.style = 'Hyperlink'

    # Set column widths: half the longest cell's character count, with a
    # floor of 15.
    for column_cells in worksheet.columns:
        length = max(
            len(str(cell.value)) if cell.value is not None else 0
            for cell in column_cells
        )
        column_letter = column_cells[0].column_letter
        worksheet.column_dimensions[column_letter].width = max(length * 0.5, 15)

    # Freeze everything above A2, i.e. keep the header row visible.
    worksheet.freeze_panes = worksheet['A2']
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment