Last active
August 28, 2025 14:47
-
-
Save patrickfuller/73b369de9537343432b9e871367a1f8b to your computer and use it in GitHub Desktop.
LinkedIn Connections CSV Combiner
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # /// script | |
| # requires-python = ">=3.13" | |
| # dependencies = [ | |
| # "openpyxl", | |
| # "pandas", | |
| # ] | |
| # /// | |
| """ | |
| Given a folder of Connections.csv files downloaded through LinkedIn, | |
| create a single Excel file with all the connections. | |
| """ | |
| import os | |
| import pandas as pd | |
# Locate every CSV export in the input folder.  The paths are sorted so the
# processing order (and therefore which duplicate row is kept when rows are
# merged later) does not depend on the arbitrary order os.listdir returns.
base_path = os.path.expanduser('~/Desktop/connections')
csv_paths = sorted(
    os.path.join(base_path, file)
    for file in os.listdir(base_path)
    if file.endswith('.csv')
)

# Load each export and tag its rows with the file's base name (minus the
# '.csv' suffix) so we know whose connections they are.
frames = []
for csv_path in csv_paths:
    print("Processing", csv_path)
    # skiprows=3 makes the fourth line the header row -- presumably to skip
    # the notes LinkedIn writes above the table; confirm against an export.
    single_df = pd.read_csv(csv_path, skiprows=3)
    single_df['Connector'] = os.path.basename(csv_path).removesuffix('.csv')
    frames.append(single_df)

# Concatenate once instead of growing the frame inside the loop, which
# re-copies all accumulated rows on every iteration.  pd.concat raises on an
# empty list, so fall back to an empty frame when no CSV file was found.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
def merge_connectors(group):
    """Collapse all rows of one group into a single row.

    Args:
        group: DataFrame holding every row that shares one grouping key.
            It must contain a 'Connector' column of strings (missing
            values are allowed and ignored).

    Returns:
        A copy of the group's first row whose 'Connector' field is replaced
        by a ', '-separated list of the distinct connectors in the group,
        ordered by the last whitespace-separated word of each name.
    """
    connectors = set(group['Connector'].dropna())
    # Sort on (last word, full name).  The full name breaks ties between
    # connectors sharing a last word, so the output no longer depends on set
    # iteration order.  Slicing with [-1:] yields [] for a blank name instead
    # of raising IndexError the way [-1] would.
    ordered = sorted(connectors, key=lambda name: (name.split()[-1:], name))

    merged_row = group.iloc[0].copy()
    merged_row['Connector'] = ', '.join(ordered)
    return merged_row
# Clean, merge, and sort the combined connections.

# Rows without a first name are discarded up front.
named_rows = df.dropna(subset=['First Name'])

# One group per profile URL; each group is reduced to a single row by
# merge_connectors.  dropna=False keeps rows whose URL is missing instead of
# silently dropping them.
# NOTE(review): all rows with a missing URL end up in that one group, and
# merge_connectors keeps only the group's first row -- confirm this is the
# intended handling for connections without a URL.
grouped_by_url = named_rows.groupby('URL', dropna=False, as_index=False)
merged_rows = grouped_by_url.apply(merge_connectors, include_groups=False)

# Order by company, then by last name within a company, and renumber rows.
sorted_rows = merged_rows.sort_values(
    by=['Company', 'Last Name'],
    ascending=[True, True],
    ignore_index=True,
)
df = sorted_rows.reset_index(drop=True)
| # Remake a dataframe with just the columns we need | |
def get_full_name(row):
    """Build a display name from a row's 'First Name' and 'Last Name'.

    Args:
        row: Mapping-like object with a ``get`` method (a dict or a pandas
            Series), optionally holding 'First Name' and 'Last Name'.

    Returns:
        The available name parts joined by a single space.  Parts that are
        absent, missing (None/NaN/NA), or blank are left out, so the result
        may be just one name or an empty string.
    """
    parts = []
    for column in ('First Name', 'Last Name'):
        value = row.get(column, '')
        # Missing cells come back from pandas as NaN; formatting them into
        # the name directly would produce text such as "Cher nan".
        if pd.isna(value):
            continue
        text = str(value).strip()
        if text:
            parts.append(text)
    return ' '.join(parts)
# Assemble the export table: a combined display name per row plus the
# columns copied straight from df.  df.get supplies '' for any of those
# columns that is absent from df.
full_names = df.apply(get_full_name, axis=1)
export_columns = {
    'Company': df.get('Company', ''),
    'Name': full_names,
    'Position': df.get('Position', ''),
    'Connector': df.get('Connector', ''),
}
export_df = pd.DataFrame(export_columns)
# Export to Excel.  All worksheet tweaks happen inside the ``with`` block
# because the workbook is written to disk when the writer is closed.
output_path = os.path.join(base_path, 'connections.xlsx')
with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
    export_df.to_excel(writer, sheet_name='Connections', index=False)
    worksheet = writer.sheets['Connections']

    # Turn each name into a hyperlink to that person's URL.  Worksheet
    # columns are 1-based, hence the +1; data rows start at worksheet row 2
    # because row 1 holds the header.  export_df was built from df, so both
    # share the same row order.
    name_column_index = export_df.columns.get_loc('Name') + 1
    for row_index, url in enumerate(df['URL'], start=2):
        # A missing or blank URL is not a valid link target; leave such
        # names as plain text instead of attaching a broken hyperlink.
        if pd.isna(url) or not str(url).strip():
            continue
        name_cell = worksheet.cell(row=row_index, column=name_column_index)
        name_cell.hyperlink = url
        name_cell.style = 'Hyperlink'

    # Set column widths: half the longest cell's character count, but never
    # narrower than 15.
    for column_cells in worksheet.columns:
        length = max(
            len(str(cell.value)) if cell.value is not None else 0
            for cell in column_cells
        )
        column_letter = column_cells[0].column_letter
        worksheet.column_dimensions[column_letter].width = max(length * 0.5, 15)

    # Freeze everything above A2 so the header row stays visible.
    worksheet.freeze_panes = worksheet['A2']
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment