patrickfuller · August 28, 2025 14:47
diff --git a/combine_linkedin_csvs.py b/combine_linkedin_csvs.py
 # /// script
 # requires-python = ">=3.13"
 # dependencies = [
 #     "openpyxl",
 #     "pandas",
 # ]
 # ///

 """
 Given a folder of Connections.csv files downloaded through LinkedIn,
 create a single excel file with all the connections.
 """
 import os

 import pandas as pd

 # Get folder of connections.csv files
 base_path = os.path.expanduser('~/Desktop/connections')
 # os.listdir() returns entries in arbitrary order; sorting keeps the order of
 # the combined rows reproducible from run to run.
 csv_paths = sorted(
     os.path.join(base_path, file)
     for file in os.listdir(base_path)
     if file.endswith('.csv')
 )
 if not csv_paths:
     # Stop here with a clear message instead of failing later on a
     # missing-column lookup against an empty dataframe.
     raise SystemExit(f"No .csv files found in {base_path}")

 # Combine all the csv files into a single dataframe
 frames = []
 for csv_path in csv_paths:
     print("Processing", csv_path)
     # The first three lines of each file are skipped before the header row.
     single_df = pd.read_csv(csv_path, skiprows=3)
     # Tag every row with the file's base name (minus ".csv") so the source
     # of each connection is still known after the files are combined.
     single_df['Connector'] = os.path.basename(csv_path).removesuffix('.csv')
     frames.append(single_df)

 # Concatenate once: growing a dataframe inside the loop re-copies all
 # previously read rows on every iteration.
 df = pd.concat(frames, ignore_index=True)

 def merge_connectors(group):
     """
     Collapse all rows of one group into a single row.

     The first row of ``group`` supplies every column except 'Connector',
     which becomes a ', '-joined list of the distinct, non-null connector
     names ordered by the last whitespace-separated word of each name.
     Returns the merged row as a Series; ``group`` itself is not modified.
     """
     merged_row = group.iloc[0].copy()
     connectors = set(group['Connector'].dropna())
     # Sort on (last word, full name): the full name breaks ties so the result
     # does not depend on set iteration order, and slicing with [-1:] instead
     # of indexing with [-1] avoids an IndexError on a blank name.
     merged_row['Connector'] = ', '.join(
         sorted(connectors, key=lambda name: (name.split()[-1:], name))
     )
     return merged_row

 # Clean, sort, and merge connectors
 # Rows without a first name are discarded before grouping.
 df = df.dropna(subset=['First Name'])
 # One output row per URL. NOTE(review): dropna=False keeps rows whose URL is
 # missing as a single shared group, so merge_connectors reduces all of them
 # to one row -- confirm that is intended.
 df = df.groupby('URL', dropna=False, as_index=False).apply(
     merge_connectors, include_groups=False
 )
 df = df.sort_values(
     by=['Company', 'Last Name'],
     ascending=[True, True],
     ignore_index=True,
 )
 df = df.reset_index(drop=True)

 # Remake a dataframe with just the columns we need
 def get_full_name(row):
     """
     Build a display name from a row's 'First Name' and 'Last Name'.

     ``row`` is anything with a dict-style ``get`` (a Series or a dict).
     A part that is absent, None, or NaN is treated as empty, so a row with
     no last name yields "Jane" rather than "Jane nan".
     """
     parts = [row.get('First Name', ''), row.get('Last Name', '')]
     # pd.isna is true for None as well as the NaN pandas uses for empty cells.
     first_name, last_name = ('' if pd.isna(part) else part for part in parts)
     return f"{first_name} {last_name}".strip()

 # Keep only the columns that end up in the spreadsheet
 export_df = pd.DataFrame({
     'Company': df.get('Company', ''),
     'Name': df.apply(get_full_name, axis=1),
     'Position': df.get('Position', ''),
     'Connector': df.get('Connector', ''),
 })

 # Export to excel
 output_path = os.path.join(base_path, 'connections.xlsx')

 with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
     export_df.to_excel(writer, sheet_name='Connections', index=False)
     worksheet = writer.sheets['Connections']

     # Add hyperlinks to the Name column for LinkedIn URLs.
     # export_df is built from df row for row, so the n-th URL belongs to the
     # n-th data row; sheet rows are 1-based and row 1 holds the header.
     name_column_index = export_df.columns.get_loc('Name') + 1
     for row_index, url in enumerate(df['URL'], start=2):
         # The grouping step keeps rows whose URL is missing, and a NaN is
         # not a usable link target, so leave those names as plain text.
         if not isinstance(url, str) or not url:
             continue
         name_cell = worksheet.cell(row=row_index, column=name_column_index)
         name_cell.hyperlink = url
         name_cell.style = 'Hyperlink'

     # Set column widths: half the longest cell text, but never below 15
     for column_cells in worksheet.columns:
         length = max(
             len(str(cell.value)) if cell.value is not None else 0
             for cell in column_cells
         )
         column_letter = column_cells[0].column_letter
         worksheet.column_dimensions[column_letter].width = max(length * 0.5, 15)

     # Keep the header row visible while scrolling
     worksheet.freeze_panes = worksheet['A2']
	# /// script
	# requires-python = ">=3.13"
	# dependencies = [
	# "openpyxl",
	# "pandas",
	# ]
	# ///

	"""
	Given a folder of Connections.csv files downloaded through LinkedIn,
	create a single excel file with all the connections.
	"""
	import os

	import pandas as pd

	# Get folder of connections.csv files
	base_path = os.path.expanduser('~/Desktop/connections')
	# os.listdir() returns entries in arbitrary order; sorting keeps the order of
	# the combined rows reproducible from run to run.
	csv_paths = sorted(
	    os.path.join(base_path, file)
	    for file in os.listdir(base_path)
	    if file.endswith('.csv')
	)
	if not csv_paths:
	    # Stop here with a clear message instead of failing later on a
	    # missing-column lookup against an empty dataframe.
	    raise SystemExit(f"No .csv files found in {base_path}")

	# Combine all the csv files into a single dataframe
	frames = []
	for csv_path in csv_paths:
	    print("Processing", csv_path)
	    # The first three lines of each file are skipped before the header row.
	    single_df = pd.read_csv(csv_path, skiprows=3)
	    # Tag every row with the file's base name (minus ".csv") so the source
	    # of each connection is still known after the files are combined.
	    single_df['Connector'] = os.path.basename(csv_path).removesuffix('.csv')
	    frames.append(single_df)

	# Concatenate once: growing a dataframe inside the loop re-copies all
	# previously read rows on every iteration.
	df = pd.concat(frames, ignore_index=True)

	def merge_connectors(group):
	    """
	    Collapse all rows of one group into a single row.

	    The first row of ``group`` supplies every column except 'Connector',
	    which becomes a ', '-joined list of the distinct, non-null connector
	    names ordered by the last whitespace-separated word of each name.
	    Returns the merged row as a Series; ``group`` itself is not modified.
	    """
	    merged_row = group.iloc[0].copy()
	    connectors = set(group['Connector'].dropna())
	    # Sort on (last word, full name): the full name breaks ties so the result
	    # does not depend on set iteration order, and slicing with [-1:] instead
	    # of indexing with [-1] avoids an IndexError on a blank name.
	    merged_row['Connector'] = ', '.join(
	        sorted(connectors, key=lambda name: (name.split()[-1:], name))
	    )
	    return merged_row

	# Clean, sort, and merge connectors
	# Rows without a first name are discarded before grouping.
	df = df.dropna(subset=['First Name'])
	# One output row per URL. NOTE(review): dropna=False keeps rows whose URL is
	# missing as a single shared group, so merge_connectors reduces all of them
	# to one row -- confirm that is intended.
	df = df.groupby('URL', dropna=False, as_index=False).apply(
	    merge_connectors, include_groups=False
	)
	df = df.sort_values(
	    by=['Company', 'Last Name'],
	    ascending=[True, True],
	    ignore_index=True,
	)
	df = df.reset_index(drop=True)

	# Remake a dataframe with just the columns we need
	def get_full_name(row):
	    """
	    Build a display name from a row's 'First Name' and 'Last Name'.

	    ``row`` is anything with a dict-style ``get`` (a Series or a dict).
	    A part that is absent, None, or NaN is treated as empty, so a row with
	    no last name yields "Jane" rather than "Jane nan".
	    """
	    parts = [row.get('First Name', ''), row.get('Last Name', '')]
	    # pd.isna is true for None as well as the NaN pandas uses for empty cells.
	    first_name, last_name = ('' if pd.isna(part) else part for part in parts)
	    return f"{first_name} {last_name}".strip()

	# Keep only the columns that end up in the spreadsheet
	export_df = pd.DataFrame({
	    'Company': df.get('Company', ''),
	    'Name': df.apply(get_full_name, axis=1),
	    'Position': df.get('Position', ''),
	    'Connector': df.get('Connector', ''),
	})

	# Export to excel
	output_path = os.path.join(base_path, 'connections.xlsx')

	with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
	    export_df.to_excel(writer, sheet_name='Connections', index=False)
	    worksheet = writer.sheets['Connections']

	    # Add hyperlinks to the Name column for LinkedIn URLs.
	    # export_df is built from df row for row, so the n-th URL belongs to the
	    # n-th data row; sheet rows are 1-based and row 1 holds the header.
	    name_column_index = export_df.columns.get_loc('Name') + 1
	    for row_index, url in enumerate(df['URL'], start=2):
	        # The grouping step keeps rows whose URL is missing, and a NaN is
	        # not a usable link target, so leave those names as plain text.
	        if not isinstance(url, str) or not url:
	            continue
	        name_cell = worksheet.cell(row=row_index, column=name_column_index)
	        name_cell.hyperlink = url
	        name_cell.style = 'Hyperlink'

	    # Set column widths: half the longest cell text, but never below 15
	    for column_cells in worksheet.columns:
	        length = max(
	            len(str(cell.value)) if cell.value is not None else 0
	            for cell in column_cells
	        )
	        column_letter = column_cells[0].column_letter
	        worksheet.column_dimensions[column_letter].width = max(length * 0.5, 15)

	    # Keep the header row visible while scrolling
	    worksheet.freeze_panes = worksheet['A2']