Skip to content

Instantly share code, notes, and snippets.

@yjw868
Created March 31, 2017 12:28
Show Gist options
  • Save yjw868/690df25be93cfb48e20521097fa14801 to your computer and use it in GitHub Desktop.
Save yjw868/690df25be93cfb48e20521097fa14801 to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
from fuzzywuzzy import process
import multiprocessing as mp
def multi_run_wrapper(args):
    """Expand one packed argument sequence into a ``process_df`` call.

    ``Pool.map`` passes a single object to the worker function, so the
    caller bundles the positional arguments of ``process_df`` together and
    this adapter unpacks them again.

    Args:
        args: Iterable holding the positional arguments for ``process_df``.

    Returns:
        Whatever ``process_df`` returns for those arguments.
    """
    positional = tuple(args)
    return process_df(*positional)
def find_app_name(name, all_names, score_cutoff=88, limit=5, separator="; "):
    """Suggest similar names for *name* by fuzzy-matching it against *all_names*.

    Args:
        name: The name to look up.
        all_names: Candidate names, passed to fuzzywuzzy as ``choices``.
        score_cutoff: Minimum fuzzywuzzy score a candidate needs to be
            kept. The default of 88 is the value the original note says was
            chosen "for Magnus".
        limit: Maximum number of candidates requested from fuzzywuzzy.
        separator: Text placed between suggestions when more than one
            candidate qualifies, so the individual names stay readable.

    Returns:
        The qualifying candidate names joined by *separator*, or ``np.nan``
        when no candidate qualifies or the lookup raises.
    """
    try:
        matches = process.extractBests(
            name, choices=all_names, score_cutoff=score_cutoff, limit=limit
        )
    except Exception as exc:
        # Best effort: a single name that cannot be matched is reported and
        # skipped instead of aborting the whole run.
        print(exc)
        return np.nan

    # Index instead of unpacking: fuzzywuzzy documents 3-tuples
    # (match, score, key) when the choices are dict-like, 2-tuples otherwise.
    # The lower bound is already enforced by score_cutoff; a score of 100 is
    # excluded -- presumably the entry matching itself or an exact duplicate.
    suggestions = [str(match[0]) for match in matches if match[1] < 100]
    if not suggestions:
        return np.nan
    return separator.join(suggestions)
def process_df(df, all_names):
    """Fill the "Suggested Name" column of every frame in *df*.

    Previously only the first frame of the list was processed, so all other
    chunks kept their placeholder values.

    Args:
        df: A list of DataFrames (for example the chunks of a split frame)
            or a single DataFrame. Each frame needs an "Account Name"
            column.
        all_names: Candidate names forwarded to ``find_app_name``.

    Returns:
        *df* itself; the frames are modified in place.
    """
    # Accept a bare DataFrame as well as the list of chunks the script sends.
    frames = [df] if isinstance(df, pd.DataFrame) else df
    for frame in frames:
        frame["Suggested Name"] = frame["Account Name"].apply(
            lambda account: find_app_name(account, all_names)
        )
    return df
if __name__ == '__main__':
    # Read the account list, look up similar account names in parallel and
    # write the rows that received a suggestion to an Excel sheet.
    infileName = 'SF Account Names data.csv'
    outfileName = "SF Account Names data - out.xlsx"

    sfData = pd.read_csv(infileName, header=0)
    Name = sfData["Account Name"]
    sfData["Suggested Name"] = np.nan

    cpu_count = mp.cpu_count()

    # Split by row position and slice with iloc instead of handing the
    # DataFrame to np.array_split, which goes through the deprecated
    # DataFrame.swapaxes. Empty groups (fewer rows than CPUs) are dropped.
    row_groups = np.array_split(np.arange(len(sfData)), cpu_count)

    # One task per chunk so that every worker gets something to do; the
    # earlier single-task list kept all the work in one process.
    # process_df takes its frames as a list, so each task wraps one chunk
    # in a one-element list.
    tasks = [([sfData.iloc[rows]], Name) for rows in row_groups if len(rows)]

    if tasks:
        p = mp.Pool(cpu_count)
        try:
            pool_results = p.map(multi_run_wrapper, tasks)
        finally:
            # Always release the worker processes, even if a task raised.
            p.close()
            p.join()
        results = pd.concat([chunks[0] for chunks in pool_results])
    else:
        # No rows to process: pd.concat would raise on an empty list.
        results = sfData

    sf_output = results[results['Suggested Name'].notnull()]

    # The context manager saves and closes the workbook; ExcelWriter.save()
    # is no longer available in current pandas releases.
    with pd.ExcelWriter(outfileName) as writer:
        sf_output.to_excel(writer, sheet_name='SF Account Name')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment