Forked from toddbirchard/pandas_dataframe_difference.py
Created April 23, 2020 01:15
-
-
Save Per48edjes/348269b8cc19b49b91eeee68837da0fb to your computer and use it in GitHub Desktop.
Helper function to compare two DataFrames and find rows which are unique or shared.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def dataframe_difference(df1, df2, which=None, output_path='data/diff.csv'):
    """Find rows which are unique to, or shared by, two DataFrames.

    The frames are outer-merged with ``indicator=True``, which adds a
    ``_merge`` column labelling each row ``'left_only'``, ``'right_only'``
    or ``'both'``.  No ``on=`` is passed, so pandas joins on the columns
    the two frames have in common.

    Args:
        df1: Left DataFrame.
        df2: Right DataFrame.
        which: ``None`` to keep every row that is *not* present in both
            frames, or one of ``'left_only'``, ``'right_only'``, ``'both'``
            to keep only the rows carrying that ``_merge`` label.
        output_path: Where to write the result as CSV.  Defaults to
            ``'data/diff.csv'``.  Missing parent directories are created.
            Pass ``None`` to skip writing a file.

    Returns:
        The filtered merge result, including the ``_merge`` column.
    """
    # Imported locally so the function is self-contained regardless of
    # what the surrounding module imports.
    from pathlib import Path

    comparison_df = df1.merge(df2, indicator=True, how='outer')
    if which is None:
        diff_df = comparison_df[comparison_df['_merge'] != 'both']
    else:
        diff_df = comparison_df[comparison_df['_merge'] == which]

    if output_path is not None:
        destination = Path(output_path)
        # to_csv does not create directories; without this a missing
        # 'data/' folder makes the call fail with OSError.
        destination.parent.mkdir(parents=True, exist_ok=True)
        diff_df.to_csv(destination)
    return diff_df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.