Created
March 3, 2022 19:06
-
-
Save alexmill/eb48aed6c1c69a79b1b9843fa7c1aa45 to your computer and use it in GitHub Desktop.
Methods to search and retrieve pandas dataframe columns by substring matching.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

import pandas as pd
def colfind(self, query, ignore_case=True):
    """Return the labels of the columns whose name contains ``query``.

    The query is treated as literal text (it is passed through
    ``re.escape``) and searched for as a substring of each column
    label's string form.

    Args:
        self: The ``pandas.DataFrame`` whose columns are searched.
        query: Literal text to look for inside each column label.
        ignore_case: When true (the default) the match is
            case-insensitive; otherwise it is case-sensitive.

    Returns:
        A list of the matching column labels, in column order. The list
        is empty when nothing matches.

    Raises:
        TypeError: If ``self`` is not a ``pandas.DataFrame`` (or a
            subclass of it).
    """
    if not isinstance(self, pd.DataFrame):
        raise TypeError(
            'colfind method only applicable to pandas.DataFrame objects'
        )
    # re.compile needs integer flags: 0 means "no flags", whereas None
    # raises a TypeError as soon as the pattern is compiled.
    flags = re.IGNORECASE if ignore_case else 0
    pattern = re.compile(re.escape(query), flags)
    # Search the bare label text (str() also covers non-string labels) so
    # that queries containing parentheses cannot match spuriously.
    return [col for col in self.columns if pattern.search(str(col))]
def colget(self, query, ignore_case=True):
    """Return the sub-frame made of the columns whose name contains ``query``.

    Column selection is delegated to ``colfind``; the resulting list of
    labels is then used to index ``self``.

    Args:
        self: The ``pandas.DataFrame`` to select columns from.
        query: Literal text to look for inside each column label.
        ignore_case: Forwarded to ``colfind``; when true (the default)
            the match is case-insensitive.

    Returns:
        The result of indexing ``self`` with the list of matching
        column labels.
    """
    # The original called an undefined ``colsearch``; the lookup helper
    # defined in this file is ``colfind``.
    return self[colfind(self, query, ignore_case=ignore_case)]
# Attach the helpers to pandas.DataFrame so they can be called as
# methods on DataFrame objects.
setattr(pd.DataFrame, "colfind", colfind)
setattr(pd.DataFrame, "colget", colget)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment