Last active
February 12, 2024 00:50
-
-
Save Samk13/8a072b2053f0718dd9b7591b02d185c2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# See https://github.com/inveniosoftware/invenio-communities/pull/1086/files#diff-6c07ffce9d2dd97c912da89fd4869787bdb49626a3fa278d466117cc8e489941
# Goal: simplify the function below.
def filter_dict_keys(src, keys):
    """Filter a dictionary based on a list of key paths.

    Each entry of ``keys`` is either a plain top-level key (``"id"``) or a
    dot-separated path into nested dictionaries (``"metadata.title"``).

    :param src: Dictionary to filter. It is never modified.
    :param keys: Iterable of key-path strings to keep.
    :returns: A new dictionary holding only the requested paths.

        * A plain key that exists in ``src`` is copied by reference (the
          value is the same object as in ``src``).
        * Dotted paths are resolved recursively; when the first segment
          maps to a dictionary, that key is always present in the result,
          possibly as an empty dictionary if none of its sub-paths exist.
        * A dotted path whose first segment is missing, or maps to a
          non-dictionary value, is ignored.
        * If a key is requested both whole and through dotted paths, the
          whole value wins.
    """
    result = {}
    # First path segment -> list of the remaining sub-paths under it.
    nested = {}
    for key in keys:
        head, dot, rest = key.partition(".")
        if dot:
            nested.setdefault(head, []).append(rest)
        elif head in src:
            result[head] = src[head]

    # Iterate over ``src`` (rather than ``nested``) so that nested entries
    # are inserted in the source dictionary's key order.
    for head, value in src.items():
        if head in nested and head not in result and isinstance(value, dict):
            # Always build a fresh dictionary here; writing into a value
            # taken from ``src`` would mutate the caller's data.
            result[head] = filter_dict_keys(value, nested[head])
    return result
# TODO: add the function below to https://github.com/inveniosoftware/invenio-records/blob/master/invenio_records/dictutils.py
def filter_dict_keys_v2(src, keys):
    """Recursively filter a dictionary based on a list of key paths.

    Each entry of ``keys`` is either a plain top-level key (``"id"``) or a
    dot-separated path into nested dictionaries (``"metadata.title"``).

    :param src: Dictionary to filter. It is never modified.
    :param keys: Iterable of key-path strings to keep.
    :returns: A new dictionary holding only the requested paths. Paths that
        share a prefix (``"a.b.c"`` and ``"a.b.d"``) are merged at every
        depth. A dotted path whose first segment is missing, or maps to a
        non-dictionary value, is ignored; a dotted path that fails further
        down leaves an empty dictionary at the point where it failed.
    """

    def traverse(current_dict, key_path):
        """Return the single-branch dict leading to ``key_path``, or ``{}``."""
        part, _, remaining = key_path.partition(".")
        if part not in current_dict:
            return {}  # key path does not exist
        value = current_dict[part]
        if not remaining:
            return {part: value}  # end of path, include this key
        if not isinstance(value, dict):
            # Cannot descend into a scalar/list; treat the path as missing
            # instead of doing a substring test or raising TypeError.
            return {}
        return {part: traverse(value, remaining)}

    def deep_merge(base, extra):
        """Return a new dict combining ``base`` and ``extra`` at every depth."""
        # Copy before writing: ``base`` may be an object that lives in ``src``.
        merged = dict(base)
        for name, value in extra.items():
            existing = merged.get(name)
            if isinstance(existing, dict) and isinstance(value, dict):
                merged[name] = deep_merge(existing, value)
            else:
                merged[name] = value
        return merged

    result = {}
    for key in keys:
        if "." in key:
            # Nested path: fold its branch into what was collected so far.
            result = deep_merge(result, traverse(src, key))
        elif key in src:
            # Top-level key: take the whole value as-is.
            result[key] = src[key]
    return result
# Sample record payload that both filter implementations are run against below.
src = {
    "custom_fields": {
        "rdm:repository_url": "https://rc.upr.edu.cu/jspui/handle/DICT/3890",
    },
    "id": "y4cmy-4mk78",
    "is_draft": "false",
    "updated": "2024-02-08T14:05:05.254680+00:00",
    "revision_id": 3,
    "created": "2024-02-08T14:05:05.134344+00:00",
    "is_published": "true",
    "status": "published",
    "files": {"enabled": "true", "order": []},
    "access": {
        "files": "public",
        "embargo": {"reason": "null", "active": "false"},
        "record": "public",
        "status": "open",
    },
    "stats": {
        "this_version": {
            "views": 0,
            "data_volume": 0.0,
            "downloads": 0,
            "unique_downloads": 0,
            "unique_views": 0,
        },
        "all_versions": {
            "views": 0,
            "data_volume": 0.0,
            "downloads": 0,
            "unique_downloads": 0,
            "unique_views": 0,
        },
    },
    "parent": {"communities": {}, "id": "q3e4b-xv278"},
    "metadata": {
        "resource_type": {
            "title": {"de": "Datensatz", "en": "Dataset"},
            "id": "dataset",
        },
        "rights": [
            {
                "id": "cc-by-4.0",
                "description": {
                    "en": "The Creative Commons Attribution license allows re-distribution and re-use of a licensed work on the condition that the creator is appropriately credited."
                },
                "icon": "cc-by-icon",
                "props": {
                    "scheme": "spdx",
                    "url": "https://creativecommons.org/licenses/by/4.0/legalcode",
                },
                "title": {"en": "Creative Commons Attribution 4.0 International"},
            }
        ],
        "title": "Caracterización ingeniero geológica del municipio de Pinar del Río",
        "publication_date": "2022",
        "creators": [
            {
                "person_or_org": {
                    "name": "Martínez Silva, Rafael",
                    "family_name": "Martínez Silva",
                    "given_name": "Rafael",
                    "type": "personal",
                }
            }
        ],
        "subjects": [
            {"subject": "Ingeniería geológica"},
            {"subject": "Sismicidad"},
            {"subject": "Estado tensional"},
        ],
        "languages": [{"title": {"en": "Spanish"}, "id": "spa"}],
        "publisher": "Universidad de Pinar del Río \"Hermanos Saíz Montes de Oca\"",
    },
    "pids": {
        "doi": {
            "client": "datacite",
            "identifier": "10.81088/y4cmy-4mk78",
            "provider": "datacite",
        },
        "oai": {
            "identifier": "oai:inveniordm.web.cern.ch:y4cmy-4mk78",
            "provider": "oai",
        },
    },
    "versions": {"is_latest": "true", "index": 1},
    "links": {
        "self": "https://inveniordm.web.cern.ch/api/records/y4cmy-4mk78",
        "self_html": "https://inveniordm.web.cern.ch/records/y4cmy-4mk78",
        "self_doi": "https://inveniordm.web.cern.ch/doi/10.81088/y4cmy-4mk78",
        "doi": "https://doi.org/10.81088/y4cmy-4mk78",
        "self_iiif_manifest": "https://inveniordm.web.cern.ch/api/iiif/record:y4cmy-4mk78/manifest",
        "self_iiif_sequence": "https://inveniordm.web.cern.ch/api/iiif/record:y4cmy-4mk78/sequence/default",
        "files": "https://inveniordm.web.cern.ch/api/records/y4cmy-4mk78/files",
        "archive": "https://inveniordm.web.cern.ch/api/records/y4cmy-4mk78/files-archive",
        "latest": "https://inveniordm.web.cern.ch/api/records/y4cmy-4mk78/versions/latest",
        "latest_html": "https://inveniordm.web.cern.ch/records/y4cmy-4mk78/latest",
        "draft": "https://inveniordm.web.cern.ch/api/records/y4cmy-4mk78/draft",
        "versions": "https://inveniordm.web.cern.ch/api/records/y4cmy-4mk78/versions",
        "access_links": "https://inveniordm.web.cern.ch/api/records/y4cmy-4mk78/access/links",
        "reserve_doi": "https://inveniordm.web.cern.ch/api/records/y4cmy-4mk78/draft/pids/doi",
        "communities": "https://inveniordm.web.cern.ch/api/records/y4cmy-4mk78/communities",
        "communities-suggestions": "https://inveniordm.web.cern.ch/api/records/y4cmy-4mk78/communities-suggestions",
        "requests": "https://inveniordm.web.cern.ch/api/records/y4cmy-4mk78/requests",
    },
}
# Key paths to keep: plain names select top-level keys, dotted names select
# entries nested under "metadata".
keys = [
    "uuid",
    "created",
    "updated",
    "metadata.subjects",
    "id",
    "slug",
    "theme",
    "version_id",
    "metadata.title",
    "metadata.type",
    "metadata.website",
    "metadata.organizations",
    "metadata.funding",
]
import json | |
# Self-check: both implementations must agree on the sample record, then
# print each result (v2 first, then the original).
original_output = filter_dict_keys(src, keys)
v2_output = filter_dict_keys_v2(src, keys)
assert original_output == v2_output
print(json.dumps(v2_output, indent=2))
print(json.dumps(original_output, indent=2))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment