Skip to content

Instantly share code, notes, and snippets.

@Samk13
Last active February 12, 2024 00:50
Show Gist options
  • Save Samk13/8a072b2053f0718dd9b7591b02d185c2 to your computer and use it in GitHub Desktop.
Save Samk13/8a072b2053f0718dd9b7591b02d185c2 to your computer and use it in GitHub Desktop.
# https://github.com/inveniosoftware/invenio-communities/pull/1086/files#diff-6c07ffce9d2dd97c912da89fd4869787bdb49626a3fa278d466117cc8e489941
# Simplifying the function
def filter_dict_keys(src, keys):
    """Filter a dictionary based on a list of key paths.

    A key without a dot selects the whole value stored under that key. A
    dotted key such as ``"metadata.title"`` selects a value nested inside
    sub-dictionaries. Paths that do not exist in ``src`` are ignored.

    Rules applied at every nesting level:

    * If a key is requested both as a whole (``"a"``) and through a dotted
      path (``"a.b"``), the whole value is kept.
    * A dotted path is only followed through ``dict`` values; if the parent
      is anything else (string, number, list, ...) the path is skipped.
    * If the parent is a ``dict`` but none of the requested children exist,
      the parent is kept as an empty dictionary.

    :param src: The source dictionary to filter. It is never modified;
        selected values are returned by reference, not copied.
    :param keys: An iterable of key-path strings to keep.
    :return: A new dictionary holding only the requested paths. Whole keys
        come first (in ``keys`` order), followed by partially selected
        sub-dictionaries (in ``src`` order).
    """
    # ``keys`` is walked more than once; materialise it so that a generator
    # or other one-shot iterable is not silently exhausted.
    keys = list(keys)

    # Keys without a dot select the complete value.
    result = {key: src[key] for key in keys if "." not in key and key in src}

    # Group the remainder of every dotted path under its first segment,
    # e.g. "metadata.title" -> {"metadata": ["title"]}.
    subpaths = {}
    for key in keys:
        head, dot, rest = key.partition(".")
        if dot:
            subpaths.setdefault(head, []).append(rest)

    # Recurse into sub-dictionaries that were not already selected whole.
    # Filtering happens on a fresh dict per level, so ``src`` (and any value
    # aliased from it in ``result``) is never written to.
    for key, value in src.items():
        if key in result or not isinstance(value, dict):
            continue
        nested = subpaths.get(key)
        if nested:
            result[key] = filter_dict_keys(value, nested)

    return result
# add it to https://github.com/inveniosoftware/invenio-records/blob/master/invenio_records/dictutils.py
def filter_dict_keys_v2(src, keys):
    """Recursively filter a dictionary based on a list of key paths.

    A key without a dot selects the whole value stored under that key. A
    dotted key such as ``"metadata.title"`` selects a value nested inside
    sub-dictionaries. Paths that do not exist in ``src`` are ignored.

    Rules applied at every nesting level:

    * If a key is requested both as a whole (``"a"``) and through a dotted
      path (``"a.b"``), the whole value is kept, whatever the order in
      ``keys``.
    * A dotted path is only followed through ``dict`` values; if the parent
      is anything else (string, number, list, ...) the path is skipped.
    * If the parent is a ``dict`` but none of the requested children exist,
      the parent is kept as an empty dictionary.

    :param src: The source dictionary to filter. It is never modified;
        selected values are returned by reference, not copied.
    :param keys: An iterable of key-path strings to keep.
    :return: A new dictionary holding only the requested paths, ordered by
        the first appearance of each path segment in ``keys``.
    """
    # Step 1: turn the flat list of paths into a tree. A value of ``None``
    # means "keep everything below this key"; a dict lists the children that
    # were requested. Sibling paths of any depth ("a.b.c" and "a.b.d") end up
    # in the same sub-tree, so nothing is overwritten when they are combined.
    wanted = {}
    for key in keys:
        *parents, leaf = key.split(".")
        node = wanted
        for part in parents:
            child = node.setdefault(part, {})
            if child is None:
                # An ancestor is already selected whole; a more specific
                # path underneath it adds nothing.
                break
            node = child
        else:
            node[leaf] = None

    def project(current_dict, spec):
        """Return the part of ``current_dict`` described by ``spec``."""
        picked = {}
        for name, sub_spec in spec.items():
            if name not in current_dict:
                continue  # key path does not exist
            value = current_dict[name]
            if sub_spec is None:
                picked[name] = value
            elif isinstance(value, dict):
                picked[name] = project(value, sub_spec)
            # Otherwise a dotted path points through a non-dict value:
            # there is nothing to descend into, so the path is dropped.
        return picked

    # Step 2: copy out of ``src`` only what the tree asks for.
    return project(src, wanted)
# Sample input for the filter functions. It appears to be a record payload
# from an InvenioRDM instance (see the URLs under "links") -- TODO confirm.
src = {
    "custom_fields": {
        "rdm:repository_url": "https://rc.upr.edu.cu/jspui/handle/DICT/3890",
    },
    "id": "y4cmy-4mk78",
    "is_draft": "false",
    "updated": "2024-02-08T14:05:05.254680+00:00",
    "revision_id": 3,
    "created": "2024-02-08T14:05:05.134344+00:00",
    "is_published": "true",
    "status": "published",
    "files": {
        "enabled": "true",
        "order": [],
    },
    "access": {
        "files": "public",
        "embargo": {
            "reason": "null",
            "active": "false",
        },
        "record": "public",
        "status": "open",
    },
    "stats": {
        "this_version": {
            "views": 0,
            "data_volume": 0.0,
            "downloads": 0,
            "unique_downloads": 0,
            "unique_views": 0,
        },
        "all_versions": {
            "views": 0,
            "data_volume": 0.0,
            "downloads": 0,
            "unique_downloads": 0,
            "unique_views": 0,
        },
    },
    "parent": {
        "communities": {},
        "id": "q3e4b-xv278",
    },
    "metadata": {
        "resource_type": {
            "title": {
                "de": "Datensatz",
                "en": "Dataset",
            },
            "id": "dataset",
        },
        "rights": [
            {
                "id": "cc-by-4.0",
                "description": {
                    "en": "The Creative Commons Attribution license allows re-distribution and re-use of a licensed work on the condition that the creator is appropriately credited."
                },
                "icon": "cc-by-icon",
                "props": {
                    "scheme": "spdx",
                    "url": "https://creativecommons.org/licenses/by/4.0/legalcode",
                },
                "title": {
                    "en": "Creative Commons Attribution 4.0 International",
                },
            },
        ],
        "title": "Caracterización ingeniero geológica del municipio de Pinar del Río",
        "publication_date": "2022",
        "creators": [
            {
                "person_or_org": {
                    "name": "Martínez Silva, Rafael",
                    "family_name": "Martínez Silva",
                    "given_name": "Rafael",
                    "type": "personal",
                },
            },
        ],
        "subjects": [
            {"subject": "Ingeniería geológica"},
            {"subject": "Sismicidad"},
            {"subject": "Estado tensional"},
        ],
        "languages": [
            {
                "title": {
                    "en": "Spanish",
                },
                "id": "spa",
            },
        ],
        "publisher": "Universidad de Pinar del Río \"Hermanos Saíz Montes de Oca\"",
    },
    "pids": {
        "doi": {
            "client": "datacite",
            "identifier": "10.81088/y4cmy-4mk78",
            "provider": "datacite",
        },
        "oai": {
            "identifier": "oai:inveniordm.web.cern.ch:y4cmy-4mk78",
            "provider": "oai",
        },
    },
    "versions": {
        "is_latest": "true",
        "index": 1,
    },
    "links": {
        "self": "https://inveniordm.web.cern.ch/api/records/y4cmy-4mk78",
        "self_html": "https://inveniordm.web.cern.ch/records/y4cmy-4mk78",
        "self_doi": "https://inveniordm.web.cern.ch/doi/10.81088/y4cmy-4mk78",
        "doi": "https://doi.org/10.81088/y4cmy-4mk78",
        "self_iiif_manifest": "https://inveniordm.web.cern.ch/api/iiif/record:y4cmy-4mk78/manifest",
        "self_iiif_sequence": "https://inveniordm.web.cern.ch/api/iiif/record:y4cmy-4mk78/sequence/default",
        "files": "https://inveniordm.web.cern.ch/api/records/y4cmy-4mk78/files",
        "archive": "https://inveniordm.web.cern.ch/api/records/y4cmy-4mk78/files-archive",
        "latest": "https://inveniordm.web.cern.ch/api/records/y4cmy-4mk78/versions/latest",
        "latest_html": "https://inveniordm.web.cern.ch/records/y4cmy-4mk78/latest",
        "draft": "https://inveniordm.web.cern.ch/api/records/y4cmy-4mk78/draft",
        "versions": "https://inveniordm.web.cern.ch/api/records/y4cmy-4mk78/versions",
        "access_links": "https://inveniordm.web.cern.ch/api/records/y4cmy-4mk78/access/links",
        "reserve_doi": "https://inveniordm.web.cern.ch/api/records/y4cmy-4mk78/draft/pids/doi",
        "communities": "https://inveniordm.web.cern.ch/api/records/y4cmy-4mk78/communities",
        "communities-suggestions": "https://inveniordm.web.cern.ch/api/records/y4cmy-4mk78/communities-suggestions",
        "requests": "https://inveniordm.web.cern.ch/api/records/y4cmy-4mk78/requests",
    },
}
# Key paths to keep when filtering ``src``. Several of them ("uuid", "slug",
# "theme", "version_id", "metadata.type", "metadata.website", ...) are not
# present in ``src``, so the list also exercises the missing-path handling.
keys = [
    "uuid",
    "created",
    "updated",
    "metadata.subjects",
    "id",
    "slug",
    "theme",
    "version_id",
    "metadata.title",
    "metadata.type",
    "metadata.website",
    "metadata.organizations",
    "metadata.funding",
]
import json
# Run both implementations once on the sample record, check that they agree,
# then dump the two results (rewritten version first) for visual comparison.
original_result = filter_dict_keys(src, keys)
rewritten_result = filter_dict_keys_v2(src, keys)
assert original_result == rewritten_result
print(json.dumps(rewritten_result, indent=2))
print(json.dumps(original_result, indent=2))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment