Last active
May 12, 2019 11:43
-
-
Save chesnerdesir/4426c2752d0943e68d97ed747951f90d to your computer and use it in GitHub Desktop.
Walk and extract recursively from a given dictionary all available complete paths to data values (from root to leaf) as a list of nested nodes (fields or keys), in Python.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Recursively walk a dictionary and extract every complete path (from root to leaf) to a data value, as a list of nested nodes (keys). Dictionaries nested inside lists are walked too.
""" | |
import copy | |
def get_paths_from_dict(in_dict, io_final_list=None, io_temp_list=None, in_extract_list=True) -> list:
    """
    Walk a dictionary recursively and return every complete path (root to leaf) as a list of nodes.

    A leaf is any value that is not a dictionary and, when ``in_extract_list`` is True, not a
    non-empty list made only of dictionaries. Elements of such a list are addressed with a
    ``"list_idx_<k>"`` node. Keys are stored in the paths as-is (same objects, any hashable type).
    Empty nested dictionaries contain no leaf and therefore produce no path.

    Usage : get_paths_from_dict(dict_test)

    :param in_dict: Dictionary from which to extract the paths
    :type in_dict: dict
    :param io_final_list: optional output accumulator; every path found is appended to it
        followed by a trailing ``"#"`` end-of-tree tag (default is None, a fresh list)
    :type io_final_list: list
    :param io_temp_list: optional path prefix prepended to every path found; it is used as a
        working stack during the walk and restored before returning (default is None)
    :type io_temp_list: list
    :param in_extract_list: if True, nested lists containing only dictionaries are walked too
        (default is True)
    :type in_extract_list: bool
    :return: List of paths, each path being a list of nodes (without the ``"#"`` tag)
    :rtype: list
    """
    if io_temp_list is None:
        io_temp_list = []
    if io_final_list is None:
        io_final_list = []
    _collect_paths_from_dict(in_dict, io_final_list, io_temp_list, in_extract_list)
    # Strip the end-of-tree tag once, after the whole walk, instead of at every recursion level.
    return [row[:-1] for row in io_final_list if row and row[-1] == "#"]


def _collect_paths_from_dict(node, final_list, prefix, extract_list):
    """Append to final_list one '#'-terminated path per leaf found under node (prefix is the current path)."""
    for key, val in node.items():
        prefix.append(key)
        if isinstance(val, dict):
            _collect_paths_from_dict(val, final_list, prefix, extract_list)
        elif extract_list and isinstance(val, list) and val and all(isinstance(item, dict) for item in val):
            # "and val": all() is True for an empty list, which would otherwise drop the key entirely.
            for idx, item in enumerate(val):
                prefix.append("list_idx_%d" % idx)
                _collect_paths_from_dict(item, final_list, prefix, extract_list)
                prefix.pop()
        else:
            # Shallow copy on purpose: a deep copy would clone key objects, and a cloned key
            # with identity-based hashing can no longer be used to look the value up again.
            final_list.append(prefix + ["#"])  # any tag here for end of tree
        prefix.pop()
def get_dict_value_from_nodes_list(in_dict, in_list_nodes):
    """
    Retrieve a value from a dictionary by following a list of nested nodes.

    Element k of a nested list is addressed with the "list_idx_k" pattern in in_list_nodes.
    The walk stops at the first value that is neither a dictionary nor an indexable list
    element, and that value is returned; any remaining nodes (for instance a trailing
    end-of-tree tag) are ignored. The returned value is a deep copy, so modifying it never
    alters in_dict.

    :param in_dict: Dictionary from which the value will be extracted
    :type in_dict: dict
    :param in_list_nodes: the list of nested nodes to walk through
    :type in_list_nodes: list
    :return: The value found; None if a key is missing, a list index is out of range or
        malformed, a node is None, or one of the arguments is None
    :rtype: object
    """
    if in_dict is None or in_list_nodes is None:
        return None
    list_tag = "list_idx_"
    # Walk the original structure: deep-copying the whole dictionary up front is wasteful and
    # clones the keys, which breaks lookups for keys hashed by identity.
    val = in_dict
    for node in in_list_nodes:
        if node is None or val is None:
            return None
        if isinstance(val, list) and isinstance(node, str) and node.startswith(list_tag):
            try:
                val = val[int(node[len(list_tag):])]
            except (ValueError, IndexError):
                # Malformed or out-of-range index: nothing is found at this path.
                return None
            continue
        if not isinstance(val, dict):
            # Reached a leaf before the nodes were exhausted: return the leaf.
            break
        val = val.get(node)
    # Only the result is copied, so the caller still receives an independent object.
    return copy.deepcopy(val)
if __name__ == '__main__':
    # Manual smoke test: print the paths found next to the expected ones.
    dict_test = {
        "a": "value a",
        "b": {
            "b.1": "value b.1",
            "b.2": {
                "b.2.1": "value b.2.1",
                "b.2.2": {
                    "b.2.2.1": "value b.2.2.1",
                },
                "b.2.3": "value b.2.3",
            },
            "b.3": "value b.3",
            "b.4": "value b.4",
            "b.5": {
                "b.5.1": "value b.5.1",
                "b.5.2": {
                    "b.5.2.1": "value b.5.2.1",
                }
            },
        },
        "c": {
            "c.1": "value c.1",
            "c.2": [{"c.2.0.1": 31415, "c.2.0.2": 92653}, {"c.2.1.1": "pie"}],
            ("c.3", 314): [3, 1, 4, "159265358"],
            979392: -1,
        },
        "d": "value d",
    }
    # Expected paths when nested lists are NOT walked (in_extract_list=False).
    expected_output1 = [
        ["a"],
        ["b", "b.1"],
        ["b", "b.2", "b.2.1"],
        ["b", "b.2", "b.2.2", "b.2.2.1"],
        ["b", "b.2", "b.2.3"],
        ["b", "b.3"],
        ["b", "b.4"],
        ["b", "b.5", "b.5.1"],
        ["b", "b.5", "b.5.2", "b.5.2.1"],
        ["c", "c.1"],
        ["c", "c.2"],
        ["c", ("c.3", 314)],
        ["c", 979392],
        ["d"],
    ]
    # Expected paths when lists of dictionaries are walked (in_extract_list=True, the default).
    expected_output2 = [
        ["a"],
        ["b", "b.1"],
        ["b", "b.2", "b.2.1"],
        ["b", "b.2", "b.2.2", "b.2.2.1"],
        ["b", "b.2", "b.2.3"],
        ["b", "b.3"],
        ["b", "b.4"],
        ["b", "b.5", "b.5.1"],
        ["b", "b.5", "b.5.2", "b.5.2.1"],
        ["c", "c.1"],
        ["c", "c.2", "list_idx_0", "c.2.0.1"],
        ["c", "c.2", "list_idx_0", "c.2.0.2"],
        ["c", "c.2", "list_idx_1", "c.2.1.1"],
        ["c", ("c.3", 314)],
        ["c", 979392],
        ["d"],
    ]
    # The default call walks nested lists, so it is compared against expected_output2
    # (the original referenced an undefined name, expected_output).
    print("expected", expected_output2)
    print("found ..", get_paths_from_dict(dict_test))
    print("value from [\"b\", \"b.2\", \"b.2.2\", \"b.2.2.1\"]",
          get_dict_value_from_nodes_list(dict_test, ["b", "b.2", "b.2.2", "b.2.2.1"]))
    print(get_paths_from_dict(in_dict=dict_test, in_extract_list=True))
    print("expected", expected_output1)
    print(get_paths_from_dict(in_dict=dict_test, in_extract_list=False))
    print(92653, get_dict_value_from_nodes_list(dict_test, ['c', 'c.2', 'list_idx_0', 'c.2.0.2']))
    print([3, 1, 4, "159265358"], get_dict_value_from_nodes_list(dict_test, ['c', ('c.3', 314)]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment