Skip to content

Instantly share code, notes, and snippets.

@chesnerdesir
Last active May 12, 2019 11:43
Show Gist options
  • Save chesnerdesir/4426c2752d0943e68d97ed747951f90d to your computer and use it in GitHub Desktop.
Save chesnerdesir/4426c2752d0943e68d97ed747951f90d to your computer and use it in GitHub Desktop.
Walk and extract recursively from a given dictionary all available complete paths to data values (from root to leaf) as a list of nested nodes (fields or keys), in Python.
"""
Recursively walk a given dictionary and extract every complete path (from root to leaf) to a data value, each path being a list of nested nodes (keys); dictionaries nested inside lists are traversed as well.
"""
import copy
_END_OF_PATH_TAG = "#"  # marker appended to every complete path stored in the accumulator


def _collect_tagged_paths(in_dict, tagged_paths, prefix, extract_lists):
    """
    Append every root-to-leaf path found below in_dict to tagged_paths, each one ending with the end-of-path tag.
    prefix holds the nodes leading to in_dict; it is pushed/popped during the walk and is left unchanged on return.
    """
    for key, val in in_dict.items():
        if isinstance(val, dict):
            prefix.append(key)
            _collect_tagged_paths(val, tagged_paths, prefix, extract_lists)
            prefix.pop()
        elif (extract_lists and isinstance(val, list) and val
              and all(isinstance(item, dict) for item in val)):
            # Non-empty list made only of dictionaries: descend into each element.
            # The explicit emptiness check matters because all() is True for an empty list,
            # which would otherwise make the key vanish from the result.
            prefix.append(key)
            for idx, row in enumerate(val):
                prefix.append("list_idx_%d" % idx)
                _collect_tagged_paths(row, tagged_paths, prefix, extract_lists)
                prefix.pop()
            prefix.pop()
        else:
            # Leaf value (scalars, empty lists, lists that are not purely dictionaries, ...).
            # A shallow copy keeps the very same key objects as the dictionary.
            tagged_paths.append([*prefix, key, _END_OF_PATH_TAG])


def get_paths_from_dict(in_dict, io_final_list=None, io_temp_list=None, in_extract_list=True) -> list:
    """
    Walk and extract all available complete paths to data values as a list of nested nodes from dictionary, recursively.
    The method also handles dictionaries within nested lists (element k of such a list is indexed with the
    "list_idx_k" node) and preserves key object type.
    A key whose value is an empty dictionary yields no path; a key whose value is an empty list is a leaf.
    Usage : get_paths_from_dict(dict_test)
    :param in_dict: Dictionary from which to extract list of nested nodes
    :type in_dict: dict
    :param io_final_list: accumulator that receives every complete path found, each suffixed with the "#"
        end tag; a new list is used when None (default is None)
    :type io_final_list: list
    :param io_temp_list: nodes leading to in_dict, prepended to every path; a new empty list is used when None
        (default is None)
    :type io_temp_list: list
    :param in_extract_list: if True, non-empty nested lists containing only dictionaries are processed also
        (default is True)
    :type in_extract_list: bool
    :return: List of paths as a list of nodes (without the end tag)
    :rtype: list
    """
    if io_temp_list is None:
        io_temp_list = []
    if io_final_list is None:
        io_final_list = []
    _collect_tagged_paths(in_dict, io_final_list, io_temp_list, in_extract_list)
    # Strip the end tag once, after the whole walk, instead of at every recursion level.
    return [row[:-1] for row in io_final_list if row and row[-1] == _END_OF_PATH_TAG]
def get_dict_value_from_nodes_list(in_dict, in_list_nodes):
    """
    Retrieve value from a dictionary from a list of nested nodes.
    Element k of a nested list is indexed with the "list_idx_k" pattern in in_list_nodes.
    If a value that is not a dictionary is reached before the nodes are exhausted, that value is returned
    and the remaining nodes are ignored.
    :param in_dict: Dictionary from which the value will be extracted
    :type in_dict: dict
    :param in_list_nodes: the list of nested nodes to walk through
    :type in_list_nodes: list
    :return: A deep copy of the value found, None if nothing is found (missing key, None node,
        list index out of range or not a number)
    :rtype: object
    """
    if in_dict is None or in_list_nodes is None:
        return None
    # Walk the original structure and copy only the result: this avoids deep-copying the whole
    # dictionary and keeps lookups working for keys that are hashed by identity.
    val = in_dict
    for node in in_list_nodes:
        if node is None or val is None:
            return None
        if (isinstance(val, (list, tuple)) and isinstance(node, str)
                and node.startswith("list_idx_")):
            try:
                val = val[int(node.split("_")[-1])]
            except (ValueError, IndexError):
                # Malformed or out-of-range index: nothing to be found there.
                return None
            continue
        if not isinstance(val, dict):
            # Cannot descend any further; return what has been reached so far.
            break
        # Plain key lookup; this also covers keys that merely start with "list_idx_".
        val = val.get(node)
    return copy.deepcopy(val)
if __name__ == '__main__':
    # Testing: manual demonstration of both helpers on a sample dictionary.
    dict_test = {
        "a": "value a",
        "b": {
            "b.1": "value b.1",
            "b.2": {
                "b.2.1": "value b.2.1",
                "b.2.2": {
                    "b.2.2.1": "value b.2.2.1",
                },
                "b.2.3": "value b.2.3",
            },
            "b.3": "value b.3",
            "b.4": "value b.4",
            "b.5": {
                "b.5.1": "value b.5.1",
                "b.5.2": {
                    "b.5.2.1": "value b.5.2.1",
                }
            },
        },
        "c": {
            "c.1": "value c.1",
            "c.2": [{"c.2.0.1": 31415, "c.2.0.2": 92653}, {"c.2.1.1": "pie"}],
            ("c.3", 314): [3, 1, 4, "159265358"],
            979392: -1,
        },
        "d": "value d",
    }
    # Paths expected with in_extract_list=False (nested lists are leaves).
    expected_output1 = [
        ["a"],
        ["b", "b.1"],
        ["b", "b.2", "b.2.1"],
        ["b", "b.2", "b.2.2", "b.2.2.1"],
        ["b", "b.2", "b.2.3"],
        ["b", "b.3"],
        ["b", "b.4"],
        ["b", "b.5", "b.5.1"],
        ["b", "b.5", "b.5.2", "b.5.2.1"],
        ["c", "c.1"],
        ["c", "c.2"],
        ["c", ("c.3", 314)],
        ["c", 979392],
        ["d"],
    ]
    # Paths expected with in_extract_list=True (the default): lists of dictionaries are expanded.
    expected_output2 = [
        ["a"],
        ["b", "b.1"],
        ["b", "b.2", "b.2.1"],
        ["b", "b.2", "b.2.2", "b.2.2.1"],
        ["b", "b.2", "b.2.3"],
        ["b", "b.3"],
        ["b", "b.4"],
        ["b", "b.5", "b.5.1"],
        ["b", "b.5", "b.5.2", "b.5.2.1"],
        ["c", "c.1"],
        ["c", "c.2", "list_idx_0", "c.2.0.1"],
        ["c", "c.2", "list_idx_0", "c.2.0.2"],
        ["c", "c.2", "list_idx_1", "c.2.1.1"],
        ["c", ("c.3", 314)],
        ["c", 979392],
        ["d"],
    ]
    # The default call extracts nested lists, so it is compared with expected_output2.
    print("expected", expected_output2)
    print("found ..", get_paths_from_dict(dict_test))
    print("value from [\"b\", \"b.2\", \"b.2.2\", \"b.2.2.1\"]",
          get_dict_value_from_nodes_list(dict_test, ["b", "b.2", "b.2.2", "b.2.2.1"]))
    print(get_paths_from_dict(in_dict=dict_test, in_extract_list=True))
    print("expected", expected_output1)
    print(get_paths_from_dict(in_dict=dict_test, in_extract_list=False))
    print(92653, get_dict_value_from_nodes_list(dict_test, ['c', 'c.2', 'list_idx_0', 'c.2.0.2']))
    print([3, 1, 4, "159265358"], get_dict_value_from_nodes_list(dict_test, ['c', ('c.3', 314)]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment