Skip to content

Instantly share code, notes, and snippets.

@jhejderup
Last active March 20, 2020 14:18
Show Gist options
  • Save jhejderup/389c9906cdc262ce20e71f75985df9d4 to your computer and use it in GitHub Desktop.
Save jhejderup/389c9906cdc262ce20e71f75985df9d4 to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
# -*- coding: utf-8 -*-
""" Generate a dependency graph from a Cargo.lock file
Run: python3 cargolock2csv.py <path-to-Cargo.lock>
Deps: pip3 install toml
Return: pkg_nodes.csv, pkg_edges.csv
"""
import sys
import toml
# Require exactly one argument (the path to the Cargo.lock file). An explicit
# check is used instead of `assert`, which is stripped when run with -O.
if len(sys.argv) != 2:
    sys.exit("Usage: python3 cargolock2csv.py <path-to-Cargo.lock>")
def read_contents(file_path):
    """Return the entire text content of the file at *file_path*.

    The file is decoded as UTF-8 (the encoding TOML documents must use)
    rather than with the platform's locale default.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        return f.read()
def create_name_version_mappings(lockfile_dict):
    """Build a package-name -> version lookup from a parsed lockfile.

    Args:
        lockfile_dict: dict whose optional 'package' entry is a list of
            dicts, each with 'name' and 'version' keys.

    Returns:
        A dict mapping each package name to the version of its first
        occurrence; later entries with the same name are ignored. An empty
        dict is returned when there is no 'package' entry.
    """
    __pkg_name_version_mappings = {}
    for pkg in lockfile_dict.get('package', []):
        name = pkg['name']
        # First occurrence wins: do not overwrite an already recorded version.
        if name not in __pkg_name_version_mappings:
            __pkg_name_version_mappings[name] = pkg['version']
    return __pkg_name_version_mappings
# Parse the lockfile named on the command line into a dictionary.
lockfile_string = read_contents(sys.argv[1])
lockfile_dict = toml.loads(lockfile_string)
# Lookup table used for dependencies that are listed by bare name only.
__mappings_name_ver = create_name_version_mappings(lockfile_dict)

# Edge list: one CSV row per (package, dependency) pair.
with open("pkg_edges.csv", "w") as edge_file:
    edge_file.write("source_name,source_version,target_name,target_version\n")
    for package in lockfile_dict['package']:
        source = "{},{}".format(package['name'], package['version'])
        if 'dependencies' not in package:
            continue
        for dependency in package['dependencies']:
            fields = dependency.split(" ")
            if len(fields) > 1:
                # Entry is space-separated: first field is the name, second
                # the version.
                target = "{},{}".format(fields[0], fields[1])
            else:
                # Bare name: take the version from the lookup table.
                target = "{},{}".format(
                    dependency, __mappings_name_ver[dependency])
            edge_file.write("{},{}\n".format(source, target))

# Node list: one CSV row per package entry in the lockfile.
with open("pkg_nodes.csv", "w") as node_file:
    node_file.write("package_name,package_version\n")
    node_file.writelines(
        "{},{}\n".format(package['name'], package['version'])
        for package in lockfile_dict['package']
    )
# For every directory below the current one that contains a Cargo.lock, cd into
# it and run both converters there via `parallel`: rustcg2csv.py on
# callgraph.json, then cargolock2csv.py on Cargo.lock.
find . -name Cargo.lock -printf '%h\n' | parallel 'cd {}; python3 /data/praezi/rustcg2csv.py callgraph.json; python3 /data/praezi/cargolock2csv.py Cargo.lock'
# For every directory containing metadata_pkg_nodes.csv, check whether the
# call-data package nodes are a subset of the metadata package nodes.
# `comm -23` keeps only the lines unique to the (sorted) call-data file, so an
# empty result echoes "<dir>/TRUE" and anything else "<dir>/FALSE"; awk then
# splits that string on "/" and prints fields 2-4 as one comma-separated row.
# NOTE(review): cargolock2csv.py writes pkg_nodes.csv and rustcg2csv.py writes
# pkg_nodes_call_data.csv, not the file names read here -- presumably the
# outputs are renamed before this step; confirm.
find . -name metadata_pkg_nodes.csv -printf '%h\n' | parallel 'cd {};RES=$( comm -23 <(sort call_data_pkg_nodes.csv | uniq) <(sort metadata_pkg_nodes.csv | uniq) | head -1); [ -z "$RES" ] && echo "{}/TRUE" || echo "{}/FALSE"' | awk -F"/" '{print $2","$3","$4}'
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
Find calls from package A to package B.
Run: python3 find_calls.py <path-to-callgraph.json> <from-package-name> <to-package-name>
Authors: @ktrianta, @jhejderup
"""
import json
import sys
from pathlib import Path
# Validate the command line explicitly; an `assert` would be stripped under -O.
if len(sys.argv) != 4:
    sys.exit(
        "Usage: python3 find_calls.py <path-to-callgraph.json> "
        "<from-package-name> <to-package-name>"
    )
# Read the arguments once instead of indexing sys.argv for every edge.
callgraph_path, from_package, to_package = sys.argv[1:4]

with open(callgraph_path, encoding="utf-8") as cg_file:
    data = json.load(cg_file)

_mapping_id_node_name = {}
_mapping_id_node_version = {}
_mapping_id_def = {}

##
### Process nodes: index package name, package version and definition by id
##
for cg_node in data['nodes']:
    node_id = cg_node['id']
    _mapping_id_node_name[node_id] = cg_node['package_name']
    _mapping_id_node_version[node_id] = cg_node['package_version']
    _mapping_id_def[node_id] = cg_node['relative_def_id']

##
### Process edges: print every call from <from-package> into <to-package>
##
for edge in data['edges']:
    source_id = edge[0]
    target_id = edge[1]
    source_name = _mapping_id_node_name[source_id]
    source_version = _mapping_id_node_version[source_id]
    target_name = _mapping_id_node_name[target_id]
    target_version = _mapping_id_node_version[target_id]
    # Only the requested package pair is of interest.
    if source_name != from_package or target_name != to_package:
        continue
    source = "{} {}".format(source_name, source_version)
    target = "{} {}".format(target_name, target_version)
    ## we only want package-bound edges, i.e. skip calls that stay inside the
    ## same package at the same version
    if source == target:
        continue
    print("{} -> {}".format(source, target))
    print(" {} -> {}".format(_mapping_id_def[source_id], _mapping_id_def[target_id]))
#!/usr/bin/python3
# -*- coding: utf-8 -*-
""" Generate a dependency graph from a rust call graph
Run: python3 rustcg2csv.py <path-to-callgraph.json>
Return: pkg_nodes_call_data.csv, pkg_edges_call_data.csv
"""
import json
import sys
from pathlib import Path
# Validate the command line explicitly; an `assert` would be stripped under -O.
if len(sys.argv) != 2:
    sys.exit("Usage: python3 rustcg2csv.py <path-to-callgraph.json>")

with open(sys.argv[1], encoding="utf-8") as cg_file:
    data = json.load(cg_file)


def _split_package_id(package_id):
    """Return (name, version) from a space-separated id, or None if malformed."""
    parts = package_id.split(" ")
    if len(parts) < 2:
        return None
    return parts[0], parts[1]


nodes = set()
_mapping_id_node = {}

##
### Process nodes
##
with open("pkg_nodes_call_data.csv", "w") as node_file:
    node_file.write("package_name,package_version\n")
    for cg_node in data['nodes']:
        nodes.add(cg_node['package'])
        _mapping_id_node[cg_node['id']] = cg_node['package']
    # Sorted so the output does not depend on set iteration order.
    for node in sorted(nodes):
        if node == 'NULL':
            continue
        name_version = _split_package_id(node)
        if name_version is None:
            # Report ids without a version field and carry on (best effort).
            print(node.split(" "))
            continue
        node_file.write("{},{}\n".format(name_version[0], name_version[1]))

##
### Process edges
##
with open("pkg_edges_call_data.csv", "w") as edge_file:
    edge_file.write("source_name,source_version,target_name,target_version\n")
    pkg_edges = set()
    for edge in data['edges']:
        source_name = _mapping_id_node[edge[0]]
        target_name = _mapping_id_node[edge[1]]
        ## we only want package-bound edges
        if source_name == target_name or 'NULL' in (source_name, target_name):
            continue
        source = _split_package_id(source_name)
        target = _split_package_id(target_name)
        if source is None or target is None:
            # Malformed ids were already reported by the node pass; skip the
            # edge instead of failing with an IndexError.
            continue
        pkg_edges.add("{},{},{},{}\n".format(source[0], source[1], target[0], target[1]))
    # The set removes duplicate package-level edges; sorting keeps row order
    # stable between runs.
    edge_file.writelines(sorted(pkg_edges))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment