Last active
September 12, 2018 13:00
-
-
Save boxed/610b2ba73066c96e9781aed7c0c0b25c to your computer and use it in GitHub Desktop.
This tool analyses your code base for cases where a short form for keyword arguments would be nice to have
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from parso import parse | |
from collections import defaultdict | |
# --- running counters, bumped by handle_argument ---
# (ints are immutable, so a chained assignment gives independent counters)
args = kwargs = kwargs_foo_equal_foo = passed_value = args_kwargs = 0

# --- sentinel strings returned by handle_argument to classify an argument ---
matched_kwarg = '<matched_kwarg>'
kwarg_that_cannot_match = '<kwarg_that_cannot_match>'
hardcoded = '<hardcoded>'
misc = '<misc>'

# --- data collected while walking the parse trees ---
# (function_name, [classified argument, ...]) for every recorded call
calls = []
# function name -> parameter names, for names defined exactly once
unique_function_name_parameters = {}
# function names that were defined more than once
non_unique_function_names = set()
# length of a passed variable name -> number of times it was seen
argument_variable_name_lengths = defaultdict(int)
def get_arguments(i):
    """Return the argument nodes of a call as a list.

    parso represents a call with several arguments as an ``arglist`` node
    whose children are the arguments, while a call with a single argument
    has that argument node directly. Both shapes are normalised to a list
    here: the children of an ``arglist``, or a one-element list otherwise.
    """
    return i.children if i.type == 'arglist' else [i]
def param_name(i):
    """Return the ``value`` of the first child of a ``param`` node.

    When the first child is a ``tfpdef`` node, the value is taken from that
    node's own first child instead.
    """
    head = i.children[0]
    target = head.children[0] if head.type == 'tfpdef' else head
    return target.value
def handle_list(result):
    """Run ``handle_node`` on every direct child of ``result``, in order."""
    for child in result.children:
        handle_node(child)
def _register_function_definition(funcdef):
    """Record the parameter names of a ``funcdef`` node, tracking name clashes."""
    name = funcdef.children[1].value
    if name in non_unique_function_names:
        # Already known to be defined more than once. Without this check a
        # third definition would be re-registered as "unique".
        return
    if name in unique_function_name_parameters:
        # second definition of this name: we can no longer tell which
        # signature a call refers to
        del unique_function_name_parameters[name]
        non_unique_function_names.add(name)
        return
    parameters = [param_name(x) for x in funcdef.children[2].children if x.type == 'param']
    if parameters and parameters[0] == 'self':
        parameters = parameters[1:]
    unique_function_name_parameters[name] = parameters


def _argument_nodes(trailer):
    """Return the argument nodes inside a trailer, without the ``,`` operators."""
    return [node for node in get_arguments(trailer.children[1]) if node.type != 'operator']


def _extract_call(node):
    """Return ``(function_name, argument_nodes)`` for an ``atom_expr``, or None."""
    children = node.children
    function_name = None
    arguments = None
    if len(children) == 2:
        # normal function call
        if children[0].type == 'name':
            function_name = children[0].value
            arguments = _argument_nodes(children[1])
    else:
        # The second-to-last child can be a leaf without ``children``
        # (presumably e.g. the name in ``await name(...)``); skip those
        # instead of raising AttributeError.
        callee_parts = getattr(children[-2], 'children', None)
        if callee_parts and (callee_parts[0].type, callee_parts[0].value) == ('operator', '.'):
            # member function call
            function_name = callee_parts[1].value
            arguments = _argument_nodes(children[-1])
        # anything else: list comprehensions and stuff, ignored
    # NOTE(review): the ``len(children) > 2`` requirement means the
    # two-children "normal function call" case above is never recorded.
    # Kept as in the original; confirm whether that is intended.
    if arguments and arguments[0].type != 'subscript' and len(children) > 2:
        return function_name, arguments
    return None


def handle_node(i):
    """Collect statistics from node ``i`` and then from all of its descendants.

    * ``funcdef`` nodes register their parameter names in
      ``unique_function_name_parameters``; a name defined more than once is
      moved to ``non_unique_function_names`` and stays there.
    * ``atom_expr`` nodes that look like a member call get every argument
      classified by ``handle_argument`` and are appended to ``calls`` as
      ``(function_name, argument_values)``.

    Any node that has ``children`` is then walked recursively via
    ``handle_list``.
    """
    node_type = i.type
    if node_type == 'funcdef':
        _register_function_definition(i)
    elif node_type == 'atom_expr':
        call = _extract_call(i)
        if call is not None:
            function_name, arguments = call
            argument_values = [handle_argument(argument) for argument in arguments]
            calls.append((function_name, argument_values))
    if hasattr(i, 'children'):
        handle_list(i)
def handle_argument(argument):
    """Classify one call argument node and update the global counters.

    Returns:
        * the variable name, for a positional argument that is a bare name
          (also counts ``args`` and the name length);
        * ``None`` for ``*args`` / ``**kwargs`` (counts ``args_kwargs``);
        * ``matched_kwarg`` for ``foo=foo`` (counts ``kwargs_foo_equal_foo``
          and the name length);
        * ``kwarg_that_cannot_match`` for any other keyword argument
          (counts ``kwargs``; the name length too when a variable is passed);
        * ``misc`` for an ``argument`` node that does not have exactly three
          children;
        * ``hardcoded`` for every other node type.
    """
    global args_kwargs, args, kwargs, kwargs_foo_equal_foo

    kind = argument.type

    if kind == 'name':
        # positional argument with a named variable
        variable = argument.value
        argument_variable_name_lengths[len(variable)] += 1
        args += 1
        return variable

    if kind != 'argument':
        # hardcoded value
        return hardcoded

    parts = argument.children
    first = parts[0]
    if first.type == 'operator' and first.value in ('*', '**'):
        # *args and **kwargs
        args_kwargs += 1
        return None

    if len(parts) != 3:
        # String formatting and stuff
        return misc

    # keyword argument: <keyword> = <passed>
    keyword, _, passed = parts
    keyword_name = keyword.value

    if passed.type != 'name':
        # passed hardcoded value
        kwargs += 1
        return kwarg_that_cannot_match

    # passed variable
    argument_variable_name_lengths[len(passed.value)] += 1
    if keyword_name == passed.value:
        kwargs_foo_equal_foo += 1
        return matched_kwarg

    kwargs += 1
    return kwarg_that_cannot_match
def analyse_directory(directory):
    """Parse every ``.py`` file below ``directory`` and feed it to ``handle_list``.

    Directories whose name starts with ``.``, ``env`` or ``venv``, ends with
    ``_env``, or equals ``node_modules`` are not descended into.

    Files that cannot be opened or decoded are skipped silently; this is a
    best-effort statistics tool, so one bad file should not abort the run.
    """
    skipped_prefixes = ('.', 'env', 'venv')
    for root, dirs, files in os.walk(directory):
        # prune in place so os.walk does not descend into these directories
        dirs[:] = [
            d for d in dirs
            if not d.startswith(skipped_prefixes)
            and not d.endswith('_env')
            and d != 'node_modules'
        ]
        for filename in files:
            if not filename.endswith('.py'):
                continue
            try:
                # Python source is UTF-8 by default, so do not depend on the
                # locale's preferred encoding.
                with open(os.path.join(root, filename), encoding='utf-8') as file:
                    contents = file.read()
            except (OSError, UnicodeDecodeError):
                # unreadable file, dangling symlink or non-UTF-8 content
                continue
            handle_list(parse(contents, error_recovery=True))
# Command-line driver: analyse the directory given as the only argument and
# print the collected statistics.
import sys

if len(sys.argv) != 2:
    print('This tool analyses your code base for cases where a short form for keyword arguments would be nice to have.')
    print('Usage: supply one directory path to the code you wish to analyse.')
    # sys.exit rather than the site-provided exit() helper, which is not
    # guaranteed to exist (e.g. under ``python -S``)
    sys.exit(1)

analyse_directory(sys.argv[1])

print('This analysis takes a LOT of short cuts. It will most likely under report the actual numbers.')
print('-----')
print(f'non-unique function names, unknown relevance: {len(non_unique_function_names)}')
print(f'unique {len(unique_function_name_parameters)}')
external_call_count = sum(
    1
    for name, _ in calls
    if name not in non_unique_function_names and name not in unique_function_name_parameters
)
print(f'calls to functions outside this code base, will not analyze: {external_call_count}')

print('passed variable name length statistics:')
for length, count in sorted(argument_variable_name_lengths.items()):
    print(f' length {length}: {count} times')

# Only calls to functions with exactly one known definition can be compared
# against a signature. Dunder methods (__init__, __eq__, ...) are left out;
# the original tested startswith('__') twice instead of startswith/endswith.
calls_we_can_analyse = [
    (name, call_args)
    for name, call_args in calls
    if name in unique_function_name_parameters
    and not (name.startswith('__') and name.endswith('__'))
]

# Keep calls with at least one argument that is neither a hardcoded value
# nor a keyword argument that cannot match.
calls_with_potential = [
    (name, call_args)
    for name, call_args in calls_we_can_analyse
    if any(x not in (hardcoded, kwarg_that_cannot_match) for x in call_args)
]

could_have_been_a_matched_kwarg = 0
did_not_match = 0
arity_counts = defaultdict(int)

# ``call_args`` rather than ``args`` so the global ``args`` counter is not
# overwritten by the loop variable.
for name, call_args in calls_with_potential:
    had_potential = False
    # pair each passed argument with the parameter at the same position
    for argument, parameter in zip(call_args, unique_function_name_parameters[name]):
        if argument == parameter:
            could_have_been_a_matched_kwarg += 1
            had_potential = True
        else:
            did_not_match += 1
    if had_potential:
        arity_counts[len(call_args)] += 1

print('number of arguments for calls that matched:')
for arity, count in arity_counts.items():
    print(f' {arity} arguments: {count} calls')

print('-----')
print(f'already matches (foo=foo): {kwargs_foo_equal_foo}')
print(f'could have been a matched kwarg: {could_have_been_a_matched_kwarg}')
print(f'did not match: {did_not_match}')
print('----')

benefits_from_new_syntax = kwargs_foo_equal_foo + could_have_been_a_matched_kwarg
total = did_not_match + benefits_from_new_syntax
# an empty or call-free code base gives total == 0; report 0% instead of
# raising ZeroDivisionError
percentage = benefits_from_new_syntax / total * 100 if total else 0.0
print(f'Arguments that would benefit from new syntax suggestion: {benefits_from_new_syntax} ({percentage:.2f}%)')
Results on twisted:
This analysis takes a LOT of short cuts. It will most likely under report the actual numbers.
-----
non-unique function names, unknown relevance: 2427
unique 12945
calls to functions outside this code base, will not analyze: 11554
-----
already matches (foo=foo): 428
could have been a matched kwarg: 2875
did not match: 16468
----
Arguments that would benefit from new syntax suggestion: 3303 (16.71%)
Dask:
510-dask % ~/bin/keyword_argument_analysis.py .
This analysis takes a LOT of short cuts. It will most likely under report the actual numbers.
-----
non-unique function names, unknown relevance: 368
unique 2869
calls to functions outside this code base, will not analyze: 5092
-----
already matches (foo=foo): 1355
could have been a matched kwarg: 345
did not match: 4843
----
Arguments that would benefit from new syntax suggestion: 1700 (25.98%)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Results on django:
This analysis takes a LOT of short cuts. It will most likely under report the actual numbers.
-----
non-unique function names, unknown relevance: 1844
unique 13603
calls to functions outside this code base, will not analyze: 35706
-----
already matches (foo=foo): 1312
could have been a matched kwarg: 4974
did not match: 15508
----
Arguments that would benefit from new syntax suggestion: 6286 (28.84%)