import pandas as pd
import numpy as np
import json

f_path = "HomePage.java"
with open(f_path, 'r') as f:
    content = f.read()
def flattern_json(d):
    """Iteratively (breadth-first) flattens a nested dict, joining nested keys with '_'."""
    if len(d) == 0:
        return {}
    from collections import deque
    q = deque()
    res = dict()
    for key, val in d.items():  # Push the top-level keys and values into the queue.
        if not isinstance(val, dict):  # Not a dict.
            if isinstance(val, list):  # If it's a list, check whether it contains dict objects.
                temp = list()  # Temp list for the non-dict values we want to keep.
                for v in val:
                    if not isinstance(v, dict):
                        temp.append(v)
                    else:
                        q.append((key, v))  # Dict values are queued along with their parent key.
                if len(temp) > 0:
                    res[key] = temp
            else:
                res[key] = val
        else:
            q.append((key, val))
    while q:
        k, v = q.popleft()  # Take a parent key and its dict value off the queue.
        for key, val in v.items():
            new_parent = k + "_" + key  # New key is "<parent>_<current key>".
            if isinstance(val, list):
                temp = list()
                for item in val:
                    if not isinstance(item, dict):
                        temp.append(item)
                    else:
                        q.append((new_parent, item))
                if len(temp) > 0:  # Only keep the list if it has non-dict values.
                    res[new_parent] = temp
            elif not isinstance(val, dict):
                res[new_parent] = val
            else:
                q.append((new_parent, val))
    return res
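# Illustrative only: flattern_json is the breadth-first variant and is not used
# further down (flatten_json below is). For example:
#   flattern_json({"a": {"b": 1}, "c": [1, {"d": 2}]})
#   -> {"c": [1], "a_b": 1, "c_d": 2}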
def flatten_json(y):
    out = {}
    def flatten(x, name=''):
        if type(x) is dict:
            for a in x:
                flatten(x[a], name + a + '_')
        elif type(x) is list:
            i = 0
            for a in x:
                flatten(a, name + str(i) + '_')
                i += 1
        else:
            out[name[:-1]] = x
    flatten(y)
    return out
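# Illustrative only: flatten_json is the recursive flattener actually used below.
# Nested dict keys are joined with '_' and list elements get their index, e.g.
#   flatten_json({"a": {"b": 1}, "c": [2, 3]})
#   -> {"a_b": 1, "c_0": 2, "c_1": 3}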
def json_ast_encoder(o):
    if type(o) is set and len(o) == 0:
        return []
    if hasattr(o, "__dict__"):
        return o.__dict__
    return ""
def find_end_line_number(node):
    """Finds the last source line occupied by a node, taken as the maximum line
    number over the node and its positioned descendants."""
    max_line = node.position.line
    def traverse(n):
        nonlocal max_line
        for child in n.children:
            if isinstance(child, list) and (len(child) > 0):
                for item in child:
                    if hasattr(item, 'children'):  # skip plain strings, e.g. names in a throws clause
                        traverse(item)
            elif hasattr(child, '_position'):  # a nested javalang node
                if child._position is not None and child._position.line > max_line:
                    max_line = child._position.line
                traverse(child)  # descend so deeply nested statements are counted too
    traverse(node)
    return max_line
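# Illustrative usage (assumes `tree` has been parsed as below):
#   for _, m in tree.filter(jl.tree.MethodDeclaration):
#       print(m.name, m.position.line, find_end_line_number(m))
# javalang attaches positions to AST nodes, not to tokens like closing braces,
# so the returned line can fall slightly short of the method's final '}'.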
with open(f_path, 'r') as f:
    lines = f.readlines()

import javalang as jl

tree = jl.parse.parse(content)
# jl.tree.BlockStatement
docs = []
failed = False
index = 0
for path, node in tree.filter(jl.tree.MethodDeclaration):
    d = {}
    if not failed:
        failed_method = None
    start_line = node.position.line
    try:
        # print(node.name)
        d['method'] = node.name
        d['index'] = index
        index += 1
        d['start_line'] = start_line
        end_line = find_end_line_number(node)
        # javalang positions are 1-based while the list index is 0-based, so start
        # at start_line - 1 to include the declaration line itself.
        d['code'] = lines[start_line - 1:end_line]
        d['end_line'] = end_line
    except Exception:
        failed_method = node.name
        d['end_line'] = 0
        failed = True
        print(f"Error in {node.name}")
    docs.append(d)
df = pd.DataFrame(docs)
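# At this point `df` has one row per method with columns method / index /
# start_line / code / end_line; end_line is 0 for any method whose traversal
# raised an exception, and get_new_end_line below repairs those rows.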
def get_new_end_line(index, df, lines_count, end_line):
    """Fallback for methods whose end line could not be computed (end_line == 0):
    use the line before the next method's start, or the end of the file for the
    last method."""
    if end_line == 0:
        max_index = df['index'].max()
        # print(max_index, index)
        if index == max_index:
            return lines_count
        else:
            next_index = index + 1
            next_df = df[df['index'] == next_index]
            new_end_line = next_df['start_line'].iloc[0] - 1
            return new_end_line
    else:
        return end_line
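# Illustrative only: if parsing of method #2 (of 4) failed, its end_line is 0 and
# the fallback returns method #3's start_line minus one; for the last method it
# returns the total number of lines in the file.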
lines_count = len(lines)
df['new_end_line'] = df.apply(lambda row: get_new_end_line(row['index'], df, lines_count, row['end_line']), axis=1)
result = df.to_dict(orient="records")
v = json.dumps(tree, sort_keys=True, default=json_ast_encoder)
y = json.loads(v)['types'][0]['body']  # members of the first top-level type declaration
z = json.dumps(y)
docs = []
for i in y:
    m = flatten_json(i)
    docs.append(m)
df = pd.DataFrame(docs)
df_1 = df.dropna(axis=1, how='all')
df_2 = df_1.select_dtypes(include=['object'])
df_3 = df_2.dropna(subset=['name'])
df_json = df.to_dict(orient="records")
for path, member in tree.filter(jl.tree.Member):  # filter() yields (path, node) pairs
    print(member)