thangarajan8 · October 26, 2021 05:40
diff --git a/javalang_parser_V2.py b/javalang_parser_V2.py

 import pandas as pd
 import numpy as np
 import json
 # Path of the Java source file to analyse (relative to the working directory).
 f_path = "ContactsPage.java"
 # Read the whole file as a single string; this text is what gets parsed later.
 with open(f_path,'r') as f:
    content = f.read()
 #def flattern_json(d):
 #    if len(d) == 0:
 #        return {}
 #    from collections import deque
 #    q = deque()
 #    res = dict()
 #    for key, val in d.items(): # This loop push the top most keys and values into queue.
 #        if not isinstance(val, dict):  # If it's not dict
 #            if isinstance(val, list):  # If it's list then check list values if it contains dict object.
 #                temp = list()  # Creating temp list for storing the values that we will need which are not dict.
 #                for v in val:
 #                    if not isinstance(v, dict):
 #                        temp.append(v)
 #                    else:
 #                        q.append((key, v))  # if it's value is dict type then we push along with parent which is key.
 #                if len(temp) > 0:
 #                    res[key] = temp
 #            else:
 #                res[key] = val
 #        else:
 #            q.append((key, val))
 #    while q:
 #        k, v = q.popleft()  # Taking parent and the value out of queue
 #        for key, val in v.items():
 #            new_parent = k + "_" + key  # New parent will be old parent_currentval
 #            if isinstance(val, list):
 #                temp = list()
 #                for v in val:
 #                    if not isinstance(v, dict):
 #                        temp.append(v)
 #                    else:
 #                        q.append((new_parent, v))
 #                if len(temp) >= 0:
 #                    res[new_parent] = temp
 #            elif not isinstance(val, dict):
 #                res[new_parent] = val
 #            else:
 #                q.append((new_parent, val))
 #    return res
 #
 def flatten_json(y):
     """Flatten nested dicts/lists into a single-level dict.

     Result keys are the path components joined with ``_``; list elements
     contribute their 0-based index.  For example
     ``{'a': {'b': [10, 20]}}`` becomes ``{'a_b_0': 10, 'a_b_1': 20}``.

     Args:
         y: A dict, list or scalar value (e.g. decoded JSON).

     Returns:
         dict mapping path strings to leaf values.  Empty dicts and empty
         lists contribute no entries; a bare scalar is stored under ``''``.
     """
     out = {}

     def flatten(x, name=''):
         if isinstance(x, dict):
             for key, value in x.items():
                 # str() keeps non-string keys (ints, None, ...) from
                 # breaking the path concatenation.
                 flatten(value, name + str(key) + '_')
         elif isinstance(x, list):
             for index, item in enumerate(x):
                 flatten(item, name + str(index) + '_')
         else:
             # Strip the trailing separator appended by the parent level.
             out[name[:-1]] = x

     flatten(y)
     return out
 def json_ast_encoder(o):
     """Fallback encoder intended for the ``default=`` hook of ``json.dumps``.

     Args:
         o: An object the json module could not serialise by itself.

     Returns:
         * for a set/frozenset: a list of its elements (sorted when the
           elements are orderable, so the output is deterministic);
         * for an object with a ``__dict__``: that attribute dictionary;
         * for anything else: the empty string.
     """
     if isinstance(o, (set, frozenset)):
         # Previously only empty sets were handled; non-empty ones fell
         # through to the "" fallback and their contents were lost.
         try:
             return sorted(o)
         except TypeError:
             # Elements are not mutually orderable; keep them unsorted.
             return list(o)
     if hasattr(o, "__dict__"):
         return o.__dict__
     return ""
 #def find_end_line_number(node):
 #    """Finds end line of a node."""
 #    max_line = node.position.line
 #
 #    def traverse(node):
 #        for child in node.children:
 #            if isinstance(child, list) and (len(child) > 0):
 #                for item in child:
 #                    traverse(item)
 #            else:
 #                if hasattr(child, '_position'):
 #                    nonlocal max_line
 #                    if child._position.line > max_line:
 #                        max_line = child._position.line
 #                        return
 #
 #    traverse(node)
 #    return max_line
 # Read the same file a second time, now as a list of lines, so that
 # source text can be looked up by line index.
 with open(f_path,'r') as f:
    lines = f.readlines()
 import javalang as jl
 # Parse the Java source text read earlier into a javalang tree.
 tree = jl.parse.parse(content)
 ##jl.tree.BlockStatement
 #
 #docs = []
 #failed = False
 #index = 0
 #for path, node in tree.filter(jl.tree.MethodDeclaration):
 #    d = {}
 #    if failed == False:
 #        failed_method = None
 #    start_line = node.position.line
 #    try:
 #        
 ##        print(node.name)
 #        d['method'] = node.name
 #        d['index'] = index
 #        index += 1
 #        d['start_line'] = start_line
 #        end_line = find_end_line_number(node)
 #        d['code']  = lines[start_line:end_line]
 #        d['end_line'] = end_line
 #    except Exception:
 #        failed_method = node.name
 #        d['end_line'] = 0
 #        failed = True
 #        print(f"Error in {node.name}")
 #    docs.append(d)
 #
 #
 #    
 #df = pd.DataFrame(docs)
 #
 #def get_new_end_line(index,df,lines_count,end_line):
 #    if end_line == 0:
 #        max_index= df['index'].max()
 ##        print(max_index,index)
 #        if index == max_index:
 #            return lines_count
 #        else:
 #            next_index = index + 1
 #            next_df = df[(df['index']==next_index)]
 #            new_end_line = next_df['start_line'].iloc[0] - 1
 #            return new_end_line
 #    else:
 #        return end_line
 #lines_count = len(lines)
 #
 #df['new_end_line'] = df.apply(lambda row: get_new_end_line(row['index'],df,lines_count,row['end_line']),axis=1)
 #result = df.to_dict(orient="records")


 # Serialise the parsed tree to JSON text; json_ast_encoder is the fallback
 # for objects the json module cannot encode on its own.
 v = json.dumps(tree, sort_keys=True, default=json_ast_encoder)
 # Decode the text again and keep the 'body' of the first entry in 'types'.
 y = json.loads(v)['types'][0]['body']
 # JSON text of that body.
 z = json.dumps(y)
 #docs = []
 #for i in y:
 #    m = flatten_json(i)
 #    docs.append(m)
 #df = pd.DataFrame(docs)
 #df_1 = df.dropna(axis=1, how='all')
 #df_2 = df_1.select_dtypes(include=['object'])
 #df_3 = df_2.dropna(subset=['name'])

 def get_parameters(param_list:list)->str:
     """Return the parameter names joined into one comma-separated string.

     Every entry of ``param_list`` is expected to be a dict; its ``'name'``
     value is used, or ``'NA'`` when that key is absent.  An empty (or
     otherwise falsy) ``param_list`` yields the placeholder ``'No Param'``.
     """
     if not param_list:
         return 'No Param'
     return ','.join(entry.get('name','NA') for entry in param_list)

 def get_operator(ops):
     """Placeholder without an implementation: ignores ``ops`` and returns ``None``."""
     return None
 def get_actions(body_list:list)->tuple:
     """Collect the calls made by the statements of one method body.

     Args:
         body_list: Statement dicts of a method body taken from the
             JSON-decoded tree.  ``None`` or an empty list is accepted.

     Returns:
         A pair ``(actions, object_argument)``.

         ``actions`` is a list of dicts: ``{"action": member}`` for every
         selector of a statement's expression, followed by
         ``{"action": member, "object": qualifier}`` for the expression
         itself unless both values are missing.

         ``object_argument`` is a list of ``{'line': text}`` dicts with the
         source text looked up for each selector that carries a position.

         For an empty body the placeholders
         ``('No Actions', 'no object arg')`` are returned.
     """
     if not body_list:
         # Placeholder order follows the (actions, object_argument) order of
         # the normal return value; it used to be swapped.
         return 'No Actions', 'no object arg'

     actions = []
     object_argument = []
     for bl in body_list:
         # Tolerate a missing key as well as an explicit null value.
         expression = bl.get('expression') or {}
         action = expression.get('member','NA')
         _object = expression.get('qualifier','NA')
         for ic in expression.get('selectors') or []:
             actions.append({"action":ic.get('member','NA')})
             pos = ic.get('_position')
             if not pos:
                 # No position available: there is no source line to look up.
                 continue
             # pos[0] is treated as a 1-based line number; ``lines`` is the
             # module-level list of source lines (0-based).
             last_line = pos[0]-1
             print("last line:",last_line)
             first_line = last_line
             # Lines starting with "+" are taken to continue the previous
             # line, so walk back to the first line of the statement.  Stop
             # at index 0: a negative index would wrap around the list.
             while first_line > 0 and lines[first_line].strip().startswith("+"):
                 first_line -= 1
             line = ' '.join(lines[first_line:last_line+1]).strip().replace("\t","")
             object_argument.append({'line': line})
             print(first_line,last_line+1)
         if not(action == 'NA' and _object == 'NA'):
             actions.append({"action":action,"object":_object})
     return actions,object_argument
            
 # methods: the raw decoded entries of the type body, kept for inspection.
 # docs: one summary dict per entry; turned into a DataFrame below.
 methods = []
 docs = []
 for i in y:
 #    if not i['annotations']:
        d = {}
        methods.append(i)
        # Missing keys fall back to 'NA' / an empty list.
        d['method_name'] = i.get('name','NA')
        d['parameters'] = get_parameters(i.get('parameters',[]))
        # get_actions returns a pair; unpack it into the two columns.
        d['actions'],d['object_argument'] = get_actions(i.get('body',[]))
        docs.append(d)
 #y1 = methods[-2]['body']
 #x1 = methods[-2].get('body')[0]['expression']['arguments'][0]['arguments']


 # One row per entry of ``y``, one column per key of the summary dicts.
 df = pd.DataFrame(docs)

	import pandas as pd
	import numpy as np
	import json
	# Path of the Java source file to analyse (relative to the working directory).
	f_path = "ContactsPage.java"
	# Read the whole file as a single string; this text is what gets parsed later.
	with open(f_path,'r') as f:
	    content = f.read()
	#def flattern_json(d):
	# if len(d) == 0:
	# return {}
	# from collections import deque
	# q = deque()
	# res = dict()
	# for key, val in d.items(): # This loop push the top most keys and values into queue.
	# if not isinstance(val, dict): # If it's not dict
	# if isinstance(val, list): # If it's list then check list values if it contains dict object.
	# temp = list() # Creating temp list for storing the values that we will need which are not dict.
	# for v in val:
	# if not isinstance(v, dict):
	# temp.append(v)
	# else:
	# q.append((key, v)) # if it's value is dict type then we push along with parent which is key.
	# if len(temp) > 0:
	# res[key] = temp
	# else:
	# res[key] = val
	# else:
	# q.append((key, val))
	# while q:
	# k, v = q.popleft() # Taking parent and the value out of queue
	# for key, val in v.items():
	# new_parent = k + "_" + key # New parent will be old parent_currentval
	# if isinstance(val, list):
	# temp = list()
	# for v in val:
	# if not isinstance(v, dict):
	# temp.append(v)
	# else:
	# q.append((new_parent, v))
	# if len(temp) >= 0:
	# res[new_parent] = temp
	# elif not isinstance(val, dict):
	# res[new_parent] = val
	# else:
	# q.append((new_parent, val))
	# return res
	#
	def flatten_json(y):
	    """Flatten nested dicts/lists into a single-level dict.

	    Result keys are the path components joined with ``_``; list elements
	    contribute their 0-based index.  For example
	    ``{'a': {'b': [10, 20]}}`` becomes ``{'a_b_0': 10, 'a_b_1': 20}``.

	    Args:
	        y: A dict, list or scalar value (e.g. decoded JSON).

	    Returns:
	        dict mapping path strings to leaf values.  Empty dicts and empty
	        lists contribute no entries; a bare scalar is stored under ``''``.
	    """
	    out = {}

	    def flatten(x, name=''):
	        if isinstance(x, dict):
	            for key, value in x.items():
	                # str() keeps non-string keys (ints, None, ...) from
	                # breaking the path concatenation.
	                flatten(value, name + str(key) + '_')
	        elif isinstance(x, list):
	            for index, item in enumerate(x):
	                flatten(item, name + str(index) + '_')
	        else:
	            # Strip the trailing separator appended by the parent level.
	            out[name[:-1]] = x

	    flatten(y)
	    return out
	def json_ast_encoder(o):
	    """Fallback encoder intended for the ``default=`` hook of ``json.dumps``.

	    Args:
	        o: An object the json module could not serialise by itself.

	    Returns:
	        * for a set/frozenset: a list of its elements (sorted when the
	          elements are orderable, so the output is deterministic);
	        * for an object with a ``__dict__``: that attribute dictionary;
	        * for anything else: the empty string.
	    """
	    if isinstance(o, (set, frozenset)):
	        # Previously only empty sets were handled; non-empty ones fell
	        # through to the "" fallback and their contents were lost.
	        try:
	            return sorted(o)
	        except TypeError:
	            # Elements are not mutually orderable; keep them unsorted.
	            return list(o)
	    if hasattr(o, "__dict__"):
	        return o.__dict__
	    return ""
	#def find_end_line_number(node):
	# """Finds end line of a node."""
	# max_line = node.position.line
	#
	# def traverse(node):
	# for child in node.children:
	# if isinstance(child, list) and (len(child) > 0):
	# for item in child:
	# traverse(item)
	# else:
	# if hasattr(child, '_position'):
	# nonlocal max_line
	# if child._position.line > max_line:
	# max_line = child._position.line
	# return
	#
	# traverse(node)
	# return max_line
	# Read the same file a second time, now as a list of lines, so that
	# source text can be looked up by line index.
	with open(f_path,'r') as f:
	    lines = f.readlines()
	import javalang as jl
	# Parse the Java source text read earlier into a javalang tree.
	tree = jl.parse.parse(content)
	##jl.tree.BlockStatement
	#
	#docs = []
	#failed = False
	#index = 0
	#for path, node in tree.filter(jl.tree.MethodDeclaration):
	# d = {}
	# if failed == False:
	# failed_method = None
	# start_line = node.position.line
	# try:
	#
	## print(node.name)
	# d['method'] = node.name
	# d['index'] = index
	# index += 1
	# d['start_line'] = start_line
	# end_line = find_end_line_number(node)
	# d['code'] = lines[start_line:end_line]
	# d['end_line'] = end_line
	# except Exception:
	# failed_method = node.name
	# d['end_line'] = 0
	# failed = True
	# print(f"Error in {node.name}")
	# docs.append(d)
	#
	#
	#
	#df = pd.DataFrame(docs)
	#
	#def get_new_end_line(index,df,lines_count,end_line):
	# if end_line == 0:
	# max_index= df['index'].max()
	## print(max_index,index)
	# if index == max_index:
	# return lines_count
	# else:
	# next_index = index + 1
	# next_df = df[(df['index']==next_index)]
	# new_end_line = next_df['start_line'].iloc[0] - 1
	# return new_end_line
	# else:
	# return end_line
	#lines_count = len(lines)
	#
	#df['new_end_line'] = df.apply(lambda row: get_new_end_line(row['index'],df,lines_count,row['end_line']),axis=1)
	#result = df.to_dict(orient="records")


	# Serialise the parsed tree to JSON text; json_ast_encoder is the fallback
	# for objects the json module cannot encode on its own.
	v = json.dumps(tree, sort_keys=True, default=json_ast_encoder)
	# Decode the text again and keep the 'body' of the first entry in 'types'.
	y = json.loads(v)['types'][0]['body']
	# JSON text of that body.
	z = json.dumps(y)
	#docs = []
	#for i in y:
	# m = flatten_json(i)
	# docs.append(m)
	#df = pd.DataFrame(docs)
	#df_1 = df.dropna(axis=1, how='all')
	#df_2 = df_1.select_dtypes(include=['object'])
	#df_3 = df_2.dropna(subset=['name'])

	def get_parameters(param_list:list)->str:
	    """Return the parameter names joined into one comma-separated string.

	    Every entry of ``param_list`` is expected to be a dict; its ``'name'``
	    value is used, or ``'NA'`` when that key is absent.  An empty (or
	    otherwise falsy) ``param_list`` yields the placeholder ``'No Param'``.
	    """
	    if not param_list:
	        return 'No Param'
	    return ','.join(entry.get('name','NA') for entry in param_list)

	def get_operator(ops):
	    """Placeholder without an implementation: ignores ``ops`` and returns ``None``."""
	    return None
	def get_actions(body_list:list)->tuple:
	    """Collect the calls made by the statements of one method body.

	    Args:
	        body_list: Statement dicts of a method body taken from the
	            JSON-decoded tree.  ``None`` or an empty list is accepted.

	    Returns:
	        A pair ``(actions, object_argument)``.

	        ``actions`` is a list of dicts: ``{"action": member}`` for every
	        selector of a statement's expression, followed by
	        ``{"action": member, "object": qualifier}`` for the expression
	        itself unless both values are missing.

	        ``object_argument`` is a list of ``{'line': text}`` dicts with the
	        source text looked up for each selector that carries a position.

	        For an empty body the placeholders
	        ``('No Actions', 'no object arg')`` are returned.
	    """
	    if not body_list:
	        # Placeholder order follows the (actions, object_argument) order of
	        # the normal return value; it used to be swapped.
	        return 'No Actions', 'no object arg'

	    actions = []
	    object_argument = []
	    for bl in body_list:
	        # Tolerate a missing key as well as an explicit null value.
	        expression = bl.get('expression') or {}
	        action = expression.get('member','NA')
	        _object = expression.get('qualifier','NA')
	        for ic in expression.get('selectors') or []:
	            actions.append({"action":ic.get('member','NA')})
	            pos = ic.get('_position')
	            if not pos:
	                # No position available: there is no source line to look up.
	                continue
	            # pos[0] is treated as a 1-based line number; ``lines`` is the
	            # module-level list of source lines (0-based).
	            last_line = pos[0]-1
	            print("last line:",last_line)
	            first_line = last_line
	            # Lines starting with "+" are taken to continue the previous
	            # line, so walk back to the first line of the statement.  Stop
	            # at index 0: a negative index would wrap around the list.
	            while first_line > 0 and lines[first_line].strip().startswith("+"):
	                first_line -= 1
	            line = ' '.join(lines[first_line:last_line+1]).strip().replace("\t","")
	            object_argument.append({'line': line})
	            print(first_line,last_line+1)
	        if not(action == 'NA' and _object == 'NA'):
	            actions.append({"action":action,"object":_object})
	    return actions,object_argument

	# methods: the raw decoded entries of the type body, kept for inspection.
	# docs: one summary dict per entry; turned into a DataFrame below.
	methods = []
	docs = []
	for i in y:
	    # if not i['annotations']:
	    d = {}
	    methods.append(i)
	    # Missing keys fall back to 'NA' / an empty list.
	    d['method_name'] = i.get('name','NA')
	    d['parameters'] = get_parameters(i.get('parameters',[]))
	    # get_actions returns a pair; unpack it into the two columns.
	    d['actions'],d['object_argument'] = get_actions(i.get('body',[]))
	    docs.append(d)
	#y1 = methods[-2]['body']
	#x1 = methods[-2].get('body')[0]['expression']['arguments'][0]['arguments']


	# One row per entry of ``y``, one column per key of the summary dicts.
	df = pd.DataFrame(docs)