karthikavijayanexpts · May 31, 2022 09:56
diff --git a/compound_to_simple.py b/compound_to_simple.py
 import spacy
 # Load the ``en_core_web_md`` pipeline once at module level; compound_to_simple()
 # reuses this shared ``nlp`` object on every call.
 nlp = spacy.load('en_core_web_md')

 def compound_to_simple(sentence):
     """Split a sentence into rough clause strings, one per selected verb.

     The sentence is parsed with the module-level ``nlp`` pipeline. The
     dependency ROOT plus every other VERB token with fewer than three
     ancestors are selected; for each of them a span is taken from its
     left-most direct child up to (but not including) its right-most direct
     child. Children that are themselves selected verbs are ignored.

     Args:
         sentence: Text to parse.

     Returns:
         A list of clause texts ordered by where each clause starts in the
         sentence. The list is empty when the parse contains no ROOT token
         (such as an empty doc) or when no verb yields a non-empty span.
     """
     doc = nlp(sentence)

     # If several tokens are labelled ROOT, the last one is kept here.
     root_token = None
     for token in doc:
         if token.dep_ == "ROOT":
             root_token = token
     if root_token is None:
         # No ROOT was found; without this guard the code below would fail
         # with AttributeError on ``None.children``.
         return []

     # Ancestors are only walked for tokens that are already known to be verbs.
     other_verbs = [
         token
         for token in doc
         if token.pos_ == "VERB"
         and token != root_token
         and len(list(token.ancestors)) < 3
     ]
     all_verbs = [root_token] + other_verbs

     clauses = []
     for verb in all_verbs:
         child_indices = [
             child.i for child in verb.children if child not in all_verbs
         ]
         if not child_indices:
             continue
         start, end = min(child_indices), max(child_indices)
         if start < end:
             # NOTE(review): the slice is end-exclusive, so the right-most
             # direct child itself is left out of the clause - confirm that
             # this is intended.
             clauses.append(doc[start:end])

     # Present the clauses in sentence order.
     clauses.sort(key=lambda span: span.start)
     return [span.text for span in clauses]
	import spacy
	# Load the ``en_core_web_md`` pipeline once at module level; compound_to_simple()
	# reuses this shared ``nlp`` object on every call.
	nlp = spacy.load('en_core_web_md')

	def compound_to_simple(sentence):
	    """Split a sentence into rough clause strings, one per selected verb.

	    The sentence is parsed with the module-level ``nlp`` pipeline. The
	    dependency ROOT plus every other VERB token with fewer than three
	    ancestors are selected; for each of them a span is taken from its
	    left-most direct child up to (but not including) its right-most direct
	    child. Children that are themselves selected verbs are ignored.

	    Args:
	        sentence: Text to parse.

	    Returns:
	        A list of clause texts ordered by where each clause starts in the
	        sentence. The list is empty when the parse contains no ROOT token
	        (such as an empty doc) or when no verb yields a non-empty span.
	    """
	    doc = nlp(sentence)

	    # If several tokens are labelled ROOT, the last one is kept here.
	    root_token = None
	    for token in doc:
	        if token.dep_ == "ROOT":
	            root_token = token
	    if root_token is None:
	        # No ROOT was found; without this guard the code below would fail
	        # with AttributeError on ``None.children``.
	        return []

	    # Ancestors are only walked for tokens that are already known to be verbs.
	    other_verbs = [
	        token
	        for token in doc
	        if token.pos_ == "VERB"
	        and token != root_token
	        and len(list(token.ancestors)) < 3
	    ]
	    all_verbs = [root_token] + other_verbs

	    clauses = []
	    for verb in all_verbs:
	        child_indices = [
	            child.i for child in verb.children if child not in all_verbs
	        ]
	        if not child_indices:
	            continue
	        start, end = min(child_indices), max(child_indices)
	        if start < end:
	            # NOTE(review): the slice is end-exclusive, so the right-most
	            # direct child itself is left out of the clause - confirm that
	            # this is intended.
	            clauses.append(doc[start:end])

	    # Present the clauses in sentence order.
	    clauses.sort(key=lambda span: span.start)
	    return [span.text for span in clauses]