Created
May 31, 2022 09:52
-
-
Save karthikavijayanexpts/3a41dcb0a96e0aeb34642f0212ef736d to your computer and use it in GitHub Desktop.
Coreference resolution using coreferee and spaCy packages
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import functools

import coreferee  # noqa: F401  (imported so the 'coreferee' pipe name used below is available to spaCy)
import spacy
@functools.lru_cache(maxsize=1)
def _load_coref_pipeline():
    """Load the spaCy pipeline with the coreferee component, once per process.

    Loading 'en_core_web_trf' is expensive, so the configured pipeline is
    cached and reused by every call to ``coref_resolve``.
    """
    nlp = spacy.load('en_core_web_trf')
    nlp.add_pipe('coreferee')
    return nlp


def coref_resolve(text: str) -> str:
    """Return *text* with coreferring mentions replaced by what they resolve to.

    The text is tokenized with spaCy; for every mention in every coreference
    chain, the first token of the mention is replaced by the token(s) returned
    by ``doc._.coref_chains.resolve`` for the first token of that mention that
    has a resolution. Mentions with no resolution are left unchanged.

    Args:
        text: The raw input text.

    Returns:
        The resolved tokens joined by single spaces. Original whitespace is
        not preserved because the result is rebuilt from token texts.
    """
    doc = _load_coref_pipeline()(text)
    chains = doc._.coref_chains

    # Map original token index -> replacement token texts. Replacements are
    # recorded against ORIGINAL indices and applied in a single pass below.
    # Keeping a running insertion offset while walking chain by chain is
    # wrong: chains interleave in the document, so an insertion made for a
    # late token of one chain would shift the positions used for earlier
    # tokens of the following chains.
    replacements = {}
    for chain in chains:
        for mention in chain:
            resolutions = (chains.resolve(doc[i]) for i in mention)
            # First token of the mention that resolves to something, if any.
            resolved = next((r for r in resolutions if r is not None), None)
            if resolved:
                replacements[mention[0]] = [str(tok) for tok in resolved]

    out_tokens = []
    for index, token in enumerate(doc):
        # Either the replacement texts or the token's own text.
        out_tokens.extend(replacements.get(index, (token.text,)))
    return " ".join(out_tokens)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment