Created
January 19, 2021 22:39
-
-
Save zahin-mohammad/75ddbca9783eac0afb78ac901fed8801 to your computer and use it in GitHub Desktop.
neuralcoref vs corenlp coref resolution
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import neuralcoref | |
import en_core_web_sm | |
from stanfordnlp.server import CoreNLPClient | |
# Location of the CoreNLP server. Each part falls back to a default when the
# environment variable is unset *or* empty (hence `or` rather than a
# `get()` default).
CORENLP_HOST = os.environ.get('CORENLP_HOST') or 'localhost'
CORENLP_PORT = os.environ.get('CORENLP_PORT') or 9000
CORENLP_URL = f'http://{CORENLP_HOST}:{CORENLP_PORT}'

# Default annotators/options handed to the client; corenlp_coref() passes its
# own per-request properties on top of these.
annotators = 'tokenize, ssplit, pos, lemma, ner, entitymentions, coref, sentiment, openie'
options = {'openie.resolve_coref': True}

# start_server=False: the client is told not to launch a server itself and
# talks to the endpoint built above instead.
client = CoreNLPClient(
    annotators=annotators,
    options=options,
    start_server=False,
    endpoint=CORENLP_URL,
)

# spaCy pipeline with neuralcoref added to it; used by spacy_coref().
nlp = en_core_web_sm.load()
neuralcoref.add_to_pipe(nlp)
def spacy_coref(text: str):
    """Return *text* as rewritten by the neuralcoref-enabled spaCy pipeline.

    The module-level ``nlp`` pipeline is run over ``text`` and the resulting
    document's ``_.coref_resolved`` extension value is returned unchanged.
    """
    return nlp(text)._.coref_resolved
def _build_replacement_map(corefs):
    """Index every replaceable mention by where it starts.

    ``corefs`` maps a chain id to a list of mention dicts (keys observed in
    the server output include 'text', 'type', 'startIndex', 'endIndex',
    'sentNum' and 'isRepresentativeMention').

    Returns a dict ``(sentNum, startIndex) -> (endIndex, representative_text)``
    with one entry per non-representative mention.
    """
    replacement_map = {}
    for chain in corefs.values():
        # next(..., None) instead of indexing a filtered list: a chain that is
        # empty or has no flagged representative is skipped, not a crash.
        representative = next(
            (m for m in chain if m['isRepresentativeMention']), None)
        if representative is None:
            continue
        # Substituting one pronoun for another would not resolve anything.
        if representative['type'] == 'PRONOMINAL':
            continue
        noun = representative['text']
        for mention in chain:
            if mention == representative:
                continue
            key = (mention['sentNum'], mention['startIndex'])
            replacement_map[key] = (mention['endIndex'], noun)
    return replacement_map


def _is_word_pos(pos):
    """Return True when a POS tag labels a word rather than punctuation."""
    # Possessive tags ('PRP$', 'WP$') end in '$' but label real words; a plain
    # isalnum() check would treat them as punctuation and glue them to the
    # previous token.  Tags made only of symbols ('.', ',', '$', ...) still
    # count as punctuation.
    return pos.rstrip('$').isalnum()


def _rewrite_sentences(sentences, replacement_map):
    """Rebuild the text token by token, substituting mapped mentions.

    Every emitted word or replacement is followed by a single space;
    punctuation tokens first remove the space that precedes them.
    """
    pieces = []
    # Sentence numbers in the replacement map are matched against a 1-based
    # counter over the 'sentences' list.
    for sentence_number, sentence in enumerate(sentences, start=1):
        # Token index at which the mention being replaced ends.  Reset for
        # every sentence so that a mention ending a sentence cannot swallow
        # the first tokens of the following one.
        skip_until = None
        sub_text = ''
        for token in sentence['tokens']:
            index = token['index']
            # Remaining tokens of a multi-word mention already replaced.
            if skip_until is not None and index < skip_until:
                continue
            replacement = replacement_map.get((sentence_number, index))
            if replacement is not None:
                skip_until, noun = replacement
                sub_text += f'{noun} '
                continue
            skip_until = None
            if not _is_word_pos(token['pos']):
                # Punctuation attaches to the preceding token.
                sub_text = sub_text[:-1]
            sub_text += f'{token["originalText"]} '
        pieces.append(sub_text)
    return ''.join(pieces)


def corenlp_coref(text: str) -> str:
    """Resolve coreferences in *text* using the CoreNLP server.

    The text is annotated through the module-level ``client``; each
    non-representative mention of a coreference chain is then replaced by the
    chain's representative mention text, unless that representative is itself
    pronominal.

    Args:
        text: Raw input text.

    Returns:
        The rewritten text.  Tokens are re-joined with single spaces, so the
        result ends with a trailing space whenever any token was emitted.
    """
    ann = client.annotate(
        text,
        output_format="json",
        properties={
            'annotators': 'tokenize,ssplit,pos,lemma,ner,parse,coref,openie',
            'openie.resolve_coref': 'true',
            # Was misspelt 'pinelineLanguage', which is presumably ignored by
            # the server as an unknown key.
            'pipelineLanguage': 'en',
        })
    # The JSON annotation is read through its 'corefs' and 'sentences' keys.
    replacement_map = _build_replacement_map(ann['corefs'])
    return _rewrite_sentences(ann['sentences'], replacement_map)
# Sample inputs used to compare the two coreference resolvers side by side.
examples = [
    'Barack was born in Hawaii. His wife Michelle was born in Milan. He says that she is very smart.',
    'My sister has a friend called Barack Obama. Really, tell me more about him? She thinks he is so funny!',
    'The dog chased the cat. But it escaped.',
    'X and Y are neighbours. She admires him because he works hard.',
    'John and Mary are neighbours. She admires him because he works hard.',
    'Mary and Julie are sisters. They love chocolates.',
    'My brother has a dog and he loves her.',
    'My sister has a dog and she loves her.',
    'My sister has a dog and she loves him. He is cute.',
    'My sister has a dog. She loves him.',
    'My sister has a dog and she loves him.',
]

# (heading, resolver) pairs, in the order their results are printed.
_RESOLVERS = (
    ('spacy_coref:', spacy_coref),
    ('corenlp_coref', corenlp_coref),
)

for example in examples:
    print('#########################################################################################################')
    print(f'ORIGINAL: {example}')
    print()
    for _heading, _resolve in _RESOLVERS:
        print(_heading)
        print(_resolve(example))
        print()
''' | |
######################################################################################################### | |
ORIGINAL: Barack was born in Hawaii. His wife Michelle was born in Milan. He says that she is very smart. | |
spacy_coref: | |
Barack was born in Hawaii. Barack wife Michelle was born in Milan. Barack says that His wife Michelle is very smart. | |
corenlp_coref | |
Barack was born in Hawaii. Barack wife Michelle was born in Milan. Barack says that His wife Michelle is very smart. | |
######################################################################################################### | |
ORIGINAL: My sister has a friend called Barack Obama. Really, tell me more about him? She thinks he is so funny! | |
spacy_coref: | |
My sister has a friend called Barack Obama. Really, tell me more about a friend called Barack Obama? My sister thinks a friend called Barack Obama is so funny! | |
corenlp_coref | |
My sister has a friend called Barack Obama. Really, tell me more about Barack Obama? My sister thinks Barack Obama is so funny! | |
######################################################################################################### | |
ORIGINAL: The dog chased the cat. But it escaped. | |
spacy_coref: | |
The dog chased the cat. But The dog escaped. | |
corenlp_coref | |
The dog chased the cat. But The dog escaped. | |
######################################################################################################### | |
ORIGINAL: X and Y are neighbours. She admires him because he works hard. | |
spacy_coref: | |
X and Y are neighbours. She admires him because him works hard. | |
corenlp_coref | |
X and Y are neighbours. She admires him because he works hard. | |
######################################################################################################### | |
ORIGINAL: John and Mary are neighbours. She admires him because he works hard. | |
spacy_coref: | |
John and Mary are neighbours. Mary admires John because John works hard. | |
corenlp_coref | |
John and Mary are neighbours. Mary admires John because John works hard. | |
######################################################################################################### | |
ORIGINAL: Mary and Julie are sisters. They love chocolates. | |
spacy_coref: | |
Mary and Julie are sisters. Mary and Julie love chocolates. | |
corenlp_coref | |
Mary and Julie are sisters. They love chocolates. | |
######################################################################################################### | |
ORIGINAL: My brother has a dog and he loves her. | |
spacy_coref: | |
My brother has a dog and My brother loves My brother. | |
corenlp_coref | |
My brother has a dog and My brother loves a dog. | |
######################################################################################################### | |
ORIGINAL: My sister has a dog and she loves her. | |
spacy_coref: | |
My sister has a dog and My sister loves My sister. | |
corenlp_coref | |
My sister has a dog and My sister loves My sister. | |
######################################################################################################### | |
ORIGINAL: My sister has a dog and she loves him. He is cute. | |
spacy_coref: | |
My sister has a dog and My sister loves a dog. a dog is cute. | |
corenlp_coref | |
My sister has a dog and My sister loves him. He is cute. | |
######################################################################################################### | |
ORIGINAL: My sister has a dog. She loves him. | |
spacy_coref: | |
My sister has a dog. My sister loves a dog. | |
corenlp_coref | |
My sister has a dog. My sister loves him. | |
######################################################################################################### | |
ORIGINAL: My sister has a dog and she loves him. | |
spacy_coref: | |
My sister has a dog and My sister loves a dog. | |
corenlp_coref | |
My sister has a dog and My sister loves him. | |
''' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment