Last active
June 15, 2021 15:39
-
-
Save gaurav5430/9fce93759eb2f6b1697883c3782f30de to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet

# Single module-level lemmatizer instance, shared by the code in this file.
lemmatizer = WordNetLemmatizer()
# function to convert nltk tag to wordnet tag
def nltk_tag_to_wordnet_tag(nltk_tag):
    """Map an NLTK POS tag to the matching WordNet POS constant.

    Args:
        nltk_tag: POS tag string (the second item of each pair returned by
            ``nltk.pos_tag``).

    Returns:
        ``wordnet.ADJ``, ``wordnet.VERB``, ``wordnet.NOUN`` or ``wordnet.ADV``
        when the tag starts with ``J``, ``V``, ``N`` or ``R`` respectively;
        ``None`` for any other tag, including the empty string. The prefix
        match is case-sensitive.
    """
    # Guard clauses: ``wordnet`` is only touched on a matching branch, so
    # tags without a WordNet equivalent never access the corpus object.
    if nltk_tag.startswith('J'):
        return wordnet.ADJ
    if nltk_tag.startswith('V'):
        return wordnet.VERB
    if nltk_tag.startswith('N'):
        return wordnet.NOUN
    if nltk_tag.startswith('R'):
        return wordnet.ADV
    return None
def lemmatize_sentence(sentence):
    """Lemmatize each token of *sentence* using its part-of-speech tag.

    The sentence is tokenized with ``nltk.word_tokenize`` and tagged with
    ``nltk.pos_tag``. Each tag is converted with ``nltk_tag_to_wordnet_tag``;
    tokens whose tag has no WordNet equivalent are kept unchanged, all others
    are passed to ``lemmatizer.lemmatize`` together with their WordNet tag.

    Args:
        sentence: Text to lemmatize.

    Returns:
        The resulting tokens joined by single spaces, so the original spacing
        between tokens is not preserved. A blank (empty or whitespace-only)
        sentence yields ``""``.
    """
    # Nothing to lemmatize in a blank string; skip the tokenizer and tagger.
    if not sentence.strip():
        return ""

    # tokenize the sentence and find the POS tag for each token
    nltk_tagged = nltk.pos_tag(nltk.word_tokenize(sentence))

    # pairs of (token, wordnet_tag); the tag is None when there is no mapping
    wordnet_tagged = (
        (token, nltk_tag_to_wordnet_tag(nltk_tag))
        for token, nltk_tag in nltk_tagged
    )

    # keep untagged tokens as they are, lemmatize the rest with their tag
    return " ".join(
        word if tag is None else lemmatizer.lemmatize(word, tag)
        for word, tag in wordnet_tagged
    )
# Demo: expected output is shown in the trailing comment of each line.
# Without a POS argument the lemmatizer leaves these inputs unchanged;
# passing "v" (or tagging the sentence first) yields the verb lemma.
print(lemmatizer.lemmatize("I am loving it"))  # I am loving it
print(lemmatizer.lemmatize("loving"))  # loving
print(lemmatizer.lemmatize("loving", "v"))  # love
print(lemmatize_sentence("I am loving it"))  # I be love it
hey, this is free to use, please use it as you wish
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
hi, could you please license this code of yours?
Thank you