hkaraoguz · October 28, 2020 20:53
diff --git a/text_preprocess.py b/text_preprocess.py
 import string
 import re
 import nltk

 def preprocess_text(text):
    """Normalize raw text into a list of Porter-stemmed tokens.

    Steps, in order: lowercase the input, drop every ``@...`` and
    ``http(s)://...`` run up to the next whitespace, delete all characters
    listed in ``string.punctuation``, tokenize with ``nltk.word_tokenize``
    and stem each token with NLTK's ``PorterStemmer``.

    Returns the stemmed tokens as a list, in their original order.
    """
    lowered = text.lower()

    # Mentions and links are stripped before punctuation so that '@', ':'
    # and '/' are still present for the pattern to anchor on.
    without_refs = re.sub(r"(?:\@|https?\://)\S+", "", lowered)

    # A deletion table removes exactly the characters in string.punctuation.
    drop_punctuation = str.maketrans("", "", string.punctuation)
    cleaned = without_refs.translate(drop_punctuation)

    tokens = nltk.word_tokenize(cleaned)

    stemmer = nltk.stem.porter.PorterStemmer()
    return list(map(stemmer.stem, tokens))
	import string
	import re
	import nltk

	def preprocess_text(text):
	    """Lowercase, clean, tokenize and stem a piece of text.

	    The text is lowercased, every ``@...`` mention and ``http(s)://...``
	    link (up to the next whitespace) is removed, all characters in
	    ``string.punctuation`` are dropped, and the remainder is split with
	    ``nltk.word_tokenize`` and stemmed with NLTK's ``PorterStemmer``.

	    Returns a list with one stem per token, in input order.
	    """
	    # Make lowercase
	    text = text.lower()

	    # Remove mentions and http links. The '|' must stay unescaped: it is
	    # the alternation between the '@' prefix and the 'http(s)://' prefix.
	    # An escaped '\|' would only match the literal text '@|http://...'.
	    text = re.sub(r"(?:\@|https?\://)\S+", "", text)

	    # Remove punctuation
	    text = ''.join(char for char in text if char not in string.punctuation)

	    # Get words
	    words = nltk.word_tokenize(text)

	    # Perform stemming
	    porter = nltk.stem.porter.PorterStemmer()
	    stemmed = [porter.stem(word) for word in words]

	    return stemmed
No results found