Maryna Longnickel MarynaLongnickel

13 followers · 2 following

https://www.linkedin.com/in/maryna-longnickel/

View GitHub Profile

Recently created

Least recently created

Recently updated

Least recently updated

MarynaLongnickel / NB.py

Created June 16, 2018 21:17

	# Hold out 30% of the rows for evaluation.
	train, test = train_test_split(data, test_size=0.3)

	# Every column except the last one is used as a model input.
	# NOTE(review): this relies on 'sentiment' being the last column - confirm.
	cols = train.columns[:-1]

	# Fit a multinomial Naive Bayes classifier on the training rows, then
	# predict a label for each held-out row.
	gnb = MultinomialNB()
	gnb.fit(
	    train[cols],
	    train['sentiment'],
	)
	y_pred = gnb.predict(test[cols])

	print("Number of mislabeled points out of a total {} points : {}, performance {:05.2f}%"
	.format(

MarynaLongnickel / pos_neg.py

Created June 16, 2018 21:16

	# Split the rows by their sentiment label.
	pos_reviews = data[data['sentiment'] == 1]
	neg_reviews = data[data['sentiment'] == 0]

	# Column-wise totals within each group, as plain NumPy arrays.
	# (The original line read `ntg_reviews.columns`, an undefined name that
	# raised NameError; it must be `neg_reviews`.)
	pnum = np.array(pos_reviews[pos_reviews.columns].sum())
	nnum = np.array(neg_reviews[neg_reviews.columns].sum())

	# Boolean mask: True where a column's total is strictly larger among the
	# sentiment == 1 rows than among the sentiment == 0 rows.
	dif = pnum > nnum

MarynaLongnickel / word_matrix.py

Created June 16, 2018 21:15

	# One row per entry of `lemmatized`; each row holds a 1/0 flag for every
	# item of `top5000`, set when that item is contained in the entry.
	word_matrix = [
	    [1 if term in doc else 0 for term in top5000]
	    for doc in lemmatized
	]

	# NOTE(review): a DataFrame is passed as the row index here - confirm this
	# is intended (a list of labels or no index at all is more usual).
	features = pd.DataFrame(
	    word_matrix,
	    columns=top5000,
	    index=pd.DataFrame(filtered_tokens),
	)

	# Attach the labels positionally (raw values, ignoring index alignment).
	features['sentiment'] = data['sentiment'].values

MarynaLongnickel / analysis.py

Created June 16, 2018 21:13

	from operator import itemgetter
	from collections import Counter

	# Flatten the nested sequences in filtered_tokens into one flat list.
	flat_list = []
	for sublist in filtered_tokens:
	    flat_list.extend(sublist)

	# (word, occurrences) pairs for every distinct word.
	count = Counter(flat_list).items()

	# Order from most to least frequent. The stable ascending sort is reversed
	# afterwards, so words with equal counts come out in the reverse of the
	# Counter's iteration order (not the same as sorting with reverse=True).
	sorted_count = sorted(count, key=itemgetter(1))[::-1]

MarynaLongnickel / cleaning.py

Created June 16, 2018 21:07

	# De-duplicated list of NLTK's English stopwords.
	en_stopwords = list(set(nltk.corpus.stopwords.words('english')))

	# Remove punctuation from every review and lowercase it.
	# Iterate data['text'] explicitly: iterating a DataFrame directly yields
	# its column labels, so the original comprehension only "cleaned" the
	# column names and never touched the review text.
	clean = [re.sub(r'[^\w\s]', '', i).lower() for i in data['text']]

	# Tokenize the cleaned text (not the raw column) so the punctuation
	# stripping and lowercasing above actually reach the tokens.
	tokens = [word_tokenize(x) for x in clean]
	filtered_tokens = []

	# tokens that are not stopwords collected here
	for i in tokens:

MarynaLongnickel / data_prep.py

Created June 16, 2018 21:06

	# Build one row per file in the movie_reviews corpus:
	#   text      - the file's words joined by single spaces
	#   sentiment - 1 when the file is in the 'pos' category, otherwise 0
	#
	# movie_reviews.categories(fileid) returns a list of category names, so
	# membership is tested with `in`; comparing that list to the string 'pos'
	# with == is always False and labelled every review 0.
	#
	# Rows are collected first and the frame is built once: appending a
	# one-row DataFrame per file is quadratic, and DataFrame.append no longer
	# exists in pandas 2.x. The resulting index is 0..n-1 rather than all zeros.
	rows = [
	    {
	        'text': ' '.join(movie_reviews.words(fileid)),
	        'sentiment': 1 if 'pos' in movie_reviews.categories(fileid) else 0,
	    }
	    for fileid in movie_reviews.fileids()
	]
	data = pd.DataFrame(rows, columns=['text', 'sentiment'])

MarynaLongnickel / setup.py

Last active June 16, 2018 21:04

	import nltk
	nltk.download('all')

	import regex as re
	import pandas as pd
	from sklearn.utils import shuffle
	from nltk import LancasterStemmer
	from nltk.tokenize import word_tokenize
	from nltk.corpus import movie_reviews, stopwords
	from sklearn.naive_bayes import MultinomialNB

MarynaLongnickel / ball

Created April 6, 2018 15:01

rolling ball animation


	import numpy as np
	import matplotlib.pyplot as plt
	from matplotlib.animation import FuncAnimation

	# Ten angles evenly spaced over [-pi/2, pi/2].
	p = np.linspace(-np.pi / 2, np.pi / 2, 10)
	x = np.sin(p)

	# Vertex table with one row per point: the x values twice, then +cos(p)
	# followed by -cos(p), so every (x, y) pair lies on the unit circle, plus
	# a constant third column of ones.
	# The `*` operators in `[1] * len(p) * 2` had been lost from the original
	# line (`[1]len(p)2`), which made it a syntax error.
	# NOTE(review): the column of ones looks like a homogeneous coordinate for
	# later matrix transforms - confirm against the rest of the script.
	v = np.column_stack((
	    np.concatenate((x, x)),
	    np.concatenate((np.cos(p), -np.cos(p))),
	    [1] * len(p) * 2,
	))