jmsword’s gists

jmsword / knn.py

Created March 5, 2017 21:30

KNN

	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	from random import randint
	from sklearn.neighbors import NearestNeighbors
	import math
	import random


	#Read in data

jmsword / naive_bayes.py

Created February 11, 2017 21:41

Naive Bayes

	import pandas as pd
	import matplotlib.pyplot as plt
	from sklearn.naive_bayes import GaussianNB

	#Reading this dataset straight from GitHub kept failing with
	#'pandas.io.common.CParserError: Error tokenizing data. C error: Expected 1 fields in line 104, saw 3',
	#so the data was copied into a csv file, saved locally, and is loaded from there instead
	df = pd.read_csv('ideal_weight.csv')

	#Strip the single quotes (') from the column names
	df = df.rename(columns=lambda name: name.replace("'", ""))

jmsword / random_forest.py

Created February 11, 2017 20:04

Random Forest

	import pandas as pd
	import numpy as np
	from sklearn.ensemble import RandomForestClassifier
	import sklearn.metrics as skm
	import pylab as pl

	#Read in the column names for the dataset
	#sep=r'\s+' splits fields on runs of whitespace; it is the documented equivalent of
	#delim_whitespace=True, which pandas deprecated in version 2.2
	feat = pd.read_csv('features.txt', sep=r'\s+', header=None, index_col=False)

	#Name the columns to isolate all dataset column names in one column

jmsword / cross_validation.py

Created February 5, 2017 21:46

Cross Validation Help

	import pandas as pd
	import numpy as np
	from sklearn.model_selection import KFold
	import statsmodels.api as sm
	import statsmodels.formula.api as smf
	from sklearn.metrics import mean_squared_error

	#Read in the loans dataset directly from the Thinkful-Ed GitHub repository
	loansData = pd.read_csv('https://github.com/Thinkful-Ed/curric-data-001-data-sets/raw/master/loans/loansData.csv')

	#Remove '%' from 'Interest.Rate' column and convert to number

jmsword / overfitting.py

Created January 31, 2017 15:31

Over-fitting practice

	import numpy as np
	import statsmodels.formula.api as smf
	import pandas as pd
	import matplotlib.pyplot as plt
	import statsmodels.api as sm
	from sklearn.metrics import mean_squared_error

	#Set seed for reproducible results: seeding NumPy's random number generator with a
	#fixed value makes it produce the same sequence of random numbers on every run
	np.random.seed(414)

jmsword / education.py

Created January 28, 2017 17:56

Education

	from bs4 import BeautifulSoup
	import requests
	import pandas as pd
	import sqlite3 as lite
	import csv
	import numpy as np
	import matplotlib.pyplot as plt
	import numpy as np
	import statsmodels.api as sm

jmsword / temperature.py

Created January 24, 2017 16:22

temperature

jmsword / citibike.py

Created January 19, 2017 14:01

citibike help

	import requests
	from pandas.io.json import json_normalize
	import matplotlib.pyplot as plt
	import pandas as pd
	import sqlite3 as lite
	import time
	from dateutil.parser import parse
	import collections

jmsword / time_series.py

Created January 13, 2017 04:33

Time Series Analysis

	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import statsmodels.api as sm
	import statsmodels.formula.api as smf

	#Load the loan statistics; header=1 takes the column names from the file's second row
	df = pd.read_csv('LoanStats3b.csv', header=1, low_memory=False)

	#Parse the issue dates into datetimes, then build a copy of the data indexed by them
	df = df.assign(issue_d_format=pd.to_datetime(df['issue_d']))
	dfts = df.set_index('issue_d_format')

jmsword / multivariant.py

Created January 12, 2017 04:11

multivariate analysis

	import pandas as pd
	import statsmodels.api as sm
	import statsmodels.formula.api as smf
	import numpy as np

	#Load the loans dataset from the Thinkful-Ed GitHub repository
	df = pd.read_csv('https://github.com/Thinkful-Ed/curric-data-001-data-sets/raw/master/loans/loansData.csv')

	#Annual income is twelve times the monthly income
	df['annual_inc'] = df['Monthly.Income'] * 12
	#Turn the interest rate text (ending in '%') into a fraction rounded to 4 decimal places
	df['int_rate'] = df['Interest.Rate'].map(lambda rate: round(float(rate.rstrip('%')) / 100, 4))
	#Expose the home ownership column under a snake_case name
	df['home_ownership'] = df['Home.Ownership']

Jeff S. jmsword