This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#utilizing one-hot encoding, random forest, and XGBoost to predict the outliers | |
import numpy as np | |
from sklearn.base import BaseEstimator,TransformerMixin, ClassifierMixin | |
from sklearn.preprocessing import LabelEncoder | |
import xgboost as xgb | |
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv) | |
from sklearn.ensemble import GradientBoostingClassifier | |
import time | |
from sklearn.model_selection import KFold | |
from sklearn.ensemble import RandomForestClassifier |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
#import seaborn as sns | |
#matplotlib inline | |
from sklearn import model_selection, preprocessing | |
import xgboost as xgb | |
import datetime | |
import operator | |
# sklearn.cross_validation was removed in scikit-learn 0.20; train_test_split
# is provided by sklearn.model_selection.
from sklearn.model_selection import train_test_split
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from sklearn.linear_model import LogisticRegression | |
from sklearn.linear_model import LinearRegression | |
from sklearn.feature_selection import RFE | |
# sklearn.cross_validation was removed in scikit-learn 0.20; both helpers are
# provided by sklearn.model_selection.
from sklearn.model_selection import cross_val_score, train_test_split
import xgboost as xgb | |
from sklearn.metrics import r2_score | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from sklearn.linear_model import LogisticRegression | |
from sklearn.linear_model import LinearRegression | |
from sklearn.feature_selection import RFE | |
# sklearn.cross_validation was removed in scikit-learn 0.20; both helpers are
# provided by sklearn.model_selection.
from sklearn.model_selection import cross_val_score, train_test_split
#input the data | |
# Load the training set with integer column names 0..377.
# The original passed header=False, which current pandas rejects with a
# TypeError (a bool is not a valid `header` value). header=0 treats the first
# row of the file as a header line and replaces it with `names`.
# NOTE(review): presumably train.csv has a header row; if it does not, use
# header=None so the first row is kept as data.
data0 = pd.read_csv('./data/train.csv', sep=',', header=0, names=range(378))
# `data` refers to the same DataFrame object as `data0` (an alias, not a copy).
data = data0
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from collections import Counter | |
from sklearn.model_selection import KFold | |
from sklearn.ensemble import RandomForestClassifier | |
import xgboost as xgb | |
from sklearn.metrics import matthews_corrcoef as mc | |
import operator |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Training logistic regression via stochastic gradient ascent | |
import math | |
import pandas as pd | |
import numpy as np | |
#the dataset consists of a subset of baby product reviews on Amazon.com | |
import sframe | |
# Read the SFrame stored under 'amazon_baby_subset.gl/' and convert it to a
# pandas DataFrame in a single expression.
products = sframe.SFrame('amazon_baby_subset.gl/').to_dataframe()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#explore precision and recall | |
import pandas as pd | |
import numpy as np | |
#the dataset consists of baby product reviews on Amazon.com | |
import sframe | |
products = sframe.SFrame('amazon_baby.gl/') | |
#clean the original data: remove punctuation, fill in N/A, remove neutral sentiment, | |
# perform a train/test split, produce word count matrix |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Boosting a decision stump from scratch | |
import pandas as pd | |
import numpy as np | |
#the dataset consists of data from LendingClub, used to predict whether a loan will be paid off in full or | |
#the loan will be charged off and possibly go into default | |
import sframe | |
loans = sframe.SFrame('lending-club-data.gl/') | |
#target column 'safe_loans': +1 means a safe loan and -1 means a risky loan |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#use the pre-implemented gradient boosted trees | |
import pandas as pd | |
import numpy as np | |
#the dataset consists of data from LendingClub, used to predict whether a loan will be paid off in full or | |
#the loan will be charged off and possibly go into default | |
import sframe | |
loans = sframe.SFrame('lending-club-data.gl/') | |
#target column 'safe_loans': +1 means a safe loan and -1 means a risky loan |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#explore various techniques for preventing overfitting in decision trees | |
import math | |
import pandas as pd | |
import numpy as np | |
#the dataset consists of data from LendingClub, used to predict whether a loan will be paid off in full or | |
#the loan will be charged off and possibly go into default | |
import sframe | |
loans = sframe.SFrame('lending-club-data.gl/') |
NewerOlder