Orhan Yalcin ogyalcin

# Adding an existing project to GitHub using the command line

Simple steps to add an existing project to GitHub.

1. Create a new repository on GitHub.

In Terminal, change the current working directory to your local project.

## 2. Initialize the local directory as a Git repository.

git init

	# Display settings: widen columns so each rule can be read in full, and show floats with 2 decimals
	pd.set_option('display.max_colwidth', 400)
	pd.options.display.float_format = '{:.2f}'.format
	# Fetch the rules from the RuleFit model
	rules = rulefit.get_rules()
	# Drop the 'linear' entries (the existing explanatory variables) and the rules with a zero coefficient
	is_rule = rules['type'] != 'linear'
	has_effect = rules['coef'] != 0
	rules = rules[is_rule & has_effect]
	# Order by "support", highest first
	rules = rules.sort_values('support', ascending=False)
	# Optional: keep only rules whose text is longer than 30 characters, i.e. the more complex ones
	rules = rules[rules['rule'].str.len() > 30]
	# Show the first 5 remaining rules
	rules.head(5)

	# Root-mean-squared error of the RuleFit predictions against y
	rulefit_preds = rulefit.predict(X.values)
	rulefit_sq_errors = (rulefit_preds - y) ** 2
	rulefit_rmse = np.sqrt(rulefit_sq_errors.mean())
	print(rulefit_rmse)

	# If you are using Colab, you need to install rulefit library
	!pip install rulefit
	from rulefit import RuleFit
	# Create and Train RuleFit Model
	rulefit = RuleFit(tree_generator=RandomForestRegressor(n_estimators = 100))
	rulefit.fit(X.values, y, feature_names=X.columns)

	# Bar chart of the Random Forest feature importances, largest first
	feat_importances = pd.Series(
	    rf.feature_importances_, index=X.columns
	).sort_values(ascending=False)
	px.bar(
	    x=feat_importances.index,
	    y=feat_importances,
	    template='ggplot2',
	    width=800,
	)

	# Root-mean-squared error of the Random Forest predictions
	# To keep the example simple there is no train/test split: the model is scored on X itself
	rf_preds = rf.predict(X)
	rf_sq_errors = (rf_preds - y) ** 2
	rf_rmse = np.sqrt(rf_sq_errors.mean())
	print(rf_rmse)

	# Fetch the RuleFit rules, order them by "support" (descending) and show the first 15
	rules = rulefit.get_rules().sort_values('support', ascending=False)
	rules.head(15)

	from sklearn.ensemble import RandomForestRegressor

	# Separate the 'target' column from the remaining (explanatory) columns
	X = df.drop(columns='target')
	y = df['target']

	# Fit a Random Forest Regressor (50 estimators, random_state=42, n_jobs=-1)
	rf = RandomForestRegressor(n_estimators=50, random_state=42, n_jobs=-1)
	rf.fit(X, y)

	import plotly.express as px
	# Histogram of the 'target' column using 50 bins
	px.histogram(
	    df,
	    x='target',
	    nbins=50,
	    template='ggplot2',
	    width=800,
	)

	import numpy as np
	import pandas as pd
	from sklearn.datasets import load_boston

	# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2 -- confirm the installed version still provides it
	raw_data = load_boston()
	# Build one DataFrame: the feature columns followed by a final 'target' column
	column_names = np.append(raw_data['feature_names'], ['target'])
	df = pd.DataFrame(
	    np.column_stack((raw_data['data'], raw_data['target'])),
	    columns=column_names,
	)
	df.head()