Skip to content

Instantly share code, notes, and snippets.

@atodev
Last active November 19, 2024 03:34
Show Gist options
  • Save atodev/692ad3cc0d3dc5b7621d55276b4efac2 to your computer and use it in GitHub Desktop.
Save atodev/692ad3cc0d3dc5b7621d55276b4efac2 to your computer and use it in GitHub Desktop.
[ML]
https://www.kaggle.com/code/ryannolan1/kaggle-housing-youtube-video
# Exploratory scatter plots of two candidate predictors against SalePrice.
plt.scatter(x='MSSubClass', y='SalePrice', data=train_df)
plt.scatter(x='LotFrontage', y='SalePrice', data=train_df)
# Show the rows whose LotFrontage exceeds 300.
train_df.query('LotFrontage > 300')
# Z-score every LotArea value and show the ten largest scores.
stats.zscore(train_df['LotArea']).sort_values().tail(10)
# Show rows with OverallCond equal to 5 and a SalePrice above 700000.
train_df.query('OverallCond == 5 & SalePrice > 700000')
#1183
# NOTE(review): 1183 is presumably the Id returned by the query above -- confirm.
# Ids of the rows to drop from the training frame.
# NOTE(review): presumably collected from inspections like the ones above;
# a few Ids appear more than once, which has no effect on isin().
values = [598, 955, 935, 1299, 250, 314, 336, 707, 379, 1183, 692, 186, 441, 186, 524, 739, 598, 955, 636, 1062, 1191, 496, 198, 1338]
# Keep only the rows whose Id is NOT listed. `~` negates the boolean mask,
# which is the idiomatic replacement for comparing the mask with `== False`.
train_df = train_df[~train_df.Id.isin(values)]
# Count missing values per column and show the 20 columns with the most.
pd.DataFrame(train_df.isnull().sum().sort_values(ascending=False)).head(20)
#fill missing values
# Inspect the distinct values stored in MiscFeature.
train_df['MiscFeature'].unique()
# Replace missing Alley entries with the string 'No' in both frames.
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on the selected column: that chained inplace form
# raises a FutureWarning on recent pandas and leaves the DataFrame unchanged
# once Copy-on-Write is enabled.
train_df['Alley'] = train_df['Alley'].fillna('No')
test_df['Alley'] = test_df['Alley'].fillna('No')
# Box plot of SalePrice for each GarageType category.
sns.catplot(data=train_df, x="GarageType", y="SalePrice", kind="box")
#feature engineering
# Add the same two derived columns to the training and the test frame:
#   houseage        = YrSold - YearBuilt
#   houseremodelage = YrSold - YearRemodAdd
for _frame in (train_df, test_df):
    _sold = _frame['YrSold']
    _frame['houseage'] = _sold - _frame['YearBuilt']
    _frame['houseremodelage'] = _sold - _frame['YearRemodAdd']
# Pairwise correlations of the training frame, restricted to numeric columns.
correlation_matrix = train_df.corr(numeric_only=True)
# Draw the matrix on a 20x12 figure, printing each coefficient in its cell
# with two decimal places.
plt.figure(figsize=(20, 12))
sns.heatmap(
    data=correlation_matrix,
    fmt=".2f",
    cmap='coolwarm',
    annot=True,
)
#convert class to int
# Rows whose 'class' equals 'g' become 1, every other row becomes 0.
df['class'] = df['class'].eq('g').astype(int)
#get external data into Google Sheets (the next line is a spreadsheet formula, not Python)
=IMPORTHTML("https://www.espncricinfo.com/records/highest-career-batting-average-282910","table")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment