@sxjscience
Created March 12, 2021 05:55
import pandas as pd
import numpy as np
import argparse
import os
import json
import random
from autogluon.tabular import TabularPredictor
from autogluon.text import TextPredictor
from autogluon.text.text_prediction.infer_types import infer_column_problem_types
from autogluon.text.text_prediction import constants as _C


def get_parser():
    parser = argparse.ArgumentParser(
        description='The Basic Example of AutoGluon for House Price Prediction.')
    parser.add_argument('--mode', choices=['stack5', 'weighted', 'single'], default='weighted')
    parser.add_argument('--data_path', type=str, default='kaggle')
    parser.add_argument('--seed', type=int, default=123)
    parser.add_argument('--exp_path', default=None)
    parser.add_argument('--with_tax_values', default=1, type=int)
    parser.add_argument('--overwrite_exp', action='store_true',
                        help='Whether to overwrite the existing experiment.')
    return parser
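

# Example invocation (illustrative only; the script file name
# 'train_house_price.py' and the paths are placeholders, not part of the
# original gist):
#
#   python train_house_price.py --mode weighted --data_path kaggle \
#       --exp_path ag_zillow_weighted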


def preprocess(df, with_tax_values=True, log_scale_lot=True, has_label=True):
    """Clean the raw dataframe and log-transform skewed columns."""
    new_df = df.copy()
    # The row identifier carries no predictive signal.
    new_df.drop('Id', axis=1, inplace=True)
    # Strip the redundant ' Elementary School' suffix from school names.
    suffix = ' Elementary School'
    new_df['Elementary School'] = new_df['Elementary School'].apply(
        lambda ele: str(ele)[:-len(suffix)] if str(ele).endswith(suffix) else ele)
    if log_scale_lot:
        new_df['Lot'] = np.log(new_df['Lot'] + 1)
    if with_tax_values:
        new_df['Tax assessed value'] = np.log(new_df['Tax assessed value'] + 1)
        new_df['Annual tax amount'] = np.log(new_df['Annual tax amount'] + 1)
    else:
        new_df.drop('Tax assessed value', axis=1, inplace=True)
        new_df.drop('Annual tax amount', axis=1, inplace=True)
    if has_label:
        # Train on log prices; predictions are exponentiated back later.
        new_df['Sold Price'] = np.log(new_df['Sold Price'])
    return new_df


def set_seed(seed):
    # Seed every RNG in use (PyTorch, MXNet, NumPy, Python) for reproducibility.
    import mxnet as mx
    import torch as th
    th.manual_seed(seed)
    mx.random.seed(seed)
    np.random.seed(seed)
    random.seed(seed)


def train(args):
    set_seed(args.seed)
    # Load the Kaggle-style splits: train.csv has labels, test.csv does not,
    # and test_label.csv holds the held-out labels keyed by 'Id'.
    train_df = pd.read_csv(os.path.join(args.data_path, 'train.csv'))
    test_no_label_df = pd.read_csv(os.path.join(args.data_path, 'test.csv'))
    test_label_df = pd.read_csv(os.path.join(args.data_path, 'test_label.csv'))
    test_df = test_no_label_df.merge(test_label_df, how='outer', on='Id')
    train_df = preprocess(train_df, with_tax_values=args.with_tax_values, has_label=True)
    test_no_label_df = preprocess(test_no_label_df,
                                  with_tax_values=args.with_tax_values, has_label=False)
    test_df = preprocess(test_df,
                         with_tax_values=args.with_tax_values, has_label=True)
    label_column = 'Sold Price'
    feature_columns = [ele for ele in train_df.columns if ele != label_column]
    eval_metric = 'r2'
    if args.mode == 'weighted':
        # Weighted ensemble over the 'multimodal' preset (text + tabular models).
        predictor = TabularPredictor(eval_metric=eval_metric, label=label_column, path=args.exp_path)
        predictor.fit(train_df, hyperparameters='multimodal')
        leaderboard = predictor.leaderboard(test_df)
        leaderboard.to_csv(os.path.join(args.exp_path, 'test_leaderboard.csv'))
    elif args.mode == 'single':
        # A single text + tabular neural model via TextPredictor.
        predictor = TextPredictor(eval_metric=eval_metric, label=label_column, path=args.exp_path)
        predictor.fit(train_df, seed=args.seed)
    elif args.mode == 'stack5':
        # 5-fold bagging with one stacking level on top of the multimodal models.
        predictor = TabularPredictor(eval_metric=eval_metric, label=label_column,
                                     path=args.exp_path)
        predictor.fit(train_df, hyperparameters='multimodal',
                      num_bag_folds=5, num_stack_levels=1)
        leaderboard = predictor.leaderboard(test_df)
        leaderboard.to_csv(os.path.join(args.exp_path, 'test_leaderboard.csv'))
    else:
        raise NotImplementedError
    # Undo the log transform so predictions are on the original price scale.
    predictions = np.exp(predictor.predict(test_no_label_df))
    predictions.to_csv(os.path.join(args.exp_path, 'test_predictions.csv'))
    score = predictor.evaluate(test_df)
    with open(os.path.join(args.exp_path, 'test_score.json'), 'w') as of:
        json.dump({'r2': score}, of)


if __name__ == '__main__':
    parser = get_parser()
    args = parser.parse_args()
    if args.exp_path is None:
        args.exp_path = f'ag_zillow_{args.mode}'
    train(args)
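
A minimal inference sketch, for reference: assuming the script above has been run in 'weighted' mode, the fitted ensemble can be reloaded with TabularPredictor.load and applied to new listings. The directory name ag_zillow_weighted and the file new_listings.csv are placeholders, preprocess refers to the function defined above, and np.exp undoes the log transform applied to 'Sold Price' during preprocessing.

import numpy as np
import pandas as pd
from autogluon.tabular import TabularPredictor

predictor = TabularPredictor.load('ag_zillow_weighted')  # placeholder path
new_df = preprocess(pd.read_csv('new_listings.csv'),     # placeholder file
                    with_tax_values=True, has_label=False)
predicted_prices = np.exp(predictor.predict(new_df))     # back to dollar scale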