@sxjscience · Last active July 12, 2022
AutoMM mT5 single-GPU finetune
from autogluon.multimodal import MultiModalPredictor
from datasets import load_dataset
import json
import os
import time
import argparse

# Finetune google/mt5-xl on GLUE MRPC (paraphrase detection) with AutoGluon
# MultiModal on a single GPU, using bf16 precision, gradient checkpointing,
# and parameter-efficient finetuning.
mrpc = load_dataset('glue', 'mrpc')
train_data = mrpc['train'].to_pandas().drop('idx', axis=1)
test_data = mrpc['validation'].to_pandas().drop('idx', axis=1)
label = 'label'
backbone = 'google/mt5-xl'

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Finetune mT5 on MRPC with AutoMM.')
    parser.add_argument('--prompt', type=str, help='prompt prepended to sentence1 to indicate the task', default='')
    parser.add_argument('--lr_decay', type=float, help='layer-wise learning rate decay', default=1.0)
    parser.add_argument('--learning_rate', type=float, help='learning rate', default=1e-03)
    parser.add_argument('--efficient_finetune', type=str, help='parameter-efficient finetuning type', default='lora_norm')
    parser.add_argument('--pooling_mode', type=str, help='pooling mode', default='mean')
    parser.add_argument('--seed', type=int, help='random seed', default=1)
    args = parser.parse_args()

    save_path = f'{backbone}_{args.pooling_mode}_{args.efficient_finetune}_lr{args.learning_rate}_{args.lr_decay}_prompt_{args.prompt}'

    # Prepend the (optional) task prompt to the first sentence of each pair.
    train_data['sentence1'] = train_data['sentence1'].apply(lambda ele: args.prompt + ' ' + ele)
    test_data['sentence1'] = test_data['sentence1'].apply(lambda ele: args.prompt + ' ' + ele)

    train_start = time.time()
    predictor = MultiModalPredictor(label=label, path=save_path, seed=args.seed).fit(
        train_data,
        hyperparameters={
            "model.hf_text.checkpoint_name": backbone,
            "model.hf_text.gradient_checkpointing": True,  # trade compute for GPU memory
            "model.hf_text.pooling_mode": args.pooling_mode,
            "optimization.efficient_finetune": args.efficient_finetune,
            "optimization.lr_decay": args.lr_decay,
            "optimization.learning_rate": args.learning_rate,
            "env.precision": "bf16",
            "env.num_gpus": 1,
        },
    )
    train_end = time.time()

    # Evaluate on the MRPC validation split and record the total training time.
    predictions = predictor.predict(test_data)
    score = predictor.evaluate(test_data, metrics=['acc', 'f1'])
    score.update({'time_cost': train_end - train_start})
    print(score)
    with open(os.path.join(save_path, 'results.json'), 'w') as f:
        json.dump(score, f)
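After training, the finetuned predictor can be reloaded from save_path for inference. A minimal sketch (not part of the original gist), assuming the same AutoGluon version; the save_path value and the sentence pair below are illustrative, corresponding to the script's default arguments:

import pandas as pd
from autogluon.multimodal import MultiModalPredictor

# Directory written by the script above when run with default flags;
# adjust if you ran with different arguments. (Illustrative value.)
save_path = 'google/mt5-xl_mean_lora_norm_lr0.001_1.0_prompt_'
predictor = MultiModalPredictor.load(save_path)

# Predict on a hand-written sentence pair (hypothetical example data).
sample = pd.DataFrame({
    'sentence1': ['The stock rose sharply on Monday .'],
    'sentence2': ['Shares climbed steeply at the start of the week .'],
})
print(predictor.predict(sample))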
@sxjscience (Author) commented:
Results on a g4.12 instance (single GPU):

{"acc": 0.8725490196078431, "f1": 0.9047619047619048, "time_cost": 8712.896693944931}
