Skip to content

Instantly share code, notes, and snippets.

View sxjscience's full-sized avatar
:octocat:
Hallucinating code...

Xingjian Shi sxjscience

:octocat:
Hallucinating code...
View GitHub Profile
@sxjscience
sxjscience / automm_mt5_xl_multi_gpu_fsdp.py
Last active July 12, 2022 14:59
AutoMM Multi-GPU FSDP for mT5-XL
from autogluon.multimodal import MultiModalPredictor
from datasets import load_dataset
import json
import os
import time
import argparse
PAWS_X_LANGUAGE_L = ['en', 'fr', 'es', 'de', 'zh', 'ja', 'ko']
os.makedirs("data_cache", exist_ok=True)
@sxjscience
sxjscience / automm_mt5_xl_single_gpu.py
Last active July 12, 2022 08:17
AutoMM mT5 single GPU finetune
from autogluon.multimodal import MultiModalPredictor
from datasets import load_dataset
import json
import os
import time
import argparse
train_data = load_dataset("glue", 'mrpc')['train'].to_pandas().drop('idx', axis=1)
test_data = load_dataset("glue", 'mrpc')['validation'].to_pandas().drop('idx', axis=1)
label = 'label'
import argparse
import os
from auto_mm_bench.datasets_with_image import dataset_with_image_registry, create_dataset
from autogluon.core.features.feature_metadata import FeatureMetadata
from autogluon.tabular import TabularPredictor
BASELINE_HPARAMS = {
'FASTAI': {},
}
import time
import torch
from transformers import GPTNeoForCausalLM, AutoConfig, GPT2Tokenizer
import torch
import hashlib
import transformers
import argparse
import collections
import os
import logging
import os
from PIL import Image
base_dir = 'image manually'
with open('label_file.txt', 'w') as of:
for filename in os.listdir(base_dir):
print('Labeling filename', filename)
img = Image.open(os.path.join(base_dir, filename))
img.show()
label = input('Flood = 0, Non-flood=1: ')

California House Prices Prediction

Install AutoGluon (I used version 0.1 in the submission; you may try the latest version, which may give you better performance).

pip install autogluon

Competition page: https://www.kaggle.com/c/california-house-prices

import pandas as pd
import numpy as np
import argparse
import os
import json
import random
from autogluon.tabular import TabularPredictor
from autogluon.text import TextPredictor
from autogluon.text.text_prediction.infer_types import infer_column_problem_types
from autogluon.text.text_prediction import constants as _C
@sxjscience
sxjscience / run_squad_albert-base-v2.log
Created November 17, 2020 05:13
huggingface_g4_12dn_run
export SQUAD_DIR=/home/ubuntu/squad
python3 -m torch.distributed.launch --nproc_per_node=4 ./examples/question-answering/run_squad.py \
--model_type albert \
--model_name_or_path albert-base-v2 \
--do_train \
--do_eval \
--version_2_with_negative \
--train_file $SQUAD_DIR/train-v2.0.json \
--predict_file $SQUAD_DIR/dev-v2.0.json \
--learning_rate 3e-5 \
for MODEL_NAME in albert_base \
albert_large \
albert_xlarge \
albert_xxlarge \
electra_base \
electra_large \
electra_small \
roberta_large \
uncased_bert_base \
uncased_bert_large \
FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04
LABEL maintainer="GluonNLP Team"
ARG DEBIAN_FRONTEND=noninteractive
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/lib" \
PYTHONIOENCODING=UTF-8 \