This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import uuid | |
from pathlib import Path | |
import click | |
from vllm import LLM, SamplingParams | |
from vllm.outputs import RequestOutput | |
import datasets as ds | |
from src.data.common import normalize_text |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datasets as ds | |
from konoha import SentenceTokenizer | |
def title2text(): | |
dataset: ds.Dataset = ds.load_dataset("globis-university/aozorabunko-clean", split="train", num_proc=16) | |
def process(x: dict[str, list]): | |
anc_list, pos_list = [], [] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from transformers import PreTrainedTokenizer | |
from vllm import LLM, SamplingParams | |
from vllm.outputs import RequestOutput | |
import datasets as ds | |
def build_input_text(text: str, tokenizer: PreTrainedTokenizer) -> str: | |
text = text.strip() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def format_param_with_unit(num_params: int) -> str: | |
if num_params >= 1000 * 1000 * 1000: | |
unit = "B" | |
num_params /= 1000 * 1000 * 1000 | |
elif num_params >= 1000 * 1000: | |
unit = "M" | |
num_params /= 1000 * 1000 | |
elif num_params >= 1000: | |
unit = "K" | |
num_params /= 1000 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ORIGIN_DIR=$(pwd) | |
JUMANPP_DIR="$HOME/.local/share/jumanpp" | |
mkdir -p $JUMANPP_DIR | |
cd $JUMANPP_DIR | |
curl -LO https://github.com/ku-nlp/jumanpp/releases/download/v2.0.0-rc3/jumanpp-2.0.0-rc3.tar.xz | |
tar -xf jumanpp-2.0.0-rc3.tar.xz | |
cd jumanpp-2.0.0-rc3 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import hoge.a | |
print(__file__) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from tqdm import tqdm | |
for i in tqdm(list(range(100)), position=0): | |
for batch in tqdm(list(range(10000000)), position=1): | |
pass |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
N_TRIALS=10 | |
STORAGE=sqlite:///example.db | |
STUDY_NAME=`optuna create-study --storage $STORAGE` | |
DISTRIBUTIONS=`cat distributions.json` | |
for _ in `seq 1 $N_TRIALS`; do | |
trial=`optuna ask \ | |
--storage $STORAGE \ | |
--study-name $STUDY_NAME \ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Python 3 hrs 45 mins โโโโโโโโโโโโโโโโโโโโโ 50.0% | |
Markdown 2 hrs 38 mins โโโโโโโโโโโโโโโโโโโโโ 35.1% | |
TypeScript 37 mins โโโโโโโโโโโโโโโโโโโโโ 8.2% | |
JSON 19 mins โโโโโโโโโโโโโโโโโโโโโ 4.2% | |
JavaScript 5 mins โโโโโโโโโโโโโโโโโโโโโ 1.3% |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"lastUpload":"2020-04-04T07:45:57.757Z","extensionVersion":"v3.4.3"} |