@Taekyoon
Taekyoon / copy-of-7-2_bert_finetune-kor-korner.ipynb
Created August 15, 2020 01:05
Copy of 7.2_bert_finetune-KOR-KORNER.ipynb
# Restore the fine-tuned weights and evaluate on the held-out test set
cls_model.load_weights(checkpoint_path)

results = cls_model.evaluate(test_data_sents, test_data_labels, batch_size=1024)
print("test loss, test acc: ", results)
import os
from tensorflow.keras.callbacks import EarlyStopping

model_name = "tf2_gpt2_naver_movie"
earlystop_callback = EarlyStopping(monitor='val_accuracy', min_delta=0.0001, patience=2)

checkpoint_path = os.path.join(DATA_OUT_PATH, model_name, 'weights.h5')
checkpoint_dir = os.path.dirname(checkpoint_path)
if os.path.exists(checkpoint_dir):
    print("{} -- Folder already exists \n".format(checkpoint_dir))
else:
    os.makedirs(checkpoint_dir, exist_ok=True)
    print("{} -- Folder create complete \n".format(checkpoint_dir))
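A model-checkpoint callback that writes the best weights to checkpoint_path usually accompanies the early-stopping callback; the preview does not show it, so the following is only a sketch using the standard Keras API:

cp_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_path, monitor='val_accuracy', verbose=1,
    save_best_only=True, save_weights_only=True)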
class TFBertClassifier(tf.keras.Model):
    def __init__(self, dir_path, num_class=2):
        super().__init__()
        self.bert = TFBertModel.from_pretrained('bert-base-multilingual-cased', cache_dir=dir_path)
        self.num_class = num_class
        self.dropout = tf.keras.layers.Dropout(self.bert.config.hidden_dropout_prob)
        self.classifier = tf.keras.layers.Dense(
            self.num_class,
            kernel_initializer=tf.keras.initializers.TruncatedNormal(self.bert.config.initializer_range),
            name="classifier")

    def call(self, inputs, attention_mask=None, token_type_ids=None, training=False):
        outputs = self.bert(inputs, attention_mask=attention_mask, token_type_ids=token_type_ids)
        pooled_output = self.dropout(outputs[1], training=training)
        return self.classifier(pooled_output)
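A minimal usage sketch for the classifier above (none of this appears in the preview; train_data_sents, train_data_labels and the 3e-5 learning rate are assumptions for illustration):

cls_model = TFBertClassifier(dir_path='bert_ckpt', num_class=2)
optimizer = tf.keras.optimizers.Adam(3e-5)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
cls_model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
history = cls_model.fit(train_data_sents, train_data_labels, epochs=3, batch_size=32,
                        validation_split=0.1, callbacks=[earlystop_callback, cp_callback])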
def bert_tokenizer_v2(sent1, sent2, MAX_LEN):
    # For two-sentence input
    encoded_dict = tokenizer.encode_plus(
        text=sent1,
        text_pair=sent2,
        add_special_tokens=True,     # Add '[CLS]' and '[SEP]'
        max_length=MAX_LEN,          # Pad & truncate all sentences.
        pad_to_max_length=True,
        return_attention_mask=True)  # Construct attention masks.

    input_id = encoded_dict['input_ids']
    attention_mask = encoded_dict['attention_mask']
    token_type_id = encoded_dict['token_type_ids']

    return input_id, attention_mask, token_type_id
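A small usage sketch, assuming a multilingual-cased tokenizer; the sentence pair and MAX_LEN value are only illustrative:

from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
input_id, attention_mask, token_type_id = bert_tokenizer_v2(
    "A man is playing a guitar.", "Someone is making music.", MAX_LEN=64)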
class SquadExample:
    def __init__(self, question, context, start_char_idx, answer_text, all_answers):
        # Collapse repeated whitespace in the raw SQuAD fields
        self.question = " ".join(str(question).split())
        self.context = " ".join(str(context).split())
        self.answer_text = " ".join(str(answer_text).split())
        self.start_char_idx = start_char_idx
        self.all_answers = all_answers
        self.skip = False

    def get_answer_position(self, context, tokenized_context, answer, start_char_idx):
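The preview cuts off before the body of get_answer_position. A hedged sketch of the usual SQuAD-style span lookup, assuming tokenized_context is a Hugging Face tokenizers encoding that exposes character offsets via .offsets (the helper below is illustrative, not the gist's own code):

def find_answer_token_span(context, tokenized_context, answer, start_char_idx):
    end_char_idx = start_char_idx + len(answer)
    if end_char_idx >= len(context):
        return None  # answer runs past the context; the caller can mark the example as skipped
    # Mark which characters of the context belong to the answer
    is_char_in_ans = [0] * len(context)
    for idx in range(start_char_idx, end_char_idx):
        is_char_in_ans[idx] = 1
    # Keep every token whose character span overlaps the answer
    ans_token_idx = [i for i, (start, end) in enumerate(tokenized_context.offsets)
                     if sum(is_char_in_ans[start:end]) > 0]
    if not ans_token_idx:
        return None
    return ans_token_idx[0], ans_token_idx[-1]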
import parselmouth
import time
import concurrent.futures
NO_PITCH_VALUE = -999
NO_INTENSITY_VALUE = -999
MIN_PITCH = 75.0
def extract_pitch(path, window_size=0.01):
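The preview stops at the extract_pitch signature. A minimal sketch of pitch extraction with parselmouth, assuming the NO_PITCH_VALUE sentinel is meant to fill unvoiced frames (the body below is an assumption, not the gist's code):

def extract_pitch_sketch(path, window_size=0.01):
    snd = parselmouth.Sound(path)
    # Praat pitch track sampled every window_size seconds, with MIN_PITCH as the floor
    pitch = snd.to_pitch(time_step=window_size, pitch_floor=MIN_PITCH)
    values = pitch.selected_array['frequency']
    values[values == 0] = NO_PITCH_VALUE  # unvoiced frames come back as 0.0
    return pitch.xs(), values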
@Taekyoon
Taekyoon / test_multiprocess.py
Created June 2, 2020 07:25
Test Pytorch multiprocess IterableDataset
import torch
import math
import time

SLEEP_TIME = 0.1

class MyMapDataset(torch.utils.data.Dataset):
    def __init__(self, size):
        self.dataset = [i for i in range(size)]

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        return self.dataset[idx]
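The gist's title says it tests a multiprocess IterableDataset, but the preview only shows the map-style dataset. A hedged sketch of the iterable counterpart, using the per-worker sharding pattern from the PyTorch docs and SLEEP_TIME to simulate a slow source (the class and loader below are illustrative assumptions):

class MyIterableDataset(torch.utils.data.IterableDataset):
    def __init__(self, size):
        self.size = size

    def __iter__(self):
        worker_info = torch.utils.data.get_worker_info()
        if worker_info is None:
            start, end = 0, self.size  # single-process loading
        else:
            # Split the index range evenly across loader workers
            per_worker = int(math.ceil(self.size / float(worker_info.num_workers)))
            start = worker_info.id * per_worker
            end = min(start + per_worker, self.size)
        for i in range(start, end):
            time.sleep(SLEEP_TIME)  # simulate a slow fetch
            yield i

loader = torch.utils.data.DataLoader(MyIterableDataset(32), num_workers=4)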
@Taekyoon
Taekyoon / sequence_tagging_utils.py
Created July 3, 2019 07:23
sequence tagging utils
def base_to_char_level_bio_tags(sent, tag, empty_tag='O'):
    char_level_tags = list()
    sent_list, tag_list = sent.split(), tag.split()
    if len(sent_list) != len(tag_list):
        raise ValueError('sentence and tag sequences must have the same number of tokens')
    for i, (s, t) in enumerate(zip(sent_list, tag_list)):
        # Spread each word-level tag over that word's characters
        char_level_tags += [t for _ in range(len(s))]
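The preview ends before the function returns. A hedged sketch of how the conversion is typically finished, padding the gaps between words with empty_tag and demoting the trailing characters of a B- span to I- (this continuation is an assumption, not the gist's own code):

def base_to_char_level_bio_tags_sketch(sent, tag, empty_tag='O'):
    char_level_tags = []
    sent_list, tag_list = sent.split(), tag.split()
    if len(sent_list) != len(tag_list):
        raise ValueError('sentence and tag sequences must have the same number of tokens')
    for i, (s, t) in enumerate(zip(sent_list, tag_list)):
        if t.startswith('B-'):
            # First character keeps B-, the rest of the word becomes I-
            char_level_tags += [t] + ['I-' + t[2:]] * (len(s) - 1)
        else:
            char_level_tags += [t] * len(s)
        if i < len(sent_list) - 1:
            char_level_tags.append(empty_tag)  # the space between words
    return char_level_tags

# e.g. base_to_char_level_bio_tags_sketch("New York city", "B-LOC I-LOC O")
# -> ['B-LOC', 'I-LOC', 'I-LOC', 'O', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'O', 'O', 'O', 'O', 'O']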
@Taekyoon
Taekyoon / vocabulary.py
Created June 16, 2019 12:38
Vocabulary for Deep NLP
import copy
import json
from typing import List
from collections import Counter
from pathlib import Path
class Vocabulary(object):
    def __init__(self,
                 max_size=None,
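The preview cuts off inside the constructor signature. A minimal sketch of a frequency-capped vocabulary built with the Counter and List imports above, assuming conventional pad/unk handling (no parameter name beyond max_size comes from the gist):

class SimpleVocabulary:
    PAD, UNK = '<pad>', '<unk>'

    def __init__(self, max_size=None, min_count=1):
        self.max_size = max_size
        self.min_count = min_count
        self.word2idx = {self.PAD: 0, self.UNK: 1}

    def build(self, tokenized_texts: List[List[str]]):
        # Count tokens over the whole corpus and keep the most frequent ones
        counter = Counter(tok for text in tokenized_texts for tok in text)
        for word, count in counter.most_common(self.max_size):
            if count >= self.min_count and word not in self.word2idx:
                self.word2idx[word] = len(self.word2idx)
        return self

    def to_indices(self, tokens: List[str]) -> List[int]:
        unk = self.word2idx[self.UNK]
        return [self.word2idx.get(tok, unk) for tok in tokens]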