@Taekyoon
Taekyoon / copy-of-7-2_bert_finetune-kor-korner.ipynb
Created August 15, 2020 01:05
Copy of 7.2_bert_finetune-KOR-KORNER.ipynb
# Restore the fine-tuned weights and evaluate on the held-out test set
cls_model.load_weights(checkpoint_path)

results = cls_model.evaluate(test_data_sents, test_data_labels, batch_size=1024)
print("test loss, test acc: ", results)
import os
from tensorflow.keras.callbacks import EarlyStopping

model_name = "tf2_gpt2_naver_movie"
earlystop_callback = EarlyStopping(monitor='val_accuracy', min_delta=0.0001, patience=2)

checkpoint_path = os.path.join(DATA_OUT_PATH, model_name, 'weights.h5')
checkpoint_dir = os.path.dirname(checkpoint_path)
if os.path.exists(checkpoint_dir):
    print("{} -- Folder already exists \n".format(checkpoint_dir))
else:
    os.makedirs(checkpoint_dir, exist_ok=True)
    print("{} -- Folder create complete \n".format(checkpoint_dir))
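A model-checkpoint callback that writes the best weights to checkpoint_path usually accompanies the early-stopping callback; the preview does not show it, so the following is only a sketch using the standard Keras API:

cp_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_path, monitor='val_accuracy', verbose=1,
    save_best_only=True, save_weights_only=True)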
class TFBertClassifier(tf.keras.Model):
    def __init__(self, dir_path, num_class=2):
        super().__init__()
        self.bert = TFBertModel.from_pretrained('bert-base-multilingual-cased', cache_dir=dir_path)
        self.num_class = num_class
        self.dropout = tf.keras.layers.Dropout(self.bert.config.hidden_dropout_prob)
        self.classifier = tf.keras.layers.Dense(
            self.num_class,
            kernel_initializer=tf.keras.initializers.TruncatedNormal(self.bert.config.initializer_range),
            name="classifier")

    def call(self, inputs, attention_mask=None, token_type_ids=None, training=False):
        outputs = self.bert(inputs, attention_mask=attention_mask, token_type_ids=token_type_ids)
        pooled_output = self.dropout(outputs[1], training=training)
        return self.classifier(pooled_output)
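A minimal usage sketch for the classifier above (none of this appears in the preview; train_data_sents, train_data_labels and the 3e-5 learning rate are assumptions for illustration):

cls_model = TFBertClassifier(dir_path='bert_ckpt', num_class=2)
optimizer = tf.keras.optimizers.Adam(3e-5)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
cls_model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
history = cls_model.fit(train_data_sents, train_data_labels, epochs=3, batch_size=32,
                        validation_split=0.1, callbacks=[earlystop_callback, cp_callback])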
def bert_tokenizer_v2(sent1, sent2, MAX_LEN):
    # For two-sentence input
    encoded_dict = tokenizer.encode_plus(
        text=sent1,
        text_pair=sent2,
        add_special_tokens=True,     # Add '[CLS]' and '[SEP]'
        max_length=MAX_LEN,          # Pad & truncate all sentences.
        pad_to_max_length=True,
        return_attention_mask=True)  # Construct attention masks.

    input_id = encoded_dict['input_ids']
    attention_mask = encoded_dict['attention_mask']
    token_type_id = encoded_dict['token_type_ids']

    return input_id, attention_mask, token_type_id
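A small usage sketch, assuming a multilingual-cased tokenizer; the sentence pair and MAX_LEN value are only illustrative:

from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
input_id, attention_mask, token_type_id = bert_tokenizer_v2(
    "A man is playing a guitar.", "Someone is making music.", MAX_LEN=64)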
class SquadExample:
    def __init__(self, question, context, start_char_idx, answer_text, all_answers):
        # Collapse repeated whitespace in the raw SQuAD fields
        self.question = " ".join(str(question).split())
        self.context = " ".join(str(context).split())
        self.answer_text = " ".join(str(answer_text).split())
        self.start_char_idx = start_char_idx
        self.all_answers = all_answers
        self.skip = False

    def get_answer_position(self, context, tokenized_context, answer, start_char_idx):
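The preview cuts off before the body of get_answer_position. A hedged sketch of the usual SQuAD-style span lookup, assuming tokenized_context is a Hugging Face tokenizers encoding that exposes character offsets via .offsets (the helper below is illustrative, not the gist's own code):

def find_answer_token_span(context, tokenized_context, answer, start_char_idx):
    end_char_idx = start_char_idx + len(answer)
    if end_char_idx >= len(context):
        return None  # answer runs past the context; the caller can mark the example as skipped
    # Mark which characters of the context belong to the answer
    is_char_in_ans = [0] * len(context)
    for idx in range(start_char_idx, end_char_idx):
        is_char_in_ans[idx] = 1
    # Keep every token whose character span overlaps the answer
    ans_token_idx = [i for i, (start, end) in enumerate(tokenized_context.offsets)
                     if sum(is_char_in_ans[start:end]) > 0]
    if not ans_token_idx:
        return None
    return ans_token_idx[0], ans_token_idx[-1]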
import parselmouth
import time
import concurrent.futures
NO_PITCH_VALUE = -999
NO_INTENSITY_VALUE = -999
MIN_PITCH = 75.0
def extract_pitch(path, window_size=0.01):
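The preview stops at the extract_pitch signature. A minimal sketch of pitch extraction with parselmouth, assuming the NO_PITCH_VALUE sentinel is meant to fill unvoiced frames (the body below is an assumption, not the gist's code):

def extract_pitch_sketch(path, window_size=0.01):
    snd = parselmouth.Sound(path)
    # Praat pitch track sampled every window_size seconds, with MIN_PITCH as the floor
    pitch = snd.to_pitch(time_step=window_size, pitch_floor=MIN_PITCH)
    values = pitch.selected_array['frequency']
    values[values == 0] = NO_PITCH_VALUE  # unvoiced frames come back as 0.0
    return pitch.xs(), values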
@Taekyoon
Taekyoon / test_multiprocess.py
Created June 2, 2020 07:25
Test Pytorch multiprocess IterableDataset
import torch
import math
import time

SLEEP_TIME = 0.1

class MyMapDataset(torch.utils.data.Dataset):
    def __init__(self, size):
        self.dataset = [i for i in range(size)]

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        return self.dataset[idx]
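The gist's title says it tests a multiprocess IterableDataset, but the preview only shows the map-style dataset. A hedged sketch of the iterable counterpart, using the per-worker sharding pattern from the PyTorch docs and SLEEP_TIME to simulate a slow source (the class and loader below are illustrative assumptions):

class MyIterableDataset(torch.utils.data.IterableDataset):
    def __init__(self, size):
        self.size = size

    def __iter__(self):
        worker_info = torch.utils.data.get_worker_info()
        if worker_info is None:
            start, end = 0, self.size  # single-process loading
        else:
            # Split the index range evenly across loader workers
            per_worker = int(math.ceil(self.size / float(worker_info.num_workers)))
            start = worker_info.id * per_worker
            end = min(start + per_worker, self.size)
        for i in range(start, end):
            time.sleep(SLEEP_TIME)  # simulate a slow fetch
            yield i

loader = torch.utils.data.DataLoader(MyIterableDataset(32), num_workers=4)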
@Taekyoon
Taekyoon / sequence_tagging_utils.py
Created July 3, 2019 07:23
sequence tagging utils
def base_to_char_level_bio_tags(sent, tag, empty_tag='O'):
    char_level_tags = list()
    sent_list, tag_list = sent.split(), tag.split()
    if len(sent_list) != len(tag_list):
        raise ValueError('sentence and tag sequences must have the same number of tokens')
    for i, (s, t) in enumerate(zip(sent_list, tag_list)):
        # Spread each word-level tag over that word's characters
        char_level_tags += [t for _ in range(len(s))]
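The preview ends before the function returns. A hedged sketch of how the conversion is typically finished, padding the gaps between words with empty_tag and demoting the trailing characters of a B- span to I- (this continuation is an assumption, not the gist's own code):

def base_to_char_level_bio_tags_sketch(sent, tag, empty_tag='O'):
    char_level_tags = []
    sent_list, tag_list = sent.split(), tag.split()
    if len(sent_list) != len(tag_list):
        raise ValueError('sentence and tag sequences must have the same number of tokens')
    for i, (s, t) in enumerate(zip(sent_list, tag_list)):
        if t.startswith('B-'):
            # First character keeps B-, the rest of the word becomes I-
            char_level_tags += [t] + ['I-' + t[2:]] * (len(s) - 1)
        else:
            char_level_tags += [t] * len(s)
        if i < len(sent_list) - 1:
            char_level_tags.append(empty_tag)  # the space between words
    return char_level_tags

# e.g. base_to_char_level_bio_tags_sketch("New York city", "B-LOC I-LOC O")
# -> ['B-LOC', 'I-LOC', 'I-LOC', 'O', 'I-LOC', 'I-LOC', 'I-LOC', 'I-LOC', 'O', 'O', 'O', 'O', 'O']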
@Taekyoon
Taekyoon / vocabulary.py
Created June 16, 2019 12:38
Vocabulary for Deep NLP
import copy
import json
from typing import List
from collections import Counter
from pathlib import Path
class Vocabulary(object):
    def __init__(self,
                 max_size=None,
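The preview cuts off inside the constructor signature. A minimal sketch of a frequency-capped vocabulary built with the Counter and List imports above, assuming conventional pad/unk handling (no parameter name beyond max_size comes from the gist):

class SimpleVocabulary:
    PAD, UNK = '<pad>', '<unk>'

    def __init__(self, max_size=None, min_count=1):
        self.max_size = max_size
        self.min_count = min_count
        self.word2idx = {self.PAD: 0, self.UNK: 1}

    def build(self, tokenized_texts: List[List[str]]):
        # Count tokens over the whole corpus and keep the most frequent ones
        counter = Counter(tok for text in tokenized_texts for tok in text)
        for word, count in counter.most_common(self.max_size):
            if count >= self.min_count and word not in self.word2idx:
                self.word2idx[word] = len(self.word2idx)
        return self

    def to_indices(self, tokens: List[str]) -> List[int]:
        unk = self.word2idx[self.UNK]
        return [self.word2idx.get(tok, unk) for tok in tokens]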