Skip to content

Instantly share code, notes, and snippets.

@Taekyoon
Last active December 25, 2020 10:58
Show Gist options
  • Save Taekyoon/0ee86ba00728a027846512a136ae4309 to your computer and use it in GitHub Desktop.
Save Taekyoon/0ee86ba00728a027846512a136ae4309 to your computer and use it in GitHub Desktop.
class TFBertClassifier(TFBertPreTrainedModel):
    """Classification head on top of a pretrained multilingual BERT backbone.

    The backbone's pooled output is passed through dropout and a dense layer
    with ``num_class`` units.
    """

    def __init__(self, dir_path, num_class=2):
        """Build the classifier.

        Args:
            dir_path: Directory handed to ``from_pretrained`` as ``cache_dir``.
            num_class: Number of units in the final dense layer.
        """
        # Load the backbone before calling the parent constructor: the parent
        # needs a config object, and the only config available here is the one
        # that ships with the pretrained checkpoint. Nothing is assigned to
        # ``self`` until ``super().__init__`` has run.
        bert = TFBertModel.from_pretrained('bert-base-multilingual-cased', cache_dir=dir_path)
        config = bert.config
        super().__init__(config)

        self.bert = bert
        self.num_class = num_class
        self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
        # ``stddev`` must be given by keyword: the first positional argument of
        # TruncatedNormal is ``mean``.
        self.classifier = tf.keras.layers.Dense(
            self.num_class,
            kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=config.initializer_range),
            name="classifier")

    def call(self, inputs, attention_mask=None, token_type_ids=None, training=False):
        """Run the backbone and the classification head.

        Args:
            inputs: Forwarded to the BERT backbone as its first argument.
            attention_mask: Forwarded to the backbone unchanged.
            token_type_ids: Forwarded to the backbone unchanged.
            training: Whether dropout is active; forwarded to the backbone and
                to the head's dropout layer.

        Returns:
            A tuple whose first element is the classifier output, followed by
            any backbone outputs beyond the first two.
        """
        # Backbone outputs: sequence_output, pooled_output, (hidden_states), (attentions)
        outputs = self.bert(inputs,
                            attention_mask=attention_mask,
                            token_type_ids=token_type_ids,
                            training=training)
        pooled_output = outputs[1]
        pooled_output = self.dropout(pooled_output, training=training)
        logits = self.classifier(pooled_output)
        outputs = (logits,) + outputs[2:]
        return outputs
class TFGPT2Classifier(tf.keras.Model):
    """Classification head on top of a pretrained GPT-2 backbone.

    The backbone's first output, taken at the last index of its second axis,
    is passed through dropout and a dense layer with ``num_class`` units.
    """

    def __init__(self, dir_path, num_class=2):
        """Build the classifier.

        Args:
            dir_path: Passed to ``TFGPT2Model.from_pretrained`` to locate the
                pretrained weights.
            num_class: Number of units in the final dense layer.
        """
        super(TFGPT2Classifier, self).__init__()
        self.gpt2 = TFGPT2Model.from_pretrained(dir_path)
        self.num_class = num_class
        self.dropout = tf.keras.layers.Dropout(self.gpt2.config.summary_first_dropout)
        self.classifier = tf.keras.layers.Dense(
            self.num_class,
            kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=self.gpt2.config.initializer_range),
            name="classifier")

    def call(self, inputs, training=False):
        """Run the backbone and the classification head.

        Args:
            inputs: Forwarded to the GPT-2 backbone.
            training: Whether dropout is active. Defaults to ``False`` so
                existing ``call(inputs)`` callers keep working; previously the
                body referenced an undefined ``training`` name.

        Returns:
            A tuple whose first element is the classifier output, followed by
            the backbone outputs from index 2 onward.
        """
        outputs = self.gpt2(inputs, training=training)
        # NOTE(review): presumably the hidden state of the final sequence
        # position; with right-padded batches that position may be padding --
        # confirm against the tokenizer's padding side.
        pooled_output = outputs[0][:, -1]
        pooled_output = self.dropout(pooled_output, training=training)
        logits = self.classifier(pooled_output)
        outputs = (logits,) + outputs[2:]
        return outputs
@reniew
Copy link

reniew commented Aug 16, 2020

class TFBertClassifier(tf.keras.Model):
    """Classification head on top of a pretrained BERT backbone.

    The backbone's pooled output is passed through dropout and a dense layer
    with ``num_class`` units.
    """

    def __init__(self, model_name, dir_path, num_class):
        """Build the classifier.

        Args:
            model_name: Identifier passed to ``TFBertModel.from_pretrained``.
            dir_path: Directory handed to ``from_pretrained`` as ``cache_dir``.
            num_class: Number of units in the final dense layer.
        """
        super(TFBertClassifier, self).__init__()

        self.bert = TFBertModel.from_pretrained(model_name, cache_dir=dir_path)
        self.num_class = num_class
        self.dropout = tf.keras.layers.Dropout(self.bert.config.hidden_dropout_prob)
        # ``stddev`` must be given by keyword: the first positional argument of
        # TruncatedNormal is ``mean``, so passing initializer_range positionally
        # would shift the mean instead of setting the spread.
        self.classifier = tf.keras.layers.Dense(
            self.num_class,
            kernel_initializer=tf.keras.initializers.TruncatedNormal(
                stddev=self.bert.config.initializer_range),
            name="classifier")

    def call(self, inputs, attention_mask=None, token_type_ids=None, training=False):
        """Run the backbone and the classification head.

        Args:
            inputs: Forwarded to the BERT backbone as its first argument.
            attention_mask: Forwarded to the backbone unchanged.
            token_type_ids: Forwarded to the backbone unchanged.
            training: Whether dropout is active; forwarded to the backbone and
                to the head's dropout layer.

        Returns:
            The output of the dense classifier layer.
        """
        # Backbone outputs: sequence_output, pooled_output, (hidden_states), (attentions)
        outputs = self.bert(inputs,
                            attention_mask=attention_mask,
                            token_type_ids=token_type_ids,
                            training=training)
        pooled_output = outputs[1]
        pooled_output = self.dropout(pooled_output, training=training)
        logits = self.classifier(pooled_output)

        return logits

@reniew
Copy link

reniew commented Aug 16, 2020

class TFGPT2Classifier(tf.keras.Model):
    """Classification head on top of a pretrained GPT-2 backbone.

    The backbone's first output, taken at the last index of its second axis,
    is passed through dropout and a dense layer with ``num_class`` units.
    """

    def __init__(self, dir_path, num_class):
        """Build the classifier.

        Args:
            dir_path: Passed to ``TFGPT2Model.from_pretrained`` to locate the
                pretrained weights.
            num_class: Number of units in the final dense layer.
        """
        super(TFGPT2Classifier, self).__init__()

        self.gpt2 = TFGPT2Model.from_pretrained(dir_path)
        self.num_class = num_class
        self.dropout = tf.keras.layers.Dropout(self.gpt2.config.summary_first_dropout)
        self.classifier = tf.keras.layers.Dense(
            self.num_class,
            kernel_initializer=tf.keras.initializers.TruncatedNormal(
                stddev=self.gpt2.config.initializer_range),
            name="classifier")

    def call(self, inputs, training=False):
        """Run the backbone and the classification head.

        Args:
            inputs: Forwarded to the GPT-2 backbone.
            training: Whether dropout is active. Defaults to ``False`` so
                existing ``call(inputs)`` callers keep working; previously the
                body referenced an undefined ``training`` name.

        Returns:
            The output of the dense classifier layer.
        """
        outputs = self.gpt2(inputs, training=training)
        # NOTE(review): presumably the hidden state of the final sequence
        # position; with right-padded batches that position may be padding --
        # confirm against the tokenizer's padding side.
        pooled_output = outputs[0][:, -1]

        pooled_output = self.dropout(pooled_output, training=training)
        logits = self.classifier(pooled_output)

        return logits

@Jyun1998
Copy link

Jyun1998 commented Dec 25, 2020

@reniew
@Taekyoon

안녕하세요 binary classification 문제에 모델 실험을 해보다 막힌 부분이 있어 연락드립니다.

model.predict(테스트데이터)를 실행하니 (len(테스트데이터),2) 형태의 logit이 return됩니다.

logit[0] 이 class1에 속할 logit, logit[1]이 class2에 속할 logit이 맞을까요?

logit을 binary result로 변환시키고 싶은데 def call 함수에서 return logits를 tf.round(tf.nn.sigmoid(logits))로 바꾸면 될까요?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment