import os
import json
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
import torch
import time
import argparse

# Function to log training details for benchmarking.
# The body below is a hedged sketch of the truncated original: it collects the
# run metadata and appends it as one JSON line (the filename is an assumption).
def log_training_details(training_args, logs, num_gpus, num_cpus, batch_time, epochs):
    details = {"output_dir": getattr(training_args, "output_dir", None),
               "num_gpus": num_gpus, "num_cpus": num_cpus,
               "batch_time": batch_time, "epochs": epochs, "logs": logs}
    with open("training_benchmark.jsonl", "a") as f:
        f.write(json.dumps(details) + "\n")
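# Usage sketch (all values are illustrative, and it relies on the sketched body
# above): record one fake run to check the log format.
args = TrainingArguments(output_dir="./bench_out")
log_training_details(args, logs={"train_loss": 1.23},
                     num_gpus=torch.cuda.device_count(), num_cpus=os.cpu_count() or 1,
                     batch_time=0.42, epochs=1)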
import spacy
import neuralcoref
import re

# Interactive coreference-resolution loop (neuralcoref on a spaCy 2.x pipeline).
nlp = spacy.load('en')
neuralcoref.add_to_pipe(nlp, conv_dict={'Jack Porter': ['man', 'CEO'], 'Cognizer': ['company', 'organization']})
history = ""
while True:
    text = input("Enter your text.\n")
    if text != "exit":
        # Hedged sketch of the truncated body: accumulate the conversation and print the resolved text.
        history += " " + text
        doc = nlp(history.strip())
        print(doc._.coref_resolved)
    else:
        break
import torch
from torch.utils.data import Dataset
import numpy as np
import pdb

# Synthetic dataset of random token sequences, useful for smoke-testing training code.
class DummyDataset(Dataset):
    def __init__(self, prob, vocab_size=None,
                 nSamples=None, max_len=None):
        self.prob = prob
        if not vocab_size: vocab_size = 10
        # The remainder is a hedged sketch of the truncated body: default sizes
        # and a random source-token array of shape (nSamples, max_len).
        if not nSamples: nSamples = 1000
        if not max_len: max_len = 20
        self.vocab_size, self.nSamples, self.max_len = vocab_size, nSamples, max_len
        self.src_data = np.random.choice(vocab_size, (nSamples, max_len))
import torch
from torch.utils.data import Dataset
import numpy as np

# Keyword-argument variant of the dummy dataset above.
class DummyDataset(Dataset):
    def __init__(self, **kwargs):
        self.prob = kwargs['prob']
        self.vocab_size = kwargs['vocab_size']
        self.nSamples = kwargs['nSamples']
        # The call below was cut off; the (nSamples, max_len) shape is an assumption.
        self.src_data = np.random.choice(self.vocab_size,
                                         (self.nSamples, kwargs.get('max_len', 20)))
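# Usage sketch (argument values are illustrative, and the shape assumes the
# completion sketched above):
ds = DummyDataset(prob=0.5, vocab_size=10, nSamples=100, max_len=20)
print(ds.src_data.shape)  # -> (100, 20)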
'STLR scheduler from https://arxiv.org/abs/1801.06146'
import numpy as np
from torch.optim.lr_scheduler import _LRScheduler

class STLR(_LRScheduler):
    def __init__(self, optimizer, T_max, last_epoch=-1, ratio=32):
        self.T_max = T_max
        self.cut = np.floor(T_max*0.1)
        self.ratio = ratio
        super(STLR, self).__init__(optimizer, last_epoch)
    def get_lr(self):
        # Hedged sketch of the truncated body, following the paper's slanted triangular
        # schedule: linear warm-up over the first 10% of steps, then linear decay.
        t = self.last_epoch + 1
        p = t / self.cut if t < self.cut else 1 - (t - self.cut) / (self.cut * (1 / 0.1 - 1))
        return [lr * (1 + p * (self.ratio - 1)) / self.ratio for lr in self.base_lrs]
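# Usage sketch (model, optimizer, and step count are illustrative): step the
# scheduler once per parameter update, for T_max updates in total.
import torch
model = torch.nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scheduler = STLR(optimizer, T_max=1000)
for _ in range(1000):
    optimizer.step()          # normally preceded by forward/backward on a batch
    scheduler.step()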
from collections import Counter, defaultdict
from textdistance import levenshtein as lev
import numpy as np
import pdb
from tqdm import *

# Character-level majority voting for combining multiple noisy (e.g. OCR) readings of a word.
def CharMajVoting(words):
    # Position-wise majority vote across several versions of the same word.
    def most_frequent(list_):
        counter = Counter(list_)
        return counter.most_common()[0][0]
    dict_ = defaultdict(list)
    lengths = [len(word) for word in words]
    common_length = most_frequent(lengths)
    for word in words:
        for i in range(len(word)):
            dict_[i].append(word[i])
    # The tail is a hedged sketch of the truncated original: take the most frequent
    # character at each position, up to the most common word length.
    return ''.join(most_frequent(dict_[i]) for i in range(common_length))
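# Usage sketch (the hypotheses are made up, and the result assumes the tail
# sketched above): voting over three noisy readings recovers "hello".
print(CharMajVoting(["hello", "hellu", "hcllo"]))  # -> hello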
import re
import sys
import os
import tempfile
import subprocess
import pdb
import pandas as pd
import numpy as np
from collections import defaultdict
from ocr.baselines.base_config import *
import Levenshtein as lev

# Despite the name, this returns character-level accuracy as a percentage
# (100 minus the character error rate) for a prediction/target string pair.
def cer(prediction, target):
    sum_edit_dists = lev.distance(prediction, target)
    sum_gt_lengths = sum(map(len, target))  # for a string target this equals len(target)
    fraction = sum_edit_dists/sum_gt_lengths
    percent = fraction*100
    return (100.0-percent)
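# Usage sketch (illustrative strings): one substitution in a 5-character target
# gives edit distance 1, so the function returns 100 * (1 - 1/5) = 80.0.
print(cer("hallo", "hello"))  # -> 80.0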