import os
import json
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
import torch
import time
import argparse

# Function to log training details for benchmarking.
# The body below is a hedged sketch of the truncated original: it collects the
# run metadata and appends it as one JSON line (the filename is an assumption).
def log_training_details(training_args, logs, num_gpus, num_cpus, batch_time, epochs):
    details = {"output_dir": getattr(training_args, "output_dir", None),
               "num_gpus": num_gpus, "num_cpus": num_cpus,
               "batch_time": batch_time, "epochs": epochs, "logs": logs}
    with open("training_benchmark.jsonl", "a") as f:
        f.write(json.dumps(details) + "\n")
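# Usage sketch (all values are illustrative, and it relies on the sketched body
# above): record one fake run to check the log format.
args = TrainingArguments(output_dir="./bench_out")
log_training_details(args, logs={"train_loss": 1.23},
                     num_gpus=torch.cuda.device_count(), num_cpus=os.cpu_count() or 1,
                     batch_time=0.42, epochs=1)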
import spacy
import neuralcoref
import re

# Interactive coreference-resolution loop (neuralcoref on a spaCy 2.x pipeline).
nlp = spacy.load('en')
neuralcoref.add_to_pipe(nlp, conv_dict={'Jack Porter': ['man', 'CEO'], 'Cognizer': ['company', 'organization']})
history = ""
while True:
    text = input("Enter your text.\n")
    if text != "exit":
        # Hedged sketch of the truncated body: accumulate the conversation and print the resolved text.
        history += " " + text
        doc = nlp(history.strip())
        print(doc._.coref_resolved)
    else:
        break
import torch
from torch.utils.data import Dataset
import numpy as np
import pdb

# Synthetic dataset of random token sequences, useful for smoke-testing training code.
class DummyDataset(Dataset):
    def __init__(self, prob, vocab_size=None,
                 nSamples=None, max_len=None):
        self.prob = prob
        if not vocab_size: vocab_size = 10
        # The remainder is a hedged sketch of the truncated body: default sizes
        # and a random source-token array of shape (nSamples, max_len).
        if not nSamples: nSamples = 1000
        if not max_len: max_len = 20
        self.vocab_size, self.nSamples, self.max_len = vocab_size, nSamples, max_len
        self.src_data = np.random.choice(vocab_size, (nSamples, max_len))
import torch
from torch.utils.data import Dataset
import numpy as np

# Keyword-argument variant of the dummy dataset above.
class DummyDataset(Dataset):
    def __init__(self, **kwargs):
        self.prob = kwargs['prob']
        self.vocab_size = kwargs['vocab_size']
        self.nSamples = kwargs['nSamples']
        # The call below was cut off; the (nSamples, max_len) shape is an assumption.
        self.src_data = np.random.choice(self.vocab_size,
                                         (self.nSamples, kwargs.get('max_len', 20)))
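# Usage sketch (argument values are illustrative, and the shape assumes the
# completion sketched above):
ds = DummyDataset(prob=0.5, vocab_size=10, nSamples=100, max_len=20)
print(ds.src_data.shape)  # -> (100, 20)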
'STLR scheduler from https://arxiv.org/abs/1801.06146'
import numpy as np
from torch.optim.lr_scheduler import _LRScheduler

class STLR(_LRScheduler):
    def __init__(self, optimizer, T_max, last_epoch=-1, ratio=32):
        self.T_max = T_max
        self.cut = np.floor(T_max*0.1)
        self.ratio = ratio
        super(STLR, self).__init__(optimizer, last_epoch)
    def get_lr(self):
        # Hedged sketch of the truncated body, following the paper's slanted triangular
        # schedule: linear warm-up over the first 10% of steps, then linear decay.
        t = self.last_epoch + 1
        p = t / self.cut if t < self.cut else 1 - (t - self.cut) / (self.cut * (1 / 0.1 - 1))
        return [lr * (1 + p * (self.ratio - 1)) / self.ratio for lr in self.base_lrs]
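# Usage sketch (model, optimizer, and step count are illustrative): step the
# scheduler once per parameter update, for T_max updates in total.
import torch
model = torch.nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scheduler = STLR(optimizer, T_max=1000)
for _ in range(1000):
    optimizer.step()          # normally preceded by forward/backward on a batch
    scheduler.step()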
from collections import Counter, defaultdict
from textdistance import levenshtein as lev
import numpy as np
import pdb
from tqdm import *

# Character-level majority voting for combining multiple noisy (e.g. OCR) readings of a word.
def CharMajVoting(words):
    # Position-wise majority vote across several versions of the same word.
    def most_frequent(list_):
        counter = Counter(list_)
        return counter.most_common()[0][0]
    dict_ = defaultdict(list)
    lengths = [len(word) for word in words]
    common_length = most_frequent(lengths)
    for word in words:
        for i in range(len(word)):
            dict_[i].append(word[i])
    # The tail is a hedged sketch of the truncated original: take the most frequent
    # character at each position, up to the most common word length.
    return ''.join(most_frequent(dict_[i]) for i in range(common_length))
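# Usage sketch (the hypotheses are made up, and the result assumes the tail
# sketched above): voting over three noisy readings recovers "hello".
print(CharMajVoting(["hello", "hellu", "hcllo"]))  # -> hello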
import re
import sys
import os
import tempfile
import subprocess
import pdb
import pandas as pd
import numpy as np
from collections import defaultdict
from ocr.baselines.base_config import *
import Levenshtein as lev

# Despite the name, this returns character-level accuracy as a percentage
# (100 minus the character error rate) for a prediction/target string pair.
def cer(prediction, target):
    sum_edit_dists = lev.distance(prediction, target)
    sum_gt_lengths = sum(map(len, target))  # for a string target this equals len(target)
    fraction = sum_edit_dists/sum_gt_lengths
    percent = fraction*100
    return (100.0-percent)
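# Usage sketch (illustrative strings): one substitution in a 5-character target
# gives edit distance 1, so the function returns 100 * (1 - 1/5) = 80.0.
print(cer("hallo", "hello"))  # -> 80.0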