# Full training script for a protein contact map diffusion model,
# using lucidrains' denoising-diffusion-pytorch (grayscale input)
import os
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from tqdm import tqdm

import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T

import sidechainnet as scn
from denoising_diffusion_pytorch import Unet, GaussianDiffusion, Trainer

# U-Net backbone; channels=1 because contact maps are single-channel (grayscale) images.
# dim and dim_mults below are example values, not taken from the original snippet.
model = Unet(
    dim=64,
    dim_mults=(1, 2, 4, 8),
    channels=1
)
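The captured snippet stops at the model definition. A minimal sketch of how the rest of the pipeline is typically wired with this library follows; the image size, the 'contact_maps/' folder, and the training hyperparameters are assumptions, and the exact Trainer keyword arguments vary across library versions.

# Wrap the U-Net in the diffusion process (example settings, see note above)
diffusion = GaussianDiffusion(
    model,
    image_size=64,   # assumed contact-map resolution
    timesteps=1000   # number of diffusion steps
)

# Trainer expects a folder of images; 'contact_maps/' is a hypothetical path
trainer = Trainer(
    diffusion,
    'contact_maps/',
    train_batch_size=32,
    train_lr=8e-5,
    train_num_steps=100000,
    gradient_accumulate_every=2,
    ema_decay=0.995,
    amp=True
)
trainer.train()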
# t-test vs Wilcoxon vs ordinal regression
library(tidyverse)
library(multidplyr) # parallelize
library(rms)        # ordinal regression
sample_size <- 500  # N
# Generate paired sets and calculate p-values with different techniques.
## The problem with (personal) non-ordinality:
n <- 30000 # 30k fictional people
# A pair of exposures, no measurement issues:
xa <- rnorm(n)
xb <- rnorm(n)
# Personal thresholds (1 to 4) for each person, under a reasonable scale design,
# by which I mean the bins fill up roughly "normally"; this matters a lot!
require(MASS)
# Fixed a2 and e2 for the entire script:
a <- .87 # additive genetic variance
e <- .13 # environmental variance
# Make the MZ covariance matrix, i.e. the covariance is a, the variance is 1:
sigma_mz <- matrix(c(1, a, a, 1), 2, 2)
# Sample size (large because the traits are rare)
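The R script above is excerpted and stops before the tests themselves are run. As a rough illustration of the comparison it sets up (paired exposures pushed through personal thresholds onto a 1 to 4 scale, then tested), here is a small Python sketch; the bin edges, the threshold noise, and the small shift in xb are illustrative assumptions, and the ordinal-regression arm from the R script is omitted.

# Python sketch of the simulation idea above (not the author's R code).
import numpy as np
from scipy import stats

rng = np.random.default_rng(1)
n = 30_000

xa = rng.normal(size=n)
xb = rng.normal(loc=0.05, size=n)          # assumed small shift so the tests have something to detect

# Personal thresholds: shared cut points plus a person-specific shift (assumed scale)
base_cuts = np.array([-1.0, 0.0, 1.0])     # assumed bin edges for a 1-4 scale
personal_shift = rng.normal(scale=0.5, size=n)

# Ordinalize each exposure with that person's own cut points
ya = (xa[:, None] > (base_cuts + personal_shift[:, None])).sum(axis=1) + 1
yb = (xb[:, None] > (base_cuts + personal_shift[:, None])).sum(axis=1) + 1

# Compare p-values from a paired t-test and a Wilcoxon signed-rank test
print("paired t-test:       ", stats.ttest_rel(ya, yb).pvalue)
print("Wilcoxon signed-rank:", stats.wilcoxon(ya, yb).pvalue)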
MichelNivard / openllama-3b-bnb-4bit-training.ipynb
Created June 12, 2023 07:24
OpenLlama 3b bnb-4bit-training.ipynb
(Notebook not rendered; content unavailable.)
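Since the notebook body is not shown, the following is only a generic sketch of what the title points at (loading OpenLlama 3b with a bitsandbytes 4-bit configuration and attaching LoRA adapters for training); it is not the notebook's actual code, and the model id, quantization settings, and LoRA hyperparameters are assumptions.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model

# 4-bit quantization config (illustrative settings)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model_id = "openlm-research/open_llama_3b"   # assumed checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto"
)

# LoRA adapters so the quantized base model can be fine-tuned (example hyperparameters)
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()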
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
# Load the tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('./results')
# Set the device to GPU if available, otherwise use CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
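The './results' checkpoint loaded above is presumably the output of the fine-tuning script below. A minimal generation sketch follows; the prompt and sampling settings are placeholders, not from the original.

# Generate text from the loaded model (prompt and sampling settings are illustrative)
prompt = "Example prompt text"
input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)

model.eval()
with torch.no_grad():
    output_ids = model.generate(
        input_ids,
        max_length=100,
        do_sample=True,
        top_p=0.95,
        temperature=0.8,
        pad_token_id=tokenizer.eos_token_id
    )

print(tokenizer.decode(output_ids[0], skip_special_tokens=True))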
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import (
    GPT2Tokenizer,
    GPT2LMHeadModel,
    TextDataset,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)
# Set the path to the text file to fine-tune on
path_to_file = "path/to/text/file.txt"
# Load the tokenizer and model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
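The imports above reference TextDataset, DataCollatorForLanguageModeling, Trainer, and TrainingArguments, but the captured snippet stops before they are used. A sketch of the usual wiring follows; the block size and training hyperparameters are assumptions, and the output directory is set to './results' to match the checkpoint loaded in the inference snippet above.

# Build a language-modeling dataset from the text file (block_size is illustrative)
train_dataset = TextDataset(
    tokenizer=tokenizer,
    file_path=path_to_file,
    block_size=128
)
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Training configuration (example hyperparameters)
training_args = TrainingArguments(
    output_dir='./results',
    overwrite_output_dir=True,
    num_train_epochs=1,
    per_device_train_batch_size=4,
    save_steps=500,
    logging_steps=100
)

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset
)

trainer.train()
trainer.save_model('./results')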
MichelNivard / Example _transcript.md
Last active March 6, 2023 11:52
Example long training data

Speaker 0:

You wrote a piece a follow-up piece to your oral history titled, there is no replacement for black Twitter. I think back in November, What do you think we lose if we lose black Twitter? Tell

Speaker 1:

me not to meet your Mac, but we lose everything. I'm John Favreau. Welcome to offline.

Speaker 0:

# Shell: concatenate the individual manuscript .txt files into one training file
cat author_manuscript_txt.incr.2022-12-19/*/*.txt > merged-file.txt

# Python: load the merged file as a Hugging Face dataset and inspect it
from datasets import load_dataset
dataset = load_dataset('text', data_files="merged-file.txt")
print(dataset)

# Keep only lines longer than 500 characters
dataset2 = dataset.filter(lambda x: len(x["text"]) > 500)
print(dataset2)
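A typical next step, not shown in the original snippet, is to tokenize the filtered text for language-model fine-tuning; the GPT-2 tokenizer and max_length here are assumptions.

# Tokenize the filtered dataset (illustrative settings)
from transformers import GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token   # GPT-2 has no pad token by default

tokenized = dataset2.map(
    lambda batch: tokenizer(batch["text"], truncation=True, max_length=512),
    batched=True,
    remove_columns=["text"]
)
print(tokenized)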