dpressel’s gists

dpressel / tfrecord-counts.py

Created August 12, 2020 17:33

	import tensorflow as tf
	import sys
	import time
	import os

	# Validate the command line explicitly: an `assert` is stripped under
	# `python -O`, which would turn a missing argument into a bare IndexError.
	if len(sys.argv) != 2:
	    # sys.exit with a string prints it to stderr and exits with status 1.
	    sys.exit(f"Usage: {sys.argv[0]} <directory>")
	dirname = sys.argv[1]
	print(dirname)
	# Path of the md.yml file inside the given directory.
	md_file = os.path.join(dirname, 'md.yml')
	# All paths in the directory matching *.tfrecord, via tf.io.gfile.glob.
	input_files = tf.io.gfile.glob(os.path.join(dirname, '*.tfrecord'))

dpressel / rawten.py

Created November 6, 2018 20:33

mead-train --config config/sst2-rawten.py

	from baseline.reader import register_reader, SeqLabelReader
	from baseline.vectorizers import register_vectorizer, create_vectorizer, Vectorizer
	from collections import Counter
	from baseline.data import DataFeed
	from baseline.embeddings import register_embeddings
	from baseline.tf.embeddings import TensorFlowEmbeddings
	from baseline.utils import read_json
	import numpy as np
	import tensorflow as tf

dpressel / highway.py

Last active June 8, 2020 17:18

Highway layer using PyTorch

	import torch
	import torch.nn as nn

	class Highway(nn.Module):
	    """Highway layer (PyTorch) holding two square linear maps.

	    Only the constructor is visible in this excerpt.
	    """

	    def __init__(self, input_size):
	        """Create the projection and transform layers.

	        :param input_size: Feature size; both linear layers map
	            ``input_size -> input_size``.
	        """
	        super().__init__()
	        self.proj = nn.Linear(input_size, input_size)
	        self.transform = nn.Linear(input_size, input_size)
	        # Start the transform bias at -2.0 (presumably so the gate initially
	        # favors carrying the input through -- forward pass not shown; confirm).
	        self.transform.bias.data.fill_(-2.0)

dpressel / ShingledCharFeatureExtractor.java

Created January 19, 2018 18:17

Adds a character-shingling feature extractor (FX) for sgdtk struct

	package org.sgdtk.struct;

	import java.util.ArrayList;
	import java.util.List;

	/**
	* Extracts an array of character shingle features from a sequence
	*
	* @author dpressel
	*/

dpressel / makefile.inc

Last active October 31, 2017 14:54

This is a gist to build faiss with CPU and GPU support on Ubuntu 14. See https://github.com/facebookresearch/faiss/pull/30, https://github.com/facebookresearch/faiss/issues/29


	# Copyright (c) 2015-present, Facebook, Inc.
	# All rights reserved.
	#
	# This source code is licensed under the BSD+Patents license found in the
	# LICENSE file in the root directory of this source tree.

	# -- makefile --
	# tested on CentOS 7, Ubuntu 16 and Ubuntu 14, see below to adjust flags to distribution.

dpressel / tagger_cbow.py

Created August 22, 2017 15:03

RNN model with sum of char embeddings.

	from baseline.tf.tfy import *
	import json
	import os
	from google.protobuf import text_format
	from tensorflow.python.platform import gfile
	from baseline.model import Tagger
	from tensorflow.contrib.layers import fully_connected, xavier_initializer


	class RNNWordBoWCharTaggerModel(Tagger):

dpressel / tagger_nochar.py

Created August 16, 2017 17:10

Word-only BLSTM add-on example

	from baseline.tf.tfy import *
	import json
	import os
	from google.protobuf import text_format
	from tensorflow.python.platform import gfile
	from baseline.model import Tagger
	from tensorflow.contrib.layers import fully_connected, xavier_initializer


	class RNNWordTaggerModel(Tagger):

dpressel / classify_nbowmax.py

Last active August 22, 2017 17:43

Max over time instead of sum of embeddings

	import torch
	import torch.nn as nn
	from baseline.pytorch.torchy import classify_bt, append2seq
	from baseline.model import Classifier


	class NBowMaxModel(nn.Module, Classifier):
	    """Model that is both an ``nn.Module`` and a baseline ``Classifier``.

	    Only the start of ``save`` is visible in this excerpt.
	    """

	    def save(self, outname):
	        """Announce the save target on stdout.

	        :param outname: Value interpolated into the ``saving ...`` message.
	        """
	        print('saving %s' % outname)

dpressel / classify_nbow.py

Last active August 22, 2017 17:44

Neural Bag of Words add-on for baseline

	import torch
	import torch.nn as nn
	from baseline.pytorch.torchy import classify_bt, append2seq
	from baseline.model import Classifier


	class NBowModel(nn.Module, Classifier):
	    """Model that is both an ``nn.Module`` and a baseline ``Classifier``.

	    Only the start of ``save`` is visible in this excerpt.
	    """

	    def save(self, outname):
	        """Announce the save target on stdout.

	        :param outname: Value interpolated into the ``saving ...`` message.
	        """
	        print('saving %s' % outname)

dpressel / class_shell.py

Created July 5, 2017 17:28

Run baseline classifier model in a REPL

	from baseline import classifier_repl
	# You can use any of the implementations interchangeably!
	from baseline.keras.classify import ConvModel
	import argparse

	"""
	Example of loading a previously trained classifier (in Keras) and interacting
	with it using a shell. Your shell inputs (for the time being) should be
	in the form the model tokenizes its training data.
	"""

Daniel Pressel dpressel