Goals: add links that give reasonable, solid explanations of how stuff works. No hype and no vendor content if possible. Practical first-hand accounts of models in prod are eagerly sought.
# [Mamba: Linear-Time Sequence Modeling with Selective State Spaces](https://arxiv.org/abs/2312.00752)

```python
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.nn import functional as F
from einops import rearrange, repeat
from tqdm import tqdm
```
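The imports above set up a training script; the core of the paper is the selective state-space (S6) recurrence. Below is a minimal sketch of that recurrence run as a plain sequential loop rather than the hardware-aware parallel scan the paper actually uses; the function name `selective_scan`, the tensor shapes, and the toy sizes are illustrative assumptions, not the paper's reference code.

```python
import torch

def selective_scan(x, A, B, C, delta):
    """Sequential (unoptimized) selective SSM recurrence.

    x:     (batch, length, d_model)   input sequence
    A:     (d_model, d_state)         state transition, diagonal over d_state
    B, C:  (batch, length, d_state)   input-dependent projections
    delta: (batch, length, d_model)   input-dependent step sizes (> 0)
    """
    batch, length, d_model = x.shape
    h = torch.zeros(batch, d_model, A.shape[1], device=x.device)
    ys = []
    for t in range(length):
        # Discretize A (zero-order hold) and B with the per-token step size delta.
        dA = torch.exp(delta[:, t].unsqueeze(-1) * A)          # (batch, d_model, d_state)
        dB = delta[:, t].unsqueeze(-1) * B[:, t].unsqueeze(1)  # (batch, d_model, d_state)
        h = dA * h + dB * x[:, t].unsqueeze(-1)                # recurrent state update
        ys.append((h * C[:, t].unsqueeze(1)).sum(-1))          # project state back to d_model
    return torch.stack(ys, dim=1)                              # (batch, length, d_model)

# Toy usage with made-up sizes; negative A keeps the recurrence stable.
x = torch.randn(2, 5, 4)
A = -torch.rand(4, 8)
B, C = torch.randn(2, 5, 8), torch.randn(2, 5, 8)
delta = torch.rand(2, 5, 4) + 0.1
y = selective_scan(x, A, B, C, delta)  # shape (2, 5, 4)
```

"Selective" here just means B, C, and delta are computed from the input at each position, so the state update can retain or ignore individual tokens; the paper's additional contribution is computing this same recurrence with a parallel, memory-efficient scan.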
```python
import torch
from torch import LongTensor
from torch.nn import Embedding, LSTM
from torch.autograd import Variable  # legacy import; modern PyTorch tensors no longer need Variable wrapping
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

# We want to run an LSTM on a batch of 3 character sequences: ['long_str', 'tiny', 'medium']
#
# Step 1: Construct the vocabulary
# Step 2: Load indexed data (a list of instances, where each instance is a list of character indices)
# (a sketch of these two steps, plus the pack/unpack calls, follows this block)
```
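A minimal sketch of the two steps above, plus the usual pad, embed, pack, LSTM, unpack pattern; the vocabulary layout, embedding size 8, and hidden size 16 are arbitrary choices for illustration.

```python
import torch
from torch.nn import Embedding, LSTM
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

seqs = ['long_str', 'tiny', 'medium']

# Step 1: construct the vocabulary (index 0 reserved for padding)
vocab = ['<pad>'] + sorted({ch for s in seqs for ch in s})
char2idx = {ch: i for i, ch in enumerate(vocab)}

# Step 2: indexed data (one list of character indices per sequence)
indexed = [[char2idx[ch] for ch in s] for s in seqs]
lengths = torch.tensor([len(s) for s in indexed])  # [8, 4, 6]

# Pad to the longest sequence so the batch is rectangular
padded = torch.zeros(len(indexed), int(lengths.max()), dtype=torch.long)
for i, seq in enumerate(indexed):
    padded[i, :len(seq)] = torch.tensor(seq)

embed = Embedding(num_embeddings=len(vocab), embedding_dim=8, padding_idx=0)
lstm = LSTM(input_size=8, hidden_size=16, batch_first=True)

# Pack so the LSTM skips padding positions, then unpack the output
packed = pack_padded_sequence(embed(padded), lengths, batch_first=True, enforce_sorted=False)
packed_out, (h_n, c_n) = lstm(packed)
output, out_lengths = pad_packed_sequence(packed_out, batch_first=True)  # output: (3, 8, 16)
```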
```go
package server

import (
	"bufio"
	"fmt"
	"log"
	"net"
)

// Server ...
```
| """ | |
| Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy) | |
| BSD License | |
| """ | |
| import numpy as np | |
| # data I/O | |
| data = open('input.txt', 'r').read() # should be simple plain text file | |
| chars = list(set(data)) | |
| data_size, vocab_size = len(data), len(chars) |
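For context, a sketch of what this setup typically leads into: character/index lookup tables and one forward step of the vanilla RNN, h_t = tanh(Wxh x_t + Whh h_{t-1} + bh). It continues from the variables defined above (`np`, `chars`, `vocab_size`); the hidden size and initialization scale are illustrative, not quoted from the gist.

```python
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}

hidden_size = 100                                        # illustrative choice
Wxh = np.random.randn(hidden_size, vocab_size) * 0.01    # input -> hidden
Whh = np.random.randn(hidden_size, hidden_size) * 0.01   # hidden -> hidden
Why = np.random.randn(vocab_size, hidden_size) * 0.01    # hidden -> output
bh, by = np.zeros((hidden_size, 1)), np.zeros((vocab_size, 1))

def rnn_step(ch_index, h_prev):
    """One step: one-hot input -> new hidden state -> distribution over next char."""
    x = np.zeros((vocab_size, 1))
    x[ch_index] = 1
    h = np.tanh(Wxh @ x + Whh @ h_prev + bh)
    y = Why @ h + by
    p = np.exp(y) / np.sum(np.exp(y))                    # softmax over the vocabulary
    return h, p
```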