import time
import math

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
## model
class LSTMmodel(nn.Module):
    def __init__(self, n_input=2, n_output=1, n_fc=10):
        super(LSTMmodel, self).__init__()
        self.n_input = n_input
        self.n_output = n_output
        self.n_fc = n_fc
        # One LSTM layer followed by a fully connected output layer.
        self.h1_lstm = nn.LSTM(input_size=n_input,
                               hidden_size=n_fc,
                               batch_first=True)
        self.h2_fc = nn.Linear(n_fc, n_output)

    def forward(self, inputs, h0=None, c0=None):
        output_h1, (h_n, c_n) = self.h1_lstm(inputs)
        output_h2 = self.h2_fc(output_h1)
        # Keep only the prediction at the last time step: (batch, n_output).
        return output_h2[:, -1, :]
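# Shape sketch (illustrative values, not part of the original script): with a
# batch-first input of shape (batch, seq_len, n_input), the LSTM produces
# (batch, seq_len, n_fc), the linear layer maps that to (batch, seq_len, n_output),
# and the [:, -1, :] slice keeps only the last step, e.g.
#   LSTMmodel(n_input=1)(torch.zeros(4, 20, 1)).size() == torch.Size([4, 1])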
##
def data_loader(fname):
    df = pd.read_csv(fname)
    return df


def create_data_unit(df, t_length=10, t_start=0, x_header_prev=["Rating"],
                     x_header_future=["DateValue"], y_header="Rating"):
    ## Predict the next Rating from t_length steps of past Rating history
    ## plus the dates (including the date of the next contest).
    ## day  : 0 1 2 3 4 5   -> predict Y from [(1,a),(2,b),(3,c),(4,d),(5,e)]
    ## score: a b c d e Y
    x = []
    if t_start + t_length >= len(df):
        t_length = len(df) - 1 - t_start
    for i in range(t_start, t_start + t_length):
        l = []
        for header in x_header_prev:
            l.append(float(df[header][i]))
        for header in x_header_future:
            l.append(float(df[header][i + 1]))
        x.append(l)
    return x, [float(df[y_header][t_start + t_length])]
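# Worked example (hypothetical data, Rating column = [r0, r1, r2, r3, ...]):
# create_data_unit(df, t_length=3, t_start=0, x_header_future=[]) returns
#   x = [[r0], [r1], [r2]] and y = [r3],
# i.e. a window of t_length past ratings and the single rating that follows it.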
def create_batch(df, batch_size=20, t_length=10):
    # Sample batch_size windows starting at random positions in the series.
    xs = []
    ys = []
    for _ in range(batch_size):
        t_start = np.random.randint(0, len(df) - t_length)
        x, y = create_data_unit(df, t_length, t_start, x_header_future=[])
        xs.append(x)
        ys.append(y)
    return torch.tensor(xs), torch.tensor(ys)
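# With the defaults above, create_batch returns xs of shape
# (batch_size, t_length, 1) and ys of shape (batch_size, 1): x_header_future
# is dropped, so each time step carries a single feature (the past Rating).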
##
def main():
    df = data_loader("AtocderRate.csv")
    n_input = 1
    # Normalize ratings into roughly [0, 1] for training.
    df["Rating"] /= 2000.0

    model = LSTMmodel(n_input=n_input, n_output=1, n_fc=20)
    loss_function = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

    n_epoch = 10
    n_batch = 50
    n_train = 100
    t_length = 20
    t_range = 5
    np.random.seed(2525)

    ## training
    for epoch in range(n_epoch):
        loss_total = 0.0
        for t in range(n_train):
            optimizer.zero_grad()
            xs, ys = create_batch(df, n_batch, t_length)
            y_inference = model(xs)
            loss = loss_function(y_inference, ys)
            loss.backward()
            optimizer.step()
            loss_total += loss.item()
        # Report RMSE over the epoch (the criterion itself is MSE).
        print("epoch:{0}, loss:{1}".format(epoch, math.sqrt(loss_total / n_train)))
    # forecast: start from the first t_range actual ratings and predict
    # the rest autoregressively, feeding predictions back in as inputs.
    actual = df["Rating"]
    forecast = [float(v) for v in df["Rating"][0:t_range]]
    for i in range(150):
        inputs = torch.tensor(
            np.array(forecast[i:i + t_range], dtype="float32").reshape(1, t_range, 1))
        predict = model(inputs)
        forecast.append(predict[0, 0].item())

    # Undo the normalization before plotting.
    actual = [v * 2000.0 for v in actual]
    forecast = [v * 2000.0 for v in forecast]
    plt.plot(actual, label="actual")
    plt.plot(forecast, label="forecast_past{0}".format(t_range))
    plt.legend()
    plt.savefig("ac_pr_{0}_{1}.png".format(t_length, t_range))
    plt.show()
if __name__ == "__main__":
    t0 = time.time()
    main()
    print("Elapsed: {0}".format(time.time() - t0))