1 year ago
#76316
Akshay Verma
PyTorch takes 4x longer than Keras for the same LSTM model
We have run an LSTM model for binary classification that takes multiple sequential inputs; the outputs of the per-input LSTMs are concatenated for classification. Below is the PyTorch model.
import pandas as pd
import time
from nltk.tokenize import word_tokenize
from itertools import combinations
from collections import Counter
import numpy as np
import pickle
import feather
from itertools import groupby
#Tokenize the data manually
from nltk.tokenize import word_tokenize
from itertools import combinations
from collections import Counter
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
from torch.nn.utils.rnn import pad_sequence
import torch.nn as nn
from torchvision import models
from torchsummary import summary
# Sequence length; the Keras Input layers in this file use it as their shape.
max_len = 150

# Passed as vocab_size to the torch model; the Keras embeddings are sized
# def_val + 1.
def_val = 1138

# Not referenced anywhere in the code shown here.
pod_flag = 0

# Presumably the names of the event-sequence fields and their matching
# time-spent fields -- not referenced in the code shown here; confirm with
# the data-loading code.
events = [
    'eve_seq_page_action',
    'eve_seq_page_type',
    'eve_seq_store_name',
    'eve_seq_sub_page_type',
]
ts = [
    'ts_seq_page_action',
    'ts_seq_page_type',
    'ts_seq_store_name',
    'ts_seq_sub_page_type',
]
#LSTM Model defined
class LSTM_model(nn.Module):
    """Binary classifier over four parallel token sequences.

    Each sequence is embedded, concatenated step-by-step with one numeric
    value, and fed to its own LSTM. The last hidden state of each LSTM is
    concatenated and passed through a three-layer MLP ending in a sigmoid.

    Args:
        vocab_size: Each embedding table has ``vocab_size + 1`` rows, so
            token ids must lie in ``[0, vocab_size]``. Row ``vocab_size`` is
            the padding row.
        hidden_size: Hidden size of every LSTM.
        add_feats: Number of numeric features appended to each embedded
            step. ``forward`` appends exactly one value per step, so this is
            expected to be 1.
        embed_size: Embedding dimension.
    """

    def __init__(self, vocab_size, hidden_size, add_feats, embed_size):
        super().__init__()
        # The padding row is derived from vocab_size rather than from a
        # module-level constant, so the index is always inside the table
        # (which has vocab_size + 1 rows) whatever vocab_size is passed.
        pad_idx = vocab_size
        # Model layers. Attribute names are kept as embeddingN / lstmN so
        # state_dict keys stay stable.
        self.embedding0 = nn.Embedding(vocab_size + 1, embed_size, padding_idx=pad_idx)
        self.embedding1 = nn.Embedding(vocab_size + 1, embed_size, padding_idx=pad_idx)
        self.embedding2 = nn.Embedding(vocab_size + 1, embed_size, padding_idx=pad_idx)
        self.embedding3 = nn.Embedding(vocab_size + 1, embed_size, padding_idx=pad_idx)
        # + add_feats accommodates the additional numeric feature (ts).
        self.lstm0 = nn.LSTM(embed_size + add_feats, hidden_size, batch_first=True)
        self.lstm1 = nn.LSTM(embed_size + add_feats, hidden_size, batch_first=True)
        self.lstm2 = nn.LSTM(embed_size + add_feats, hidden_size, batch_first=True)
        self.lstm3 = nn.LSTM(embed_size + add_feats, hidden_size, batch_first=True)
        self.fc1 = nn.Linear(hidden_size * 4, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.drop = nn.Dropout(0.3)
        # Activations
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, a, b, c, d, ts0, ts1, ts2, ts3):
        """Return one sigmoid output per sample.

        Args:
            a, b, c, d: Token-id tensors, one per sequence; cast to long
                before the embedding lookup. Assumed to be (batch, seq_len)
                -- TODO confirm against the data pipeline.
            ts0, ts1, ts2, ts3: Numeric tensors paired with ``a``..``d``;
                a trailing feature axis is added before concatenation, so
                they are assumed to share the (batch, seq_len) shape.

        Returns:
            Tensor with one column holding the sigmoid output per sample.
        """
        branches = (
            (self.embedding0, self.lstm0, a, ts0),
            (self.embedding1, self.lstm1, b, ts1),
            (self.embedding2, self.lstm2, c, ts2),
            (self.embedding3, self.lstm3, d, ts3),
        )
        last_hidden = []
        for embed, lstm, tokens, step_vals in branches:
            # Append the numeric value to every embedded step.
            step_feats = torch.cat([embed(tokens.long()), step_vals.unsqueeze(2)], 2)
            # Only the final hidden state is used; per-step outputs and the
            # cell state are discarded.
            _, (h_n, _) = lstm(step_feats)
            # h_n is (num_layers, batch, hidden); keep the last layer.
            last_hidden.append(h_n[-1, :, :])
        # Concatenate the four branch summaries and classify.
        f = torch.cat(last_hidden, 1)
        x = self.relu(self.fc1(f))
        x = self.drop(x)
        x = self.relu(self.fc2(x))
        x = self.drop(x)
        x = self.sigmoid(self.fc3(x))
        return x
# Loss: binary cross-entropy on the model's sigmoid outputs.
loss_fn = nn.BCELoss()
# Model: 50-dim embeddings, one extra numeric feature per step, 32-unit LSTMs.
model = LSTM_model(
    embed_size=50,
    add_feats=1,
    hidden_size=32,
    vocab_size=def_val,
)
# Optimizer: Adam over all model parameters.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-2)
import torch.utils.data as data_utils
# Synthetic benchmark data: eight all-zero feature tensors (four token-id
# sequences followed by four numeric sequences) plus random binary labels.
n_samples = 100000
seq_len = 150
feature_tensors = [torch.zeros(n_samples, seq_len) for _ in range(8)]
# torch.randint's upper bound is exclusive: (0, 1) would make every label 0,
# so (0, 2) is required to draw both classes.
labels = torch.randint(0, 2, (n_samples, 1)).float()
dataset = torch.utils.data.TensorDataset(*feature_tensors, labels)
loader = torch.utils.data.DataLoader(
    dataset,
    num_workers=0,
    batch_size=256,
    # shuffle=True is intentionally left off for this benchmark.
)
%%time
# Optional CPU tuning: torch.set_num_threads(8)
n_epochs = 1  # or whatever
# Python floats sampled every 100 batches (not tensors, so no autograd graph
# is kept alive between iterations).
losses = []
for epoch in range(n_epochs):
    model.train()
    for batch_idx, (a, b, c, d, ts0, ts1, ts2, ts3, y) in enumerate(loader):
        t0 = time.time()
        # Call the module itself rather than .forward() so that any
        # registered hooks run.
        outputs = model(a, b, c, d, ts0, ts1, ts2, ts3)
        loss = loss_fn(outputs, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            # .item() detaches the scalar from the graph before storing it.
            loss_value = loss.item()
            losses.append(loss_value)
            # Shape print kept for debugging, but only at the logging
            # interval so it does not add console I/O to every batch.
            print(a.size(), b.size())
            print("epoch {}.\ttimespent {}.\tloss : {}".format(epoch, time.time() - t0, loss_value))
It takes almost 4 minutes to run 1 epoch on this sample dataset. The current implementation runs on a CPU.
However, the same model, with an equal number of parameters, implemented in Keras runs a single epoch in about 1 minute.
Below is the Keras Implementation:
###Keras Experiment###
import pandas as pd
from nltk.tokenize import word_tokenize
from itertools import combinations
from collections import Counter
import numpy as np
import pickle
import feather
from itertools import groupby
#Tokenize the data manually
from nltk.tokenize import word_tokenize
from itertools import combinations
from collections import Counter
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.models import Model
from keras.models import Sequential
from keras.layers import LSTM, Activation, Dense, Dropout, Input, Embedding
from keras.preprocessing.text import Tokenizer
from keras.preprocessing import sequence
# from keras.utils import to_categorical
from keras.callbacks import EarlyStopping
from keras.layers import Input, Embedding, Dot, Reshape, Dense, Dropout,Concatenate
def RNN():
    """Build the four-branch Keras LSTM classifier.

    Every branch embeds one token sequence, concatenates the per-step
    time-spent input onto the embedding, and runs an LSTM(32). The four LSTM
    outputs are concatenated and passed through Dense(64) -> Dense(32) ->
    Dense(1) with a sigmoid at the end.

    Returns:
        An uncompiled ``Model`` whose inputs are the four sequence inputs
        followed by the four time-spent inputs.
    """
    branch_ids = range(4)

    # Inputs for the sequential data.
    seq_inputs = [Input(name=f'input{i}', shape=[max_len]) for i in branch_ids]
    # Inputs for the time spent on each page.
    ts_inputs = [Input(name=f'input0{i + 1}', shape=[max_len, 1]) for i in branch_ids]

    # One embedding table per sequence.
    embedded = [
        Embedding(def_val + 1, 50, input_length=max_len)(seq)
        for seq in seq_inputs
    ]

    # Concatenate each embedding with its time-spent input.
    merged = [
        Concatenate(name=f'Concatenated_eve_ts{i}')([emb, spent])
        for i, (emb, spent) in enumerate(zip(embedded, ts_inputs))
    ]

    # One LSTM per branch.
    encoded = [LSTM(32)(branch) for branch in merged]

    # Concatenate all the LSTM outputs and classify.
    hidden = Concatenate(name='Concatenated_lstm')(encoded)
    hidden = Dense(64, name='FC1')(hidden)
    hidden = Activation('relu')(hidden)
    hidden = Dropout(0.3)(hidden)
    hidden = Dense(32, name='FC2', activation='relu')(hidden)
    hidden = Dropout(0.3)(hidden)
    logits = Dense(1, name='out_layer')(hidden)
    probs = Activation('sigmoid')(logits)

    return Model(inputs=seq_inputs + ts_inputs, outputs=probs)
# Build the Keras model and print its layer/parameter summary.
# Note: this rebinds the name `model`, which earlier in this file refers to
# the torch LSTM_model instance.
model = RNN()
model.summary()
%%time
# Compile with Adam and binary cross-entropy, then train for one epoch on
# synthetic data: eight all-zero input arrays and random 0/1 labels.
model.compile(loss='binary_crossentropy', optimizer='Adam', metrics=['accuracy'])
dummy_inputs = [np.zeros([100000, 150]) for _ in range(8)]
dummy_labels = np.random.randint(2, size=(100000, 1))
model.fit(
    dummy_inputs,
    dummy_labels,
    batch_size=256,
    epochs=1,
    verbose=True,
)
Is there any reason why this would be the case?
python
keras
pytorch
lstm
0 Answers
Your Answer