DEV Community

Super Kai (Kazuya Ito)
Super Kai (Kazuya Ito)

Posted on

RNN in PyTorch

Buy Me a Coffee

*Memos:

RNN() takes a 2D or 3D tensor of zero or more elements and returns a tuple of two 2D or 3D tensors of one or more elements (the output and the final hidden state) computed by an RNN, as shown below:

*Memos:

  • The 1st argument for initialization is input_size(Required-Type:int). *It must be 0 <= x.
  • The 2nd argument for initialization is hidden_size(Required-Type:int). *It must be 1 <= x.
  • The 3rd argument for initialization is num_layers(Optional-Default:1-Type:int): *Memos:
    • It must be 1 <= x.
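    • It should be 1 < x if dropout is 0 < x, because dropout is only applied between layers; with num_layers=1 PyTorch issues a warning and dropout has no effect.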
    • Its number is same as the number of bias_ih_lx, bias_hh_lx, weight_ih_lx and weight_hh_lx so if it's 3, there are bias_ih_l0, bias_ih_l1, bias_ih_l2, bias_hh_l0, bias_hh_l1, bias_hh_l2, weight_ih_l0, weight_ih_l1, weight_ih_l2, weight_hh_l0, weight_hh_l1 and weight_hh_l2.
  • The 4th argument for initialization is nonlinearity(Optional-Default:'tanh'-Type:str). *'tanh' or 'relu' can be set.
  • The 5th argument for initialization is bias(Optional-Default:True-Type:bool). *My post explains bias argument.
  • The 6th argument for initialization is batch_first(Optional-Default:False-Type:bool).
  • The 7th argument for initialization is dropout(Optional-Default:0.0-Type:int or float). *It must be 0 <= x <= 1.
  • The 8th argument for initialization is bidirectional(Optional-Default:False-Type:bool).
  • The 9th argument for initialization is device(Optional-Default:None-Type:str, int or device()). *Memos:
  • The 10th argument for initialization is dtype(Optional-Default:None-Type:dtype).
  • The 1st argument when calling the RNN() instance is input(Required-Type:tensor of float or complex): *Memos:
    • Its device and dtype must be the same as RNN()'s.
    • A complex dtype must be set to dtype of RNN() to use a complex tensor.
  • The 2nd argument is hx(Optional-Default:None-Type:tensor of float or complex). *Its D, device and dtype must be same as input's.
  • The returned tensors' requires_grad is True, even though requires_grad of the input tensor is False by default.
  • An RNN() instance has no device or dtype attribute, so rnn1.device and rnn1.dtype raise an error.
import torch
from torch import nn

# Basic example: build a single-layer RNN with default options, run it on a
# 2D float tensor, then inspect the module's attributes and parameters.
# The "# ..." lines under each expression show the value it evaluates to.

# 2D input with one row of six values; six matches input_size below.
tensor1 = torch.tensor([[8., -3., 0., 1., 5., -2.]])

tensor1.requires_grad
# False

# Seed the RNG before constructing the module so the randomly initialized
# weights and biases (and therefore the outputs below) are reproducible.
torch.manual_seed(42)

rnn1 = nn.RNN(input_size=6, hidden_size=3)

# Calling the module returns a tuple of two tensors.
tensor2 = rnn1(input=tensor1)
tensor2
# (tensor([[0.9590, -0.9501, 0.9999]], grad_fn=<SqueezeBackward1>),
#  tensor([[0.9590, -0.9501, 0.9999]], grad_fn=<SqueezeBackward1>))

# Both returned tensors track gradients even though tensor1 does not.
tensor2[0].requires_grad
tensor2[1].requires_grad
# True

rnn1
# RNN(6, 3)

# Constructor arguments are exposed as attributes; the ones not passed
# above show their default values.
rnn1.input_size
# 6

rnn1.hidden_size
# 3

rnn1.num_layers
# 1

rnn1.nonlinearity
# 'tanh'

rnn1.bias
# True

rnn1.batch_first
# False

rnn1.dropout
# 0.0

rnn1.bidirectional
# False

# Learnable parameters of layer 0 (the only layer, since num_layers is 1).
# Each bias has hidden_size (3) values.
rnn1.bias_ih_l0
# Parameter containing:
# tensor([-0.3471, 0.0545, -0.5702], requires_grad=True)

rnn1.bias_hh_l0
# Parameter containing:
# tensor([0.5214, -0.4904, 0.4457], requires_grad=True)

# Input-to-hidden weights: 3 rows (hidden_size) x 6 columns (input_size).
rnn1.weight_ih_l0
# Parameter containing:
# tensor([[0.4414, 0.4792, -0.1353, 0.5304, -0.1265, 0.1165],
#         [-0.2811, 0.3391, 0.5090, -0.4236, 0.5018, 0.1081],
#         [0.4266, 0.0782, 0.2784, -0.0815, 0.4451, 0.0853]],
#        requires_grad=True)

# Hidden-to-hidden weights: 3 x 3 (hidden_size x hidden_size).
rnn1.weight_hh_l0
# Parameter containing:
# tensor([[-0.2695, 0.1472, -0.2660],
#         [-0.0677, -0.2345, 0.3830],
#         [-0.4557, -0.2662, -0.1630]],
#        requires_grad=True)

# Feed the result of one RNN into a second RNN whose input_size (3) matches
# the width of the first RNN's returned tensors.
# Re-seed so rnn2's parameters are reproducible.
torch.manual_seed(42)

rnn2 = nn.RNN(input_size=3, hidden_size=3)

# tensor2[0] and tensor2[1] hold the same values here, so both calls give
# the same result.
rnn2(input=tensor2[0])
rnn2(input=tensor2[1])
# (tensor([[-0.4656, 0.5770, 0.0342]], grad_fn=<SqueezeBackward1>),
#  tensor([[-0.4656, 0.5770, 0.0342]], grad_fn=<SqueezeBackward1>))

# Same seed again, this time with every constructor argument written out
# with the values used by default.
torch.manual_seed(42)

rnn = nn.RNN(input_size=6, hidden_size=3, num_layers=1, nonlinearity='tanh',
             bias=True, batch_first=False, dropout=0.0, bidirectional=False,
             device=None, dtype=None)
# hx=None and an all-zero hx give the same result shown below.
rnn(input=tensor1, hx=None)
rnn(input=tensor1, hx=torch.tensor([[0., 0., 0.]]))
# (tensor([[0.9590, -0.9501, 0.9999]], grad_fn=<SqueezeBackward1>),
#  tensor([[0.9590, -0.9501, 0.9999]], grad_fn=<SqueezeBackward1>))

# 2D input with two rows of three values (input_size=3).
my_tensor = torch.tensor([[8., -3., 0.],
                          [1., 5., -2.]])
# Re-seed before construction so the shown values are reproducible.
torch.manual_seed(42)

rnn = nn.RNN(input_size=3, hidden_size=3)
# The first returned tensor has one row per input row; the second equals
# the last row of the first.
rnn(input=my_tensor)
# (tensor([[0.9421, 0.9998, -0.9963],
#          [0.9921, -0.2163, 0.6621]], grad_fn=<SqueezeBackward1>),
#  tensor([[0.9921, -0.2163, 0.6621]], grad_fn=<SqueezeBackward1>))

# 2D input with six rows of one value each (input_size=1).
my_tensor = torch.tensor([[8.], [-3.], [0.],
                          [1.], [5.], [-2.]])
torch.manual_seed(42)

rnn = nn.RNN(input_size=1, hidden_size=3)
# Six input rows give six output rows of hidden_size (3) values; the second
# returned tensor again equals the last row of the first.
rnn(input=my_tensor)
# (tensor([[0.9991, 0.9997, -0.6159],
#          [-0.5700, -0.8242, 0.6535],
#          [0.2195, 0.6271, 0.2563],
#          [0.6928, 0.8575, 0.4446],
#          [0.9935, 0.9969, -0.1270],
#          [-0.1506, -0.4145, 0.6044]], grad_fn=<SqueezeBackward1>),
#  tensor([[-0.1506, -0.4145, 0.6044]], grad_fn=<SqueezeBackward1>))

# 3D input of shape (2, 3, 1); the last dimension matches input_size=1.
my_tensor = torch.tensor([[[8.], [-3.], [0.]],
                          [[1.], [5.], [-2.]]])
# Re-seed before construction so the shown values are reproducible.
torch.manual_seed(42)

rnn = nn.RNN(input_size=1, hidden_size=3)
# The first returned tensor has shape (2, 3, 3); the second holds the last
# slice of the first along dim 0.
rnn(input=my_tensor)
# (tensor([[[0.9991, 0.9997, -0.6159],
#           [-0.7527, -0.7232, 0.6466],
#           [0.3320, 0.4802, 0.3485]],
#          [[0.8069, 0.6334, 0.2359],
#           [0.9809, 0.9968, -0.2792],
#           [-0.3643, -0.1861, 0.6483]]], grad_fn=<StackBackward0>),
#  tensor([[[0.8069, 0.6334, 0.2359],
#           [0.9809, 0.9968, -0.2792],
#           [-0.3643, -0.1861, 0.6483]]], grad_fn=<StackBackward0>))

# Same values as above, written as complex numbers.
my_tensor = torch.tensor([[[8.+0.j], [-3.+0.j], [0.+0.j]],
                          [[1.+0.j], [5.+0.j], [-2.+0.j]]])
torch.manual_seed(42)

# dtype=torch.complex64 makes the module's dtype match the complex input.
rnn = nn.RNN(input_size=1, hidden_size=3, dtype=torch.complex64)
rnn(input=my_tensor)
# (tensor([[[0.9985-1.9315e-04j, -0.9702+4.0555e-01j, -0.9542+1.9353e-01j],
#           [-1.0401+1.6018e-01j, 1.4827+4.1275e-01j, 0.3302-5.8074e-01j],
#           [0.1235-9.3636e-01j, 0.2921-2.3584e-01j, -0.1383-2.1280e-01j]],
#          [[-0.7388+4.8243e-01j, -0.1439+6.0930e-01j, -0.1099-4.2196e-01j],
#           [0.9995+1.1798e-03j, -0.6179-1.3320e+00j, -0.6428+1.3737e-01j],
#           [-1.8129+5.6506e-01j, 1.4051+4.5077e-01j, 0.2443-1.2884e-01j]]],
#         grad_fn=<StackBackward0>),
#  tensor([[[-0.7388+4.8243e-01j, -0.1439+6.0930e-01j, -0.1099-4.2196e-01j],
#           [0.9995+1.1798e-03j, -0.6179-1.3320e+00j, -0.6428+1.3737e-01j],
#           [-1.8129+5.6506e-01j, 1.4051+4.5077e-01j, 0.2443-1.2884e-01j]]],
#         grad_fn=<StackBackward0>))
Enter fullscreen mode Exit fullscreen mode

Top comments (0)