12  Recurrent Neural Networks (RNN)

Recurrent neural networks (RNNs) are used to model sequential data, i.e. temporal sequences that exhibit dynamic behavior over time. The core idea is that the network carries a hidden state that is updated at every time step from the current input and the previous hidden state.
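
A minimal sketch of this recurrence in plain R (no deep learning library; the weight matrices W and U, the bias b, and the tanh nonlinearity are illustrative choices):

# vanilla RNN cell: h_t = tanh(W x_t + U h_{t-1} + b)
rnn_step = function(x_t, h_prev, W, U, b) tanh(W %*% x_t + U %*% h_prev + b)

set.seed(42)
n_features = 3L; n_hidden = 5L; n_steps = 10L
W = matrix(rnorm(n_hidden * n_features), n_hidden)   # input-to-hidden weights
U = matrix(rnorm(n_hidden * n_hidden), n_hidden)     # hidden-to-hidden weights
b = rnorm(n_hidden)
h = rep(0, n_hidden)                                 # initial hidden state
X_seq = matrix(rnorm(n_steps * n_features), n_steps) # one sequence: 10 steps, 3 features
for (t in 1:n_steps) h = rnn_step(X_seq[t, ], h, W, U, b)
h  # the final hidden state summarizes the whole sequence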

12.1 Case Study: Predicting drought

We will use a subset of the data described in this GitHub repository.

utils::download.file("https://www.dropbox.com/s/radyscnl5zcf57b/weather_soil.RDS?raw=1", destfile = "weather_soil.RDS")
data = readRDS("weather_soil.RDS")
X = data$train # Features of the last 180 days
dim(X)
[1] 999 180  21
# 999 observations, each a 180-day sequence with 21 features per day
Y = data$target
dim(Y)
[1] 999   6
# 999 observations with a 6-week drought forecast target each

# Let's visualize drought over about two years:
# -> concatenate 16 observations (16 * 6 = 96 weeks, i.e. roughly 22 months)
plot(as.vector(Y[1:16,]), type = "l", xlab = "week", ylab = "Drought")

library(keras3)

holdout = 700:999  # keep the last 300 observations for testing
X_train = X[-holdout,,]
X_test = X[holdout,,]

Y_train = Y[-holdout,]
Y_test = Y[holdout,]
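
Note that the features go into the network unscaled here. In practice, it usually helps to standardize each of the 21 features, with means and standard deviations computed on the training data only. A minimal sketch, assuming all features are continuous:

means = apply(X_train, 3, mean)
sds = apply(X_train, 3, sd)
for (j in seq_len(dim(X_train)[3])) {
  X_train[, , j] = (X_train[, , j] - means[j]) / sds[j]
  X_test[, , j] = (X_test[, , j] - means[j]) / sds[j]
}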

model = keras_model_sequential()
model %>%
  # the LSTM reads the 180-day sequence and returns its last hidden state (60 units)
  layer_lstm(units = 60L, input_shape = dim(X)[2:3]) %>%
  layer_dense(units = 6L)  # one output per predicted week
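
By default, layer_lstm() returns only the hidden state of the last time step. If you stack several recurrent layers, every layer except the last needs return_sequences = TRUE so that the next layer receives the full sequence of states. A hypothetical variant (not used in this case study):

model2 = keras_model_sequential()
model2 %>%
  layer_lstm(units = 60L, return_sequences = TRUE, input_shape = dim(X)[2:3]) %>%
  layer_lstm(units = 60L) %>%  # last recurrent layer: final state only
  layer_dense(units = 6L)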

model %>% compile(loss = keras3::loss_mean_squared_error, optimizer = optimizer_adamax(learning_rate = 0.01))
  
model %>% fit(x = X_train, y = Y_train, epochs = 30L)
Epoch 1/30
22/22 - 1s - 60ms/step - loss: 0.7506
Epoch 2/30
22/22 - 1s - 40ms/step - loss: 0.4153
Epoch 3/30
22/22 - 1s - 29ms/step - loss: 0.3555
Epoch 4/30
22/22 - 1s - 26ms/step - loss: 0.3522
Epoch 5/30
22/22 - 1s - 27ms/step - loss: 0.3493
Epoch 6/30
22/22 - 1s - 26ms/step - loss: 0.3175
Epoch 7/30
22/22 - 1s - 26ms/step - loss: 0.3007
Epoch 8/30
22/22 - 1s - 26ms/step - loss: 0.3136
Epoch 9/30
22/22 - 1s - 26ms/step - loss: 0.3007
Epoch 10/30
22/22 - 1s - 26ms/step - loss: 0.2950
Epoch 11/30
22/22 - 1s - 26ms/step - loss: 0.2871
Epoch 12/30
22/22 - 1s - 27ms/step - loss: 0.2743
Epoch 13/30
22/22 - 1s - 27ms/step - loss: 0.2745
Epoch 14/30
22/22 - 1s - 29ms/step - loss: 0.2906
Epoch 15/30
22/22 - 1s - 27ms/step - loss: 0.2722
Epoch 16/30
22/22 - 1s - 26ms/step - loss: 0.2571
Epoch 17/30
22/22 - 1s - 26ms/step - loss: 0.2449
Epoch 18/30
22/22 - 1s - 26ms/step - loss: 0.2502
Epoch 19/30
22/22 - 1s - 28ms/step - loss: 0.2393
Epoch 20/30
22/22 - 1s - 31ms/step - loss: 0.2437
Epoch 21/30
22/22 - 1s - 31ms/step - loss: 0.2560
Epoch 22/30
22/22 - 1s - 51ms/step - loss: 0.2310
Epoch 23/30
22/22 - 1s - 28ms/step - loss: 0.2176
Epoch 24/30
22/22 - 1s - 26ms/step - loss: 0.2204
Epoch 25/30
22/22 - 1s - 43ms/step - loss: 0.2055
Epoch 26/30
22/22 - 1s - 26ms/step - loss: 0.1906
Epoch 27/30
22/22 - 1s - 27ms/step - loss: 0.1895
Epoch 28/30
22/22 - 1s - 27ms/step - loss: 0.1866
Epoch 29/30
22/22 - 1s - 26ms/step - loss: 0.1923
Epoch 30/30
22/22 - 1s - 26ms/step - loss: 0.1837
preds = model %>% predict(X_test)
10/10 - 0s - 19ms/step
matplot(cbind(as.vector(preds[1:48,]),  
              as.vector(Y_test[1:48,])), 
        col = c("darkblue", "darkred"),
        type = "o", 
        pch = c(15, 16),
        xlab = "week", ylab = "Drought")
legend("topright", bty = "n", 
       col = c("darkblue", "darkred"),
      pch = c(15, 16), 
      legend = c("Prediction", "True Values"))

The following code snippet shows many of the (technical) ingredients you need to build more complex network structures, including LSTM cells. The example has no real functionality; it merely demonstrates how to process two different inputs in different ways within one network:

library(tensorflow)
library(keras3)


inputDimension1 = 50L
inputDimension2 = 10L

input1 = layer_input(shape = inputDimension1)
input2 = layer_input(shape = inputDimension2)

# Branch 1: shallow dense transformation of the second input
modelInput2 = input2 %>%
  layer_dropout(rate = 0.5) %>%
  layer_dense(units = inputDimension2, activation = "gelu")

# Branch 2: embedding + LSTM "memory" path for the first input
modelMemory = input1 %>%
  layer_embedding(input_dim = inputDimension1, output_dim = 64L) %>%
  layer_lstm(units = 64L) %>%
  layer_dropout(rate = 0.5) %>%
  layer_dense(units = 2L, activation = "sigmoid")

# Branch 3: deep fully connected path, also fed by the first input
modelDeep = input1 %>%
  layer_dropout(rate = 0.5) %>%
  layer_dense(units = 64L, activation = "relu") %>%
  layer_dropout(rate = 0.3) %>%
  layer_dense(units = 64L, activation = "relu") %>%
  layer_dense(units = 64L, activation = "relu") %>%
  layer_dense(units = 5L, activation = "sigmoid")

# Main trunk: concatenate the three branch outputs and map them to the final output
modelMain = layer_concatenate(c(modelMemory, modelDeep, modelInput2)) %>%
  layer_dropout(rate = 0.25) %>%
  layer_dense(units = 64L, activation = "relu") %>%
  layer_dropout(rate = 0.3) %>%
  layer_dense(units = 64L, activation = "relu") %>%
  layer_dense(units = 2L, activation = "sigmoid")

model = keras_model(
  inputs = c(input1, input2),
  outputs = c(modelMain)  # the output tensor of modelMain is the model's output
)

summary(model)
Model: "functional_3"
┏━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┓
┃ Layer (type)          ┃ Output Shape      ┃     Param # ┃ Connected to       ┃
┡━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━┩
│ input_layer_1         │ (None, 50)        │           0 │ -                  │
│ (InputLayer)          │                   │             │                    │
├───────────────────────┼───────────────────┼─────────────┼────────────────────┤
│ dropout_2 (Dropout)   │ (None, 50)        │           0 │ input_layer_1[0][… │
├───────────────────────┼───────────────────┼─────────────┼────────────────────┤
│ dense_3 (Dense)       │ (None, 64)        │       3,264 │ dropout_2[0][0]    │
├───────────────────────┼───────────────────┼─────────────┼────────────────────┤
│ embedding (Embedding) │ (None, 50, 64)    │       3,200 │ input_layer_1[0][… │
├───────────────────────┼───────────────────┼─────────────┼────────────────────┤
│ dropout_3 (Dropout)   │ (None, 64)        │           0 │ dense_3[0][0]      │
├───────────────────────┼───────────────────┼─────────────┼────────────────────┤
│ lstm_1 (LSTM)         │ (None, 64)        │      33,024 │ embedding[0][0]    │
├───────────────────────┼───────────────────┼─────────────┼────────────────────┤
│ dense_4 (Dense)       │ (None, 64)        │       4,160 │ dropout_3[0][0]    │
├───────────────────────┼───────────────────┼─────────────┼────────────────────┤
│ input_layer_2         │ (None, 10)        │           0 │ -                  │
│ (InputLayer)          │                   │             │                    │
├───────────────────────┼───────────────────┼─────────────┼────────────────────┤
│ dropout_1 (Dropout)   │ (None, 64)        │           0 │ lstm_1[0][0]       │
├───────────────────────┼───────────────────┼─────────────┼────────────────────┤
│ dense_5 (Dense)       │ (None, 64)        │       4,160 │ dense_4[0][0]      │
├───────────────────────┼───────────────────┼─────────────┼────────────────────┤
│ dropout (Dropout)     │ (None, 10)        │           0 │ input_layer_2[0][… │
├───────────────────────┼───────────────────┼─────────────┼────────────────────┤
│ dense_2 (Dense)       │ (None, 2)         │         130 │ dropout_1[0][0]    │
├───────────────────────┼───────────────────┼─────────────┼────────────────────┤
│ dense_6 (Dense)       │ (None, 5)         │         325 │ dense_5[0][0]      │
├───────────────────────┼───────────────────┼─────────────┼────────────────────┤
│ dense_1 (Dense)       │ (None, 10)        │         110 │ dropout[0][0]      │
├───────────────────────┼───────────────────┼─────────────┼────────────────────┤
│ concatenate           │ (None, 17)        │           0 │ dense_2[0][0],     │
│ (Concatenate)         │                   │             │ dense_6[0][0],     │
│                       │                   │             │ dense_1[0][0]      │
├───────────────────────┼───────────────────┼─────────────┼────────────────────┤
│ dropout_4 (Dropout)   │ (None, 17)        │           0 │ concatenate[0][0]  │
├───────────────────────┼───────────────────┼─────────────┼────────────────────┤
│ dense_7 (Dense)       │ (None, 64)        │       1,152 │ dropout_4[0][0]    │
├───────────────────────┼───────────────────┼─────────────┼────────────────────┤
│ dropout_5 (Dropout)   │ (None, 64)        │           0 │ dense_7[0][0]      │
├───────────────────────┼───────────────────┼─────────────┼────────────────────┤
│ dense_8 (Dense)       │ (None, 64)        │       4,160 │ dropout_5[0][0]    │
├───────────────────────┼───────────────────┼─────────────┼────────────────────┤
│ dense_9 (Dense)       │ (None, 2)         │         130 │ dense_8[0][0]      │
└───────────────────────┴───────────────────┴─────────────┴────────────────────┘
 Total params: 53,815 (210.21 KB)
 Trainable params: 53,815 (210.21 KB)
 Non-trainable params: 0 (0.00 B)
# model %>% plot_model()
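
The model above is never trained. To fit such a multi-input model, you pass one array per input, in the order given in inputs. A purely hypothetical sketch with random dummy data (all shapes and targets are made up for illustration):

n = 100L
x1 = matrix(sample(0:49, n * inputDimension1, replace = TRUE), nrow = n)  # integer codes for the embedding
x2 = matrix(rnorm(n * inputDimension2), nrow = n)
y = matrix(runif(n * 2L), nrow = n)  # two targets in [0, 1], matching the sigmoid head

model %>% compile(loss = "binary_crossentropy", optimizer = optimizer_adamax())
model %>% fit(x = list(x1, x2), y = y, epochs = 2L, verbose = 0L)

The same structure can be built with torch: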
library(torch)

model_torch = nn_module(
  initialize = function(inputDimension1 = 50L, inputDimension2 = 10L) {
    self$dim1 = inputDimension1
    self$dim2 = inputDimension2
    self$modelInput2 = nn_sequential(
      nn_dropout(0.5),
      nn_linear(in_features = self$dim2, out_features = self$dim2),
      nn_gelu()  # "gelu", as in the keras version above
    )
    self$modelMemory = nn_sequential(
      nn_embedding(self$dim1, 64),
      nn_lstm(64, 64, batch_first = TRUE)  # input arrives as (batch, seq_len, features)
    )
    self$modelMemoryOutput = nn_sequential(
      nn_dropout(0.5),
      nn_linear(64L, 2L),
      nn_sigmoid()
    )
    
    self$modelDeep = nn_sequential(
      nn_dropout(0.5),
      nn_linear(self$dim1, 64L),
      nn_relu(),
      nn_dropout(0.3),
      nn_linear(64, 64),
      nn_relu(),
      nn_linear(64, 64),
      nn_relu(),
      nn_linear(64, 5),
      nn_sigmoid()
    )
    
    self$modelMain = nn_sequential(
      nn_linear(7L + self$dim2, 64),  # concatenated: 2 + 5 + dim2 = 17 features
      nn_relu(),
      nn_dropout(0.5),
      nn_linear(64, 64),
      nn_relu(),
      nn_dropout(0.5),
      nn_linear(64, 2),
      nn_sigmoid()
    )
  },
  
  forward = function(x) {
    input1 = x[[1]]
    input2 = x[[2]]
    out2 = self$modelInput2(input2)
    # nn_lstm returns list(output, list(h_n, c_n)); as in the keras model,
    # we keep only the hidden state of the last time step
    memory = self$modelMemory(input1$to(dtype = torch_long()))[[1]]
    out1 = self$modelMemoryOutput(memory[ , dim(memory)[2], ])
    # the dense path expects floats, whereas the embedding needs integer indices
    out3 = self$modelDeep(input1$to(dtype = torch_float()))
    out = self$modelMain(torch_cat(list(out1, out2, out3), dim = 2))
    return(out)
  }
  
)

(model_torch())
An `nn_module` containing 54,071 parameters.

── Modules ─────────────────────────────────────────────────────────────────────
• modelInput2: <nn_sequential> #110 parameters
• modelMemory: <nn_sequential> #36,480 parameters
• modelMemoryOutput: <nn_sequential> #130 parameters
• modelDeep: <nn_sequential> #11,909 parameters
• modelMain: <nn_sequential> #5,442 parameters
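
As a quick hypothetical smoke test, you can push random dummy inputs through the forward pass (batch size 8; note that nn_embedding in torch expects 1-based indices, hence values in 1 to 50):

net = model_torch()
x1 = torch_randint(1, 51, size = c(8, 50), dtype = torch_long())  # indices 1 .. 50
x2 = torch_randn(8, 10)
out = net(list(x1, x2))
dim(out)  # 8 2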