import tensorflow as tf
from tensorflow import keras
import numpy as np
class NestedCell(keras.layers.Layer):
    """An RNN cell with a nested (structured) state.

    State and output are a 2-element structure:
    ``[(batch, unit_1), (batch, unit_2, unit_3)]``. Wrapping this cell in
    ``keras.layers.RNN`` lets the layer accept correspondingly nested inputs.
    """

    def __init__(self, unit_1, unit_2, unit_3, **kwargs):
        self.unit_1 = unit_1
        self.unit_2 = unit_2
        self.unit_3 = unit_3
        # Nested TensorShapes describe the per-timestep state/output structure.
        self.state_size = [tf.TensorShape([unit_1]),
                           tf.TensorShape([unit_2, unit_3])]
        self.output_size = [tf.TensorShape([unit_1]),
                            tf.TensorShape([unit_2, unit_3])]
        super(NestedCell, self).__init__(**kwargs)

    # Create the cell's weights (the network's nodes).
    def build(self, input_shapes):
        # expect input_shape to contain 2 items, [(batch, i1), (batch, i2, i3)]
        i1 = input_shapes[0][1]
        i2 = input_shapes[1][1]
        i3 = input_shapes[1][2]
        self.kernel_1 = self.add_weight(
            shape=(i1, self.unit_1), initializer='uniform', name='kernel_1')
        self.kernel_2_3 = self.add_weight(
            shape=(i2, i3, self.unit_2, self.unit_3),
            initializer='uniform', name='kernel_2_3')

    # One timestep of computation (how the nodes are evaluated).
    def call(self, inputs, states):
        # inputs: [(batch, input_1), (batch, input_2, input_3)]
        # states: [(batch, unit_1), (batch, unit_2, unit_3)]
        input_1, input_2 = tf.nest.flatten(inputs)
        s1, s2 = states
        output_1 = tf.matmul(input_1, self.kernel_1)
        output_2_3 = tf.einsum('bij,ijkl->bkl', input_2, self.kernel_2_3)
        # New state is a running sum of the per-step outputs.
        state_1 = s1 + output_1
        state_2_3 = s2 + output_2_3
        output = (output_1, output_2_3)
        new_states = (state_1, state_2_3)
        return output, new_states

    def get_config(self):
        # Include the base Layer config (name, dtype, ...) so the cell
        # round-trips through serialization, then add our constructor args.
        # (Original returned only the units and dropped the base config.)
        config = super(NestedCell, self).get_config()
        config.update({'unit_1': self.unit_1,
                       'unit_2': self.unit_2,
                       'unit_3': self.unit_3})
        return config
# --- Training a custom RNN sub-layer (自定义rnn子层训练) ---
# Hyper-parameters for the nested-state RNN demo.
unit_1 = 10
unit_2 = 20
unit_3 = 30
i1 = 32
i2 = 64
i3 = 32
batch_size = 64
num_batches = 10
timestep = 50

# keras.layers.RNN accepts a cell with nested state/inputs transparently.
cell = NestedCell(unit_1, unit_2, unit_3)
rnn = keras.layers.RNN(cell)
input_1 = keras.Input((None, i1))
input_2 = keras.Input((None, i2, i3))
outputs = rnn((input_1, input_2))
model = keras.models.Model([input_1, input_2], outputs)
model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])

# Random data: this only demonstrates the API, not a real learning task.
input_1_data = np.random.random((batch_size * num_batches, timestep, i1))
input_2_data = np.random.random((batch_size * num_batches, timestep, i2, i3))
target_1_data = np.random.random((batch_size * num_batches, unit_1))
target_2_data = np.random.random((batch_size * num_batches, unit_2, unit_3))
input_data = [input_1_data, input_2_data]
target_data = [target_1_data, target_2_data]
# NOTE(review): the stray text ",自定义rnn子层训练" fused onto this line was a
# SyntaxError; it has been moved into the header comment above.
model.fit(input_data, target_data, batch_size=batch_size)
# --- RNN applied to image classification (rnn在图像分类上的实践) ---
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

batch_size = 64
# Each MNIST image batch is a tensor of shape (batch_size, 28, 28).
# Each input sequence will be of size (28, 28) (height is treated like time).
input_dim = 28
units = 64
output_size = 10  # labels are from 0 to 9


# Build the RNN model.
def build_model(allow_cudnn_kernel=True):
    """Return a Sequential MNIST classifier built around an LSTM.

    CuDNN is only available at the layer level, and not at the cell level:
    ``LSTM(units)`` will use the CuDNN kernel, while ``RNN(LSTMCell(units))``
    will run on the generic (non-CuDNN) kernel.
    """
    if allow_cudnn_kernel:
        # The LSTM layer with default options uses CuDNN.
        lstm_layer = keras.layers.LSTM(units, input_shape=(None, input_dim))
    else:
        # Wrapping an LSTMCell in an RNN layer will not use CuDNN.
        # (Fixed typo: original read `keraas.layers.RNN`, a NameError.)
        lstm_layer = keras.layers.RNN(
            keras.layers.LSTMCell(units), input_shape=(None, input_dim))
    model = keras.models.Sequential([
        lstm_layer,
        keras.layers.BatchNormalization(),
        keras.layers.Dense(output_size),
    ])
    return model


mnist = keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
sample, sample_label = x_train[0], y_train[0]

model = build_model(allow_cudnn_kernel=True)
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer='sgd',
    metrics=['accuracy'],
)
model.fit(x_train, y_train, validation_data=(x_test, y_test),
          batch_size=batch_size, epochs=1)
# 参考链接 (Reference): Keras 中的循环神经网络 (RNN) — the Keras
# "Recurrent Neural Networks (RNN) with Keras" guide.
# 本文创建于2022.11.1/12.10, 修改于2022.11.1/12.10
# (Created 2022-11-01 / 2022-12-10; last modified 2022-11-01 / 2022-12-10.)