import tensorflow as tf
from tensorflow import keras
import numpy as np
class NestedCell(keras.layers.Layer):
    """An RNN cell with a nested (structured) state.

    State and output are a 2-element structure:
    ``[(batch, unit_1), (batch, unit_2, unit_3)]``. Wrapping this cell in
    ``keras.layers.RNN`` lets the layer accept correspondingly nested inputs.
    """

    def __init__(self, unit_1, unit_2, unit_3, **kwargs):
        self.unit_1 = unit_1
        self.unit_2 = unit_2
        self.unit_3 = unit_3
        # Nested TensorShapes describe the per-timestep state/output structure.
        self.state_size = [tf.TensorShape([unit_1]),
                           tf.TensorShape([unit_2, unit_3])]
        self.output_size = [tf.TensorShape([unit_1]),
                            tf.TensorShape([unit_2, unit_3])]
        super(NestedCell, self).__init__(**kwargs)

    # Create the cell's weights (the network's nodes).
    def build(self, input_shapes):
        # expect input_shape to contain 2 items, [(batch, i1), (batch, i2, i3)]
        i1 = input_shapes[0][1]
        i2 = input_shapes[1][1]
        i3 = input_shapes[1][2]
        self.kernel_1 = self.add_weight(
            shape=(i1, self.unit_1), initializer='uniform', name='kernel_1')
        self.kernel_2_3 = self.add_weight(
            shape=(i2, i3, self.unit_2, self.unit_3),
            initializer='uniform', name='kernel_2_3')

    # One timestep of computation (how the nodes are evaluated).
    def call(self, inputs, states):
        # inputs: [(batch, input_1), (batch, input_2, input_3)]
        # states: [(batch, unit_1), (batch, unit_2, unit_3)]
        input_1, input_2 = tf.nest.flatten(inputs)
        s1, s2 = states
        output_1 = tf.matmul(input_1, self.kernel_1)
        output_2_3 = tf.einsum('bij,ijkl->bkl', input_2, self.kernel_2_3)
        # New state is a running sum of the per-step outputs.
        state_1 = s1 + output_1
        state_2_3 = s2 + output_2_3
        output = (output_1, output_2_3)
        new_states = (state_1, state_2_3)
        return output, new_states

    def get_config(self):
        # Include the base Layer config (name, dtype, ...) so the cell
        # round-trips through serialization, then add our constructor args.
        # (Original returned only the units and dropped the base config.)
        config = super(NestedCell, self).get_config()
        config.update({'unit_1': self.unit_1,
                       'unit_2': self.unit_2,
                       'unit_3': self.unit_3})
        return config
# --- Training a custom RNN sub-layer (自定义rnn子层训练) ---
# Hyper-parameters for the nested-state RNN demo.
unit_1 = 10
unit_2 = 20
unit_3 = 30
i1 = 32
i2 = 64
i3 = 32
batch_size = 64
num_batches = 10
timestep = 50

# keras.layers.RNN accepts a cell with nested state/inputs transparently.
cell = NestedCell(unit_1, unit_2, unit_3)
rnn = keras.layers.RNN(cell)
input_1 = keras.Input((None, i1))
input_2 = keras.Input((None, i2, i3))
outputs = rnn((input_1, input_2))
model = keras.models.Model([input_1, input_2], outputs)
model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])

# Random data: this only demonstrates the API, not a real learning task.
input_1_data = np.random.random((batch_size * num_batches, timestep, i1))
input_2_data = np.random.random((batch_size * num_batches, timestep, i2, i3))
target_1_data = np.random.random((batch_size * num_batches, unit_1))
target_2_data = np.random.random((batch_size * num_batches, unit_2, unit_3))
input_data = [input_1_data, input_2_data]
target_data = [target_1_data, target_2_data]
# NOTE(review): the stray text ",自定义rnn子层训练" fused onto this line was a
# SyntaxError; it has been moved into the header comment above.
model.fit(input_data, target_data, batch_size=batch_size)
# --- RNN applied to image classification (rnn在图像分类上的实践) ---
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

batch_size = 64
# Each MNIST image batch is a tensor of shape (batch_size, 28, 28).
# Each input sequence will be of size (28, 28) (height is treated like time).
input_dim = 28
units = 64
output_size = 10  # labels are from 0 to 9


# Build the RNN model.
def build_model(allow_cudnn_kernel=True):
    """Return a Sequential MNIST classifier built around an LSTM.

    CuDNN is only available at the layer level, and not at the cell level:
    ``LSTM(units)`` will use the CuDNN kernel, while ``RNN(LSTMCell(units))``
    will run on the generic (non-CuDNN) kernel.
    """
    if allow_cudnn_kernel:
        # The LSTM layer with default options uses CuDNN.
        lstm_layer = keras.layers.LSTM(units, input_shape=(None, input_dim))
    else:
        # Wrapping an LSTMCell in an RNN layer will not use CuDNN.
        # (Fixed typo: original read `keraas.layers.RNN`, a NameError.)
        lstm_layer = keras.layers.RNN(
            keras.layers.LSTMCell(units), input_shape=(None, input_dim))
    model = keras.models.Sequential([
        lstm_layer,
        keras.layers.BatchNormalization(),
        keras.layers.Dense(output_size),
    ])
    return model


mnist = keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
sample, sample_label = x_train[0], y_train[0]

model = build_model(allow_cudnn_kernel=True)
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer='sgd',
    metrics=['accuracy'],
)
model.fit(x_train, y_train, validation_data=(x_test, y_test),
          batch_size=batch_size, epochs=1)
# 参考链接 (Reference): Keras 中的循环神经网络 (RNN) — the Keras
# "Recurrent Neural Networks (RNN) with Keras" guide.
# 本文创建于2022.11.1/12.10, 修改于2022.11.1/12.10
# (Created 2022-11-01 / 2022-12-10; last modified 2022-11-01 / 2022-12-10.)