I'll skip the preamble; let's go straight to the code.
```python
# -*- coding: utf-8 -*-
# keras==2.0.5
# tensorflow==1.1.0
import os, sys, string
import logging
import multiprocessing
import time
import json
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

import keras
import keras.backend as K
from keras.datasets import mnist
from keras.models import *
from keras.layers import *
from keras.optimizers import *
from keras.callbacks import *
# from keras.utils.visualize_util import plot
from visual_callbacks import AccLossPlotter

plotter = AccLossPlotter(graphs=['acc', 'loss'], save_graph=True, save_graph_path=sys.path[0])

# character set to recognize
char_ocr = '0123456789'  # string.digits
# maximum length of a recognized string
seq_len = 8
# number of output classes: digits 0-9 plus the CTC blank
label_count = len(char_ocr) + 1


def get_label(filepath):
    # the label is encoded in the file name, e.g. xxx_1234.tif
    lab = []
    for num in str(os.path.split(filepath)[-1]).split('.')[0].split('_')[-1]:
        lab.append(int(char_ocr.find(num)))
    if len(lab) < seq_len:
        cur_seq_len = len(lab)
        for i in range(seq_len - cur_seq_len):
            lab.append(label_count)  # pad short labels up to seq_len
    return lab


def gen_image_data(dir=r'data\train', file_list=[]):
    dir_path = dir
    for rt, dirs, files in os.walk(dir_path):
        for filename in files:
            if filename.find('.') >= 0:
                (shotname, extension) = os.path.splitext(filename)
                if extension == '.tif':  # extension == '.png' or
                    file_list.append(os.path.join('%s\\%s' % (rt, filename)))
    print(len(file_list))

    index = 0
    X = []
    Y = []
    for file in file_list:
        index += 1
        img = cv2.imread(file, 0)
        img = cv2.resize(img, (150, 50), interpolation=cv2.INTER_CUBIC)
        img = cv2.transpose(img, (50, 150))
        img = cv2.flip(img, 1)
        img = (255 - img) / 256  # invert the image
        X.append([img])
        Y.append(get_label(file))

    X = np.transpose(X, (0, 2, 3, 1))
    X = np.array(X)
    Y = np.array(Y)
    return X, Y


# the actual loss calculation occurs here despite it not being
# an internal Keras loss function
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    # the 2 is critical here since the first couple outputs of the RNN
    # tend to be garbage:
    # y_pred = y_pred[:, 2:, :]  # in testing this made no difference
    y_pred = y_pred[:, :, :]
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)


if __name__ == '__main__':
    height = 150
    width = 50
    input_tensor = Input((height, width, 1))
    x = input_tensor
    for i in range(3):
        x = Convolution2D(32 * 2 ** i, (3, 3), activation='relu', padding='same')(x)
        # x = Convolution2D(32*2**i, (3, 3), activation='relu')(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)

    conv_shape = x.get_shape()
    x = Reshape(target_shape=(int(conv_shape[1]), int(conv_shape[2] * conv_shape[3])))(x)
    x = Dense(32, activation='relu')(x)

    gru_1 = GRU(32, return_sequences=True, kernel_initializer='he_normal', name='gru1')(x)
    gru_1b = GRU(32, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru1_b')(x)
    gru1_merged = add([gru_1, gru_1b])

    gru_2 = GRU(32, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(32, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru2_b')(gru1_merged)
    x = concatenate([gru_2, gru_2b])

    x = Dropout(0.25)(x)
    x = Dense(label_count, kernel_initializer='he_normal', activation='softmax')(x)
    base_model = Model(inputs=input_tensor, outputs=x)

    labels = Input(name='the_labels', shape=[seq_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,),
                      name='ctc')([x, labels, input_length, label_length])

    model = Model(inputs=[input_tensor, labels, input_length, label_length], outputs=[loss_out])
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer='adadelta')
    model.summary()

    def test(base_model):
        file_list = []
        X, Y = gen_image_data(r'data\test', file_list)
        y_pred = base_model.predict(X)
        shape = y_pred[:, :, :].shape
        out = K.get_value(K.ctc_decode(y_pred[:, :, :],
                                       input_length=np.ones(shape[0]) * shape[1])[0][0])[:, :seq_len]
        print()
        error_count = 0
        for i in range(len(X)):
            print(file_list[i])
            str_src = str(os.path.split(file_list[i])[-1]).split('.')[0].split('_')[-1]
            print(out[i])
            str_out = ''.join([str(x) for x in out[i] if x != -1])
            print(str_src, str_out)
            if str_src != str_out:
                error_count += 1
                print('################################', error_count)

    class LossHistory(Callback):
        def on_train_begin(self, logs={}):
            self.losses = []

        def on_epoch_end(self, epoch, logs=None):
            model.save_weights('model_1018.w')
            base_model.save_weights('base_model_1018.w')
            test(base_model)

        def on_batch_end(self, batch, logs={}):
            self.losses.append(logs.get('loss'))

    # checkpointer = ModelCheckpoint(filepath="keras_seq2seq_1018.hdf5", verbose=1, save_best_only=True)
    history = LossHistory()
    # base_model.load_weights('base_model_1018.w')
    # model.load_weights('model_1018.w')

    X, Y = gen_image_data()
    maxin = 4900
    subseq_size = 100
    batch_size = 10
    # note: label_length is fixed to seq_len here; see the ctc_batch_cost discussion below
    result = model.fit([X[:maxin], Y[:maxin],
                        np.array(np.ones(len(X)) * int(conv_shape[1]))[:maxin],
                        np.array(np.ones(len(X)) * seq_len)[:maxin]],
                       Y[:maxin],
                       batch_size=20, epochs=1000,
                       callbacks=[history, plotter, EarlyStopping(patience=10)],  # checkpointer,
                       validation_data=([X[maxin:], Y[maxin:],
                                         np.array(np.ones(len(X)) * int(conv_shape[1]))[maxin:],
                                         np.array(np.ones(len(X)) * seq_len)[maxin:]],
                                        Y[maxin:]))

    test(base_model)
    K.clear_session()
```
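One detail worth calling out in gen_image_data: the resize/transpose/flip sequence rotates each image so that the horizontal text direction becomes the first axis, which is what the Reshape layer later treats as the time dimension. Here is a quick shape trace on a dummy grayscale array (a standalone sketch of mine, not part of the original post):

```python
import cv2
import numpy as np

img = np.zeros((40, 120), dtype=np.uint8)  # any grayscale image, shape (h, w)
img = cv2.resize(img, (150, 50))           # dsize is (w, h), so shape becomes (50, 150)
img = cv2.transpose(img)                   # shape (150, 50)
img = cv2.flip(img, 1)                     # horizontal flip, shape stays (150, 50)
print(img.shape)                           # (150, 50): width is now the sequence axis
```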
Bonus: common pitfalls with the keras.backend.ctc_batch_cost arguments
The InvalidArgumentError: sequence_length(0) <= 30 error
The code below is what the vast majority of articles online give for using K.ctc_batch_cost():
```python
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    # the 2 is critical here since the first couple outputs of the RNN
    # tend to be garbage:
    y_pred = y_pred[:, 2:, :]
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
```
Note the line y_pred = y_pred[:, 2:, :], which drops two columns from the second dimension of y_pred; in plain terms, it cuts two steps off the sequence fed to the LSTM. I later ran across an article claiming the subtraction of 2 is there because two time steps were automatically lost when features were fed into Keras's LSTM, which is why the code was written this way. That was presumably a bug in an older version that has since been fixed. If you keep this form in a current version, you get the following error:
InvalidArgumentError: sequence_length(0) <= 30
'<='后面的數值 = 你cnn最后的輸出維度 - 2。這個錯誤我找了很久,一直不明白30哪里來的,后來一行行的檢查代碼是發現了這里很可疑,于是改成如下形式錯誤解決。
```python
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
```
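Conversely, if you do want to keep the y_pred[:, 2:, :] slice, the input_length you pass to K.ctc_batch_cost has to shrink by the same two steps. A minimal sketch of mine with made-up shapes that reproduces the error:

```python
import numpy as np
import keras.backend as K

batch, time_steps, classes = 2, 32, 11  # hypothetical sizes
y_pred = K.softmax(K.random_uniform((batch, time_steps, classes)))
labels = K.constant(np.random.randint(0, 10, (batch, 4)), dtype='float32')
label_length = K.constant(np.full((batch, 1), 4.0))
input_length = K.constant(np.full((batch, 1), float(time_steps)))

# consistent: input_length (32) matches y_pred's time dimension (32)
print(K.eval(K.ctc_batch_cost(labels, y_pred, input_length, label_length)))

# inconsistent: y_pred sliced to 30 steps while input_length still claims 32
# -> InvalidArgumentError: sequence_length(0) <= 30
print(K.eval(K.ctc_batch_cost(labels, y_pred[:, 2:, :], input_length, label_length)))
```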
The ctc_loss_calculator.cc:144] No valid path found or loss: inf error during training
If you know the CTC algorithm, this message means CTC found no valid alignment path. And if no valid path exists, something between the labels and the input must be wrong again! The input-related error was already fixed above, so the labels had to be the culprit. Looking again at the four arguments of ctc_batch_cost, labels and label_length are the suspicious pair. For ctc_batch_cost(), labels holds the integer-index encoding of each string, with shape [batch, max_labelLength], where max_labelLength is the longest string that can be predicted (despite what many posts claim, it is not one-hot encoded); label_length is the actual number of characters in each label. Carrying a habit over from tf.ctc_loss, I had set every label_length to the maximum length, which caused the error.
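A minimal sketch of the intended relationship, reusing the char_ocr/seq_len conventions from the first listing (the helper name is my own):

```python
import numpy as np

char_ocr = '0123456789'
seq_len = 8  # max_labelLength

def encode_label(text):
    # integer class indices, padded up to seq_len; the true length is kept separately
    lab = [char_ocr.find(c) for c in text]
    true_len = len(lab)
    lab += [len(char_ocr)] * (seq_len - true_len)  # pad with an index the decoder never emits
    return lab, true_len

texts = ['1234', '007', '98765432']
encoded = [encode_label(t) for t in texts]
labels = np.array([e[0] for e in encoded])          # shape [batch, seq_len]
label_length = np.array([[e[1]] for e in encoded])  # per-sample true lengths, NOT seq_len
```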
As for the labels argument, max_labelLength is the longest string that can be predicted. It is tied to the second dimension of the feature tensor fed to the LSTM, i.e. the max_step of the feature sequence. On the surface it seems enough that max_labelLength < max_step, but if the margin is small the error still appears. How much smaller it must be comes down to the CTC algorithm itself: CTC inserts a blank after every character of the label, so that extra length has to be accounted for, which gives max_labelLength < max_step // 2. I have not dug into the implementation details of Keras's ctc_batch_cost(), so the above is my conjecture; if you know the definitive answer, please do tell me — thanks in advance!
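As a concrete check against the network in this post (my arithmetic, not the original author's): the input is 150 pixels along the text direction and passes through three 2x2 max-pools, so the feature sequence has 18 steps, and the rule above then allows at most 8 label characters, which is exactly the seq_len = 8 used earlier.

```python
max_step = 150 // 2 // 2 // 2   # three 2x2 max-pools: 150 -> 75 -> 37 -> 18
max_labelLength = 8             # seq_len used in the code above
assert max_labelLength < max_step // 2  # 8 < 9, so the rule is satisfied
```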
The wrong way:
```python
batch_label_length = np.ones(batch_size) * max_labelLength
```
The right way:
```python
batch_x, batch_y = [], []
# input_length: feature steps after the CNN (here the image width downsampled by 8)
batch_input_length = np.ones(batch_size) * (max_img_weigth // 8)
batch_label_length = []
for j in range(i, i + batch_size):
    x, y = self.get_img_data(index_all[j])
    batch_x.append(x)
    batch_y.append(y)
    # label_length: the true character count of each label, not the padded maximum
    batch_label_length.append(self.label_length[j])
```
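For completeness, here is a sketch of how such a batch would then be handed to the four-input CTC model from the first listing. It continues the snippet above rather than standing alone; the reshapes are my addition so each length array has shape [batch, 1] to match the Input definitions, and the dummy target exists only because Keras requires one (the Lambda layer already outputs the loss):

```python
inputs = [np.array(batch_x),
          np.array(batch_y),
          np.asarray(batch_input_length).reshape(-1, 1),
          np.asarray(batch_label_length).reshape(-1, 1)]
dummy_target = np.zeros((batch_size, 1))  # ignored by loss={'ctc': lambda y_true, y_pred: y_pred}
model.train_on_batch(inputs, dummy_target)
```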
最后附一張我的crnn的模型圖:
That's everything in this piece on using the Keras framework with CNN + ctc_loss to recognize images of variable-length character strings. I hope it gives you a useful reference, and I hope you'll keep supporting 服务器之家.
Original article: https://blog.csdn.net/xinfeng2005/article/details/78278832