本文為大家分享了java實現百度云ocr識別的具體代碼,高精度ocr識別身份證信息,供大家參考,具體內容如下
1.通用ocr文字識別
這種ocr只能識別圖片中的文字,且是按行識別并返回結果,精度較低。
首先引入依賴包:
1
2
3
4
5
|
<dependency> <groupId>com.baidu.aip</groupId> <artifactId>java-sdk</artifactId> <version>4.6.0</version> </dependency> |
通過ocr工具類:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
|
package util; import com.baidu.aip.ocr.aipocr; import org.json.jsonobject; import java.util.hashmap; public class ocrapi { private static final string app_id = "你的 app id" ; private static final string api_key = "xb12m5t4js2n7" ; private static final string secret_key = "9xvx9gpcsbsutz" ; private static aipocr getaipclient() { return getaipclient(api_key, secret_key); } public static aipocr getaipclient(string apikey, string secretkey) { aipocr client = new aipocr(app_id, apikey, secretkey); // 可選:設置網絡連接參數 client.setconnectiontimeoutinmillis( 2000 ); client.setsockettimeoutinmillis( 60000 ); return client; } public static string result(aipocr client) { // 傳入可選參數調用接口 hashmap<string, string> options = new hashmap<>(); options.put( "language_type" , "chn_eng" ); options.put( "detect_direction" , "true" ); options.put( "detect_language" , "true" ); options.put( "probability" , "true" ); jsonobject res = client.basicgeneralurl( "https://lichunyu1234.oss-cn-shanghai.aliyuncs.com/1.png" , options); return res.tostring( 2 ); } public static void main(string[] args) { system.out.println(result(getaipclient())); } } |
結果如下,識別有兩行信息(words即是識別的信息):
2.高精度ocr識別身份證信息
這種就比較高精度,且按照分類顯示,返回數據更友好,高可用。
2.1 接口說明及請求參數的官方截圖如下:
2.2 ocr身份證識別工具類
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
|
package util; import com.alibaba.druid.util.base64; import com.alibaba.fastjson.jsonobject; import java.io.*; import java.net.*; import java.nio.charset.standardcharsets; import java.util.list; import java.util.map; public class ocrutil { // access_token獲取 private static final string access_token_host = "https://aip.baidubce.com/oauth/2.0/token?" ; // 身份證識別請求url private static final string ocr_host = "https://aip.baidubce.com/rest/2.0/ocr/v1/idcard?" ; // apikey,secretkey private static final string api_key = "xb12m5t4js" ; private static final string secret_key = "9xvx9gpcsbsut" ; // 獲取百度云ocr的授權access_token public static string getaccesstoken() { return getaccesstoken(api_key, secret_key); } /** * 獲取百度云ocr的授權access_token * @param apikey * @param secretkey * @return */ public static string getaccesstoken(string apikey, string secretkey) { string accesstokenurl = access_token_host // 1. grant_type為固定參數 + "grant_type=client_credentials" // 2. 官網獲取的 api key + "&client_id=" + apikey // 3. 官網獲取的 secret key + "&client_secret=" + secretkey; try { url url = new url(accesstokenurl); // 打開和url之間的連接 httpurlconnection connection = (httpurlconnection) url.openconnection(); connection.setrequestmethod( "get" ); connection.connect(); // 獲取響應頭 map<string, list<string>> map = connection.getheaderfields(); // 遍歷所有的響應頭字段 for (string key : map.keyset()) { system.out.println(key + "---->" + map.get(key)); } // 定義 bufferedreader輸入流來讀取url的響應 bufferedreader bufferedreader = new bufferedreader( new inputstreamreader(connection.getinputstream())); stringbuilder result = new stringbuilder(); string inputline; while ((inputline = bufferedreader.readline()) != null ) { result.append(inputline); } jsonobject jsonobject = jsonobject.parseobject(result.tostring()); return jsonobject.getstring( "access_token" ); } catch (exception e) { e.printstacktrace(); system.err.print( "獲取access_token失敗" ); } return null ; } /** * 獲取身份證識別后的數據 * @param imageurl * @param idcardside * @return */ public static 
string getstringidentitycard(file imageurl, string idcardside) { // 身份證ocr的http url+鑒權token string ocrurl = ocr_host+ "access_token=" +getaccesstoken(); system.out.println(ocrurl); system.out.println( "***************************************************" ); system.out.println(getaccesstoken()); // 對圖片進行base64處理 string image = encodeimagetobase64(imageurl); // 請求參數 string requestparam = "detect_direction=true&id_card_side=" +idcardside+ "&image=" +image; try { // 請求ocr地址 url url = new url(ocrurl); httpurlconnection connection = (httpurlconnection) url.openconnection(); // 設置請求方法為post connection.setrequestmethod( "post" ); // 設置請求頭 connection.setrequestproperty( "content-type" , "application/x-www-form-urlencoded" ); connection.setrequestproperty( "apikey" , api_key); connection.setdooutput( true ); connection.getoutputstream().write(requestparam.getbytes(standardcharsets.utf_8)); connection.connect(); // 定義 bufferedreader輸入流來讀取url的響應 bufferedreader bufferedreader = new bufferedreader( new inputstreamreader(connection.getinputstream(), standardcharsets.utf_8)); stringbuilder result = new stringbuilder(); string inputline; while ((inputline = bufferedreader.readline()) != null ) { result.append(inputline); } bufferedreader.close(); return result.tostring(); } catch (exception e) { e.printstacktrace(); system.err.println( "身份證ocr識別異常" ); return null ; } } /** * 對圖片url進行base64編碼處理 * @param imageurl * @return */ public static string encodeimagetobase64(file imageurl) { // 將圖片文件轉化為字節數組字符串,并對其進行base64編碼處理 byte [] data = null ; try { inputstream inputstream = new fileinputstream(imageurl); data = new byte [inputstream.available()]; inputstream.read(data); inputstream.close(); // 對字節數組base64編碼 return urlencoder.encode(base64.bytearraytobase64(data), "utf-8" ); } catch (exception e) { e.printstacktrace(); return null ; } } /** * 提取ocr識別身份證有效信息 * @param * @return */ public static map<string, string> getidcardinfo(multipartfile image, int idcardside) { string value = 
getstringidentitycard(image, idcardside); string side; if (idcardside == 1 ) { side = "正面" ; } else { side = "背面" ; } map<string, string> map = new hashmap<>(); jsonobject jsonobject = jsonobject.parseobject(value); jsonobject words_result = jsonobject.getjsonobject( "words_result" ); if (words_result == null || words_result.isempty()) { throw new myexception( "請提供身份證" +side+ "圖片" ); } for (string key : words_result.keyset()) { jsonobject result = words_result.getjsonobject(key); string info = result.getstring( "words" ); switch (key) { case "姓名" : map.put( "name" , info); break ; case "性別" : map.put( "sex" , info); break ; case "民族" : map.put( "nation" , info); break ; case "出生" : map.put( "birthday" , info); break ; case "住址" : map.put( "address" , info); break ; case "公民身份號碼" : map.put( "idnumber" , info); break ; case "簽發機關" : map.put( "issuedorganization" , info); break ; case "簽發日期" : map.put( "issuedat" , info); break ; case "失效日期" : map.put( "expiredat" , info); break ; } } return map; } } |
官方返回示例:
對于身份證識別有個大坑:
1.有的base64編碼后有頭部“base64:”要去掉,阿里巴巴的base64可以正常使用。
2.ocr識別官方只說明圖片要base64編碼,但是實際上base64編碼后還要再做一次urlencode編碼才可以。
以上就是本文的全部內容,希望對大家的學習有所幫助,也希望大家多多支持服務器之家。
原文鏈接:https://blog.csdn.net/weixin_42058600/article/details/82709590