由於所爬取的網站需要驗證碼,通過網頁的開發人員工具【F12】及在線 HTTP POST/GET 接口測試工具(http://coolaf.com/)發現:訪問時只要加上請求頭(header)信息,就可以跳過驗證碼校驗。
而且該網站只接受post請求,對提交的參數也只接受json格式,否則請求失敗。
現將通過 post 方式提交json參數的方法記錄如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
|
import java.io.UnsupportedEncodingException; import java.net.URI; import java.net.URLDecoder; import java.util.ArrayList; import java.util.List; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.client.HttpClient; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.HttpPost; import org.apache.http.client.methods.HttpRequestBase; import org.apache.http.client.utils.URIBuilder; import org.apache.http.entity.StringEntity; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClientBuilder; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils; import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONObject; /** * <p>@PostJsonParamsTest.java</p> * @version 1.0 * @author zxk * @Date 2018-3-3 */ public class PostJsonParamsTest { // 超時時間 private static final int RUN_TIME = 10000 ; // 爬取初始頁數 private String page; public static void main(String[] args) throws Exception { PostJsonParamsTest crawl = new PostJsonParamsTest(); // 請求的url地址 String url = "http://www.gzcredit.gov.cn/Service/CreditService.asmx/searchOrgWithPage" ; // 設置起始訪問頁碼 crawl.setPage( "1" ); String isStop = "" ; // 設置請求 HttpRequestBase request = null ; request = new HttpPost(url); try { // 設置config RequestConfig requestConfig = RequestConfig.custom() .setSocketTimeout(RUN_TIME) .setConnectTimeout(RUN_TIME) .setConnectionRequestTimeout(RUN_TIME) .build(); request.setConfig(requestConfig); // json 格式的 post 參數 String postParams = "{\"condition\":{\"qymc\":\"%%%%\",\"cydw\":\"\"},\"pageNo\":" +crawl.getPage()+ ",\"pageSize\":100,count:2709846}" ; System.out.println(postParams); HttpEntity httpEntity = new StringEntity(postParams); ((HttpPost) request).setEntity(httpEntity); // 添加請求頭,可以繞過驗證碼 request.addHeader( "Accept" , "application/json, text/javascript, */*" ); request.addHeader( "Accept-Encoding" , "gzip, deflate" ); request.addHeader( 
"Accept-Language" , "zh-CN,zh;q=0.8" ); request.addHeader( "Connection" , "keep-alive" ); request.addHeader( "Host" , "www.gzcredit.gov.cn" ); request.addHeader( "Content-Type" , "application/json; charset=UTF-8" ); URIBuilder builder = new URIBuilder(url); URI uri = builder.build(); uri = new URI(URLDecoder.decode(uri.toString(), "UTF-8" )); request.setURI(uri); while (!isStop.equals( "停止" )||isStop.equals( "重跑" )){ isStop = crawl.crawlList(request); if (isStop.equals( "爬取" )){ crawl.setPage(String.valueOf(Integer.parseInt(crawl.getPage())+ 1 )); } // if("2713".equals(crawl.getPage())) break; if ( "2" .equals(crawl.getPage())){ break ; } } } catch (NumberFormatException e) { e.printStackTrace(); throw new NumberFormatException( "數字格式錯誤" ); } catch (UnsupportedEncodingException e) { e.printStackTrace(); throw new UnsupportedEncodingException( "不支持的編碼集" ); } } /** * 爬取搜索列表 * @param page * @return */ private String crawlList(HttpRequestBase request){ int statusCode = 0 ; // 下面兩種方式都可以用來創建客戶端連接,相當于打開了一個瀏覽器 CloseableHttpClient httpClient = HttpClients.createDefault(); // HttpClient httpClient = HttpClientBuilder.create().build(); HttpEntity httpEntity = null ; HttpResponse response = null ; try { try { response = httpClient.execute(request); } catch (Exception e){ e.printStackTrace(); EntityUtils.consumeQuietly(httpEntity); return "重跑" ; } //打印狀態 statusCode =response.getStatusLine().getStatusCode(); if (statusCode!= 200 ){ EntityUtils.consumeQuietly(httpEntity); return "重跑" ; } //實體 httpEntity = response.getEntity(); String searchListStr = EntityUtils.toString(httpEntity, "GBK" ).replaceAll( "\\\\米" , "米" ); String allData = (String) JSONObject.parseObject(searchListStr).get( "d" ); // 字符串值中間含雙引號的替換處理 String s = allData.replaceAll( "\\{\"" , "{'" ) .replaceAll( "\":\"" , "':'" ) .replaceAll( "\",\"" , "','" ) .replaceAll( "\":" , "':" ) .replaceAll( ",\"" , ",'" ) .replaceAll( "\"\\}" , "'}" ) .replaceAll( "\"" , "" ) .replaceAll( "'" , "\"" ) .replaceAll( "<br />" , "" 
) .replaceAll( "\t" , "" ) .replaceAll( "\\\\" , "?" ); JSONObject jsonData = JSONObject.parseObject(s); JSONArray jsonContent = jsonData.getJSONArray( "orgList" ); searchListStr = null ; allData = null ; s = null ; if (jsonContent== null || jsonContent.size()< 1 ) { return "重跑" ; } System.out.println(jsonContent.toJSONString()); return "爬取" ; } catch (Exception e) { e.printStackTrace(); return "重跑" ; } finally { EntityUtils.consumeQuietly(httpEntity); } } private String getPage() { return page; } private void setPage(String page) { this .page = page; } } |
補充知識:Java 利用 HttpClient 發送 POST 請求,將請求數據放到 body 裡
我就廢話不多說了,大家還是直接看代碼吧~
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
|
/** * post請求 ,請求數據放到body里 * @param url 請求地址 * @param bodyData 參數 * @author wangyj * @date 2019年4月20日 */ public static String doPostBodyData(String url, String bodyData) throws Exception{ String result = "" ; CloseableHttpClient httpClient = null ; CloseableHttpResponse response = null ; try { HttpPost httpPost = getHttpPost(url, null ); // 請求地址 httpPost.setEntity( new StringEntity(bodyData, Encoding)); httpClient = getHttpClient(); // 得到返回的response response = httpClient.execute(httpPost); HttpEntity entity = response.getEntity(); result = getResult(entity, Encoding); } catch (Exception e) { throw e; } finally { // 關閉httpClient if ( null != httpClient) { httpClient.close(); } // 關閉response if ( null != response) { EntityUtils.consume(response.getEntity()); // 會自動釋放連接 response.close(); } } return result; } |
以上這篇java 實現通過 post 方式提交json參數操作就是小編分享給大家的全部內容了,希望能給大家一個參考,也希望大家多多支持服務器之家。
原文鏈接:https://blog.csdn.net/zhouxukun123/article/details/79441031