Python 爬蟲學習之定向爬取淘寶商品價格，供大家參考，具體內容如下：
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
|
import json
import re

# NOTE(review): 44 is the step applied to the "s" query parameter between
# successive requests; presumably the number of items per result page - confirm.
_PAGE_STEP = 44

# "view_price":"<digits and dots>" - group 1 is the bare price text.
_PRICE_RE = re.compile(r'"view_price":"([\d.]*?)"')
# "raw_title":"<text>" - group 1 is the whole quoted token, with
# backslash-escaped characters (including \") kept inside the match.
_TITLE_RE = re.compile(r'"raw_title":("(?:[^"\\\n]|\\.)*")')


def getHTMLText(url, timeout=30):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait before giving up (default 30).

    Returns:
        The decoded body, or "" when the request fails or the server
        answers with an error status.
    """
    # Imported here so the parsing/printing helpers stay importable when the
    # third-party ``requests`` package is not installed.
    import requests

    try:
        r = requests.get(url, timeout=timeout)
        # Raise for 4xx/5xx responses so they take the failure path below.
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        return ""


def _decode_title(token):
    """Turn a quoted title token into its text without using eval()."""
    try:
        return json.loads(token)
    except ValueError:
        # Not a valid JSON string literal: keep the raw text between quotes.
        return token[1:-1]


def parsePage(ilt, html):
    """Append ``[price, title]`` pairs found in *html* to *ilt*.

    Prices come from ``"view_price":"..."`` fields and titles from
    ``"raw_title":"..."`` fields; the n-th price is paired with the n-th
    title. Both values are strings. Surplus prices or titles are ignored.

    Args:
        ilt: List that receives the extracted pairs (mutated in place).
        html: Page source to scan; empty or non-string input adds nothing.
    """
    if not isinstance(html, str) or not html:
        return
    prices = _PRICE_RE.findall(html)
    titles = _TITLE_RE.findall(html)
    # zip() stops at the shorter list, so a mismatch cannot raise IndexError.
    for price, token in zip(prices, titles):
        ilt.append([price, _decode_title(token)])


def PrintGoodsList(ilt):
    """Print a header row followed by one numbered row per ``[price, title]``."""
    tplt = "{:4}\t{:8}\t{:16}"
    print(tplt.format("序號", "價格", "商品名稱"))
    for count, g in enumerate(ilt, start=1):
        print(tplt.format(count, g[0], g[1]))


def main(goods='書包', depth=2):
    """Fetch *depth* search requests for *goods* and print what was found.

    Args:
        goods: Search keyword appended to the query string.
        depth: Number of requests to make.
    """
    start_url = 'https://s.taobao.com/search?q=' + goods
    infoList = []
    for i in range(depth):
        url = start_url + '&s=' + str(_PAGE_STEP * i)
        # getHTMLText returns "" on failure and parsePage ignores "", so one
        # failed request does not stop the remaining ones.
        parsePage(infoList, getHTMLText(url))
    PrintGoodsList(infoList)


if __name__ == "__main__":
    main()
以上就是本文的全部內容，希望對大家的學習有所幫助，也希望大家多多支持服務器之家。
原文鏈接:http://blog.csdn.net/learn_is_happy/article/details/78773956