Python 抓取京东手机数据
作者:vpoet
mail:vpoet_sir@163.com
# coding=utf-8
"""Fetch JD.com phone search-result pages and print each listed price.

Originally written for Python 2 (``urllib2`` and ``print`` statements).  The
import fallback and call-style single-argument ``print`` below keep the
Python 2 output unchanged while letting the file run on Python 3 as well.
Requires the third-party ``lxml`` package.
"""

import re  # only needed by the unfinished review-count sketches noted below

try:
    import urllib2
except ImportError:  # Python 3: urllib2's API lives in urllib.request
    import urllib.request as urllib2

from lxml import etree


# Search URL template; "%s" receives the page number.  Every "%%" is a literal
# "%" so the percent-encoded UTF-8 keyword (Chinese for "mobile phone")
# survives the "%" string formatting.
# NOTE(review): everything after "#" is a URL fragment, and fragments are not
# sent to the server, so the page number (and the space inside
# "click=3- 655") presumably never reaches JD.  Confirm the intended query
# layout before raising PAGE_NUM above 1.
MAIN_URL = """http://search.jd.com/Search?keyword=%%E6%%89%%8B%%E6%%9C%%BA&enc=utf-8&suggest=0#keyword=%%E6%%89%%8B%%E6%%9C%%BA&enc=utf-8&qrst=1&ps=addr&rt=1&stop=1&sttr=1&cid3=655&click=3- 655&psort=3&page=%s"""  # noqa: E501

# Number of result pages to fetch; pages are numbered from 0 via range().
PAGE_NUM = 1

# Location of the <strong> price nodes inside the product list.
PRICE_XPATH = ("//*[@id='plist']/ul[@class='list-h clearfix']"
               "/li/div/div[@class='p-price']/strong")

# TODO: unfinished extraction sketches kept from the original draft; none of
# them is wired in yet.
#   phone names:     //div[@id='plist']/ul/li/div[@class='lh-wrap']/div[@class='p-name']/a/text()
#   picture URLs:    //div[@class='lh-wrap']/div[@class='p-img']/a/img
#                    -> node.values()[3]
#   shorter price:   //div[@class='p-price']/strong
#   review count:    //div[@class='extra']/a/text()
#                    -> re.findall(r'.{2}(\d+).{3}', text)[0]
#   good-review pct: //div[@class='extra']/span[@class='reputation']/text()
#                    -> re.findall(r'\((\d{2})%.{2}\)', text)[0]


def fetch_html(url):
    """Return the raw response body of *url*.

    The response object is always closed, even when reading fails, so the
    underlying connection is not leaked.
    """
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def print_prices(tree):
    """Print the price of every node matched by PRICE_XPATH, numbered from 1.

    Each output line is "<price>\\t<index>\\n"; ``print`` appends its own
    newline, so the entries end up separated by a blank line.
    """
    for index, price_node in enumerate(tree.xpath(PRICE_XPATH), 1):
        # The price is taken from the node's second attribute value.
        # NOTE(review): this depends on the attribute order in JD's markup and
        # raises IndexError for a node with fewer than two attributes; looking
        # the attribute up by name would be safer once its name is confirmed.
        print(price_node.values()[1] + '\t' + str(index) + '\n')


def main():
    """Download each result page, parse it as HTML and print its prices."""
    for page in range(PAGE_NUM):
        html = fetch_html(MAIN_URL % page)
        print_prices(etree.HTML(html))

    print("over")


if __name__ == '__main__':
    main()
这个还没写完，先保存在这里，有时间再完成。