"""Scrape produce price listings from xinfadi.com.cn into a CSV file.

Each requested page of the site's price endpoint is fetched in a worker
thread, the price records are extracted from the response text with a
regular expression, and the resulting rows are appended to ``filename``.
"""

import csv  # CSV output
import json  # JSON
import os  # operating-system helpers
import re  # regular expressions
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor  # thread / process pools
from threading import Lock, Thread  # threads

filename = "xingcaidi.csv"

PRICE_URL = "http://www.xinfadi.com.cn/getPriceData.html"

# Column titles, in the same order as the capture groups of _RECORD_PATTERN.
HEADER = ['品名', '一级分类', '最低价', '最高价', '平均价', '产地', '规格', '单位', '发布日期']

# Inclusive page range fetched when the script is run directly.
FIRST_PAGE = 1
LAST_PAGE = 1

# Seconds to wait for the server before giving up on a page.
REQUEST_TIMEOUT = 10

# One match per price record. Compiled once at import time instead of on
# every download() call. re.S lets the lazy ".*?" gaps span newlines.
_RECORD_PATTERN = re.compile(
    r'"prodName":"(?P<name>.*?)"'
    r'.*?"prodCat":"(?P<category>.*?)",'
    r'.*?"lowPrice":"(?P<low>.*?)",'
    r'"highPrice":"(?P<high>.*?)",'
    r'"avgPrice":"(?P<avg>.*?)",'
    r'"place":"(?P<place>.*?)",'
    r'"specInfo":"(?P<spec>.*?)",'
    r'"unitInfo":"(?P<unit>.*?)",'
    r'"pubDate":"(?P<date>.*?)"',
    re.S,
)

# The csv writer is shared by every worker thread; rows are written under
# this lock so that concurrent pages cannot interleave their output.
_write_lock = Lock()

# csv.writer used by download(); bound by main() while the output file is open.
writer = None


def parse_rows(text: str) -> list:
    """Extract price records from the raw response text.

    Args:
        text: Body of a response from the price endpoint.

    Returns:
        A list of 9-tuples of strings ordered like ``HEADER``, with every
        backslash removed from each field. Empty when nothing matches.
    """
    return [
        tuple(field.replace('\\', '') for field in record)
        for record in _RECORD_PATTERN.findall(text)
    ]


def fetch_page(index: int, timeout: float = REQUEST_TIMEOUT) -> str:
    """POST the query form for page ``index`` and return the response text.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-success HTTP status.
    """
    # Imported here so parse_rows() stays usable without the HTTP dependency.
    import requests

    form = {
        "limit": "20",
        "current": index,
        "pubDateStartTime": "",
        "pubDateEndTime": "",
        "prodPcatid": "",
        "prodCatid": "",
        "prodName": "",
    }
    response = requests.post(url=PRICE_URL, data=form, timeout=timeout)
    response.raise_for_status()
    return response.text


def download(index: int) -> None:
    """Fetch page ``index`` and append its rows to the shared CSV writer.

    Raises:
        RuntimeError: if the module-level ``writer`` has not been set up.
    """
    if writer is None:
        raise RuntimeError("download() needs the CSV writer that main() sets up")
    print(index)
    rows = parse_rows(fetch_page(index))
    with _write_lock:
        writer.writerows(rows)


def main(first_page: int = FIRST_PAGE, last_page: int = LAST_PAGE,
         max_workers: int = 100) -> None:
    """Download pages ``first_page``..``last_page`` (inclusive) into the CSV.

    The header row is written only when the output file is new or empty, so
    repeated runs keep appending data without duplicating the header.
    A failing page is reported and does not stop the remaining pages.
    """
    global writer

    needs_header = not os.path.exists(filename) or os.path.getsize(filename) == 0

    # Explicit UTF-8 so the Chinese header and values do not depend on the
    # platform's default encoding.
    with open(filename, 'a', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        try:
            if needs_header:
                writer.writerow(HEADER)

            with ThreadPoolExecutor(max_workers=max_workers) as pool:
                futures = {
                    index: pool.submit(download, index=index)
                    for index in range(first_page, last_page + 1)
                }
                # An exception raised inside a worker only surfaces when the
                # future's result is requested; without this it is lost.
                for index, future in futures.items():
                    try:
                        future.result()
                    except Exception as exc:  # top-level boundary: report, keep going
                        print(f"page {index} failed: {exc!r}")
        finally:
            writer = None

    print("打印完成")


if __name__ == '__main__':
    main()