A Multithreaded Scraper for Tender Notices from 15 Websites

Posted: 2025-08-15 04:09

My previous post was a tender-notice scraper, but it turned out to be slow: a full run took over half an hour. Following the advice of the forum veterans, I added multithreading (what a difference, a full run now finishes in a bit over 50 seconds), plus a time filter (notices more than two days old aren't worth much, so out they go). I also considered saving the results to a CSV, but since PyCharm lets you open the printed links directly, there wasn't much point, so I left it out (a sketch of what it could look like follows the code). I originally meant to put both versions in one post, but two 800-line listings would have made it far too long.

P.S. I keep feeling the multithreaded version collects less data than the single-threaded one. Is that just my imagination? Also, as someone who scored barely 20 points on the English section of the gaokao, please go easy on my function and variable names.
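Before the full script, here is a minimal sketch of the producer-consumer pattern plus the two-day date filter described above, stripped of all site-specific parsing. The URLs and the fake "demo notice" row are placeholders, not part of the real scraper:

[Python]

import datetime
import threading
from queue import Queue, Empty

# Illustrative cutoff: notices older than two days get dropped
cutoff = datetime.date.today() + datetime.timedelta(-2)

url_queue = Queue()      # work for the producers
result_queue = Queue()   # parsed rows for the consumers

def producer():
    while True:
        try:
            url = url_queue.get_nowait()
        except Empty:
            break
        # ...fetch and parse `url` here; we fake a dated row instead...
        result_queue.put(str(datetime.date.today()) + '|demo notice|' + url)

def consumer():
    while True:
        try:
            row = result_queue.get(timeout=1)  # give producers a moment
        except Empty:
            break
        # Rows start with an ISO date, so a plain string compare filters by day
        if row[:10] > str(cutoff):
            print(row)

for u in ['http://example.com/1', 'http://example.com/2']:
    url_queue.put(u)
threads = [threading.Thread(target=producer) for _ in range(2)]
threads += [threading.Thread(target=consumer) for _ in range(2)]
for t in threads:
    t.start()
for t in threads:
    t.join()

Using get() with a timeout (or get_nowait()) sidesteps the race between checking empty() and calling get() that the full script's while-not-empty loops are exposed to.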

[Python]

import requests
import re
import json
import datetime
from lxml import etree
from queue import Queue
import threading
import time

# Anything dated earlier than two days ago is filtered out downstream
yesterday = datetime.date.today() + datetime.timedelta(-2)
# Title keyword whitelist (exhibition halls, renovation, design, landscaping, ...)
need = ['馆', '展厅', '厅', '装修', '设计', '施工', '景观', '展', '装饰', '基地',
        '党建', '文化', '空间', '线上', '数字', '策划', '提升', '美丽乡村']
# Blacklist: award / result announcements
no_need_1 = '中标'
no_need_2 = '结果'

HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}


class Producer(threading.Thread):
    def __init__(self, url_queue, pc_queue, *args, **kwargs):
        super(Producer, self).__init__(*args, **kwargs)
        self.url_queue = url_queue
        self.pc_queue = pc_queue

    def run(self):
        # Note: empty() followed by get() can race between threads;
        # get(timeout=...) would be the safer pattern.
        while True:
            if self.url_queue.empty():
                break
            url = self.url_queue.get()
            self.jie(url)

    def jie(self, url):
        # Queue items look like '<site index>|<url>'; the index picks the parser.
        index, url1 = url.rsplit('|', 1)
        dispatch = {
            '1': self.sdggzyjyzx, '2': self.sdggzyjyzx,
            '3': self.qdggzyjyw, '4': self.qdggzyjyw,
            '5': self.zbggzyjyw, '6': self.zbggzyjyw,
            '7': self.zzggzyjyw,
            '8': self.dyggzyjyw, '9': self.dyggzyjyw,
            '10': self.wfggzyjyw, '11': self.wfggzyjyw, '12': self.wfggzyjyw,
            '13': self.jnggzyjyw,
            '14': self.taggzyjyw, '15': self.taggzyjyw,
            '16': self.whggzyjyw, '17': self.whggzyjyw,
            '18': self.rzggzyjyw, '19': self.rzggzyjyw, '20': self.rzggzyjyw,
            '21': self.lyggzyjyw, '22': self.lyggzyjyw,
            '23': self.lyggzyjyw, '24': self.lyggzyjyw,
            '25': self.lcggzyjyw, '26': self.lcggzyjyw,
            # The original's branch for index 29 pulled a second item off the
            # queue and called bzggzyjyw with an undefined url1; fixed here.
            '27': self.bzggzyjyw, '28': self.bzggzyjyw, '29': self.bzggzyjyw,
            '30': self.hzggzyjyw, '31': self.hzggzyjyw, '32': self.hzggzyjyw,
        }
        handler = dispatch.get(index)
        if handler:
            handler(url1)

    def _emit(self, dates, titles, urls):
        # Join each (date, |title|, url) triple into one line for the consumers
        self.pc_queue.put([''.join(item) for item in zip(dates, titles, urls)])

    def hzggzyjyw(self, url1):
        time.sleep(1)
        r = requests.get(url1, headers=HEADERS).text
        html = etree.HTML(r)
        title = html.xpath('/html/body/div[2]/div[2]/div[2]/div/div[2]/ul/li/a/text()')
        url_1 = html.xpath('/html/body/div[2]/div[2]/div[2]/div/div[2]/ul/li/a/@href')
        time_rq = html.xpath('/html/body/div[2]/div[2]/div[2]/div/div[2]/ul/li/span/text()')
        t = ['|' + x + '|' for x in title]
        u = ['http://www.hzsggzyjyzx.gov.cn' + x for x in url_1]
        self._emit(time_rq, t, u)

    def bzggzyjyw(self, url1):
        time.sleep(1)
        r = requests.get(url1, headers=HEADERS).text
        html = etree.HTML(r)
        title = html.xpath('//*[@id="right"]/table/tr[1]/td/table/tr/td[2]/a/text()')
        url_1 = html.xpath('//*[@id="right"]/table/tr[1]/td/table/tr/td[2]/a/@href')
        time_rq = html.xpath('//*[@id="right"]/table/tr[1]/td/table/tr/td[3]/text()')
        t = ['|' + x + '|' for x in title]
        u = ['http://ggzyjy.binzhou.gov.cn' + x for x in url_1]
        self._emit(time_rq, t, u)

    def lcggzyjyw(self, url1):
        time.sleep(1)
        r = requests.get(url1, headers=HEADERS).text
        html = etree.HTML(r)
        title = html.xpath('/html/body/div[3]/div[2]/div/div[2]/ul/li/a/text()')
        url_1 = html.xpath('/html/body/div[3]/div[2]/div/div[2]/ul/li/a/@href')
        time_rq = html.xpath('/html/body/div[3]/div[2]/div/div[2]/ul/li/span/text()')
        t = ['|' + x + '|' for x in title]
        u = ['http://www.lcsggzyjy.cn' + x for x in url_1]
        self._emit(time_rq, t, u)

    def lyggzyjyw(self, url1):
        time.sleep(1)
        r = requests.get(url1, headers=HEADERS).text
        html = etree.HTML(r)
        title = html.xpath('/html/body/div[3]/div[2]/div/div[2]/ul/li/a/text()')
        url_1 = html.xpath('/html/body/div[3]/div[2]/div/div[2]/ul/li/a/@href')
        time_rq = html.xpath('/html/body/div[3]/div[2]/div/div[2]/ul/li/span/text()')
        t = ['|' + x + '|' for x in title]
        u = ['http://ggzyjy.linyi.gov.cn' + x for x in url_1]
        self._emit(time_rq, t, u)

    def rzggzyjyw(self, url1):
        time.sleep(1)
        r = requests.get(url1, headers=HEADERS).text
        html = etree.HTML(r)
        title = html.xpath('//*[@id="DataList1"]/tr/td/li/a/div[1]/text()')
        url_1 = html.xpath('//*[@id="DataList1"]/tr/td/li/a/@href')
        time_rq = html.xpath('//*[@id="DataList1"]/tr/td/li/a/div[2]/text()')
        t = ['|' + x.strip() + '|' for x in title]
        # hrefs start with '..'; drop that before prefixing the site root
        u = ['http://ggzyjy.rizhao.gov.cn/rzwz' + x[2:] for x in url_1]
        # dates come as 'yyyy.mm.dd'; normalize to 'yyyy-mm-dd' for the filter
        m = [x.strip().replace('.', '-') for x in time_rq]
        self._emit(m, t, u)

    def whggzyjyw(self, url1):
        time.sleep(1)
        r = requests.get(url1, headers=HEADERS).text
        html = etree.HTML(r)
        # Titles are split across several text nodes, so join them per <li>
        e = []
        for i in range(1, 11):
            title = html.xpath('/html/body/div[4]/div[3]/div/ul/li[' + str(i) + ']/div/a/text()')
            e.append(''.join(title).strip())
        time_rq = html.xpath('/html/body/div[4]/div[3]/div/ul/li/div/div/text()')
        m = [x.strip() for x in time_rq]
        # The original read only li[1] here, which truncated the list to one link
        url_1 = html.xpath('/html/body/div[4]/div[3]/div/ul/li/div/a/@href')
        u = ['http://www.whggzyjy.cn' + x for x in url_1]
        t = ['|' + x + '|' for x in e]
        self._emit(m, t, u)

    def taggzyjyw(self, url1):
        time.sleep(1)
        r = requests.get(url1, headers=HEADERS).text
        html = etree.HTML(r)
        title = html.xpath('//*[@id="right_table"]/table/tr/td[2]/a/text()')
        url_1 = html.xpath('//*[@id="right_table"]/table/tr/td[2]/a/@href')
        time_rq = html.xpath('//*[@id="right_table"]/table/tr/td[3]/text()')
        m = [x.replace('[', '').replace(']', '') for x in time_rq]
        t = ['|' + x + '|' for x in title]
        u = ['http://www.taggzyjy.com.cn' + x for x in url_1]
        self._emit(m, t, u)

    def jnggzyjyw(self, url1):
        # Jining serves JSON via POST instead of an HTML list page
        headers = dict(HEADERS, **{'Content-Type': 'application/json'})
        time.sleep(1)
        payloadData = {"FilterText": "", "categoryCode": "503000",
                       "maxResultCount": 20, "skipCount": 0, "tenantId": "3"}
        r = requests.post(url1, data=json.dumps(payloadData), headers=headers).text
        title = re.findall(r'title":"(.*?)",', r)
        url_1 = re.findall(r'"id":"(.*?)"},', r)
        time_rq = re.findall(r'"releaseDate":"(.*?)T', r)
        t = ['|' + x + '|' for x in title]
        u = ['http://ggzy.jining.gov.cn/JiNing/Bulletins/Detail/' + x
             + '/?CategoryCode=503000' for x in url_1]
        self._emit(time_rq, t, u)

    def wfggzyjyw(self, url1):
        time.sleep(1)
        r = requests.get(url1, headers=HEADERS).text
        html = etree.HTML(r)
        title = html.xpath('//*[@class="info-form"]/table/tbody/tr/td[3]/span/a/text()')
        url_1 = html.xpath('//*[@class="info-form"]/table/tbody/tr/td[3]/span/a/@href')
        time_rq = html.xpath('//*[@class="info-form"]/table/tbody/tr/td[4]/span/text()')
        t = ['|' + x + '|' for x in title]
        u = ['http://ggzy.weifang.gov.cn' + x for x in url_1]
        self._emit(time_rq, t, u)

    def dyggzyjyw(self, url1):
        time.sleep(1)
        r = requests.get(url1, headers=HEADERS).text
        html = etree.HTML(r)
        title = html.xpath('//*[@height="25"]/td[2]/a/font/text()')
        url_1 = html.xpath('//*[@height="25"]/td[2]/a/@href')
        time_rq = html.xpath('//*[@height="25"]/td[3]/text()')
        m = [x.replace('[', '').replace(']', '') for x in time_rq]
        t = ['|' + x + '|' for x in title]
        u = ['http://ggzy.dongying.gov.cn' + x for x in url_1]
        self._emit(m, t, u)

    def zzggzyjyw(self, url1):
        time.sleep(1)
        r = requests.get(url1, headers=HEADERS).text
        html = etree.HTML(r)
        title = html.xpath('//*[@width="98%"]/tr/td[3]/a/text()')
        url_1 = html.xpath('//*[@width="98%"]/tr/td[3]/a/@href')
        time_rq = html.xpath('//*[@width="98%"]/tr/td[4]/text()')
        m = [x.replace('[', '').replace(']', '') for x in time_rq]
        t = ['|' + x + '|' for x in title]
        u = ['http://www.zzggzy.com' + x for x in url_1]
        self._emit(m, t, u)

    def zbggzyjyw(self, url1):
        time.sleep(1)
        r = requests.get(url1, headers=HEADERS).text
        html = etree.HTML(r)
        title = html.xpath('//*[@id="MoreInfoList1_DataGrid1"]/tr/td[2]/a/text()')
        url_1 = html.xpath('//*[@id="MoreInfoList1_DataGrid1"]/tr/td[2]/a/@href')
        time_rq = html.xpath('//*[@id="MoreInfoList1_DataGrid1"]/tr/td[3]/text()')
        e = [x.strip() for x in time_rq]
        t = ['|' + x + '|' for x in title]
        u = ['http://ggzyjy.zibo.gov.cn' + x for x in url_1]
        self._emit(e, t, u)

    def sdggzyjyzx(self, url1):
        r = requests.get(url1, headers=HEADERS).text
        html = etree.HTML(r)
        title = html.xpath('//*[@class="ewb-list"]/li/a/text()')
        date = html.xpath('//*[@class="ewb-list"]/li/span/text()')
        url_1 = html.xpath('//*[@class="ewb-list"]/li/a/@href')
        u = ['http://ggzyjyzx.shandong.gov.cn' + x for x in url_1]
        t = [' | ' + x + ' | ' for x in title]
        self._emit(date, t, u)

    def qdggzyjyw(self, url1):
        time.sleep(1)
        r = requests.get(url1, headers=HEADERS).text
        html = etree.HTML(r)
        title = html.xpath('//*[@class="info_con"]/table/tr/td/a/@title')
        url_1 = html.xpath('//*[@class="info_con"]/table/tr/td/a/@href')
        time_rq = html.xpath('//*[@class="info_con"]/table/tr/td[2]/text()')
        t = ['|' + x + '|' for x in title]
        u = ['https://ggzy.qingdao.gov.cn' + x for x in url_1]
        self._emit(time_rq, t, u)


class Consumer(threading.Thread):
    def __init__(self, url_queue, pc_queue, *args, **kwargs):
        super(Consumer, self).__init__(*args, **kwargs)
        self.url_queue = url_queue
        self.pc_queue = pc_queue

    def run(self):
        while True:
            if self.pc_queue.empty() and self.url_queue.empty():
                break
            q = self.pc_queue.get()
            for tt in q:
                for need_1 in need:
                    # Keep whitelisted titles, drop award/result notices
                    # (no_need_2 was defined but never checked in the original)
                    if need_1 in tt and no_need_1 not in tt and no_need_2 not in tt:
                        # Lines start with the date, so a string compare filters by day
                        if str(yesterday) < tt[:10]:
                            print(need_1)
                            print(tt)


def main():
    start = time.time()
    url_queue = Queue(1000)
    pc_queue = Queue(10000)
    tsk = []
    for i in range(1, 33):
        if i == 1:
            print('Start')
            url_queue.put(str(i) + '|' + 'http://ggzyjyzx.shandong.gov.cn/003/003001/003001001/moreinfo.html')
        if i == 2:
            url_queue.put(str(i) + '|' + 'http://ggzyjyzx.shandong.gov.cn/003/003004/003004001/moreinfo.html')
        if i == 3:
            for z in range(1, 5):
                url_queue.put(str(i) + '|' + 'https://ggzy.qingdao.gov.cn/Tradeinfo-GGGSList/0-0-0?pageIndex=' + str(z))
        if i == 4:
            for z in range(1, 5):
                url_queue.put(str(i) + '|' + 'https://ggzy.qingdao.gov.cn/Tradeinfo-GGGSList/1-1-0?pageIndex=' + str(z))
        if i == 5:
            # Zibo construction notices: (subcategory, CategoryNum) pairs
            for sub, num in [('002001001001', '268698113'), ('002001001002', '268698114'),
                             ('002001001013', '268698123'), ('002001001009', '2001001009'),
                             ('002001001010', '268698120'), ('002001001007', '268698119'),
                             ('002001001008', '2001001008'), ('002001001006', '268698118'),
                             ('002001001012', '268698122'), ('002001001004', '268698116'),
                             ('002001001005', '268698117'), ('002001001011', '268698121')]:
                url_queue.put(str(i) + '|' + 'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002001/002001001/'
                              + sub + '/MoreInfo.aspx?CategoryNum=' + num)
        if i == 6:
            # Zibo government procurement notices
            for sub, num in [('002002001001', '268960257'), ('002002001002', '268960258'),
                             ('002002001011', '268960265'), ('002002001007', '268960263'),
                             ('002002001012', '268960266'), ('002002001009', '2002001009'),
                             ('002002001010', '268960264'), ('002002001008', '2002001008'),
                             ('002002001006', '268960262'), ('002002001004', '268960260'),
                             ('002002001005', '268960261'), ('002002001013', '268960267')]:
                url_queue.put(str(i) + '|' + 'http://ggzyjy.zibo.gov.cn/TPFront/jyxx/002002/002002001/'
                              + sub + '/MoreInfo.aspx?CategoryNum=' + num)
        if i == 7:
            url_queue.put(str(i) + '|' + 'http://www.zzggzy.com/TPFront/jyxx/070001/070001001/')
            for y in range(1, 4):
                url_queue.put(str(i) + '|' + 'http://www.zzggzy.com/TPFront/jyxx/070002/070002001/Paging=' + str(y))
        if i == 8:
            for z in range(1, 10):
                url_queue.put(str(i) + '|' + 'http://ggzy.dongying.gov.cn/dyweb/004/004001/004001001/0040010010'
                              + str(z).rjust(2, '0') + '/')
        if i == 9:
            for z in range(1, 10):
                url_queue.put(str(i) + '|' + 'http://ggzy.dongying.gov.cn/dyweb/004/004002/004002001/0040020010'
                              + str(z).rjust(2, '0') + '/')
        if i == 10:
            for z in range(1, 4):
                url_queue.put(str(i) + '|' + 'http://ggzy.weifang.gov.cn/wfggzy/showinfo/moreinfo_gg.aspx?address=&type=&categorynum=004012001&Paging=' + str(z))
        if i == 11:
            for z in range(1, 6):
                url_queue.put(str(i) + '|' + 'http://ggzy.weifang.gov.cn/wfggzy/showinfo/moreinfo_gg_zfcgtwo.aspx?address=&type=&categorynum=004002001&Paging=' + str(z))
        if i == 12:
            for z in range(1, 4):
                url_queue.put(str(i) + '|' + 'http://ggzy.weifang.gov.cn/wfggzy/showinfo/moreinfo_gg_zfcg_cgxq.aspx?address=&categorynum=004002017&Paging=' + str(z))
        if i == 13:
            url_queue.put(str(i) + '|' + 'http://ggzy.jining.gov.cn/api/services/app/stPrtBulletin/GetBulletinList')
        if i == 14:
            for z in range(1, 8):
                url_queue.put(str(i) + '|' + 'http://www.taggzyjy.com.cn/Front/jyxx/075001/075001001/07500100100' + str(z) + '/')
        if i == 15:
            for sub in ['075002/075002004/07500200400', '075002/075002001/07500200100']:
                for z in range(1, 8):
                    url_queue.put(str(i) + '|' + 'http://www.taggzyjy.com.cn/Front/jyxx/' + sub + str(z) + '/')
        if i == 16:
            # Weihai: one query per district (origin is the URL-encoded district name)
            for origin in ['', '%E7%8E%AF%E7%BF%A0', '%E9%AB%98%E5%8C%BA', '%E7%BB%8F%E5%8C%BA',
                           '%E4%B8%B4%E6%B8%AF', '%E8%8D%A3%E6%88%90', '%E6%96%87%E7%99%BB',
                           '%E4%B9%B3%E5%B1%B1', '%E5%8D%97%E6%B5%B7']:
                url_queue.put(str(i) + '|' + 'http://www.whggzyjy.cn/queryContent-jyxx.jspx?title=&inDates=&ext=&origin='
                              + origin + '&channelId=563&beginTime=&endTime=')
        if i == 17:
            for z in range(1, 3):
                url_queue.put(str(i) + '|' + 'http://www.whggzyjy.cn/jyxxzfcg/index_' + str(z) + '.jhtml')
        if i == 18:
            for z in range(1, 3):
                url_queue.put(str(i) + '|' + 'http://ggzyjy.rizhao.gov.cn/rzwz/ShowInfo/MoreJyxxList.aspx?categoryNum=071001001&Paging=' + str(z))
        if i == 19:
            for z in range(1, 3):
                url_queue.put(str(i) + '|' + 'http://ggzyjy.rizhao.gov.cn/rzwz/ShowInfo/MoreJyxxList.aspx?categoryNum=071002001&Paging=' + str(z))
        if i == 20:
            for z in range(1, 4):
                url_queue.put(str(i) + '|' + 'http://ggzyjy.rizhao.gov.cn/rzwz/ShowInfo/MoreJyxxList.aspx?categoryNum=071002002&Paging=' + str(z))
        if i == 21:
            for z in range(1, 7):
                url_queue.put(str(i) + '|' + 'http://ggzyjy.linyi.gov.cn/TPFront/jyxx/074001/074001001/07400100100' + str(z) + '/')
        if i == 22:
            for z in range(1, 8):
                url_queue.put(str(i) + '|' + 'http://ggzyjy.linyi.gov.cn/TPFront/jyxx/074002/074002001/07400200100' + str(z) + '/')
        if i == 23:
            for z in range(1, 8):
                url_queue.put(str(i) + '|' + 'http://ggzyjy.linyi.gov.cn/TPFront/jyxx/074002/074002002/07400200200' + str(z) + '/')
        if i == 24:
            # Dezhou pages share the Linyi layout, so index 24 reuses that parser
            for path in ['004001/004001005/004001005001', '004001/004001001/004001001001',
                         '004002/004002005/004002005001', '004002/004002001/004002001001']:
                url_queue.put(str(i) + '|' + 'http://ggzyjy.dezhou.gov.cn/TPFront/xmxx/' + path + '/')
        if i == 25:
            for z in range(1, 6):
                base = '07900100100' + str(z)
                for x in range(1, 15):
                    url_queue.put(str(i) + '|' + 'http://www.lcsggzyjy.cn/lcweb/jyxx/079001/079001001/'
                                  + base + '/' + base + '0' + str(x).rjust(2, '0') + '/')
        if i == 26:
            for z in range(7, 21):
                url_queue.put(str(i) + '|' + 'http://www.lcsggzyjy.cn/lcweb/jyxx/079002/079002001/0790020010'
                              + str(z).rjust(2, '0') + '/')
        if i == 27:
            for cat in ['002004/002004001/0020040010', '002005/002005008/0020050080',
                        '002005/002005004/0020050040']:
                for z in range(1, 12):
                    url_queue.put(str(i) + '|' + 'http://ggzyjy.binzhou.gov.cn/bzweb/002/'
                                  + cat + str(z).rjust(2, '0') + '/')
        # indexes 28 and 29 are dispatched in jie() but never seeded here
        if i == 30:
            url_queue.put(str(i) + '|' + 'http://www.hzsggzyjyzx.gov.cn/jyxx/001001/001001001/about.html')
        if i == 31:
            url_queue.put(str(i) + '|' + 'http://www.hzsggzyjyzx.gov.cn/jyxx/001002/001002001/about.html')
        if i == 32:
            for z in range(1, 4):
                if z == 1:
                    url_queue.put(str(i) + '|' + 'http://www.hzsggzyjyzx.gov.cn/jyxx/001002/001002003/about.html')
                else:
                    url_queue.put(str(i) + '|' + 'http://www.hzsggzyjyzx.gov.cn/jyxx/001002/001002003/' + str(z) + '.html')
    # 6 producers fetch and parse pages; 4 consumers filter and print
    for x in range(6):
        t = Producer(url_queue, pc_queue)
        t.start()
        tsk.append(t)
    for x in range(4):
        t = Consumer(url_queue, pc_queue)
        t.start()
        tsk.append(t)
    for t in tsk:
        t.join()
    end = time.time()
    print('Elapsed: %.2fs' % (end - start))


if __name__ == '__main__':
    main()
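For anyone who does want the CSV export I decided against, here is a minimal sketch. It assumes the consumer collects its matching lines into a list instead of printing them; save_rows and results.csv are illustrative names, not part of the script above:

[Python]

import csv

def save_rows(rows, path='results.csv'):
    # rows are 'date|title|url' strings as built by the scraper above
    # (utf-8-sig so Excel opens the Chinese titles correctly)
    with open(path, 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.writer(f)
        writer.writerow(['date', 'title', 'url'])
        for row in rows:
            date, title, url = row.split('|', 2)
            writer.writerow([date.strip(), title.strip(), url.strip()])

Since several consumer threads would append to the shared list, note that list.append is atomic in CPython, so no extra lock is needed for that part.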
