1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
import os
import queue
import sys
import time
import traceback

from fleader import fleader as rq
# Shared job queue: spider.feed() puts job dicts on it and the getp() workers
# take them off.
q = queue.Queue()
def getp(_):
    """Worker loop: take queued jobs, fetch each URL and run its callback.

    Every job is a dict with the keys ``url``, ``request``, ``parse`` and
    ``meta``.  The worker calls ``request(url)``, stores the return value
    and ``meta`` on a fresh ``response`` object and passes that object to
    ``parse``.

    The process is terminated with ``os._exit(0)`` once every job that was
    ever put on the queue has been completely processed.  Checking only
    that the queue is empty would not be enough: another worker may still
    be inside a request or callback that is about to queue more URLs.

    Args:
        _: Unused.  ``getp`` is handed to ``rq.pool`` together with
            ``range(num)``; presumably the pool passes one element of that
            range to each worker -- confirm against the fleader docs.
    """
    while True:
        job = q.get()
        try:
            url, request, parse, meta = (
                job['url'], job['request'], job['parse'], job['meta'])
            rp = response()
            rp.result = request(url)
            rp.meta = meta
            parse(rp)
        except Exception:
            # A failing request or callback must not take the worker down
            # silently; report it and carry on with the next job.
            traceback.print_exc()
        finally:
            # Mark the job finished only after its callback has had the
            # chance to queue follow-up jobs, so the counter checked below
            # never reaches zero while work is still pending.
            q.task_done()
        # unfinished_tasks is the counter queue.Queue maintains via
        # put()/task_done() (CPython attribute, not in the documented API).
        # Zero means nothing is queued and nothing is in flight.
        if q.unfinished_tasks == 0:
            # os._exit() skips the normal interpreter shutdown, so buffered
            # output has to be flushed by hand or it is lost.
            sys.stdout.flush()
            sys.stderr.flush()
            os._exit(0)
class response():
    """Container passed to parse callbacks.

    Attributes:
        result: Whatever the job's request function returned; ``''`` until
            it is assigned.
        meta: Dict of extra data attached to the job; empty until assigned.
    """

    # Class-level defaults are kept so that ``response.result`` and
    # ``response.meta`` still resolve for code that reads them on the class.
    result = ''
    meta = {}

    def __init__(self):
        """Give every instance its own ``result`` and ``meta``.

        Without this, all instances would share the single class-level
        ``meta`` dict and a mutation on one response would show up on
        every other one.
        """
        self.result = ''
        self.meta = {}
class spider():
    """Base class for queue-driven spiders.

    Subclasses set ``start_urls`` and override ``parse``.  Creating an
    instance immediately starts the crawl (``__init__`` calls ``start``).
    """

    # URLs queued when the spider starts; meant to be overridden.
    start_urls = []
    # Passed to rq.pool both as the length of the range and as its third
    # argument.
    num = 20

    def __init__(self):
        """Start the spider as soon as it is constructed."""
        self.start()

    def request(self, url):
        """Default fetcher: return ``rq.get(url)``."""
        return rq.get(url)

    def parse(self, response):
        """Default callback; does nothing.  Override in subclasses."""
        pass

    def feed(self, url, meta=None, callback=None, request=None):
        """Put one job per URL on the shared queue.

        Args:
            url: A single URL string or an iterable of URLs.
            meta: Dict stored on the response handed to the callback.  When
                omitted, every job gets its own fresh empty dict, so data
                written by one callback cannot leak into other jobs.  An
                explicitly passed dict is shared by all URLs of this call.
            callback: Called with the response object; defaults to
                ``self.parse``.
            request: Called with the URL to fetch it; defaults to
                ``self.request``.
        """
        if callback is None:
            callback = self.parse
        if request is None:
            request = self.request
        # isinstance also recognises str subclasses, which would otherwise
        # be iterated character by character.
        if isinstance(url, str):
            url = [url]
        for u in url:
            q.put({
                'url': u,
                'request': request,
                'parse': callback,
                'meta': {} if meta is None else meta,
            })

    def start(self):
        """Queue ``start_urls`` and launch the workers.

        With no start URLs nothing is queued and only a greeting is
        printed.
        """
        if self.start_urls:
            self.feed(self.start_urls)
            num = self.num
            rq.pool(getp, range(num), num)
        else:
            print('hello rider')
class amz(spider):
    """Example spider: fetches the local host and prints what comes back."""

    start_urls = ['http://127.0.0.1']

    def parse(self, response):
        """Print the fetched result stored on *response*."""
        fetched = response.result
        print(fetched)
if __name__ == '__main__':
    # Constructing the spider is what starts the crawl: spider.__init__
    # calls start().
    amz()
|