gevent、multiprocessing、threadpool
在协程、多进程、线程池中，需要关注的是常量、队列、锁。
在采集过程中，需要关注的是硬盘 I/O、网络 I/O、每秒请求量。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
@staticmethod  # thread pool
def pool(callback, lists, threadNum=10):
    """Run ``callback`` over ``lists`` on a ``threadpool.ThreadPool``.

    Args:
        callback: Callable handed to ``threadpool.makeRequests``.
        lists: Argument list handed to ``threadpool.makeRequests``.
        threadNum: Size passed to ``threadpool.ThreadPool`` (default 10).

    Blocks until ``ThreadPool.wait()`` returns. Nothing is returned.
    """
    import threadpool

    # Named ``workers`` so the local does not shadow this function's name.
    workers = threadpool.ThreadPool(threadNum)
    # A plain loop instead of a throwaway list comprehension: putRequest is
    # called only for its side effect of queueing the request.
    for request in threadpool.makeRequests(callback, lists):
        workers.putRequest(request)
    workers.wait()


@staticmethod
def bPool(arg):
    """Fan ``arg['callback']`` out over a pool of ``arg['tnum']`` threads.

    Args:
        arg: Dict from which the following keys are read:
            ``'tnum'``     -- number of threads and number of tasks;
            ``'cnum'``     -- value copied unchanged into every task;
            ``'arg'``      -- payload copied unchanged into every task;
            ``'callback'`` -- callable invoked once per task.

    Each task passed to the callback is a dict
    ``{'cnum': arg['cnum'], 'tnum': i, 'arg': arg['arg']}`` for ``i`` in
    ``range(arg['tnum'])``. Callback return values are discarded.

    Raises:
        Whatever the callback raises (re-raised by ``Pool.map``). The pool is
        closed and joined in that case too, so no worker threads are left
        behind.
    """
    from multiprocessing.dummy import Pool as ThreadPool  # thread-backed pool

    thread_count = arg['tnum']
    tasks = [
        {'cnum': arg['cnum'], 'tnum': index, 'arg': arg['arg']}
        for index in range(thread_count)
    ]
    tpool = ThreadPool(thread_count)
    try:
        tpool.map(arg['callback'], tasks)
    finally:
        # Always release the worker threads, even when the callback failed.
        tpool.close()
        tpool.join()

@staticmethod  # process pool
def sPool(callback, tnum=20, cnum='', arg=[]):
    """Start a process pool whose workers each run ``fleader.bPool``.

    Args:
        callback: Forwarded to every worker under the ``'callback'`` key.
            NOTE(review): it crosses a process boundary, so it presumably
            has to be picklable -- confirm against callers.
        tnum: Forwarded to every worker under the ``'tnum'`` key
            (``bPool`` uses it as its thread count). Default 20.
        cnum: Number of worker processes and of tasks. ``''`` (the default)
            or ``None`` means ``multiprocessing.cpu_count()``.
        arg: Payload forwarded to every worker under the ``'arg'`` key.
            The list default is kept for interface compatibility; it is
            only read here, never mutated.

    Task ``i`` is ``{'cnum': i, 'tnum': tnum, 'callback': callback,
    'arg': arg}``. Results of the map are discarded.
    """
    from multiprocessing import Pool as ProcessPool  # process pool
    from multiprocessing import cpu_count  # number of CPUs

    # Resolve the default before building the task list. The original code
    # sized the pool with cpu_count() but then called range('') and raised
    # TypeError, so the default path could never work.
    if cnum == '' or cnum is None:
        cnum = cpu_count()

    tasks = [
        {'cnum': index, 'tnum': tnum, 'callback': callback, 'arg': arg}
        for index in range(cnum)
    ]
    spool = ProcessPool(cnum)
    try:
        spool.map(fleader.bPool, tasks)
    finally:
        # Shut the workers down even if a task raised.
        spool.close()
        spool.join()

@staticmethod  # added: takes no self, like the other helpers in this section
def Manager():
    """Create a manager-backed queue and lock.

    Starts a ``multiprocessing.Manager()`` and asks it for one ``Queue`` and
    one ``Lock``.

    Returns:
        tuple: ``(q, lock)`` -- the objects returned by ``manager.Queue()``
        and ``manager.Lock()``, in that order.
    """
    # Inside this body ``Manager`` is multiprocessing's factory, not this
    # function: the local import shadows the outer name.
    from multiprocessing import Manager

    manager = Manager()
    return manager.Queue(), manager.Lock()

@staticmethod  # gevent coroutines
def gPool(callback, urls=[], pnum=800):
    """Map ``callback`` over ``urls`` with a gevent pool of size ``pnum``.

    Args:
        callback: Callable applied to each element of ``urls``.
        urls: Items to process (defaults to an empty list).
        pnum: Size passed to ``gevent.pool.Pool`` (default 800).

    ``gevent.monkey.patch_all(socket=True, select=True)`` is invoked on
    every call before the pool is created. The result of the map is
    discarded; nothing is returned.
    """
    from gevent import monkey
    monkey.patch_all(socket=True, select=True)
    from gevent.pool import Pool

    Pool(pnum).map(callback, urls)

@staticmethod  # gevent primitives
def getGevent():
    """Return gevent's local class, queue class and a bounded semaphore.

    ``gevent.monkey.patch_all(socket=True, select=True)`` is invoked on
    every call before the other gevent modules are imported.

    Returns:
        tuple: ``(local, Queue, sem)`` where ``local`` is
        ``gevent.local.local``, ``Queue`` is ``gevent.queue.Queue`` (use
        ``get``/``put``) and ``sem`` is a ``BoundedSemaphore(2)`` instance
        (use ``acquire``/``release``). The first two are classes, not
        instances.
    """
    from gevent import monkey
    monkey.patch_all(socket=True, select=True)
    from gevent.queue import Queue  # get, put
    from gevent.local import local
    try:
        from gevent.lock import BoundedSemaphore
    except ImportError:
        # Only a missing module should trigger the fallback; a bare except
        # would also swallow KeyboardInterrupt and unrelated errors.
        # gevent.coros is presumably the module name in older gevent releases.
        from gevent.coros import BoundedSemaphore
    sem = BoundedSemaphore(2)  # acquire, release
    return local, Queue, sem  # local storage, queue, lock