magicalbomb
本帖最后由 magicalbomb 于 2016-10-11 23:12 编辑

姐姐是做汽摩配的，帮她用 Python 写过一个爬虫，爬下了 10 万多条数据！

已分享到百度网盘，可以看一下：

https://pan.baidu.com/s/1gfny7PD

部分代码如下：

[mw_shl_code=python,true]class Filter:

class Process:

def __init__(self, std_in, std_out):
    """Store the constructor arguments and build the list of filters.

    Args:
        std_in: Kept unchanged on ``self.std_in``.
        std_out: Kept unchanged on ``self.std_out``.
    """
    self.std_in = std_in
    self.std_out = std_out

    # One row per Filter(...) call; each row is passed positionally.
    # NOTE(review): Filter's signature is not visible in this excerpt. The
    # rows look like (category, keywords to match, keywords to reject) --
    # confirm against the Filter class.
    # NOTE(review): spellings such as "cid", "flahser", "dgital transducer"
    # and "rectificer regulator" are runtime data and are kept verbatim;
    # verify whether they are intentional.
    filter_specs = (
        (
            "cid",
            ["cdi", "electric box", "igniter", "box comp fuse",
             "engine control unit assy", "control unit electronic", "box"],
            [],
        ),
        (
            "stator",
            ["stator", "base assembly", "ducati generator", "plate asm",
             "magneto", "plate assy", "plate assembly"],
            [],
        ),
        (
            "ignition coil",
            ["ignition coil", "coil assy ign", "generator",
             "dgital transducer", "device", "ign coil"],
            [],
        ),
        (
            "flasher",
            ["flasher relay", "flasher unit", "relay assy", "flahser"],
            ["light", "lens"],
        ),
        (
            "start relay",
            ["start relay", "starter relay", "start mag", "relay"],
            ["cable", "flahser"],
        ),
        (
            "regulator",
            ["rectificer regulator", "regulator voltage", "regulator",
             "regulador", "reg rec"],
            [],
        ),
    )
    self.filters = [Filter(*spec) for spec in filter_specs]

def run(self):
    """Placeholder entry point: performs no work and returns ``None``."""
    return None

def _Get_Web_Step(self,url):

try:

self._brower.get(url)

except TimeoutException:

return 1

def _Get_Elements_Step(self,xpath,parent_elemt=None):

try:

p = self._brower

if parent_elemt:

p = parent_elemt

WebDriverWait(p,10).until(EC.presence_of_all_elements_located((By.XPATH,xpath)))

except TimeoutException:

#self._brower.get_screenshot_as_file(r"C:\Users\Magicalbomb\Desktop\Work\ss.jpg")

return []

if parent_elemt:

return parent_elemt.find_elements_by_xpath(xpath)

return self._brower.find_elements_by_xpath(xpath)[/mw_shl_code]