下载selenium
pip install selenium
下载驱动
帮助>关于google chrome >查看chrome版本
下载对应版本webdriver驱动 http://chromedriver.storage.googleapis.com/index.html
#middlewares.py
class ItestDownloaderMiddleware:
@classmethod
def from_crawler(cls, crawler):
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_request(self, request, spider):
spider.browser.get(request.url)
for i in range(5):
spider.browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
return HtmlResponse(url=spider.browser.current_url, body=spider.browser.page_source, encoding='utf8',
request=request)
def process_response(self, request, response, spider):
return response
def process_exception(self, request, exception, spider):
pass
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)
#itest.py
def __init__(self):
chrome_options = webdriver.ChromeOptions()
# chrome_options.add_argument('--headless')
self.browser = webdriver.Chrome(chrome_options=chrome_options, executable_path='C:/dirver/chromedriver')
super(itest, self).__init__()
def closed(self, reason):
self.browser.close()