|
|
@@ -112,11 +112,15 @@ class TaobaoDownloaderMiddleWare(object):
|
|
|
def __init__(self, timeout=None):
    """Start the headless Chrome session used to render requested pages.

    Args:
        timeout: Stored as ``self.timeout``. It is not applied to the
            browser in this method (see the review note at the end).
    """
    self.timeout = timeout

    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    # Pass the options by keyword: the first positional parameter of
    # webdriver.Chrome is not ``options`` in every Selenium release
    # (older releases take ``executable_path`` first), so a positional
    # argument can silently end up in the wrong parameter.
    self.browser = webdriver.Chrome(options=options)
    self.browser.set_window_size(1000, 600)
    # Element lookups wait up to 10 seconds before giving up.
    self.browser.implicitly_wait(10)
    # NOTE(review): the page-load timeout
    # (``set_page_load_timeout(self.timeout)``) is deliberately left
    # disabled, as in the revision under review, so ``self.timeout`` is
    # currently unused here - confirm whether it should be re-enabled
    # when a timeout is supplied.
|
|
|
|
|
|
def __del__(self):
    """Shut down the browser session when the middleware is collected.

    Uses ``quit()`` rather than ``close()``: ``close()`` only closes the
    current window, while ``quit()`` ends the whole WebDriver session.
    """
    # ``browser`` is missing if __init__ failed before assigning it;
    # a finalizer must not raise AttributeError in that case.
    browser = getattr(self, 'browser', None)
    if browser is not None:
        browser.quit()
|
|
|
@@ -124,6 +128,7 @@ class TaobaoDownloaderMiddleWare(object):
|
|
|
def process_request(self, request, spider):
|
|
|
try:
|
|
|
self.browser.get(request.url)
|
|
|
+ # The Chrome driver's page_source holds the page's HTML, including dynamically rendered content
|
|
|
return HtmlResponse(url=request.url, body=self.browser.page_source,
|
|
|
request=request, encoding='utf-8', status=200)
|
|
|
except TimeoutException:
|