# example10.py (1.1 KB)

import os
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup
# selenium is a browser-automation testing tool;
# it can simulate a real browser's behaviour to visit web pages.
from selenium import webdriver


  6. def main():
  7. # 先下载chromedriver并且将可执行程序放到PATH环境变量路径下
  8. # 创建谷歌Chrome浏览器内核
  9. driver = webdriver.Chrome()
  10. # 通过浏览器内核加载页面(可以加载动态生成的内容)
  11. driver.get('https://www.taobao.com/markets/mm/mm2017')
  12. # driver.page_source获得的页面包含了JavaScript动态创建的内容
  13. soup = BeautifulSoup(driver.page_source, 'lxml')
  14. all_images = soup.select('img[src]')
  15. for image in all_images:
  16. url = image.get('src')
  17. try:
  18. if not str(url).startswith('http'):
  19. url = 'http:' + url
  20. filename = url[url.rfind('/') + 1:]
  21. print(filename)
  22. resp = requests.get(url)
  23. with open('c:/images/' + filename, 'wb') as f:
  24. f.write(resp.content)
  25. except OSError:
  26. print(filename + '下载失败!')
  27. print('图片下载完成!')
  28. if __name__ == '__main__':
  29. main()