18
2020
04

python crawler

from selenium import webdriver

# Start a Chrome session. The argument is a placeholder
# (presumably the path to the chromedriver executable — confirm).
driver=webdriver.Chrome("xxx location")

# Load the target page (the URL is a placeholder).
driver.get("http://xxxxxxxx")

# HTML of the page as currently rendered. As a bare expression the value is
# only shown in an interactive session; in a script it is discarded.
driver.page_source



from bs4 import BeautifulSoup

# Parse the HTML currently rendered by the browser using the lxml parser.
soup = BeautifulSoup(driver.page_source, 'lxml')

# Read the src attribute of the element whose id is "page"
# (presumably the <img> holding the picture — confirm against the site).
img_src = soup.select_one('#page').get('src')

# Prefix the scheme to obtain the full link.
print("http://" + img_src)


# The value printed above is the image link.


# The text of the '#pageno.' element is split on '/'; the second piece, with
# any '頁' characters stripped from its ends, is parsed as the total page count.
# NOTE(review): the trailing '.' in the '#pageno.' selector looks like a typo —
# confirm the real element id before running.
pagenum = int(soup.select_one('#pageno.').text.split('/')[1].strip('頁'))

# Print the 1-based number of every page.
for i in range(pagenum):
    print(i + 1)






import time


# URL template for a single page; the {} is filled with the page number
# via str.format (the host part is a placeholder).
pageurl = 'http://xxxxxxx-{}'


# Imported here because the file's own `import requests` only appears further
# down, after this loop.
import requests

# Visit every page in turn, find its image and save it to disk.
for i in range(pagenum):
    # Page numbers in the URL template start at 1.
    driver.get(pageurl.format(i + 1))

    # Re-parse the freshly loaded page.
    soup = BeautifulSoup(driver.page_source, 'lxml')

    # Build the image link from the src attribute of the element with id "page".
    img_link = "http://" + soup.select_one('#page').get('src')

    # Pass the variable, not the literal text 'img link'.
    res = requests.get(img_link)

    # Files are named after the 0-based loop index: 0.jpg, 1.jpg, ...
    with open('{}.jpg'.format(i), 'wb') as f:
        f.write(res.content)

    # Wait one second before requesting the next page.
    time.sleep(1)






import requests

# Fetch a single image. 'img link' is a placeholder string — substitute the
# real image URL (presumably the link printed earlier; confirm).
res=requests.get('img link')


# Write the raw response bytes to test.jpg in binary mode.
with open('test.jpg', 'wb') as f:

    f.write(res.content)




from PIL import Image

# Open the downloaded file to check it is a valid image. As a bare expression
# the result is only displayed in an interactive session.
Image.open('test.jpg')


    









« 上一篇 下一篇 »

发表评论:

◎欢迎参与讨论,请在这里发表您的看法、交流您的观点。