2020-11-18

Google Drive image download (Colab web-scraping examples)

import os

from google.colab import drive

from google.colab import files

from google.colab import auth

# Authenticate the Colab user so the mounted Drive and Google APIs are accessible.
auth.authenticate_user()

from oauth2client.client import GoogleCredentials

import requests

import lxml

from bs4 import BeautifulSoup



# Mount Google Drive and switch into the working folder used for downloads.
drive.mount('/content/drive', force_remount=True)

# Working directory inside the mounted Drive.
path = "/content/drive/My Drive/Colab Notebooks/test"

# Reuse `path` instead of repeating the literal (the original duplicated it).
os.chdir(path)

os.listdir(path)


url="https://www.google.com.hk/search?q=emoji&hl=zh-HK&gbv=2&biw=1263&bih=625&tbm=isch&ei=sLncXv3pE5fbhwOk77eAAw&start=20&sa=N"

response = requests.get(url)

soup = BeautifulSoup(response.text, "lxml")

imglinks = soup.find_all('a')


img_links_list = []


for i in imglinks:

    imgs = i.find_all('img')

    for img in imgs:

        photo_url = img['src']

        #print(photo_url)

        if "https" in photo_url:

            img_links_list.append(photo_url)


#print(img_links_list)


# Download every collected image URL into the Drive test folder.
# NOTE(review): the last 8 characters of the URL are used as the file name,
# which can collide or contain characters invalid in filenames — confirm.
while img_links_list:
    imgurl = img_links_list.pop()
    r = requests.get(imgurl, stream=True)
    filename = imgurl[-8:] + ".jpg"
    # The original also assigned `path = "/content/drive"+filename` here,
    # which was never used (and clobbered the setup `path`); removed.
    with open('/content/drive/My Drive/Colab Notebooks/test/%s' % filename, 'wb') as f:
        # Stream the body in small chunks to avoid holding the image in memory.
        for chunk in r.iter_content(chunk_size=128):
            f.write(chunk)













Example 2: https://imgbin.com


import os

from google.colab import drive

from google.colab import files

from google.colab import auth

# Authenticate the Colab user so the mounted Drive and Google APIs are accessible.
auth.authenticate_user()

from oauth2client.client import GoogleCredentials

import requests

import lxml

from bs4 import BeautifulSoup


# Mount Google Drive and switch into the working folder used for downloads.
drive.mount('/content/drive', force_remount=True)

# Working directory inside the mounted Drive.
path = "/content/drive/My Drive/Colab Notebooks/test"

# Reuse `path` instead of repeating the literal (the original duplicated it).
os.chdir(path)

os.listdir(path)



def getlinks(url="https://imgbin.com/free-png/cats"):
    """Return absolute imgbin detail-page links found on the listing page *url*.

    The page is fetched with requests and parsed with BeautifulSoup; an
    anchor counts when its href contains "/png/". Anchors with no href
    attribute are skipped (the original crashed with TypeError on them,
    because `None` does not support the `in` operator).
    """
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "lxml")

    links = []
    for anchor in soup.find_all('a'):
        href = anchor.get('href')
        if href and "/png/" in href:
            links.append("https://imgbin.com" + href)
    return links


#print(getlinks())


# Visit each detail page returned by getlinks() and collect the CDN-hosted
# .jpg image URLs into imglinks.
imglinks = []

for u in getlinks():
    response = requests.get(u)
    soup = BeautifulSoup(response.text, "lxml")

    for img in soup.find_all('img'):
        src = img.get('src')
        # BUG FIX: the original condition `".jpg" and "cdn" in src` parses
        # as `".jpg" and ("cdn" in src)` — the non-empty string ".jpg" is
        # always truthy, so the ".jpg" substring was never actually tested.
        # Also guard against <img> tags with no src (src is None then).
        if src and ".jpg" in src and "cdn" in src:
            imglinks.append(src)

#print(len(imglinks))


# Download each collected image into the Drive test folder, using the final
# URL path segment as the file name.
while imglinks:
    imgurl = imglinks.pop()
    r = requests.get(imgurl, stream=True)
    filename = imgurl.split("/")[-1]
    # The original also assigned `path = "/content/drive"+filename` here,
    # which was never used (and clobbered the setup `path`); removed.
    with open('/content/drive/My Drive/Colab Notebooks/test/%s' % filename, 'wb') as f:
        # Stream the body in small chunks to avoid holding the image in memory.
        for chunk in r.iter_content(chunk_size=128):
            f.write(chunk)














example 3


Import all your modules here (the same imports as in Example 2).


def getpages(url="https://imgbin.com/free-png/coronavirus"):
    """Return the number of result pages for the imgbin search at *url*.

    imgbin's pager renders a "Last" anchor whose href ends in the final
    page number; when no such anchor exists there is only one page, so 1
    is returned. (The original expressed that default as
    `len(list("1"))` and reached it by catching UnboundLocalError.)
    """
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "lxml")

    last_page = 1  # default when the "Last" pager link is absent
    for anchor in soup.find_all('a'):
        if "Last" in anchor.getText():
            href = anchor.get("href")
            # Skip anchors with no href instead of crashing on None.
            if href:
                last_page = int(href.split("/")[-1])
    return last_page


# Crawl every listing page, collect detail-page links, then collect the CDN
# image URLs from each detail page, and finally print them.
# NOTE(review): getpages() counts the pages of the "coronavirus" search while
# the crawl below fetches the "rabbit" search — confirm which is intended.
links = []
imglinks = []

# Hoisted: the original evaluated `getpages()` in the while-condition,
# performing a fresh network request on every single loop pass.
total_pages = getpages()

for ph in range(1, total_pages + 1):
    url = "https://imgbin.com/free-png/rabbit/" + str(ph)
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "lxml")

    # Detail-page links found on *this* listing page only.
    page_links = []
    for anchor in soup.find_all('a'):
        href = anchor.get('href')
        # Guard against anchors with no href (None breaks the `in` test).
        if href and "/png/" in href:
            page_links.append("https://imgbin.com" + href)
    links.extend(page_links)

    # The original iterated the whole accumulated `links` list on every
    # page, re-downloading earlier pages' detail pages and duplicating
    # image URLs; only this page's new links are visited here.
    for detail_url in page_links:
        detail_soup = BeautifulSoup(requests.get(detail_url).text, "lxml")
        for img in detail_soup.find_all('img'):
            src = img.get('src')
            # BUG FIX: `".jpg" and "cdn" in src` only ever tested the
            # "cdn" part; both substrings are now checked, and a missing
            # src attribute (None) no longer crashes.
            if src and ".jpg" in src and "cdn" in src:
                imglinks.append(src)

for b in imglinks:
    print(b)


« 上一篇 下一篇 »

发表评论:

◎欢迎参与讨论,请在这里发表您的看法、交流您的观点。