# --- Example 1 setup: authenticate, mount Google Drive, enter work dir ---
import os
from google.colab import drive
from google.colab import files
from google.colab import auth
# Interactive OAuth prompt; must run before Drive access in Colab.
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
import requests
import lxml
from bs4 import BeautifulSoup
# force_remount avoids "already mounted" errors on notebook re-runs.
drive.mount('/content/drive', force_remount=True)
path = "/content/drive/My Drive/Colab Notebooks/test"
#os.chdir(path)
os.chdir('/content/drive/My Drive/Colab Notebooks/test')
# Sanity check: list the target folder (output shown in the notebook).
os.listdir(path)
# Scrape image thumbnail URLs from a Google Images result page.
url="https://www.google.com.hk/search?q=emoji&hl=zh-HK&gbv=2&biw=1263&bih=625&tbm=isch&ei=sLncXv3pE5fbhwOk77eAAw&start=20&sa=N"
response = requests.get(url)
soup = BeautifulSoup(response.text, "lxml")

# Collect absolute image URLs found inside anchor tags.
img_links_list = []
for anchor in soup.find_all('a'):
    for img in anchor.find_all('img'):
        # BUG FIX: img['src'] raises KeyError for <img> tags with no src
        # attribute -- use .get() and skip them. Also test the *prefix*:
        # the original substring test ("https" in url) would also match
        # URLs that merely contain the text "https" somewhere inside.
        photo_url = img.get('src')
        if photo_url and photo_url.startswith("https"):
            img_links_list.append(photo_url)
#print(img_links_list)
# Download every collected image into the Drive folder.
# A plain truthiness test replaces the original while True / len()==0 /
# break pattern; pop() drains the list exactly as before.
while img_links_list:
    imgurl = img_links_list.pop()
    # Crude filename from the URL tail, as in the original.
    # BUG FIX: the last 8 characters of a thumbnail URL can contain '/',
    # which open() would treat as a directory separator -- neutralise it.
    filename = imgurl[-8:].replace('/', '_') + ".jpg"
    # `with` closes the streamed response and the file even on error
    # (the original leaked both on any exception).
    with requests.get(imgurl, stream=True) as r:
        with open('/content/drive/My Drive/Colab Notebooks/test/%s' % filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=128):
                f.write(chunk)
# ---- Example 2: scrape images from https://imgbin.com ----
# --- Example 2 setup: identical to example 1 (auth, mount Drive, chdir) ---
import os
from google.colab import drive
from google.colab import files
from google.colab import auth
# Interactive OAuth prompt; must run before Drive access in Colab.
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
import requests
import lxml
from bs4 import BeautifulSoup
# force_remount avoids "already mounted" errors on notebook re-runs.
drive.mount('/content/drive', force_remount=True)
path = "/content/drive/My Drive/Colab Notebooks/test"
#os.chdir(path)
os.chdir('/content/drive/My Drive/Colab Notebooks/test')
# Sanity check: list the target folder (output shown in the notebook).
os.listdir(path)
def getlinks():
    """Return the /png/ detail-page URLs linked from the imgbin "cats" listing.

    Returns:
        list[str]: absolute https://imgbin.com/png/... URLs, in page order.
    """
    url="https://imgbin.com/free-png/cats"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "lxml")
    links = []
    for anchor in soup.find_all('a'):
        href = anchor.get('href')
        # BUG FIX: .get('href') returns None for <a> tags without an
        # href attribute, and `"/png/" in None` raises TypeError --
        # guard before the substring test.
        if href and "/png/" in href:
            links.append("https://imgbin.com" + href)
    return links
#print(getlinks())
# Visit each detail page and collect CDN-hosted .jpg image URLs.
imglinks = []
for page_url in getlinks():
    response = requests.get(page_url)
    soup = BeautifulSoup(response.text, "lxml")
    for img in soup.find_all('img'):
        src = img.get('src')
        # BUG FIX: the original condition `".jpg" and "cdn" in src` only
        # tested for "cdn" -- a non-empty string literal is always truthy,
        # so the ".jpg" half was silently ignored. Also guard against
        # <img> tags with no src attribute (src is None).
        if src and ".jpg" in src and "cdn" in src:
            imglinks.append(src)
#print(len(imglinks))
# Download every collected image into the Drive folder.
# A plain truthiness test replaces the original while True / len()==0 /
# break pattern; pop() drains the list exactly as before.
while imglinks:
    imgurl = imglinks.pop()
    # imgbin CDN URLs end in a real filename, so the last path segment
    # is a safe name (unlike example 1's raw URL tail).
    filename = imgurl.split("/")[-1]
    # `with` closes the streamed response and the file even on error
    # (the original leaked both on any exception).
    with requests.get(imgurl, stream=True) as r:
        with open('/content/drive/My Drive/Colab Notebooks/test/%s' % filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=128):
                f.write(chunk)
# ---- Example 3: paginated crawl of an imgbin.com listing ----
# (Requires the same imports and Drive setup as the examples above.)
def getpages(url="https://imgbin.com/free-png/coronavirus"):
    """Return the number of result pages for an imgbin listing.

    Looks for the "Last" pagination link, whose href ends with the last
    page number. A single-page listing has no such link.

    Args:
        url: listing page to inspect. Defaults to the original
            hard-coded coronavirus listing for backward compatibility.

    Returns:
        int: the last page number, or 1 when no "Last" link exists.
    """
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "lxml")
    for anchor in soup.find_all('a'):
        if "Last" in anchor.getText():
            return int(anchor.get("href").split("/")[-1])
    # BUG FIX: the original fell off the end and returned None when no
    # "Last" link was present -- its `except UnboundLocalError` handler
    # was dead code (`l` was always assigned right before use), and the
    # caller's `ph <= getpages()` would raise TypeError on None.
    # (`len(list("1"))` was an obfuscated way of writing 1.)
    return 1
# Crawl every page of the "rabbit" listing and collect CDN image URLs.
ph = 1
links = []
imglinks = []
# PERF FIX: hoist the page count out of the loop condition -- the
# original called getpages() (a network request) on EVERY iteration.
# NOTE(review): getpages() counts the pages of the *coronavirus*
# listing while this loop crawls *rabbit* -- looks like a copy-paste
# slip; kept as-is to preserve the existing call signature.
last_page = getpages()
while ph <= last_page:
    url = "https://imgbin.com/free-png/rabbit/" + str(ph)
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "lxml")
    # Detail-page links found on THIS listing page only. The original
    # iterated the accumulated `links` list each pass, re-downloading
    # every previously seen detail page once per listing page (O(n^2)
    # network traffic). `links` is still filled for compatibility.
    page_links = []
    for anchor in soup.find_all('a'):
        href = anchor.get('href')
        # Guard: .get('href') is None for <a> tags without an href.
        if href and "/png/" in href:
            full = "https://imgbin.com" + href
            page_links.append(full)
            links.append(full)
    for detail_url in page_links:
        response = requests.get(detail_url)
        soup = BeautifulSoup(response.text, "lxml")
        for img in soup.find_all('img'):
            src = img.get('src')
            # BUG FIX: `".jpg" and "cdn" in src` ignored the ".jpg" test
            # (a non-empty literal is always truthy); also guard None src.
            if src and ".jpg" in src and "cdn" in src:
                imglinks.append(src)
    ph += 1
for b in imglinks:
    print(b)