import urllib
from bs4 import BeautifulSoup
import requests
import os
import time
import lxml
import json
import csv
import telnetlib
# Scrape free HTTP proxies from kuaidaili.com, verify each one with a raw
# TCP (telnet) connection, and persist the working ones to ip.csv.
# NOTE(review): telnetlib is deprecated and removed in Python 3.13;
# socket.create_connection((ip, port), timeout=2) is the forward-compatible check.
rows = []


def _extract_proxies(html):
    """Yield (ip, port, location, response_time) string tuples from one listing page."""
    soup = BeautifulSoup(html, 'lxml')
    for tr in soup.find_all('tr'):
        cells = tr.find_all('td')
        # Header rows contain <th> cells only — skip rows with no <td>.
        if not cells:
            continue
        yield cells[0].getText(), cells[1].getText(), cells[4].getText(), cells[5].getText()


def _proxy_alive(ip, port, timeout=2):
    """Return True if a TCP connection to ip:port succeeds within *timeout* seconds."""
    try:
        telnetlib.Telnet(ip, port, timeout=timeout)
        return True
    except OSError:
        # Narrowed from the original bare `except:` — only connection-level
        # failures (refused, timeout, unreachable) mean "dead proxy".
        return False


# Pages on this site are numbered from 1; the original `range(3597)` began
# at 0 and requested a nonexistent page 0.
for page in range(1, 3598):
    url = 'https://www.kuaidaili.com/free/inha/' + str(page)
    html = requests.get(url).text
    for ip, port, location, rtime in _extract_proxies(html):
        if _proxy_alive(ip, port):
            print("代理IP有效!")
            rows.append([ip, port])
        else:
            print("代理IP无效!")
    # Checkpoint once per page instead of rewriting the whole file after
    # every single valid row (the original did an O(n^2) full rewrite),
    # while still leaving a usable ip.csv if the run is interrupted.
    with open('ip.csv', 'w', encoding="utf_8_sig", newline='') as f:
        csv.writer(f).writerows(rows)
# Sanity-check a single hardcoded HTTP proxy by fetching baidu.com through it:
# a real page body is several KB, whereas a proxy error page is short.
import urllib.request

# Fixes NameError: the original printed an undefined `thisIP` in every branch.
this_ip = "120.77.249.46:8080"

proxy = urllib.request.ProxyHandler({"http": "http://" + this_ip})
opener = urllib.request.build_opener(proxy)
urllib.request.install_opener(opener)

try:
    # The original called urlopen() *outside* the try, so timeouts and
    # URLErrors escaped uncaught; the request now lives inside the guard.
    data = urllib.request.urlopen('http://www.baidu.com', timeout=2).read().decode('utf-8', 'ignore')
    if len(data) > 5000:
        print(this_ip + ':可用')
    else:
        print(this_ip + ':无效')
except OSError:
    # urllib.error.URLError (incl. socket timeouts) subclasses OSError.
    print(this_ip + ':无效!!!')