12
2022
01

Proxy test

try:

  from fake_useragent import UserAgent

except:

  !pip install fake_useragent


import threading

import requests

import queue

from fake_useragent import UserAgent

import os

from google.colab import drive

from google.colab import files

from google.colab import auth

auth.authenticate_user()

from oauth2client.client import GoogleCredentials

import lxml

from bs4 import BeautifulSoup

import time

import pandas as pd

from multiprocessing.dummy import Pool

import time

import pickle

import pandas as pd

import csv


# Mount Google Drive so the proxy list and the result file are reachable.
drive.mount('/content/drive', force_remount=False)

# Root of the mounted Drive (not referenced in the visible part of the script).
path = "/content/drive/MyDrive/"

# Load the candidate proxies. Malformed CSV rows are skipped with a warning.
# `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0, so prefer
# `on_bad_lines` and fall back to the legacy keyword on older pandas, where the
# unknown keyword raises TypeError.
_PROXY_CSV = '/content/drive/My Drive/Colab Notebooks/bulk_proxy/China_Proxy.csv'
try:
    df = pd.read_csv(_PROXY_CSV, on_bad_lines='warn')
except TypeError:
    df = pd.read_csv(_PROXY_CSV, error_bad_lines=False)

# Proxy addresses to test, taken from the 'IP&PORT_China' column.
test_list = df['IP&PORT_China'].tolist()

# Shared result list: worker threads append every proxy that passed the test.
ip_pool = []

# Random User-Agent header, chosen once and reused for every request.
ua = UserAgent()
headers = {'User-Agent': ua.random}

# Endpoint requested through each proxy to check that it works.
url = 'http://icanhazip.com/'


def test_ip(queue_list):

    while True:

        if queue_list.empty():

            break

        else:

            ip = queue_list.get()

            proxies = {

                'http' : ip

            }

            try:

                response = requests.get(url=url, headers=headers, proxies=proxies,timeout=3)

                if response.status_code == 200:

                    print("[%s]test%s,Test results [available]" % (threading.current_thread().name, proxies))

                    ip_pool.append(ip)

            except:

                print("[%s]test%s,Test results [not available]" % (threading.current_thread().name, proxies))


if __name__ == '__main__':
    # Work queue holding every candidate proxy read from the CSV.
    queue_list = queue.Queue()
    for candidate in test_list:
        queue_list.put(candidate)

    # Build the nine workers first, then start them all, then wait for all.
    out_thread = []
    for worker_no in range(9):
        out_thread.append(
            threading.Thread(
                target=test_ip,
                args=(queue_list,),
                name="process%s" % worker_no,
            )
        )
    for worker in out_thread:
        worker.start()
    for worker in out_thread:
        worker.join()

    print('Test completed')
    print(ip_pool)

    # Persist the working proxies as a single-column CSV on Drive.
    res = pd.DataFrame({'available_IP&PORT': ip_pool}, columns=['available_IP&PORT'])
    res.to_csv(
        "/content/drive/My Drive/Colab Notebooks/AvailableIP.csv",
        encoding='utf_8_sig',
        index=False,
    )

    # Final summary: total tested, available, and unavailable counts.
    total_count = len(test_list)
    available_count = len(ip_pool)
    unavailable_count = total_count - available_count
    print("Total crawl%s individual ip,available ip For:%s,Unavailable ip For:%s"%(total_count, available_count, unavailable_count))

« 上一篇 下一篇 »

发表评论:

◎欢迎参与讨论,请在这里发表您的看法、交流您的观点。