|
|
|
@ -4,16 +4,15 @@ import time
|
|
|
|
|
from collections import namedtuple
|
|
|
|
|
from colorama import Fore, Style
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
A function which loads proxies from a .csv file, to a list.
|
|
|
|
|
|
|
|
|
|
Inputs: path to .csv file which contains proxies, described by fields: 'ip', 'port', 'protocol'.
|
|
|
|
|
|
|
|
|
|
Outputs: list containing proxies stored in named tuples.
|
|
|
|
|
"""
|
|
|
|
|
def load_proxies_from_csv(path_to_list):
|
|
|
|
|
"""
|
|
|
|
|
A function which loads proxies from a .csv file, to a list.
|
|
|
|
|
|
|
|
|
|
Inputs: path to .csv file which contains proxies, described by fields: 'ip', 'port', 'protocol'.
|
|
|
|
|
|
|
|
|
|
def load_proxies_from_csv(path_to_list):
|
|
|
|
|
Outputs: list containing proxies stored in named tuples.
|
|
|
|
|
"""
|
|
|
|
|
Proxy = namedtuple('Proxy', ['ip', 'port', 'protocol'])
|
|
|
|
|
|
|
|
|
|
with open(path_to_list, 'r') as csv_file:
|
|
|
|
@ -23,16 +22,15 @@ def load_proxies_from_csv(path_to_list):
|
|
|
|
|
return proxies
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
A function which tests the proxy by attempting
|
|
|
|
|
to make a request to the designated website.
|
|
|
|
|
|
|
|
|
|
We use 'wikipedia.org' as a test, since we can test the proxy anonymity
|
|
|
|
|
by checking if the returned 'X-Client-IP' header matches the proxy ip.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def check_proxy(proxy_ip, proxy_port, protocol):
|
|
|
|
|
"""
|
|
|
|
|
A function which tests the proxy by attempting
|
|
|
|
|
to make a request to the designated website.
|
|
|
|
|
|
|
|
|
|
We use 'wikipedia.org' as a test, since we can test the proxy anonymity
|
|
|
|
|
by checking if the returned 'X-Client-IP' header matches the proxy ip.
|
|
|
|
|
"""
|
|
|
|
|
full_proxy = f'{protocol}://{proxy_ip}:{proxy_port}'
|
|
|
|
|
proxies = {'http': full_proxy, 'https': full_proxy}
|
|
|
|
|
try:
|
|
|
|
@ -46,24 +44,24 @@ def check_proxy(proxy_ip, proxy_port, protocol):
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
A function which takes in one mandatory argument -> a proxy list in
|
|
|
|
|
the format returned by the function 'load_proxies_from_csv'.
|
|
|
|
|
|
|
|
|
|
It also takes an optional argument 'max_proxies', if the user wishes to
|
|
|
|
|
cap the number of validated proxies.
|
|
|
|
|
|
|
|
|
|
Each proxy is tested by the check_proxy function. Since each test is done on
|
|
|
|
|
'wikipedia.org', in order to be considerate to Wikipedia servers, we are not using any async modules,
|
|
|
|
|
but are sending successive requests each separated by at least 1 sec.
|
|
|
|
|
from colorama import Fore, Style
|
|
|
|
|
|
|
|
|
|
Outputs: list containing proxies stored in named tuples.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
def check_proxy_list(proxy_list, max_proxies=None):
|
|
|
|
|
"""
|
|
|
|
|
A function which takes in one mandatory argument -> a proxy list in
|
|
|
|
|
the format returned by the function 'load_proxies_from_csv'.
|
|
|
|
|
|
|
|
|
|
from colorama import Fore, Style
|
|
|
|
|
It also takes an optional argument 'max_proxies', if the user wishes to
|
|
|
|
|
cap the number of validated proxies.
|
|
|
|
|
|
|
|
|
|
def check_proxy_list(proxy_list, max_proxies=None):
|
|
|
|
|
Each proxy is tested by the check_proxy function. Since each test is done on
|
|
|
|
|
'wikipedia.org', in order to be considerate to Wikipedia servers, we are not using any async modules,
|
|
|
|
|
but are sending successive requests each separated by at least 1 sec.
|
|
|
|
|
|
|
|
|
|
Outputs: list containing proxies stored in named tuples.
|
|
|
|
|
"""
|
|
|
|
|
print((Style.BRIGHT + Fore.GREEN + "[" +
|
|
|
|
|
Fore.YELLOW + "*" +
|
|
|
|
|
Fore.GREEN + "] Started checking proxies."))
|
|
|
|
@ -93,4 +91,4 @@ def check_proxy_list(proxy_list, max_proxies=None):
|
|
|
|
|
return working_proxies
|
|
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
raise Exception("Found no working proxies.")
|
|
|
|
|
raise Exception("Found no working proxies.")
|
|
|
|
|