forked from Ponysearch/Ponysearch
[enh] Add multiple outgoing proxies
credits go to @bauruine see https://github.com/searx/searx/pull/1958
This commit is contained in:
parent
2fc3b17c85
commit
3786920df9
7 changed files with 172 additions and 36 deletions
|
@ -36,14 +36,22 @@ Global Settings
|
||||||
image_proxy : False # proxying image results through searx
|
image_proxy : False # proxying image results through searx
|
||||||
default_locale : "" # default interface locale
|
default_locale : "" # default interface locale
|
||||||
|
|
||||||
# uncomment below section if you want to use a proxy
|
outgoing: # communication with search engines
|
||||||
|
request_timeout : 2.0 # default timeout in seconds, can be overridden by engine
|
||||||
|
# max_request_timeout: 10.0 # the maximum timeout in seconds
|
||||||
|
useragent_suffix : "" # suffix of searx_useragent, could contain information such as an email address of the administrator
|
||||||
|
pool_connections : 100 # Number of different hosts
|
||||||
|
pool_maxsize : 10 # Number of simultaneous requests by host
|
||||||
|
|
||||||
#outgoing_proxies :
|
#proxies:
|
||||||
# http : http://127.0.0.1:8080
|
# http:
|
||||||
# https: http://127.0.0.1:8080
|
# - http://proxy1:8080
|
||||||
|
# - http://proxy2:8080
|
||||||
# uncomment below section only if you have more than one network interface
|
# https:
|
||||||
# which can be the source of outgoing search requests
|
# - http://proxy1:8080
|
||||||
|
# - http://proxy2:8080
|
||||||
|
# - socks5://user:password@proxy3:1080
|
||||||
|
# - socks5h://user:password@proxy4:1080
|
||||||
|
|
||||||
#source_ips:
|
#source_ips:
|
||||||
# - 1.1.1.1
|
# - 1.1.1.1
|
||||||
|
@ -105,15 +113,16 @@ Global Settings
|
||||||
code, like ``fr``, ``en``, ``de``.
|
code, like ``fr``, ``en``, ``de``.
|
||||||
|
|
||||||
.. _requests proxies: http://requests.readthedocs.io/en/latest/user/advanced/#proxies
|
.. _requests proxies: http://requests.readthedocs.io/en/latest/user/advanced/#proxies
|
||||||
.. _PR SOCKS support: https://github.com/kennethreitz/requests/pull/478
|
.. _PySocks: https://pypi.org/project/PySocks/
|
||||||
|
|
||||||
``outgoing_proxies`` :
|
``proxies`` :
|
||||||
Define a proxy you wish to use, see `requests proxies`_. SOCKS proxies are
|
Define one or more proxies you wish to use, see `requests proxies`_.
|
||||||
not supported / see `PR SOCKS support`.
|
If there is more than one proxy for a protocol (http, https),
|
||||||
|
requests to the engines are distributed in a round-robin fashion.
|
||||||
|
|
||||||
``source_ips`` :
|
``source_ips`` :
|
||||||
If you use multiple network interfaces, define from which IP the requests must
|
If you use multiple network interfaces, define from which IP the requests must
|
||||||
be made.
|
be made. This parameter is ignored when ``proxies`` is set.
|
||||||
|
|
||||||
``locales`` :
|
``locales`` :
|
||||||
Locales codes and their names. Available translations of searx interface.
|
Locales codes and their names. Available translations of searx interface.
|
||||||
|
@ -139,6 +148,15 @@ Engine settings
|
||||||
api_key : 'apikey'
|
api_key : 'apikey'
|
||||||
disabled : True
|
disabled : True
|
||||||
language : en_US
|
language : en_US
|
||||||
|
#proxies:
|
||||||
|
# http:
|
||||||
|
# - http://proxy1:8080
|
||||||
|
# - http://proxy2:8080
|
||||||
|
# https:
|
||||||
|
# - http://proxy1:8080
|
||||||
|
# - http://proxy2:8080
|
||||||
|
# - socks5://user:password@proxy3:1080
|
||||||
|
# - socks5h://user:password@proxy4:1080
|
||||||
|
|
||||||
``name`` :
|
``name`` :
|
||||||
Name that will be used across searx to define this engine. In settings, on
|
Name that will be used across searx to define this engine. In settings, on
|
||||||
|
|
|
@ -25,7 +25,7 @@ from operator import itemgetter
|
||||||
from searx import settings
|
from searx import settings
|
||||||
from searx import logger
|
from searx import logger
|
||||||
from searx.data import ENGINES_LANGUAGES
|
from searx.data import ENGINES_LANGUAGES
|
||||||
from searx.poolrequests import get
|
from searx.poolrequests import get, get_proxy_cycles
|
||||||
from searx.utils import load_module, match_language, get_engine_from_settings
|
from searx.utils import load_module, match_language, get_engine_from_settings
|
||||||
|
|
||||||
|
|
||||||
|
@ -79,16 +79,18 @@ def load_engine(engine_data):
|
||||||
logger.exception('Cannot load engine "{}"'.format(engine_module))
|
logger.exception('Cannot load engine "{}"'.format(engine_module))
|
||||||
return None
|
return None
|
||||||
|
|
||||||
for param_name in engine_data:
|
for param_name, param_value in engine_data.items():
|
||||||
if param_name == 'engine':
|
if param_name == 'engine':
|
||||||
continue
|
pass
|
||||||
if param_name == 'categories':
|
elif param_name == 'categories':
|
||||||
if engine_data['categories'] == 'none':
|
if param_value == 'none':
|
||||||
engine.categories = []
|
engine.categories = []
|
||||||
else:
|
else:
|
||||||
engine.categories = list(map(str.strip, engine_data['categories'].split(',')))
|
engine.categories = list(map(str.strip, param_value.split(',')))
|
||||||
continue
|
elif param_name == 'proxies':
|
||||||
setattr(engine, param_name, engine_data[param_name])
|
engine.proxies = get_proxy_cycles(param_value)
|
||||||
|
else:
|
||||||
|
setattr(engine, param_name, param_value)
|
||||||
|
|
||||||
for arg_name, arg_value in engine_default_args.items():
|
for arg_name, arg_value in engine_default_args.items():
|
||||||
if not hasattr(engine, arg_name):
|
if not hasattr(engine, arg_name):
|
||||||
|
|
|
@ -111,6 +111,32 @@ def get_time_for_thread():
|
||||||
return threadLocal.total_time
|
return threadLocal.total_time
|
||||||
|
|
||||||
|
|
||||||
|
def get_proxy_cycles(proxy_settings):
    """Build one endless round-robin iterator of proxies per protocol.

    :param proxy_settings: mapping of a protocol name (e.g. ``http``,
        ``https``) to either a single proxy URL string (the older
        ``settings.yml`` format) or a list of proxy URLs.  May be ``None``
        or empty.
    :returns: a **new** dict mapping each protocol to an ``itertools.cycle``
        over its proxies, or ``None`` when ``proxy_settings`` is falsy.

    The given mapping is left untouched: writing the cycles back into it
    would clobber the caller's settings and make a second call on the same
    mapping wrap an already-advanced cycle.
    """
    if not proxy_settings:
        return None

    proxy_cycles = {}
    for protocol, proxy in proxy_settings.items():
        # Backwards compatibility for single proxy in settings.yml
        if isinstance(proxy, str):
            proxy = [proxy]
        proxy_cycles[protocol] = cycle(proxy)
    return proxy_cycles
|
||||||
|
|
||||||
|
|
||||||
|
# Proxy cycles for the ``proxies`` entry of the ``outgoing`` settings, built
# once when this module is loaded (``None`` when that entry is missing or empty).
GLOBAL_PROXY_CYCLES = get_proxy_cycles(settings['outgoing'].get('proxies'))
|
||||||
|
|
||||||
|
|
||||||
|
def get_proxies(proxy_cycles):
    """Take the next proxy of every protocol from ``proxy_cycles``.

    :param proxy_cycles: mapping of protocol name to an iterator of proxies,
        or a falsy value (``None``, empty dict).
    :returns: dict mapping each protocol to the next item of its iterator,
        or ``None`` when ``proxy_cycles`` is falsy.
    """
    if not proxy_cycles:
        return None

    selected = {}
    for scheme, rotation in proxy_cycles.items():
        # advancing the iterator is what rotates the proxies between calls
        selected[scheme] = next(rotation)
    return selected
|
||||||
|
|
||||||
|
|
||||||
|
def get_global_proxies():
    """Return the next proxy per protocol taken from ``GLOBAL_PROXY_CYCLES``.

    :returns: the result of ``get_proxies`` applied to the module-level
        cycles: a dict of protocol to proxy, or ``None`` when no global
        proxies are configured.
    """
    proxies = get_proxies(GLOBAL_PROXY_CYCLES)
    return proxies
|
||||||
|
|
||||||
|
|
||||||
def request(method, url, **kwargs):
|
def request(method, url, **kwargs):
|
||||||
"""same as requests/requests/api.py request(...)"""
|
"""same as requests/requests/api.py request(...)"""
|
||||||
time_before_request = time()
|
time_before_request = time()
|
||||||
|
@ -119,8 +145,8 @@ def request(method, url, **kwargs):
|
||||||
session = SessionSinglePool()
|
session = SessionSinglePool()
|
||||||
|
|
||||||
# proxies
|
# proxies
|
||||||
if kwargs.get('proxies') is None:
|
if not kwargs.get('proxies'):
|
||||||
kwargs['proxies'] = settings['outgoing'].get('proxies')
|
kwargs['proxies'] = get_global_proxies()
|
||||||
|
|
||||||
# timeout
|
# timeout
|
||||||
if 'timeout' in kwargs:
|
if 'timeout' in kwargs:
|
||||||
|
|
|
@ -119,7 +119,7 @@ def send_http_request(engine, request_params):
|
||||||
|
|
||||||
# setting engine based proxies
|
# setting engine based proxies
|
||||||
if hasattr(engine, 'proxies'):
|
if hasattr(engine, 'proxies'):
|
||||||
request_args['proxies'] = engine.proxies
|
request_args['proxies'] = requests_lib.get_proxies(engine.proxies)
|
||||||
|
|
||||||
# specific type of request (GET or POST)
|
# specific type of request (GET or POST)
|
||||||
if request_params['method'] == 'GET':
|
if request_params['method'] == 'GET':
|
||||||
|
|
|
@ -63,13 +63,15 @@ outgoing: # communication with search engines
|
||||||
pool_connections : 100 # Number of different hosts
|
pool_connections : 100 # Number of different hosts
|
||||||
pool_maxsize : 10 # Number of simultaneous requests by host
|
pool_maxsize : 10 # Number of simultaneous requests by host
|
||||||
# uncomment below section if you want to use a proxy
|
# uncomment below section if you want to use a proxy
|
||||||
# see http://docs.python-requests.org/en/latest/user/advanced/#proxies
|
# see https://2.python-requests.org/en/latest/user/advanced/#proxies
|
||||||
# SOCKS proxies are also supported: see http://requests.readthedocs.io/en/master/user/advanced/#socks
|
# SOCKS proxies are also supported: see https://2.python-requests.org/en/latest/user/advanced/#socks
|
||||||
# proxies:
|
# proxies:
|
||||||
# http : socks5h://127.0.0.1:9050
|
# http:
|
||||||
# https: socks5h://127.0.0.1:9050
|
# - http://proxy1:8080
|
||||||
# using_tor_proxy : True
|
# - http://proxy2:8080
|
||||||
# extra_proxy_timeout : 10.0 # Extra seconds to add in order to account for the time taken by the proxy
|
# https:
|
||||||
|
# - http://proxy1:8080
|
||||||
|
# - http://proxy2:8080
|
||||||
# uncomment below section only if you have more than one network interface
|
# uncomment below section only if you have more than one network interface
|
||||||
# which can be the source of outgoing search requests
|
# which can be the source of outgoing search requests
|
||||||
# source_ips:
|
# source_ips:
|
||||||
|
|
|
@ -78,6 +78,7 @@ from searx.plugins import plugins
|
||||||
from searx.plugins.oa_doi_rewrite import get_doi_resolver
|
from searx.plugins.oa_doi_rewrite import get_doi_resolver
|
||||||
from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES
|
from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES
|
||||||
from searx.answerers import answerers
|
from searx.answerers import answerers
|
||||||
|
from searx.poolrequests import get_global_proxies
|
||||||
|
|
||||||
|
|
||||||
# serve pages with HTTP/1.1
|
# serve pages with HTTP/1.1
|
||||||
|
@ -149,8 +150,6 @@ _category_names = (gettext('files'),
|
||||||
gettext('onions'),
|
gettext('onions'),
|
||||||
gettext('science'))
|
gettext('science'))
|
||||||
|
|
||||||
outgoing_proxies = settings['outgoing'].get('proxies') or None
|
|
||||||
|
|
||||||
_flask_babel_get_translations = flask_babel.get_translations
|
_flask_babel_get_translations = flask_babel.get_translations
|
||||||
|
|
||||||
|
|
||||||
|
@ -905,7 +904,7 @@ def image_proxy():
|
||||||
stream=True,
|
stream=True,
|
||||||
timeout=settings['outgoing']['request_timeout'],
|
timeout=settings['outgoing']['request_timeout'],
|
||||||
headers=headers,
|
headers=headers,
|
||||||
proxies=outgoing_proxies)
|
proxies=get_global_proxies())
|
||||||
|
|
||||||
if resp.status_code == 304:
|
if resp.status_code == 304:
|
||||||
return '', resp.status_code
|
return '', resp.status_code
|
||||||
|
|
89
tests/unit/test_poolrequests.py
Normal file
89
tests/unit/test_poolrequests.py
Normal file
|
@ -0,0 +1,89 @@
|
||||||
|
from unittest.mock import patch
|
||||||
|
from requests.models import Response
|
||||||
|
|
||||||
|
from searx.testing import SearxTestCase
|
||||||
|
|
||||||
|
import searx.poolrequests
|
||||||
|
from searx.poolrequests import get_proxy_cycles, get_proxies
|
||||||
|
|
||||||
|
|
||||||
|
CONFIG = {'http': ['http://localhost:9090', 'http://localhost:9092'],
          'https': ['http://localhost:9091', 'http://localhost:9093']}


def _config_copy():
    """Return an independent copy of ``CONFIG`` (new dict, new lists).

    Every test works on its own copy so that the outcome does not depend on
    whether ``get_proxy_cycles`` modifies the mapping it is given, nor on
    the order in which the tests run.
    """
    return {protocol: list(proxies) for protocol, proxies in CONFIG.items()}


class TestProxy(SearxTestCase):
    """Tests for the proxy rotation helpers of ``searx.poolrequests``."""

    def test_noconfig(self):
        """Falsy proxy settings produce no cycles at all."""
        cycles = get_proxy_cycles(None)
        self.assertIsNone(cycles)

        cycles = get_proxy_cycles(False)
        self.assertIsNone(cycles)

    def test_oldconfig(self):
        """A single proxy string per protocol (old format) is still accepted."""
        config = {
            'http': 'http://localhost:9090',
            'https': 'http://localhost:9091',
        }
        cycles = get_proxy_cycles(config)
        self.assertEqual(next(cycles['http']), 'http://localhost:9090')
        self.assertEqual(next(cycles['http']), 'http://localhost:9090')
        self.assertEqual(next(cycles['https']), 'http://localhost:9091')
        self.assertEqual(next(cycles['https']), 'http://localhost:9091')

    def test_one_proxy(self):
        """A one-element list keeps returning the same proxy."""
        config = {
            'http': ['http://localhost:9090'],
            'https': ['http://localhost:9091'],
        }
        cycles = get_proxy_cycles(config)
        self.assertEqual(next(cycles['http']), 'http://localhost:9090')
        self.assertEqual(next(cycles['http']), 'http://localhost:9090')
        self.assertEqual(next(cycles['https']), 'http://localhost:9091')
        self.assertEqual(next(cycles['https']), 'http://localhost:9091')

    def test_multiple_proxies(self):
        """Several proxies per protocol are handed out in round-robin order."""
        cycles = get_proxy_cycles(_config_copy())
        self.assertEqual(next(cycles['http']), 'http://localhost:9090')
        self.assertEqual(next(cycles['http']), 'http://localhost:9092')
        self.assertEqual(next(cycles['http']), 'http://localhost:9090')
        self.assertEqual(next(cycles['https']), 'http://localhost:9091')
        self.assertEqual(next(cycles['https']), 'http://localhost:9093')
        self.assertEqual(next(cycles['https']), 'http://localhost:9091')

    def test_getproxies_none(self):
        """Without cycles there are no proxies."""
        self.assertIsNone(get_proxies(None))

    def test_getproxies_config(self):
        """Each call to get_proxies advances every protocol by one proxy."""
        cycles = get_proxy_cycles(_config_copy())
        self.assertEqual(get_proxies(cycles), {
            'http': 'http://localhost:9090',
            'https': 'http://localhost:9091'
        })
        self.assertEqual(get_proxies(cycles), {
            'http': 'http://localhost:9092',
            'https': 'http://localhost:9093'
        })

    @patch('searx.poolrequests.get_global_proxies')
    def test_request(self, mock_get_global_proxies):
        """request() uses the global proxies unless ``proxies`` is passed."""
        method = 'GET'
        url = 'http://localhost'
        custom_proxies = {
            'https': 'http://localhost:1080'
        }
        global_proxies = {
            'http': 'http://localhost:9092',
            'https': 'http://localhost:9093'
        }
        mock_get_global_proxies.return_value = global_proxies

        # check the global proxies usage
        with patch.object(searx.poolrequests.SessionSinglePool, 'request', return_value=Response()) as mock_method:
            searx.poolrequests.request(method, url)
        mock_method.assert_called_once_with(method=method, url=url, proxies=global_proxies)

        # check if the proxies parameter overrides the global proxies
        with patch.object(searx.poolrequests.SessionSinglePool, 'request', return_value=Response()) as mock_method:
            searx.poolrequests.request(method, url, proxies=custom_proxies)
        mock_method.assert_called_once_with(method=method, url=url, proxies=custom_proxies)
|
Loading…
Reference in a new issue