forked from Ponysearch/Ponysearch
d14994dc73
settings.yml: * outgoing.networks: * can contains network definition * propertiers: enable_http, verify, http2, max_connections, max_keepalive_connections, keepalive_expiry, local_addresses, support_ipv4, support_ipv6, proxies, max_redirects, retries * retries: 0 by default, number of times searx retries to send the HTTP request (using different IP & proxy each time) * local_addresses can be "192.168.0.1/24" (it supports IPv6) * support_ipv4 & support_ipv6: both True by default see https://github.com/searx/searx/pull/1034 * each engine can define a "network" section: * either a full network description * either reference an existing network * all HTTP requests of engine use the same HTTP configuration (it was not the case before, see proxy configuration in master)
66 lines
2.6 KiB
Python
66 lines
2.6 KiB
Python
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
"""
|
|
Raise exception for an HTTP response is an error.
|
|
"""
|
|
from searx.exceptions import (SearxEngineCaptchaException, SearxEngineTooManyRequestsException,
|
|
SearxEngineAccessDeniedException)
|
|
|
|
|
|
def is_cloudflare_challenge(resp):
|
|
if resp.status_code in [429, 503]:
|
|
if ('__cf_chl_jschl_tk__=' in resp.text)\
|
|
or ('/cdn-cgi/challenge-platform/' in resp.text
|
|
and 'orchestrate/jsch/v1' in resp.text
|
|
and 'window._cf_chl_enter(' in resp.text):
|
|
return True
|
|
if resp.status_code == 403 and '__cf_chl_captcha_tk__=' in resp.text:
|
|
return True
|
|
return False
|
|
|
|
|
|
def is_cloudflare_firewall(resp):
|
|
return resp.status_code == 403 and '<span class="cf-error-code">1020</span>' in resp.text
|
|
|
|
|
|
def raise_for_cloudflare_captcha(resp):
|
|
if resp.headers.get('Server', '').startswith('cloudflare'):
|
|
if is_cloudflare_challenge(resp):
|
|
# https://support.cloudflare.com/hc/en-us/articles/200170136-Understanding-Cloudflare-Challenge-Passage-Captcha-
|
|
# suspend for 2 weeks
|
|
raise SearxEngineCaptchaException(message='Cloudflare CAPTCHA', suspended_time=3600 * 24 * 15)
|
|
|
|
if is_cloudflare_firewall(resp):
|
|
raise SearxEngineAccessDeniedException(message='Cloudflare Firewall', suspended_time=3600 * 24)
|
|
|
|
|
|
def raise_for_recaptcha(resp):
|
|
if resp.status_code == 503 \
|
|
and '"https://www.google.com/recaptcha/' in resp.text:
|
|
raise SearxEngineCaptchaException(message='ReCAPTCHA', suspended_time=3600 * 24 * 7)
|
|
|
|
|
|
def raise_for_captcha(resp):
|
|
raise_for_cloudflare_captcha(resp)
|
|
raise_for_recaptcha(resp)
|
|
|
|
|
|
def raise_for_httperror(resp):
|
|
"""Raise exception for an HTTP response is an error.
|
|
|
|
Args:
|
|
resp (requests.Response): Response to check
|
|
|
|
Raises:
|
|
requests.HTTPError: raise by resp.raise_for_status()
|
|
searx.exceptions.SearxEngineAccessDeniedException: raise when the HTTP status code is 402 or 403.
|
|
searx.exceptions.SearxEngineTooManyRequestsException: raise when the HTTP status code is 429.
|
|
searx.exceptions.SearxEngineCaptchaException: raise when if CATPCHA challenge is detected.
|
|
"""
|
|
if resp.status_code and resp.status_code >= 400:
|
|
raise_for_captcha(resp)
|
|
if resp.status_code in (402, 403):
|
|
raise SearxEngineAccessDeniedException(message='HTTP error ' + str(resp.status_code),
|
|
suspended_time=3600 * 24)
|
|
if resp.status_code == 429:
|
|
raise SearxEngineTooManyRequestsException()
|
|
resp.raise_for_status()
|