forked from Ponysearch/Ponysearch
[fix] startpage engine: fetch CAPTCHA & issues related to PR-695
In case of a CAPTCHA, raise a SearxEngineCaptchaException and suspend the engine for 7 days.

When get_sc_code() fails, raise a SearxEngineResponseException and suspend the engine for 7 days [1].

[1] https://github.com/searxng/searxng/pull/695

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
2f4e567e90
commit
21e884f369
1 changed file with 21 additions and 2 deletions
|
@ -18,6 +18,11 @@ from babel.localedata import locale_identifiers
|
||||||
|
|
||||||
from searx import network
|
from searx import network
|
||||||
from searx.utils import extract_text, eval_xpath, match_language
|
from searx.utils import extract_text, eval_xpath, match_language
|
||||||
|
from searx.exceptions import (
|
||||||
|
SearxEngineResponseException,
|
||||||
|
SearxEngineCaptchaException,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# about
|
# about
|
||||||
about = {
|
about = {
|
||||||
|
@ -54,6 +59,13 @@ sc_code_ts = 0
|
||||||
sc_code = ''
|
sc_code = ''
|
||||||
|
|
||||||
|
|
||||||
|
def raise_captcha(resp):
|
||||||
|
|
||||||
|
if str(resp.url).startswith('https://www.startpage.com/sp/captcha'):
|
||||||
|
# suspend CAPTCHA for 7 days
|
||||||
|
raise SearxEngineCaptchaException(suspended_time=7 * 24 * 3600)
|
||||||
|
|
||||||
|
|
||||||
def get_sc_code(headers):
|
def get_sc_code(headers):
|
||||||
"""Get an actual `sc` argument from startpage's home page.
|
"""Get an actual `sc` argument from startpage's home page.
|
||||||
|
|
||||||
|
@ -73,10 +85,17 @@ def get_sc_code(headers):
|
||||||
logger.debug("query new sc time-stamp ...")
|
logger.debug("query new sc time-stamp ...")
|
||||||
|
|
||||||
resp = network.get(base_url, headers=headers)
|
resp = network.get(base_url, headers=headers)
|
||||||
|
raise_captcha(resp)
|
||||||
dom = html.fromstring(resp.text)
|
dom = html.fromstring(resp.text)
|
||||||
|
|
||||||
|
try:
|
||||||
# href --> '/?sc=adrKJMgF8xwp20'
|
# href --> '/?sc=adrKJMgF8xwp20'
|
||||||
href = eval_xpath(dom, '//a[@class="footer-home__logo"]')[0].get('href')
|
href = eval_xpath(dom, '//a[@class="footer-home__logo"]')[0].get('href')
|
||||||
|
except IndexError as exc:
|
||||||
|
# suspend startpage API --> https://github.com/searxng/searxng/pull/695
|
||||||
|
raise SearxEngineResponseException(
|
||||||
|
suspended_time=7 * 24 * 3600, message="PR-695: query new sc time-stamp failed!"
|
||||||
|
)
|
||||||
|
|
||||||
sc_code = href[5:]
|
sc_code = href[5:]
|
||||||
sc_code_ts = time()
|
sc_code_ts = time()
|
||||||
|
|
Loading…
Reference in a new issue