From fa909c7c024d9ec98f6611fde0f99b0e797b1f3b Mon Sep 17 00:00:00 2001
From: Alexandre Flament <alex@al-f.net>
Date: Thu, 3 Dec 2020 13:23:19 +0100
Subject: [PATCH] [mod] stackoverflow & yandex: detect CAPTCHA response

---
 searx/engines/stackoverflow.py | 7 ++++++-
 searx/engines/yandex.py        | 7 ++++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py
index c6d58de65..f730264e2 100644
--- a/searx/engines/stackoverflow.py
+++ b/searx/engines/stackoverflow.py
@@ -10,9 +10,10 @@
  @parse       url, title, content
 """
 
-from urllib.parse import urlencode, urljoin
+from urllib.parse import urlencode, urljoin, urlparse
 from lxml import html
 from searx.utils import extract_text
+from searx.exceptions import SearxEngineCaptchaException
 
 # engine dependent config
 categories = ['it']
@@ -37,6 +38,10 @@ def request(query, params):
 
 # get response from search-request
 def response(resp):
+    resp_url = urlparse(resp.url)
+    if resp_url.path.startswith('/nocaptcha'):
+        raise SearxEngineCaptchaException()
+
     results = []
 
     dom = html.fromstring(resp.text)
diff --git a/searx/engines/yandex.py b/searx/engines/yandex.py
index ff1ef5a26..b4a6a54cf 100644
--- a/searx/engines/yandex.py
+++ b/searx/engines/yandex.py
@@ -9,9 +9,10 @@
  @parse       url, title, content
 """
 
-from urllib.parse import urlencode
+from urllib.parse import urlencode, urlparse
 from lxml import html
 from searx import logger
+from searx.exceptions import SearxEngineCaptchaException
 
 logger = logger.getChild('yandex engine')
 
@@ -47,6 +48,10 @@ def request(query, params):
 
 # get response from search-request
 def response(resp):
+    resp_url = urlparse(resp.url)
+    if resp_url.path.startswith('/showcaptcha'):
+        raise SearxEngineCaptchaException()
+
     dom = html.fromstring(resp.text)
     results = []