[fix] make flask_babel.gettext() work in engine modules (L10n & threads)

incident:
  flask_babel.gettext() does not work in the engine modules.

cause:
  the request() and response() functions of the engine modules run in the
  processor, whose search() method runs in a thread.  In these threads the
  context of the Flask app does not exist, but the gettext() function needs
  that context for the L10n.

solution:
  copy the context of the Flask app into the threads. [1]

special case:
  We cannot equip the search() method of the processors with the decorator [1],
  because the decorator requires a context (Flask app) that does not yet exist
  at the time of the initialization of the processors (the initialization of the
  processors is part of the initialization of the Flask app).

[1] https://flask.palletsprojects.com/en/2.3.x/api/#flask.copy_current_request_context

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2023-08-09 09:55:48 +02:00 committed by Markus Heiser
parent c00c0c5434
commit 733b795d53
6 changed files with 43 additions and 173 deletions

View file

@ -8,6 +8,7 @@ from timeit import default_timer
from uuid import uuid4
import flask
from flask import copy_current_request_context
import babel
from searx import settings
@ -140,8 +141,9 @@ class Search:
search_id = str(uuid4())
for engine_name, query, request_params in requests:
_search = copy_current_request_context(PROCESSORS[engine_name].search)
th = threading.Thread( # pylint: disable=invalid-name
target=PROCESSORS[engine_name].search,
target=_search,
args=(query, request_params, self.result_container, self.start_time, self.actual_timeout),
name=search_id,
)

View file

@ -1,12 +1,26 @@
#!/usr/bin/env python
# lint: pylint
# SPDX-License-Identifier: AGPL-3.0-or-later
# (C) Copyright Contributors to the SearXNG project.
# (C) Copyright Contributors to the searx project (2014 - 2021)
"""Script to run SearXNG from terminal.
DON'T USE THIS SCRIPT!!
.. danger::
Be warned, using the ``standalone_searx.py`` won't give you privacy!
On the contrary, this script behaves like a SearXNG server: your IP is
exposed and tracked by all active engines (google, bing, qwant, ... ), with
every query!
.. note::
This is an old and grumpy hack / SearXNG is a Flask application with
client/server structure, which can't be turned into a command line tool the
way it was done here.
Getting categories without initializing the engines will only return `['general']`
>>> import searx.engines
@ -23,54 +37,6 @@ Example to use this script:
$ python3 searxng_extra/standalone_searx.py rain
.. danger::
Be warned, using the ``standalone_searx.py`` won't give you privacy!
On the contrary, this script behaves like a SearXNG server: your IP is
exposed and tracked by all active engines (google, bing, qwant, ... ), with
every query!
Example to run it from python:
>>> import importlib
... import json
... import sys
... import searx.engines
... import searx.search
... search_query = 'rain'
... # initialize engines
... searx.search.initialize()
... # load engines categories once instead of each time the function called
... engine_cs = list(searx.engines.categories.keys())
... # load module
... spec = importlib.util.spec_from_file_location(
... 'utils.standalone_searx', 'searxng_extra/standalone_searx.py')
... sas = importlib.util.module_from_spec(spec)
... spec.loader.exec_module(sas)
... # use function from module
... prog_args = sas.parse_argument([search_query], category_choices=engine_cs)
... search_q = sas.get_search_query(prog_args, engine_categories=engine_cs)
... res_dict = sas.to_dict(search_q)
... sys.stdout.write(json.dumps(
... res_dict, sort_keys=True, indent=4, ensure_ascii=False,
... default=sas.json_serial))
{
"answers": [],
"infoboxes": [ {...} ],
"paging": true,
"results": [... ],
"number_of_results": 820000000.0,
"search": {
"lang": "all",
"pageno": 1,
"q": "rain",
"safesearch": 0,
"timerange": null
},
"suggestions": [...]
}
""" # pylint: disable=line-too-long
import argparse

View file

@ -3,3 +3,8 @@
use_default_settings: true
search:
formats: [html, csv, json, rss]
engines:
- name: general dummy
engine: demo_offline
timeout: 3

View file

@ -43,6 +43,12 @@ class SearchQueryTestCase(SearxTestCase):
class SearchTestCase(SearxTestCase):
# Keep a reference to the web app; the tests use it to open a request context.
def setUp(self):
from searx import webapp # pylint: disable=import-outside-toplevel
self.app = webapp.app
@classmethod
def setUpClass(cls):
searx.search.initialize(TEST_ENGINES)
@ -53,7 +59,8 @@ class SearchTestCase(SearxTestCase):
'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, None
)
search = searx.search.Search(search_query)
search.search()
with self.app.test_request_context('/search'):
search.search()
self.assertEqual(search.actual_timeout, 3.0)
def test_timeout_query_above_default_nomax(self):
@ -62,7 +69,8 @@ class SearchTestCase(SearxTestCase):
'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, 5.0
)
search = searx.search.Search(search_query)
search.search()
with self.app.test_request_context('/search'):
search.search()
self.assertEqual(search.actual_timeout, 3.0)
def test_timeout_query_below_default_nomax(self):
@ -71,7 +79,8 @@ class SearchTestCase(SearxTestCase):
'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, 1.0
)
search = searx.search.Search(search_query)
search.search()
with self.app.test_request_context('/search'):
search.search()
self.assertEqual(search.actual_timeout, 1.0)
def test_timeout_query_below_max(self):
@ -80,7 +89,8 @@ class SearchTestCase(SearxTestCase):
'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, 5.0
)
search = searx.search.Search(search_query)
search.search()
with self.app.test_request_context('/search'):
search.search()
self.assertEqual(search.actual_timeout, 5.0)
def test_timeout_query_above_max(self):
@ -89,7 +99,8 @@ class SearchTestCase(SearxTestCase):
'test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], 'en-US', SAFESEARCH, PAGENO, None, 15.0
)
search = searx.search.Search(search_query)
search.search()
with self.app.test_request_context('/search'):
search.search()
self.assertEqual(search.actual_timeout, 10.0)
def test_external_bang(self):
@ -119,6 +130,7 @@ class SearchTestCase(SearxTestCase):
)
search = searx.search.Search(search_query)
results = search.search()
with self.app.test_request_context('/search'):
results = search.search()
# This should not redirect
self.assertTrue(results.redirect_url is None)

View file

@ -1,113 +0,0 @@
# -*- coding: utf-8 -*-
"""Test utils/standalone_searx.py"""
import datetime
import io
import sys
from mock import Mock, patch
from nose2.tools import params
from searx.search import SearchQuery, EngineRef, initialize
from searxng_extra import standalone_searx as sas
from tests import SearxTestCase
class StandaloneSearx(SearxTestCase):
"""Unit test for standalone_searx."""
@classmethod
def setUpClass(cls):
engine_list = [{'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1'}]
initialize(engine_list)
def test_parse_argument_no_args(self):
"""Test parse argument without args."""
with patch.object(sys, 'argv', ['standalone_searx']), self.assertRaises(SystemExit):
sys.stderr = io.StringIO()
sas.parse_argument()
sys.stdout = sys.__stderr__
def test_parse_argument_basic_args(self):
"""Test parse argument with basic args."""
query = 'red box'
exp_dict = {
'query': query,
'category': 'general',
'lang': 'all',
'pageno': 1,
'safesearch': '0',
'timerange': None,
}
args = ['standalone_searx', query]
with patch.object(sys, 'argv', args):
res = sas.parse_argument()
self.assertEqual(exp_dict, vars(res))
res2 = sas.parse_argument(args[1:])
self.assertEqual(exp_dict, vars(res2))
def test_to_dict(self):
"""test to_dict."""
self.assertEqual(
sas.to_dict(sas.get_search_query(sas.parse_argument(['red box']))),
{
'search': {'q': 'red box', 'pageno': 1, 'lang': 'all', 'safesearch': 0, 'timerange': None},
'results': [],
'infoboxes': [],
'suggestions': [],
'answers': [],
'paging': False,
'number_of_results': 0,
},
)
def test_to_dict_with_mock(self):
"""test to dict."""
with patch.object(sas.searx.search, 'Search') as mock_s:
m_search = mock_s().search()
m_sq = Mock()
self.assertEqual(
sas.to_dict(m_sq),
{
'answers': [],
'infoboxes': m_search.infoboxes,
'paging': m_search.paging,
'results': m_search.get_ordered_results(),
'number_of_results': m_search.number_of_results,
'search': {
'lang': m_sq.lang,
'pageno': m_sq.pageno,
'q': m_sq.query,
'safesearch': m_sq.safesearch,
'timerange': m_sq.time_range,
},
'suggestions': [],
},
)
def test_get_search_query(self):
"""test get_search_query."""
args = sas.parse_argument(
[
'rain',
]
)
search_q = sas.get_search_query(args)
self.assertTrue(search_q)
self.assertEqual(
search_q, SearchQuery('rain', [EngineRef('engine1', 'general')], 'all', 0, 1, None, None, None)
)
def test_no_parsed_url(self):
"""test no_parsed_url func"""
self.assertEqual(sas.no_parsed_url([{'parsed_url': 'http://example.com'}]), [{}])
@params((datetime.datetime(2020, 1, 1), '2020-01-01T00:00:00'), ('a'.encode('utf8'), 'a'), (set([1]), [1]))
def test_json_serial(self, arg, exp_res):
"""test json_serial func"""
self.assertEqual(sas.json_serial(arg), exp_res)
def test_json_serial_error(self):
"""test error on json_serial."""
with self.assertRaises(TypeError):
sas.json_serial('a')

View file

@ -198,9 +198,7 @@ class ViewsTestCase(SearxTestCase):
result = self.app.get('/preferences')
self.assertEqual(result.status_code, 200)
self.assertIn(b'<form id="search_form" method="post" action="/preferences"', result.data)
self.assertIn(
b'<input type="checkbox" id="checkbox_general" name="category_general" checked="checked"/>', result.data
)
self.assertIn(b'<div id="categories_container">', result.data)
self.assertIn(b'<legend id="pref_ui_locale">Interface language</legend>', result.data)
def test_browser_locale(self):