diff --git a/searx/search/processors/__init__.py b/searx/search/processors/__init__.py
index 4e85527ba..a270b4ef5 100644
--- a/searx/search/processors/__init__.py
+++ b/searx/search/processors/__init__.py
@@ -11,6 +11,7 @@ __all__ = [
     'OnlineProcessor',
     'OnlineDictionaryProcessor',
     'OnlineCurrencyProcessor',
+    'OnlineUrlSearchProcessor',
     'PROCESSORS',
 ]
 
@@ -24,6 +25,7 @@ from .online import OnlineProcessor
 from .offline import OfflineProcessor
 from .online_dictionary import OnlineDictionaryProcessor
 from .online_currency import OnlineCurrencyProcessor
+from .online_url_search import OnlineUrlSearchProcessor
 from .abstract import EngineProcessor
 
 logger = logger.getChild('search.processors')
@@ -33,7 +35,13 @@ PROCESSORS: Dict[str, EngineProcessor] = {}
 
 def get_processor_class(engine_type):
     """Return processor class according to the ``engine_type``"""
-    for c in [OnlineProcessor, OfflineProcessor, OnlineDictionaryProcessor, OnlineCurrencyProcessor]:
+    for c in [
+        OnlineProcessor,
+        OfflineProcessor,
+        OnlineDictionaryProcessor,
+        OnlineCurrencyProcessor,
+        OnlineUrlSearchProcessor,
+    ]:
         if c.engine_type == engine_type:
             return c
     return None
diff --git a/searx/search/processors/online_url_search.py b/searx/search/processors/online_url_search.py
new file mode 100644
index 000000000..2863be28e
--- /dev/null
+++ b/searx/search/processors/online_url_search.py
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Processors for engine-type: ``online_url_search``
+
+"""
+
+import re
+from .online import OnlineProcessor
+
+re_search_urls = {
+    'http': re.compile(r'https?:\/\/[^ ]*'),
+    'ftp': re.compile(r'ftps?:\/\/[^ ]*'),
+    'data:image': re.compile('data:image/[^; ]*;base64,[^ ]*'),
+}
+
+
+class OnlineUrlSearchProcessor(OnlineProcessor):
+    """Processor class used by ``online_url_search`` engines."""
+
+    engine_type = 'online_url_search'
+
+    def get_params(self, search_query, engine_category):
+        params = super().get_params(search_query, engine_category)
+        if params is None:
+            return None
+
+        url_match = False
+        search_urls = {}
+
+        for k, v in re_search_urls.items():
+            m = v.search(search_query.query)
+            v = None
+            if m:
+                url_match = True
+                v = m[0]
+            search_urls[k] = v
+
+        if not url_match:
+            return None
+
+        params['search_urls'] = search_urls
+        return params
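
For context (not part of the diff): the new processor only runs an engine when the query contains an `http(s)`, `ftp(s)`, or `data:image` URL, and it hands the first match per pattern to the engine via `params['search_urls']` (entries with no match are `None`). Below is a minimal sketch of how an engine module might consume that field in its `request()` hook. The engine endpoint `base_url` and the query-parameter name `url` are assumptions for illustration only; they are not defined by this change.

```python
# Hypothetical online_url_search engine module (illustration only, not part of this diff).

from urllib.parse import quote

base_url = 'https://example.org/lookup'  # assumed endpoint, not from the PR
engine_type = 'online_url_search'

def request(query, params):
    # OnlineUrlSearchProcessor.get_params() fills params['search_urls'] with
    # {'http': ..., 'ftp': ..., 'data:image': ...}; unmatched keys are None.
    url = params['search_urls'].get('http')
    if url is None:
        # No http(s) URL in the query: leave params['url'] unset so no request is sent.
        return params
    params['url'] = base_url + '?url=' + quote(url, safe='')
    return params

def response(resp):
    # Parse resp into result dicts here; returning an empty list keeps the sketch runnable.
    return []
```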