[mod] remove obsolete EngineTraits.supported_languages

All engines have been migrated from ``supported_languages`` to the
``fetch_traits`` concept.  There is no longer a need for the obsolete code that
implements the ``supported_languages`` concept.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2022-12-30 18:28:02 +01:00
parent 96a2eec3b5
commit 4d4aa13e1f
8 changed files with 31 additions and 217 deletions

View file

@ -19,9 +19,6 @@ from searx.engines import (
from searx.network import get as http_get from searx.network import get as http_get
from searx.exceptions import SearxEngineResponseException from searx.exceptions import SearxEngineResponseException
# a fetch_supported_languages() for XPath engines isn't available right now
# _brave = ENGINES_LANGUAGES['brave'].keys()
def get(*args, **kwargs): def get(*args, **kwargs):
if 'timeout' not in kwargs: if 'timeout' not in kwargs:
@ -225,14 +222,6 @@ def search_autocomplete(backend_name, query, sxng_locale):
backend = backends.get(backend_name) backend = backends.get(backend_name)
if backend is None: if backend is None:
return [] return []
if engines[backend_name].traits.data_type != "traits_v1":
# vintage / deprecated
if not sxng_locale or sxng_locale == 'all':
sxng_locale = 'en'
else:
sxng_locale = sxng_locale.split('-')[0]
try: try:
return backend(query, sxng_locale) return backend(query, sxng_locale)
except (HTTPError, SearxEngineResponseException): except (HTTPError, SearxEngineResponseException):

View file

@ -49,8 +49,7 @@
"uk": "\u0423\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430", "uk": "\u0423\u043a\u0440\u0430\u0457\u043d\u0441\u044c\u043a\u0430",
"zh": "\u4e2d\u6587\uff08\u7e41\u9ad4\uff09" "zh": "\u4e2d\u6587\uff08\u7e41\u9ad4\uff09"
}, },
"regions": {}, "regions": {}
"supported_languages": {}
}, },
"bing": { "bing": {
"all_locale": null, "all_locale": null,
@ -146,8 +145,7 @@
"zh-CN": "zh-CN", "zh-CN": "zh-CN",
"zh-HK": "zh-HK", "zh-HK": "zh-HK",
"zh-TW": "zh-TW" "zh-TW": "zh-TW"
}, }
"supported_languages": {}
}, },
"bing images": { "bing images": {
"all_locale": null, "all_locale": null,
@ -243,8 +241,7 @@
"zh-CN": "zh-CN", "zh-CN": "zh-CN",
"zh-HK": "zh-HK", "zh-HK": "zh-HK",
"zh-TW": "zh-TW" "zh-TW": "zh-TW"
}, }
"supported_languages": {}
}, },
"bing news": { "bing news": {
"all_locale": "en-WW", "all_locale": "en-WW",
@ -316,8 +313,7 @@
"it-IT": "it-IT", "it-IT": "it-IT",
"pt-BR": "pt-BR", "pt-BR": "pt-BR",
"zh-CN": "zh-CN" "zh-CN": "zh-CN"
}, }
"supported_languages": {}
}, },
"bing videos": { "bing videos": {
"all_locale": null, "all_locale": null,
@ -413,8 +409,7 @@
"zh-CN": "zh-CN", "zh-CN": "zh-CN",
"zh-HK": "zh-HK", "zh-HK": "zh-HK",
"zh-TW": "zh-TW" "zh-TW": "zh-TW"
}, }
"supported_languages": {}
}, },
"dailymotion": { "dailymotion": {
"all_locale": null, "all_locale": null,
@ -491,8 +486,7 @@
"vi-VN": "vi_VN", "vi-VN": "vi_VN",
"zh-CN": "zh_CN", "zh-CN": "zh_CN",
"zh-TW": "zh_TW" "zh-TW": "zh_TW"
}, }
"supported_languages": {}
}, },
"duckduckgo": { "duckduckgo": {
"all_locale": "wt-wt", "all_locale": "wt-wt",
@ -656,8 +650,7 @@
"zh-CN": "cn-zh", "zh-CN": "cn-zh",
"zh-HK": "hk-tzh", "zh-HK": "hk-tzh",
"zh-TW": "tw-tzh" "zh-TW": "tw-tzh"
}, }
"supported_languages": {}
}, },
"duckduckgo images": { "duckduckgo images": {
"all_locale": "wt-wt", "all_locale": "wt-wt",
@ -821,8 +814,7 @@
"zh-CN": "cn-zh", "zh-CN": "cn-zh",
"zh-HK": "hk-tzh", "zh-HK": "hk-tzh",
"zh-TW": "tw-tzh" "zh-TW": "tw-tzh"
}, }
"supported_languages": {}
}, },
"duckduckgo weather": { "duckduckgo weather": {
"all_locale": "wt-wt", "all_locale": "wt-wt",
@ -986,8 +978,7 @@
"zh-CN": "cn-zh", "zh-CN": "cn-zh",
"zh-HK": "hk-tzh", "zh-HK": "hk-tzh",
"zh-TW": "tw-tzh" "zh-TW": "tw-tzh"
}, }
"supported_languages": {}
}, },
"google": { "google": {
"all_locale": "ZZ", "all_locale": "ZZ",
@ -1439,8 +1430,7 @@
"zh-HK": "HK", "zh-HK": "HK",
"zh-SG": "SG", "zh-SG": "SG",
"zh-TW": "TW" "zh-TW": "TW"
}, }
"supported_languages": {}
}, },
"google images": { "google images": {
"all_locale": "ZZ", "all_locale": "ZZ",
@ -1892,8 +1882,7 @@
"zh-HK": "HK", "zh-HK": "HK",
"zh-SG": "SG", "zh-SG": "SG",
"zh-TW": "TW" "zh-TW": "TW"
}, }
"supported_languages": {}
}, },
"google news": { "google news": {
"all_locale": "ZZ", "all_locale": "ZZ",
@ -2238,8 +2227,7 @@
"zh-HK": "HK", "zh-HK": "HK",
"zh-SG": "SG", "zh-SG": "SG",
"zh-TW": "TW" "zh-TW": "TW"
}, }
"supported_languages": {}
}, },
"google scholar": { "google scholar": {
"all_locale": "ZZ", "all_locale": "ZZ",
@ -2691,8 +2679,7 @@
"zh-HK": "HK", "zh-HK": "HK",
"zh-SG": "SG", "zh-SG": "SG",
"zh-TW": "TW" "zh-TW": "TW"
}, }
"supported_languages": {}
}, },
"google videos": { "google videos": {
"all_locale": "ZZ", "all_locale": "ZZ",
@ -3144,8 +3131,7 @@
"zh-HK": "HK", "zh-HK": "HK",
"zh-SG": "SG", "zh-SG": "SG",
"zh-TW": "TW" "zh-TW": "TW"
}, }
"supported_languages": {}
}, },
"peertube": { "peertube": {
"all_locale": null, "all_locale": null,
@ -3174,8 +3160,7 @@
"zh_Hans": "zh", "zh_Hans": "zh",
"zh_Hant": "zh" "zh_Hant": "zh"
}, },
"regions": {}, "regions": {}
"supported_languages": {}
}, },
"qwant": { "qwant": {
"all_locale": null, "all_locale": null,
@ -3222,8 +3207,7 @@
"th-TH": "th_TH", "th-TH": "th_TH",
"zh-CN": "zh_CN", "zh-CN": "zh_CN",
"zh-HK": "zh_HK" "zh-HK": "zh_HK"
}, }
"supported_languages": {}
}, },
"qwant images": { "qwant images": {
"all_locale": null, "all_locale": null,
@ -3270,8 +3254,7 @@
"th-TH": "th_TH", "th-TH": "th_TH",
"zh-CN": "zh_CN", "zh-CN": "zh_CN",
"zh-HK": "zh_HK" "zh-HK": "zh_HK"
}, }
"supported_languages": {}
}, },
"qwant news": { "qwant news": {
"all_locale": null, "all_locale": null,
@ -3303,8 +3286,7 @@
"nl-BE": "nl_BE", "nl-BE": "nl_BE",
"nl-NL": "nl_NL", "nl-NL": "nl_NL",
"pt-PT": "pt_PT" "pt-PT": "pt_PT"
}, }
"supported_languages": {}
}, },
"qwant videos": { "qwant videos": {
"all_locale": null, "all_locale": null,
@ -3351,8 +3333,7 @@
"th-TH": "th_TH", "th-TH": "th_TH",
"zh-CN": "zh_CN", "zh-CN": "zh_CN",
"zh-HK": "zh_HK" "zh-HK": "zh_HK"
}, }
"supported_languages": {}
}, },
"sepiasearch": { "sepiasearch": {
"all_locale": null, "all_locale": null,
@ -3381,8 +3362,7 @@
"zh_Hans": "zh", "zh_Hans": "zh",
"zh_Hant": "zh" "zh_Hant": "zh"
}, },
"regions": {}, "regions": {}
"supported_languages": {}
}, },
"startpage": { "startpage": {
"all_locale": null, "all_locale": null,
@ -3521,8 +3501,7 @@
"zh-CN": "zh-CN_CN", "zh-CN": "zh-CN_CN",
"zh-HK": "zh-TW_HK", "zh-HK": "zh-TW_HK",
"zh-TW": "zh-TW_TW" "zh-TW": "zh-TW_TW"
}, }
"supported_languages": {}
}, },
"wikidata": { "wikidata": {
"all_locale": null, "all_locale": null,
@ -3610,8 +3589,7 @@
"zh": "zh", "zh": "zh",
"zh_Hant": "zh-classical" "zh_Hant": "zh-classical"
}, },
"regions": {}, "regions": {}
"supported_languages": {}
}, },
"wikipedia": { "wikipedia": {
"all_locale": null, "all_locale": null,
@ -3779,8 +3757,7 @@
"zh_Hans": "zh", "zh_Hans": "zh",
"zh_Hant": "zh-classical" "zh_Hant": "zh-classical"
}, },
"regions": {}, "regions": {}
"supported_languages": {}
}, },
"yahoo": { "yahoo": {
"all_locale": "any", "all_locale": "any",
@ -3820,7 +3797,6 @@
"zh_Hans": "zh_chs", "zh_Hans": "zh_chs",
"zh_Hant": "zh_cht" "zh_Hant": "zh_cht"
}, },
"regions": {}, "regions": {}
"supported_languages": {}
} }
} }

View file

@ -134,10 +134,3 @@ class Engine: # pylint: disable=too-few-public-methods
require_api_key: true require_api_key: true
results: HTML results: HTML
""" """
# deprecated properties
_fetch_supported_languages: Callable # deprecated use fetch_traits
supported_languages: Union[List[str], Dict[str, str]] # deprecated use traits
language_aliases: Dict[str, str] # deprecated not needed when using triats
supported_languages_url: str # deprecated not needed when using triats

View file

@ -13,11 +13,9 @@ used.
from __future__ import annotations from __future__ import annotations
import json import json
import dataclasses import dataclasses
from typing import Dict, Union, List, Callable, Optional, TYPE_CHECKING from typing import Dict, Union, Callable, Optional, TYPE_CHECKING
from typing_extensions import Literal, Self from typing_extensions import Literal, Self
from babel.localedata import locale_identifiers
from searx import locales from searx import locales
from searx.data import data_dir, ENGINE_TRAITS from searx.data import data_dir, ENGINE_TRAITS
@ -79,18 +77,8 @@ class EngineTraits:
language"). language").
""" """
data_type: Literal['traits_v1', 'supported_languages'] = 'traits_v1' data_type: Literal['traits_v1'] = 'traits_v1'
"""Data type, default is 'traits_v1' for vintage use 'supported_languages'. """Data type, default is 'traits_v1'.
.. hint::
For the transition period until the *fetch* functions of all the engines
are converted there will be the data_type 'supported_languages', which
maps the old logic unchanged 1:1.
Instances of data_type 'supported_languages' do not implement methods
like ``self.get_language(..)`` and ``self.get_region(..)``
""" """
custom: Dict[str, Dict] = dataclasses.field(default_factory=dict) custom: Dict[str, Dict] = dataclasses.field(default_factory=dict)
@ -139,16 +127,6 @@ class EngineTraits:
if self.data_type == 'traits_v1': if self.data_type == 'traits_v1':
return bool(self.get_region(searxng_locale) or self.get_language(searxng_locale)) return bool(self.get_region(searxng_locale) or self.get_language(searxng_locale))
if self.data_type == 'supported_languages': # vintage / deprecated
# pylint: disable=import-outside-toplevel
from searx.utils import match_language
if searxng_locale == 'all':
return True
x = match_language(searxng_locale, self.supported_languages, self.language_aliases, None)
return bool(x)
# return bool(self.get_supported_language(searxng_locale))
raise TypeError('engine traits of type %s is unknown' % self.data_type) raise TypeError('engine traits of type %s is unknown' % self.data_type)
def copy(self): def copy(self):
@ -178,10 +156,6 @@ class EngineTraits:
if self.data_type == 'traits_v1': if self.data_type == 'traits_v1':
self._set_traits_v1(engine) self._set_traits_v1(engine)
elif self.data_type == 'supported_languages': # vintage / deprecated
self._set_supported_languages(engine)
else: else:
raise TypeError('engine traits of type %s is unknown' % self.data_type) raise TypeError('engine traits of type %s is unknown' % self.data_type)
@ -215,106 +189,6 @@ class EngineTraits:
# set the copied & modified traits in engine's namespace # set the copied & modified traits in engine's namespace
engine.traits = traits engine.traits = traits
# -------------------------------------------------------------------------
# The code below is deprecated and can hopefully be deleted one day
# -------------------------------------------------------------------------
supported_languages: Union[List[str], Dict[str, str]] = dataclasses.field(default_factory=dict)
"""deprecated: does not work for engines that do support languages based on a
region. With this type it is not guaranteed that the key values can be
parsed by :py:obj:`babel.Locale.parse`!
"""
# language_aliases: Dict[str, str] = dataclasses.field(default_factory=dict)
# """depricated: does not work for engines that do support languages based on a
# region. With this type it is not guaranteed that the key values can be
# parsed by :py:obj:`babel.Locale.parse`!
# """
BABEL_LANGS = [
lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())
]
# def get_supported_language(self, searxng_locale, default=None): # vintage / deprecated
# """Return engine's language string that *best fits* to SearXNG's locale."""
# if searxng_locale == 'all' and self.all_locale is not None:
# return self.all_locale
# return locales.get_engine_locale(searxng_locale, self.supported_languages, default=default)
@classmethod # vintage / deprecated
def fetch_supported_languages(cls, engine: Engine) -> Union[Self, None]:
"""DEPRECATED: Calls a function ``_fetch_supported_languages`` from engine's
namespace to fetch languages from the origin engine. If function does
not exists, ``None`` is returned.
"""
# pylint: disable=import-outside-toplevel
from searx import network
from searx.utils import gen_useragent
fetch_languages = getattr(engine, '_fetch_supported_languages', None)
if fetch_languages is None:
return None
# The headers has been moved here from commit 9b6ffed06: Some engines (at
# least bing and startpage) return a different result list of supported
# languages depending on the IP location where the HTTP request comes from.
# The IP based results (from bing) can be avoided by setting a
# 'Accept-Language' in the HTTP request.
headers = {
'User-Agent': gen_useragent(),
'Accept-Language': "en-US,en;q=0.5", # bing needs to set the English language
}
resp = network.get(engine.supported_languages_url, headers=headers)
supported_languages = fetch_languages(resp)
if isinstance(supported_languages, list):
supported_languages.sort()
engine_traits = cls()
engine_traits.data_type = 'supported_languages'
engine_traits.supported_languages = supported_languages
return engine_traits
def _set_supported_languages(self, engine: Engine): # vintage / deprecated
traits = self.copy()
# pylint: disable=import-outside-toplevel
from searx.utils import match_language
_msg = "settings.yml - engine: '%s' / %s: '%s' not supported"
if hasattr(engine, 'language'):
if engine.language not in self.supported_languages:
raise ValueError(_msg % (engine.name, 'language', engine.language))
if isinstance(self.supported_languages, dict):
traits.supported_languages = {engine.language: self.supported_languages[engine.language]}
else:
traits.supported_languages = [engine.language]
engine.language_support = bool(traits.supported_languages)
engine.supported_languages = traits.supported_languages
# find custom aliases for non standard language codes
traits.language_aliases = {} # pylint: disable=attribute-defined-outside-init
for engine_lang in getattr(engine, 'language_aliases', {}):
iso_lang = match_language(engine_lang, self.BABEL_LANGS, fallback=None)
if (
iso_lang
and iso_lang != engine_lang
and not engine_lang.startswith(iso_lang)
and iso_lang not in self.supported_languages
):
traits.language_aliases[iso_lang] = engine_lang
engine.language_aliases = traits.language_aliases
# set the copied & modified traits in engine's namespace
engine.traits = traits
class EngineTraitsMap(Dict[str, EngineTraits]): class EngineTraitsMap(Dict[str, EngineTraits]):
"""A python dictionary to map :class:`EngineTraits` by engine name.""" """A python dictionary to map :class:`EngineTraits` by engine name."""
@ -352,17 +226,6 @@ class EngineTraitsMap(Dict[str, EngineTraits]):
log("%-20s: SearXNG regions --> %s" % (engine_name, len(traits.regions))) log("%-20s: SearXNG regions --> %s" % (engine_name, len(traits.regions)))
obj[engine_name] = traits obj[engine_name] = traits
# vintage / deprecated
_traits = EngineTraits.fetch_supported_languages(engine)
if _traits is not None:
log("%-20s: %s supported_languages (deprecated)" % (engine_name, len(_traits.supported_languages)))
if traits is not None:
traits.supported_languages = _traits.supported_languages
obj[engine_name] = traits
else:
obj[engine_name] = _traits
continue
return obj return obj
def set_traits(self, engine: Engine): def set_traits(self, engine: Engine):

View file

@ -43,8 +43,6 @@ ENGINE_DEFAULT_ARGS = {
"send_accept_language_header": False, "send_accept_language_header": False,
"tokens": [], "tokens": [],
"about": {}, "about": {},
"supported_languages": [], # deprecated use traits
"language_aliases": {}, # deprecated not needed when using traits
} }
# set automatically when an engine does not have any tab category # set automatically when an engine does not have any tab category
OTHER_CATEGORY = 'other' OTHER_CATEGORY = 'other'

View file

@ -25,6 +25,7 @@ base_url = 'https://wiki.gentoo.org'
# xpath queries # xpath queries
xpath_results = '//ul[@class="mw-search-results"]/li' xpath_results = '//ul[@class="mw-search-results"]/li'
xpath_link = './/div[@class="mw-search-result-heading"]/a' xpath_link = './/div[@class="mw-search-result-heading"]/a'
xpath_content = './/div[@class="searchresult"]'
# cut 'en' from 'en-US', 'de' from 'de-CH', and so on # cut 'en' from 'en-US', 'de' from 'de-CH', and so on
@ -77,8 +78,6 @@ main_langs = {
'uk': 'Українська', 'uk': 'Українська',
'zh': '简体中文', 'zh': '简体中文',
} }
supported_languages = dict(lang_urls, **main_langs)
# do search-request # do search-request
def request(query, params): def request(query, params):
@ -118,7 +117,8 @@ def response(resp):
link = result.xpath(xpath_link)[0] link = result.xpath(xpath_link)[0]
href = urljoin(base_url, link.attrib.get('href')) href = urljoin(base_url, link.attrib.get('href'))
title = extract_text(link) title = extract_text(link)
content = extract_text(result.xpath(xpath_content))
results.append({'url': href, 'title': title}) results.append({'url': href, 'title': title, 'content': content})
return results return results

View file

@ -221,7 +221,7 @@ class OnlineProcessor(EngineProcessor):
'test': ['unique_results'], 'test': ['unique_results'],
} }
if getattr(self.engine, 'supported_languages', []): if getattr(self.engine, 'traits', False):
tests['lang_fr'] = { tests['lang_fr'] = {
'matrix': {'query': 'paris', 'lang': 'fr'}, 'matrix': {'query': 'paris', 'lang': 'fr'},
'result_container': ['not_empty', ('has_language', 'fr')], 'result_container': ['not_empty', ('has_language', 'fr')],

View file

@ -1317,11 +1317,6 @@ def config():
continue continue
_languages = engine.traits.languages.keys() _languages = engine.traits.languages.keys()
if engine.traits.data_type == 'supported_languages': # vintage / deprecated
_languages = engine.traits.supported_languages
if isinstance(_languages, dict):
_languages = _languages.keys()
_engines.append( _engines.append(
{ {
'name': name, 'name': name,