# Ponysearch/searx/engines/wikipedia.py

# SPDX-License-Identifier: AGPL-3.0-or-later
"""
 Wikipedia (Web)
"""

from urllib.parse import quote
from json import loads
from lxml import html
from searx.utils import match_language, searx_useragent
from searx import network
from searx.enginelib.traits import EngineTraits

# Annotation only: no value is assigned in this file.
# NOTE(review): presumably bound by the engine loader at runtime -- confirm.
engine_traits: EngineTraits

# about
# Static metadata describing this engine (website, Wikidata id, API
# documentation link, whether the official API is used, whether an API key is
# required, and the result format).
about = {
    "website": 'https://www.wikipedia.org/',
    "wikidata_id": 'Q52',
    "official_api_documentation": 'https://en.wikipedia.org/api/',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}


# Engine option; not read anywhere in this file.
# NOTE(review): presumably makes the request carry an 'Accept-Language' HTTP
# header -- confirm against the online processor.
send_accept_language_header = True

# search-url
# URL template filled in by request(): {language} is the Wikipedia subdomain,
# {title} the URL-quoted page title.
search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
# Page listing all Wikipedias; fetch_traits() requests the same URL (hard-coded
# there). This variable itself is not referenced elsewhere in this file.
supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
# Regional variants of Chinese; not referenced elsewhere in this file.
language_variants = {"zh": ("zh-cn", "zh-hk", "zh-mo", "zh-my", "zh-sg", "zh-tw")}


# set language in base_url
def url_lang(lang):
    """Return the Wikipedia subdomain (language code) to use for ``lang``.

    ``lang`` is a language tag such as ``'de'``, ``'pt-BR'`` or ``'all'``.
    ``'en'`` is returned when the primary subtag is ``'all'`` or when it is
    found neither in ``supported_languages`` nor in ``language_aliases``.
    Otherwise the result of ``match_language`` is used, cut down to its
    primary subtag.

    ``supported_languages`` and ``language_aliases`` are module globals that
    are not defined in this file (presumably injected by the engine loader --
    verify).
    """
    primary = lang.split('-')[0]

    # 'all' means "no language preference": fall back to English
    if primary == 'all':
        return 'en'

    # neither a supported language nor a known alias: fall back to English
    if primary not in supported_languages and primary not in language_aliases:
        return 'en'

    matched = match_language(lang, supported_languages, language_aliases)
    return matched.split('-')[0]


# do search-request
def request(query, params):
    """Prepare ``params`` for a page-summary lookup of ``query``.

    Sets ``params['url']`` from ``search_url``, overrides the ``User-Agent``
    header and adjusts HTTP error / redirect handling.  The same ``params``
    object is returned.
    """
    # An all-lowercase query is title-cased before being used as page title.
    page_title = query.title() if query.islower() else query

    language = url_lang(params['language'])
    # NOTE(review): quote() keeps '/' unescaped by default; confirm that titles
    # containing a slash are accepted by the REST endpoint in this form.
    params['url'] = search_url.format(language=language, title=quote(page_title))

    params['headers']['User-Agent'] = searx_useragent()

    # response() inspects the HTTP status itself (404 / 400) and calls
    # network.raise_for_httperror() afterwards, so the automatic check is
    # switched off here.
    params['raise_for_httperror'] = False
    params['soft_max_redirects'] = 2

    return params


# get response from search-request
def response(resp):
    """Convert the REST summary response into a list of results.

    An empty list is returned when

    - the page does not exist (HTTP 404),
    - the API rejected the title because of invalid characters (HTTP 400 with
      a matching JSON error body), or
    - the returned page is not of type ``'standard'``.

    Otherwise two entries are returned: a plain link result and an infobox
    built from the page summary.

    Any other HTTP error is delegated to ``network.raise_for_httperror``,
    which raises for it.
    """
    if resp.status_code == 404:
        return []

    if resp.status_code == 400:
        try:
            error_info = loads(resp.text)
        except ValueError:
            # Body is not valid JSON (json.JSONDecodeError is a ValueError):
            # leave it to the generic HTTP error handling below.
            error_info = None

        # Use .get() on a verified dict so that an unexpected error body ends
        # up in raise_for_httperror() instead of a KeyError / TypeError here.
        if (
            isinstance(error_info, dict)
            and error_info.get('type') == 'https://mediawiki.org/wiki/HyperSwitch/errors/bad_request'
            and error_info.get('detail') == 'title-invalid-characters'
        ):
            return []

    network.raise_for_httperror(resp)

    api_result = loads(resp.text)

    # skip disambiguation pages (anything that is not a 'standard' page)
    if api_result.get('type') != 'standard':
        return []

    title = api_result['title']
    wikipedia_link = api_result['content_urls']['desktop']['page']

    return [
        # plain link result
        {'url': wikipedia_link, 'title': title},
        # infobox with extract and (optional) thumbnail
        {
            'infobox': title,
            'id': wikipedia_link,
            'content': api_result.get('extract', ''),
            'img_src': api_result.get('thumbnail', {}).get('source'),
            'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}],
        },
    ]


# get supported languages from their site
def _fetch_supported_languages(resp):
    """Parse the "List of Wikipedias" page into a language dictionary.

    ``resp.text`` is parsed as HTML.  Every row (except the first) of every
    table whose class contains ``sortable`` is read; rows with fewer than 100
    articles are dropped.  The result maps the wiki code to a dict with the
    keys ``name`` and ``english_name``.
    """

    def _link_text(cell):
        # text of the first <a> element that is a direct child of the cell
        return cell.xpath('./a')[0].text

    languages = {}
    document = html.fromstring(resp.text)

    for table in document.xpath('//table[contains(@class,"sortable")]'):
        # the first row is the header row
        for row in table.xpath('.//tr')[1:]:
            cells = row.xpath('./td')
            code = _link_text(cells[3])
            # NOTE(review): both names are taken from the same column (index 1);
            # confirm whether the local name should come from another column.
            name = english_name = _link_text(cells[1])
            article_count = int(_link_text(cells[4]).replace(',', ''))

            # exclude languages with too few articles
            if article_count < 100:
                continue
            languages[code] = {"name": name, "english_name": english_name}

    return languages


# Nonstandard language codes
#
# These Wikipedias use language codes that do not conform to the ISO 639
# standard (which is how wiki subdomains are chosen nowadays).

# Maps a Wikipedia language code to the identifier that fetch_traits() passes to
# babel.Locale.parse() instead of the code itself.
#
# NOTE: the keys 'be-tarask', 'cbk-zam', 'nrm', 'roa-rup' and 'an' are also
# listed in unknown_langs; fetch_traits() skips those tags before this mapping
# is consulted, so these entries currently have no effect there.
lang_map = {
    'be-tarask': 'bel',
    'ak': 'aka',
    'als': 'gsw',
    'bat-smg': 'sgs',
    'cbk-zam': 'cbk',
    'fiu-vro': 'vro',
    'map-bms': 'map',
    'nrm': 'nrf',
    'roa-rup': 'rup',
    'nds-nl': 'nds',
    # 'roa-tara': invented code used for the Tarantino Wikipedia (roa is the standard code for the large family of Romance languages that the Tarantino dialect falls within)
    # 'simple': invented code used for the Simple English Wikipedia (not the official IETF code en-simple)
    'zh-classical': 'zh_Hant',
    'zh-min-nan': 'nan',
    'zh-yue': 'yue',
    'an': 'arg',
}

# Wikipedia language codes that fetch_traits() skips.  Going by the notes on
# 'sco' and 'simple' below, these are codes babel does not know (or does not
# treat as a distinct language).  Each code is listed exactly once.
unknown_langs = [
    'ab',  # Abkhazian
    'alt',  # Southern Altai
    'an',  # Aragonese
    'ang',  # Anglo-Saxon
    'arc',  # Aramaic
    'ary',  # Moroccan Arabic
    'av',  # Avar
    'ba',  # Bashkir
    'be-tarask',
    'bar',  # Bavarian
    'bcl',  # Central Bicolano
    'bh',  # Bhojpuri
    'bi',  # Bislama
    'bjn',  # Banjar
    'blk',  # Pa'O
    'bpy',  # Bishnupriya Manipuri
    'bxr',  # Buryat
    'cbk-zam',  # Zamboanga Chavacano
    'co',  # Corsican
    'cu',  # Old Church Slavonic
    'dty',  # Doteli
    'dv',  # Divehi
    'ext',  # Extremaduran
    'fj',  # Fijian
    'frp',  # Franco-Provençal
    'gan',  # Gan
    'gom',  # Goan Konkani
    'hif',  # Fiji Hindi
    'ilo',  # Ilokano
    'inh',  # Ingush
    'jbo',  # Lojban
    'kaa',  # Karakalpak
    'kbd',  # Kabardian Circassian
    'kg',  # Kongo
    'koi',  # Komi-Permyak
    'krc',  # Karachay-Balkar
    'kv',  # Komi
    'lad',  # Ladino
    'lbe',  # Lak
    'lez',  # Lezgian
    'li',  # Limburgish
    'ltg',  # Latgalian
    'mdf',  # Moksha
    'mnw',  # Mon
    'mwl',  # Mirandese
    'myv',  # Erzya
    'na',  # Nauruan
    'nah',  # Nahuatl
    'nov',  # Novial
    'nrm',  # Norman
    'pag',  # Pangasinan
    'pam',  # Kapampangan
    'pap',  # Papiamentu
    'pdc',  # Pennsylvania German
    'pfl',  # Palatinate German
    'roa-rup',  # Aromanian
    'sco',  # Scots (https://sco.wikipedia.org) is not known by babel, Scottish Gaelic (https://gd.wikipedia.org) is known by babel
    'sh',  # Serbo-Croatian
    'simple',  # simple english is not known as a natural language different to english (babel)
    'sm',  # Samoan
    'srn',  # Sranan
    'stq',  # Saterland Frisian
    'szy',  # Sakizaya
    'tcy',  # Tulu
    'tet',  # Tetum
    'tpi',  # Tok Pisin
    'trv',  # Seediq
    'ty',  # Tahitian
    'tyv',  # Tuvan
    'udm',  # Udmurt
    'vep',  # Vepsian
    'vls',  # West Flemish
    'vo',  # Volapük
    'wa',  # Walloon
    'xal',  # Kalmyk
]


def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages from Wikipedia

    Downloads the "List of Wikipedias" page and fills
    ``engine_traits.languages`` with a mapping from a SearXNG language tag to
    the Wikipedia language code.  A Wikipedia is skipped when

    - it has fewer than 1000 articles,
    - its depth is below 20, or no depth is given and it has fewer than 1000
      users,
    - its code is listed in ``unknown_langs``, or
    - babel does not know the (mapped) language code.

    When two Wikipedias resolve to the same tag the first one wins and a
    conflict is printed.  ``zh_Hans`` is always mapped to ``zh`` at the end.
    """
    # pylint: disable=import-outside-toplevel

    engine_traits.data_type = 'supported_languages'  # deprecated

    import babel
    from searx.locales import language_tag

    def _to_int(cell_text):
        # "1,234" --> 1234 ; a dash is treated as a zero digit
        return int(cell_text.replace(',', '').replace('-', '0'))

    resp = network.get('https://meta.wikimedia.org/wiki/List_of_Wikipedias')
    if not resp.ok:
        # NOTE: the failure is only reported, the body is still parsed below
        print("ERROR: response from Wikipedia is not OK.")

    document = html.fromstring(resp.text)
    for table_row in document.xpath('//table[contains(@class,"sortable")]//tbody/tr'):

        cells = [cell.text_content().strip() for cell in table_row.xpath('./td')]
        if not cells:
            # row without <td> cells, nothing to read
            continue

        articles = _to_int(cells[4])
        users = _to_int(cells[8])
        depth = cells[11].strip('-')

        # exclude languages with too few articles
        if articles < 1000:
            continue

        # depth: rough indicator of a Wikipedia’s quality, showing how
        #        frequently its articles are updated.
        if depth:
            if int(depth) < 20:
                continue
        elif users < 1000:
            # depth is not calculated --> at least 1000 users should be registered
            continue

        eng_tag = cols_tag = cells[3]
        if cols_tag in unknown_langs:
            continue

        try:
            sxng_tag = language_tag(babel.Locale.parse(lang_map.get(eng_tag, eng_tag)))
        except babel.UnknownLocaleError:
            print("ERROR: %s -> %s is unknown by babel" % (cells[1], eng_tag))
            continue

        # keep the first Wikipedia registered for a tag, report real conflicts
        registered = engine_traits.languages.get(sxng_tag)
        if not registered:
            engine_traits.languages[sxng_tag] = eng_tag
        elif registered != eng_tag:
            print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, registered, eng_tag))

    engine_traits.languages['zh_Hans'] = 'zh'
-												[enh] engines: add about variable

move meta information from comment to the about variable
so the preferences, the documentation can show these information

											
										
										
											2021-01-13 11:31:25 +01:00
+								# SPDX-License-Identifier: AGPL-3.0-or-later
-												[enh] wikipedia infobox

creates simple multilingual infobox using wikipedia's api

											
										
										
											2016-03-14 07:32:36 +01:00
+								"""
 								 Wikipedia (Web)
 								"""
-												Drop Python 2 (1/n): remove unicode string and url_utils

											
										
										
											2020-08-06 17:42:46 +02:00
+								from urllib.parse import quote
-												[enh] wikipedia infobox

creates simple multilingual infobox using wikipedia's api

											
										
										
											2016-03-14 07:32:36 +01:00
+								from json import loads
-												[mod] Wikipedia: fetch engine traits (data_type: supported_languages)

Implements a fetch_traits function for the Wikipedia engines.

.. note::

   Does not include migration of the request method from 'supported_languages'
   to 'traits' (EngineTraits) object!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-08 16:22:26 +02:00
+								from lxml import html
-												use Wikipedia's REST v1 API

											
										
										
											2020-09-08 07:05:21 +02:00
+								from searx.utils import match_language, searx_useragent
-												[mod] Wikipedia: fetch engine traits (data_type: supported_languages)

Implements a fetch_traits function for the Wikipedia engines.

.. note::

   Does not include migration of the request methode from 'supported_languages'
   to 'traits' (EngineTraits) object!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-08 16:22:26 +02:00
+								from searx import network
 								from searx.enginelib.traits import EngineTraits
 								engine_traits: EngineTraits
-												[enh] add supported_languages on engines and auto-generate languages.py

											
										
										
											2016-08-06 06:34:56 +02:00
-												[enh] engines: add about variable

move meta information from comment to the about variable
so the preferences, the documentation can show these information

											
										
										
											2021-01-13 11:31:25 +01:00
+								# about
 								about = {
 								    "website": 'https://www.wikipedia.org/',
 								    "wikidata_id": 'Q52',
 								    "official_api_documentation": 'https://en.wikipedia.org/api/',
 								    "use_official_api": True,
 								    "require_api_key": False,
 								    "results": 'JSON',
 								}
-												[mod] add 'Accept-Language' HTTP header to online processores

Most engines that support languages (and regions) use the Accept-Language from
the WEB browser to build a response that fits to the language (and region).

- add new engine option: send_accept_language_header

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-08-01 17:01:59 +02:00
 								send_accept_language_header = True
-												[enh] wikipedia infobox

creates simple multilingual infobox using wikipedia's api

											
										
										
											2016-03-14 07:32:36 +01:00
+								# search-url
-												Drop Python 2 (1/n): remove unicode string and url_utils

											
										
										
											2020-08-06 17:42:46 +02:00
+								search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
-												[mod] fetch supported languages for several engines
utils/fetch_languages.py gets languages supported by each engine and
generates engines_languages.json with each engine's supported language.

											
										
										
											2016-11-06 03:51:38 +01:00
+								supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
-												add support for Chinese variants in Wikipedia

											
										
										
											2021-02-09 05:56:45 +01:00
+								language_variants = {"zh": ("zh-cn", "zh-hk", "zh-mo", "zh-my", "zh-sg", "zh-tw")}
-												[enh] wikipedia infobox

creates simple multilingual infobox using wikipedia's api

											
										
										
											2016-03-14 07:32:36 +01:00
 								# set language in base_url
 								def url_lang(lang):
-												Revert "remove 'all' option from search languages"

This reverts commit 4d1770398a6af8902e75c0bd885781584d39e796.

											
										
										
											2019-01-06 15:27:46 +01:00
+								    lang_pre = lang.split('-')[0]
-												fix after rebase

											
										
										
											2019-01-07 21:28:58 +01:00
+								    if lang_pre == 'all' or lang_pre not in supported_languages and lang_pre not in language_aliases:
-												Revert "remove 'all' option from search languages"

This reverts commit 4d1770398a6af8902e75c0bd885781584d39e796.

											
										
										
											2019-01-06 15:27:46 +01:00
+								        return 'en'
-												[fix] check language aliases when setting search language

											
										
										
											2018-11-26 06:32:48 +01:00
+								    return match_language(lang, supported_languages, language_aliases).split('-')[0]
-												[enh] wikipedia infobox

creates simple multilingual infobox using wikipedia's api

											
										
										
											2016-03-14 07:32:36 +01:00
 								# do search-request
 								def request(query, params):
 								    if query.islower():
-												use Wikipedia's REST v1 API

											
										
										
											2020-09-08 07:05:21 +02:00
+								        query = query.title()
-												[enh] wikipedia infobox

creates simple multilingual infobox using wikipedia's api

											
										
										
											2016-03-14 07:32:36 +01:00
-												add support for Chinese variants in Wikipedia

											
										
										
											2021-02-09 05:56:45 +01:00
+								    language = url_lang(params['language'])
-												[format.python] initial formatting of the python code

This patch was generated by black [1]::

    make format.python

[1] https://github.com/psf/black

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2021-12-27 09:26:22 +01:00
+								    params['url'] = search_url.format(title=quote(query), language=language)
-												add support for Chinese variants in Wikipedia

											
										
										
											2021-02-09 05:56:45 +01:00
-												use Wikipedia's REST v1 API

											
										
										
											2020-09-08 07:05:21 +02:00
+								    params['headers']['User-Agent'] = searx_useragent()
-												[enh] add raise_for_httperror

check HTTP response:
* detect some common CAPTCHA challenges (no solving). In this case the engine is suspended for a long time.
* otherwise raise HTTPError as before

the check is done in poolrequests.py (was before in search.py).

update qwant, wikipedia, wikidata to use raise_for_httperror instead of raise_for_status

											
										
										
											2020-12-09 21:23:20 +01:00
+								    params['raise_for_httperror'] = False
-												[fix] wikipedia engine: don't raise an error when the query is not found

Add a new parameter "raise_for_status", set by default to True.
When True, any HTTP status code >= 300 raise an exception ( #2332 )
When False, the engine can manage the HTTP status code by itself.

											
										
										
											2020-12-04 20:04:39 +01:00
+								    params['soft_max_redirects'] = 2
-												use Wikipedia's REST v1 API

											
										
										
											2020-09-08 07:05:21 +02:00
-												[enh] wikipedia infobox

creates simple multilingual infobox using wikipedia's api

											
										
										
											2016-03-14 07:32:36 +01:00
+								    return params
 								# get response from search-request
 								def response(resp):
-												[fix] wikipedia engine: don't raise an error when the query is not found

Add a new parameter "raise_for_status", set by default to True.
When True, any HTTP status code >= 300 raise an exception ( #2332 )
When False, the engine can manage the HTTP status code by itself.

											
										
										
											2020-12-04 20:04:39 +01:00
+								    if resp.status_code == 404:
-												[enh] wikipedia infobox

creates simple multilingual infobox using wikipedia's api

											
										
										
											2016-03-14 07:32:36 +01:00
+								        return []
-												[upd] wikipedia engine: return an empty result on query with illegal characters

on some queries (like an IT error message), wikipedia returns an HTTP error 400.
this commit returns an empty result instead of showing an error to the user.

											
										
										
											2021-02-11 12:29:21 +01:00
 								    if resp.status_code == 400:
 								        try:
 								            api_result = loads(resp.text)
 								        except:
 								            pass
 								        else:
-												[format.python] initial formatting of the python code

This patch was generated by black [1]::

    make format.python

[1] https://github.com/psf/black

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2021-12-27 09:26:22 +01:00
+								            if (
 								                api_result['type'] == 'https://mediawiki.org/wiki/HyperSwitch/errors/bad_request'
 								                and api_result['detail'] == 'title-invalid-characters'
 								            ):
-												[upd] wikipedia engine: return an empty result on query with illegal characters

on some queries (like an IT error message), wikipedia returns an HTTP error 400.
this commit returns an empty result instead of showing an error to the user.

											
										
										
											2021-02-11 12:29:21 +01:00
+								                return []
-												[mod] Wikipedia: fetch engine traits (data_type: supported_languages)

Implements a fetch_traits function for the Wikipedia engines.

.. note::

   Does not include migration of the request methode from 'supported_languages'
   to 'traits' (EngineTraits) object!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-08 16:22:26 +02:00
+								    network.raise_for_httperror(resp)
-												[enh] wikipedia infobox

creates simple multilingual infobox using wikipedia's api

											
										
										
											2016-03-14 07:32:36 +01:00
-												use Wikipedia's REST v1 API

											
										
										
											2020-09-08 07:05:21 +02:00
+								    results = []
 								    api_result = loads(resp.text)
-												[enh] wikipedia infobox

creates simple multilingual infobox using wikipedia's api

											
										
										
											2016-03-14 07:32:36 +01:00
-												use Wikipedia's REST v1 API

											
										
										
											2020-09-08 07:05:21 +02:00
+								    # skip disambiguation pages
-												[fix] wikipedia: minor fix: return no result instead of crash in some very few cases.

In a few cases, the JSON result doesn't contain the key 'type'.

											
										
										
											2020-12-07 17:42:05 +01:00
+								    if api_result.get('type') != 'standard':
-												use Wikipedia's REST v1 API

											
										
										
											2020-09-08 07:05:21 +02:00
+								        return []
-												[enh] wikipedia infobox

creates simple multilingual infobox using wikipedia's api

											
										
										
											2016-03-14 07:32:36 +01:00
-												[fix] wikipedia: remove HTML from the title

fr.wikipedia.org (and it seems not other wikipedia websites),
adds HTML to api_result['displayTitle'].
(Search for '!wp :fr Braid' for example)

The commit uses api_result['title']

											
										
										
											2021-03-25 08:31:39 +01:00
+								    title = api_result['title']
-												use Wikipedia's REST v1 API

											
										
										
											2020-09-08 07:05:21 +02:00
+								    wikipedia_link = api_result['content_urls']['desktop']['page']
-												[enh] wikipedia infobox

creates simple multilingual infobox using wikipedia's api

											
										
										
											2016-03-14 07:32:36 +01:00
 								    results.append({'url': wikipedia_link, 'title': title})
-												[format.python] initial formatting of the python code

This patch was generated by black [1]::

    make format.python

[1] https://github.com/psf/black

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2021-12-27 09:26:22 +01:00
+								    results.append(
 								        {
 								            'infobox': title,
 								            'id': wikipedia_link,
 								            'content': api_result.get('extract', ''),
 								            'img_src': api_result.get('thumbnail', {}).get('source'),
 								            'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}],
 								        }
 								    )
-												[enh] wikipedia infobox

creates simple multilingual infobox using wikipedia's api

											
										
										
											2016-03-14 07:32:36 +01:00
 								    return results
-												[mod] fetch supported languages for several engines
utils/fetch_languages.py gets languages supported by each engine and
generates engines_languages.json with each engine's supported language.

											
										
										
											2016-11-06 03:51:38 +01:00
 								# get supported languages from their site
-												tests for _fetch_supported_languages in engines
and refactor method to make it testable without making requests

											
										
										
											2016-12-15 07:34:43 +01:00
+								def _fetch_supported_languages(resp):
-												[mod] fetch supported languages for several engines
utils/fetch_languages.py gets languages supported by each engine and
generates engines_languages.json with each engine's supported language.

											
										
										
											2016-11-06 03:51:38 +01:00
+								    supported_languages = {}
-												[mod] Wikipedia: fetch engine traits (data_type: supported_languages)

Implements a fetch_traits function for the Wikipedia engines.

.. note::

   Does not include migration of the request methode from 'supported_languages'
   to 'traits' (EngineTraits) object!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-08 16:22:26 +02:00
+								    dom = html.fromstring(resp.text)
-												[mod] fetch supported languages for several engines
utils/fetch_languages.py gets languages supported by each engine and
generates engines_languages.json with each engine's supported language.

											
										
										
											2016-11-06 03:51:38 +01:00
+								    tables = dom.xpath('//table[contains(@class,"sortable")]')
 								    for table in tables:
 								        # exclude header row
 								        trs = table.xpath('.//tr')[1:]
 								        for tr in trs:
 								            td = tr.xpath('./td')
 								            code = td[3].xpath('./a')[0].text
-												wikipedia engine: update _fetch_supported_languages

the layout https://meta.wikimedia.org/wiki/List_of_Wikipedias has changed

											
										
										
											2023-01-29 11:01:02 +01:00
+								            name = td[1].xpath('./a')[0].text
-												[mod] fetch supported languages for several engines
utils/fetch_languages.py gets languages supported by each engine and
generates engines_languages.json with each engine's supported language.

											
										
										
											2016-11-06 03:51:38 +01:00
+								            english_name = td[1].xpath('./a')[0].text
-												wikipedia engine: update _fetch_supported_languages

the layout https://meta.wikimedia.org/wiki/List_of_Wikipedias has changed

											
										
										
											2023-01-29 11:01:02 +01:00
+								            articles = int(td[4].xpath('./a')[0].text.replace(',', ''))
-												minor fixes in utils/fetch_languages.py

											
										
										
											2016-12-17 05:14:14 +01:00
+								            # exclude languages with too few articles
-												change language list to only include languages with a minimum of engines
that support them.
users can still query lesser supported through the :lang_code bang.

											
										
										
											2016-12-29 06:24:56 +01:00
+								            if articles >= 100:
-												remove articles number from engines_languages.json

											
										
										
											2021-02-26 07:49:15 +01:00
+								                supported_languages[code] = {"name": name, "english_name": english_name}
-												[mod] fetch supported languages for several engines
utils/fetch_languages.py gets languages supported by each engine and
generates engines_languages.json with each engine's supported language.

											
										
										
											2016-11-06 03:51:38 +01:00
 								    return supported_languages
-												[mod] Wikipedia: fetch engine traits (data_type: supported_languages)

Implements a fetch_traits function for the Wikipedia engines.

.. note::

   Does not include migration of the request methode from 'supported_languages'
   to 'traits' (EngineTraits) object!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>

											
										
										
											2022-10-08 16:22:26 +02:00
 								# Nonstandard language codes
 								#
 								# These Wikipedias use language codes that do not conform to the ISO 639
 								# standard (which is how wiki subdomains are chosen nowadays).
 								lang_map = {
 								    'be-tarask': 'bel',
 								    'ak': 'aka',
 								    'als': 'gsw',
 								    'bat-smg': 'sgs',
 								    'cbk-zam': 'cbk',
 								    'fiu-vro': 'vro',
 								    'map-bms': 'map',
 								    'nrm': 'nrf',
 								    'roa-rup': 'rup',
 								    'nds-nl': 'nds',
 								    #'roa-tara: – invented code used for the Tarantino Wikipedia (again, roa is the standard code for the large family of Romance languages that the Tarantino dialect falls within)
 								    #'simple: – invented code used for the Simple English Wikipedia (not the official IETF code en-simple)
 								    'zh-classical': 'zh_Hant',
 								    'zh-min-nan': 'nan',
 								    'zh-yue': 'yue',
 								    'an': 'arg',
 								}
 								unknown_langs = [
 								    'ab',  # Abkhazian
 								    'alt',  # Southern Altai
 								    'an',  # Aragonese
 								    'ang',  # Anglo-Saxon
 								    'arc',  # Aramaic
 								    'ary',  # Moroccan Arabic
 								    'av',  # Avar
 								    'ba',  # Bashkir
 								    'be-tarask',
 								    'bar',  # Bavarian
 								    'bcl',  # Central Bicolano
 								    'bh',  # Bhojpuri
 								    'bi',  # Bislama
 								    'bjn',  # Banjar
 								    'blk',  # Pa'O
 								    'bpy',  # Bishnupriya Manipuri
 								    'bxr',  # Buryat
 								    'cbk-zam',  # Zamboanga Chavacano
 								    'co',  # Corsican
 								    'cu',  # Old Church Slavonic
 								    'dty',  # Doteli
 								    'dv',  # Divehi
 								    'ext',  # Extremaduran
 								    'fj',  # Fijian
 								    'frp',  # Franco-Provençal
 								    'gan',  # Gan
 								    'gom',  # Goan Konkani
 								    'hif',  # Fiji Hindi
 								    'ilo',  # Ilokano
 								    'inh',  # Ingush
 								    'jbo',  # Lojban
 								    'kaa',  # Karakalpak
 								    'kbd',  # Kabardian Circassian
 								    'kg',  # Kongo
 								    'koi',  # Komi-Permyak
 								    'krc',  # Karachay-Balkar
 								    'kv',  # Komi
 								    'lad',  # Ladino
 								    'lbe',  # Lak
 								    'lez',  # Lezgian
 								    'li',  # Limburgish
 								    'ltg',  # Latgalian
 								    'mdf',  # Moksha
 								    'mnw',  # Mon
 								    'mwl',  # Mirandese
 								    'myv',  # Erzya
 								    'na',  # Nauruan
 								    'nah',  # Nahuatl
 								    'nov',  # Novial
 								    'nrm',  # Norman
 								    'pag',  # Pangasinan
 								    'pam',  # Kapampangan
 								    'pap',  # Papiamentu
 								    'pdc',  # Pennsylvania German
 								    'pfl',  # Palatinate German
 								    'roa-rup',  # Aromanian
 								    'sco',  # Scots
 								    'sco',  # Scots (https://sco.wikipedia.org) is not known by babel, Scottish Gaelic (https://gd.wikipedia.org) is known by babel
 								    'sh',  # Serbo-Croatian
 								    'simple',  # simple english is not know as a natural language different to english (babel)
 								    'sm',  # Samoan
 								    'srn',  # Sranan
 								    'stq',  # Saterland Frisian
 								    'szy',  # Sakizaya
 								    'tcy',  # Tulu
 								    'tet',  # Tetum
 								    'tpi',  # Tok Pisin
 								    'trv',  # Seediq
 								    'ty',  # Tahitian
 								    'tyv',  # Tuvan
 								    'udm',  # Udmurt
 								    'vep',  # Vepsian
 								    'vls',  # West Flemish
 								    'vo',  # Volapük
 								    'wa',  # Walloon
 								    'xal',  # Kalmyk
 								]
 								def fetch_traits(engine_traits: EngineTraits):
 								    """Fetch languages from Wikipedia"""
 								    # pylint: disable=import-outside-toplevel
 								    engine_traits.data_type = 'supported_languages'  # deprecated
 								    import babel
 								    from searx.locales import language_tag
 								    resp = network.get('https://meta.wikimedia.org/wiki/List_of_Wikipedias')
 								    if not resp.ok:
 								        print("ERROR: response from Wikipedia is not OK.")
 								    dom = html.fromstring(resp.text)
 								    for row in dom.xpath('//table[contains(@class,"sortable")]//tbody/tr'):
 								        cols = row.xpath('./td')
 								        if not cols:
 								            continue
 								        cols = [c.text_content().strip() for c in cols]
 								        articles = int(cols[4].replace(',', '').replace('-', '0'))
 								        users = int(cols[8].replace(',', '').replace('-', '0'))
 								        depth = cols[11].strip('-')
 								        if articles < 1000:
 								            # exclude languages with too few articles
 								            continue
 								        # depth: rough indicator of a Wikipedia’s quality, showing how
 								        #        frequently its articles are updated.
 								        if depth == '':
 								            if users < 1000:
 								                # depth is not calculated --> at least 1000 user should registered
 								                continue
 								        elif int(depth) < 20:
 								            continue
 								        eng_tag = cols[3]
 								        if eng_tag in unknown_langs:
 								            continue
 								        try:
 								            sxng_tag = language_tag(babel.Locale.parse(lang_map.get(eng_tag, eng_tag)))
 								        except babel.UnknownLocaleError:
 								            print("ERROR: %s -> %s is unknown by babel" % (cols[1], eng_tag))
 								            continue
 								        conflict = engine_traits.languages.get(sxng_tag)
 								        if conflict:
 								            if conflict != eng_tag:
 								                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
 								            continue
 								        engine_traits.languages[sxng_tag] = eng_tag
 								    engine_traits.languages['zh_Hans'] = 'zh'