[mod] replace searx.languages by searx.sxng_locales

With the language and region tags from the EngineTraitsMap the handling of
SearXNG's tags of languages and regions has been normalized and is no longer
a *mystery*.  The "languages" became "locales" that are supported by babel and
by this, the update_engine_traits.py can be simplified a lot.

Other code places can be simplified as well, but these simplifications
should (respectively can) only be done when none of the engines work with the
deprecated EngineTraits.supported_languages interface anymore.

This commit replaces searx.languages by searx.sxng_locales and fix the naming of
some names from "language" to "locale" (e.g. language_codes --> sxng_locales).

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2022-10-10 19:31:22 +02:00
parent 7daf4f95ef
commit c9cd376186
10 changed files with 233 additions and 304 deletions

View file

@ -4,5 +4,17 @@
Locales Locales
======= =======
.. contents:: Contents
:depth: 2
:local:
:backlinks: entry
.. automodule:: searx.locales .. automodule:: searx.locales
:members: :members:
SearXNG's locale codes
======================
.. automodule:: searx.sxng_locales
:members:

View file

@ -4,7 +4,7 @@ from abc import abstractmethod, ABC
import re import re
from searx import settings from searx import settings
from searx.languages import language_codes from searx.sxng_locales import sxng_locales
from searx.engines import categories, engines, engine_shortcuts from searx.engines import categories, engines, engine_shortcuts
from searx.external_bang import get_bang_definition_and_autocomplete from searx.external_bang import get_bang_definition_and_autocomplete
from searx.search import EngineRef from searx.search import EngineRef
@ -84,7 +84,7 @@ class LanguageParser(QueryPartParser):
found = False found = False
# check if any language-code is equal with # check if any language-code is equal with
# declared language-codes # declared language-codes
for lc in language_codes: for lc in sxng_locales:
lang_id, lang_name, country, english_name, _flag = map(str.lower, lc) lang_id, lang_name, country, english_name, _flag = map(str.lower, lc)
# if correct language-code is found # if correct language-code is found
@ -125,7 +125,7 @@ class LanguageParser(QueryPartParser):
self.raw_text_query.autocomplete_list.append(lang) self.raw_text_query.autocomplete_list.append(lang)
return return
for lc in language_codes: for lc in sxng_locales:
if lc[0] not in settings['search']['languages']: if lc[0] not in settings['search']['languages']:
continue continue
lang_id, lang_name, country, english_name, _flag = map(str.lower, lc) lang_id, lang_name, country, english_name, _flag = map(str.lower, lc)

View file

@ -12,13 +12,13 @@ import logging
from base64 import b64decode from base64 import b64decode
from os.path import dirname, abspath from os.path import dirname, abspath
from searx.languages import language_codes as languages from .sxng_locales import sxng_locales
searx_dir = abspath(dirname(__file__)) searx_dir = abspath(dirname(__file__))
logger = logging.getLogger('searx') logger = logging.getLogger('searx')
OUTPUT_FORMATS = ['html', 'csv', 'json', 'rss'] OUTPUT_FORMATS = ['html', 'csv', 'json', 'rss']
LANGUAGE_CODES = ['all', 'auto'] + list(l[0] for l in languages) SXNG_LOCALE_TAGS = ['all', 'auto'] + list(l[0] for l in sxng_locales)
SIMPLE_STYLE = ('auto', 'light', 'dark') SIMPLE_STYLE = ('auto', 'light', 'dark')
CATEGORIES_AS_TABS = { CATEGORIES_AS_TABS = {
'general': {}, 'general': {},
@ -156,8 +156,8 @@ SCHEMA = {
'safe_search': SettingsValue((0, 1, 2), 0), 'safe_search': SettingsValue((0, 1, 2), 0),
'autocomplete': SettingsValue(str, ''), 'autocomplete': SettingsValue(str, ''),
'autocomplete_min': SettingsValue(int, 4), 'autocomplete_min': SettingsValue(int, 4),
'default_lang': SettingsValue(tuple(LANGUAGE_CODES + ['']), ''), 'default_lang': SettingsValue(tuple(SXNG_LOCALE_TAGS + ['']), ''),
'languages': SettingSublistValue(LANGUAGE_CODES, LANGUAGE_CODES), 'languages': SettingSublistValue(SXNG_LOCALE_TAGS, SXNG_LOCALE_TAGS),
'ban_time_on_fail': SettingsValue(numbers.Real, 5), 'ban_time_on_fail': SettingsValue(numbers.Real, 5),
'max_ban_time_on_fail': SettingsValue(numbers.Real, 120), 'max_ban_time_on_fail': SettingsValue(numbers.Real, 120),
'suspended_times': { 'suspended_times': {

View file

@ -1,73 +1,128 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# list of language codes '''List of SearXNG's locale codes.
# this file is generated automatically by utils/fetch_languages.py
language_codes = ( This file is generated automatically by::
('af-ZA', 'Afrikaans', 'Suid-Afrika', 'Afrikaans', '\U0001f1ff\U0001f1e6'),
./manage pyenv.cmd searxng_extra/update/update_engine_traits.py
'''
sxng_locales = (
('ar', 'العربية', '', 'Arabic', '\U0001f310'),
('ar-EG', 'العربية', 'مصر', 'Arabic', '\U0001f1ea\U0001f1ec'), ('ar-EG', 'العربية', 'مصر', 'Arabic', '\U0001f1ea\U0001f1ec'),
('be-BY', 'Беларуская', 'Беларусь', 'Belarusian', '\U0001f1e7\U0001f1fe'), ('ar-SA', 'العربية', 'المملكة العربية السعودية', 'Arabic', '\U0001f1f8\U0001f1e6'),
('bg', 'Български', '', 'Bulgarian', '\U0001f310'),
('bg-BG', 'Български', 'България', 'Bulgarian', '\U0001f1e7\U0001f1ec'), ('bg-BG', 'Български', 'България', 'Bulgarian', '\U0001f1e7\U0001f1ec'),
('ca', 'Català', '', 'Catalan', '\U0001f310'),
('ca-ES', 'Català', 'Espanya', 'Catalan', '\U0001f1ea\U0001f1f8'), ('ca-ES', 'Català', 'Espanya', 'Catalan', '\U0001f1ea\U0001f1f8'),
('cs', 'Čeština', '', 'Czech', '\U0001f310'),
('cs-CZ', 'Čeština', 'Česko', 'Czech', '\U0001f1e8\U0001f1ff'), ('cs-CZ', 'Čeština', 'Česko', 'Czech', '\U0001f1e8\U0001f1ff'),
('da', 'Dansk', '', 'Danish', '\U0001f310'),
('da-DK', 'Dansk', 'Danmark', 'Danish', '\U0001f1e9\U0001f1f0'), ('da-DK', 'Dansk', 'Danmark', 'Danish', '\U0001f1e9\U0001f1f0'),
('de', 'Deutsch', '', 'German', '\U0001f310'), ('de', 'Deutsch', '', 'German', '\U0001f310'),
('de-AT', 'Deutsch', 'Österreich', 'German', '\U0001f1e6\U0001f1f9'), ('de-AT', 'Deutsch', 'Österreich', 'German', '\U0001f1e6\U0001f1f9'),
('de-CH', 'Deutsch', 'Schweiz', 'German', '\U0001f1e8\U0001f1ed'), ('de-CH', 'Deutsch', 'Schweiz', 'German', '\U0001f1e8\U0001f1ed'),
('de-DE', 'Deutsch', 'Deutschland', 'German', '\U0001f1e9\U0001f1ea'), ('de-DE', 'Deutsch', 'Deutschland', 'German', '\U0001f1e9\U0001f1ea'),
('el', 'Ελληνικά', '', 'Greek', '\U0001f310'),
('el-GR', 'Ελληνικά', 'Ελλάδα', 'Greek', '\U0001f1ec\U0001f1f7'), ('el-GR', 'Ελληνικά', 'Ελλάδα', 'Greek', '\U0001f1ec\U0001f1f7'),
('en', 'English', '', 'English', '\U0001f310'), ('en', 'English', '', 'English', '\U0001f310'),
('en-AU', 'English', 'Australia', 'English', '\U0001f1e6\U0001f1fa'), ('en-AU', 'English', 'Australia', 'English', '\U0001f1e6\U0001f1fa'),
('en-CA', 'English', 'Canada', 'English', '\U0001f1e8\U0001f1e6'), ('en-CA', 'English', 'Canada', 'English', '\U0001f1e8\U0001f1e6'),
('en-GB', 'English', 'United Kingdom', 'English', '\U0001f1ec\U0001f1e7'), ('en-GB', 'English', 'United Kingdom', 'English', '\U0001f1ec\U0001f1e7'),
('en-IE', 'English', 'Ireland', 'English', '\U0001f1ee\U0001f1ea'), ('en-IE', 'English', 'Ireland', 'English', '\U0001f1ee\U0001f1ea'),
('en-MY', 'English', 'Malaysia', 'English', '\U0001f1f2\U0001f1fe'), ('en-IN', 'English', 'India', 'English', '\U0001f1ee\U0001f1f3'),
('en-NZ', 'English', 'New Zealand', 'English', '\U0001f1f3\U0001f1ff'), ('en-NZ', 'English', 'New Zealand', 'English', '\U0001f1f3\U0001f1ff'),
('en-PH', 'English', 'Philippines', 'English', '\U0001f1f5\U0001f1ed'),
('en-PK', 'English', 'Pakistan', 'English', '\U0001f1f5\U0001f1f0'),
('en-SG', 'English', 'Singapore', 'English', '\U0001f1f8\U0001f1ec'),
('en-US', 'English', 'United States', 'English', '\U0001f1fa\U0001f1f8'), ('en-US', 'English', 'United States', 'English', '\U0001f1fa\U0001f1f8'),
('en-ZA', 'English', 'South Africa', 'English', '\U0001f1ff\U0001f1e6'),
('es', 'Español', '', 'Spanish', '\U0001f310'), ('es', 'Español', '', 'Spanish', '\U0001f310'),
('es-AR', 'Español', 'Argentina', 'Spanish', '\U0001f1e6\U0001f1f7'), ('es-AR', 'Español', 'Argentina', 'Spanish', '\U0001f1e6\U0001f1f7'),
('es-CL', 'Español', 'Chile', 'Spanish', '\U0001f1e8\U0001f1f1'), ('es-CL', 'Español', 'Chile', 'Spanish', '\U0001f1e8\U0001f1f1'),
('es-CO', 'Español', 'Colombia', 'Spanish', '\U0001f1e8\U0001f1f4'),
('es-ES', 'Español', 'España', 'Spanish', '\U0001f1ea\U0001f1f8'), ('es-ES', 'Español', 'España', 'Spanish', '\U0001f1ea\U0001f1f8'),
('es-MX', 'Español', 'México', 'Spanish', '\U0001f1f2\U0001f1fd'), ('es-MX', 'Español', 'México', 'Spanish', '\U0001f1f2\U0001f1fd'),
('es-PE', 'Español', 'Perú', 'Spanish', '\U0001f1f5\U0001f1ea'),
('et', 'Eesti', '', 'Estonian', '\U0001f310'),
('et-EE', 'Eesti', 'Eesti', 'Estonian', '\U0001f1ea\U0001f1ea'), ('et-EE', 'Eesti', 'Eesti', 'Estonian', '\U0001f1ea\U0001f1ea'),
('fa-IR', 'فارسی', 'ایران', 'Persian', '\U0001f1ee\U0001f1f7'), ('fi', 'Suomi', '', 'Finnish', '\U0001f310'),
('fi-FI', 'Suomi', 'Suomi', 'Finnish', '\U0001f1eb\U0001f1ee'), ('fi-FI', 'Suomi', 'Suomi', 'Finnish', '\U0001f1eb\U0001f1ee'),
('fil-PH', 'Filipino', 'Pilipinas', 'Filipino', '\U0001f1f5\U0001f1ed'),
('fr', 'Français', '', 'French', '\U0001f310'), ('fr', 'Français', '', 'French', '\U0001f310'),
('fr-BE', 'Français', 'Belgique', 'French', '\U0001f1e7\U0001f1ea'), ('fr-BE', 'Français', 'Belgique', 'French', '\U0001f1e7\U0001f1ea'),
('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'), ('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'),
('fr-CH', 'Français', 'Suisse', 'French', '\U0001f1e8\U0001f1ed'), ('fr-CH', 'Français', 'Suisse', 'French', '\U0001f1e8\U0001f1ed'),
('fr-FR', 'Français', 'France', 'French', '\U0001f1eb\U0001f1f7'), ('fr-FR', 'Français', 'France', 'French', '\U0001f1eb\U0001f1f7'),
('he-IL', 'עברית', 'ישראל', 'Hebrew', '\U0001f1ee\U0001f1f1'), ('he', 'עברית', '', 'Hebrew', '\U0001f1ee\U0001f1f7'),
('hi-IN', 'हिन्दी', 'भारत', 'Hindi', '\U0001f1ee\U0001f1f3'), ('hr', 'Hrvatski', '', 'Croatian', '\U0001f310'),
('hr-HR', 'Hrvatski', 'Hrvatska', 'Croatian', '\U0001f1ed\U0001f1f7'), ('hr-HR', 'Hrvatski', 'Hrvatska', 'Croatian', '\U0001f1ed\U0001f1f7'),
('hu', 'Magyar', '', 'Hungarian', '\U0001f310'),
('hu-HU', 'Magyar', 'Magyarország', 'Hungarian', '\U0001f1ed\U0001f1fa'), ('hu-HU', 'Magyar', 'Magyarország', 'Hungarian', '\U0001f1ed\U0001f1fa'),
('id', 'Indonesia', '', 'Indonesian', '\U0001f310'),
('id-ID', 'Indonesia', 'Indonesia', 'Indonesian', '\U0001f1ee\U0001f1e9'), ('id-ID', 'Indonesia', 'Indonesia', 'Indonesian', '\U0001f1ee\U0001f1e9'),
('is-IS', 'Íslenska', 'Ísland', 'Icelandic', '\U0001f1ee\U0001f1f8'), ('it', 'Italiano', '', 'Italian', '\U0001f310'),
('it-CH', 'Italiano', 'Svizzera', 'Italian', '\U0001f1e8\U0001f1ed'),
('it-IT', 'Italiano', 'Italia', 'Italian', '\U0001f1ee\U0001f1f9'), ('it-IT', 'Italiano', 'Italia', 'Italian', '\U0001f1ee\U0001f1f9'),
('ja', '日本語', '', 'Japanese', '\U0001f310'),
('ja-JP', '日本語', '日本', 'Japanese', '\U0001f1ef\U0001f1f5'), ('ja-JP', '日本語', '日本', 'Japanese', '\U0001f1ef\U0001f1f5'),
('ko', '한국어', '', 'Korean', '\U0001f310'),
('ko-KR', '한국어', '대한민국', 'Korean', '\U0001f1f0\U0001f1f7'), ('ko-KR', '한국어', '대한민국', 'Korean', '\U0001f1f0\U0001f1f7'),
('lt', 'Lietuvių', '', 'Lithuanian', '\U0001f310'),
('lt-LT', 'Lietuvių', 'Lietuva', 'Lithuanian', '\U0001f1f1\U0001f1f9'), ('lt-LT', 'Lietuvių', 'Lietuva', 'Lithuanian', '\U0001f1f1\U0001f1f9'),
('lv', 'Latviešu', '', 'Latvian', '\U0001f310'),
('lv-LV', 'Latviešu', 'Latvija', 'Latvian', '\U0001f1f1\U0001f1fb'), ('lv-LV', 'Latviešu', 'Latvija', 'Latvian', '\U0001f1f1\U0001f1fb'),
('nb', 'Norsk Bokmål', '', 'Norwegian Bokmål', '\U0001f310'),
('nb-NO', 'Norsk Bokmål', 'Norge', 'Norwegian Bokmål', '\U0001f1f3\U0001f1f4'),
('nl', 'Nederlands', '', 'Dutch', '\U0001f310'), ('nl', 'Nederlands', '', 'Dutch', '\U0001f310'),
('nl-BE', 'Nederlands', 'België', 'Dutch', '\U0001f1e7\U0001f1ea'), ('nl-BE', 'Nederlands', 'België', 'Dutch', '\U0001f1e7\U0001f1ea'),
('nl-NL', 'Nederlands', 'Nederland', 'Dutch', '\U0001f1f3\U0001f1f1'), ('nl-NL', 'Nederlands', 'Nederland', 'Dutch', '\U0001f1f3\U0001f1f1'),
('no-NO', 'Norsk', '', 'Norwegian (Bokmål)', '\U0001f1f3\U0001f1f4'), ('pl', 'Polski', '', 'Polish', '\U0001f310'),
('pl-PL', 'Polski', 'Polska', 'Polish', '\U0001f1f5\U0001f1f1'), ('pl-PL', 'Polski', 'Polska', 'Polish', '\U0001f1f5\U0001f1f1'),
('pt', 'Português', '', 'Portuguese', '\U0001f310'), ('pt', 'Português', '', 'Portuguese', '\U0001f310'),
('pt-BR', 'Português', 'Brasil', 'Portuguese', '\U0001f1e7\U0001f1f7'), ('pt-BR', 'Português', 'Brasil', 'Portuguese', '\U0001f1e7\U0001f1f7'),
('pt-PT', 'Português', 'Portugal', 'Portuguese', '\U0001f1f5\U0001f1f9'), ('pt-PT', 'Português', 'Portugal', 'Portuguese', '\U0001f1f5\U0001f1f9'),
('ro', 'Română', '', 'Romanian', '\U0001f310'),
('ro-RO', 'Română', 'România', 'Romanian', '\U0001f1f7\U0001f1f4'), ('ro-RO', 'Română', 'România', 'Romanian', '\U0001f1f7\U0001f1f4'),
('ru', 'Русский', '', 'Russian', '\U0001f310'),
('ru-RU', 'Русский', 'Россия', 'Russian', '\U0001f1f7\U0001f1fa'), ('ru-RU', 'Русский', 'Россия', 'Russian', '\U0001f1f7\U0001f1fa'),
('sk', 'Slovenčina', '', 'Slovak', '\U0001f310'),
('sk-SK', 'Slovenčina', 'Slovensko', 'Slovak', '\U0001f1f8\U0001f1f0'), ('sk-SK', 'Slovenčina', 'Slovensko', 'Slovak', '\U0001f1f8\U0001f1f0'),
('sl', 'Slovenščina', '', 'Slovenian', '\U0001f310'),
('sl-SI', 'Slovenščina', 'Slovenija', 'Slovenian', '\U0001f1f8\U0001f1ee'), ('sl-SI', 'Slovenščina', 'Slovenija', 'Slovenian', '\U0001f1f8\U0001f1ee'),
('sr-RS', 'Српски', 'Србија', 'Serbian', '\U0001f1f7\U0001f1f8'), ('sv', 'Svenska', '', 'Swedish', '\U0001f310'),
('sv-SE', 'Svenska', 'Sverige', 'Swedish', '\U0001f1f8\U0001f1ea'), ('sv-SE', 'Svenska', 'Sverige', 'Swedish', '\U0001f1f8\U0001f1ea'),
('sw-TZ', 'Kiswahili', 'Tanzania', 'Swahili', '\U0001f1f9\U0001f1ff'), ('th', 'ไทย', '', 'Thai', '\U0001f310'),
('th-TH', 'ไทย', 'ไทย', 'Thai', '\U0001f1f9\U0001f1ed'), ('th-TH', 'ไทย', 'ไทย', 'Thai', '\U0001f1f9\U0001f1ed'),
('tr', 'Türkçe', '', 'Turkish', '\U0001f310'),
('tr-TR', 'Türkçe', 'Türkiye', 'Turkish', '\U0001f1f9\U0001f1f7'), ('tr-TR', 'Türkçe', 'Türkiye', 'Turkish', '\U0001f1f9\U0001f1f7'),
('uk', 'Українська', '', 'Ukrainian', '\U0001f310'),
('uk-UA', 'Українська', 'Україна', 'Ukrainian', '\U0001f1fa\U0001f1e6'), ('uk-UA', 'Українська', 'Україна', 'Ukrainian', '\U0001f1fa\U0001f1e6'),
('vi', 'Tiếng Việt', '', 'Vietnamese', '\U0001f310'),
('vi-VN', 'Tiếng Việt', 'Việt Nam', 'Vietnamese', '\U0001f1fb\U0001f1f3'), ('vi-VN', 'Tiếng Việt', 'Việt Nam', 'Vietnamese', '\U0001f1fb\U0001f1f3'),
('zh', '中文', '', 'Chinese', '\U0001f310'), ('zh', '中文', '', 'Chinese', '\U0001f310'),
('zh-CN', '中文', '中国', 'Chinese', '\U0001f1e8\U0001f1f3'), ('zh-CN', '中文', '中国', 'Chinese', '\U0001f1e8\U0001f1f3'),
('zh-HK', '中文', '中國香港', 'Chinese', '\U0001f1ed\U0001f1f0'), ('zh-HK', '中文', '中國香港', 'Chinese', '\U0001f1ed\U0001f1f0'),
('zh-TW', '中文', '台灣', 'Chinese', '\U0001f1f9\U0001f1fc'), ('zh-TW', '中文', '台灣', 'Chinese', '\U0001f1f9\U0001f1fc'),
) )
'''
A list of five-digit tuples:
0. SearXNG's internal locale tag (a language or region tag)
1. Name of the language (:py:obj:`babel.core.Locale.get_language_name`)
2. For region tags the name of the region (:py:obj:`babel.core.Locale.get_territory_name`).
Empty string for language tags.
3. English language name (from :py:obj:`babel.core.Locale.english_name`)
4. Unicode flag (emoji) that fits to SearXNG's internal region tag. Languages
are represented by a globe (🌐)
.. code:: python
('en', 'English', '', 'English', '🌐'),
('en-CA', 'English', 'Canada', 'English', '🇨🇦'),
('en-US', 'English', 'United States', 'English', '🇺🇸'),
..
('fr', 'Français', '', 'French', '🌐'),
('fr-BE', 'Français', 'Belgique', 'French', '🇧🇪'),
('fr-CA', 'Français', 'Canada', 'French', '🇨🇦'),
:meta hide-value:
'''

View file

@ -1,12 +1,12 @@
<select class="language" id="language" name="language" aria-label="{{ _('Search language') }}">{{- '' -}} <select class="language" id="language" name="language" aria-label="{{ _('Search language') }}">{{- '' -}}
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option> <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }} [all]</option>
<option value="auto" {% if current_language == 'auto' %}selected="selected"{% endif %}> <option value="auto" {% if current_language == 'auto' %}selected="selected"{% endif %}>
{{- _('Auto-detect') -}} {{- _('Auto-detect') -}}
{%- if current_language == 'auto' %} ({{ search_language }}){%- endif -%} {%- if current_language == 'auto' %} ({{ search_language }}){%- endif -%}
</option> </option>
{%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%} {%- for sxng_tag,lang_name,country_name,english_name,flag in sxng_locales | sort(attribute=1) -%}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}> <option value="{{ sxng_tag }}" {% if sxng_tag == current_language %}selected="selected"{% endif %}>
{% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %} {% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %} - {{ country_name }} {% endif %} [{{sxng_tag}}]
</option> </option>
{%- endfor -%} {%- endfor -%}
</select> </select>

View file

@ -115,10 +115,10 @@
<legend id="pref_language">{{ _('Search language') }}</legend> <legend id="pref_language">{{ _('Search language') }}</legend>
<p class="value">{{- '' -}} <p class="value">{{- '' -}}
<select name='language' aria-labelledby="pref_language" aria-describedby="desc_language">{{- '' -}} <select name='language' aria-labelledby="pref_language" aria-describedby="desc_language">{{- '' -}}
<option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }}</option> <option value="all" {% if current_language == 'all' %}selected="selected"{% endif %}>{{ _('Default language') }} [all]</option>
<option value="auto" {% if current_language == 'auto' %}selected="selected"{% endif %}>{{ _('Auto-detect') }}</option> <option value="auto" {% if current_language == 'auto' %}selected="selected"{% endif %}>{{ _('Auto-detect') }} [auto]</option>
{%- for lang_id,lang_name,country_name,english_name,flag in language_codes | sort(attribute=1) -%} {%- for sxng_tag,lang_name,country_name,english_name,flag in sxng_locales | sort(attribute=1) -%}
<option value="{{ lang_id }}" {% if lang_id == current_language %}selected="selected"{% endif %}>{% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %}({{ country_name }}) {% endif %}</option> <option value="{{ sxng_tag }}" {% if sxng_tag == current_language %}selected="selected"{% endif %}>{% if flag %}{{ flag }} {% endif%} {{- lang_name }} {% if country_name %} - {{ country_name }} {% endif %} [{{sxng_tag}}]</option>
{%- endfor -%} {%- endfor -%}
</select>{{- '' -}} </select>{{- '' -}}
</p> </p>

View file

@ -24,7 +24,7 @@ from babel.core import get_global
from searx import settings from searx import settings
from searx.data import USER_AGENTS, data_dir from searx.data import USER_AGENTS, data_dir
from searx.version import VERSION_TAG from searx.version import VERSION_TAG
from searx.languages import language_codes from searx.sxng_locales import sxng_locales
from searx.exceptions import SearxXPathSyntaxException, SearxEngineXPathException from searx.exceptions import SearxXPathSyntaxException, SearxEngineXPathException
from searx import logger from searx import logger
@ -53,8 +53,8 @@ _LANG_TO_LC_CACHE: Dict[str, Dict[str, str]] = {}
_FASTTEXT_MODEL: Optional["fasttext.FastText._FastText"] = None _FASTTEXT_MODEL: Optional["fasttext.FastText._FastText"] = None
"""fasttext model to predict laguage of a search term""" """fasttext model to predict laguage of a search term"""
SEARCH_LANGUAGE_CODES = frozenset([searxng_locale[0].split('-')[0] for searxng_locale in language_codes]) SEARCH_LANGUAGE_CODES = frozenset([searxng_locale[0].split('-')[0] for searxng_locale in sxng_locales])
"""Languages supported by most searxng engines (:py:obj:`searx.languages.language_codes`).""" """Languages supported by most searxng engines (:py:obj:`searx.sxng_locales.sxng_locales`)."""
class _NotSetClass: # pylint: disable=too-few-public-methods class _NotSetClass: # pylint: disable=too-few-public-methods
@ -355,11 +355,11 @@ def is_valid_lang(lang) -> Optional[Tuple[bool, str, str]]:
is_abbr = len(lang) == 2 is_abbr = len(lang) == 2
lang = lang.lower() lang = lang.lower()
if is_abbr: if is_abbr:
for l in language_codes: for l in sxng_locales:
if l[0][:2] == lang: if l[0][:2] == lang:
return (True, l[0][:2], l[3].lower()) return (True, l[0][:2], l[3].lower())
return None return None
for l in language_codes: for l in sxng_locales:
if l[1].lower() == lang or l[3].lower() == lang: if l[1].lower() == lang or l[3].lower() == lang:
return (True, l[0][:2], l[3].lower()) return (True, l[0][:2], l[3].lower())
return None return None

View file

@ -121,8 +121,8 @@ from searx.locales import (
# renaming names from searx imports ... # renaming names from searx imports ...
from searx.autocomplete import search_autocomplete, backends as autocomplete_backends from searx.autocomplete import search_autocomplete, backends as autocomplete_backends
from searx.languages import language_codes as languages
from searx.redisdb import initialize as redis_initialize from searx.redisdb import initialize as redis_initialize
from searx.sxng_locales import sxng_locales
from searx.search import SearchWithPlugins, initialize as search_initialize from searx.search import SearchWithPlugins, initialize as search_initialize
from searx.network import stream as http_stream, set_context_network_name from searx.network import stream as http_stream, set_context_network_name
from searx.search.checker import get_result as checker_get_result from searx.search.checker import get_result as checker_get_result
@ -438,7 +438,7 @@ def render(template_name: str, **kwargs):
kwargs['OTHER_CATEGORY'] = OTHER_CATEGORY kwargs['OTHER_CATEGORY'] = OTHER_CATEGORY
# i18n # i18n
kwargs['language_codes'] = [l for l in languages if l[0] in settings['search']['languages']] kwargs['sxng_locales'] = [l for l in sxng_locales if l[0] in settings['search']['languages']]
locale = request.preferences.get_value('locale') locale = request.preferences.get_value('locale')
kwargs['locale_rfc5646'] = _get_locale_rfc5646(locale) kwargs['locale_rfc5646'] = _get_locale_rfc5646(locale)

View file

@ -18,17 +18,69 @@ the :origin:`CI Update data ... <.github/workflows/data-update.yml>`
from unicodedata import lookup from unicodedata import lookup
from pathlib import Path from pathlib import Path
from pprint import pformat from pprint import pformat
from babel import Locale, UnknownLocaleError import babel
from babel.languages import get_global
from babel.core import parse_locale
from searx import settings, searx_dir from searx import settings, searx_dir
from searx import network from searx import network
from searx.engines import load_engines, engines from searx.engines import load_engines
from searx.enginelib.traits import EngineTraitsMap from searx.enginelib.traits import EngineTraitsMap
# Output files. # Output files.
languages_file = Path(searx_dir) / 'languages.py' languages_file = Path(searx_dir) / 'sxng_locales.py'
languages_file_header = """\
# -*- coding: utf-8 -*-
'''List of SearXNG's locale codes.
This file is generated automatically by::
./manage pyenv.cmd searxng_extra/update/update_engine_traits.py
'''
sxng_locales = (
"""
languages_file_footer = """,
)
'''
A list of five-digit tuples:
0. SearXNG's internal locale tag (a language or region tag)
1. Name of the language (:py:obj:`babel.core.Locale.get_language_name`)
2. For region tags the name of the region (:py:obj:`babel.core.Locale.get_territory_name`).
Empty string for language tags.
3. English language name (from :py:obj:`babel.core.Locale.english_name`)
4. Unicode flag (emoji) that fits to SearXNG's internal region tag. Languages
are represented by a globe (\U0001F310)
.. code:: python
('en', 'English', '', 'English', '\U0001f310'),
('en-CA', 'English', 'Canada', 'English', '\U0001f1e8\U0001f1e6'),
('en-US', 'English', 'United States', 'English', '\U0001f1fa\U0001f1f8'),
..
('fr', 'Français', '', 'French', '\U0001f310'),
('fr-BE', 'Français', 'Belgique', 'French', '\U0001f1e7\U0001f1ea'),
('fr-CA', 'Français', 'Canada', 'French', '\U0001f1e8\U0001f1e6'),
:meta hide-value:
'''
"""
lang2emoji = {
'ha': '\U0001F1F3\U0001F1EA', # Hausa / Niger
'bs': '\U0001F1E7\U0001F1E6', # Bosnian / Bosnia & Herzegovina
'jp': '\U0001F1EF\U0001F1F5', # Japanese
'ua': '\U0001F1FA\U0001F1E6', # Ukrainian
'he': '\U0001F1EE\U0001F1F7', # Hebrew
}
def main():
load_engines(settings['engines'])
# traits_map = EngineTraitsMap.from_data()
traits_map = fetch_traits_map()
sxng_tag_list = filter_locales(traits_map)
write_languages_file(sxng_tag_list)
def fetch_traits_map(): def fetch_traits_map():
@ -45,234 +97,69 @@ def fetch_traits_map():
return traits_map return traits_map
# Get babel Locale object from lang_code if possible. def filter_locales(traits_map: EngineTraitsMap):
def get_locale(lang_code): """Filter language & region tags by a threshold."""
try:
locale = Locale.parse(lang_code, sep='-') min_eng_per_region = 11
return locale min_eng_per_lang = 13
except (UnknownLocaleError, ValueError):
return None _ = {}
for eng in traits_map.values():
for reg in eng.regions.keys():
_[reg] = _.get(reg, 0) + 1
regions = set(k for k, v in _.items() if v >= min_eng_per_region)
lang_from_region = set(k.split('-')[0] for k in regions)
_ = {}
for eng in traits_map.values():
for lang in eng.languages.keys():
# ignore script types like zh_Hant, zh_Hans or sr_Latin, pa_Arab (they
# already counted by existence of 'zh' or 'sr', 'pa')
if '_' in lang:
# print("ignore %s" % lang)
continue
_[lang] = _.get(lang, 0) + 1
languages = set(k for k, v in _.items() if v >= min_eng_per_lang)
sxng_tag_list = set()
sxng_tag_list.update(regions)
sxng_tag_list.update(lang_from_region)
sxng_tag_list.update(languages)
return sxng_tag_list
lang2emoji = { def write_languages_file(sxng_tag_list):
'ha': '\U0001F1F3\U0001F1EA', # Hausa / Niger
'bs': '\U0001F1E7\U0001F1E6', # Bosnian / Bosnia & Herzegovina
'jp': '\U0001F1EF\U0001F1F5', # Japanese
'ua': '\U0001F1FA\U0001F1E6', # Ukrainian
'he': '\U0001F1EE\U0001F1F7', # Hebrew
}
language_codes = []
def get_unicode_flag(lang_code): for sxng_tag in sorted(sxng_tag_list):
"""Determine a unicode flag (emoji) that fits to the ``lang_code``""" sxng_locale: babel.Locale = babel.Locale.parse(sxng_tag, sep='-')
emoji = lang2emoji.get(lang_code.lower()) flag = get_unicode_flag(sxng_locale) or ''
if emoji:
return emoji
if len(lang_code) == 2: item = (
return '\U0001F310' sxng_tag,
sxng_locale.get_language_name().title(),
language = territory = script = variant = '' sxng_locale.get_territory_name() or '',
try: sxng_locale.english_name.split(' (')[0],
language, territory, script, variant = parse_locale(lang_code, '-') UnicodeEscape(flag),
except ValueError as exc:
print(exc)
# https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
if not territory:
# https://www.unicode.org/emoji/charts/emoji-list.html#country-flag
emoji = lang2emoji.get(language)
if not emoji:
print(
"%s --> language: %s / territory: %s / script: %s / variant: %s"
% (lang_code, language, territory, script, variant)
) )
return emoji
emoji = lang2emoji.get(territory.lower()) language_codes.append(item)
if emoji:
return emoji
try: language_codes = tuple(language_codes)
c1 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[0])
c2 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + territory[1])
# print("%s --> territory: %s --> %s%s" %(lang_code, territory, c1, c2 ))
except KeyError as exc:
print("%s --> territory: %s --> %s" % (lang_code, territory, exc))
return None
return c1 + c2 with open(languages_file, 'w', encoding='utf-8') as new_file:
file_content = "{header} {language_codes}{footer}".format(
header=languages_file_header,
def get_territory_name(lang_code): language_codes=pformat(language_codes, width=120, indent=4)[1:-1],
country_name = None footer=languages_file_footer,
locale = get_locale(lang_code)
try:
if locale is not None:
country_name = locale.get_territory_name()
except FileNotFoundError as exc:
print("ERROR: %s --> %s" % (locale, exc))
return country_name
def join_language_lists(traits_map: EngineTraitsMap):
"""Join all languages of the engines into one list. The returned language list
contains language codes (``zh``) and region codes (``zh-TW``). The codes can
be parsed by babel::
babel.Locale.parse(language_list[n])
"""
# pylint: disable=too-many-branches
language_list = {}
for eng_name, eng_traits in traits_map.items():
eng = engines[eng_name]
eng_codes = set()
if eng_traits.data_type == 'traits_v1':
# items of type 'engine_traits' do have regions & languages, the
# list of eng_codes should contain both.
eng_codes.update(eng_traits.regions.keys())
eng_codes.update(eng_traits.languages.keys())
elif eng_traits.data_type == 'supported_languages':
# vintage / deprecated
_codes = set()
if isinstance(eng_traits.supported_languages, dict):
_codes.update(eng_traits.supported_languages.keys())
elif isinstance(eng_traits.supported_languages, list):
_codes.update(eng_traits.supported_languages)
else:
raise TypeError('engine.supported_languages type %s is unknown' % type(eng_traits.supported_languages))
for lang_code in _codes:
# apply custom fixes if necessary
if lang_code in getattr(eng, 'language_aliases', {}).values():
lang_code = next(lc for lc, alias in eng.language_aliases.items() if lang_code == alias)
eng_codes.add(lang_code)
for lang_code in eng_codes:
locale = get_locale(lang_code)
# ensure that lang_code uses standard language and country codes
if locale and locale.territory:
lang_code = "{lang}-{country}".format(lang=locale.language, country=locale.territory)
short_code = lang_code.split('-')[0]
# add language without country if not in list
if short_code not in language_list:
if locale:
# get language's data from babel's Locale object
language_name = locale.get_language_name().title()
english_name = locale.english_name.split(' (')[0]
elif short_code in traits_map['wikipedia'].supported_languages:
# get language's data from wikipedia if not known by babel
language_name = traits_map['wikipedia'].supported_languages[short_code]['name']
english_name = traits_map['wikipedia'].supported_languages[short_code]['english_name']
else:
language_name = None
english_name = None
# add language to list
language_list[short_code] = {
'name': language_name,
'english_name': english_name,
'counter': set(),
'countries': {},
}
# add language with country if not in list
if lang_code != short_code and lang_code not in language_list[short_code]['countries']:
country_name = ''
if locale:
# get country name from babel's Locale object
try:
country_name = locale.get_territory_name()
except FileNotFoundError as exc:
print("ERROR: %s --> %s" % (locale, exc))
locale = None
language_list[short_code]['countries'][lang_code] = {
'country_name': country_name,
'counter': set(),
}
# count engine for both language_country combination and language alone
language_list[short_code]['counter'].add(eng_name)
if lang_code != short_code:
language_list[short_code]['countries'][lang_code]['counter'].add(eng_name)
return language_list
# Filter language list so it only includes the most supported languages and countries
def filter_language_list(joined_languages_map):
min_engines_per_lang = 12
min_engines_per_country = 7
# pylint: disable=consider-using-dict-items, consider-iterating-dictionary
main_engines = [
engine_name
for engine_name in engines.keys()
if 'general' in engines[engine_name].categories
and hasattr(engines[engine_name], 'supported_languages')
and engines[engine_name].supported_languages
and not engines[engine_name].disabled
]
# filter list to include only languages supported by most engines or all default general engines
filtered_languages = {
code: lang
for code, lang in joined_languages_map.items()
if (
len(lang['counter']) >= min_engines_per_lang
or all(main_engine in lang['counter'] for main_engine in main_engines)
) )
} new_file.write(file_content)
new_file.close()
def _copy_lang_data(lang, country_name=None):
new_dict = {}
new_dict['name'] = joined_languages_map[lang]['name']
new_dict['english_name'] = joined_languages_map[lang]['english_name']
if country_name:
new_dict['country_name'] = country_name
return new_dict
# for each language get country codes supported by most engines or at least one country code
filtered_languages_with_countries = {}
for lang, lang_data in filtered_languages.items():
countries = lang_data['countries']
filtered_countries = {}
# get language's country codes with enough supported engines
for lang_country, country_data in countries.items():
if len(country_data['counter']) >= min_engines_per_country:
filtered_countries[lang_country] = _copy_lang_data(lang, country_data['country_name'])
# add language without countries too if there's more than one country to choose from
if len(filtered_countries) > 1:
filtered_countries[lang] = _copy_lang_data(lang, None)
elif len(filtered_countries) == 1:
lang_country = next(iter(filtered_countries))
# if no country has enough engines try to get most likely country code from babel
if not filtered_countries:
lang_country = None
subtags = get_global('likely_subtags').get(lang)
if subtags:
country_code = subtags.split('_')[-1]
if len(country_code) == 2:
lang_country = "{lang}-{country}".format(lang=lang, country=country_code)
if lang_country:
filtered_countries[lang_country] = _copy_lang_data(lang, None)
else:
filtered_countries[lang] = _copy_lang_data(lang, None)
filtered_languages_with_countries.update(filtered_countries)
return filtered_languages_with_countries
class UnicodeEscape(str): class UnicodeEscape(str):
@ -282,54 +169,29 @@ class UnicodeEscape(str):
return "'" + "".join([chr(c) for c in self.encode('unicode-escape')]) + "'" return "'" + "".join([chr(c) for c in self.encode('unicode-escape')]) + "'"
# Write languages.py. def get_unicode_flag(locale: babel.Locale):
def write_languages_file(languages): """Determine a unicode flag (emoji) that fits to the ``locale``"""
file_headers = (
"# -*- coding: utf-8 -*-",
"# list of language codes",
"# this file is generated automatically by utils/fetch_languages.py",
"language_codes = (\n",
)
language_codes = [] emoji = lang2emoji.get(locale.language)
if emoji:
return emoji
for code in sorted(languages): if not locale.territory:
return '\U0001F310'
name = languages[code]['name'] emoji = lang2emoji.get(locale.territory.lower())
if name is None: if emoji:
print("ERROR: languages['%s'] --> %s" % (code, languages[code])) return emoji
continue
flag = get_unicode_flag(code) or '' try:
item = ( c1 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + locale.territory[0])
code, c2 = lookup('REGIONAL INDICATOR SYMBOL LETTER ' + locale.territory[1])
languages[code]['name'].split(' (')[0], # print("OK : %s --> %s%s" % (locale, c1, c2))
get_territory_name(code) or '', except KeyError as exc:
languages[code].get('english_name') or '', print("ERROR: %s --> %s" % (locale, exc))
UnicodeEscape(flag), return None
)
language_codes.append(item) return c1 + c2
language_codes = tuple(language_codes)
with open(languages_file, 'w', encoding='utf-8') as new_file:
file_content = "{file_headers} {language_codes},\n)\n".format(
# fmt: off
file_headers = '\n'.join(file_headers),
language_codes = pformat(language_codes, indent=4)[1:-1]
# fmt: on
)
new_file.write(file_content)
new_file.close()
def main():
load_engines(settings['engines'])
traits_map = fetch_traits_map()
joined_languages_map = join_language_lists(traits_map)
filtered_languages = filter_language_list(joined_languages_map)
write_languages_file(filtered_languages)
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -50,7 +50,7 @@ from pathlib import Path
from searx import searx_dir from searx import searx_dir
from searx.network import set_timeout_for_thread from searx.network import set_timeout_for_thread
from searx.engines import wikidata, set_loggers from searx.engines import wikidata, set_loggers
from searx.languages import language_codes from searx.sxng_locales import sxng_locales
from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK
set_loggers(wikidata, 'wikidata') set_loggers(wikidata, 'wikidata')
@ -76,7 +76,7 @@ GROUP BY ?key ?item ?itemLabel
ORDER BY ?key ?item ?itemLabel ORDER BY ?key ?item ?itemLabel
""" """
LANGUAGES = [l[0].lower() for l in language_codes] LANGUAGES = [l[0].lower() for l in sxng_locales]
PRESET_KEYS = { PRESET_KEYS = {
('wikidata',): {'en': 'Wikidata'}, ('wikidata',): {'en': 'Wikidata'},