[mod] comprehensive revision of the searxng_extra/update/ scripts

- pylint all scripts
- fix some errors reported by pyright
- from searx.data import data_dir (Path.open)
- fix import from pygments.formatters.html

NOTE: no functional changes!

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2024-03-10 15:33:23 +01:00 committed by Markus Heiser
parent 0ffec440b2
commit ce4aaf6cad
10 changed files with 51 additions and 61 deletions

View file

@ -11,11 +11,10 @@ Output file: :origin:`searx/data/ahmia_blacklist.txt` (:origin:`CI Update data
"""
# pylint: disable=use-dict-literal
from os.path import join
import requests
from searx import searx_dir
from searx.data import data_dir
DATA_FILE = data_dir / 'ahmia_blacklist.txt'
URL = 'https://ahmia.fi/blacklist/banned/'
@ -23,15 +22,12 @@ def fetch_ahmia_blacklist():
resp = requests.get(URL, timeout=3.0)
if resp.status_code != 200:
# pylint: disable=broad-exception-raised
raise Exception("Error fetching Ahmia blacklist, HTTP code " + resp.status_code)
raise Exception("Error fetching Ahmia blacklist, HTTP code " + resp.status_code) # type: ignore
return resp.text.split()
def get_ahmia_blacklist_filename():
return join(join(searx_dir, "data"), "ahmia_blacklist.txt")
if __name__ == '__main__':
blacklist = fetch_ahmia_blacklist()
with open(get_ahmia_blacklist_filename(), "w", encoding='utf-8') as f:
blacklist.sort()
with DATA_FILE.open("w", encoding='utf-8') as f:
f.write('\n'.join(blacklist))

View file

@ -15,12 +15,11 @@ import re
import unicodedata
import json
# set path
from os.path import join
from searx import searx_dir
from searx.locales import LOCALE_NAMES, locales_initialize
from searx.engines import wikidata, set_loggers
from searx.data import data_dir
DATA_FILE = data_dir / 'currencies.json'
set_loggers(wikidata, 'wikidata')
locales_initialize()
@ -133,10 +132,6 @@ def fetch_db():
return db
def get_filename():
return join(join(searx_dir, "data"), "currencies.json")
def main():
db = fetch_db()
@ -156,8 +151,8 @@ def main():
if len(db['names'][name]) == 1:
db['names'][name] = db['names'][name][0]
with open(get_filename(), 'w', encoding='utf8') as f:
json.dump(db, f, ensure_ascii=False, indent=4)
with DATA_FILE.open('w', encoding='utf8') as f:
json.dump(db, f, indent=4, sort_keys=True, ensure_ascii=False)
if __name__ == '__main__':

View file

@ -24,6 +24,9 @@ from searx import searx_dir
from searx.utils import gen_useragent, detect_language
import searx.search
import searx.network
from searx.data import data_dir
DATA_FILE = data_dir / 'engine_descriptions.json'
set_loggers(wikidata, 'wikidata')
locales_initialize()
@ -362,8 +365,8 @@ def main():
fetch_website_descriptions()
output = get_output()
with open(get_engine_descriptions_filename(), 'w', encoding='utf8') as f:
f.write(json.dumps(output, indent=1, separators=(',', ':'), ensure_ascii=False))
with DATA_FILE.open('w', encoding='utf8') as f:
f.write(json.dumps(output, indent=1, separators=(',', ':'), sort_keys=True, ensure_ascii=False))
if __name__ == "__main__":

View file

@ -144,9 +144,9 @@ def write_languages_file(sxng_tag_list):
item = (
sxng_tag,
sxng_locale.get_language_name().title(),
sxng_locale.get_language_name().title(), # type: ignore
sxng_locale.get_territory_name() or '',
sxng_locale.english_name.split(' (')[0],
sxng_locale.english_name.split(' (')[0] if sxng_locale.english_name else '',
UnicodeEscape(flag),
)
@ -154,7 +154,7 @@ def write_languages_file(sxng_tag_list):
language_codes = tuple(language_codes)
with open(languages_file, 'w', encoding='utf-8') as new_file:
with languages_file.open('w', encoding='utf-8') as new_file:
file_content = "{header} {language_codes}{footer}".format(
header=languages_file_header,
language_codes=pformat(language_codes, width=120, indent=4)[1:-1],

View file

@ -8,20 +8,17 @@ from :py:obj:`BANGS_URL`.
"""
from pathlib import Path
import json
import httpx
from searx import searx_dir
from searx.external_bang import LEAF_KEY
from searx.data import data_dir
DATA_FILE = data_dir / 'external_bangs.json'
BANGS_URL = 'https://duckduckgo.com/bang.js'
"""JSON file which contains the bangs."""
BANGS_DATA_FILE = Path(searx_dir) / 'data' / 'external_bangs.json'
HTTPS_COLON = 'https:'
HTTP_COLON = 'http:'
@ -36,8 +33,8 @@ def main():
'version': 0,
'trie': trie,
}
with open(BANGS_DATA_FILE, 'w', encoding="utf8") as f:
json.dump(output, f, sort_keys=True, ensure_ascii=False, indent=4)
with DATA_FILE.open('w', encoding="utf8") as f:
json.dump(output, f, indent=4, sort_keys=True, ensure_ascii=False)
def merge_when_no_leaf(node):

View file

@ -11,13 +11,14 @@ Output file: :origin:`searx/data/useragents.json` (:origin:`CI Update data ...
import json
import re
from os.path import join
from urllib.parse import urlparse, urljoin
from packaging.version import parse
import requests
from lxml import html
from searx import searx_dir
from searx.data import data_dir
DATA_FILE = data_dir / 'useragents.json'
URL = 'https://ftp.mozilla.org/pub/firefox/releases/'
RELEASE_PATH = '/pub/firefox/releases/'
@ -41,7 +42,7 @@ def fetch_firefox_versions():
resp = requests.get(URL, timeout=2.0)
if resp.status_code != 200:
# pylint: disable=broad-exception-raised
raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code)
raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code) # type: ignore
dom = html.fromstring(resp.text)
versions = []
@ -74,11 +75,7 @@ def fetch_firefox_last_versions():
return result
def get_useragents_filename():
return join(join(searx_dir, "data"), "useragents.json")
if __name__ == '__main__':
useragents["versions"] = fetch_firefox_last_versions()
with open(get_useragents_filename(), "w", encoding='utf-8') as f:
json.dump(useragents, f, indent=4, ensure_ascii=False)
with DATA_FILE.open('w', encoding='utf-8') as f:
json.dump(useragents, f, indent=4, sort_keys=True, ensure_ascii=False)

View file

@ -1,4 +1,5 @@
#!/usr/bin/env python
# lint: pylint
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Update locale names in :origin:`searx/data/locales.json` used by
:ref:`searx.locales`
@ -6,12 +7,12 @@
- :py:obj:`searx.locales.RTL_LOCALES`
- :py:obj:`searx.locales.LOCALE_NAMES`
"""
# pylint: disable=invalid-name
from __future__ import annotations
from typing import Set
import json
from pathlib import Path
import os
import babel
import babel.languages
@ -61,7 +62,7 @@ def main():
"RTL_LOCALES": sorted(RTL_LOCALES),
}
with open(LOCALE_DATA_FILE, 'w', encoding='utf-8') as f:
with LOCALE_DATA_FILE.open('w', encoding='utf-8') as f:
json.dump(content, f, indent=2, sort_keys=True, ensure_ascii=False)
@ -84,11 +85,10 @@ def get_locale_descr(locale: babel.Locale, tr_locale):
return native_language
return native_language + ' (' + english_language + ')'
else:
result = native_language + ', ' + native_territory + ' (' + english_language
if english_territory:
return result + ', ' + english_territory + ')'
return result + ')'
result = native_language + ', ' + native_territory + ' (' + english_language
if english_territory:
return result + ', ' + english_territory + ')'
return result + ')'
def _get_locale_descr(locale: babel.Locale, tr_locale: str) -> tuple[str, str]:

View file

@ -45,13 +45,14 @@ Output file: :origin:`searx/data/osm_keys_tags` (:origin:`CI Update data ...
import json
import collections
from pathlib import Path
from searx import searx_dir
from searx.network import set_timeout_for_thread
from searx.engines import wikidata, set_loggers
from searx.sxng_locales import sxng_locales
from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK
from searx.data import data_dir
DATA_FILE = data_dir / 'osm_keys_tags.json'
set_loggers(wikidata, 'wikidata')
@ -203,10 +204,6 @@ def optimize_keys(data):
return data
def get_osm_tags_filename():
return Path(searx_dir) / "data" / "osm_keys_tags.json"
if __name__ == '__main__':
set_timeout_for_thread(60)
@ -214,5 +211,5 @@ if __name__ == '__main__':
'keys': optimize_keys(get_keys()),
'tags': optimize_tags(get_tags()),
}
with open(get_osm_tags_filename(), 'w', encoding="utf8") as f:
json.dump(result, f, indent=4, ensure_ascii=False, sort_keys=True)
with DATA_FILE.open('w', encoding="utf8") as f:
json.dump(result, f, indent=4, sort_keys=True, ensure_ascii=False)

View file

@ -1,14 +1,16 @@
#!/usr/bin/env python
# lint: pylint
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Update pygments style
Call this script after each upgrade of pygments
"""
# pylint: disable=too-few-public-methods
from pathlib import Path
import pygments
from pygments.formatters import HtmlFormatter
from pygments.formatters.html import HtmlFormatter
from searx import searx_dir
@ -41,7 +43,7 @@ END_DARK_THEME = """
"""
class Formatter(HtmlFormatter):
class Formatter(HtmlFormatter): # pylint: disable=missing-class-docstring
@property
def _pre_style(self):
return 'line-height: 100%;'
@ -67,5 +69,5 @@ def generat_css(light_style, dark_style) -> str:
if __name__ == '__main__':
print("update: %s" % LESS_FILE)
with open(LESS_FILE, 'w') as f:
with LESS_FILE.open('w', encoding='utf8') as f:
f.write(generat_css('default', 'lightbulb'))

View file

@ -18,6 +18,9 @@ from os.path import join
from searx import searx_dir
from searx.engines import wikidata, set_loggers
from searx.data import data_dir
DATA_FILE = data_dir / 'wikidata_units.json'
set_loggers(wikidata, 'wikidata')
@ -58,9 +61,9 @@ def get_data():
def get_wikidata_units_filename():
return join(join(searx_dir, "data"), "wikidata_units.json")
return join(join(searx_dir, "data"), "")
if __name__ == '__main__':
with open(get_wikidata_units_filename(), 'w', encoding="utf8") as f:
json.dump(get_data(), f, indent=4, ensure_ascii=False)
with DATA_FILE.open('w', encoding="utf8") as f:
json.dump(get_data(), f, indent=4, sort_keys=True, ensure_ascii=False)