2021-02-19 12:52:26 +01:00
#!/usr/bin/env python
2022-01-03 12:58:48 +01:00
# lint: pylint
2021-10-03 15:12:09 +02:00
# SPDX-License-Identifier: AGPL-3.0-or-later
2020-08-06 17:42:46 +02:00
2022-01-03 12:40:06 +01:00
"""Fetch currencies from :origin:`searx/engines/wikidata.py` engine.
Output file: :origin:`searx/data/currencies.json` (:origin:`CI Update data ...
2022-01-03 12:58:48 +01:00
# pylint: disable=invalid-name
2015-05-12 20:52:08 +02:00
import re
import unicodedata
2021-02-19 12:52:26 +01:00
import json
2016-07-08 23:43:28 +07:00
2022-06-29 20:56:16 +02:00
from searx.locales import LOCALE_NAMES, locales_initialize
2021-09-19 09:10:02 +00:00
from searx.engines import wikidata, set_loggers
2024-03-10 15:33:23 +01:00
from searx.data import data_dir
# Path of the JSON file that main() opens for writing and dumps the database into.
DATA_FILE = data_dir / 'currencies.json'
2021-02-19 12:52:26 +01:00
2021-09-19 09:10:02 +00:00
set_loggers(wikidata, 'wikidata')
2022-06-29 20:56:16 +02:00
2021-02-19 12:52:26 +01:00
# ORDER BY (with all the query fields) is important to keep a deterministic result order
2022-09-27 17:01:00 +02:00
# so multiple invocations of this script don't change currencies.json
2021-02-19 12:52:26 +01:00
SELECT DISTINCT ?iso4217 ?unit ?unicode ?label ?alias WHERE {
?item wdt:P498 ?iso4217; rdfs:label ?label.
OPTIONAL { ?item skos:altLabel ?alias FILTER (LANG (?alias) = LANG(?label)). }
OPTIONAL { ?item wdt:P5061 ?unit. }
OPTIONAL { ?item wdt:P489 ?symbol.
?symbol wdt:P487 ?unicode. }
MINUS { ?item wdt:P582 ?end_data . } # Ignore monney with an end date
MINUS { ?item wdt:P31/wdt:P279* wd:Q15893266 . } # Ignore "former entity" (obsolete currency)
2015-05-12 20:52:08 +02:00
2021-02-19 12:52:26 +01:00
ORDER BY ?iso4217 ?unit ?unicode ?label ?alias
# ORDER BY (with all the query fields) is important to keep a deterministic result order
2022-09-27 17:01:00 +02:00
# so multiple invocations of this script don't change currencies.json
2021-02-19 12:52:26 +01:00
SELECT DISTINCT ?iso4217 ?article_name WHERE {
?item wdt:P498 ?iso4217 .
?article schema:about ?item ;
schema:name ?article_name ;
schema:isPartOf [ wikibase:wikiGroup "wikipedia" ]
MINUS { ?item wdt:P582 ?end_data . } # Ignore monney with an end date
MINUS { ?item wdt:P31/wdt:P279* wd:Q15893266 . } # Ignore "former entity" (obsolete currency)
ORDER BY ?iso4217 ?article_name
2015-05-12 20:52:08 +02:00
2021-08-03 15:13:00 +02:00
2021-02-19 12:52:26 +01:00
# Text substituted for the ``%LANGUAGES_SPARQL%`` placeholder of a request:
# the repr() of the part of each LANGUAGES entry before the first '_',
# de-duplicated and joined with ', '.  The items are sorted because the
# iteration order of a set of strings changes from run to run (hash
# randomization), which would make the generated text non-reproducible.
LANGUAGES_SPARQL = ', '.join(sorted({repr(lang.split('_')[0]) for lang in LANGUAGES}))
2015-05-12 20:52:08 +02:00
2021-02-19 12:52:26 +01:00
def remove_accents(name):
    """Return the lower-cased NFKD (compatibility decomposed) form of *name*.

    Note that, despite the function name, combining marks are not stripped:
    an accented character is only split into its base character followed by
    the combining mark.
    """
    decomposed = unicodedata.normalize('NFKD', name)
    return decomposed.lower()
2015-05-12 20:52:08 +02:00
2021-02-19 12:52:26 +01:00
def remove_extra(name):
    """Cut *name* at the first ``(`` and then at the first ``:``.

    For each of the two separators, in that order: when the separator occurs
    in the name, only the text before its first occurrence is kept and
    surrounding whitespace is stripped.  A name that contains neither
    separator is returned untouched (it is not stripped).
    """
    for separator in ('(', ':'):
        head, found, _tail = name.partition(separator)
        if found:
            name = head.strip()
    return name
2015-05-12 20:52:08 +02:00
2016-07-08 23:43:28 +07:00
2021-02-19 12:52:26 +01:00
def _normalize_name(name):
    """Return the normalized form of a currency name.

    The name is lower-cased, passed through :py:func:`remove_accents`, hyphens
    are turned into spaces, runs of spaces are collapsed into a single space
    and finally everything from the first ``(`` or ``:`` on is dropped by
    :py:func:`remove_extra`.
    """
    lowered = remove_accents(name.lower())
    spaced = lowered.replace('-', ' ')
    collapsed = re.sub(' +', ' ', spaced)
    return remove_extra(collapsed)
2015-05-12 20:52:08 +02:00
2016-07-08 23:43:28 +07:00
2021-02-19 12:52:26 +01:00
def add_currency_name(db, name, iso4217, normalize_name=True):
    """Register *iso4217* as a currency code for *name* in ``db['names']``.

    :param db: database dict; its ``'names'`` entry maps a name to a list of
        ISO 4217 codes.
    :param name: currency name (or symbol) to register.
    :param iso4217: ISO 4217 code to associate with the name.
    :param normalize_name: when true the name is passed through
        :py:func:`_normalize_name` before being used as key.

    A code that is not yet listed for the name is inserted at the front of the
    list; a code that is already listed is left where it is.
    """
    names = db['names']
    key = _normalize_name(name) if normalize_name else name

    codes = names.setdefault(key, [])
    if iso4217 in codes:
        return
    # the most recently added code goes first
    codes.insert(0, iso4217)
2016-07-08 23:43:28 +07:00
2015-05-12 20:52:08 +02:00
2021-02-19 12:52:26 +01:00
def add_currency_label(db, label, iso4217, language):
    """Store *label* as the *language* label of currency *iso4217*.

    ``db['iso4217']`` maps an ISO 4217 code to a ``{language: label}`` dict;
    the inner dict is created on first use and an existing label for the same
    language is overwritten.
    """
    db['iso4217'].setdefault(iso4217, {})[language] = label
2015-05-12 20:52:08 +02:00
2016-07-08 23:43:28 +07:00
2021-02-19 12:52:26 +01:00
def wikidata_request_result_iterator(request):
    """Send a SPARQL *request* to Wikidata and iterate over its bindings.

    The ``%LANGUAGES_SPARQL%`` placeholder of the request is replaced by
    ``LANGUAGES_SPARQL`` before the query is sent.  Nothing is yielded when
    the query returns ``None``.
    """
    query = request.replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL)
    response = wikidata.send_wikidata_query(query)
    if response is None:
        return
    yield from response['results']['bindings']
2016-07-08 23:43:28 +07:00
2021-02-19 12:52:26 +01:00
def fetch_db():
    """Build the currency database from the two Wikidata requests.

    :returns: a dict with two entries: ``'names'`` (name -> list of ISO 4217
        codes, filled by :py:func:`add_currency_name`) and ``'iso4217'``
        (ISO 4217 code -> ``{language: label}``, filled by
        :py:func:`add_currency_label`).
    """
    db = {'names': {}, 'iso4217': {}}

    # Wikipedia article titles: used as name and as label of the article language
    for binding in wikidata_request_result_iterator(SPARQL_WIKIPEDIA_NAMES_REQUEST):
        code = binding['iso4217']['value']
        title = binding['article_name']['value']
        title_lang = binding['article_name']['xml:lang']
        add_currency_name(db, title, code)
        add_currency_label(db, title, code, title_lang)

    # Wikidata labels, aliases, symbols and units
    for binding in wikidata_request_result_iterator(SARQL_REQUEST):
        code = binding['iso4217']['value']

        if 'label' in binding:
            text = binding['label']['value']
            text_lang = binding['label']['xml:lang']
            add_currency_name(db, text, code)
            add_currency_label(db, text, code, text_lang)

        if 'alias' in binding:
            add_currency_name(db, binding['alias']['value'], code)

        # symbols and units are registered verbatim, without normalization
        for field in ('unicode', 'unit'):
            if field in binding:
                add_currency_name(db, binding[field]['value'], code, normalize_name=False)

    return db
2016-07-08 23:43:28 +07:00
2021-02-19 12:52:26 +01:00
def main():
    """Fetch the currency database, add the static names and write it to
    ``DATA_FILE`` as JSON."""
    db = fetch_db()

    # static
    static_names = (
        ("euro", 'EUR'),
        ("euros", 'EUR'),
        ("dollar", 'USD'),
        ("dollars", 'USD'),
        ("peso", 'MXN'),
        ("pesos", 'MXN'),
    )
    for currency_name, code in static_names:
        add_currency_name(db, currency_name, code)

    # reduce memory usage:
    # replace lists with one item by the item.  see
    # searx.search.processors.online_currency.name_to_iso4217
    #
    # This has to happen after the last add_currency_name() call, which
    # expects every value of db['names'] to still be a list.
    names = db['names']
    for currency_name, codes in names.items():
        if len(codes) == 1:
            names[currency_name] = codes[0]

    with DATA_FILE.open('w', encoding='utf8') as f:
        json.dump(db, f, indent=4, sort_keys=True, ensure_ascii=False)
2015-05-12 20:52:08 +02:00
2021-12-27 09:26:22 +01:00
2021-02-19 12:52:26 +01:00
if __name__ == '__main__':