[fix] update wikidata units - remove URL prefix from Q-name

Sometimes the URL prefix switches from http to https. This patch hardens the
code that removes the URL prefix from the wikidata Q-name. The issue was reported
in [1].

[1] https://github.com/searxng/searxng/pull/3437#issuecomment-2082121730

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2024-05-01 18:25:22 +02:00 committed by Markus Heiser
parent c8d0b6529b
commit 11fe88bb40
2 changed files with 138 additions and 6 deletions

View file

@ -1404,6 +1404,136 @@
"symbol": "cm H₂O", "symbol": "cm H₂O",
"to_si_factor": 98.0665 "to_si_factor": 98.0665
}, },
"Q125387265": {
"si_name": "Q11574",
"symbol": "qs",
"to_si_factor": 1e-30
},
"Q125387281": {
"si_name": "Q11574",
"symbol": "rs",
"to_si_factor": 1e-27
},
"Q125389370": {
"si_name": "Q11579",
"symbol": "rK",
"to_si_factor": 1e-27
},
"Q125389387": {
"si_name": "Q11579",
"symbol": "qK",
"to_si_factor": 1e-30
},
"Q125389519": {
"si_name": "Q11579",
"symbol": "RK",
"to_si_factor": 1e+27
},
"Q125389534": {
"si_name": "Q11579",
"symbol": "QK",
"to_si_factor": 1e+30
},
"Q125390959": {
"si_name": "Q41509",
"symbol": "rmol",
"to_si_factor": 1e-27
},
"Q125390987": {
"si_name": "Q41509",
"symbol": "qmol",
"to_si_factor": 1e-30
},
"Q125392001": {
"si_name": "Q41509",
"symbol": "Rmol",
"to_si_factor": 1e+27
},
"Q125392014": {
"si_name": "Q41509",
"symbol": "Qmol",
"to_si_factor": 1e+30
},
"Q125470272": {
"si_name": "Q102573",
"symbol": "rBq",
"to_si_factor": 1e-27
},
"Q125470277": {
"si_name": "Q102573",
"symbol": "qBq",
"to_si_factor": 1e-30
},
"Q125470426": {
"si_name": "Q102573",
"symbol": "RBq",
"to_si_factor": 1e+27
},
"Q125470445": {
"si_name": "Q102573",
"symbol": "QBq",
"to_si_factor": 1e+30
},
"Q125470704": {
"si_name": "Q25406",
"symbol": "rC",
"to_si_factor": 1e-27
},
"Q125470716": {
"si_name": "Q25406",
"symbol": "qC",
"to_si_factor": 1e-30
},
"Q125471094": {
"si_name": "Q25406",
"symbol": "RC",
"to_si_factor": 1e+27
},
"Q125471109": {
"si_name": "Q25406",
"symbol": "QC",
"to_si_factor": 1e+30
},
"Q125471199": {
"si_name": null,
"symbol": "r°C",
"to_si_factor": null
},
"Q125471200": {
"si_name": null,
"symbol": "q°C",
"to_si_factor": null
},
"Q125471246": {
"si_name": null,
"symbol": "R°C",
"to_si_factor": null
},
"Q125471247": {
"si_name": null,
"symbol": "Q°C",
"to_si_factor": null
},
"Q125471334": {
"si_name": "Q131255",
"symbol": "rF",
"to_si_factor": 1e-27
},
"Q125471344": {
"si_name": "Q131255",
"symbol": "qF",
"to_si_factor": 1e-30
},
"Q125471409": {
"si_name": "Q131255",
"symbol": "RF",
"to_si_factor": 1e+27
},
"Q125471423": {
"si_name": "Q131255",
"symbol": "QF",
"to_si_factor": 1e+30
},
"Q12714022": { "Q12714022": {
"si_name": "Q11570", "si_name": "Q11570",
"symbol": "cwt", "symbol": "cwt",
@ -4506,7 +4636,7 @@
}, },
"Q829073": { "Q829073": {
"si_name": "Q33680", "si_name": "Q33680",
"symbol": null, "symbol": "\"",
"to_si_factor": 4.84813681109536e-06 "to_si_factor": 4.84813681109536e-06
}, },
"Q83216": { "Q83216": {
@ -6274,4 +6404,4 @@
"symbol": "m Hg", "symbol": "m Hg",
"to_si_factor": 133322.0 "to_si_factor": 133322.0
} }
} }

View file

@ -51,16 +51,18 @@ WHERE
ORDER BY ?item DESC(?rank) ?symbol ORDER BY ?item DESC(?rank) ?symbol
""" """
_wikidata_url = "https://www.wikidata.org/entity/"
def get_data(): def get_data():
results = collections.OrderedDict() results = collections.OrderedDict()
response = wikidata.send_wikidata_query(SARQL_REQUEST) response = wikidata.send_wikidata_query(SARQL_REQUEST)
for unit in response['results']['bindings']: for unit in response['results']['bindings']:
name = unit['item']['value'].replace(_wikidata_url, '')
symbol = unit['symbol']['value'] symbol = unit['symbol']['value']
si_name = unit.get('tosiUnit', {}).get('value', '').replace(_wikidata_url, '') name = unit['item']['value'].rsplit('/', 1)[1]
si_name = unit.get('tosiUnit', {}).get('value', '')
if si_name:
si_name = si_name.rsplit('/', 1)[1]
to_si_factor = unit.get('tosi', {}).get('value', '') to_si_factor = unit.get('tosi', {}).get('value', '')
if name not in results: if name not in results:
# ignore duplicate: always use the first one # ignore duplicate: always use the first one