forked from Ponysearch/Ponysearch
[fix] make search requests on wikidata more accurate
This commit is contained in:
parent
835d1edd58
commit
b12857a70d
2 changed files with 7 additions and 7 deletions
|
@@ -27,7 +27,7 @@ result_count = 1
|
|||
# urls
|
||||
wikidata_host = 'https://www.wikidata.org'
|
||||
url_search = wikidata_host \
|
||||
+ '/wiki/Special:ItemDisambiguation?{query}'
|
||||
+ '/w/index.php?{query}'
|
||||
|
||||
wikidata_api = wikidata_host + '/w/api.php'
|
||||
url_detail = wikidata_api\
|
||||
|
@@ -40,7 +40,7 @@ url_map = 'https://www.openstreetmap.org/'\
|
|||
url_image = 'https://commons.wikimedia.org/wiki/Special:FilePath/{filename}?width=500&height=400'
|
||||
|
||||
# xpaths
|
||||
wikidata_ids_xpath = '//div/ul[@class="wikibase-disambiguation"]/li/a/@title'
|
||||
wikidata_ids_xpath = '//ul[@class="mw-search-results"]/li//a/@href'
|
||||
title_xpath = '//*[contains(@class,"wikibase-title-label")]'
|
||||
description_xpath = '//div[contains(@class,"wikibase-entitytermsview-heading-description")]'
|
||||
property_xpath = '//div[@id="{propertyid}"]'
|
||||
|
@@ -57,22 +57,21 @@ calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]'
|
|||
|
||||
|
||||
def request(query, params):
|
||||
language = match_language(params['language'], supported_languages).split('-')[0]
|
||||
|
||||
params['url'] = url_search.format(
|
||||
query=urlencode({'label': query, 'language': language}))
|
||||
query=urlencode({'search': query}))
|
||||
return params
|
||||
|
||||
|
||||
def response(resp):
|
||||
results = []
|
||||
html = fromstring(resp.text)
|
||||
wikidata_ids = html.xpath(wikidata_ids_xpath)
|
||||
search_results = html.xpath(wikidata_ids_xpath)
|
||||
|
||||
language = match_language(resp.search_params['language'], supported_languages).split('-')[0]
|
||||
|
||||
# TODO: make requests asynchronous to avoid timeout when result_count > 1
|
||||
for wikidata_id in wikidata_ids[:result_count]:
|
||||
for search_result in search_results[:result_count]:
|
||||
wikidata_id = search_result.split('/')[-1]
|
||||
url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
|
||||
htmlresponse = get(url)
|
||||
jsonresponse = loads(htmlresponse.text)
|
||||
|
|
|
@@ -174,6 +174,7 @@ engines:
|
|||
- name : wikidata
|
||||
engine : wikidata
|
||||
shortcut : wd
|
||||
timeout : 3.0
|
||||
weight : 2
|
||||
|
||||
- name : duckduckgo
|
||||
|
|
Loading…
Reference in a new issue