forked from Ponysearch/Ponysearch
commit
57149661e4
2 changed files with 15 additions and 4 deletions
|
@@ -31,8 +31,6 @@ if xpath_results is a string element, then it's already done
|
|||
def extract_text(xpath_results):
|
||||
if type(xpath_results) == list:
|
||||
# it's list of result : concat everything using recursive call
|
||||
if not xpath_results:
|
||||
raise Exception('Empty url resultset')
|
||||
result = ''
|
||||
for e in xpath_results:
|
||||
result = result + extract_text(e)
|
||||
|
@@ -48,6 +46,8 @@ def extract_text(xpath_results):
|
|||
|
||||
|
||||
def extract_url(xpath_results, search_url):
|
||||
if xpath_results == []:
|
||||
raise Exception('Empty url resultset')
|
||||
url = extract_text(xpath_results)
|
||||
|
||||
if url.startswith('//'):
|
||||
|
@@ -103,8 +103,8 @@ def response(resp):
|
|||
if results_xpath:
|
||||
for result in dom.xpath(results_xpath):
|
||||
url = extract_url(result.xpath(url_xpath), search_url)
|
||||
title = extract_text(result.xpath(title_xpath)[0])
|
||||
content = extract_text(result.xpath(content_xpath)[0])
|
||||
title = extract_text(result.xpath(title_xpath))
|
||||
content = extract_text(result.xpath(content_xpath))
|
||||
results.append({'url': url, 'title': title, 'content': content})
|
||||
else:
|
||||
for url, title, content in zip(
|
||||
|
|
|
@@ -462,6 +462,17 @@ engines:
|
|||
# - ...
|
||||
# disabled : True
|
||||
|
||||
- name : semantic scholar
|
||||
engine : xpath
|
||||
paging : True
|
||||
search_url : https://www.semanticscholar.org/search?q={query}&sort=relevance&page={pageno}&ae=false
|
||||
results_xpath : //article
|
||||
url_xpath : .//div[@class="search-result-title"]/a/@href
|
||||
title_xpath : .//div[@class="search-result-title"]/a
|
||||
content_xpath : .//div[@class="search-result-abstract"]
|
||||
shortcut : se
|
||||
categories : science
|
||||
|
||||
- name : spotify
|
||||
engine : spotify
|
||||
shortcut : stf
|
||||
|
|
Loading…
Reference in a new issue