forked from Ponysearch/Ponysearch
Merge pull request #1705 from dalf/template_paper
Theme: add a paper.html template and update of the science engines
commit fc389f009d
18 changed files with 536 additions and 146 deletions
@@ -311,3 +311,88 @@ the parameter ``template`` must be set to the desired type.
address.postcode          postcode of object
address.country           country of object
========================= =====================================================

.. _BibTeX format: https://www.bibtex.com/g/bibtex-format/
.. _BibTeX field types: https://en.wikipedia.org/wiki/BibTeX#Field_types

.. list-table:: Parameter of the **paper** media type /
   see `BibTeX field types`_ and `BibTeX format`_
   :header-rows: 2
   :width: 100%

   * - result-parameter
     - Python type
     - information

   * - template
     - :py:class:`str`
     - is set to ``paper.html``

   * - title
     - :py:class:`str`
     - title of the result

   * - content
     - :py:class:`str`
     - abstract

   * - comments
     - :py:class:`str`
     - free text display in italic below the content

   * - tags
     - :py:class:`List <list>`\ [\ :py:class:`str`\ ]
     - free tag list

   * - publishedDate
     - :py:class:`datetime <datetime.datetime>`
     - last publication date

   * - authors
     - :py:class:`List <list>`\ [\ :py:class:`str`\ ]
     - list of authors of the work (authors with a "s")

   * - editor
     - :py:class:`str`
     - list of editors of a book

   * - publisher
     - :py:class:`str`
     - name of the publisher

   * - journal
     - :py:class:`str`
     - name of the journal or magazine the article was
       published in

   * - volume
     - :py:class:`str`
     - volume number

   * - pages
     - :py:class:`str`
     - page range where the article is

   * - number
     - :py:class:`str`
     - number of the report or the issue number for a journal article

   * - doi
     - :py:class:`str`
     - DOI number (like ``10.1038/d41586-018-07848-2``)

   * - issn
     - :py:class:`str`
     - ISSN number like ``1476-4687``

   * - isbn
     - :py:class:`str`
     - ISBN number like ``9780201896831``

   * - pdf_url
     - :py:class:`str`
     - URL to the full article, the PDF version

   * - html_url
     - :py:class:`str`
     - URL to full article, HTML version
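As a quick illustration of the parameters documented above, an engine result using this media type might be built like the sketch below. The keys follow the table; every value (URL, title, authors, DOI, dates) is an invented placeholder, not output of any real engine.

# Illustrative only: keys follow the paper.html parameter table above,
# all values are placeholders.
from datetime import datetime

example_paper_result = {
    'template': 'paper.html',
    'url': 'https://example.org/paper/42',
    'title': 'An Example Paper',
    'content': 'Abstract of the example paper.',
    'authors': ['A. Author', 'B. Author'],
    'journal': 'Journal of Examples',
    'volume': '7',
    'pages': '101-110',
    'publishedDate': datetime(2022, 1, 1),
    'doi': '10.1000/example.42',
    'pdf_url': 'https://example.org/paper/42.pdf',
    'html_url': 'https://example.org/paper/42.html',
}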
@@ -3,9 +3,10 @@
ArXiV (Scientific preprints)
"""

from lxml import html
from lxml import etree
from lxml.etree import XPath
from datetime import datetime
from searx.utils import eval_xpath_list, eval_xpath_getindex
from searx.utils import eval_xpath, eval_xpath_list, eval_xpath_getindex

# about
about = {

@@ -17,7 +18,7 @@ about = {
    "results": 'XML-RSS',
}

categories = ['science']
categories = ['science', 'scientific publications']
paging = True

base_url = (

@@ -27,6 +28,23 @@ base_url = (
# engine dependent config
number_of_results = 10

# xpaths
arxiv_namespaces = {
    "atom": "http://www.w3.org/2005/Atom",
    "arxiv": "http://arxiv.org/schemas/atom",
}
xpath_entry = XPath('//atom:entry', namespaces=arxiv_namespaces)
xpath_title = XPath('.//atom:title', namespaces=arxiv_namespaces)
xpath_id = XPath('.//atom:id', namespaces=arxiv_namespaces)
xpath_summary = XPath('.//atom:summary', namespaces=arxiv_namespaces)
xpath_author_name = XPath('.//atom:author/atom:name', namespaces=arxiv_namespaces)
xpath_doi = XPath('.//arxiv:doi', namespaces=arxiv_namespaces)
xpath_pdf = XPath('.//atom:link[@title="pdf"]', namespaces=arxiv_namespaces)
xpath_published = XPath('.//atom:published', namespaces=arxiv_namespaces)
xpath_journal = XPath('.//arxiv:journal_ref', namespaces=arxiv_namespaces)
xpath_category = XPath('.//atom:category/@term', namespaces=arxiv_namespaces)
xpath_comment = XPath('./arxiv:comment', namespaces=arxiv_namespaces)


def request(query, params):
    # basic search

@@ -41,30 +59,50 @@ def request(query, params):

def response(resp):
    results = []
    dom = etree.fromstring(resp.content)
    for entry in eval_xpath_list(dom, xpath_entry):
        title = eval_xpath_getindex(entry, xpath_title, 0).text

    dom = html.fromstring(resp.content)
        url = eval_xpath_getindex(entry, xpath_id, 0).text
        abstract = eval_xpath_getindex(entry, xpath_summary, 0).text

    for entry in eval_xpath_list(dom, '//entry'):
        title = eval_xpath_getindex(entry, './/title', 0).text
        authors = [author.text for author in eval_xpath_list(entry, xpath_author_name)]

        url = eval_xpath_getindex(entry, './/id', 0).text
        # doi
        doi_element = eval_xpath_getindex(entry, xpath_doi, 0, default=None)
        doi = None if doi_element is None else doi_element.text

        content_string = '{doi_content}{abstract_content}'
        # pdf
        pdf_element = eval_xpath_getindex(entry, xpath_pdf, 0, default=None)
        pdf_url = None if pdf_element is None else pdf_element.attrib.get('href')

        abstract = eval_xpath_getindex(entry, './/summary', 0).text
        # journal
        journal_element = eval_xpath_getindex(entry, xpath_journal, 0, default=None)
        journal = None if journal_element is None else journal_element.text

        # If a doi is available, add it to the snipppet
        doi_element = eval_xpath_getindex(entry, './/link[@title="doi"]', 0, default=None)
        doi_content = doi_element.text if doi_element is not None else ''
        content = content_string.format(doi_content=doi_content, abstract_content=abstract)
        # tags
        tag_elements = eval_xpath(entry, xpath_category)
        tags = [str(tag) for tag in tag_elements]

        if len(content) > 300:
            content = content[0:300] + "..."
        # TODO: center snippet on query term
        # comments
        comments_elements = eval_xpath_getindex(entry, xpath_comment, 0, default=None)
        comments = None if comments_elements is None else comments_elements.text

        publishedDate = datetime.strptime(eval_xpath_getindex(entry, './/published', 0).text, '%Y-%m-%dT%H:%M:%SZ')
        publishedDate = datetime.strptime(eval_xpath_getindex(entry, xpath_published, 0).text, '%Y-%m-%dT%H:%M:%SZ')

        res_dict = {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content}
        res_dict = {
            'template': 'paper.html',
            'url': url,
            'title': title,
            'publishedDate': publishedDate,
            'content': abstract,
            'doi': doi,
            'authors': authors,
            'journal': journal,
            'tags': tags,
            'comments': comments,
            'pdf_url': pdf_url,
        }

        results.append(res_dict)
searx/engines/crossref.py  (new file, 59 lines)
@@ -0,0 +1,59 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Semantic Scholar (Science)
"""

from urllib.parse import urlencode
from searx.utils import html_to_text

about = {
    "website": 'https://www.crossref.org/',
    "wikidata_id": 'Q5188229',
    "official_api_documentation": 'https://github.com/CrossRef/rest-api-doc',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}

categories = ['science', 'scientific publications']
paging = True
search_url = 'https://api.crossref.org/works'


def request(query, params):
    params['url'] = search_url + '?' + urlencode(dict(query=query, offset=20 * (params['pageno'] - 1)))
    return params


def response(resp):
    res = resp.json()
    results = []
    for record in res['message']['items']:
        record_type = record['type']
        if record_type == 'book-chapter':
            title = record['container-title'][0]
            if record['title'][0].lower().strip() != title.lower().strip():
                title = html_to_text(title) + ' (' + html_to_text(record['title'][0]) + ')'
            journal = None
        else:
            title = html_to_text(record['title'][0])
            journal = record.get('container-title', [None])[0]
        url = record.get('resource', {}).get('primary', {}).get('URL') or record['URL']
        authors = [author.get('given', '') + ' ' + author.get('family', '') for author in record.get('author', [])]
        isbn = record.get('isbn') or [i['value'] for i in record.get('isbn-type', [])]
        results.append(
            {
                'template': 'paper.html',
                'url': url,
                'title': title,
                'journal': journal,
                'volume': record.get('volume'),
                'type': record['type'],
                'content': html_to_text(record.get('abstract', '')),
                'publisher': record.get('publisher'),
                'authors': authors,
                'doi': record['DOI'],
                'isbn': isbn,
            }
        )
    return results
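For context, response() above reads only a handful of keys from each item returned by api.crossref.org. A heavily trimmed, invented record shaped like those items (real responses carry many more fields) could look like this sketch:

# Invented, minimal record shaped like an entry of 'message.items' in a
# CrossRef /works response, limited to the keys read by response() above.
record = {
    'type': 'journal-article',
    'title': ['An Example Paper'],
    'container-title': ['Journal of Examples'],
    'URL': 'https://doi.org/10.1000/example.42',
    'DOI': '10.1000/example.42',
    'volume': '7',
    'publisher': 'Example Press',
    'author': [{'given': 'Ada', 'family': 'Author'}],
    'abstract': '<jats:p>Abstract of the example paper.</jats:p>',
}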
@@ -13,10 +13,12 @@ Definitions`_.

from urllib.parse import urlencode
from datetime import datetime
from typing import Optional
from lxml import html

from searx.utils import (
    eval_xpath,
    eval_xpath_getindex,
    eval_xpath_list,
    extract_text,
)

@@ -46,7 +48,7 @@ about = {
}

# engine dependent config
categories = ['science']
categories = ['science', 'scientific publications']
paging = True
language_support = True
use_locale_domain = True

@@ -99,7 +101,43 @@ def request(query, params):
    return params


def response(resp):
def parse_gs_a(text: Optional[str]):
    """Parse the text written in green.

    Possible formats:
    * "{authors} - {journal}, {year} - {publisher}"
    * "{authors} - {year} - {publisher}"
    * "{authors} - {publisher}"
    """
    if text is None or text == "":
        return None, None, None, None

    s_text = text.split(' - ')
    authors = s_text[0].split(', ')
    publisher = s_text[-1]
    if len(s_text) != 3:
        return authors, None, publisher, None

    # the format is "{authors} - {journal}, {year} - {publisher}" or "{authors} - {year} - {publisher}"
    # get journal and year
    journal_year = s_text[1].split(', ')
    # journal is optional and may contains some coma
    if len(journal_year) > 1:
        journal = ', '.join(journal_year[0:-1])
        if journal == '…':
            journal = None
    else:
        journal = None
    # year
    year = journal_year[-1]
    try:
        publishedDate = datetime.strptime(year.strip(), '%Y')
    except ValueError:
        publishedDate = None
    return authors, journal, publisher, publishedDate


def response(resp):  # pylint: disable=too-many-locals
    """Get response from google's search request"""
    results = []

@@ -112,30 +150,53 @@ def response(resp):
    dom = html.fromstring(resp.text)

    # parse results
    for result in eval_xpath_list(dom, '//div[@class="gs_ri"]'):
    for result in eval_xpath_list(dom, '//div[@data-cid]'):

        title = extract_text(eval_xpath(result, './h3[1]//a'))
        title = extract_text(eval_xpath(result, './/h3[1]//a'))

        if not title:
            # this is a [ZITATION] block
            continue

        url = eval_xpath(result, './h3[1]//a/@href')[0]
        content = extract_text(eval_xpath(result, './div[@class="gs_rs"]')) or ''

        pub_info = extract_text(eval_xpath(result, './div[@class="gs_a"]'))
        if pub_info:
            content += "[%s]" % pub_info

        pub_type = extract_text(eval_xpath(result, './/span[@class="gs_ct1"]'))
        if pub_type:
            title = title + " " + pub_type
            pub_type = pub_type[1:-1].lower()

        url = eval_xpath_getindex(result, './/h3[1]//a/@href', 0)
        content = extract_text(eval_xpath(result, './/div[@class="gs_rs"]'))
        authors, journal, publisher, publishedDate = parse_gs_a(
            extract_text(eval_xpath(result, './/div[@class="gs_a"]'))
        )
        if publisher in url:
            publisher = None

        # cited by
        comments = extract_text(eval_xpath(result, './/div[@class="gs_fl"]/a[starts-with(@href,"/scholar?cites=")]'))

        # link to the html or pdf document
        html_url = None
        pdf_url = None
        doc_url = eval_xpath_getindex(result, './/div[@class="gs_or_ggsm"]/a/@href', 0, default=None)
        doc_type = extract_text(eval_xpath(result, './/span[@class="gs_ctg2"]'))
        if doc_type == "[PDF]":
            pdf_url = doc_url
        else:
            html_url = doc_url

        results.append(
            {
                'template': 'paper.html',
                'type': pub_type,
                'url': url,
                'title': title,
                'authors': authors,
                'publisher': publisher,
                'journal': journal,
                'publishedDate': publishedDate,
                'content': content,
                'comments': comments,
                'html_url': html_url,
                'pdf_url': pdf_url,
            }
        )
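To make the green-line parsing above concrete, here is a small sketch of what the parse_gs_a() helper returns for one hypothetical Google Scholar byline; the input string and the resulting values are invented for illustration only.

# Hypothetical input, assuming the parse_gs_a() helper introduced above:
authors, journal, publisher, publishedDate = parse_gs_a(
    'J Doe, A Smith - Nature, 2020 - nature.com'
)
# authors       -> ['J Doe', 'A Smith']
# journal       -> 'Nature'
# publisher     -> 'nature.com'
# publishedDate -> datetime(2020, 1, 1), parsed from the year with '%Y'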
@@ -3,11 +3,15 @@
PubMed (Scholar publications)
"""

from flask_babel import gettext
from lxml import etree
from datetime import datetime
from urllib.parse import urlencode
from searx.network import get
from searx.utils import (
    eval_xpath_getindex,
    eval_xpath_list,
    extract_text,
)

# about
about = {

@@ -22,7 +26,7 @@ about = {
    "results": 'XML',
}

categories = ['science']
categories = ['science', 'scientific publications']

base_url = (
    'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi' + '?db=pubmed&{query}&retstart={offset}&retmax={hits}'

@@ -63,45 +67,60 @@ def response(resp):

    retrieve_url_encoded = pubmed_retrieve_api_url.format(**retrieve_notice_args)

    search_results_xml = get(retrieve_url_encoded).content
    search_results = etree.XML(search_results_xml).xpath('//PubmedArticleSet/PubmedArticle/MedlineCitation')
    search_results_response = get(retrieve_url_encoded).content
    search_results = etree.XML(search_results_response)
    for entry in eval_xpath_list(search_results, '//PubmedArticle'):
        medline = eval_xpath_getindex(entry, './MedlineCitation', 0)

    for entry in search_results:
        title = entry.xpath('.//Article/ArticleTitle')[0].text

        pmid = entry.xpath('.//PMID')[0].text
        title = eval_xpath_getindex(medline, './/Article/ArticleTitle', 0).text
        pmid = eval_xpath_getindex(medline, './/PMID', 0).text
        url = pubmed_url + pmid
        content = extract_text(
            eval_xpath_getindex(medline, './/Abstract/AbstractText//text()', 0, default=None), allow_none=True
        )
        doi = extract_text(
            eval_xpath_getindex(medline, './/ELocationID[@EIdType="doi"]/text()', 0, default=None), allow_none=True
        )
        journal = extract_text(
            eval_xpath_getindex(medline, './Article/Journal/Title/text()', 0, default=None), allow_none=True
        )
        issn = extract_text(
            eval_xpath_getindex(medline, './Article/Journal/ISSN/text()', 0, default=None), allow_none=True
        )
        authors = []
        for author in eval_xpath_list(medline, './Article/AuthorList/Author'):
            f = eval_xpath_getindex(author, './ForeName', 0, default=None)
            l = eval_xpath_getindex(author, './LastName', 0, default=None)
            f = '' if f is None else f.text
            l = '' if l is None else l.text
            authors.append((f + ' ' + l).strip())

        try:
            content = entry.xpath('.//Abstract/AbstractText')[0].text
        except:
            content = gettext('No abstract is available for this publication.')

        # If a doi is available, add it to the snipppet
        try:
            doi = entry.xpath('.//ELocationID[@EIdType="doi"]')[0].text
            content = 'DOI: {doi} Abstract: {content}'.format(doi=doi, content=content)
        except:
            pass

        if len(content) > 300:
            content = content[0:300] + "..."
        # TODO: center snippet on query term

        res_dict = {'url': url, 'title': title, 'content': content}
        res_dict = {
            'template': 'paper.html',
            'url': url,
            'title': title,
            'content': content,
            'journal': journal,
            'issn': [issn],
            'authors': authors,
            'doi': doi,
        }

        accepted_date = eval_xpath_getindex(
            entry, './PubmedData/History//PubMedPubDate[@PubStatus="accepted"]', 0, default=None
        )
        if accepted_date is not None:
            year = eval_xpath_getindex(accepted_date, './Year', 0)
            month = eval_xpath_getindex(accepted_date, './Month', 0)
            day = eval_xpath_getindex(accepted_date, './Day', 0)
            try:
                publishedDate = datetime.strptime(
                    entry.xpath('.//DateCreated/Year')[0].text
                    + '-'
                    + entry.xpath('.//DateCreated/Month')[0].text
                    + '-'
                    + entry.xpath('.//DateCreated/Day')[0].text,
                    year.text + '-' + month.text + '-' + day.text,
                    '%Y-%m-%d',
                )
                res_dict['publishedDate'] = publishedDate
            except:
                pass
            except Exception as e:
                print(e)

        results.append(res_dict)
@@ -6,6 +6,8 @@
from json import dumps, loads
from datetime import datetime

from flask_babel import gettext

about = {
    "website": 'https://www.semanticscholar.org/',
    "wikidata_id": 'Q22908627',

@@ -15,6 +17,7 @@ about = {
    "results": 'JSON',
}

categories = ['science', 'scientific publications']
paging = True
search_url = 'https://www.semanticscholar.org/api/1/search'
paper_url = 'https://www.semanticscholar.org/paper'

@@ -45,11 +48,7 @@ def request(query, params):
def response(resp):
    res = loads(resp.text)
    results = []

    for result in res['results']:
        item = {}
        metadata = []

        url = result.get('primaryPaperLink', {}).get('url')
        if not url and result.get('links'):
            url = result.get('links')[0]

@@ -60,22 +59,47 @@ def response(resp):
        if not url:
            url = paper_url + '/%s' % result['id']

        item['url'] = url
        # publishedDate
        if 'pubDate' in result:
            publishedDate = datetime.strptime(result['pubDate'], "%Y-%m-%d")
        else:
            publishedDate = None

        item['title'] = result['title']['text']
        item['content'] = result['paperAbstract']['text']
        # authors
        authors = [author[0]['name'] for author in result.get('authors', [])]

        metadata = result.get('fieldsOfStudy') or []
        venue = result.get('venue', {}).get('text')
        if venue:
            metadata.append(venue)
        if metadata:
            item['metadata'] = ', '.join(metadata)
        # pick for the first alternate link, but not from the crawler
        pdf_url = None
        for doc in result.get('alternatePaperLinks', []):
            if doc['linkType'] not in ('crawler', 'doi'):
                pdf_url = doc['url']
                break

        pubDate = result.get('pubDate')
        if pubDate:
            item['publishedDate'] = datetime.strptime(pubDate, "%Y-%m-%d")
        # comments
        comments = None
        if 'citationStats' in result:
            comments = gettext(
                '{numCitations} citations from the year {firstCitationVelocityYear} to {lastCitationVelocityYear}'
            ).format(
                numCitations=result['citationStats']['numCitations'],
                firstCitationVelocityYear=result['citationStats']['firstCitationVelocityYear'],
                lastCitationVelocityYear=result['citationStats']['lastCitationVelocityYear'],
            )

        results.append(item)
        results.append(
            {
                'template': 'paper.html',
                'url': url,
                'title': result['title']['text'],
                'content': result['paperAbstract']['text'],
                'journal': result.get('venue', {}).get('text') or result.get('journal', {}).get('name'),
                'doi': result.get('doiInfo', {}).get('doi'),
                'tags': result.get('fieldsOfStudy'),
                'authors': authors,
                'pdf_url': pdf_url,
                'publishedDate': publishedDate,
                'comments': comments,
            }
        )

    return results
@@ -19,7 +19,7 @@ about = {
    "results": 'JSON',
}

categories = ['science']
categories = ['science', 'scientific publications']
paging = True
nb_per_page = 10
api_key = 'unset'

@@ -41,32 +41,29 @@ def response(resp):
    json_data = loads(resp.text)

    for record in json_data['records']:
        content = record['abstract'][0:500]
        if len(record['abstract']) > len(content):
            content += "..."
        content = record['abstract']
        published = datetime.strptime(record['publicationDate'], '%Y-%m-%d')

        metadata = [
            record[x]
            for x in [
                'publicationName',
                'identifier',
                'contentType',
            ]
            if record.get(x) is not None
        ]

        metadata = ' / '.join(metadata)
        if record.get('startingPage') and record.get('endingPage') is not None:
            metadata += " (%(startingPage)s-%(endingPage)s)" % record

        authors = [" ".join(author['creator'].split(', ')[::-1]) for author in record['creators']]
        tags = record.get('genre')
        if isinstance(tags, str):
            tags = [tags]
        results.append(
            {
                'template': 'paper.html',
                'title': record['title'],
                'url': record['url'][0]['value'].replace('http://', 'https://', 1),
                'type': record.get('contentType'),
                'content': content,
                'publishedDate': published,
                'metadata': metadata,
                'authors': authors,
                'doi': record.get('doi'),
                'journal': record.get('publicationName'),
                'pages': record.get('start_page') + '-' + record.get('end_page'),
                'tags': tags,
                'issn': [record.get('issn')],
                'isbn': [record.get('isbn')],
                'volume': record.get('volume') or None,
                'number': record.get('number') or None,
            }
        )
    return results
@@ -42,4 +42,6 @@ def on_result(request, search, result):
                doi = doi[: -len(suffix)]
        result['url'] = get_doi_resolver(request.preferences) + doi
        result['parsed_url'] = urlparse(result['url'])
        if 'doi' not in result:
            result['doi'] = doi
    return True
@@ -43,6 +43,7 @@ CATEGORY_GROUPS = {
    'REPOS': 'repos',
    'SOFTWARE_WIKIS': 'software wikis',
    'WEB': 'web',
    'SCIENTIFIC PUBLICATIONS': 'scientific publications',
}

STYLE_NAMES = {
@@ -319,7 +319,6 @@ engines:
  - name: arxiv
    engine: arxiv
    shortcut: arx
    categories: science
    timeout: 4.0

  # tmp suspended: dh key too small

@@ -411,23 +410,10 @@ engines:
  #   api_key: 'unset'

  - name: crossref
    engine: json_engine
    paging: true
    search_url: https://search.crossref.org/dois?q={query}&page={pageno}
    url_query: doi
    title_query: title
    title_html_to_text: true
    content_query: fullCitation
    content_html_to_text: true
    categories: science
    engine: crossref
    shortcut: cr
    about:
      website: https://www.crossref.org/
      wikidata_id: Q5188229
      official_api_documentation: https://github.com/CrossRef/rest-api-doc
      use_official_api: false
      require_api_key: false
      results: JSON
    timeout: 30
    disable: true

  - name: yep
    engine: json_engine

@@ -1068,7 +1054,7 @@ engines:
    title_query: metadata/oaf:entity/oaf:result/title/$
    content_query: metadata/oaf:entity/oaf:result/description/$
    content_html_to_text: true
    categories: science
    categories: "science"
    shortcut: oad
    timeout: 5.0
    about:

@@ -1198,7 +1184,6 @@ engines:
  - name: pubmed
    engine: pubmed
    shortcut: pub
    categories: science
    timeout: 3.0

  - name: pypi

@@ -1346,7 +1331,6 @@ engines:
    engine: semantic_scholar
    disabled: true
    shortcut: se
    categories: science

  # Spotify needs API credentials
  # - name: spotify

@@ -1372,8 +1356,7 @@ engines:
  #   # working API key, for test & debug: "a69685087d07eca9f13db62f65b8f601"
  #   api_key: 'unset'
  #   shortcut: springer
  #   categories: science
  #   timeout: 6.0
  #   timeout: 15.0

  - name: startpage
    engine: startpage
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -302,6 +302,49 @@ article[data-vim-selected].category-social {
  }
}

.result-paper {
  .attributes {
    display: table;
    border-spacing: 0.125rem;

    div {
      display: table-row;

      span {
        font-size: 0.9rem;
        margin-top: 0.25rem;
        display: table-cell;

        time {
          font-size: 0.9rem;
        }
      }

      span:first-child {
        color: var(--color-base-font);
        min-width: 10rem;
      }

      span:nth-child(2) {
        color: var(--color-result-publishdate-font);
      }
    }
  }

  .content {
    margin-top: 0.25rem;
  }

  .comments {
    font-size: 0.9rem;
    margin: 0.25rem 0 0 0;
    padding: 0;
    word-wrap: break-word;
    line-height: 1.24;
    font-style: italic;
  }
}

.template_group_images {
  display: flex;
  flex-wrap: wrap;

@@ -955,6 +998,28 @@ article[data-vim-selected].category-social {
    border: none !important;
    background-color: var(--color-sidebar-background);
  }

  .result-paper {
    .attributes {
      display: block;

      div {
        display: block;

        span {
          display: inline;
        }

        span:first-child {
          font-weight: bold;
        }

        span:nth-child(2) {
          .ltr-margin-left(0.5rem);
        }
      }
    }
  }
}

/*
searx/templates/simple/result_templates/paper.html  (new file, 44 lines)
@@ -0,0 +1,44 @@
{% from 'simple/macros.html' import result_header, result_sub_header, result_sub_footer, result_footer with context %}

{{ result_header(result, favicons, image_proxify) -}}
<div class="attributes">
  {%- if result.publishedDate %}<div class="result_publishedDate"><span>{{ _("Published date") }}:</span><span><time class="published_date" datetime="{{ result.pubdate }}" >{{ result.publishedDate }}</time></span></div>{% endif -%}
  {%- if result.authors %}<div class="result_authors"><span>{{ _("Author") }}:</span><span>{{ result.authors | join(", ") }}</span></div>{% endif -%}
  {%- if result.journal -%}
  <div class="result_journal">
    <span>{{- _("Journal") }}:</span><span>{{ result.journal -}}
    {%- if result.volume -%}
      {{- result.volume -}}
      {%- if result.number -%}
        .{{- result.number -}}
      {%- endif -%}
    {%- endif -%}
    {%- if result.pages -%}
      {{- result.pages -}}
    {%- endif -%}
    </span>
  </div>
  {%- endif %}
  {%- if result.editor %}<div class="result_editor"><span>{{ _("Editor") }}:</span><span>{{ result.editor }}</span></div>{% endif -%}
  {%- if result.publisher %}<div class="result_publisher"><span>{{ _("Publisher") }}:</span><span>{{ result.publisher }}</span></div>{% endif -%}
  {%- if result.type %}<div class="result_type"><span>{{ _("Type") }}:</span><span>{{ result.type }}</span></div>{% endif -%}
  {%- if result.tags %}<div class="result_tags"><span>{{ _("Tags") }}:</span><span>{{ result.tags | join(", ")}}</span></div>{%- endif -%}
  {%- if result.doi %}<div class="result_doi"><span>{{ _("DOI") }}:</span><span>{{- result.doi -}}</span></div>{% endif -%}
  {%- if result.issn %}<div class="result_issn"><span>{{ _("ISSN") }}:</span><span>{{ result.issn | join(", ") }}</span></div>{% endif -%}
  {%- if result.isbn %}<div class="result_isbn"><span>{{ _("ISBN") }}:</span><span>{{ result.isbn | join(", ") }}</span></div>{% endif -%}
</div>
{%- if result.content -%}<p class="content">{{- result.content | safe -}}</p>{%- endif -%}
{%- if result.comments -%}<p class="comments">{{- result.comments -}}</p>{%- endif -%}
<p class="altlink">
  {%- if result.pdf_url -%}
  <a href="{{ result.pdf_url }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ _('PDF') }}</a>
  {%- endif -%}
  {%- if result.html_url -%}
  <a href="{{ result.html_url }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ _('HTML') }}</a>
  {%- endif -%}
  {%- if result.doi %}
  <a href="https://www.altmetric.com/details/doi/{{result.doi}}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>Altmetric</a>
  {% endif -%}
</p>
{{- result_sub_footer(result, proxify) -}}
{{- result_footer(result) }}
@@ -12,7 +12,6 @@ import os
import sys
import base64

from datetime import datetime, timedelta
from timeit import default_timer
from html import escape
from io import StringIO

@@ -45,7 +44,6 @@ from flask.json import jsonify
from flask_babel import (
    Babel,
    gettext,
    format_date,
    format_decimal,
)

@@ -79,6 +77,7 @@ from searx.webutils import (
    is_hmac_of,
    is_flask_run_cmdline,
    group_engines_in_tab,
    searxng_l10n_timespan,
)
from searx.webadapter import (
    get_search_query_from_webapp,

@@ -718,25 +717,13 @@ def search():
        if 'url' in result:
            result['pretty_url'] = prettify_url(result['url'])

        # TODO, check if timezone is calculated right  # pylint: disable=fixme
        if result.get('publishedDate'):  # do not try to get a date from an empty string or a None type
            try:  # test if publishedDate >= 1900 (datetime module bug)
                result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')
            except ValueError:
                result['publishedDate'] = None
            else:
                if result['publishedDate'].replace(tzinfo=None) >= datetime.now() - timedelta(days=1):
                    timedifference = datetime.now() - result['publishedDate'].replace(tzinfo=None)
                    minutes = int((timedifference.seconds / 60) % 60)
                    hours = int(timedifference.seconds / 60 / 60)
                    if hours == 0:
                        result['publishedDate'] = gettext('{minutes} minute(s) ago').format(minutes=minutes)
                    else:
                        result['publishedDate'] = gettext('{hours} hour(s), {minutes} minute(s) ago').format(
                            hours=hours, minutes=minutes
                        )
                else:
                    result['publishedDate'] = format_date(result['publishedDate'])
                result['publishedDate'] = searxng_l10n_timespan(result['publishedDate'])

        # set result['open_group'] = True when the template changes from the previous result
        # set result['close_group'] = True when the template changes on the next result
@@ -7,11 +7,14 @@ import hmac
import re
import inspect
import itertools
from datetime import datetime, timedelta
from typing import Iterable, List, Tuple, Dict

from io import StringIO
from codecs import getincrementalencoder

from flask_babel import gettext, format_date

from searx import logger, settings
from searx.engines import Engine, OTHER_CATEGORY

@@ -138,6 +141,28 @@ def highlight_content(content, query):
    return content


def searxng_l10n_timespan(dt: datetime) -> str:  # pylint: disable=invalid-name
    """Returns a human-readable and translated string indicating how long ago
    a date was in the past / the time span of the date to the present.

    On January 1st, midnight, the returned string only indicates how many years
    ago the date was.
    """
    # TODO, check if timezone is calculated right # pylint: disable=fixme
    d = dt.date()
    t = dt.time()
    if d.month == 1 and d.day == 1 and t.hour == 0 and t.minute == 0 and t.second == 0:
        return str(d.year)
    if dt.replace(tzinfo=None) >= datetime.now() - timedelta(days=1):
        timedifference = datetime.now() - dt.replace(tzinfo=None)
        minutes = int((timedifference.seconds / 60) % 60)
        hours = int(timedifference.seconds / 60 / 60)
        if hours == 0:
            return gettext('{minutes} minute(s) ago').format(minutes=minutes)
        return gettext('{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)
    return format_date(dt)


def is_flask_run_cmdline():
    """Check if the application was started using "flask run" command line
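For orientation, a rough sketch of how the new helper behaves. The calls assume an active Flask/Babel request context (gettext and format_date need a locale) and a "now" of roughly 2022-08-01 15:00; the inputs and outputs are illustrative, not taken from the commit.

# Hypothetical calls to the searxng_l10n_timespan() helper added above:
searxng_l10n_timespan(datetime(2022, 1, 1, 0, 0, 0))  # -> '2022' (January 1st, midnight: year only)
searxng_l10n_timespan(datetime(2022, 8, 1, 14, 30))   # -> '30 minute(s) ago' (less than a day old)
searxng_l10n_timespan(datetime(2021, 3, 15, 9, 0))    # -> a locale-formatted date such as 'Mar 15, 2021'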