Merge pull request #1705 from dalf/template_paper

Theme: add a paper.html template and update of the science engines
This commit is contained in:
Alexandre Flament 2022-09-23 23:09:27 +02:00 committed by GitHub
commit fc389f009d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
18 changed files with 536 additions and 146 deletions

View file

@ -311,3 +311,88 @@ the parameter ``template`` must be set to the desired type.
address.postcode postcode of object address.postcode postcode of object
address.country country of object address.country country of object
========================= ===================================================== ========================= =====================================================
.. _BibTeX format: https://www.bibtex.com/g/bibtex-format/
.. _BibTeX field types: https://en.wikipedia.org/wiki/BibTeX#Field_types
.. list-table:: Parameter of the **paper** media type /
see `BibTeX field types`_ and `BibTeX format`_
:header-rows: 2
:width: 100%
* - result-parameter
- Python type
- information
* - template
- :py:class:`str`
- is set to ``paper.html``
* - title
- :py:class:`str`
- title of the result
* - content
- :py:class:`str`
- abstract
* - comments
- :py:class:`str`
- free text displayed in italics below the content
* - tags
- :py:class:`List <list>`\ [\ :py:class:`str`\ ]
- free tag list
* - publishedDate
- :py:class:`datetime <datetime.datetime>`
- last publication date
* - authors
- :py:class:`List <list>`\ [\ :py:class:`str`\ ]
- list of authors of the work (``authors``, with an "s")
* - editor
- :py:class:`str`
- list of editors of a book
* - publisher
- :py:class:`str`
- name of the publisher
* - journal
- :py:class:`str`
- name of the journal or magazine the article was
published in
* - volume
- :py:class:`str`
- volume number
* - pages
- :py:class:`str`
- page range where the article is
* - number
- :py:class:`str`
- number of the report or the issue number for a journal article
* - doi
- :py:class:`str`
- DOI number (like ``10.1038/d41586-018-07848-2``)
* - issn
- :py:class:`str`
- ISSN number like ``1476-4687``
* - isbn
- :py:class:`str`
- ISBN number like ``9780201896831``
* - pdf_url
- :py:class:`str`
- URL to the full article, the PDF version
* - html_url
- :py:class:`str`
- URL to full article, HTML version

View file

@ -3,9 +3,10 @@
ArXiV (Scientific preprints) ArXiV (Scientific preprints)
""" """
from lxml import html from lxml import etree
from lxml.etree import XPath
from datetime import datetime from datetime import datetime
from searx.utils import eval_xpath_list, eval_xpath_getindex from searx.utils import eval_xpath, eval_xpath_list, eval_xpath_getindex
# about # about
about = { about = {
@ -17,7 +18,7 @@ about = {
"results": 'XML-RSS', "results": 'XML-RSS',
} }
categories = ['science'] categories = ['science', 'scientific publications']
paging = True paging = True
base_url = ( base_url = (
@ -27,6 +28,23 @@ base_url = (
# engine dependent config # engine dependent config
number_of_results = 10 number_of_results = 10
# XPath expressions for the arXiv Atom feed, compiled once at import time.
# Every expression is bound to the namespace prefixes declared below.
arxiv_namespaces = {
"atom": "http://www.w3.org/2005/Atom",
"arxiv": "http://arxiv.org/schemas/atom",
}
# absolute path: selects every <atom:entry> of the document
xpath_entry = XPath('//atom:entry', namespaces=arxiv_namespaces)
# the expressions below start with "." and are evaluated relative to the node
# they are applied to (response() applies them to a single entry)
xpath_title = XPath('.//atom:title', namespaces=arxiv_namespaces)
xpath_id = XPath('.//atom:id', namespaces=arxiv_namespaces)
xpath_summary = XPath('.//atom:summary', namespaces=arxiv_namespaces)
xpath_author_name = XPath('.//atom:author/atom:name', namespaces=arxiv_namespaces)
xpath_doi = XPath('.//arxiv:doi', namespaces=arxiv_namespaces)
# the <atom:link> element whose title attribute is "pdf"
xpath_pdf = XPath('.//atom:link[@title="pdf"]', namespaces=arxiv_namespaces)
xpath_published = XPath('.//atom:published', namespaces=arxiv_namespaces)
xpath_journal = XPath('.//arxiv:journal_ref', namespaces=arxiv_namespaces)
# yields the "term" attribute values, not elements
xpath_category = XPath('.//atom:category/@term', namespaces=arxiv_namespaces)
# "./" (not ".//"): only direct children of the context node are matched
xpath_comment = XPath('./arxiv:comment', namespaces=arxiv_namespaces)
def request(query, params): def request(query, params):
# basic search # basic search
@ -41,30 +59,50 @@ def request(query, params):
def response(resp): def response(resp):
results = [] results = []
dom = etree.fromstring(resp.content)
for entry in eval_xpath_list(dom, xpath_entry):
title = eval_xpath_getindex(entry, xpath_title, 0).text
dom = html.fromstring(resp.content) url = eval_xpath_getindex(entry, xpath_id, 0).text
abstract = eval_xpath_getindex(entry, xpath_summary, 0).text
for entry in eval_xpath_list(dom, '//entry'): authors = [author.text for author in eval_xpath_list(entry, xpath_author_name)]
title = eval_xpath_getindex(entry, './/title', 0).text
url = eval_xpath_getindex(entry, './/id', 0).text # doi
doi_element = eval_xpath_getindex(entry, xpath_doi, 0, default=None)
doi = None if doi_element is None else doi_element.text
content_string = '{doi_content}{abstract_content}' # pdf
pdf_element = eval_xpath_getindex(entry, xpath_pdf, 0, default=None)
pdf_url = None if pdf_element is None else pdf_element.attrib.get('href')
abstract = eval_xpath_getindex(entry, './/summary', 0).text # journal
journal_element = eval_xpath_getindex(entry, xpath_journal, 0, default=None)
journal = None if journal_element is None else journal_element.text
# If a doi is available, add it to the snipppet # tags
doi_element = eval_xpath_getindex(entry, './/link[@title="doi"]', 0, default=None) tag_elements = eval_xpath(entry, xpath_category)
doi_content = doi_element.text if doi_element is not None else '' tags = [str(tag) for tag in tag_elements]
content = content_string.format(doi_content=doi_content, abstract_content=abstract)
if len(content) > 300: # comments
content = content[0:300] + "..." comments_elements = eval_xpath_getindex(entry, xpath_comment, 0, default=None)
# TODO: center snippet on query term comments = None if comments_elements is None else comments_elements.text
publishedDate = datetime.strptime(eval_xpath_getindex(entry, './/published', 0).text, '%Y-%m-%dT%H:%M:%SZ') publishedDate = datetime.strptime(eval_xpath_getindex(entry, xpath_published, 0).text, '%Y-%m-%dT%H:%M:%SZ')
res_dict = {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content} res_dict = {
'template': 'paper.html',
'url': url,
'title': title,
'publishedDate': publishedDate,
'content': abstract,
'doi': doi,
'authors': authors,
'journal': journal,
'tags': tags,
'comments': comments,
'pdf_url': pdf_url,
}
results.append(res_dict) results.append(res_dict)

59
searx/engines/crossref.py Normal file
View file

@ -0,0 +1,59 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""CrossRef (Science)
"""
from urllib.parse import urlencode
from searx.utils import html_to_text
# Static description of the engine: project website, Wikidata id, link to the
# API documentation and the format of the results.
about = {
"website": 'https://www.crossref.org/',
"wikidata_id": 'Q5188229',
"official_api_documentation": 'https://github.com/CrossRef/rest-api-doc',
"use_official_api": False,
"require_api_key": False,
"results": 'JSON',
}
# categories this engine is listed under
categories = ['science', 'scientific publications']
# the engine supports paging: request() turns params['pageno'] into an offset
paging = True
search_url = 'https://api.crossref.org/works'


def request(query, params):
    """Build the Crossref ``/works`` search URL for ``query``.

    ``params['pageno']`` is converted into an ``offset`` argument in steps of
    20 (page 1 maps to offset 0).  The URL is stored in ``params['url']`` and
    the same ``params`` dict is returned.
    """
    offset = (params['pageno'] - 1) * 20
    args = {'query': query, 'offset': offset}
    params['url'] = f"{search_url}?{urlencode(args)}"
    return params
def _first_or_none(values):
    """Return the first item of ``values``, or ``None`` if it is missing or empty."""
    return values[0] if values else None


def response(resp):
    """Convert a Crossref ``/works`` JSON response into ``paper.html`` results.

    ``resp.json()`` must contain ``message.items``; every item needs ``type``,
    ``DOI`` and (unless ``resource.primary.URL`` is present) ``URL``.  The
    ``title`` and ``container-title`` lists are treated as optional: a record
    that lacks one of them, or carries an empty list, no longer aborts the
    whole response with ``KeyError`` / ``IndexError``.

    Returns a list of result dicts using the ``paper.html`` template.
    """
    results = []
    for record in resp.json()['message']['items']:
        record_type = record['type']
        record_title = _first_or_none(record.get('title'))
        container_title = _first_or_none(record.get('container-title'))

        if record_type == 'book-chapter' and container_title:
            # show the book title first; append the chapter title in
            # parentheses only when it differs from the book title
            title = html_to_text(container_title)
            if record_title and record_title.lower().strip() != container_title.lower().strip():
                title += ' (' + html_to_text(record_title) + ')'
            journal = None
        elif record_title:
            title = html_to_text(record_title)
            journal = container_title
        else:
            # no own title: fall back to the container title and do not
            # repeat it as the journal
            title = html_to_text(container_title or '')
            journal = None

        url = record.get('resource', {}).get('primary', {}).get('URL') or record['URL']

        # "given" or "family" may be missing: strip the separator space and
        # drop entries that end up empty
        authors = []
        for author in record.get('author', []):
            name = (author.get('given', '') + ' ' + author.get('family', '')).strip()
            if name:
                authors.append(name)

        isbn = record.get('isbn') or [i['value'] for i in record.get('isbn-type', [])]
        results.append(
            {
                'template': 'paper.html',
                'url': url,
                'title': title,
                'journal': journal,
                'volume': record.get('volume'),
                'type': record_type,
                'content': html_to_text(record.get('abstract', '')),
                'publisher': record.get('publisher'),
                'authors': authors,
                'doi': record['DOI'],
                'isbn': isbn,
            }
        )
    return results

View file

@ -13,10 +13,12 @@ Definitions`_.
from urllib.parse import urlencode from urllib.parse import urlencode
from datetime import datetime from datetime import datetime
from typing import Optional
from lxml import html from lxml import html
from searx.utils import ( from searx.utils import (
eval_xpath, eval_xpath,
eval_xpath_getindex,
eval_xpath_list, eval_xpath_list,
extract_text, extract_text,
) )
@ -46,7 +48,7 @@ about = {
} }
# engine dependent config # engine dependent config
categories = ['science'] categories = ['science', 'scientific publications']
paging = True paging = True
language_support = True language_support = True
use_locale_domain = True use_locale_domain = True
@ -99,7 +101,43 @@ def request(query, params):
return params return params
def parse_gs_a(text: Optional[str]):
    """Split the green byline of a Google Scholar result into its parts.

    Recognised layouts:

    * ``"{authors} - {journal}, {year} - {publisher}"``
    * ``"{authors} - {year} - {publisher}"``
    * ``"{authors} - {publisher}"``

    Returns the tuple ``(authors, journal, publisher, publishedDate)``.  All
    four items are ``None`` for empty input; ``journal`` and ``publishedDate``
    are ``None`` whenever the text does not consist of exactly three
    ``" - "`` separated segments.
    """
    if not text:
        return None, None, None, None

    segments = text.split(' - ')
    authors = segments[0].split(', ')
    publisher = segments[-1]
    if len(segments) != 3:
        return authors, None, publisher, None

    # middle segment is "{journal}, {year}" or just "{year}"; the journal
    # name itself may contain commas, so only the last item is the year
    *journal_parts, year = segments[1].split(', ')
    journal = ', '.join(journal_parts) or None

    try:
        publishedDate = datetime.strptime(year.strip(), '%Y')
    except ValueError:
        publishedDate = None
    return authors, journal, publisher, publishedDate
def response(resp): # pylint: disable=too-many-locals
"""Get response from google's search request""" """Get response from google's search request"""
results = [] results = []
@ -112,30 +150,53 @@ def response(resp):
dom = html.fromstring(resp.text) dom = html.fromstring(resp.text)
# parse results # parse results
for result in eval_xpath_list(dom, '//div[@class="gs_ri"]'): for result in eval_xpath_list(dom, '//div[@data-cid]'):
title = extract_text(eval_xpath(result, './h3[1]//a')) title = extract_text(eval_xpath(result, './/h3[1]//a'))
if not title: if not title:
# this is a [ZITATION] block # this is a [ZITATION] block
continue continue
url = eval_xpath(result, './h3[1]//a/@href')[0]
content = extract_text(eval_xpath(result, './div[@class="gs_rs"]')) or ''
pub_info = extract_text(eval_xpath(result, './div[@class="gs_a"]'))
if pub_info:
content += "[%s]" % pub_info
pub_type = extract_text(eval_xpath(result, './/span[@class="gs_ct1"]')) pub_type = extract_text(eval_xpath(result, './/span[@class="gs_ct1"]'))
if pub_type: if pub_type:
title = title + " " + pub_type pub_type = pub_type[1:-1].lower()
url = eval_xpath_getindex(result, './/h3[1]//a/@href', 0)
content = extract_text(eval_xpath(result, './/div[@class="gs_rs"]'))
authors, journal, publisher, publishedDate = parse_gs_a(
extract_text(eval_xpath(result, './/div[@class="gs_a"]'))
)
if publisher in url:
publisher = None
# cited by
comments = extract_text(eval_xpath(result, './/div[@class="gs_fl"]/a[starts-with(@href,"/scholar?cites=")]'))
# link to the html or pdf document
html_url = None
pdf_url = None
doc_url = eval_xpath_getindex(result, './/div[@class="gs_or_ggsm"]/a/@href', 0, default=None)
doc_type = extract_text(eval_xpath(result, './/span[@class="gs_ctg2"]'))
if doc_type == "[PDF]":
pdf_url = doc_url
else:
html_url = doc_url
results.append( results.append(
{ {
'template': 'paper.html',
'type': pub_type,
'url': url, 'url': url,
'title': title, 'title': title,
'authors': authors,
'publisher': publisher,
'journal': journal,
'publishedDate': publishedDate,
'content': content, 'content': content,
'comments': comments,
'html_url': html_url,
'pdf_url': pdf_url,
} }
) )

View file

@ -3,11 +3,15 @@
PubMed (Scholar publications) PubMed (Scholar publications)
""" """
from flask_babel import gettext
from lxml import etree from lxml import etree
from datetime import datetime from datetime import datetime
from urllib.parse import urlencode from urllib.parse import urlencode
from searx.network import get from searx.network import get
from searx.utils import (
eval_xpath_getindex,
eval_xpath_list,
extract_text,
)
# about # about
about = { about = {
@ -22,7 +26,7 @@ about = {
"results": 'XML', "results": 'XML',
} }
categories = ['science'] categories = ['science', 'scientific publications']
base_url = ( base_url = (
'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi' + '?db=pubmed&{query}&retstart={offset}&retmax={hits}' 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi' + '?db=pubmed&{query}&retstart={offset}&retmax={hits}'
@ -63,46 +67,61 @@ def response(resp):
retrieve_url_encoded = pubmed_retrieve_api_url.format(**retrieve_notice_args) retrieve_url_encoded = pubmed_retrieve_api_url.format(**retrieve_notice_args)
search_results_xml = get(retrieve_url_encoded).content search_results_response = get(retrieve_url_encoded).content
search_results = etree.XML(search_results_xml).xpath('//PubmedArticleSet/PubmedArticle/MedlineCitation') search_results = etree.XML(search_results_response)
for entry in eval_xpath_list(search_results, '//PubmedArticle'):
medline = eval_xpath_getindex(entry, './MedlineCitation', 0)
for entry in search_results: title = eval_xpath_getindex(medline, './/Article/ArticleTitle', 0).text
title = entry.xpath('.//Article/ArticleTitle')[0].text pmid = eval_xpath_getindex(medline, './/PMID', 0).text
pmid = entry.xpath('.//PMID')[0].text
url = pubmed_url + pmid url = pubmed_url + pmid
content = extract_text(
eval_xpath_getindex(medline, './/Abstract/AbstractText//text()', 0, default=None), allow_none=True
)
doi = extract_text(
eval_xpath_getindex(medline, './/ELocationID[@EIdType="doi"]/text()', 0, default=None), allow_none=True
)
journal = extract_text(
eval_xpath_getindex(medline, './Article/Journal/Title/text()', 0, default=None), allow_none=True
)
issn = extract_text(
eval_xpath_getindex(medline, './Article/Journal/ISSN/text()', 0, default=None), allow_none=True
)
authors = []
for author in eval_xpath_list(medline, './Article/AuthorList/Author'):
f = eval_xpath_getindex(author, './ForeName', 0, default=None)
l = eval_xpath_getindex(author, './LastName', 0, default=None)
f = '' if f is None else f.text
l = '' if l is None else l.text
authors.append((f + ' ' + l).strip())
try: res_dict = {
content = entry.xpath('.//Abstract/AbstractText')[0].text 'template': 'paper.html',
except: 'url': url,
content = gettext('No abstract is available for this publication.') 'title': title,
'content': content,
'journal': journal,
'issn': [issn],
'authors': authors,
'doi': doi,
}
# If a doi is available, add it to the snipppet accepted_date = eval_xpath_getindex(
try: entry, './PubmedData/History//PubMedPubDate[@PubStatus="accepted"]', 0, default=None
doi = entry.xpath('.//ELocationID[@EIdType="doi"]')[0].text )
content = 'DOI: {doi} Abstract: {content}'.format(doi=doi, content=content) if accepted_date is not None:
except: year = eval_xpath_getindex(accepted_date, './Year', 0)
pass month = eval_xpath_getindex(accepted_date, './Month', 0)
day = eval_xpath_getindex(accepted_date, './Day', 0)
if len(content) > 300: try:
content = content[0:300] + "..." publishedDate = datetime.strptime(
# TODO: center snippet on query term year.text + '-' + month.text + '-' + day.text,
'%Y-%m-%d',
res_dict = {'url': url, 'title': title, 'content': content} )
res_dict['publishedDate'] = publishedDate
try: except Exception as e:
publishedDate = datetime.strptime( print(e)
entry.xpath('.//DateCreated/Year')[0].text
+ '-'
+ entry.xpath('.//DateCreated/Month')[0].text
+ '-'
+ entry.xpath('.//DateCreated/Day')[0].text,
'%Y-%m-%d',
)
res_dict['publishedDate'] = publishedDate
except:
pass
results.append(res_dict) results.append(res_dict)
return results return results

View file

@ -6,6 +6,8 @@
from json import dumps, loads from json import dumps, loads
from datetime import datetime from datetime import datetime
from flask_babel import gettext
about = { about = {
"website": 'https://www.semanticscholar.org/', "website": 'https://www.semanticscholar.org/',
"wikidata_id": 'Q22908627', "wikidata_id": 'Q22908627',
@ -15,6 +17,7 @@ about = {
"results": 'JSON', "results": 'JSON',
} }
categories = ['science', 'scientific publications']
paging = True paging = True
search_url = 'https://www.semanticscholar.org/api/1/search' search_url = 'https://www.semanticscholar.org/api/1/search'
paper_url = 'https://www.semanticscholar.org/paper' paper_url = 'https://www.semanticscholar.org/paper'
@ -45,11 +48,7 @@ def request(query, params):
def response(resp): def response(resp):
res = loads(resp.text) res = loads(resp.text)
results = [] results = []
for result in res['results']: for result in res['results']:
item = {}
metadata = []
url = result.get('primaryPaperLink', {}).get('url') url = result.get('primaryPaperLink', {}).get('url')
if not url and result.get('links'): if not url and result.get('links'):
url = result.get('links')[0] url = result.get('links')[0]
@ -60,22 +59,47 @@ def response(resp):
if not url: if not url:
url = paper_url + '/%s' % result['id'] url = paper_url + '/%s' % result['id']
item['url'] = url # publishedDate
if 'pubDate' in result:
publishedDate = datetime.strptime(result['pubDate'], "%Y-%m-%d")
else:
publishedDate = None
item['title'] = result['title']['text'] # authors
item['content'] = result['paperAbstract']['text'] authors = [author[0]['name'] for author in result.get('authors', [])]
metadata = result.get('fieldsOfStudy') or [] # pick for the first alternate link, but not from the crawler
venue = result.get('venue', {}).get('text') pdf_url = None
if venue: for doc in result.get('alternatePaperLinks', []):
metadata.append(venue) if doc['linkType'] not in ('crawler', 'doi'):
if metadata: pdf_url = doc['url']
item['metadata'] = ', '.join(metadata) break
pubDate = result.get('pubDate') # comments
if pubDate: comments = None
item['publishedDate'] = datetime.strptime(pubDate, "%Y-%m-%d") if 'citationStats' in result:
comments = gettext(
'{numCitations} citations from the year {firstCitationVelocityYear} to {lastCitationVelocityYear}'
).format(
numCitations=result['citationStats']['numCitations'],
firstCitationVelocityYear=result['citationStats']['firstCitationVelocityYear'],
lastCitationVelocityYear=result['citationStats']['lastCitationVelocityYear'],
)
results.append(item) results.append(
{
'template': 'paper.html',
'url': url,
'title': result['title']['text'],
'content': result['paperAbstract']['text'],
'journal': result.get('venue', {}).get('text') or result.get('journal', {}).get('name'),
'doi': result.get('doiInfo', {}).get('doi'),
'tags': result.get('fieldsOfStudy'),
'authors': authors,
'pdf_url': pdf_url,
'publishedDate': publishedDate,
'comments': comments,
}
)
return results return results

View file

@ -19,7 +19,7 @@ about = {
"results": 'JSON', "results": 'JSON',
} }
categories = ['science'] categories = ['science', 'scientific publications']
paging = True paging = True
nb_per_page = 10 nb_per_page = 10
api_key = 'unset' api_key = 'unset'
@ -41,32 +41,29 @@ def response(resp):
json_data = loads(resp.text) json_data = loads(resp.text)
for record in json_data['records']: for record in json_data['records']:
content = record['abstract'][0:500] content = record['abstract']
if len(record['abstract']) > len(content):
content += "..."
published = datetime.strptime(record['publicationDate'], '%Y-%m-%d') published = datetime.strptime(record['publicationDate'], '%Y-%m-%d')
authors = [" ".join(author['creator'].split(', ')[::-1]) for author in record['creators']]
metadata = [ tags = record.get('genre')
record[x] if isinstance(tags, str):
for x in [ tags = [tags]
'publicationName',
'identifier',
'contentType',
]
if record.get(x) is not None
]
metadata = ' / '.join(metadata)
if record.get('startingPage') and record.get('endingPage') is not None:
metadata += " (%(startingPage)s-%(endingPage)s)" % record
results.append( results.append(
{ {
'template': 'paper.html',
'title': record['title'], 'title': record['title'],
'url': record['url'][0]['value'].replace('http://', 'https://', 1), 'url': record['url'][0]['value'].replace('http://', 'https://', 1),
'type': record.get('contentType'),
'content': content, 'content': content,
'publishedDate': published, 'publishedDate': published,
'metadata': metadata, 'authors': authors,
'doi': record.get('doi'),
'journal': record.get('publicationName'),
'pages': record.get('start_page') + '-' + record.get('end_page'),
'tags': tags,
'issn': [record.get('issn')],
'isbn': [record.get('isbn')],
'volume': record.get('volume') or None,
'number': record.get('number') or None,
} }
) )
return results return results

View file

@ -42,4 +42,6 @@ def on_result(request, search, result):
doi = doi[: -len(suffix)] doi = doi[: -len(suffix)]
result['url'] = get_doi_resolver(request.preferences) + doi result['url'] = get_doi_resolver(request.preferences) + doi
result['parsed_url'] = urlparse(result['url']) result['parsed_url'] = urlparse(result['url'])
if 'doi' not in result:
result['doi'] = doi
return True return True

View file

@ -43,6 +43,7 @@ CATEGORY_GROUPS = {
'REPOS': 'repos', 'REPOS': 'repos',
'SOFTWARE_WIKIS': 'software wikis', 'SOFTWARE_WIKIS': 'software wikis',
'WEB': 'web', 'WEB': 'web',
'SCIENTIFIC PUBLICATIONS': 'scientific publications',
} }
STYLE_NAMES = { STYLE_NAMES = {

View file

@ -319,7 +319,6 @@ engines:
- name: arxiv - name: arxiv
engine: arxiv engine: arxiv
shortcut: arx shortcut: arx
categories: science
timeout: 4.0 timeout: 4.0
# tmp suspended: dh key too small # tmp suspended: dh key too small
@ -411,23 +410,10 @@ engines:
# api_key: 'unset' # api_key: 'unset'
- name: crossref - name: crossref
engine: json_engine engine: crossref
paging: true
search_url: https://search.crossref.org/dois?q={query}&page={pageno}
url_query: doi
title_query: title
title_html_to_text: true
content_query: fullCitation
content_html_to_text: true
categories: science
shortcut: cr shortcut: cr
about: timeout: 30
website: https://www.crossref.org/ disable: true
wikidata_id: Q5188229
official_api_documentation: https://github.com/CrossRef/rest-api-doc
use_official_api: false
require_api_key: false
results: JSON
- name: yep - name: yep
engine: json_engine engine: json_engine
@ -1068,7 +1054,7 @@ engines:
title_query: metadata/oaf:entity/oaf:result/title/$ title_query: metadata/oaf:entity/oaf:result/title/$
content_query: metadata/oaf:entity/oaf:result/description/$ content_query: metadata/oaf:entity/oaf:result/description/$
content_html_to_text: true content_html_to_text: true
categories: science categories: "science"
shortcut: oad shortcut: oad
timeout: 5.0 timeout: 5.0
about: about:
@ -1198,7 +1184,6 @@ engines:
- name: pubmed - name: pubmed
engine: pubmed engine: pubmed
shortcut: pub shortcut: pub
categories: science
timeout: 3.0 timeout: 3.0
- name: pypi - name: pypi
@ -1346,7 +1331,6 @@ engines:
engine: semantic_scholar engine: semantic_scholar
disabled: true disabled: true
shortcut: se shortcut: se
categories: science
# Spotify needs API credentials # Spotify needs API credentials
# - name: spotify # - name: spotify
@ -1372,8 +1356,7 @@ engines:
# # working API key, for test & debug: "a69685087d07eca9f13db62f65b8f601" # # working API key, for test & debug: "a69685087d07eca9f13db62f65b8f601"
# api_key: 'unset' # api_key: 'unset'
# shortcut: springer # shortcut: springer
# categories: science # timeout: 15.0
# timeout: 6.0
- name: startpage - name: startpage
engine: startpage engine: startpage

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -302,6 +302,49 @@ article[data-vim-selected].category-social {
} }
} }
// Styles for results rendered with the paper template.
.result-paper {
// attribute list laid out as a table: one row per <div>, one cell per <span>
.attributes {
display: table;
border-spacing: 0.125rem;
div {
display: table-row;
span {
font-size: 0.9rem;
margin-top: 0.25rem;
display: table-cell;
time {
font-size: 0.9rem;
}
}
// first cell of a row (the label): fixed minimum width keeps the values aligned
span:first-child {
color: var(--color-base-font);
min-width: 10rem;
}
// second cell of a row (the value)
span:nth-child(2) {
color: var(--color-result-publishdate-font);
}
}
}
.content {
margin-top: 0.25rem;
}
// comments are set in italics, in a smaller font than the default
.comments {
font-size: 0.9rem;
margin: 0.25rem 0 0 0;
padding: 0;
word-wrap: break-word;
line-height: 1.24;
font-style: italic;
}
}
.template_group_images { .template_group_images {
display: flex; display: flex;
flex-wrap: wrap; flex-wrap: wrap;
@ -955,6 +998,28 @@ article[data-vim-selected].category-social {
border: none !important; border: none !important;
background-color: var(--color-sidebar-background); background-color: var(--color-sidebar-background);
} }
// Replaces the table layout of the attribute list with block / inline flow.
// NOTE(review): this rule appears to be nested in an enclosing block that is
// not visible here (presumably a narrow-screen media query) - confirm.
.result-paper {
.attributes {
display: block;
div {
display: block;
span {
display: inline;
}
// first span of a row: emphasised with bold text
span:first-child {
font-weight: bold;
}
// second span of a row: separated from the first by a margin (mixin defined elsewhere)
span:nth-child(2) {
.ltr-margin-left(0.5rem);
}
}
}
}
} }
/* /*

View file

@ -0,0 +1,44 @@
{# Result template for papers: renders an attribute list (published date, authors, journal with volume/number/pages, editor, publisher, type, tags, DOI, ISSN, ISBN), then the content and the comments, then links to the PDF / HTML version and to Altmetric (only when a DOI is set). Every attribute is emitted only if the result provides it. #}
{% from 'simple/macros.html' import result_header, result_sub_header, result_sub_footer, result_footer with context %}
{{ result_header(result, favicons, image_proxify) -}}
<div class="attributes">
{%- if result.publishedDate %}<div class="result_publishedDate"><span>{{ _("Published date") }}:</span><span><time class="published_date" datetime="{{ result.pubdate }}" >{{ result.publishedDate }}</time></span></div>{% endif -%}
{%- if result.authors %}<div class="result_authors"><span>{{ _("Author") }}:</span><span>{{ result.authors | join(", ") }}</span></div>{% endif -%}
{%- if result.journal -%}
<div class="result_journal">
<span>{{- _("Journal") }}:</span><span>{{ result.journal -}}
{%- if result.volume -%}
&nbsp;{{- result.volume -}}
{%- if result.number -%}
.{{- result.number -}}
{%- endif -%}
{%- endif -%}
{%- if result.pages -%}
&nbsp;{{- result.pages -}}
{%- endif -%}
</span>
</div>
{%- endif %}
{%- if result.editor %}<div class="result_editor"><span>{{ _("Editor") }}:</span><span>{{ result.editor }}</span></div>{% endif -%}
{%- if result.publisher %}<div class="result_publisher"><span>{{ _("Publisher") }}:</span><span>{{ result.publisher }}</span></div>{% endif -%}
{%- if result.type %}<div class="result_type"><span>{{ _("Type") }}:</span><span>{{ result.type }}</span></div>{% endif -%}
{%- if result.tags %}<div class="result_tags"><span>{{ _("Tags") }}:</span><span>{{ result.tags | join(", ")}}</span></div>{%- endif -%}
{%- if result.doi %}<div class="result_doi"><span>{{ _("DOI") }}:</span><span>{{- result.doi -}}</span></div>{% endif -%}
{%- if result.issn %}<div class="result_issn"><span>{{ _("ISSN") }}:</span><span>{{ result.issn | join(", ") }}</span></div>{% endif -%}
{%- if result.isbn %}<div class="result_isbn"><span>{{ _("ISBN") }}:</span><span>{{ result.isbn | join(", ") }}</span></div>{% endif -%}
</div>
{%- if result.content -%}<p class="content">{{- result.content | safe -}}</p>{%- endif -%}
{%- if result.comments -%}<p class="comments">{{- result.comments -}}</p>{%- endif -%}
<p class="altlink">
{%- if result.pdf_url -%}
<a href="{{ result.pdf_url }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ _('PDF') }}</a>
{%- endif -%}
{%- if result.html_url -%}
<a href="{{ result.html_url }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ _('HTML') }}</a>
{%- endif -%}
{%- if result.doi %}
<a href="https://www.altmetric.com/details/doi/{{result.doi}}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>Altmetric</a>
{% endif -%}
</p>
{{- result_sub_footer(result, proxify) -}}
{{- result_footer(result) }}

View file

@ -12,7 +12,6 @@ import os
import sys import sys
import base64 import base64
from datetime import datetime, timedelta
from timeit import default_timer from timeit import default_timer
from html import escape from html import escape
from io import StringIO from io import StringIO
@ -45,7 +44,6 @@ from flask.json import jsonify
from flask_babel import ( from flask_babel import (
Babel, Babel,
gettext, gettext,
format_date,
format_decimal, format_decimal,
) )
@ -79,6 +77,7 @@ from searx.webutils import (
is_hmac_of, is_hmac_of,
is_flask_run_cmdline, is_flask_run_cmdline,
group_engines_in_tab, group_engines_in_tab,
searxng_l10n_timespan,
) )
from searx.webadapter import ( from searx.webadapter import (
get_search_query_from_webapp, get_search_query_from_webapp,
@ -718,25 +717,13 @@ def search():
if 'url' in result: if 'url' in result:
result['pretty_url'] = prettify_url(result['url']) result['pretty_url'] = prettify_url(result['url'])
# TODO, check if timezone is calculated right # pylint: disable=fixme
if result.get('publishedDate'): # do not try to get a date from an empty string or a None type if result.get('publishedDate'): # do not try to get a date from an empty string or a None type
try: # test if publishedDate >= 1900 (datetime module bug) try: # test if publishedDate >= 1900 (datetime module bug)
result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z') result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')
except ValueError: except ValueError:
result['publishedDate'] = None result['publishedDate'] = None
else: else:
if result['publishedDate'].replace(tzinfo=None) >= datetime.now() - timedelta(days=1): result['publishedDate'] = searxng_l10n_timespan(result['publishedDate'])
timedifference = datetime.now() - result['publishedDate'].replace(tzinfo=None)
minutes = int((timedifference.seconds / 60) % 60)
hours = int(timedifference.seconds / 60 / 60)
if hours == 0:
result['publishedDate'] = gettext('{minutes} minute(s) ago').format(minutes=minutes)
else:
result['publishedDate'] = gettext('{hours} hour(s), {minutes} minute(s) ago').format(
hours=hours, minutes=minutes
)
else:
result['publishedDate'] = format_date(result['publishedDate'])
# set result['open_group'] = True when the template changes from the previous result # set result['open_group'] = True when the template changes from the previous result
# set result['close_group'] = True when the template changes on the next result # set result['close_group'] = True when the template changes on the next result

View file

@ -7,11 +7,14 @@ import hmac
import re import re
import inspect import inspect
import itertools import itertools
from datetime import datetime, timedelta
from typing import Iterable, List, Tuple, Dict from typing import Iterable, List, Tuple, Dict
from io import StringIO from io import StringIO
from codecs import getincrementalencoder from codecs import getincrementalencoder
from flask_babel import gettext, format_date
from searx import logger, settings from searx import logger, settings
from searx.engines import Engine, OTHER_CATEGORY from searx.engines import Engine, OTHER_CATEGORY
@ -138,6 +141,28 @@ def highlight_content(content, query):
return content return content
def searxng_l10n_timespan(dt: datetime) -> str:  # pylint: disable=invalid-name
    """Return a translated, human-readable description of when ``dt`` was.

    * ``dt`` is exactly January 1st, 00:00:00 (what a date parsed from a bare
      year looks like): only the year is returned.
    * ``dt`` is not older than one day: "N minute(s) ago" or
      "H hour(s), M minute(s) ago".
    * otherwise: the date as formatted by ``format_date``.

    Time zone information of ``dt`` is dropped before it is compared with the
    current local time.
    """
    # TODO, check if timezone is calculated right  # pylint: disable=fixme
    if (dt.month, dt.day, dt.hour, dt.minute, dt.second) == (1, 1, 0, 0, 0):
        return str(dt.year)

    naive = dt.replace(tzinfo=None)
    if naive < datetime.now() - timedelta(days=1):
        return format_date(dt)

    elapsed = datetime.now() - naive
    hours, remainder = divmod(elapsed.seconds, 3600)
    minutes = remainder // 60
    if hours:
        return gettext('{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)
    return gettext('{minutes} minute(s) ago').format(minutes=minutes)
def is_flask_run_cmdline(): def is_flask_run_cmdline():
"""Check if the application was started using "flask run" command line """Check if the application was started using "flask run" command line