[mod] core.ac.uk engine

- add to list of pylint scripts
- add debug log messages
- move API key into `settings.yml` (see the sketch below)
- improve readability
- add some metadata to results

Signed-off-by: Markus Heiser <markus@darmarit.de>
Markus Heiser 2021-04-04 12:48:24 +02:00
parent 7528e38c8a
commit 8efabd3ab7
2 changed files with 44 additions and 24 deletions
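
One of the changes below replaces the hard-coded API key with an `api_key = 'unset'` default that the instance admin overrides in `settings.yml`; searx maps per-engine options from that file onto the engine module's attributes. A minimal sketch of such an entry (the key value is a placeholder, and `shortcut` is chosen here for illustration):

engines:
  - name: core.ac.uk
    engine: core
    shortcut: cor
    categories: science
    api_key: 'your-core-api-key'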

manage
@@ -38,6 +38,7 @@ PYLINT_FILES=(
     searx/engines/yahoo_news.py
     searx/engines/apkmirror.py
     searx/engines/artic.py
+    searx/engines/core.py
     searx_extra/update/update_external_bangs.py
     searx/metrics/__init__.py
 )
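
Adding the engine to PYLINT_FILES pulls it into the project's lint run; the effect is roughly the same as invoking pylint on the file directly (assuming pylint is installed in the development environment):

pylint searx/engines/core.py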

searx/engines/core.py

@@ -1,14 +1,18 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
-"""
- Core Engine (science)
+"""CORE (science)
+
 """
+# pylint: disable=missing-function-docstring
 
 from json import loads
 from datetime import datetime
 from urllib.parse import urlencode
 
+from searx import logger
+from searx.exceptions import SearxEngineAPIException
+
+logger = logger.getChild('CORE engine')
+
 about = {
     "website": 'https://core.ac.uk',
     "wikidata_id": 'Q22661180',
@@ -19,45 +23,60 @@ about = {
 }
 
 categories = ['science']
 paging = True
-nb_per_page = 20
+nb_per_page = 10
 
-# apikey = ''
-apikey = 'MVBozuTX8QF9I1D0GviL5bCn2Ueat6NS'
+api_key = 'unset'
+
+logger = logger.getChild('CORE engine')
 
 base_url = 'https://core.ac.uk:443/api-v2/search/'
 search_string = '{query}?page={page}&pageSize={nb_per_page}&apiKey={apikey}'
 
 
 def request(query, params):
 
+    if api_key == 'unset':
+        raise SearxEngineAPIException('missing CORE API key')
+
     search_path = search_string.format(
-        query=urlencode({'q': query}),
-        nb_per_page=nb_per_page,
-        page=params['pageno'],
-        apikey=apikey)
+        query = urlencode({'q': query}),
+        nb_per_page = nb_per_page,
+        page = params['pageno'],
+        apikey = api_key,
+    )
 
     params['url'] = base_url + search_path
-    return params
+
+    logger.debug("query_url --> %s", params['url'])
+    return params
 
 
 def response(resp):
     results = []
     json_data = loads(resp.text)
 
     for result in json_data['data']:
-        time = result['_source']['publishedDate']
-        if time is None:
-            date = datetime.now()
-        else:
-            date = datetime.fromtimestamp(time / 1000)
+
+        source = result['_source']
+        time = source['publishedDate'] or source['depositedDate']
+        if time:
+            date = datetime.fromtimestamp(time / 1000)
+        else:
+            date = None
+
+        metadata = []
+        if source['publisher'] and len(source['publisher']) > 3:
+            metadata.append(source['publisher'])
+        if source['topics']:
+            metadata.append(source['topics'][0])
+        if source['doi']:
+            metadata.append(source['doi'])
+        metadata = ' / '.join(metadata)
 
         results.append({
-            'url': result['_source']['urls'][0],
-            'title': result['_source']['title'],
-            'content': result['_source']['description'],
-            'publishedDate': date})
+            'url': source['urls'][0].replace('http://', 'https://', 1),
+            'title': source['title'],
+            'content': source['description'],
+            'publishedDate': date,
+            'metadata': metadata,
+        })
 
     return results
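
To make the new response() handling concrete: publishedDate/depositedDate are evidently epoch milliseconds (hence the division by 1000), the metadata field is a ' / '-joined subset of publisher, first topic, and DOI, and the len(...) > 3 guard appears to filter out short placeholder publisher strings. A self-contained sketch against a made-up record (field values are invented; only the keys follow the code above):

from datetime import datetime

source = {
    'publishedDate': None,           # missing, so depositedDate is used
    'depositedDate': 1609459200000,  # epoch milliseconds
    'publisher': 'Example Press',
    'topics': ['computer science'],
    'doi': '10.1000/xyz123',
}

time = source['publishedDate'] or source['depositedDate']
date = datetime.fromtimestamp(time / 1000) if time else None

metadata = []
if source['publisher'] and len(source['publisher']) > 3:
    metadata.append(source['publisher'])
if source['topics']:
    metadata.append(source['topics'][0])
if source['doi']:
    metadata.append(source['doi'])

print(date)                  # 2021-01-01 00:00:00 in UTC; local tz may shift it
print(' / '.join(metadata))  # Example Press / computer science / 10.1000/xyz123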