forked from Ponysearch/Ponysearch
initial commit of pdbe engine
Adds support for queries to the Protein Data Bank Europe (PDBe).
This commit is contained in:
parent
8b10eb6fe1
commit
3c5883408c
4 changed files with 226 additions and 0 deletions
|
@ -59,3 +59,4 @@ generally made searx better:
|
|||
- Harry Wood @harry-wood
|
||||
- Thomas Renard @threnard
|
||||
- Pydo `<https://github.com/pydo>`_
|
||||
- Athemis `<https://github.com/Athemis>`_
|
||||
|
|
109
searx/engines/pdbe.py
Normal file
109
searx/engines/pdbe.py
Normal file
|
@ -0,0 +1,109 @@
|
|||
"""
|
||||
PDBe (Protein Data Bank in Europe)
|
||||
|
||||
@website https://www.ebi.ac.uk/pdbe
|
||||
@provide-api yes (https://www.ebi.ac.uk/pdbe/api/doc/search.html),
|
||||
unlimited
|
||||
@using-api yes
|
||||
@results python dictionary (from json)
|
||||
@stable yes
|
||||
@parse url, title, content, img_src
|
||||
"""
|
||||
|
||||
from json import loads
|
||||
from flask_babel import gettext
|
||||
|
||||
# search category this engine is listed under
categories = ['science']

# engine option: whether entries marked obsolete should be left out of the results
hide_obsolete = False

# PDB status codes that denote entries which are not (or no longer) published
pdb_unpublished_codes = [
    'HPUB',
    'HOLD',
    'PROC',
    'WAIT',
    'AUTH',
    'AUCO',
    'REPL',
    'POLC',
    'REFI',
    'TRSF',
    'WDRN',
]

# endpoint the search query is sent to
pdbe_solr_url = 'https://www.ebi.ac.uk/pdbe/search/pdb/select?'

# template for the page of a single entry; expects ``pdb_id``
pdbe_entry_url = 'https://www.ebi.ac.uk/pdbe/entry/pdb/{pdb_id}'

# template for the 200x200 preview image of a structure; expects ``pdb_id``
pdbe_preview_url = 'https://www.ebi.ac.uk/pdbe/static/entry/{pdb_id}_deposited_chain_front_image-200x200.png'
|
||||
|
||||
|
||||
def request(query, params):
    """Fill ``params`` with a POST request against the PDBe search endpoint.

    :param query: search terms, sent unchanged as the ``q`` form field
    :param params: mutable request-parameter mapping; its ``url``, ``method``
        and ``data`` entries are overwritten
    :returns: the same ``params`` object
    """
    form_fields = {
        'q': query,
        # ask for a JSON reply so that response() can parse it
        'wt': "json",
    }

    params['url'] = pdbe_solr_url
    params['method'] = 'POST'
    params['data'] = form_fields
    return params
|
||||
|
||||
|
||||
def construct_body(result):
    """Build title, HTML content and preview-image URL for one PDBe entry.

    :param result: one document (dict) taken from the ``docs`` list of the
        PDBe search reply
    :returns: the list ``[title, content, img_src]``. ``content`` is ``None``
        when a citation field needed for the summary is missing; ``img_src``
        is ``None`` when the entry has no ``pdb_id``.
    :raises KeyError: if ``result`` has no ``title``
    """
    title = result['title']

    # citation line shown below the title
    template = """{title}<br />{authors} {journal} <strong>{volume}</strong> {page} ({year})"""

    try:
        # .get(): an entry without any 'journal' key is handled like one with
        # an empty journal instead of losing its whole content
        if result.get('journal'):
            content = template.format(
                title=result['citation_title'],
                authors=result['entry_author_list'][0],
                journal=result['journal'],
                volume=result['journal_volume'],
                page=result['journal_page'],
                year=result['citation_year'])
        else:
            # no journal reference: fall back to the release year
            content = template.format(
                title=result['citation_title'],
                authors=result['entry_author_list'][0],
                journal='',
                volume='',
                page='',
                year=result['release_year'])
    except (KeyError, IndexError):
        # IndexError: 'entry_author_list' is present but empty
        content = None

    # construct url for preview image
    try:
        img_src = pdbe_preview_url.format(pdb_id=result['pdb_id'])
    except KeyError:
        img_src = None

    return [title, content, img_src]
|
||||
|
||||
|
||||
def response(resp):
    """Turn the JSON reply of the PDBe search API into a list of results.

    Entries whose status is listed in ``pdb_unpublished_codes`` are dropped.
    Entries with status ``OBS`` are dropped when ``hide_obsolete`` is set;
    otherwise they are kept, flagged as obsolete in the title, with a link
    to the superseding entry as content and no preview image.

    :param resp: response object; its ``text`` attribute holds the JSON reply
    :returns: list of dicts with the keys ``url``, ``title``, ``content``
        and ``img_src``
    """
    results = []
    docs = loads(resp.text)['response']['docs']

    # parse results
    for result in docs:
        status = result['status']

        # skip entries that are not published
        if status in pdb_unpublished_codes:
            continue

        if status == 'OBS':
            # the option hides obsolete entries only, never the regular ones
            if hide_obsolete:
                continue

            # without a successor there is nothing to link to; skip the entry
            # rather than failing the whole result list
            superseded_by = result.get('superseded_by')
            if not superseded_by:
                continue

            # expand title to add some sort of warning message
            title = gettext('{title} (OBSOLETE)').format(title=result['title'])
            superseded_url = pdbe_entry_url.format(pdb_id=superseded_by)

            # since we can't construct a proper body from the response, we'll make up our own
            msg_superseded = gettext("This entry has been superseded by")
            content = '<em>{msg_superseded} <a href="{url}">{pdb_id}</a></em>'.format(
                msg_superseded=msg_superseded,
                url=superseded_url,
                pdb_id=superseded_by)

            # obsoleted entries don't have preview images
            img_src = None
        else:
            title, content, img_src = construct_body(result)

        results.append({
            'url': pdbe_entry_url.format(pdb_id=result['pdb_id']),
            'title': title,
            'content': content,
            'img_src': img_src
        })

    return results
|
|
@ -339,6 +339,13 @@ engines:
|
|||
disabled : True
|
||||
shortcut : or
|
||||
|
||||
- name : pdbe
|
||||
engine : pdbe
|
||||
shortcut : pdb
|
||||
# Hide obsolete PDB entries.
|
||||
# Default is not to hide obsolete structures
|
||||
# hide_obsolete : False
|
||||
|
||||
- name : photon
|
||||
engine : photon
|
||||
shortcut : ph
|
||||
|
|
109
tests/unit/engines/test_pdbe.py
Normal file
109
tests/unit/engines/test_pdbe.py
Normal file
|
@ -0,0 +1,109 @@
|
|||
import mock
|
||||
from collections import defaultdict
|
||||
from searx.engines import pdbe
|
||||
from searx.testing import SearxTestCase
|
||||
|
||||
|
||||
class TestPdbeEngine(SearxTestCase):
    """Unit tests for the request builder and response parser of the PDBe engine."""

    def test_request(self):
        """request() targets the PDBe host with a POST carrying the query and ``wt=json``."""
        search_term = 'test_query'
        request_params = pdbe.request(search_term, defaultdict(dict))

        self.assertIn('url', request_params)
        self.assertIn('ebi.ac.uk', request_params['url'])

        self.assertIn('data', request_params)
        self.assertIn('q', request_params['data'])
        self.assertIn(search_term, request_params['data']['q'])
        self.assertIn('wt', request_params['data'])
        self.assertIn('json', request_params['data']['wt'])

        self.assertIn('method', request_params)
        self.assertEqual(request_params['method'], 'POST')

    def test_response(self):
        """response() rejects non-response objects and parses released and obsolete entries."""
        # objects without a ``text`` attribute must raise AttributeError
        for not_a_response in (None, [], '', '[]'):
            self.assertRaises(AttributeError, pdbe.response, not_a_response)

        released_reply = """
{
  "response": {
    "docs": [
      {
        "citation_title": "X-ray crystal structure of ferric Aplysia limacina myoglobin in different liganded states.",
        "citation_year": 1993,
        "entry_author_list": [
          "Conti E, Moser C, Rizzi M, Mattevi A, Lionetti C, Coda A, Ascenzi P, Brunori M, Bolognesi M"
        ],
        "journal": "J. Mol. Biol.",
        "journal_page": "498-508",
        "journal_volume": "233",
        "pdb_id": "2fal",
        "status": "REL",
        "title": "X-RAY CRYSTAL STRUCTURE OF FERRIC APLYSIA LIMACINA MYOGLOBIN IN DIFFERENT LIGANDED STATES"
      }
    ],
    "numFound": 1,
    "start": 0
  },
  "responseHeader": {
    "QTime": 0,
    "params": {
      "q": "2fal",
      "wt": "json"
    },
    "status": 0
  }
}
"""

        parsed = pdbe.response(mock.Mock(text=released_reply))
        self.assertIs(type(parsed), list)
        self.assertEqual(len(parsed), 1)

        entry = parsed[0]
        self.assertEqual(entry['title'],
                         'X-RAY CRYSTAL STRUCTURE OF FERRIC APLYSIA LIMACINA MYOGLOBIN IN DIFFERENT LIGANDED STATES')
        self.assertEqual(entry['url'], pdbe.pdbe_entry_url.format(pdb_id='2fal'))
        self.assertEqual(entry['img_src'], pdbe.pdbe_preview_url.format(pdb_id='2fal'))
        self.assertIn('Conti E', entry['content'])
        self.assertIn('X-ray crystal structure of ferric Aplysia limacina myoglobin in different liganded states.',
                      entry['content'])
        self.assertIn('1993', entry['content'])

        # Testing proper handling of PDB entries marked as obsolete
        obsolete_reply = """
{
  "response": {
    "docs": [
      {
        "citation_title": "Obsolete entry test",
        "citation_year": 2016,
        "entry_author_list": ["Doe J"],
        "journal": "J. Obs.",
        "journal_page": "1-2",
        "journal_volume": "1",
        "pdb_id": "xxxx",
        "status": "OBS",
        "title": "OBSOLETE ENTRY TEST",
        "superseded_by": "yyyy"
      }
    ],
    "numFound": 1,
    "start": 0
  },
  "responseHeader": {
    "QTime": 0,
    "params": {
      "q": "xxxx",
      "wt": "json"
    },
    "status": 0
  }
}
"""

        parsed = pdbe.response(mock.Mock(text=obsolete_reply))
        self.assertIs(type(parsed), list)
        self.assertEqual(len(parsed), 1)
        self.assertEqual(parsed[0]['title'], 'OBSOLETE ENTRY TEST (OBSOLETE)')
        self.assertTrue(parsed[0]['content'].startswith('<em>This entry has been superseded by'))
|
Loading…
Reference in a new issue