forked from Ponysearch/Ponysearch
fix Microsoft Academic engine
This commit is contained in:
parent
f97b4ff7b6
commit
8158d8654a
1 changed file with 25 additions and 32 deletions
|
@@ -3,10 +3,7 @@
|
||||||
Microsoft Academic (Science)
|
Microsoft Academic (Science)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from datetime import datetime
|
from json import dumps, loads
|
||||||
from json import loads
|
|
||||||
from uuid import uuid4
|
|
||||||
from urllib.parse import urlencode
|
|
||||||
from searx.utils import html_to_text
|
from searx.utils import html_to_text
|
||||||
|
|
||||||
# about
|
# about
|
||||||
|
@@ -21,26 +18,25 @@ about = {
|
||||||
|
|
||||||
categories = ['images']
|
categories = ['images']
|
||||||
paging = True
|
paging = True
|
||||||
result_url = 'https://academic.microsoft.com/api/search/GetEntityResults?{query}'
|
search_url = 'https://academic.microsoft.com/api/search'
|
||||||
|
_paper_url = 'https://academic.microsoft.com/paper/{id}/reference'
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
correlation_id = uuid4()
|
params['url'] = search_url
|
||||||
msacademic = uuid4()
|
|
||||||
time_now = datetime.now()
|
|
||||||
|
|
||||||
params['url'] = result_url.format(query=urlencode({'correlationId': correlation_id}))
|
|
||||||
params['cookies']['msacademic'] = str(msacademic)
|
|
||||||
params['cookies']['ai_user'] = 'vhd0H|{now}'.format(now=str(time_now))
|
|
||||||
params['method'] = 'POST'
|
params['method'] = 'POST'
|
||||||
params['data'] = {
|
params['headers']['content-type'] = 'application/json; charset=utf-8'
|
||||||
'Query': '@{query}@'.format(query=query),
|
params['data'] = dumps({
|
||||||
'Limit': 10,
|
'query': query,
|
||||||
'Offset': params['pageno'] - 1,
|
'queryExpression': '',
|
||||||
'Filters': '',
|
'filters': [],
|
||||||
'OrderBy': '',
|
'orderBy': 0,
|
||||||
'SortAscending': False,
|
'skip': (params['pageno'] - 1) * 10,
|
||||||
}
|
'sortAscending': True,
|
||||||
|
'take': 10,
|
||||||
|
'includeCitationContexts': False,
|
||||||
|
'profileId': '',
|
||||||
|
})
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
@@ -51,10 +47,13 @@ def response(resp):
|
||||||
if not response_data:
|
if not response_data:
|
||||||
return results
|
return results
|
||||||
|
|
||||||
for result in response_data['results']:
|
for result in response_data['pr']:
|
||||||
url = _get_url(result)
|
if 'dn' not in result['paper']:
|
||||||
title = result['e']['dn']
|
continue
|
||||||
content = _get_content(result)
|
|
||||||
|
title = result['paper']['dn']
|
||||||
|
content = _get_content(result['paper'])
|
||||||
|
url = _paper_url.format(id=result['paper']['id'])
|
||||||
results.append({
|
results.append({
|
||||||
'url': url,
|
'url': url,
|
||||||
'title': html_to_text(title),
|
'title': html_to_text(title),
|
||||||
|
@@ -64,15 +63,9 @@ def response(resp):
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
def _get_url(result):
|
|
||||||
if 's' in result['e']:
|
|
||||||
return result['e']['s'][0]['u']
|
|
||||||
return 'https://academic.microsoft.com/#/detail/{pid}'.format(pid=result['id'])
|
|
||||||
|
|
||||||
|
|
||||||
def _get_content(result):
|
def _get_content(result):
|
||||||
if 'd' in result['e']:
|
if 'd' in result:
|
||||||
content = result['e']['d']
|
content = result['d']
|
||||||
if len(content) > 300:
|
if len(content) > 300:
|
||||||
return content[:300] + '...'
|
return content[:300] + '...'
|
||||||
return content
|
return content
|
||||||
|
|
Loading…
Reference in a new issue