forked from Ponysearch/Ponysearch
Merge pull request #2292 from kvch/elasticsearch-engine
New engine: Elasticsearch
This commit is contained in:
commit
c3d9b17c2a
3 changed files with 158 additions and 1 deletions
142
searx/engines/elasticsearch.py
Normal file
142
searx/engines/elasticsearch.py
Normal file
|
@ -0,0 +1,142 @@
|
||||||
|
from json import loads, dumps
|
||||||
|
from lxml import html
|
||||||
|
from urllib.parse import quote, urljoin
|
||||||
|
from requests.auth import HTTPBasicAuth
|
||||||
|
from searx.utils import extract_text, get_torrent_size
|
||||||
|
|
||||||
|
|
||||||
|
# Default engine configuration, kept as plain module-level globals.
# NOTE(review): these look like the per-instance options (base_url, username,
# password, index, query_type, custom_query_json, show_metadata) -- presumably
# replaced by the engine loader after import; confirm against the caller.

# Result categories this engine reports under.
categories = ['general']

# When true, response() attaches the hit's index, id and score to each result.
show_metadata = False

# Location of the Elasticsearch node and the index to search.
base_url = 'http://localhost:9200'
index = ''

# Basic-auth credentials; request() only sends them when both are non-empty.
username = ''
password = ''

# Name of the query builder to use (a key of _available_query_types) and the
# template consumed by the 'custom' builder.
query_type = 'match'
custom_query_json = {}

# Search endpoint assembled from the values of base_url and index above, at the
# moment this module is imported.
search_url = '{}/{}/_search'.format(base_url, index)
|
||||||
|
|
||||||
|
|
||||||
|
def init(engine_settings):
    """Validate the engine configuration.

    :param engine_settings: mapping of settings for this engine; only the
        optional ``query_type`` entry is inspected here.
    :raises ValueError: if ``query_type`` is given but is not a key of
        ``_available_query_types``, or if the module-level ``index`` is an
        empty string.
    """
    if 'query_type' in engine_settings:
        requested_type = engine_settings['query_type']
        if requested_type not in _available_query_types:
            raise ValueError('unsupported query type', requested_type)

    # The index name is read from the module global, not from engine_settings.
    if index == '':
        raise ValueError('index cannot be empty')
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
    """Fill *params* with the Elasticsearch search request for *query*.

    The request is a GET to ``<base_url>/<index>/_search`` whose body is the
    JSON document produced by the builder registered for ``query_type``.

    :param query: the user's query string, handed to the query builder.
    :param params: request parameter dict; ``params['headers']`` must already
        be a dict. It is modified in place.
    :returns: the same *params* object. It is returned untouched when
        ``query_type`` is not a key of ``_available_query_types``.
    :raises ValueError: propagated from the query builder when *query* does
        not have the format that builder expects.
    """
    build_query = _available_query_types.get(query_type)
    if build_query is None:
        return params

    # Credentials are only attached when both parts are configured.
    if username and password:
        params['auth'] = HTTPBasicAuth(username, password)

    # Build the URL from the current globals instead of using `search_url`.
    # `search_url` is assembled once at import time from the default base_url
    # and the empty default index; init() rejects an empty index, so a working
    # configuration has to replace those globals after import, leaving
    # `search_url` pointing at the defaults.
    params['url'] = '{}/{}/_search'.format(base_url.rstrip('/'), index)
    params['method'] = 'GET'
    params['data'] = dumps(build_query(query))
    params['headers']['Content-Type'] = 'application/json'

    return params
|
||||||
|
|
||||||
|
|
||||||
|
def _match_query(query):
|
||||||
|
"""
|
||||||
|
The standard for full text queries.
|
||||||
|
searx format: "key:value" e.g. city:berlin
|
||||||
|
REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html
|
||||||
|
"""
|
||||||
|
|
||||||
|
try:
|
||||||
|
key, value = query.split(':')
|
||||||
|
except:
|
||||||
|
raise ValueError('query format must be "key:value"')
|
||||||
|
|
||||||
|
return {"query": {"match": {key: {'query': value}}}}
|
||||||
|
|
||||||
|
|
||||||
|
def _simple_query_string_query(query):
|
||||||
|
"""
|
||||||
|
Accepts query strings, but it is less strict than query_string
|
||||||
|
The field used can be specified in index.query.default_field in Elasticsearch.
|
||||||
|
REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html
|
||||||
|
"""
|
||||||
|
|
||||||
|
return {'query': {'simple_query_string': {'query': query}}}
|
||||||
|
|
||||||
|
|
||||||
|
def _term_query(query):
|
||||||
|
"""
|
||||||
|
Accepts one term and the name of the field.
|
||||||
|
searx format: "key:value" e.g. city:berlin
|
||||||
|
REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-term-query.html
|
||||||
|
"""
|
||||||
|
|
||||||
|
try:
|
||||||
|
key, value = query.split(':')
|
||||||
|
except:
|
||||||
|
raise ValueError('query format must be key:value')
|
||||||
|
|
||||||
|
return {'query': {'term': {key: value}}}
|
||||||
|
|
||||||
|
|
||||||
|
def _terms_query(query):
|
||||||
|
"""
|
||||||
|
Accepts multiple terms and the name of the field.
|
||||||
|
searx format: "key:value1,value2" e.g. city:berlin,paris
|
||||||
|
REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-terms-query.html
|
||||||
|
"""
|
||||||
|
|
||||||
|
try:
|
||||||
|
key, values = query.split(':')
|
||||||
|
except:
|
||||||
|
raise ValueError('query format must be key:value1,value2')
|
||||||
|
|
||||||
|
return {'query': {'terms': {key: values.split(',')}}}
|
||||||
|
|
||||||
|
|
||||||
|
def _custom_query(query):
    """Build the request body from the administrator-defined query template.

    searx format: "key:value" e.g. city:berlin

    ``custom_query_json`` is used as a template: every dict key equal to
    ``{{KEY}}`` is replaced by the key part of *query* and every value equal
    to ``{{VALUE}}`` by the value part, at any nesting depth. The template
    itself is never modified, so each call starts from the original
    placeholders.

    :param query: query string in "key:value" form; only the first colon
        separates key from value.
    :returns: a new structure with the placeholders substituted.
    :raises ValueError: if *query* contains no colon.
    """
    key, separator, value = query.partition(':')
    if not separator:
        raise ValueError('query format must be key:value')

    return _fill_placeholders(custom_query_json, key, value)


def _fill_placeholders(template, key, value):
    """Return a copy of *template* with {{KEY}} keys and {{VALUE}} values substituted."""
    if isinstance(template, dict):
        # Build a fresh dict rather than renaming keys in place: changing the
        # keys of a dict while iterating over it raises RuntimeError.
        return {
            (key if name == '{{KEY}}' else name): _fill_placeholders(item, key, value)
            for name, item in template.items()
        }
    if isinstance(template, list):
        return [_fill_placeholders(item, key, value) for item in template]
    if template == '{{VALUE}}':
        return value
    return template
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
    """Convert an Elasticsearch search response into a list of result dicts.

    Each hit's ``_source`` document becomes one result rendered with the
    ``key-value.html`` template. Field values are converted to ``str`` unless
    the field name starts with an underscore. When ``show_metadata`` is
    enabled the hit's index, id and score are attached under ``metadata``.

    :param resp: response object whose ``text`` attribute holds the JSON body.
    :returns: list of result dicts, one per hit.
    :raises Exception: carrying the ``error`` entry of the body, if present.
    """
    payload = loads(resp.text)
    if 'error' in payload:
        raise Exception(payload['error'])

    results = []
    for hit in payload['hits']['hits']:
        entry = {}
        for field, content in hit['_source'].items():
            # Underscore-prefixed fields keep their original type.
            entry[field] = content if field.startswith('_') else str(content)
        entry['template'] = 'key-value.html'

        if show_metadata:
            entry['metadata'] = {
                'index': hit['_index'],
                'id': hit['_id'],
                'score': hit['_score'],
            }

        results.append(entry)

    return results
|
||||||
|
|
||||||
|
|
||||||
|
# Dispatch table from the configured query type name to the function that
# builds the request body for it.
_available_query_types = dict(
    # Full text queries
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/full-text-queries.html
    match=_match_query,
    simple_query_string=_simple_query_string_query,

    # Term-level queries
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/term-level-queries.html
    term=_term_query,
    terms=_terms_query,

    # Query JSON defined by the instance administrator.
    custom=_custom_query,
)
|
|
@ -126,7 +126,8 @@ def send_http_request(engine, request_params):
|
||||||
req = requests_lib.get
|
req = requests_lib.get
|
||||||
else:
|
else:
|
||||||
req = requests_lib.post
|
req = requests_lib.post
|
||||||
request_args['data'] = request_params['data']
|
|
||||||
|
request_args['data'] = request_params['data']
|
||||||
|
|
||||||
# send the request
|
# send the request
|
||||||
return req(request_params['url'], **request_args)
|
return req(request_params['url'], **request_args)
|
||||||
|
|
|
@ -231,6 +231,20 @@ engines:
|
||||||
shortcut : ew
|
shortcut : ew
|
||||||
disabled : True
|
disabled : True
|
||||||
|
|
||||||
|
# - name : elasticsearch
|
||||||
|
# shortcut : es
|
||||||
|
# engine : elasticsearch
|
||||||
|
# base_url : http://localhost:9200
|
||||||
|
# username : elastic
|
||||||
|
# password : changeme
|
||||||
|
# index : my-index
|
||||||
|
# # available options: match, simple_query_string, term, terms, custom
|
||||||
|
# query_type : match
|
||||||
|
# # if query_type is set to custom, provide your query here
|
||||||
|
# #custom_query_json: {"query":{"match_all": {}}}
|
||||||
|
# #show_metadata: False
|
||||||
|
# disabled : True
|
||||||
|
|
||||||
- name : wikidata
|
- name : wikidata
|
||||||
engine : wikidata
|
engine : wikidata
|
||||||
shortcut : wd
|
shortcut : wd
|
||||||
|
|
Loading…
Reference in a new issue