forked from Ponysearch/Ponysearch
[enh] validate input and raise an exception inside search.py. The exception message is output in json and rss format.
This commit is contained in:
parent
7fdfeca3a4
commit
15eef0ebdb
4 changed files with 133 additions and 38 deletions
32
searx/exceptions.py
Normal file
32
searx/exceptions.py
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
'''
|
||||||
|
searx is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
searx is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with searx. If not, see < http://www.gnu.org/licenses/ >.
|
||||||
|
|
||||||
|
(C) 2017- by Alexandre Flament, <alex@al-f.net>
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
|
class SearxException(Exception):
    """Base class for searx-specific exceptions."""
    pass


class SearxParameterException(SearxException):
    """Raised when a request parameter is empty or has an invalid value.

    Attributes:
        message: human-readable description; it is also the single
            argument handed to ``Exception``.
        parameter_name: name of the rejected parameter.
        parameter_value: the rejected value, stored unchanged.
    """

    def __init__(self, name, value):
        """Build the error message from the parameter name and value.

        :param name: name of the request parameter.
        :param value: the rejected value; ``''`` and ``None`` are reported
            as an empty parameter, anything else as an invalid value.
        """
        if value == '' or value is None:
            message = 'Empty %s parameter' % (name,)
        else:
            # %-formatting rather than ``+``: the value is not guaranteed to
            # be a string (an int would make concatenation raise TypeError
            # while the error itself is being reported).
            message = 'Invalid value "%s" for parameter %s' % (value, name)
        super(SearxParameterException, self).__init__(message)
        # Exceptions have no implicit ``message`` attribute on Python 3,
        # so expose it explicitly for code that reads ``e.message``.
        self.message = message
        self.parameter_name = name
        self.parameter_value = value
|
|
@ -31,11 +31,16 @@ from searx.query import RawTextQuery, SearchQuery
|
||||||
from searx.results import ResultContainer
|
from searx.results import ResultContainer
|
||||||
from searx import logger
|
from searx import logger
|
||||||
from searx.plugins import plugins
|
from searx.plugins import plugins
|
||||||
|
from searx.languages import language_codes
|
||||||
|
from searx.exceptions import SearxParameterException
|
||||||
|
|
||||||
logger = logger.getChild('search')
|
logger = logger.getChild('search')
|
||||||
|
|
||||||
number_of_searches = 0
|
number_of_searches = 0
|
||||||
|
|
||||||
|
language_code_set = set(l[0].lower() for l in language_codes)
|
||||||
|
language_code_set.add('all')
|
||||||
|
|
||||||
|
|
||||||
def send_http_request(engine, request_params, start_time, timeout_limit):
|
def send_http_request(engine, request_params, start_time, timeout_limit):
|
||||||
# for page_load_time stats
|
# for page_load_time stats
|
||||||
|
@ -182,33 +187,13 @@ def default_request_params():
|
||||||
|
|
||||||
|
|
||||||
def get_search_query_from_webapp(preferences, form):
|
def get_search_query_from_webapp(preferences, form):
|
||||||
query = None
|
# no text for the query ?
|
||||||
query_engines = []
|
if not form.get('q'):
|
||||||
query_categories = []
|
raise SearxParameterException('q', '')
|
||||||
query_pageno = 1
|
|
||||||
query_lang = 'all'
|
|
||||||
query_time_range = None
|
|
||||||
|
|
||||||
# set blocked engines
|
# set blocked engines
|
||||||
disabled_engines = preferences.engines.get_disabled()
|
disabled_engines = preferences.engines.get_disabled()
|
||||||
|
|
||||||
# set specific language if set
|
|
||||||
query_lang = preferences.get_value('language')
|
|
||||||
|
|
||||||
# safesearch
|
|
||||||
query_safesearch = preferences.get_value('safesearch')
|
|
||||||
|
|
||||||
# TODO better exceptions
|
|
||||||
if not form.get('q'):
|
|
||||||
raise Exception('noquery')
|
|
||||||
|
|
||||||
# set pagenumber
|
|
||||||
pageno_param = form.get('pageno', '1')
|
|
||||||
if not pageno_param.isdigit() or int(pageno_param) < 1:
|
|
||||||
pageno_param = 1
|
|
||||||
|
|
||||||
query_pageno = int(pageno_param)
|
|
||||||
|
|
||||||
# parse query, if tags are set, which change
|
# parse query, if tags are set, which change
|
||||||
# the search engine or search-language
|
# the search engine or search-language
|
||||||
raw_text_query = RawTextQuery(form['q'], disabled_engines)
|
raw_text_query = RawTextQuery(form['q'], disabled_engines)
|
||||||
|
@ -217,6 +202,13 @@ def get_search_query_from_webapp(preferences, form):
|
||||||
# set query
|
# set query
|
||||||
query = raw_text_query.getSearchQuery()
|
query = raw_text_query.getSearchQuery()
|
||||||
|
|
||||||
|
# get and check page number
|
||||||
|
pageno_param = form.get('pageno', '1')
|
||||||
|
if not pageno_param.isdigit() or int(pageno_param) < 1:
|
||||||
|
raise SearxParameterException('pageno', pageno_param)
|
||||||
|
query_pageno = int(pageno_param)
|
||||||
|
|
||||||
|
# get language
|
||||||
# set specific language if set on request, query or preferences
|
# set specific language if set on request, query or preferences
|
||||||
# TODO support search with multiple languages
|
# TODO support search with multiple languages
|
||||||
if len(raw_text_query.languages):
|
if len(raw_text_query.languages):
|
||||||
|
@ -226,10 +218,38 @@ def get_search_query_from_webapp(preferences, form):
|
||||||
else:
|
else:
|
||||||
query_lang = preferences.get_value('language')
|
query_lang = preferences.get_value('language')
|
||||||
|
|
||||||
|
# check language
|
||||||
|
if query_lang not in language_code_set:
|
||||||
|
raise SearxParameterException('language', query_lang)
|
||||||
|
|
||||||
|
# get safesearch
|
||||||
|
if 'safesearch' in form:
|
||||||
|
query_safesearch = form.get('safesearch')
|
||||||
|
# first check safesearch
|
||||||
|
if not query_safesearch.isdigit():
|
||||||
|
raise SearxParameterException('safesearch', query_safesearch)
|
||||||
|
query_safesearch = int(query_safesearch)
|
||||||
|
else:
|
||||||
|
query_safesearch = preferences.get_value('safesearch')
|
||||||
|
|
||||||
|
# safesearch : second check
|
||||||
|
if query_safesearch < 0 or query_safesearch > 2:
|
||||||
|
raise SearxParameterException('safesearch', query_safesearch)
|
||||||
|
|
||||||
|
# get time_range
|
||||||
query_time_range = form.get('time_range')
|
query_time_range = form.get('time_range')
|
||||||
|
|
||||||
|
# check time_range
|
||||||
|
if not(query_time_range is None)\
|
||||||
|
and not (query_time_range in ['', 'day', 'week', 'month', 'year']):
|
||||||
|
raise SearxParameterException('time_range', query_time_range)
|
||||||
|
|
||||||
|
# query_engines
|
||||||
query_engines = raw_text_query.engines
|
query_engines = raw_text_query.engines
|
||||||
|
|
||||||
|
# query_categories
|
||||||
|
query_categories = []
|
||||||
|
|
||||||
# if engines are calculated from query,
|
# if engines are calculated from query,
|
||||||
# set categories by using that information
|
# set categories by using that information
|
||||||
if query_engines and raw_text_query.specific:
|
if query_engines and raw_text_query.specific:
|
||||||
|
|
|
@ -11,6 +11,12 @@
|
||||||
<opensearch:itemsPerPage>{{ number_of_results }}</opensearch:itemsPerPage>
|
<opensearch:itemsPerPage>{{ number_of_results }}</opensearch:itemsPerPage>
|
||||||
<atom:link rel="search" type="application/opensearchdescription+xml" href="{{ base_url }}opensearch.xml"/>
|
<atom:link rel="search" type="application/opensearchdescription+xml" href="{{ base_url }}opensearch.xml"/>
|
||||||
<opensearch:Query role="request" searchTerms="{{ q|e }}" startPage="1" />
|
<opensearch:Query role="request" searchTerms="{{ q|e }}" startPage="1" />
|
||||||
|
{% if error_message %}
|
||||||
|
<item>
|
||||||
|
<title>Error</title>
|
||||||
|
<description>{{ error_message|e }}</description>
|
||||||
|
</item>
|
||||||
|
{% endif %}
|
||||||
{% for r in results %}
|
{% for r in results %}
|
||||||
<item>
|
<item>
|
||||||
<title>{{ r.title }}</title>
|
<title>{{ r.title }}</title>
|
||||||
|
|
|
@ -52,6 +52,7 @@ from flask import (
|
||||||
from flask_babel import Babel, gettext, format_date, format_decimal
|
from flask_babel import Babel, gettext, format_date, format_decimal
|
||||||
from flask.json import jsonify
|
from flask.json import jsonify
|
||||||
from searx import settings, searx_dir, searx_debug
|
from searx import settings, searx_dir, searx_debug
|
||||||
|
from searx.exceptions import SearxException, SearxParameterException
|
||||||
from searx.engines import (
|
from searx.engines import (
|
||||||
categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
|
categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
|
||||||
)
|
)
|
||||||
|
@ -400,6 +401,33 @@ def pre_request():
|
||||||
request.user_plugins.append(plugin)
|
request.user_plugins.append(plugin)
|
||||||
|
|
||||||
|
|
||||||
|
def index_error(output_format, error_message):
    """Build the error response for the requested output format.

    :param output_format: one of ``'json'``, ``'csv'``, ``'rss'``; any other
        value is handled as html.
    :param error_message: text reported in the json and rss outputs. The csv
        and html outputs do not include it.
    :return: a ``Response`` for json, csv and rss, or the rendered
        ``index.html`` for html.
    """
    if output_format == 'json':
        payload = json.dumps({'error': error_message})
        return Response(payload, mimetype='application/json')

    if output_format == 'csv':
        # the csv output is an empty attachment: the message is not sent
        csv_response = Response('', mimetype='application/csv')
        csv_response.headers.add('Content-Disposition',
                                 'attachment;Filename=searx.csv')
        return csv_response

    if output_format == 'rss':
        # empty result feed that only carries the error item
        rss_body = render(
            'opensearch_response_rss.xml',
            results=[],
            q=request.form.get('q', ''),
            number_of_results=0,
            base_url=get_base_url(),
            error_message=error_message
        )
        return Response(rss_body, mimetype='text/xml')

    # html: a generic message is queued instead of error_message
    request.errors.append(gettext('search error'))
    return render('index.html')
|
||||||
|
|
||||||
|
|
||||||
@app.route('/search', methods=['GET', 'POST'])
|
@app.route('/search', methods=['GET', 'POST'])
|
||||||
@app.route('/', methods=['GET', 'POST'])
|
@app.route('/', methods=['GET', 'POST'])
|
||||||
def index():
|
def index():
|
||||||
|
@ -408,10 +436,19 @@ def index():
|
||||||
Supported outputs: html, json, csv, rss.
|
Supported outputs: html, json, csv, rss.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# output_format
|
||||||
|
output_format = request.form.get('format', 'html')
|
||||||
|
if output_format not in ['html', 'csv', 'json', 'rss']:
|
||||||
|
output_format = 'html'
|
||||||
|
|
||||||
|
# check if there is query
|
||||||
if request.form.get('q') is None:
|
if request.form.get('q') is None:
|
||||||
return render(
|
if output_format == 'html':
|
||||||
'index.html',
|
return render(
|
||||||
)
|
'index.html',
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
return index_error(output_format, 'No query'), 400
|
||||||
|
|
||||||
# search
|
# search
|
||||||
search_query = None
|
search_query = None
|
||||||
|
@ -421,20 +458,24 @@ def index():
|
||||||
# search = Search(search_query) # without plugins
|
# search = Search(search_query) # without plugins
|
||||||
search = SearchWithPlugins(search_query, request)
|
search = SearchWithPlugins(search_query, request)
|
||||||
result_container = search.search()
|
result_container = search.search()
|
||||||
except:
|
except Exception as e:
|
||||||
request.errors.append(gettext('search error'))
|
# log exception
|
||||||
logger.exception('search error')
|
logger.exception('search error')
|
||||||
return render(
|
|
||||||
'index.html',
|
|
||||||
)
|
|
||||||
|
|
||||||
|
# is it an invalid input parameter or something else ?
|
||||||
|
if (issubclass(e.__class__, SearxParameterException)):
|
||||||
|
return index_error(output_format, e.message), 400
|
||||||
|
else:
|
||||||
|
return index_error(output_format, gettext('search error')), 500
|
||||||
|
|
||||||
|
# results
|
||||||
results = result_container.get_ordered_results()
|
results = result_container.get_ordered_results()
|
||||||
|
number_of_results = result_container.results_number()
|
||||||
|
if number_of_results < result_container.results_length():
|
||||||
|
number_of_results = 0
|
||||||
|
|
||||||
# UI
|
# UI
|
||||||
advanced_search = request.form.get('advanced_search', None)
|
advanced_search = request.form.get('advanced_search', None)
|
||||||
output_format = request.form.get('format', 'html')
|
|
||||||
if output_format not in ['html', 'csv', 'json', 'rss']:
|
|
||||||
output_format = 'html'
|
|
||||||
|
|
||||||
# output
|
# output
|
||||||
for result in results:
|
for result in results:
|
||||||
|
@ -470,10 +511,6 @@ def index():
|
||||||
else:
|
else:
|
||||||
result['publishedDate'] = format_date(result['publishedDate'])
|
result['publishedDate'] = format_date(result['publishedDate'])
|
||||||
|
|
||||||
number_of_results = result_container.results_number()
|
|
||||||
if number_of_results < result_container.results_length():
|
|
||||||
number_of_results = 0
|
|
||||||
|
|
||||||
if output_format == 'json':
|
if output_format == 'json':
|
||||||
return Response(json.dumps({'query': search_query.query,
|
return Response(json.dumps({'query': search_query.query,
|
||||||
'number_of_results': number_of_results,
|
'number_of_results': number_of_results,
|
||||||
|
|
Loading…
Reference in a new issue