[enh] validate input and raise an exception inside search.py. The exception message is output in json and rss format.

This commit is contained in:
Alexandre Flament 2017-01-20 18:52:47 +01:00
parent 7fdfeca3a4
commit 15eef0ebdb
4 changed files with 133 additions and 38 deletions

32
searx/exceptions.py Normal file
View file

@ -0,0 +1,32 @@
'''
searx is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
searx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2017- by Alexandre Flament, <alex@al-f.net>
'''
class SearxException(Exception):
    """Base class for searx-specific exceptions."""


class SearxParameterException(SearxException):
    """Error describing a missing or invalid request parameter.

    The human-readable description is passed to ``Exception.__init__`` (so it
    is what ``str(exc)`` returns) and is also stored on ``message``.

    Attributes:
        message: the generated description of the problem.
        parameter_name: the ``name`` argument, unchanged.
        parameter_value: the ``value`` argument, unchanged.
    """

    def __init__(self, name, value):
        """Build the error message from the parameter name and value.

        Args:
            name: name of the offending parameter.
            value: the rejected value; ``''`` or ``None`` is reported as an
                empty parameter. May be of any type (e.g. an out-of-range
                integer), not only a string.
        """
        if value == '' or value is None:
            message = 'Empty %s parameter' % (name,)
        else:
            # %-formatting converts non-string values; plain ``+``
            # concatenation raised TypeError when value was an int.
            message = 'Invalid value "%s" for parameter %s' % (value, name)
        super(SearxParameterException, self).__init__(message)
        # Exceptions have no implicit ``message`` attribute on Python 3,
        # so expose it explicitly for callers that read ``exc.message``.
        self.message = message
        self.parameter_name = name
        self.parameter_value = value

View file

@ -31,11 +31,16 @@ from searx.query import RawTextQuery, SearchQuery
from searx.results import ResultContainer
from searx import logger
from searx.plugins import plugins
from searx.languages import language_codes
from searx.exceptions import SearxParameterException
# Child logger dedicated to this module.
logger = logger.getChild('search')

# Module-level counter, starts at zero (presumably incremented per search;
# the update site is not visible here).
number_of_searches = 0

# Lower-cased language codes accepted as a query language, plus the
# catch-all value 'all'.
language_code_set = {entry[0].lower() for entry in language_codes} | {'all'}
def send_http_request(engine, request_params, start_time, timeout_limit):
# for page_load_time stats
@ -182,33 +187,13 @@ def default_request_params():
def get_search_query_from_webapp(preferences, form):
query = None
query_engines = []
query_categories = []
query_pageno = 1
query_lang = 'all'
query_time_range = None
# no text for the query ?
if not form.get('q'):
raise SearxParameterException('q', '')
# set blocked engines
disabled_engines = preferences.engines.get_disabled()
# set specific language if set
query_lang = preferences.get_value('language')
# safesearch
query_safesearch = preferences.get_value('safesearch')
# TODO better exceptions
if not form.get('q'):
raise Exception('noquery')
# set pagenumber
pageno_param = form.get('pageno', '1')
if not pageno_param.isdigit() or int(pageno_param) < 1:
pageno_param = 1
query_pageno = int(pageno_param)
# parse query, if tags are set, which change
# the search engine or search-language
raw_text_query = RawTextQuery(form['q'], disabled_engines)
@ -217,6 +202,13 @@ def get_search_query_from_webapp(preferences, form):
# set query
query = raw_text_query.getSearchQuery()
# get and check page number
pageno_param = form.get('pageno', '1')
if not pageno_param.isdigit() or int(pageno_param) < 1:
raise SearxParameterException('pageno', pageno_param)
query_pageno = int(pageno_param)
# get language
# set specific language if set on request, query or preferences
# TODO support search with multiple languages
if len(raw_text_query.languages):
@ -226,10 +218,38 @@ def get_search_query_from_webapp(preferences, form):
else:
query_lang = preferences.get_value('language')
# check language
if query_lang not in language_code_set:
raise SearxParameterException('language', query_lang)
# get safesearch
if 'safesearch' in form:
query_safesearch = form.get('safesearch')
# first check safesearch
if not query_safesearch.isdigit():
raise SearxParameterException('safesearch', query_safesearch)
query_safesearch = int(query_safesearch)
else:
query_safesearch = preferences.get_value('safesearch')
# safesearch : second check
if query_safesearch < 0 or query_safesearch > 2:
raise SearxParameterException('safesearch', query_safesearch)
# get time_range
query_time_range = form.get('time_range')
# check time_range
if not(query_time_range is None)\
and not (query_time_range in ['', 'day', 'week', 'month', 'year']):
raise SearxParameterException('time_range', query_time_range)
# query_engines
query_engines = raw_text_query.engines
# query_categories
query_categories = []
# if engines are calculated from query,
# set categories by using that information
if query_engines and raw_text_query.specific:

View file

@ -11,6 +11,12 @@
<opensearch:itemsPerPage>{{ number_of_results }}</opensearch:itemsPerPage>
<atom:link rel="search" type="application/opensearchdescription+xml" href="{{ base_url }}opensearch.xml"/>
<opensearch:Query role="request" searchTerms="{{ q|e }}" startPage="1" />
{% if error_message %}
<item>
<title>Error</title>
<description>{{ error_message|e }}</description>
</item>
{% endif %}
{% for r in results %}
<item>
<title>{{ r.title }}</title>

View file

@ -52,6 +52,7 @@ from flask import (
from flask_babel import Babel, gettext, format_date, format_decimal
from flask.json import jsonify
from searx import settings, searx_dir, searx_debug
from searx.exceptions import SearxException, SearxParameterException
from searx.engines import (
categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
)
@ -400,6 +401,33 @@ def pre_request():
request.user_plugins.append(plugin)
def index_error(output_format, error_message):
    """Build the error response for the index page in the requested format.

    Args:
        output_format: one of 'json', 'csv' or 'rss'; any other value is
            handled as html.
        error_message: text describing the error. It is included in the json
            and rss outputs; the csv and html outputs do not use it.

    Returns:
        A ``Response`` for json, csv and rss, or the rendered 'index.html'
        template for html.
    """
    if output_format == 'json':
        payload = json.dumps({'error': error_message})
        return Response(payload, mimetype='application/json')

    if output_format == 'csv':
        # csv carries no error text: an empty attachment is returned
        csv_response = Response('', mimetype='application/csv')
        csv_response.headers.add('Content-Disposition',
                                 'attachment;Filename=searx.csv')
        return csv_response

    if output_format == 'rss':
        rss_body = render(
            'opensearch_response_rss.xml',
            results=[],
            q=request.form.get('q', ''),
            number_of_results=0,
            base_url=get_base_url(),
            error_message=error_message
        )
        return Response(rss_body, mimetype='text/xml')

    # html: record a generic, translated error on the request and show
    # the index page
    request.errors.append(gettext('search error'))
    return render('index.html')
@app.route('/search', methods=['GET', 'POST'])
@app.route('/', methods=['GET', 'POST'])
def index():
@ -408,10 +436,19 @@ def index():
Supported outputs: html, json, csv, rss.
"""
# output_format
output_format = request.form.get('format', 'html')
if output_format not in ['html', 'csv', 'json', 'rss']:
output_format = 'html'
# check if there is query
if request.form.get('q') is None:
return render(
'index.html',
)
if output_format == 'html':
return render(
'index.html',
)
else:
return index_error(output_format, 'No query'), 400
# search
search_query = None
@ -421,20 +458,24 @@ def index():
# search = Search(search_query) # without plugins
search = SearchWithPlugins(search_query, request)
result_container = search.search()
except:
request.errors.append(gettext('search error'))
except Exception as e:
# log exception
logger.exception('search error')
return render(
'index.html',
)
# is it an invalid input parameter or something else ?
if (issubclass(e.__class__, SearxParameterException)):
return index_error(output_format, e.message), 400
else:
return index_error(output_format, gettext('search error')), 500
# results
results = result_container.get_ordered_results()
number_of_results = result_container.results_number()
if number_of_results < result_container.results_length():
number_of_results = 0
# UI
advanced_search = request.form.get('advanced_search', None)
output_format = request.form.get('format', 'html')
if output_format not in ['html', 'csv', 'json', 'rss']:
output_format = 'html'
# output
for result in results:
@ -470,10 +511,6 @@ def index():
else:
result['publishedDate'] = format_date(result['publishedDate'])
number_of_results = result_container.results_number()
if number_of_results < result_container.results_length():
number_of_results = 0
if output_format == 'json':
return Response(json.dumps({'query': search_query.query,
'number_of_results': number_of_results,