[enh] validate input and raise an exception inside search.py. The exception message is output in json and rss format.

This commit is contained in:
Alexandre Flament 2017-01-20 18:52:47 +01:00
parent 7fdfeca3a4
commit 15eef0ebdb
4 changed files with 133 additions and 38 deletions

32
searx/exceptions.py Normal file
View file

@ -0,0 +1,32 @@
'''
searx is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
searx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2017- by Alexandre Flament, <alex@al-f.net>
'''
class SearxException(Exception):
pass
class SearxParameterException(SearxException):
def __init__(self, name, value):
if value == '' or value is None:
message = 'Empty ' + name + ' parameter'
else:
message = 'Invalid value "' + value + '" for parameter ' + name
super(SearxParameterException, self).__init__(message)
self.parameter_name = name
self.parameter_value = value

View file

@ -31,11 +31,16 @@ from searx.query import RawTextQuery, SearchQuery
from searx.results import ResultContainer from searx.results import ResultContainer
from searx import logger from searx import logger
from searx.plugins import plugins from searx.plugins import plugins
from searx.languages import language_codes
from searx.exceptions import SearxParameterException
logger = logger.getChild('search') logger = logger.getChild('search')
number_of_searches = 0 number_of_searches = 0
language_code_set = set(l[0].lower() for l in language_codes)
language_code_set.add('all')
def send_http_request(engine, request_params, start_time, timeout_limit): def send_http_request(engine, request_params, start_time, timeout_limit):
# for page_load_time stats # for page_load_time stats
@ -182,33 +187,13 @@ def default_request_params():
def get_search_query_from_webapp(preferences, form): def get_search_query_from_webapp(preferences, form):
query = None # no text for the query ?
query_engines = [] if not form.get('q'):
query_categories = [] raise SearxParameterException('q', '')
query_pageno = 1
query_lang = 'all'
query_time_range = None
# set blocked engines # set blocked engines
disabled_engines = preferences.engines.get_disabled() disabled_engines = preferences.engines.get_disabled()
# set specific language if set
query_lang = preferences.get_value('language')
# safesearch
query_safesearch = preferences.get_value('safesearch')
# TODO better exceptions
if not form.get('q'):
raise Exception('noquery')
# set pagenumber
pageno_param = form.get('pageno', '1')
if not pageno_param.isdigit() or int(pageno_param) < 1:
pageno_param = 1
query_pageno = int(pageno_param)
# parse query, if tags are set, which change # parse query, if tags are set, which change
# the serch engine or search-language # the serch engine or search-language
raw_text_query = RawTextQuery(form['q'], disabled_engines) raw_text_query = RawTextQuery(form['q'], disabled_engines)
@ -217,6 +202,13 @@ def get_search_query_from_webapp(preferences, form):
# set query # set query
query = raw_text_query.getSearchQuery() query = raw_text_query.getSearchQuery()
# get and check page number
pageno_param = form.get('pageno', '1')
if not pageno_param.isdigit() or int(pageno_param) < 1:
raise SearxParameterException('pageno', pageno_param)
query_pageno = int(pageno_param)
# get language
# set specific language if set on request, query or preferences # set specific language if set on request, query or preferences
# TODO support search with multible languages # TODO support search with multible languages
if len(raw_text_query.languages): if len(raw_text_query.languages):
@ -226,10 +218,38 @@ def get_search_query_from_webapp(preferences, form):
else: else:
query_lang = preferences.get_value('language') query_lang = preferences.get_value('language')
# check language
if query_lang not in language_code_set:
raise SearxParameterException('language', query_lang)
# get safesearch
if 'safesearch' in form:
query_safesearch = form.get('safesearch')
# first check safesearch
if not query_safesearch.isdigit():
raise SearxParameterException('safesearch', query_safesearch)
query_safesearch = int(query_safesearch)
else:
query_safesearch = preferences.get_value('safesearch')
# safesearch : second check
if query_safesearch < 0 or query_safesearch > 2:
raise SearxParameterException('safesearch', query_safesearch)
# get time_range
query_time_range = form.get('time_range') query_time_range = form.get('time_range')
# check time_range
if not(query_time_range is None)\
and not (query_time_range in ['', 'day', 'week', 'month', 'year']):
raise SearxParameterException('time_range', query_time_range)
# query_engines
query_engines = raw_text_query.engines query_engines = raw_text_query.engines
# query_categories
query_categories = []
# if engines are calculated from query, # if engines are calculated from query,
# set categories by using that informations # set categories by using that informations
if query_engines and raw_text_query.specific: if query_engines and raw_text_query.specific:

View file

@ -11,6 +11,12 @@
<opensearch:itemsPerPage>{{ number_of_results }}</opensearch:itemsPerPage> <opensearch:itemsPerPage>{{ number_of_results }}</opensearch:itemsPerPage>
<atom:link rel="search" type="application/opensearchdescription+xml" href="{{ base_url }}opensearch.xml"/> <atom:link rel="search" type="application/opensearchdescription+xml" href="{{ base_url }}opensearch.xml"/>
<opensearch:Query role="request" searchTerms="{{ q|e }}" startPage="1" /> <opensearch:Query role="request" searchTerms="{{ q|e }}" startPage="1" />
{% if error_message %}
<item>
<title>Error</title>
<description>{{ error_message|e }}</description>
</item>
{% endif %}
{% for r in results %} {% for r in results %}
<item> <item>
<title>{{ r.title }}</title> <title>{{ r.title }}</title>

View file

@ -52,6 +52,7 @@ from flask import (
from flask_babel import Babel, gettext, format_date, format_decimal from flask_babel import Babel, gettext, format_date, format_decimal
from flask.json import jsonify from flask.json import jsonify
from searx import settings, searx_dir, searx_debug from searx import settings, searx_dir, searx_debug
from searx.exceptions import SearxException, SearxParameterException
from searx.engines import ( from searx.engines import (
categories, engines, engine_shortcuts, get_engines_stats, initialize_engines categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
) )
@ -400,6 +401,33 @@ def pre_request():
request.user_plugins.append(plugin) request.user_plugins.append(plugin)
def index_error(output_format, error_message):
if output_format == 'json':
return Response(json.dumps({'error': error_message}),
mimetype='application/json')
elif output_format == 'csv':
response = Response('', mimetype='application/csv')
cont_disp = 'attachment;Filename=searx.csv'
response.headers.add('Content-Disposition', cont_disp)
return response
elif output_format == 'rss':
response_rss = render(
'opensearch_response_rss.xml',
results=[],
q=request.form['q'] if 'q' in request.form else '',
number_of_results=0,
base_url=get_base_url(),
error_message=error_message
)
return Response(response_rss, mimetype='text/xml')
else:
# html
request.errors.append(gettext('search error'))
return render(
'index.html',
)
@app.route('/search', methods=['GET', 'POST']) @app.route('/search', methods=['GET', 'POST'])
@app.route('/', methods=['GET', 'POST']) @app.route('/', methods=['GET', 'POST'])
def index(): def index():
@ -408,10 +436,19 @@ def index():
Supported outputs: html, json, csv, rss. Supported outputs: html, json, csv, rss.
""" """
# output_format
output_format = request.form.get('format', 'html')
if output_format not in ['html', 'csv', 'json', 'rss']:
output_format = 'html'
# check if there is query
if request.form.get('q') is None: if request.form.get('q') is None:
if output_format == 'html':
return render( return render(
'index.html', 'index.html',
) )
else:
return index_error(output_format, 'No query'), 400
# search # search
search_query = None search_query = None
@ -421,20 +458,24 @@ def index():
# search = Search(search_query) # without plugins # search = Search(search_query) # without plugins
search = SearchWithPlugins(search_query, request) search = SearchWithPlugins(search_query, request)
result_container = search.search() result_container = search.search()
except: except Exception as e:
request.errors.append(gettext('search error')) # log exception
logger.exception('search error') logger.exception('search error')
return render(
'index.html',
)
# is it an invalid input parameter or something else ?
if (issubclass(e.__class__, SearxParameterException)):
return index_error(output_format, e.message), 400
else:
return index_error(output_format, gettext('search error')), 500
# results
results = result_container.get_ordered_results() results = result_container.get_ordered_results()
number_of_results = result_container.results_number()
if number_of_results < result_container.results_length():
number_of_results = 0
# UI # UI
advanced_search = request.form.get('advanced_search', None) advanced_search = request.form.get('advanced_search', None)
output_format = request.form.get('format', 'html')
if output_format not in ['html', 'csv', 'json', 'rss']:
output_format = 'html'
# output # output
for result in results: for result in results:
@ -470,10 +511,6 @@ def index():
else: else:
result['publishedDate'] = format_date(result['publishedDate']) result['publishedDate'] = format_date(result['publishedDate'])
number_of_results = result_container.results_number()
if number_of_results < result_container.results_length():
number_of_results = 0
if output_format == 'json': if output_format == 'json':
return Response(json.dumps({'query': search_query.query, return Response(json.dumps({'query': search_query.query,
'number_of_results': number_of_results, 'number_of_results': number_of_results,