From 510aba5e6699f76e5b9dc32db18b0f19db6e5da4 Mon Sep 17 00:00:00 2001
From: Thomas Pointhuber <thomas.pointhuber@gmx.at>
Date: Wed, 1 Oct 2014 17:18:18 +0200
Subject: [PATCH 1/2] implement query parser and use it inside autocompletion

---
 searx/query.py  | 125 ++++++++++++++++++++++++++++++++++++++++++++++++
 searx/webapp.py |  30 ++++++++++--
 2 files changed, 152 insertions(+), 3 deletions(-)
 create mode 100644 searx/query.py

diff --git a/searx/query.py b/searx/query.py
new file mode 100644
index 000000000..59a1e347b
--- /dev/null
+++ b/searx/query.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python
+
+'''
+searx is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+searx is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with searx. If not, see < http://www.gnu.org/licenses/ >.
+
+(C) 2014 by Thomas Pointhuber, <thomas.pointhuber@gmx.at>
+'''
+
+from searx.languages import language_codes
+from searx.engines import (
+    categories, engines, engine_shortcuts
+)
+import string
+import re
+
+
+class Query(object):
+    """parse query"""
+
+    def __init__(self, query, blocked_engines):
+        self.query = query
+        self.blocked_engines = []
+        
+        if blocked_engines:
+            self.blocked_engines = blocked_engines
+            
+        self.query_parts = []
+        self.engines = []
+        self.languages = []
+    
+    def parse_query(self):
+        self.query_parts = []
+        
+        # split query, including whitespaces
+        raw_query_parts = re.split(r'(\s+)', self.query)
+        
+        parse_next = True
+        
+        for query_part in raw_query_parts:
+            if not parse_next:
+                self.query_parts[-1] += query_part
+                continue
+           
+            parse_next = False
+           
+            # part does only contain spaces, skip
+            if query_part.isspace():
+                parse_next = True
+                self.query_parts.append(query_part)
+                continue
+
+            # this forces a language
+            if query_part[0] == ':':
+                lang = query_part[1:].lower()
+
+                # check if any language-code is equal with declared language-codes
+                for lc in language_codes:
+                    lang_id, lang_name, country = map(str.lower, lc)
+
+                    # if correct language-code is found, set it as new search-language
+                    if lang == lang_id\
+                       or lang_id.startswith(lang)\
+                       or lang == lang_name\
+                       or lang == country:
+                        parse_next = True
+                        self.languages.append(lang)
+                        break
+
+            # this forces an engine or category
+            if query_part[0] == '!':
+                prefix = query_part[1:].replace('_', ' ')
+
+                # check if prefix is equal to an engine shortcut
+                if prefix in engine_shortcuts\
+                   and not engine_shortcuts[prefix] in self.blocked_engines:
+                    parse_next = True
+                    self.engines.append({'category': 'none',
+                                         'name': engine_shortcuts[prefix]})
+                
+                # check if prefix is equal to an engine name
+                elif prefix in engines\
+                        and not prefix in self.blocked_engines:
+                    parse_next = True
+                    self.engines.append({'category': 'none',
+                                        'name': prefix})
+
+                # check if prefix is equal to a category name
+                elif prefix in categories:
+                    # use all engines declared under that category name for this search
+                    parse_next = True
+                    self.engines.extend({'category': prefix,
+                                        'name': engine.name}
+                                        for engine in categories[prefix]
+                                        if not engine in self.blocked_engines)
+          
+            # append query part to query_part list
+            self.query_parts.append(query_part)
+
+    def changeSearchQuery(self, search_query):
+        if len(self.query_parts):
+            self.query_parts[-1] = search_query
+        else:
+            self.query_parts.append(search_query)
+            
+    def getSearchQuery(self):
+        if len(self.query_parts):
+            return self.query_parts[-1]
+        else:
+            return ''
+    
+    def getFullQuery(self):
+        # get full query including whitespace
+        return string.join(self.query_parts, '')
+
diff --git a/searx/webapp.py b/searx/webapp.py
index 42cb42678..f66466b35 100644
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -47,6 +47,7 @@ from searx.utils import (
 from searx.https_rewrite import https_rules
 from searx.languages import language_codes
 from searx.search import Search
+from searx.query import Query
 from searx.autocomplete import backends as autocomplete_backends
 
 
@@ -308,23 +309,46 @@ def autocompleter():
     """Return autocompleter results"""
     request_data = {}
 
+    # select request method
     if request.method == 'POST':
         request_data = request.form
     else:
         request_data = request.args
 
-    query = request_data.get('q', '').encode('utf-8')
+    # set blocked engines
+    if request.cookies.get('blocked_engines'):
+        blocked_engines = request.cookies['blocked_engines'].split(',')  # noqa
+    else:
+        blocked_engines = []
 
-    if not query:
+    # parse query
+    query = Query(request_data.get('q', '').encode('utf-8'), blocked_engines)
+    query.parse_query()
+
+    # check if search query is set
+    if not query.getSearchQuery():
         return
 
+    # look up the autocompletion backend named in the 'autocomplete' cookie
     completer = autocomplete_backends.get(request.cookies.get('autocomplete'))
 
+    # check if valid autocompleter is selected
     if not completer:
         return
 
-    results = completer(query)
+    # run autocompletion
+    raw_results = completer(query.getSearchQuery())
 
+    # parse results (write :language and !engine back to result string)
+    results = []
+    for result in raw_results:
+        result_query = query
+        result_query.changeSearchQuery(result)
+
+        # add parsed result
+        results.append(result_query.getFullQuery())
+
+    # return autocompleter results
     if request_data.get('format') == 'x-suggestions':
         return Response(json.dumps([query, results]),
                         mimetype='application/json')

From 62d1a70c84367403222c15e25f597a8d6b336151 Mon Sep 17 00:00:00 2001
From: Thomas Pointhuber <thomas.pointhuber@gmx.at>
Date: Wed, 1 Oct 2014 17:57:53 +0200
Subject: [PATCH 2/2] using Query class for parsing of search query

---
 searx/query.py  |  4 ++-
 searx/search.py | 65 ++++++++-----------------------------------------
 2 files changed, 13 insertions(+), 56 deletions(-)

diff --git a/searx/query.py b/searx/query.py
index 59a1e347b..612d46f4b 100644
--- a/searx/query.py
+++ b/searx/query.py
@@ -39,6 +39,7 @@ class Query(object):
         self.engines = []
         self.languages = []
     
+    # parse query, if tags are set, which change the search engine or search-language
     def parse_query(self):
         self.query_parts = []
         
@@ -55,7 +56,8 @@ class Query(object):
             parse_next = False
            
             # part does only contain spaces, skip
-            if query_part.isspace():
+            if query_part.isspace()\
+               or query_part == '':
                 parse_next = True
                 self.query_parts.append(query_part)
                 continue
diff --git a/searx/search.py b/searx/search.py
index c861a795a..17556dc4e 100644
--- a/searx/search.py
+++ b/searx/search.py
@@ -25,6 +25,7 @@ from searx.engines import (
 )
 from searx.languages import language_codes
 from searx.utils import gen_useragent
+from searx.query import Query
 
 
 number_of_searches = 0
@@ -235,7 +236,15 @@ class Search(object):
         self.pageno = int(pageno_param)
 
         # parse query, if tags are set, which change the serch engine or search-language
-        self.parse_query()
+        query_obj = Query(self.query, self.blocked_engines)
+        query_obj.parse_query()        
+
+        # get last selected language in query, if possible
+        # TODO support search with multiple languages
+        if len(query_obj.languages):
+            self.lang = query_obj.languages[-1]
+
+        self.engines = query_obj.engines
 
         self.categories = []
 
@@ -276,60 +285,6 @@ class Search(object):
                                     for x in categories[categ]
                                     if not x.name in self.blocked_engines)
 
-    # parse query, if tags are set, which change the serch engine or search-language
-    def parse_query(self):
-        query_parts = self.query.split()
-        modified = False
-
-        # check if language-prefix is set
-        if query_parts[0].startswith(':'):
-            lang = query_parts[0][1:].lower()
-
-            # check if any language-code is equal with declared language-codes
-            for lc in language_codes:
-                lang_id, lang_name, country = map(str.lower, lc)
-
-                # if correct language-code is found, set it as new search-language
-                if lang == lang_id\
-                   or lang_id.startswith(lang)\
-                   or lang == lang_name\
-                   or lang == country:
-                    self.lang = lang
-                    modified = True
-                    break
-
-        # check if category/engine prefix is set
-        elif query_parts[0].startswith('!'):
-            prefix = query_parts[0][1:].replace('_', ' ')
-
-            # check if prefix is equal with engine shortcut
-            if prefix in engine_shortcuts\
-               and not engine_shortcuts[prefix] in self.blocked_engines:
-                modified = True
-                self.engines.append({'category': 'none',
-                                     'name': engine_shortcuts[prefix]})
-
-            # check if prefix is equal with engine name
-            elif prefix in engines\
-                    and not prefix in self.blocked_engines:
-                modified = True
-                self.engines.append({'category': 'none',
-                                    'name': prefix})
-
-            # check if prefix is equal with categorie name
-            elif prefix in categories:
-                modified = True
-                # using all engines for that search, which are declared under that categorie name
-                self.engines.extend({'category': prefix,
-                                    'name': engine.name}
-                                    for engine in categories[prefix]
-                                    if not engine in self.blocked_engines)
-
-        # if language, category or engine were specificed in this query, search for more tags which does the same
-        if modified:
-            self.query = self.query.replace(query_parts[0], '', 1).strip()
-            self.parse_query()
-
     # do search-request
     def search(self, request):
         global number_of_searches