From 880555e2637a3517fefdda77edb1dbe75ee7f066 Mon Sep 17 00:00:00 2001
From: Allen <64094914+allendema@users.noreply.github.com>
Date: Wed, 27 Oct 2021 03:04:52 +0200
Subject: [PATCH 1/5] [enh] engine - add Tineye reverse image search

Other optional parameters ..

`&sort=crawl_date`
    can be appended to search_string to sort results by date.

`&domain=example.org`
    can be appended to search_string to get results from just one domain.

Public instances could get timed out relatively quickly, for 3600s.

--

Merged from @allendema's commit [1] and slightly modified / see [2].

Related-to: [1] https://github.com/allendema/searx/commit/455b2b4460cd830ac1f7e62e824040e2fe648de9
Related-to: [2] https://github.com/searx/searx/pull/3040
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
---
 searx/engines/tineye.py | 83 +++++++++++++++++++++++++++++++++++++++++
 searx/settings.yml      |  6 +++
 2 files changed, 89 insertions(+)
 create mode 100644 searx/engines/tineye.py

diff --git a/searx/engines/tineye.py b/searx/engines/tineye.py
new file mode 100644
index 000000000..e8e45fac4
--- /dev/null
+++ b/searx/engines/tineye.py
@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Tineye - Reverse search images
+
+"""
+
+from json import loads
+from urllib.parse import urlencode
+from datetime import datetime
+
+about = {
+    "website": 'https://tineye.com',
+    "wikidata_id": 'Q2382535',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
+categories = ['images']
+paging = True
+safesearch = False
+base_url = 'https://tineye.com'
+search_string = '/result_json/?page={page}&{query}'
+
+
+def request(query, params):
+    # see https://github.com/TinEye/pytineye/blob/main/pytineye/api.py
+    params['url'] = base_url + search_string.format(query=urlencode({'url': query}), page=params['pageno'])
+
+    params['headers'].update(
+        {
+            'Connection': 'keep-alive',
+            'Accept-Encoding': 'gzip, deflate, br',  # registered content-coding names only
+            'Host': 'tineye.com',
+            'DNT': '1',
+            'TE': 'trailers',
+        }
+    )
+    return params
+
+
+def response(resp):
+    results = []
+
+    # Define wanted results
+    json_data = loads(resp.text)
+    number_of_results = json_data['num_matches']
+
+    for i in json_data['matches']:
+        image_format = i['format']
+        width = i['width']
+        height = i['height']
+        thumbnail_src = i['image_url']
+        backlink = i['domains'][0]['backlinks'][0]  # only the first backlink of the first domain is used
+        url = backlink['backlink']
+        source = backlink['url']
+        title = backlink['image_name']
+        img_src = backlink['url']
+
+        # Convert the crawl date of the backlink into a datetime object
+        api_date = backlink['crawl_date'][:-3]  # NOTE(review): last 3 chars are cut before parsing - confirm format
+        publishedDate = datetime.fromisoformat(api_date)
+
+        # One image result per match
+        results.append(
+            {
+                'template': 'images.html',
+                'url': url,
+                'thumbnail_src': thumbnail_src,
+                'source': source,
+                'title': title,
+                'img_src': img_src,
+                'format': image_format,
+                'width': width,
+                'height': height,
+                'publishedDate': publishedDate,
+            }
+        )
+
+    # The total number of matches is appended as a separate, final item
+    results.append({'number_of_results': number_of_results})
+
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index ad38d543d..60b8b8278 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -483,6 +483,12 @@ engines:
     timeout: 3.0
     disabled: true
 
+  - name: tineye
+    engine: tineye
+    shortcut: tin
+    timeout: 9.0
+    disabled: true
+
   - name: etymonline
     engine: xpath
     paging: true

From b7f74fbe42f54ebd60aeeed77312bcb4c4d63f76 Mon Sep 17 00:00:00 2001
From: Markus Heiser <markus.heiser@darmarit.de>
Date: Tue, 25 Jan 2022 16:37:18 +0100
Subject: [PATCH 2/5] [mod] tineye - add some documentation

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
---
 docs/src/searx.engines.tineye.rst |  9 +++++++++
 searx/engines/tineye.py           | 14 +++++++++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)
 create mode 100644 docs/src/searx.engines.tineye.rst

diff --git a/docs/src/searx.engines.tineye.rst b/docs/src/searx.engines.tineye.rst
new file mode 100644
index 000000000..79e24cfb8
--- /dev/null
+++ b/docs/src/searx.engines.tineye.rst
@@ -0,0 +1,9 @@
+.. _tineye engine:
+
+======
+Tineye
+======
+
+.. automodule:: searx.engines.tineye
+  :members:
+
diff --git a/searx/engines/tineye.py b/searx/engines/tineye.py
index e8e45fac4..302aea326 100644
--- a/searx/engines/tineye.py
+++ b/searx/engines/tineye.py
@@ -1,6 +1,17 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
-"""Tineye - Reverse search images
+"""This engine implements *Tineye - reverse image search*
+
+Using TinEye, you can search by image or perform what we call a reverse image
+search.  You can do that by uploading an image or searching by URL. You can also
+simply drag and drop your images to start your search.  TinEye constantly crawls
+the web and adds images to its index.  Today, the TinEye index is over 50.2
+billion images `[tineye.com] <https://tineye.com/how>`_.
+
+.. hint::
+
+   This SearXNG engine only supports *'searching by URL'* and it does not use
+   the official API `[api.tineye.com] <https://api.tineye.com/python/docs/>`_.
 
 """
 
@@ -11,6 +22,7 @@ from datetime import datetime
 about = {
     "website": 'https://tineye.com',
     "wikidata_id": 'Q2382535',
+    "official_api_documentation": 'https://api.tineye.com/python/docs/',
     "use_official_api": False,
     "require_api_key": False,
     "results": 'JSON',

From e92d40c854eb22f22baed9558257941057cf13fa Mon Sep 17 00:00:00 2001
From: Markus Heiser <markus.heiser@darmarit.de>
Date: Sun, 30 Jan 2022 16:05:08 +0100
Subject: [PATCH 3/5] [enh] implement a OnlineUrlSearchProcessor

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
---
 searx/search/processors/__init__.py          | 10 ++++-
 searx/search/processors/online_url_search.py | 42 ++++++++++++++++++++
 2 files changed, 51 insertions(+), 1 deletion(-)
 create mode 100644 searx/search/processors/online_url_search.py

diff --git a/searx/search/processors/__init__.py b/searx/search/processors/__init__.py
index 4e85527ba..a270b4ef5 100644
--- a/searx/search/processors/__init__.py
+++ b/searx/search/processors/__init__.py
@@ -11,6 +11,7 @@ __all__ = [
     'OnlineProcessor',
     'OnlineDictionaryProcessor',
     'OnlineCurrencyProcessor',
+    'OnlineUrlSearchProcessor',
     'PROCESSORS',
 ]
 
@@ -24,6 +25,7 @@ from .online import OnlineProcessor
 from .offline import OfflineProcessor
 from .online_dictionary import OnlineDictionaryProcessor
 from .online_currency import OnlineCurrencyProcessor
+from .online_url_search import OnlineUrlSearchProcessor
 from .abstract import EngineProcessor
 
 logger = logger.getChild('search.processors')
@@ -33,7 +35,13 @@ PROCESSORS: Dict[str, EngineProcessor] = {}
 
 def get_processor_class(engine_type):
     """Return processor class according to the ``engine_type``"""
-    for c in [OnlineProcessor, OfflineProcessor, OnlineDictionaryProcessor, OnlineCurrencyProcessor]:
+    for c in [
+        OnlineProcessor,
+        OfflineProcessor,
+        OnlineDictionaryProcessor,
+        OnlineCurrencyProcessor,
+        OnlineUrlSearchProcessor,
+    ]:
         if c.engine_type == engine_type:
             return c
     return None
diff --git a/searx/search/processors/online_url_search.py b/searx/search/processors/online_url_search.py
new file mode 100644
index 000000000..2863be28e
--- /dev/null
+++ b/searx/search/processors/online_url_search.py
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Processors for engine-type: ``online_url_search``
+
+"""
+
+import re
+from .online import OnlineProcessor
+
+re_search_urls = {
+    'http': re.compile(r'https?:\/\/[^ ]*'),
+    'ftp': re.compile(r'ftps?:\/\/[^ ]*'),
+    'data:image': re.compile('data:image/[^; ]*;base64,[^ ]*'),
+}
+
+
+class OnlineUrlSearchProcessor(OnlineProcessor):
+    """Processor class used by ``online_url_search`` engines: the query must contain a URL."""
+
+    engine_type = 'online_url_search'
+
+    def get_params(self, search_query, engine_category):
+        params = super().get_params(search_query, engine_category)
+        if params is None:
+            return None
+        # Try every pattern of re_search_urls on the query and remember whether any of them matched.
+        url_match = False
+        search_urls = {}
+
+        for k, v in re_search_urls.items():
+            m = v.search(search_query.query)
+            v = None  # from here on ``v`` is the matched URL string, None when the pattern did not match
+            if m:
+                url_match = True
+                v = m[0]  # the complete text matched by the pattern
+            search_urls[k] = v
+        # No URL of any supported kind in the query: return None instead of request parameters.
+        if not url_match:
+            return None
+        # Hand the matches over under 'search_urls', keyed like re_search_urls.
+        params['search_urls'] = search_urls
+        return params

From a6b879f19c74cd0c15907ed9d21b9185ccea9d25 Mon Sep 17 00:00:00 2001
From: Markus Heiser <markus.heiser@darmarit.de>
Date: Sun, 30 Jan 2022 16:30:52 +0100
Subject: [PATCH 4/5] [mod] tineye engine: set engine_type to
 'online_url_search'

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
---
 searx/engines/tineye.py | 13 +++++++++++--
 searx/settings.yml      |  2 +-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/searx/engines/tineye.py b/searx/engines/tineye.py
index 302aea326..5a8c86062 100644
--- a/searx/engines/tineye.py
+++ b/searx/engines/tineye.py
@@ -28,7 +28,8 @@ about = {
     "results": 'JSON',
 }
 
-categories = ['images']
+engine_type = 'online_url_search'
+categories = ['general']
 paging = True
 safesearch = False
 base_url = 'https://tineye.com'
@@ -36,8 +37,16 @@ search_string = '/result_json/?page={page}&{query}'
 
 
 def request(query, params):
+
+    if params['search_urls']['data:image']:
+        query = params['search_urls']['data:image']
+    elif params['search_urls']['http']:
+        query = params['search_urls']['http']
+
+    query = urlencode({'url': query})
+
     # see https://github.com/TinEye/pytineye/blob/main/pytineye/api.py
-    params['url'] = base_url + search_string.format(query=urlencode({'url': query}), page=params['pageno'])
+    params['url'] = base_url + search_string.format(query=query, page=params['pageno'])
 
     params['headers'].update(
         {
diff --git a/searx/settings.yml b/searx/settings.yml
index 60b8b8278..ac320c457 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -487,7 +487,7 @@ engines:
     engine: tineye
     shortcut: tin
     timeout: 9.0
-    disabled: true
+    disabled: false
 
   - name: etymonline
     engine: xpath

From ebd3013a1aad1bc6def749dea07d6278f399fb69 Mon Sep 17 00:00:00 2001
From: Alexandre Flament <alex@al-f.net>
Date: Sun, 30 Jan 2022 20:44:30 +0100
Subject: [PATCH 5/5] [mod] tineye engine: minor changes

* remove "disabled: false" in settings.yml
* use the json() method from httpx.Response (faster character encoding detection)
---
 searx/engines/tineye.py | 3 +--
 searx/settings.yml      | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/searx/engines/tineye.py b/searx/engines/tineye.py
index 5a8c86062..fe5b60393 100644
--- a/searx/engines/tineye.py
+++ b/searx/engines/tineye.py
@@ -15,7 +15,6 @@ billion images `[tineye.com] <https://tineye.com/how>`_.
 
 """
 
-from json import loads
 from urllib.parse import urlencode
 from datetime import datetime
 
@@ -64,7 +63,7 @@ def response(resp):
     results = []
 
     # Define wanted results
-    json_data = loads(resp.text)
+    json_data = resp.json()
     number_of_results = json_data['num_matches']
 
     for i in json_data['matches']:
diff --git a/searx/settings.yml b/searx/settings.yml
index ac320c457..be068a10e 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -487,7 +487,6 @@ engines:
     engine: tineye
     shortcut: tin
     timeout: 9.0
-    disabled: false
 
   - name: etymonline
     engine: xpath