From 560a14e77bcf804ab55ae1cc8c93ce0b2289cf2a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9on=20Tiek=C3=B6tter?= <leon@tiekoetter.com>
Date: Sun, 6 Feb 2022 17:50:00 +0100
Subject: [PATCH 1/5] [fix] wikidata info box images

Wikidata info box images are now loaded from upload.wikimedia.org instead of commons.wikimedia.org to prevent redirects

Co-authored-by: Markus Heiser <markus.heiser@darmarit.de>
---
 searx/engines/wikidata.py | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
index e5d3f55c0..ce500cda6 100644
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@@ -18,6 +18,7 @@ from searx.engines.wikipedia import (  # pylint: disable=unused-import
     _fetch_supported_languages,
     supported_languages_url,
 )
+from hashlib import md5
 
 # about
 about = {
@@ -185,6 +186,36 @@ def response(resp):
     return results
 
 
+_IMG_SRC_DEFAULT_URL_PREFIX = "https://commons.wikimedia.org/wiki/Special:FilePath/"
+_IMG_SRC_NEW_URL_PREFIX = "https://upload.wikimedia.org/wikipedia/commons/"
+
+
+def get_thumbnail(img_src):
+    """Get Thumbnail image from wikimedia commons
+
+    Images from commons.wikimedia.org are (HTTP) redirected to
+    upload.wikimedia.org.  The redirected URL can be calculated by this
+    function.
+
+    - https://stackoverflow.com/a/33691240
+
+    """
+    logger.debug('get_thumbnail(): %s', img_src)
+    if not img_src is None and _IMG_SRC_DEFAULT_URL_PREFIX in img_src.split()[0]:
+        img_src_name = (
+            img_src.replace(_IMG_SRC_DEFAULT_URL_PREFIX, "")
+            .split("?", 1)[0]
+            .replace("%20", "_")
+            .replace("%28", "(")
+            .replace("%29", ")")
+        )
+        img_src_name_md5 = md5(img_src_name.encode("utf-8")).hexdigest()
+        img_src = _IMG_SRC_NEW_URL_PREFIX + img_src_name_md5[0] + "/" + img_src_name_md5[0:2] + "/" + img_src_name
+        logger.debug('get_thumbnail() redirected: %s', img_src)
+
+    return img_src
+
+
 def get_results(attribute_result, attributes, language):
     results = []
     infobox_title = attribute_result.get('itemLabel')
@@ -221,7 +252,7 @@ def get_results(attribute_result, attributes, language):
                 # replace the current image only the priority is lower
                 # (the infobox contain only one image).
                 if attribute.priority < img_src_priority:
-                    img_src = value
+                    img_src = get_thumbnail(value)
                     img_src_priority = attribute.priority
             elif attribute_type == WDGeoAttribute:
                 # geocoordinate link

From a50f32bcfcfce1de94c4eadb646c35489617fba3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9on=20Tiek=C3=B6tter?= <leon@tiekoetter.com>
Date: Sun, 6 Feb 2022 23:25:50 +0100
Subject: [PATCH 2/5] wikidata: load thumbnail instead of full image

---
 searx/engines/wikidata.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
index ce500cda6..18dac3efb 100644
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@@ -187,7 +187,7 @@ def response(resp):
 
 
 _IMG_SRC_DEFAULT_URL_PREFIX = "https://commons.wikimedia.org/wiki/Special:FilePath/"
-_IMG_SRC_NEW_URL_PREFIX = "https://upload.wikimedia.org/wikipedia/commons/"
+_IMG_SRC_NEW_URL_PREFIX = "https://upload.wikimedia.org/wikipedia/commons/thumb/"
 
 
 def get_thumbnail(img_src):
@@ -209,8 +209,21 @@ def get_thumbnail(img_src):
             .replace("%28", "(")
             .replace("%29", ")")
         )
+        img_src_size = img_src.replace(_IMG_SRC_DEFAULT_URL_PREFIX, "").split("?", 1)[1]
+        img_src_size = img_src_size[img_src_size.index("=") + 1 : img_src_size.index("&")]
         img_src_name_md5 = md5(img_src_name.encode("utf-8")).hexdigest()
-        img_src = _IMG_SRC_NEW_URL_PREFIX + img_src_name_md5[0] + "/" + img_src_name_md5[0:2] + "/" + img_src_name
+        img_src = (
+            _IMG_SRC_NEW_URL_PREFIX
+            + img_src_name_md5[0]
+            + "/"
+            + img_src_name_md5[0:2]
+            + "/"
+            + img_src_name
+            + "/"
+            + img_src_size
+            + "px-"
+            + img_src_name
+        )
         logger.debug('get_thumbnail() redirected: %s', img_src)
 
     return img_src

From a13c5d70c7b3773afcb2c6bfe684f212b3a70707 Mon Sep 17 00:00:00 2001
From: Markus Heiser <markus.heiser@darmarit.de>
Date: Sun, 6 Feb 2022 23:35:55 +0100
Subject: [PATCH 3/5] [fix] wikidata engine: select image with higher (not
 lower) priority

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
---
 searx/engines/wikidata.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
index 18dac3efb..c6a551e9c 100644
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@@ -238,7 +238,7 @@ def get_results(attribute_result, attributes, language):
     infobox_attributes = []
     infobox_content = attribute_result.get('itemDescription', [])
     img_src = None
-    img_src_priority = 100
+    img_src_priority = 0
 
     for attribute in attributes:
         value = attribute.get_str(attribute_result, language)
@@ -264,7 +264,7 @@ def get_results(attribute_result, attributes, language):
                 # this attribute is an image.
                 # replace the current image only the priority is lower
                 # (the infobox contain only one image).
-                if attribute.priority < img_src_priority:
+                if attribute.priority > img_src_priority:
                     img_src = get_thumbnail(value)
                     img_src_priority = attribute.priority
             elif attribute_type == WDGeoAttribute:

From 1c151ae92bfe51faa89af523194dca631a7c9378 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9on=20Tiek=C3=B6tter?= <leon@tiekoetter.com>
Date: Mon, 7 Feb 2022 00:19:25 +0100
Subject: [PATCH 4/5] [fix] wikidata: URL decoding and file extension handling

Add '.png' to the second img_src_name if it has the extension '.svg'.
Use urllib.parse.unquote for URL decoding.
---
 searx/engines/wikidata.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
index c6a551e9c..a89f79bf9 100644
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@@ -4,7 +4,7 @@
 """
 
 
-from urllib.parse import urlencode
+from urllib.parse import urlencode, unquote
 from json import loads
 
 from dateutil.parser import isoparse
@@ -202,13 +202,13 @@ def get_thumbnail(img_src):
     """
     logger.debug('get_thumbnail(): %s', img_src)
     if not img_src is None and _IMG_SRC_DEFAULT_URL_PREFIX in img_src.split()[0]:
-        img_src_name = (
-            img_src.replace(_IMG_SRC_DEFAULT_URL_PREFIX, "")
-            .split("?", 1)[0]
-            .replace("%20", "_")
-            .replace("%28", "(")
-            .replace("%29", ")")
-        )
+        img_src_name = unquote(img_src.replace(_IMG_SRC_DEFAULT_URL_PREFIX, "").split("?", 1)[0].replace("%20", "_"))
+        img_src_name_first = img_src_name
+        img_src_name_second = img_src_name
+
+        if ".svg" in img_src_name.split()[0]:
+            img_src_name_second = img_src_name + ".png"
+
         img_src_size = img_src.replace(_IMG_SRC_DEFAULT_URL_PREFIX, "").split("?", 1)[1]
         img_src_size = img_src_size[img_src_size.index("=") + 1 : img_src_size.index("&")]
         img_src_name_md5 = md5(img_src_name.encode("utf-8")).hexdigest()
@@ -218,11 +218,11 @@ def get_thumbnail(img_src):
             + "/"
             + img_src_name_md5[0:2]
             + "/"
-            + img_src_name
+            + img_src_name_first
             + "/"
             + img_src_size
             + "px-"
-            + img_src_name
+            + img_src_name_second
         )
         logger.debug('get_thumbnail() redirected: %s', img_src)
 

From a967e5959012814210bebd48c7e0f75ab6f865ef Mon Sep 17 00:00:00 2001
From: Markus Heiser <markus.heiser@darmarit.de>
Date: Mon, 7 Feb 2022 10:15:32 +0100
Subject: [PATCH 5/5] [pylint] searx/engines/wikidata.py (no functional change)

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
---
 searx/engines/wikidata.py | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py
index a89f79bf9..592a51ec8 100644
--- a/searx/engines/wikidata.py
+++ b/searx/engines/wikidata.py
@@ -1,9 +1,10 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Wikidata
 """
- Wikidata
-"""
-
+# pylint: disable=missing-class-docstring
 
+from hashlib import md5
 from urllib.parse import urlencode, unquote
 from json import loads
 
@@ -18,7 +19,6 @@ from searx.engines.wikipedia import (  # pylint: disable=unused-import
     _fetch_supported_languages,
     supported_languages_url,
 )
-from hashlib import md5
 
 # about
 about = {
@@ -230,6 +230,7 @@ def get_thumbnail(img_src):
 
 
 def get_results(attribute_result, attributes, language):
+    # pylint: disable=too-many-branches
     results = []
     infobox_title = attribute_result.get('itemLabel')
     infobox_id = attribute_result['item']
@@ -322,6 +323,7 @@ def get_query(query, language):
 
 
 def get_attributes(language):
+    # pylint: disable=too-many-statements
     attributes = []
 
     def add_value(name):
@@ -462,7 +464,7 @@ def get_attributes(language):
 
 
 class WDAttribute:
-
+    # pylint: disable=no-self-use
     __slots__ = ('name',)
 
     def __init__(self, name):
@@ -483,7 +485,7 @@ class WDAttribute:
     def get_group_by(self):
         return ""
 
-    def get_str(self, result, language):
+    def get_str(self, result, language):  # pylint: disable=unused-argument
         return result.get(self.name + 's')
 
     def __repr__(self):
@@ -624,6 +626,7 @@ class WDImageAttribute(WDURLAttribute):
 
 
 class WDDateAttribute(WDAttribute):
+    # pylint: disable=no-self-use
     def get_select(self):
         return '?{name} ?{name}timePrecision ?{name}timeZone ?{name}timeCalendar'.replace('{name}', self.name)
 
@@ -644,7 +647,7 @@ class WDDateAttribute(WDAttribute):
     def get_group_by(self):
         return self.get_select()
 
-    def format_8(self, value, locale):
+    def format_8(self, value, locale):  # pylint: disable=unused-argument
         # precision: less than a year
         return value
 
@@ -717,7 +720,7 @@ class WDDateAttribute(WDAttribute):
                     else:
                         value = t[0]
                 return format_method(value, language)
-            except Exception:
+            except Exception:  # pylint: disable=broad-except
                 return value
         return value
 
@@ -731,7 +734,7 @@ def debug_explain_wikidata_query(query, method='GET'):
     return http_response.content
 
 
-def init(engine_settings=None):
+def init(engine_settings=None):  # pylint: disable=unused-argument
     # WIKIDATA_PROPERTIES : add unit symbols
     WIKIDATA_PROPERTIES.update(WIKIDATA_UNITS)