From e4cf0a7d4f0416c9b7c45d45db26ccb3eb09af42 Mon Sep 17 00:00:00 2001
From: allixx <1695323+allixx@users.noreply.github.com>
Date: Tue, 19 Dec 2023 11:21:54 +0300
Subject: [PATCH] [fix] do highlight replacement at once
Highlight all search-query terms in a search result in a single pass.
This fixes the case where the search query contains a word that also occurs
in the highlight HTML markup itself (for example `class`), which caused broken
HTML to appear in the search results.
Closes #3057
---
searx/webutils.py | 6 ++----
tests/unit/test_webutils.py | 5 +++++
2 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/searx/webutils.py b/searx/webutils.py
index bfc6b22f7..8cdcab84b 100644
--- a/searx/webutils.py
+++ b/searx/webutils.py
@@ -290,10 +290,8 @@ def highlight_content(content, query):
if len(qs) > 0:
queries.extend(re.findall(regex_highlight_cjk(qs), content, flags=re.I | re.U))
if len(queries) > 0:
- for q in set(queries):
- content = re.sub(
- regex_highlight_cjk(q), f'{q}'.replace('\\', r'\\'), content
- )
+ regex = re.compile("|".join(map(regex_highlight_cjk, queries)))
+ return regex.sub(lambda match: f'{match.group(0)}'.replace('\\', r'\\'), content)
return content
diff --git a/tests/unit/test_webutils.py b/tests/unit/test_webutils.py
index 244d2b180..b4395539b 100644
--- a/tests/unit/test_webutils.py
+++ b/tests/unit/test_webutils.py
@@ -57,6 +57,11 @@ class TestWebUtils(SearxTestCase):
]
),
),
+ (
+ 'a class',
+ 'a string with class.',
+ 'a string with class.',
+ ),
)
for query, content, expected in data:
self.assertEqual(webutils.highlight_content(content, query), expected)