From 795e8af61dcb6f669ccc653f6524f652f7d62a5c Mon Sep 17 00:00:00 2001
From: Markus Heiser
Date: Sun, 13 Feb 2022 12:34:04 +0100
Subject: [PATCH] [fix] hostname_replace.py: don't stop replace URL in fields

This is a rewrite of hostname_replace.py that:

- no longer stops replacing URLs in the fields ('data_src', 'audio_src') when
  the result has no 'parsed_url',

- adds a comment explaining that whether a result is kept in or removed from
  the result list depends only on its 'parsed_url',

- adds a loop over ['data_src', 'audio_src'] instead of duplicating code lines.

Signed-off-by: Markus Heiser
---
 searx/plugins/hostname_replace.py | 38 +++++++++++++++++--------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/searx/plugins/hostname_replace.py b/searx/plugins/hostname_replace.py
index acbc6304c..51bd003b1 100644
--- a/searx/plugins/hostname_replace.py
+++ b/searx/plugins/hostname_replace.py
@@ -17,26 +17,30 @@ replacements = {re.compile(p): r for (p, r) in settings[plugin_id].items()} if p
 
 logger = logger.getChild(plugin_id)
 parsed = 'parsed_url'
+_url_fields = ['data_src', 'audio_src']
 
 
 def on_result(request, search, result):
-    if parsed not in result:
-        return True
+
     for (pattern, replacement) in replacements.items():
-        if pattern.search(result[parsed].netloc):
-            if not replacement:
-                return False
-            result[parsed] = result[parsed]._replace(netloc=pattern.sub(replacement, result[parsed].netloc))
-            result['url'] = urlunparse(result[parsed])
-        if result.get('data_src', False):
-            parsed_data_src = urlparse(result['data_src'])
-            if pattern.search(parsed_data_src.netloc):
-                parsed_data_src = parsed_data_src._replace(netloc=pattern.sub(replacement, parsed_data_src.netloc))
-                result['data_src'] = urlunparse(parsed_data_src)
-        if result.get('audio_src', False):
-            parsed_audio_src = urlparse(result['audio_src'])
-            if pattern.search(parsed_audio_src.netloc):
-                parsed_audio_src = parsed_audio_src._replace(netloc=pattern.sub(replacement, parsed_audio_src.netloc))
-                result['audio_src'] = urlunparse(parsed_audio_src)
+
+        if parsed in result:
+            if pattern.search(result[parsed].netloc):
+                # to keep or remove this result from the result list depends
+                # (only) on the 'parsed_url'
+                if not replacement:
+                    return False
+                result[parsed] = result[parsed]._replace(netloc=pattern.sub(replacement, result[parsed].netloc))
+                result['url'] = urlunparse(result[parsed])
+
+        for url_field in _url_fields:
+            if result.get(url_field):
+                url_src = urlparse(result[url_field])
+                if pattern.search(url_src.netloc):
+                    if not replacement:
+                        del result[url_field]
+                    else:
+                        url_src = url_src._replace(netloc=pattern.sub(replacement, url_src.netloc))
+                        result[url_field] = urlunparse(url_src)
 
     return True