[feat] duckduckgo: support for videos and news

This commit is contained in:
Bnyro 2023-10-07 10:26:04 +02:00 committed by Markus Heiser
parent c3ab49cd90
commit 48cb58bd2e
5 changed files with 405 additions and 21 deletions

View file

@ -12,7 +12,7 @@ DuckDuckGo Engines
.. automodule:: searx.engines.duckduckgo
:members:
.. automodule:: searx.engines.duckduckgo_images
.. automodule:: searx.engines.duckduckgo_extra
:members:
.. automodule:: searx.engines.duckduckgo_definitions

View file

@ -2390,6 +2390,334 @@
"zh-TW": "tw-tzh"
}
},
"duckduckgo videos": {
"all_locale": "wt-wt",
"custom": {
"lang_region": {
"ar-DZ": "ar_DZ",
"ar-JO": "ar_JO",
"ar-SA": "ar_SA",
"bn-IN": "bn_IN",
"de-CH": "de_CH",
"en-AU": "en_AU",
"en-CA": "en_CA",
"en-GB": "en_GB",
"es-AR": "es_AR",
"es-CL": "es_CL",
"es-CO": "es_CO",
"es-CR": "es_CR",
"es-EC": "es_EC",
"es-MX": "es_MX",
"es-PE": "es_PE",
"es-UY": "es_UY",
"es-VE": "es_VE",
"fr-BE": "fr_BE",
"fr-CA": "fr_CA",
"fr-CH": "fr_CH",
"nl-BE": "nl_BE",
"pt-BR": "pt_BR"
}
},
"data_type": "traits_v1",
"languages": {
"af": "af_ZA",
"ar": "ar_EG",
"ast": "ast_ES",
"az_Latn": "az_AZ",
"be": "be_BY",
"bg": "bg_BG",
"bn": "bn_BD",
"br": "br_FR",
"bs_Latn": "bs_BA",
"ca": "ca_ES",
"cs": "cs_CZ",
"cy": "cy_GB",
"da": "da_DK",
"de": "de_DE",
"el": "el_GR",
"en": "en_US",
"eo": "eo_XX",
"es": "es_ES",
"et": "et_EE",
"eu": "eu_ES",
"fa": "fa_IR",
"fi": "fi_FI",
"fil": "tl_PH",
"fr": "fr_FR",
"ga": "ga_IE",
"gd": "gd_GB",
"gl": "gl_ES",
"he": "he_IL",
"hi": "hi_IN",
"hr": "hr_HR",
"hu": "hu_HU",
"hy": "hy_AM",
"id": "id_ID",
"is": "is_IS",
"it": "it_IT",
"ja": "ja_JP",
"kab": "kab_DZ",
"kn": "kn_IN",
"ko": "ko_KR",
"ku": "ku",
"kw": "kw_GB",
"lt": "lt_LT",
"lv": "lv_LV",
"ml": "ml_IN",
"mr": "mr_IN",
"ms": "ms_MY",
"nb": "nb_NO",
"nl": "nl_NL",
"nn": "nn_NO",
"pl": "pl_PL",
"pt": "pt_PT",
"ro": "ro_RO",
"ru": "ru_RU",
"sc": "sc_IT",
"si": "si_LK",
"sk": "sk_SK",
"sl": "sl_SI",
"sq": "sq_AL",
"sr_Cyrl": "sr_RS",
"sv": "sv_SE",
"ta": "ta_IN",
"te": "te_IN",
"th": "th_TH",
"tr": "tr_TR",
"uk": "uk_UA",
"ur": "ur_PK",
"vi": "vi_VN",
"zh_Hans": "zh_CN",
"zh_Hant": "zh_TW"
},
"regions": {
"ar-SA": "xa-ar",
"bg-BG": "bg-bg",
"ca-ES": "es-ca",
"cs-CZ": "cz-cs",
"da-DK": "dk-da",
"de-AT": "at-de",
"de-CH": "ch-de",
"de-DE": "de-de",
"el-GR": "gr-el",
"en-AU": "au-en",
"en-CA": "ca-en",
"en-GB": "uk-en",
"en-IE": "ie-en",
"en-IL": "il-en",
"en-IN": "in-en",
"en-MY": "my-en",
"en-NZ": "nz-en",
"en-PH": "ph-en",
"en-PK": "pk-en",
"en-SG": "sg-en",
"en-US": "us-en",
"en-ZA": "za-en",
"es-AR": "ar-es",
"es-CL": "cl-es",
"es-CO": "co-es",
"es-ES": "es-es",
"es-MX": "mx-es",
"es-PE": "pe-es",
"es-US": "us-es",
"et-EE": "ee-et",
"fi-FI": "fi-fi",
"fr-BE": "be-fr",
"fr-CA": "ca-fr",
"fr-CH": "ch-fr",
"fr-FR": "fr-fr",
"hr-HR": "hr-hr",
"hu-HU": "hu-hu",
"id-ID": "id-en",
"it-IT": "it-it",
"ja-JP": "jp-jp",
"ko-KR": "kr-kr",
"lt-LT": "lt-lt",
"lv-LV": "lv-lv",
"nb-NO": "no-no",
"nl-BE": "be-nl",
"nl-NL": "nl-nl",
"pl-PL": "pl-pl",
"pt-BR": "br-pt",
"pt-PT": "pt-pt",
"ro-RO": "ro-ro",
"ru-RU": "ru-ru",
"sk-SK": "sk-sk",
"sl-SI": "sl-sl",
"sv-SE": "se-sv",
"th-TH": "th-en",
"tr-TR": "tr-tr",
"uk-UA": "ua-uk",
"vi-VN": "vn-en",
"zh-CN": "cn-zh",
"zh-HK": "hk-tzh",
"zh-TW": "tw-tzh"
}
},
"duckduckgo news": {
"all_locale": "wt-wt",
"custom": {
"lang_region": {
"ar-DZ": "ar_DZ",
"ar-JO": "ar_JO",
"ar-SA": "ar_SA",
"bn-IN": "bn_IN",
"de-CH": "de_CH",
"en-AU": "en_AU",
"en-CA": "en_CA",
"en-GB": "en_GB",
"es-AR": "es_AR",
"es-CL": "es_CL",
"es-CO": "es_CO",
"es-CR": "es_CR",
"es-EC": "es_EC",
"es-MX": "es_MX",
"es-PE": "es_PE",
"es-UY": "es_UY",
"es-VE": "es_VE",
"fr-BE": "fr_BE",
"fr-CA": "fr_CA",
"fr-CH": "fr_CH",
"nl-BE": "nl_BE",
"pt-BR": "pt_BR"
}
},
"data_type": "traits_v1",
"languages": {
"af": "af_ZA",
"ar": "ar_EG",
"ast": "ast_ES",
"az_Latn": "az_AZ",
"be": "be_BY",
"bg": "bg_BG",
"bn": "bn_BD",
"br": "br_FR",
"bs_Latn": "bs_BA",
"ca": "ca_ES",
"cs": "cs_CZ",
"cy": "cy_GB",
"da": "da_DK",
"de": "de_DE",
"el": "el_GR",
"en": "en_US",
"eo": "eo_XX",
"es": "es_ES",
"et": "et_EE",
"eu": "eu_ES",
"fa": "fa_IR",
"fi": "fi_FI",
"fil": "tl_PH",
"fr": "fr_FR",
"ga": "ga_IE",
"gd": "gd_GB",
"gl": "gl_ES",
"he": "he_IL",
"hi": "hi_IN",
"hr": "hr_HR",
"hu": "hu_HU",
"hy": "hy_AM",
"id": "id_ID",
"is": "is_IS",
"it": "it_IT",
"ja": "ja_JP",
"kab": "kab_DZ",
"kn": "kn_IN",
"ko": "ko_KR",
"ku": "ku",
"kw": "kw_GB",
"lt": "lt_LT",
"lv": "lv_LV",
"ml": "ml_IN",
"mr": "mr_IN",
"ms": "ms_MY",
"nb": "nb_NO",
"nl": "nl_NL",
"nn": "nn_NO",
"pl": "pl_PL",
"pt": "pt_PT",
"ro": "ro_RO",
"ru": "ru_RU",
"sc": "sc_IT",
"si": "si_LK",
"sk": "sk_SK",
"sl": "sl_SI",
"sq": "sq_AL",
"sr_Cyrl": "sr_RS",
"sv": "sv_SE",
"ta": "ta_IN",
"te": "te_IN",
"th": "th_TH",
"tr": "tr_TR",
"uk": "uk_UA",
"ur": "ur_PK",
"vi": "vi_VN",
"zh_Hans": "zh_CN",
"zh_Hant": "zh_TW"
},
"regions": {
"ar-SA": "xa-ar",
"bg-BG": "bg-bg",
"ca-ES": "es-ca",
"cs-CZ": "cz-cs",
"da-DK": "dk-da",
"de-AT": "at-de",
"de-CH": "ch-de",
"de-DE": "de-de",
"el-GR": "gr-el",
"en-AU": "au-en",
"en-CA": "ca-en",
"en-GB": "uk-en",
"en-IE": "ie-en",
"en-IL": "il-en",
"en-IN": "in-en",
"en-MY": "my-en",
"en-NZ": "nz-en",
"en-PH": "ph-en",
"en-PK": "pk-en",
"en-SG": "sg-en",
"en-US": "us-en",
"en-ZA": "za-en",
"es-AR": "ar-es",
"es-CL": "cl-es",
"es-CO": "co-es",
"es-ES": "es-es",
"es-MX": "mx-es",
"es-PE": "pe-es",
"es-US": "us-es",
"et-EE": "ee-et",
"fi-FI": "fi-fi",
"fr-BE": "be-fr",
"fr-CA": "ca-fr",
"fr-CH": "ch-fr",
"fr-FR": "fr-fr",
"hr-HR": "hr-hr",
"hu-HU": "hu-hu",
"id-ID": "id-en",
"it-IT": "it-it",
"ja-JP": "jp-jp",
"ko-KR": "kr-kr",
"lt-LT": "lt-lt",
"lv-LV": "lv-lv",
"nb-NO": "no-no",
"nl-BE": "be-nl",
"nl-NL": "nl-nl",
"pl-PL": "pl-pl",
"pt-BR": "br-pt",
"pt-PT": "pt-pt",
"ro-RO": "ro-ro",
"ru-RU": "ru-ru",
"sk-SK": "sk-sk",
"sl-SI": "sl-sl",
"sv-SE": "se-sv",
"th-TH": "th-en",
"tr-TR": "tr-tr",
"uk-UA": "ua-uk",
"vi-VN": "vn-en",
"zh-CN": "cn-zh",
"zh-HK": "hk-tzh",
"zh-TW": "tw-tzh"
}
},
"duckduckgo weather": {
"all_locale": "wt-wt",
"custom": {

View file

@ -66,8 +66,10 @@ def cache_vqd(query, value):
The vqd value depends on the query string and is needed for the follow-up
pages or the images loaded by an XMLHttpRequest:
- DuckDuckGo Web: `https://links.duckduckgo.com/d.js?q=...&vqd=...`
- DuckDuckGo Images: `https://duckduckgo.com/i.js??q=...&vqd=...`
- DuckDuckGo Web: ``https://links.duckduckgo.com/d.js?q=...&vqd=...``
- DuckDuckGo Images: ``https://duckduckgo.com/i.js?q=...&vqd=...``
- DuckDuckGo Videos: ``https://duckduckgo.com/v.js?q=...&vqd=...``
- DuckDuckGo News: ``https://duckduckgo.com/news.js?q=...&vqd=...``
"""
c = redisdb.client()

View file

@ -1,9 +1,10 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
DuckDuckGo Images
~~~~~~~~~~~~~~~~~
DuckDuckGo Extra (images, videos, news)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
"""
from datetime import datetime
from typing import TYPE_CHECKING
from urllib.parse import urlencode
@ -32,6 +33,9 @@ about = {
# engine dependent config
categories = ['images', 'web']
ddg_category = 'images'
"""The category must be any of ``images``, ``videos`` and ``news``
"""
paging = True
safesearch = True
send_accept_language_header = True
@ -39,6 +43,8 @@ send_accept_language_header = True
safesearch_cookies = {0: '-2', 1: None, 2: '1'}
safesearch_args = {0: '1', 1: None, 2: '1'}
search_path_map = {'images': 'i', 'videos': 'v', 'news': 'news'}
def request(query, params):
@ -69,28 +75,61 @@ def request(query, params):
args['p'] = safe_search # "-1", "1"
logger.debug("cookies: %s", params['cookies'])
args = urlencode(args)
params['url'] = 'https://duckduckgo.com/i.js?{args}'.format(args=args)
params['url'] = f'https://duckduckgo.com/{search_path_map[ddg_category]}.js?{urlencode(args)}'
return params
def _image_result(result):
return {
'template': 'images.html',
'url': result['url'],
'title': result['title'],
'content': '',
'thumbnail_src': result['thumbnail'],
'img_src': result['image'],
'img_format': '%s x %s' % (result['width'], result['height']),
'source': result['source'],
}
def _video_result(result):
return {
'template': 'videos.html',
'url': result['content'],
'title': result['title'],
'content': result['description'],
'thumbnail': result['images'].get('small') or result['images'].get('medium'),
'iframe_src': result['embed_url'],
'source': result['provider'],
'length': result['duration'],
'metadata': result.get('uploader'),
}
def _news_result(result):
return {
'url': result['url'],
'title': result['title'],
'content': result['excerpt'],
'source': result['source'],
'publishedDate': datetime.utcfromtimestamp(result['date']),
}
def response(resp):
results = []
res_json = resp.json()
for result in res_json['results']:
results.append(
{
'template': 'images.html',
'title': result['title'],
'content': '',
'thumbnail_src': result['thumbnail'],
'img_src': result['image'],
'url': result['url'],
'img_format': '%s x %s' % (result['width'], result['height']),
'source': result['source'],
}
)
if ddg_category == 'images':
results.append(_image_result(result))
elif ddg_category == 'videos':
results.append(_video_result(result))
elif ddg_category == 'news':
results.append(_news_result(result))
else:
raise ValueError(f"Invalid duckduckgo category: {ddg_category}")
return results

View file

@ -603,9 +603,24 @@ engines:
shortcut: ddg
- name: duckduckgo images
engine: duckduckgo_images
engine: duckduckgo_extra
categories: [images, web]
ddg_category: images
shortcut: ddi
timeout: 3.0
disabled: true
- name: duckduckgo videos
engine: duckduckgo_extra
categories: [videos, web]
ddg_category: videos
shortcut: ddv
disabled: true
- name: duckduckgo news
engine: duckduckgo_extra
categories: [news, web]
ddg_category: news
shortcut: ddn
disabled: true
- name: duckduckgo weather