forked from Ponysearch/Ponysearch
[mod] Dailymotion: improved request API & upgrade to data_type: traits_v1
- fetch_traits(): fetch locales (and languages) from dailymotion API
- removed obsolete data-type "supported_languages"
- add documentation
- improved argument list of the HTTP request:
  - add argument: family_filter_map
  - add conditional argument: localization
    Don't add localization and country arguments if the user does select a
    language (:de, :en, ..)
- improve code quality (mainly improve readability)

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
2499899554
commit
8a8c584fec
3 changed files with 129 additions and 111 deletions
8
docs/src/searx.engine.dailymotion.rst
Normal file
8
docs/src/searx.engine.dailymotion.rst
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
.. _dailymotion engine:
|
||||||
|
|
||||||
|
===========
|
||||||
|
Dailymotion
|
||||||
|
===========
|
||||||
|
|
||||||
|
.. automodule:: searx.engines.dailymotion
|
||||||
|
:members:
|
|
@ -366,8 +366,29 @@
|
||||||
"dailymotion": {
|
"dailymotion": {
|
||||||
"all_locale": null,
|
"all_locale": null,
|
||||||
"custom": {},
|
"custom": {},
|
||||||
"data_type": "supported_languages",
|
"data_type": "traits_v1",
|
||||||
"languages": {},
|
"languages": {
|
||||||
|
"ar": "ar",
|
||||||
|
"de": "de",
|
||||||
|
"el": "el",
|
||||||
|
"en": "en",
|
||||||
|
"es": "es",
|
||||||
|
"fr": "fr",
|
||||||
|
"id": "id",
|
||||||
|
"it": "it",
|
||||||
|
"ja": "ja",
|
||||||
|
"ko": "ko",
|
||||||
|
"ms": "ms",
|
||||||
|
"nl": "nl",
|
||||||
|
"pl": "pl",
|
||||||
|
"pt": "pt",
|
||||||
|
"ro": "ro",
|
||||||
|
"ru": "ru",
|
||||||
|
"th": "th",
|
||||||
|
"tr": "tr",
|
||||||
|
"vi": "vi",
|
||||||
|
"zh": "zh"
|
||||||
|
},
|
||||||
"regions": {
|
"regions": {
|
||||||
"ar-AE": "ar_AE",
|
"ar-AE": "ar_AE",
|
||||||
"ar-EG": "ar_EG",
|
"ar-EG": "ar_EG",
|
||||||
|
@ -418,58 +439,7 @@
|
||||||
"zh-CN": "zh_CN",
|
"zh-CN": "zh_CN",
|
||||||
"zh-TW": "zh_TW"
|
"zh-TW": "zh_TW"
|
||||||
},
|
},
|
||||||
"supported_languages": [
|
"supported_languages": {}
|
||||||
"ar_AA",
|
|
||||||
"ar_AE",
|
|
||||||
"ar_EG",
|
|
||||||
"ar_SA",
|
|
||||||
"de_AT",
|
|
||||||
"de_CH",
|
|
||||||
"de_DE",
|
|
||||||
"el_GR",
|
|
||||||
"en_AU",
|
|
||||||
"en_CA",
|
|
||||||
"en_EN",
|
|
||||||
"en_GB",
|
|
||||||
"en_HK",
|
|
||||||
"en_IE",
|
|
||||||
"en_IN",
|
|
||||||
"en_NG",
|
|
||||||
"en_PH",
|
|
||||||
"en_PK",
|
|
||||||
"en_SG",
|
|
||||||
"en_US",
|
|
||||||
"en_ZA",
|
|
||||||
"es_AR",
|
|
||||||
"es_ES",
|
|
||||||
"es_MX",
|
|
||||||
"fr_BE",
|
|
||||||
"fr_CA",
|
|
||||||
"fr_CH",
|
|
||||||
"fr_CI",
|
|
||||||
"fr_FR",
|
|
||||||
"fr_MA",
|
|
||||||
"fr_SN",
|
|
||||||
"fr_TN",
|
|
||||||
"id_ID",
|
|
||||||
"it_CH",
|
|
||||||
"it_IT",
|
|
||||||
"ja_JP",
|
|
||||||
"ko_KR",
|
|
||||||
"ms_MY",
|
|
||||||
"nl_BE",
|
|
||||||
"nl_NL",
|
|
||||||
"pl_PL",
|
|
||||||
"pt_BR",
|
|
||||||
"pt_PT",
|
|
||||||
"ro_RO",
|
|
||||||
"ru_RU",
|
|
||||||
"th_TH",
|
|
||||||
"tr_TR",
|
|
||||||
"vi_VN",
|
|
||||||
"zh_CN",
|
|
||||||
"zh_TW"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"duckduckgo": {
|
"duckduckgo": {
|
||||||
"all_locale": "wt-wt",
|
"all_locale": "wt-wt",
|
||||||
|
|
|
@ -1,9 +1,18 @@
|
||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
"""Dailymotion (Videos)
|
# lint: pylint
|
||||||
|
"""
|
||||||
|
Dailymotion (Videos)
|
||||||
|
~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
.. _REST GET: https://developers.dailymotion.com/tools/
|
||||||
|
.. _Global API Parameters: https://developers.dailymotion.com/api/#global-parameters
|
||||||
|
.. _Video filters API: https://developers.dailymotion.com/api/#video-filters
|
||||||
|
.. _Fields selection: https://developers.dailymotion.com/api/#fields-selection
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Set
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode
|
||||||
import time
|
import time
|
||||||
|
@ -12,8 +21,16 @@ import babel
|
||||||
from searx.exceptions import SearxEngineAPIException
|
from searx.exceptions import SearxEngineAPIException
|
||||||
from searx import network
|
from searx import network
|
||||||
from searx.utils import html_to_text
|
from searx.utils import html_to_text
|
||||||
|
from searx.locales import region_tag, language_tag
|
||||||
from searx.enginelib.traits import EngineTraits
|
from searx.enginelib.traits import EngineTraits
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger: logging.Logger
|
||||||
|
|
||||||
|
traits: EngineTraits
|
||||||
|
|
||||||
# about
|
# about
|
||||||
about = {
|
about = {
|
||||||
"website": 'https://www.dailymotion.com',
|
"website": 'https://www.dailymotion.com',
|
||||||
|
@ -38,11 +55,24 @@ time_delta_dict = {
|
||||||
}
|
}
|
||||||
|
|
||||||
safesearch = True
|
safesearch = True
|
||||||
safesearch_params = {2: '&is_created_for_kids=true', 1: '&is_created_for_kids=true', 0: ''}
|
safesearch_params = {
|
||||||
|
2: {'is_created_for_kids': 'true'},
|
||||||
|
1: {'is_created_for_kids': 'true'},
|
||||||
|
0: {},
|
||||||
|
}
|
||||||
|
"""True if this video is "Created for Kids" / intends to target an audience
|
||||||
|
under the age of 16 (``is_created_for_kids`` in `Video filters API`_ )
|
||||||
|
"""
|
||||||
|
|
||||||
# search-url
|
family_filter_map = {
|
||||||
# - https://developers.dailymotion.com/tools/
|
2: 'true',
|
||||||
# - https://www.dailymotion.com/doc/api/obj-video.html
|
1: 'true',
|
||||||
|
0: 'false',
|
||||||
|
}
|
||||||
|
"""By default, the family filter is turned on. Setting this parameter to
|
||||||
|
``false`` will stop filtering-out explicit content from searches and global
|
||||||
|
contexts (``family_filter`` in `Global API Parameters`_ ).
|
||||||
|
"""
|
||||||
|
|
||||||
result_fields = [
|
result_fields = [
|
||||||
'allow_embed',
|
'allow_embed',
|
||||||
|
@ -54,27 +84,21 @@ result_fields = [
|
||||||
'thumbnail_360_url',
|
'thumbnail_360_url',
|
||||||
'id',
|
'id',
|
||||||
]
|
]
|
||||||
search_url = (
|
"""`Fields selection`_, by default, a few fields are returned. To request more
|
||||||
'https://api.dailymotion.com/videos?'
|
specific fields, the ``fields`` parameter is used with the list of fields
|
||||||
'fields={fields}&password_protected={password_protected}&private={private}&sort={sort}&limit={limit}'
|
SearXNG needs in the response to build a video result list.
|
||||||
).format(
|
"""
|
||||||
fields=','.join(result_fields),
|
|
||||||
password_protected='false',
|
search_url = 'https://api.dailymotion.com/videos?'
|
||||||
private='false',
|
"""URL to retrieve a list of videos.
|
||||||
sort='relevance',
|
|
||||||
limit=number_of_results,
|
- `REST GET`_
|
||||||
)
|
- `Global API Parameters`_
|
||||||
|
- `Video filters API`_
|
||||||
|
"""
|
||||||
|
|
||||||
iframe_src = "https://www.dailymotion.com/embed/video/{video_id}"
|
iframe_src = "https://www.dailymotion.com/embed/video/{video_id}"
|
||||||
|
"""URL template to embed video in SearXNG's result list."""
|
||||||
# The request query filters by 'languages' & 'country', therefore instead of
|
|
||||||
# fetching only languages we need to fetch locales.
|
|
||||||
supported_languages_url = 'https://api.dailymotion.com/locales'
|
|
||||||
supported_languages_iso639: Set[str] = set()
|
|
||||||
|
|
||||||
|
|
||||||
def init(_engine_settings):
|
|
||||||
global supported_languages_iso639
|
|
||||||
supported_languages_iso639 = set([language.split('_')[0] for language in supported_languages])
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
|
def request(query, params):
|
||||||
|
@ -82,34 +106,42 @@ def request(query, params):
|
||||||
if not query:
|
if not query:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
language = params['language']
|
eng_region = traits.get_region(params['searxng_locale'], 'en_US')
|
||||||
if language == 'all':
|
eng_lang = traits.get_language(params['searxng_locale'], 'en')
|
||||||
language = 'en-US'
|
|
||||||
locale = babel.Locale.parse(language, sep='-')
|
|
||||||
|
|
||||||
language_iso639 = locale.language
|
args = {
|
||||||
if locale.language not in supported_languages_iso639:
|
|
||||||
language_iso639 = 'en'
|
|
||||||
|
|
||||||
query_args = {
|
|
||||||
'search': query,
|
'search': query,
|
||||||
'languages': language_iso639,
|
'family_filter': family_filter_map.get(params['safesearch'], 'false'),
|
||||||
|
'thumbnail_ratio': 'original', # original|widescreen|square
|
||||||
|
# https://developers.dailymotion.com/api/#video-filters
|
||||||
|
'languages': eng_lang,
|
||||||
'page': params['pageno'],
|
'page': params['pageno'],
|
||||||
|
'password_protected': 'false',
|
||||||
|
'private': 'false',
|
||||||
|
'sort': 'relevance',
|
||||||
|
'limit': number_of_results,
|
||||||
|
'fields': ','.join(result_fields),
|
||||||
}
|
}
|
||||||
|
|
||||||
if locale.territory:
|
args.update(safesearch_params.get(params['safesearch'], {}))
|
||||||
localization = locale.language + '_' + locale.territory
|
|
||||||
if localization in supported_languages:
|
# Don't add localization and country arguments if the user does select a
|
||||||
query_args['country'] = locale.territory
|
# language (:de, :en, ..)
|
||||||
|
|
||||||
|
if len(params['searxng_locale'].split('-')) > 1:
|
||||||
|
# https://developers.dailymotion.com/api/#global-parameters
|
||||||
|
args['localization'] = eng_region
|
||||||
|
args['country'] = eng_region.split('_')[1]
|
||||||
|
# Insufficient rights for the `ams_country' parameter of route `GET /videos'
|
||||||
|
# 'ams_country': eng_region.split('_')[1],
|
||||||
|
|
||||||
time_delta = time_delta_dict.get(params["time_range"])
|
time_delta = time_delta_dict.get(params["time_range"])
|
||||||
if time_delta:
|
if time_delta:
|
||||||
created_after = datetime.now() - time_delta
|
created_after = datetime.now() - time_delta
|
||||||
query_args['created_after'] = datetime.timestamp(created_after)
|
args['created_after'] = datetime.timestamp(created_after)
|
||||||
|
|
||||||
query_str = urlencode(query_args)
|
query_str = urlencode(args)
|
||||||
params['url'] = search_url + '&' + query_str + safesearch_params.get(params['safesearch'], '')
|
params['url'] = search_url + query_str
|
||||||
params['raise_for_httperror'] = False
|
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
@ -168,31 +200,27 @@ def response(resp):
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
# get supported languages from their site
|
|
||||||
def _fetch_supported_languages(resp):
|
|
||||||
response_json = resp.json()
|
|
||||||
return [item['locale'] for item in response_json['list']]
|
|
||||||
|
|
||||||
|
|
||||||
def fetch_traits(engine_traits: EngineTraits):
|
def fetch_traits(engine_traits: EngineTraits):
|
||||||
"""Fetch regions from dailymotion.
|
"""Fetch locales & languages from dailymotion.
|
||||||
|
|
||||||
|
Locales fetched from `api/locales <https://api.dailymotion.com/locales>`_.
|
||||||
There are duplications in the locale codes returned from Dailymotion which
|
There are duplications in the locale codes returned from Dailymotion which
|
||||||
can be ignored::
|
can be ignored::
|
||||||
|
|
||||||
en_EN --> en_GB, en_US
|
en_EN --> en_GB, en_US
|
||||||
ar_AA --> ar_EG, ar_AE, ar_SA
|
ar_AA --> ar_EG, ar_AE, ar_SA
|
||||||
|
|
||||||
|
The language list `api/languages <https://api.dailymotion.com/languages>`_
|
||||||
|
contains over 7000 *languages* codes (see PR1071_). We use only those
|
||||||
|
language codes that are used in the locales.
|
||||||
|
|
||||||
|
.. _PR1071: https://github.com/searxng/searxng/pull/1071
|
||||||
|
|
||||||
"""
|
"""
|
||||||
# pylint: disable=import-outside-toplevel
|
|
||||||
|
|
||||||
engine_traits.data_type = 'supported_languages' # deprecated
|
|
||||||
|
|
||||||
from searx.locales import region_tag
|
|
||||||
|
|
||||||
resp = network.get('https://api.dailymotion.com/locales')
|
resp = network.get('https://api.dailymotion.com/locales')
|
||||||
if not resp.ok:
|
if not resp.ok:
|
||||||
print("ERROR: response from peertube is not OK.")
|
print("ERROR: response from dailymotion/locales is not OK.")
|
||||||
|
|
||||||
for item in resp.json()['list']:
|
for item in resp.json()['list']:
|
||||||
eng_tag = item['locale']
|
eng_tag = item['locale']
|
||||||
|
@ -210,3 +238,15 @@ def fetch_traits(engine_traits: EngineTraits):
|
||||||
print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
|
print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
|
||||||
continue
|
continue
|
||||||
engine_traits.regions[sxng_tag] = eng_tag
|
engine_traits.regions[sxng_tag] = eng_tag
|
||||||
|
|
||||||
|
locale_lang_list = [x.split('_')[0] for x in engine_traits.regions.values()]
|
||||||
|
|
||||||
|
resp = network.get('https://api.dailymotion.com/languages')
|
||||||
|
if not resp.ok:
|
||||||
|
print("ERROR: response from dailymotion/languages is not OK.")
|
||||||
|
|
||||||
|
for item in resp.json()['list']:
|
||||||
|
eng_tag = item['code']
|
||||||
|
if eng_tag in locale_lang_list:
|
||||||
|
sxng_tag = language_tag(babel.Locale.parse(eng_tag))
|
||||||
|
engine_traits.languages[sxng_tag] = eng_tag
|
||||||
|
|
Loading…
Reference in a new issue