forked from Ponysearch/Ponysearch
[mod] Peertube: re-engineered & upgrade to data_type: traits_v1
- fetch_traits(): Fetch languages from peertube's search-index source code. [mod] Include migration of the request method from 'supported_languages' to the 'traits' (EngineTraits) object. [fix] The old supported_languages_url is no longer valid since the sources have been moved to a different path. - fixed code to pass pylint - request(): complete re-implementation based on the API docs [1] - response(): complete re-implementation, adds several fields missed before - add source code documentation [1] https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
6e5f22e558
commit
a7fe22770a
4 changed files with 197 additions and 77 deletions
19
docs/src/searx.engines.peertube.rst
Normal file
19
docs/src/searx.engines.peertube.rst
Normal file
|
@ -0,0 +1,19 @@
|
|||
.. _peertube engines:
|
||||
|
||||
================
|
||||
Peertube Engines
|
||||
================
|
||||
|
||||
.. contents:: Contents
|
||||
:depth: 2
|
||||
:local:
|
||||
:backlinks: entry
|
||||
|
||||
|
||||
.. _peertube video engine:
|
||||
|
||||
Peertube Video
|
||||
==============
|
||||
|
||||
.. automodule:: searx.engines.peertube
|
||||
:members:
|
|
@ -1468,31 +1468,32 @@
|
|||
"peertube": {
|
||||
"all_locale": null,
|
||||
"custom": {},
|
||||
"data_type": "supported_languages",
|
||||
"languages": {},
|
||||
"data_type": "traits_v1",
|
||||
"languages": {
|
||||
"ca": "ca",
|
||||
"cs": "cs",
|
||||
"de": "de",
|
||||
"el": "el",
|
||||
"en": "en",
|
||||
"eo": "eo",
|
||||
"es": "es",
|
||||
"eu": "eu",
|
||||
"fi": "fi",
|
||||
"fr": "fr",
|
||||
"gd": "gd",
|
||||
"it": "it",
|
||||
"ja": "ja",
|
||||
"nl": "nl",
|
||||
"pl": "pl",
|
||||
"pt": "pt",
|
||||
"ru": "ru",
|
||||
"sv": "sv",
|
||||
"zh": "zh",
|
||||
"zh_Hans": "zh",
|
||||
"zh_Hant": "zh"
|
||||
},
|
||||
"regions": {},
|
||||
"supported_languages": [
|
||||
"ca",
|
||||
"cs",
|
||||
"de",
|
||||
"el",
|
||||
"en",
|
||||
"eo",
|
||||
"es",
|
||||
"eu",
|
||||
"fi",
|
||||
"fr",
|
||||
"gd",
|
||||
"it",
|
||||
"ja",
|
||||
"nl",
|
||||
"oc",
|
||||
"pl",
|
||||
"pt",
|
||||
"ru",
|
||||
"sv",
|
||||
"zh"
|
||||
]
|
||||
"supported_languages": {}
|
||||
},
|
||||
"qwant": {
|
||||
"all_locale": null,
|
||||
|
|
|
@ -1,18 +1,30 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""
|
||||
peertube (Videos)
|
||||
# lint: pylint
|
||||
"""Peertube and :py:obj:`SepiaSearch <searx.engines.sepiasearch>` do share
|
||||
(more or less) the same REST API and the schema of the JSON result is identical.
|
||||
|
||||
"""
|
||||
|
||||
from json import loads
|
||||
from datetime import datetime
|
||||
import re
|
||||
from urllib.parse import urlencode
|
||||
from searx.utils import html_to_text
|
||||
from datetime import datetime
|
||||
from dateutil.parser import parse
|
||||
from dateutil.relativedelta import relativedelta
|
||||
|
||||
import babel
|
||||
|
||||
from searx import network
|
||||
from searx.locales import language_tag
|
||||
from searx.utils import html_to_text
|
||||
from searx.enginelib.traits import EngineTraits
|
||||
|
||||
traits: EngineTraits
|
||||
|
||||
# about
|
||||
about = {
|
||||
# pylint: disable=line-too-long
|
||||
"website": 'https://joinpeertube.org',
|
||||
"wikidata_id": 'Q50938515',
|
||||
"official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html',
|
||||
"official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos',
|
||||
"use_official_api": True,
|
||||
"require_api_key": False,
|
||||
"results": 'JSON',
|
||||
|
@ -22,66 +34,155 @@ about = {
|
|||
categories = ["videos"]
paging = True

base_url = "https://peer.tube"
"""Base URL of the Peertube instance.  A list of instances is available at:

- https://instances.joinpeertube.org/instances
"""

# Legacy (pre-traits) language endpoint.  The commit message states this URL is
# no longer valid; the name is kept only so that existing references to it do
# not break.  It is deliberately placed *after* the ``base_url`` docstring so
# that the attribute docstring above stays attached to ``base_url``.
supported_languages_url = 'https://peer.tube/api/v1/videos/languages'

time_range_support = True
# Offsets added to today's date to build the ``startDate`` query argument in
# request().  'day' is an empty delta, i.e. the start date is today.
time_range_table = {
    'day': relativedelta(),
    'week': relativedelta(weeks=-1),
    'month': relativedelta(months=-1),
    'year': relativedelta(years=-1),
}

safesearch = True
# Maps the safesearch level (0, 1, 2) to the value sent as the ``nsfw`` query
# argument in request().
safesearch_table = {0: 'both', 1: 'false', 2: 'false'}
|
||||
|
||||
|
||||
def minute_to_hm(minute):
    """Format an integer as ``"<quotient>:<remainder>"`` of a division by 60.

    The remainder is zero-padded to two digits (e.g. ``125`` -> ``"2:05"``).

    :param minute: value to format
    :return: the formatted string, or ``None`` if *minute* is not an ``int``
    """
    if not isinstance(minute, int):
        return None
    quotient, remainder = divmod(minute, 60)
    return "%d:%02d" % (quotient, remainder)
|
||||
|
||||
|
||||
# do search-request
|
||||
def request(query, params):
    """Assemble request for the Peertube API.

    Builds the ``/api/v1/search/videos`` URL below :py:obj:`base_url` and
    stores it in ``params['url']``.

    :param query: the search term; an empty query yields no request
    :param params: request parameters; the keys ``searxng_locale``,
        ``pageno``, ``safesearch`` and ``time_range`` are read, ``url`` is
        written
    :return: ``False`` if *query* is empty, otherwise the updated *params*
    """

    if not query:
        return False

    # None when the engine has no language matching the selected locale
    eng_lang = traits.get_language(params['searxng_locale'], None)

    args = {
        'search': query,
        'searchTarget': 'search-index',  # Vidiversum
        'resultType': 'videos',
        'start': (params['pageno'] - 1) * 10,
        'count': 10,
        # -createdAt: sort by date ascending / createdAt: date descending
        'sort': '-match',  # sort by *match descending*
        'nsfw': safesearch_table[params['safesearch']],
    }
    params['url'] = base_url.rstrip("/") + "/api/v1/search/videos?" + urlencode(args)

    if eng_lang is not None:
        # array arguments are appended by hand to keep the literal '[]' suffix
        params['url'] += '&languageOneOf[]=' + eng_lang
        params['url'] += '&boostLanguages[]=' + eng_lang

    if params['time_range'] in time_range_table:
        start_date = datetime.now().date() + time_range_table[params['time_range']]
        params['url'] += '&startDate=' + start_date.isoformat()

    return params
|
||||
|
||||
|
||||
def _get_offset_from_pageno(pageno):
|
||||
return (pageno - 1) * 15 + 1
|
||||
|
||||
|
||||
# get response from search-request
|
||||
def response(resp):
    """Parse the response of a search request.

    Thin wrapper that delegates to :py:obj:`video_response`.

    :param resp: the HTTP response object of the search request
    :return: whatever :py:obj:`video_response` returns for *resp*
    """
    return video_response(resp)
|
||||
|
||||
|
||||
def video_response(resp):
    """Parse video response from SepiaSearch and Peertube instances.

    :param resp: HTTP response whose JSON body holds the videos in a ``data``
        list
    :return: list of result dicts for the ``videos.html`` template; empty if
        the JSON body has no ``data`` key
    :raises KeyError: if an item lacks ``url``, ``name`` or ``publishedAt``
    """
    json_data = resp.json()

    # return empty array if there are no results
    if 'data' not in json_data:
        return []

    results = []

    for result in json_data['data']:
        # ``or {}`` / ``or []`` also covers keys that are present but null
        channel = result.get('channel') or {}
        account = result.get('account') or {}
        tags = result.get('tags') or []

        # "name@host"; degrades gracefully when one or both parts are missing
        # instead of raising TypeError on ``None + '@'``
        channel_handle = '@'.join(part for part in (channel.get('name'), channel.get('host')) if part)

        metadata = [
            item
            for item in (
                channel.get('displayName'),
                channel_handle,
                ', '.join(tags),
            )
            if item
        ]

        results.append(
            {
                'url': result['url'],
                'title': result['name'],
                'content': html_to_text(result.get('description') or ''),
                'author': account.get('displayName'),
                'length': minute_to_hm(result.get('duration')),
                'template': 'videos.html',
                'publishedDate': parse(result['publishedAt']),
                'iframe_src': result.get('embedUrl'),
                # fall back to the preview image when there is no thumbnail
                'thumbnail': result.get('thumbnailUrl') or result.get('previewUrl'),
                'metadata': ' | '.join(metadata),
            }
        )

    return results
|
||||
|
||||
|
||||
def _fetch_supported_languages(resp):
|
||||
videolanguages = resp.json()
|
||||
peertube_languages = list(videolanguages.keys())
|
||||
return peertube_languages
|
||||
def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages from peertube's search-index source code.

    The language list is scraped from the ``videoLanguages ()`` section of the
    ``Filters.vue`` client component and stored in
    ``engine_traits.languages`` (SearXNG tag --> engine tag).  On a failed
    download or an unparsable source an error is printed and nothing is
    changed.

    See videoLanguages_ in commit `8ed5c729 - Refactor and redesign client`_

    .. _8ed5c729 - Refactor and redesign client:
       https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729
    .. _videoLanguages:
       https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729#3d8747f9a60695c367c70bb64efba8f403721fad_0_291
    """

    filters_url = 'https://framagit.org/framasoft/peertube/search-index/-/raw/master/client/src/components/Filters.vue'

    # the response from search-index repository is very slow
    resp = network.get(filters_url, timeout=60)

    if not resp.ok:
        print("ERROR: response from peertube is not OK.")
        return

    # capture everything from the line after ``videoLanguages ()`` up to the
    # first closing bracket
    lang_section = re.search(r"videoLanguages \(\)[^\n]+(.*?)\]", resp.text, re.DOTALL)
    if not lang_section:
        print("ERROR: can't determine languages from peertube")
        return

    for entry in re.finditer(r"\{ id: '([a-z]+)', label:", lang_section.group(1)):
        eng_tag = entry.group(1)

        if eng_tag == 'oc':
            # Occitan is not known by babel, its closest relative is Catalan
            # but 'ca' is already in the list of engine_traits.languages -->
            # 'oc' will be ignored.
            continue

        try:
            sxng_tag = language_tag(babel.Locale.parse(eng_tag))
        except babel.UnknownLocaleError:
            print("ERROR: %s is unknown by babel" % eng_tag)
            continue

        existing = engine_traits.languages.get(sxng_tag)
        if existing:
            # first mapping wins; only report when the tags actually differ
            if existing != eng_tag:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, existing, eng_tag))
            continue

        engine_traits.languages[sxng_tag] = eng_tag

    # both Chinese scripts map to the single 'zh' tag
    engine_traits.languages['zh_Hans'] = 'zh'
    engine_traits.languages['zh_Hant'] = 'zh'
|
||||
|
|
|
@ -1758,9 +1758,8 @@ engines:
|
|||
engine: peertube
|
||||
shortcut: ptb
|
||||
paging: true
|
||||
# https://instances.joinpeertube.org/instances
|
||||
base_url: https://peertube.biz/
|
||||
# base_url: https://tube.tardis.world/
|
||||
# alternatives see: https://instances.joinpeertube.org/instances
|
||||
# base_url: https://tube.4aem.com
|
||||
categories: videos
|
||||
disabled: true
|
||||
timeout: 6.0
|
||||
|
|
Loading…
Reference in a new issue