Job failing since October 2025. enh: always raise and reuse data fix: brave unknown locale fix: startpage add "brazilian"
182 lines
5.8 KiB
Python
182 lines
5.8 KiB
Python
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
"""Peertube and :py:obj:`SepiaSearch <searx.engines.sepiasearch>` do share
|
|
(more or less) the same REST API and the schema of the JSON result is identical.
|
|
|
|
"""
|
|
|
|
import re
|
|
from datetime import datetime, timedelta
|
|
from urllib.parse import urlencode
|
|
|
|
import babel
|
|
from dateutil.parser import parse
|
|
from dateutil.relativedelta import relativedelta
|
|
|
|
from searx.enginelib.traits import EngineTraits
|
|
from searx.locales import language_tag
|
|
from searx.utils import html_to_text, humanize_number
|
|
|
|
# Engine metadata: project links, API usage flags and the result format.
about = {
    # pylint: disable=line-too-long
    "website": "https://joinpeertube.org",
    "wikidata_id": "Q50938515",
    "official_api_documentation": "https://docs.joinpeertube.org/api-rest-reference.html#tag/Search/operation/searchVideos",
    "use_official_api": True,
    "require_api_key": False,
    "results": "JSON",
}
|
|
|
|
# engine dependent config
categories = ["videos"]
paging = True

base_url = "https://peer.tube"
"""Base URL of the Peertube instance. A list of instances is available at:

- https://instances.joinpeertube.org/instances
"""
|
|
|
|
time_range_support = True

# Maps a time range name to the offset that is added to today's date to get
# the ``startDate`` argument of the request.  "day" uses a zero offset, i.e.
# the start date is today.
time_range_table = {
    "day": relativedelta(),
    "week": relativedelta(weeks=-1),
    "month": relativedelta(months=-1),
    "year": relativedelta(years=-1),
}
|
|
|
|
safesearch = True

# Maps the safesearch level to the value sent in the ``nsfw`` URL argument.
safesearch_table = {
    0: "both",
    1: "false",
    2: "false",
}
|
|
|
|
|
|
def request(query, params):
    """Build the search URL for the Peertube REST API.

    :param query: search term; an empty query is not sent to the API.
    :param params: request parameters; the keys ``searxng_locale``,
        ``pageno``, ``safesearch`` and ``time_range`` are read and the
        assembled URL is stored in ``params["url"]``.
    :return: ``False`` for an empty query, otherwise ``params``.
    """
    if not query:
        return False

    # ``traits`` is not defined in this module -- presumably it is injected
    # by the engine loader; verify against the loader.
    # eng_region = traits.get_region(params['searxng_locale'], 'en_US')
    eng_lang = traits.get_language(params["searxng_locale"], None)

    page_size = 10
    api_args = {
        "search": query,
        "searchTarget": "search-index",  # Vidiversum
        "resultType": "videos",
        "start": (params["pageno"] - 1) * page_size,
        "count": page_size,
        # -createdAt: sort by date ascending / createdAt: date descending
        "sort": "-match",  # sort by *match descending*
        "nsfw": safesearch_table[params["safesearch"]],
    }
    endpoint = base_url.rstrip("/") + "/api/v1/search/videos?"
    params["url"] = endpoint + urlencode(api_args)

    # The language filters are appended verbatim (not through urlencode) so
    # the square brackets of the argument names stay unescaped.
    if eng_lang is not None:
        params["url"] += "&languageOneOf[]=" + eng_lang
        params["url"] += "&boostLanguages[]=" + eng_lang

    delta = time_range_table.get(params["time_range"]) if params["time_range"] in time_range_table else None
    if delta is not None:
        start_date = datetime.now().date() + delta
        params["url"] += "&startDate=" + start_date.isoformat()

    return params
|
|
|
|
|
|
def response(resp):
    """Parse the API response; delegates to :py:obj:`video_response`."""
    parsed = video_response(resp)
    return parsed
|
|
|
|
|
|
def video_response(resp):
    """Parse video response from SepiaSearch and Peertube instances.

    :param resp: response object whose ``json()`` method returns the decoded
        answer of the search API.
    :return: list of result dicts using the ``videos.html`` template; an
        empty list if the answer has no ``data`` field.

    The optional fields ``channel``, ``account`` and ``tags`` may be missing
    or ``null`` in the JSON; they are treated as empty instead of raising.
    The fields ``url``, ``name``, ``views`` and ``publishedAt`` are required
    and a missing one raises :py:obj:`KeyError`.
    """
    json_data = resp.json()

    if "data" not in json_data:
        return []

    results = []

    for result in json_data["data"]:
        # ``or {}`` also covers an explicit JSON ``null``, which a default
        # argument of ``dict.get`` would not catch.
        channel = result.get("channel") or {}
        account = result.get("account") or {}

        # Build the "name@host" handle only if both parts are known,
        # concatenating ``None`` would raise a TypeError.
        channel_name = channel.get("name")
        channel_host = channel.get("host")
        channel_handle = None
        if channel_name is not None and channel_host is not None:
            channel_handle = channel_name + "@" + channel_host

        metadata = [
            x
            for x in (
                channel.get("displayName"),
                channel_handle,
                ", ".join(result.get("tags") or []),
            )
            if x
        ]

        # A falsy duration (missing or 0) is passed through unchanged.
        duration = result.get("duration")
        if duration:
            duration = timedelta(seconds=duration)

        results.append(
            {
                "url": result["url"],
                "title": result["name"],
                "content": html_to_text(result.get("description") or ""),
                "author": account.get("displayName"),
                "length": duration,
                "views": humanize_number(result["views"]),
                "template": "videos.html",
                "publishedDate": parse(result["publishedAt"]),
                "iframe_src": result.get("embedUrl"),
                "thumbnail": result.get("thumbnailUrl") or result.get("previewUrl"),
                "metadata": " | ".join(metadata),
            }
        )

    return results
|
|
|
|
|
|
def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages from peertube's search-index source code.

    The language list is scraped from the ``videoLanguages`` section of the
    ``Filters.vue`` component and stored in ``engine_traits.languages``
    (SearXNG language tag --> Peertube language id).

    See videoLanguages_ in commit `8ed5c729 - Refactor and redesign client`_

    :raises RuntimeError: if the HTTP response is not OK.

    .. _8ed5c729 - Refactor and redesign client:
       https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729
    .. _videoLanguages:
       https://framagit.org/framasoft/peertube/search-index/-/commit/8ed5c729#3d8747f9a60695c367c70bb64efba8f403721fad_0_291
    """
    # pylint: disable=import-outside-toplevel

    from searx.network import get  # see https://github.com/searxng/searxng/issues/762

    filters_url = "https://framagit.org/framasoft/peertube/search-index/-/raw/master/client/src/components/Filters.vue"
    resp = get(filters_url, timeout=5)
    if not resp.ok:
        raise RuntimeError("Response from Peertube is not OK.")

    # Cut out the body of the ``videoLanguages ()`` list up to its closing "]".
    lang_section = re.search(r"videoLanguages \(\)[^\n]+(.*?)\]", resp.text, re.DOTALL)  # type: ignore
    if lang_section is None:
        print("ERROR: can't determine languages from peertube")
        return

    for entry in re.finditer(r"\{ id: '([a-z]+)', label:", lang_section.group(1)):
        eng_tag = entry.group(1)

        if eng_tag == "oc":
            # Occitan is not known by babel, its closest relative is Catalan
            # but 'ca' is already in the list of engine_traits.languages -->
            # 'oc' will be ignored.
            continue

        try:
            sxng_tag = language_tag(babel.Locale.parse(eng_tag))
        except babel.UnknownLocaleError:
            print("ERROR: %s is unknown by babel" % eng_tag)
            continue

        known_tag = engine_traits.languages.get(sxng_tag)
        if not known_tag:
            engine_traits.languages[sxng_tag] = eng_tag
        elif known_tag != eng_tag:
            # first mapping wins, only report the clash
            print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, known_tag, eng_tag))

    # Both Chinese script variants map to the single id 'zh'.
    engine_traits.languages["zh_Hans"] = "zh"
    engine_traits.languages["zh_Hant"] = "zh"
|