Files
searxng/searx/engines/pexels.py

129 lines
3.5 KiB
Python

# SPDX-License-Identifier: AGPL-3.0-or-later
"""Pexels (images)"""
import re
from urllib.parse import urlencode
from lxml import html
from searx.result_types import EngineResults
from searx.utils import eval_xpath_list, gen_useragent
from searx.enginelib import EngineCache
from searx.exceptions import SearxEngineAPIException
from searx.network import get
# about
# Metadata describing the engine, consumed by SearXNG's engine registry.
about = {
    "website": 'https://www.pexels.com',
    "wikidata_id": 'Q101240504',
    "official_api_documentation": 'https://www.pexels.com/api/',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}

base_url = 'https://www.pexels.com'
categories = ['images']

api_key = "H2jk9uKnhRmL6WPwh89zBezWvr"
"""
Fallback API key to use when SearXNG fails to automatically extract one from the website.
"""

results_per_page = 20
paging = True
time_range_support = True

# Maps SearXNG's time-range names onto the values the Pexels API expects.
time_range_map = {
    'day': 'last_24_hours',
    'week': 'last_week',
    'month': 'last_month',
    'year': 'last_year',
}
# Pattern used to pull the secret API key out of the site's bundled JavaScript.
# BUGFIX: the original pattern was written in a non-raw string, so '\b' was a
# literal backspace character (0x08), not a regex token — the pattern could
# never match the whitespace that follows the colon in '"secret-key": "..."'.
# Use a raw string and \s* to tolerate optional whitespace.
SECRET_KEY_RE = re.compile(r'"secret-key":\s*"(.*?)"')
# Key under which the scraped secret is stored in the engine cache.
SECRET_KEY_DB_KEY = "secret-key"
# Per-engine cache instance; assigned in init().  Holds the scraped secret
# API key under SECRET_KEY_DB_KEY so it survives between requests.
CACHE: EngineCache
"""Cache to store the secret API key for the engine."""
# NOTE(review): HTTP/2 is disabled for this engine — presumably the Pexels
# endpoint misbehaves over h2 with our HTTP client; confirm before removing.
enable_http2 = False
def init(engine_settings):
    """Engine setup hook: create the per-engine cache used to persist the
    scraped secret API key across requests.

    :param engine_settings: engine configuration dict; only ``"name"`` is read,
        to namespace the cache.
    """
    global CACHE  # pylint: disable=global-statement
    CACHE = EngineCache(engine_settings["name"])
def _get_secret_key():
    """Scrape the Pexels homepage and its script bundles for the secret API key.

    Returns the key string as soon as one script matches ``SECRET_KEY_RE``;
    raises :class:`SearxEngineAPIException` when the landing page or any
    script fails to download, or when no script contains the key.
    """
    headers = {
        # circumvents Cloudflare bot protections
        "User-Agent": gen_useragent(),
        "Referer": base_url,
    }
    landing = get(base_url, headers=headers)
    if landing.status_code != 200:
        raise SearxEngineAPIException("failed to obtain secret key")

    tree = html.fromstring(landing.text)
    for src in eval_xpath_list(tree, "//script/@src"):
        bundle = get(src)
        if bundle.status_code != 200:
            raise SearxEngineAPIException("failed to obtain secret key")
        found = SECRET_KEY_RE.search(bundle.text)
        if found:
            return found.group(1)

    # all scripts checked, but secret key was not found
    raise SearxEngineAPIException("failed to obtain secret key")
def request(query, params):
    """Build the search request against the Pexels internal JSON API.

    Adds the query, pagination and optional time-range filter to the URL, and
    attaches the ``secret-key`` header: a cached key is preferred, then a
    freshly scraped one, and finally the hard-coded ``api_key`` fallback.

    :param query: the user's search terms
    :param params: SearXNG request params dict (mutated and returned)
    """
    args = {
        'query': query,
        'page': params['pageno'],
        'per_page': results_per_page,
    }
    if params['time_range']:
        args['date_from'] = time_range_map[params['time_range']]
    params["url"] = f"{base_url}/en-us/api/v3/search/photos?{urlencode(args)}"

    # cache api key for future requests
    secret_key = CACHE.get(SECRET_KEY_DB_KEY)
    if not secret_key:
        try:
            secret_key = _get_secret_key()
            CACHE.set(SECRET_KEY_DB_KEY, secret_key)
        except SearxEngineAPIException as e:
            # FIX: pass the exception as a lazy %-style argument instead of
            # eagerly formatting with ``%`` — the message is only built when
            # debug logging is actually enabled.
            logger.debug("failed to extract API key %s", e)
            # scraping failed: fall back to the hard-coded key
            secret_key = api_key
    params["headers"]["secret-key"] = secret_key
    return params
def response(resp):
    """Turn the Pexels JSON payload into image results.

    Each entry of the top-level ``data`` array is mapped onto SearXNG's
    ``images.html`` result template.
    """
    results = EngineResults()
    payload = resp.json()

    for item in payload.get('data', []):
        attrs = item["attributes"]
        entry = {
            'template': 'images.html',
            'url': f"{base_url}/photo/{attrs['slug']}-{attrs['id']}/",
            'title': attrs["title"],
            'content': attrs["description"],
            'thumbnail_src': attrs["image"]["small"],
            'img_src': attrs["image"]["download_link"],
            'resolution': f"{attrs['width']}x{attrs['height']}",
            'author': f"{attrs['user']['username']}",
        }
        results.add(results.types.LegacyResult(entry))

    return results