Files
searxng/searx/engines/pexels.py

129 lines
3.5 KiB
Python

# SPDX-License-Identifier: AGPL-3.0-or-later
"""Pexels (images)"""
import re
from urllib.parse import urlencode
from lxml import html
from searx.result_types import EngineResults
from searx.utils import eval_xpath_list, gen_useragent
from searx.enginelib import EngineCache
from searx.exceptions import SearxEngineAPIException
from searx.network import get
# about
# Metadata describing the engine, consumed by SearXNG's engine registry.
about = {
    "website": 'https://www.pexels.com',
    "wikidata_id": 'Q101240504',
    "official_api_documentation": 'https://www.pexels.com/api/',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}

base_url = 'https://www.pexels.com'
categories = ['images']

api_key = "H2jk9uKnhRmL6WPwh89zBezWvr"
"""
Fallback API key to use when SearXNG fails to automatically extract one from the website.
"""

results_per_page = 20
paging = True
time_range_support = True

# Maps SearXNG's time-range names onto the values the Pexels API expects.
time_range_map = {
    'day': 'last_24_hours',
    'week': 'last_week',
    'month': 'last_month',
    'year': 'last_year',
}
# Pattern used to pull the secret API key out of the site's bundled JavaScript.
# BUGFIX: the original pattern was written in a non-raw string, so '\b' was a
# literal backspace character (0x08), not a regex token — the pattern could
# never match the whitespace that follows the colon in '"secret-key": "..."'.
# Use a raw string and \s* to tolerate optional whitespace.
SECRET_KEY_RE = re.compile(r'"secret-key":\s*"(.*?)"')
# Key under which the scraped secret is stored in the engine cache.
SECRET_KEY_DB_KEY = "secret-key"
# Per-engine cache instance; assigned in init().  Holds the scraped secret
# API key under SECRET_KEY_DB_KEY so it survives between requests.
CACHE: EngineCache
"""Cache to store the secret API key for the engine."""
# NOTE(review): HTTP/2 is disabled for this engine — presumably the Pexels
# endpoint misbehaves over h2 with our HTTP client; confirm before removing.
enable_http2 = False
def init(engine_settings):
    """Engine setup hook: create the per-engine cache used to persist the
    scraped secret API key across requests.

    :param engine_settings: engine configuration dict; only ``"name"`` is read,
        to namespace the cache.
    """
    global CACHE  # pylint: disable=global-statement
    CACHE = EngineCache(engine_settings["name"])
def _get_secret_key():
    """Scrape the Pexels homepage and its script bundles for the secret API key.

    Returns the key string as soon as one script matches ``SECRET_KEY_RE``;
    raises :class:`SearxEngineAPIException` when the landing page or any
    script fails to download, or when no script contains the key.
    """
    headers = {
        # circumvents Cloudflare bot protections
        "User-Agent": gen_useragent(),
        "Referer": base_url,
    }
    landing = get(base_url, headers=headers)
    if landing.status_code != 200:
        raise SearxEngineAPIException("failed to obtain secret key")

    tree = html.fromstring(landing.text)
    for src in eval_xpath_list(tree, "//script/@src"):
        bundle = get(src)
        if bundle.status_code != 200:
            raise SearxEngineAPIException("failed to obtain secret key")
        found = SECRET_KEY_RE.search(bundle.text)
        if found:
            return found.group(1)

    # all scripts checked, but secret key was not found
    raise SearxEngineAPIException("failed to obtain secret key")
def request(query, params):
    """Build the search request against the Pexels internal JSON API.

    Adds the query, pagination and optional time-range filter to the URL, and
    attaches the ``secret-key`` header: a cached key is preferred, then a
    freshly scraped one, and finally the hard-coded ``api_key`` fallback.

    :param query: the user's search terms
    :param params: SearXNG request params dict (mutated and returned)
    """
    args = {
        'query': query,
        'page': params['pageno'],
        'per_page': results_per_page,
    }
    if params['time_range']:
        args['date_from'] = time_range_map[params['time_range']]
    params["url"] = f"{base_url}/en-us/api/v3/search/photos?{urlencode(args)}"

    # cache api key for future requests
    secret_key = CACHE.get(SECRET_KEY_DB_KEY)
    if not secret_key:
        try:
            secret_key = _get_secret_key()
            CACHE.set(SECRET_KEY_DB_KEY, secret_key)
        except SearxEngineAPIException as e:
            # FIX: pass the exception as a lazy %-style argument instead of
            # eagerly formatting with ``%`` — the message is only built when
            # debug logging is actually enabled.
            logger.debug("failed to extract API key %s", e)
            # scraping failed: fall back to the hard-coded key
            secret_key = api_key
    params["headers"]["secret-key"] = secret_key
    return params
def response(resp):
    """Turn the Pexels JSON payload into image results.

    Each entry of the top-level ``data`` array is mapped onto SearXNG's
    ``images.html`` result template.
    """
    results = EngineResults()
    payload = resp.json()

    for item in payload.get('data', []):
        attrs = item["attributes"]
        entry = {
            'template': 'images.html',
            'url': f"{base_url}/photo/{attrs['slug']}-{attrs['id']}/",
            'title': attrs["title"],
            'content': attrs["description"],
            'thumbnail_src': attrs["image"]["small"],
            'img_src': attrs["image"]["download_link"],
            'resolution': f"{attrs['width']}x{attrs['height']}",
            'author': f"{attrs['user']['username']}",
        }
        results.add(results.types.LegacyResult(entry))

    return results