# SPDX-License-Identifier: AGPL-3.0-or-later
"""Pexels (images)"""

import re

from urllib.parse import urlencode

from lxml import html

from searx.result_types import EngineResults
from searx.utils import eval_xpath_list, gen_useragent
from searx.enginelib import EngineCache
from searx.exceptions import SearxEngineAPIException
from searx.network import get

# about
about = {
    "website": 'https://www.pexels.com',
    "wikidata_id": 'Q101240504',
    "official_api_documentation": 'https://www.pexels.com/api/',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}

base_url = 'https://www.pexels.com'
categories = ['images']

api_key = "H2jk9uKnhRmL6WPwh89zBezWvr"
"""Fallback API key to use when SearXNG fails to automatically extract one from the website."""

results_per_page = 20

paging = True
time_range_support = True
time_range_map = {'day': 'last_24_hours', 'week': 'last_week', 'month': 'last_month', 'year': 'last_year'}

SECRET_KEY_RE = re.compile(r'"secret-key":\s*"(.*?)"')
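# For reference: the pattern above matches fragments like '"secret-key": "<value>"'
# in the downloaded scripts and captures <value> as group 1.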
SECRET_KEY_DB_KEY = "secret-key"


CACHE: EngineCache
"""Cache to store the secret API key for the engine."""

enable_http2 = False


def init(engine_settings):
    """Set up the per-engine cache in which the scraped secret key is stored."""
    global CACHE  # pylint: disable=global-statement
    CACHE = EngineCache(engine_settings["name"])


def _get_secret_key():
    """Fetch the Pexels homepage and search its linked scripts for the secret API key."""
    resp = get(
        base_url,
        headers={
            # circumvents Cloudflare bot protections
            "User-Agent": gen_useragent(),
            "Referer": base_url,
        },
    )
    if resp.status_code != 200:
        raise SearxEngineAPIException("failed to obtain secret key")

    doc = html.fromstring(resp.text)
    for script_src in eval_xpath_list(doc, "//script/@src"):
        script = get(script_src)
        if script.status_code != 200:
            raise SearxEngineAPIException("failed to obtain secret key")

        match = SECRET_KEY_RE.search(script.text)
        if match:
            return match.groups()[0]

    # all scripts checked, but secret key was not found
    raise SearxEngineAPIException("failed to obtain secret key")

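# Illustrative example: a search for "nature" on page 2 builds the request URL
#   https://www.pexels.com/en-us/api/v3/search/photos?query=nature&page=2&per_page=20
# and sends the scraped (or fallback) secret key in the "secret-key" request header.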
def request(query, params):
    """Build the search request and attach the secret key header."""
    args = {
        'query': query,
        'page': params['pageno'],
        'per_page': results_per_page,
    }
    if params['time_range']:
        args['date_from'] = time_range_map[params['time_range']]

    params["url"] = f"{base_url}/en-us/api/v3/search/photos?{urlencode(args)}"

    # reuse a previously cached API key, otherwise scrape a new one
    secret_key = CACHE.get(SECRET_KEY_DB_KEY)
    if not secret_key:
        try:
            secret_key = _get_secret_key()
            # cache the scraped key for future requests
            CACHE.set(SECRET_KEY_DB_KEY, secret_key)
        except SearxEngineAPIException as e:
            logger.debug("failed to extract API key: %s", e)
            secret_key = api_key

    params["headers"]["secret-key"] = secret_key

    return params

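# The fields accessed below imply a payload of roughly this shape (inferred from the
# parsing code, not from any official documentation):
#
#   {"data": [{"attributes": {"id": ..., "slug": ..., "title": ..., "description": ...,
#              "width": ..., "height": ..., "user": {"username": ...},
#              "image": {"small": ..., "download_link": ...}}}]}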
def response(resp):
    """Parse the JSON answer from Pexels' internal API into image results."""
    res = EngineResults()
    json_data = resp.json()

    for result in json_data.get('data', []):
        attrs = result["attributes"]
        res.add(
            res.types.LegacyResult(
                {
                    'template': 'images.html',
                    'url': f"{base_url}/photo/{attrs['slug']}-{attrs['id']}/",
                    'title': attrs["title"],
                    'content': attrs["description"],
                    'thumbnail_src': attrs["image"]["small"],
                    'img_src': attrs["image"]["download_link"],
                    'resolution': f"{attrs['width']}x{attrs['height']}",
                    'author': f"{attrs['user']['username']}",
                }
            )
        )

    return res