From 20fc6fe80db06430041bb8a96960d0a1505a6e42 Mon Sep 17 00:00:00 2001 From: Bnyro Date: Sun, 5 Apr 2026 11:50:05 +0200 Subject: [PATCH] [fix] pexels: circumvent bot detection by passing referer header As a side effect, Cloudscraper is no longer needed. It probably only ever worked by setting the correct request headers, so we don't really need it since we can just set the right request headers and cipher suites ourselves. --- requirements.txt | 1 - searx/engines/pexels.py | 17 ++++++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index 976a0312f..1a8d6d016 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,4 +17,3 @@ typer==0.24.1 isodate==0.7.2 whitenoise==6.12.0 typing-extensions==4.15.0 -cloudscraper==1.2.71 diff --git a/searx/engines/pexels.py b/searx/engines/pexels.py index 57d2d1424..aa9ace50e 100644 --- a/searx/engines/pexels.py +++ b/searx/engines/pexels.py @@ -6,10 +6,8 @@ import re from urllib.parse import urlencode from lxml import html -import cloudscraper - from searx.result_types import EngineResults -from searx.utils import eval_xpath_list +from searx.utils import eval_xpath_list, gen_useragent from searx.enginelib import EngineCache from searx.exceptions import SearxEngineAPIException from searx.network import get @@ -40,6 +38,8 @@ SECRET_KEY_DB_KEY = "secret-key" CACHE: EngineCache """Cache to store the secret API key for the engine.""" +enable_http2 = False + def init(engine_settings): global CACHE # pylint: disable=global-statement @@ -47,8 +47,15 @@ def init(engine_settings): def _get_secret_key(): - scraper = cloudscraper.create_scraper() - resp = scraper.get(base_url) + resp = get( + base_url, + headers={ + # circumvents Cloudflare bot protections + "User-Agent": gen_useragent(), + "Referer": base_url, + }, + ) + if resp.status_code != 200: raise SearxEngineAPIException("failed to obtain secret key")