Files
searxng/searx/engines/gmx.py
Bnyro 8579974f5e [feat] engines: add GMX search engine (#5967)
Notes:
- Safesearch doesn't seem to work properly?
- In theory multiple languages are supported, but even in the web UI, they don't work properly
- Possibly, we could cache the request hashes (h query parameter), I'm not sure if it ever changes
2026-04-17 07:00:21 +02:00

96 lines
3.2 KiB
Python

# SPDX-License-Identifier: AGPL-3.0-or-later
"""GMX (general)
It's unclear which index it uses; the results were most similar to Google's.
In theory it supports multiple languages, but even after changing the region on their website,
most of the results are still in English."""
import time
import typing as t
from urllib.parse import urlencode
from searx.result_types import EngineResults
from searx.extended_types import SXNG_Response
from searx.utils import extr, gen_useragent, html_to_text
from searx.network import get
if t.TYPE_CHECKING:
from searx.search.processors import OnlineParams
# Engine metadata describing the upstream service.  Nothing in this module
# reads it; presumably it is consumed by the SearXNG engine framework.
about = {
"website": "https://search.gmx.com",
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": "JSON",
}
# Root URL shared by the hash lookup (``/web/result``) and the result
# request (``/desk``).
base_url = "https://search.gmx.com" # alternatively: search.gmx.net
categories = ["general"]
# Capability flags.  ``request()`` reads the matching values from ``params``:
# ``pageno``, ``safesearch`` and ``time_range``.
paging = True
safesearch = True
time_range_support = True
# Maps the ``time_range`` value found in ``params`` to the value sent in the
# ``time`` query argument of the ``/desk`` request.
time_range_map = {"day": "d", "week": "w", "month": "m", "year": "y"}
def _get_page_hash(query: str, page: int, headers: dict[str, str]) -> str:
    """Fetch the HTML result page and extract the ``h`` value embedded in it.

    The HTML page at ``/web/result`` contains a script call whose URL carries
    an ``h=...`` argument; ``request()`` sends that value along with the
    actual ``/desk`` request.

    :param query: the raw (not yet URL-encoded) search query.
    :param page: the result page number.
    :param headers: HTTP headers to send; ``request()`` passes the same
        headers it later uses for the result request.
    :return: the text found between ``&h=`` and ``&t=`` in the response body
        (what is returned when the markers are missing is up to
        ``searx.utils.extr``).
    """
    # URL-encode the arguments instead of interpolating the raw query: a query
    # containing "&", "#", "+", "=" or "%" would otherwise alter or truncate
    # the query string, so the hash would be fetched for a different query
    # than the one request() sends (which is encoded with urlencode as well).
    page_args = urlencode({"q": query, "page": page})
    resp = get(f"{base_url}/web/result?{page_args}", headers=headers)
    # the text we search for looks like:
    # load("/desk?lang="+eV.p.param['hl']+"&q="+eV['p']['q_encode']+"&page=5&h=aa45603&t=177582576&origin=web&comp=web_serp_pag&p=gmx-com&sp=&lr="+eV.p.param['lr0']+"&mkt="+eV.p.param['mkt0']+"&family="+eV.p.param['familyFilter']+"&fcons="+eV.p.perm.fCons,"google", "eMMO", "eMH","eMP"); # pylint: disable=line-too-long
    return extr(resp.text, "&h=", "&t=")
def request(query: str, params: 'OnlineParams'):
    """Prepare the ``/desk`` request for *query*.

    Performs one preliminary HTTP request (via ``_get_page_hash``) to obtain
    the page hash, then stores the final URL in ``params["url"]`` and merges
    the browser-like headers into ``params["headers"]``.

    :param query: the search query.
    :param params: request parameters; ``pageno``, ``safesearch``,
        ``time_range`` and ``headers`` are read, ``url`` is written.
    """
    # Headers have to look as much like a regular browser as possible,
    # otherwise the service rate-limits quickly.  The very same headers
    # (in particular the user agent) must be used for the hash lookup and for
    # the result request, hence they are built once and reused below.
    browser_headers = {
        "User-Agent": gen_useragent(),
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
        "Connection": "keep-alive",
        "Referer": base_url,
    }

    # The "t" argument is the current unix time in seconds with the last
    # digit removed, e.g. 1775829848 -> 177582984.  It is taken before the
    # hash lookup is performed.
    timestamp = int(time.time() / 10)

    # The "h" argument is a hash that depends on the query and page number.
    pageno = params["pageno"]
    page_hash = _get_page_hash(query, pageno, browser_headers)

    query_args = {
        "lang": "en",
        "q": query,
        "page": pageno,
        "h": page_hash,
        "t": timestamp,
    }

    if params["safesearch"]:
        # urlencode() renders this as ``family=True``
        query_args["family"] = True

    time_range = params.get("time_range")
    if time_range:
        query_args["time"] = time_range_map[time_range]

    params["url"] = base_url + "/desk?" + urlencode(query_args)
    params["headers"].update(browser_headers)
def response(resp: 'SXNG_Response') -> EngineResults:
    """Convert the JSON answer of the ``/desk`` request into engine results.

    Every entry of ``results.rs`` becomes a suggestion (taken from its ``t``
    field); every entry of ``results.hits`` becomes a main result built from
    its ``u`` (URL), ``t`` (title) and ``s`` (content) fields, the latter two
    being passed through ``html_to_text``.

    :param resp: the HTTP response of the request prepared by ``request()``.
    :return: the collected results, suggestions first.
    """
    collected = EngineResults()
    payload = resp.json()["results"]

    for related in payload["rs"]:
        suggestion_item = collected.types.LegacyResult({"suggestion": related["t"]})
        collected.add(suggestion_item)

    for hit in payload["hits"]:
        main_item = collected.types.MainResult(
            url=hit["u"],
            title=html_to_text(hit["t"]),
            content=html_to_text(hit["s"]),
        )
        collected.add(main_item)

    return collected