[feat] engines: add GMX search engine (#5967)
Notes: (1) Safesearch doesn't seem to work properly. (2) In theory, multiple languages are supported, but they don't work properly even in the web UI. (3) Possibly, we could cache the request hashes (the `h` query parameter); I'm not sure whether they ever change.
This commit is contained in:
95
searx/engines/gmx.py
Normal file
95
searx/engines/gmx.py
Normal file
@@ -0,0 +1,95 @@
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""GMX (general)
|
||||
|
||||
It's unclear which index it uses; the results were most similar to Google's.
|
||||
|
||||
In theory it supports multiple languages, but even when the region is changed on their website,
|
||||
most of the results are still in English."""
|
||||
|
||||
import time
|
||||
import typing as t
|
||||
|
||||
from urllib.parse import urlencode
|
||||
|
||||
from searx.result_types import EngineResults
|
||||
from searx.extended_types import SXNG_Response
|
||||
from searx.utils import extr, gen_useragent, html_to_text
|
||||
from searx.network import get
|
||||
|
||||
if t.TYPE_CHECKING:
|
||||
from searx.search.processors import OnlineParams
|
||||
|
||||
# Descriptive metadata for this engine: the upstream website, and the fact that
# results are obtained without an official API or API key and arrive as JSON
# (``response()`` decodes them with ``resp.json()``).
about = {
    "website": "https://search.gmx.com",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "JSON",
}
|
||||
|
||||
# Root of every URL built by this module (hash lookup page and JSON endpoint).
base_url = "https://search.gmx.com"  # alternatively: search.gmx.net
categories = ["general"]

# request() forwards the requested page number as the ``page`` query argument.
paging = True
# request() adds ``family`` to the query arguments when safesearch is enabled.
safesearch = True
# request() translates the selected time range through ``time_range_map``.
time_range_support = True

# Maps the time range names found in ``params["time_range"]`` to the values
# that request() sends as the ``time`` query argument.
time_range_map = {"day": "d", "week": "w", "month": "m", "year": "y"}
|
||||
|
||||
|
||||
def _get_page_hash(query: str, page: int, headers: dict[str, str]) -> str:
    """Fetch the HTML result page and extract the request hash embedded in it.

    The hash depends on the query and the page number, so it has to be looked
    up for every search before the JSON endpoint can be called.

    :param query: the raw (not yet URL-encoded) search terms.
    :param page: the result page number the hash is requested for.
    :param headers: HTTP headers sent with the lookup request.
    :return: the text found between ``&h=`` and ``&t=`` in the response body.
    """
    # Percent-encode the arguments: a raw query containing "&", "#", "+", "%" or
    # "=" would otherwise be split or truncated, and the hash would be fetched
    # for a different query than the one that is searched afterwards.
    lookup_args = urlencode({"q": query, "page": page})
    resp = get(f"{base_url}/web/result?{lookup_args}", headers=headers)

    # the text we search for looks like:
    # load("/desk?lang="+eV.p.param['hl']+"&q="+eV['p']['q_encode']+"&page=5&h=aa45603&t=177582576&origin=web&comp=web_serp_pag&p=gmx-com&sp=&lr="+eV.p.param['lr0']+"&mkt="+eV.p.param['mkt0']+"&family="+eV.p.param['familyFilter']+"&fcons="+eV.p.perm.fCons,"google", "eMMO", "eMH","eMP"); # pylint: disable=line-too-long
    return extr(resp.text, "&h=", "&t=")
|
||||
|
||||
|
||||
def request(query: str, params: 'OnlineParams'):
    """Fill ``params`` with the URL and headers for a search on the ``/desk`` endpoint.

    A page hash is fetched first via ``_get_page_hash()``; it is then sent as
    the ``h`` argument together with a truncated timestamp ``t``.  The same
    headers are used for the hash lookup and for the actual search request.

    :param query: the search terms.
    :param params: request parameters; ``url`` is set and ``headers`` is updated in place.
    """
    # The headers have to look as much like a normal browser's as possible,
    # otherwise the requests get rate-limited quickly.  The user agent used for
    # loading the hash and for requesting the results has to be the same.
    browser_headers = {
        "User-Agent": gen_useragent(),
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
        "Connection": "keep-alive",
        "Referer": base_url,
    }

    # The "t" parameter has to be the current time in seconds with the last
    # digit removed, e.g. if the current time is 1775829848, t has to be 177582984.
    timestamp = int(time.time() / 10)

    # The "h" parameter is a page hash that depends on the query and page number.
    page_no = params["pageno"]
    page_hash = _get_page_hash(query, page_no, browser_headers)

    query_args = {"lang": "en", "q": query, "page": page_no, "h": page_hash, "t": timestamp}
    if params["safesearch"]:
        query_args["family"] = True
    if time_range := params.get("time_range"):
        query_args["time"] = time_range_map[time_range]

    encoded_args = urlencode(query_args)
    params["url"] = f"{base_url}/desk?{encoded_args}"

    # The search request has to carry the same headers as the hash lookup.
    params["headers"].update(browser_headers)
|
||||
|
||||
|
||||
def response(resp: 'SXNG_Response') -> EngineResults:
    """Convert the JSON answer of the ``/desk`` endpoint into engine results.

    The ``results`` object of the payload is read: every entry of ``rs``
    contributes its ``t`` field as a suggestion, and every entry of ``hits``
    becomes a main result built from ``u`` (URL), ``t`` (title) and ``s``
    (content); title and content are passed through ``html_to_text``.

    :param resp: the HTTP response of the search request.
    :return: the collected suggestions and main results.
    """
    engine_results = EngineResults()
    payload = resp.json()["results"]

    # suggestions first, in the order they appear in the payload
    for related in payload["rs"]:
        suggestion = engine_results.types.LegacyResult({"suggestion": related["t"]})
        engine_results.add(suggestion)

    for hit in payload["hits"]:
        main_result = engine_results.types.MainResult(
            url=hit["u"],
            title=html_to_text(hit["t"]),
            content=html_to_text(hit["s"]),
        )
        engine_results.add(main_result)

    return engine_results
|
||||
@@ -1036,6 +1036,11 @@ engines:
|
||||
shortcut: gitea
|
||||
disabled: true
|
||||
|
||||
- name: gmx
|
||||
engine: gmx
|
||||
shortcut: gmx
|
||||
disabled: true
|
||||
|
||||
- name: goodreads
|
||||
engine: goodreads
|
||||
shortcut: good
|
||||
|
||||
Reference in New Issue
Block a user