[feat] engines: add support for aol.com (#5882)

Co-authored-by: Markus Heiser <markus.heiser@darmarit.de>
2026-03-24 20:44:15 +01:00
parent 99ec6f296e
commit 220c42c8e9
3 changed files with 234 additions and 0 deletions
--- a/docs/dev/engines/online/aol.rst
+++ b/docs/dev/engines/online/aol.rst
@@ -0,0 +1,8 @@
 .. _aol engine:
 ===
 AOL
 ===
 .. automodule:: searx.engines.aol
   :members:
--- a/searx/engines/aol.py
+++ b/searx/engines/aol.py
@@ -0,0 +1,208 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """AOL supports WEB, image, and video search.  Internally, it uses the Bing
 index.
 AOL doesn't seem to support setting the language via request parameters, instead
 the results are based on the URL.  For example, there is
 - `search.aol.com <https://search.aol.com>`_ for English results
 - `suche.aol.de <https://suche.aol.de>`_ for German results
 However, AOL offers its services only in a few regions:
 - en-US: search.aol.com
 - de-DE: suche.aol.de
 - fr-FR: recherche.aol.fr
 - en-GB: search.aol.co.uk
 - en-CA: search.aol.ca
 In order to still offer sufficient support for language and region, the `search
 keywords`_ known from Bing, ``language`` and ``loc`` (region), are added to the
 search term (AOL is basically just a proxy for Bing).
 .. _search keywords:
    https://support.microsoft.com/en-us/topic/advanced-search-keywords-ea595928-5d63-4a0b-9c6b-0b769865e78a
 """
 from urllib.parse import urlencode, unquote_plus
 import typing as t
 from lxml import html
 from dateutil import parser
 from searx.result_types import EngineResults
 from searx.utils import eval_xpath_list, eval_xpath, extract_text
 if t.TYPE_CHECKING:
    from searx.extended_types import SXNG_Response
    from searx.search.processors import OnlineParams
 # Engine metadata: website, Wikidata ID, whether an official API / API key is
 # used, and the format of the results that are parsed (HTML).
 about = {
    "website": "https://www.aol.com",
    "wikidata_id": "Q2407",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
 }
 # Default category of the engine.
 categories = ["general"]
 # Selects the URL path segment of the request and the result parser.
 search_type = "search"  # supported: search, image, video
 # Feature flags: paging, safe search and time-range filtering are enabled.
 paging = True
 safesearch = True
 time_range_support = True
 # Page size: sent as the ``pz`` URL argument and used to compute the ``b`` offset.
 results_per_page = 10
 # Scheme and host the request URL is built on.
 base_url = "https://search.aol.com"
 # Short codes for the time-range names (day / week / month / year).
 time_range_map = {"day": "1d", "week": "1w", "month": "1m", "year": "1y"}
 # Maps the safesearch level (0, 1, 2) to the letter placed after ``vm=`` in the
 # ``sB`` cookie.
 safesearch_map = {0: "p", 1: "r", 2: "i"}
 def init(_):
    """Validate the configured ``search_type`` when the engine is set up.

    The single positional argument is accepted but not used.

    :raises ValueError: if ``search_type`` is not ``search``, ``image`` or
        ``video``.
    """
    supported_types = ("search", "image", "video")
    if search_type in supported_types:
        return
    raise ValueError(f"unsupported search type {search_type}")
 def request(query: str, params: "OnlineParams") -> None:
    """Assemble the AOL request for *query* and store it in *params*.

    The language and region found in ``params["searxng_locale"]`` are appended
    to the search term as ``language:`` and ``loc:`` keywords.  The function
    works in place: it sets ``params["url"]`` and the ``sB`` cookie (safe
    search level) and returns nothing.

    :param query: the search term entered by the user.
    :param params: request parameters; ``searxng_locale``, ``pageno``,
        ``time_range``, ``safesearch`` and ``cookies`` are read.
    """
    # "en-US" -> ("en", "US"); "en" or "all" -> (<language>, None)
    language, region = (params["searxng_locale"].split("-") + [None])[:2]
    if language and language != "all":
        query = f"{query} language:{language}"
    if region:
        query = f"{query} loc:{region}"

    args: dict[str, str | int | None] = {
        "q": query,
        # ``b`` is the 1-based offset of the first result.  Page numbers start
        # at 1, so page 1 has to map to b=1 (and not to b=11, which would skip
        # the first page of results).
        "b": (params["pageno"] - 1) * results_per_page + 1,
        "pz": results_per_page,
    }

    if params["time_range"]:
        args["fr2"] = "time"
        # translate the time-range name (day, week, ..) into its short code
        # (1d, 1w, ..) instead of sending the raw name
        args["age"] = time_range_map[params["time_range"]]
    else:
        args["fr2"] = "sb-top-search"

    # the safe-search level is transported in a cookie, not in the URL
    params["cookies"]["sB"] = f"vm={safesearch_map[params['safesearch']]}"
    params["url"] = f"{base_url}/aol/{search_type}?{urlencode(args)}"
    logger.debug(params)
 def _deobfuscate_url(obfuscated_url: str) -> str | None:
    # URL looks like "https://search.aol.com/click/_ylt=AwjFSDjd;_ylu=JfsdjDFd/RV=2/RE=1774058166/RO=10/RU=https%3a%2f%2fen.wikipedia.org%2fwiki%2fTree/RK=0/RS=BP2CqeMLjscg4n8cTmuddlEQA2I-"  # pylint: disable=line-too-long
    if not obfuscated_url:
        return None
    for part in obfuscated_url.split("/"):
        if part.startswith("RU="):
            return unquote_plus(part[3:])
    # pattern for de-obfuscating URL not found, fall back to Yahoo's tracking link
    return obfuscated_url
 def _general_results(doc: html.HtmlElement) -> EngineResults:
    """Collect the web results from the parsed result page *doc*.

    Items are the ``<li>`` elements below ``div#web`` whose class does not
    contain ``first``.  Items without a usable link are skipped.
    """
    results = EngineResults()
    items = eval_xpath_list(doc, "//div[@id='web']//ol/li[not(contains(@class, 'first'))]")

    for item in items:
        href = extract_text(eval_xpath(item, ".//h3/a/@href"))
        url = _deobfuscate_url(href) if href else None
        if not url:
            continue

        # missing fields are normalized to an empty string
        title = extract_text(eval_xpath(item, ".//h3/a")) or ""
        content = extract_text(eval_xpath(item, ".//div[contains(@class, 'compText')]")) or ""
        thumbnail = extract_text(eval_xpath(item, ".//a[contains(@class, 'thm')]/img/@data-src")) or ""

        results.add(
            results.types.MainResult(
                url=url,
                title=title,
                content=content,
                thumbnail=thumbnail,
            )
        )
    return results
 def _video_results(doc: html.HtmlElement) -> EngineResults:
    """Collect the video results from the parsed result page *doc*.

    Every ``<li>`` in a list below an element whose class contains ``results``
    is one candidate; candidates without a usable link are skipped.  A
    publication date that cannot be parsed is stored as ``None``.
    """
    results = EngineResults()

    for item in eval_xpath_list(doc, "//div[contains(@class, 'results')]//ol/li"):
        href = extract_text(eval_xpath(item, ".//a/@href"))
        url = _deobfuscate_url(href) if href else None
        if not url:
            continue

        age_text = extract_text(eval_xpath(item, ".//div[contains(@class, 'v-age')]"))
        try:
            published_date = parser.parse(age_text or "")
        except parser.ParserError:
            # text is missing or not a date the parser understands
            published_date = None

        entry = {
            "template": "videos.html",
            "url": url,
            "title": extract_text(eval_xpath(item, ".//h3")),
            "content": extract_text(eval_xpath(item, ".//div[contains(@class, 'compText')]")),
            "thumbnail": extract_text(eval_xpath(item, ".//img[contains(@class, 'thm')]/@src")),
            "length": extract_text(eval_xpath(item, ".//span[contains(@class, 'v-time')]")),
            "publishedDate": published_date,
        }
        results.add(results.types.LegacyResult(entry))

    return results
 def _image_results(doc: html.HtmlElement) -> EngineResults:
    """Collect the image results from the parsed result page *doc*.

    Every ``<li>`` in a list below ``section#results`` is one candidate;
    candidates without a usable link are skipped.
    """
    results = EngineResults()

    for item in eval_xpath_list(doc, "//section[@id='results']//ul/li"):
        href = extract_text(eval_xpath(item, "./a/@href"))
        url = _deobfuscate_url(href) if href else None
        if not url:
            continue

        entry = {
            "template": "images.html",
            # there is no separate page URL for a result, only the image
            # source, so the same value is used for both fields
            "url": url,
            "title": extract_text(eval_xpath(item, ".//a/@aria-label")),
            "thumbnail_src": extract_text(eval_xpath(item, ".//img/@src")),
            "img_src": url,
        }
        results.add(results.types.LegacyResult(entry))

    return results
 def response(resp: "SXNG_Response") -> EngineResults:
    """Parse the HTML of *resp* into results for the configured ``search_type``.

    Besides the results of the selected type, the link texts found in the
    ``searchRightBottom`` list are added as suggestions.

    :raises ValueError: if ``search_type`` is not ``search``, ``image`` or
        ``video``.
    """
    doc = html.fromstring(resp.text)

    if search_type == "search":
        results = _general_results(doc)
    elif search_type == "image":
        results = _image_results(doc)
    elif search_type == "video":
        results = _video_results(doc)
    else:
        raise ValueError("unsupported search type")

    suggestion_links = eval_xpath_list(doc, ".//ol[contains(@class, 'searchRightBottom')]//table//a")
    for link in suggestion_links:
        suggestion = {"suggestion": extract_text(link)}
        results.add(results.types.LegacyResult(suggestion))

    return results
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -421,6 +421,24 @@ engines:
    shortcut: conda
    disabled: true
  - name: aol
    engine: aol
    search_type: search
    categories: [general]
    shortcut: aol
  - name: aol images
    engine: aol
    search_type: image
    categories: [images]
    shortcut: aoli
  - name: aol videos
    engine: aol
    search_type: video
    categories: [videos]
    shortcut: aolv
  - name: arch linux wiki
    engine: archlinux
    shortcut: al