From 220c42c8e95893f9e942fe8eb433a96cf8f01fa9 Mon Sep 17 00:00:00 2001
From: Bnyro <bnyro@tutanota.com>
Date: Tue, 24 Mar 2026 20:44:15 +0100
Subject: [PATCH] [feat] engines: add support for aol.com (#5882)

Co-authored-by: Markus Heiser <markus.heiser@darmarit.de>
---
 docs/dev/engines/online/aol.rst |   8 ++
 searx/engines/aol.py            | 208 ++++++++++++++++++++++++++++++++
 searx/settings.yml              |  18 +++
 3 files changed, 234 insertions(+)
 create mode 100644 docs/dev/engines/online/aol.rst
 create mode 100644 searx/engines/aol.py

diff --git a/docs/dev/engines/online/aol.rst b/docs/dev/engines/online/aol.rst
new file mode 100644
index 000000000..5a6ea7a63
--- /dev/null
+++ b/docs/dev/engines/online/aol.rst
@@ -0,0 +1,8 @@
+.. _aol engine:
+
+===
+AOL
+===
+
+.. automodule:: searx.engines.aol
+   :members:
diff --git a/searx/engines/aol.py b/searx/engines/aol.py
new file mode 100644
index 000000000..b63098451
--- /dev/null
+++ b/searx/engines/aol.py
@@ -0,0 +1,208 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""AOL supports WEB, image, and video search.  Internally, it uses the Bing
+index.
+
+AOL doesn't seem to support setting the language via request parameters; instead,
+the language of the results depends on the domain used.  For example, there is
+
+- `search.aol.com <https://search.aol.com>`_ for English results
+- `suche.aol.de <https://suche.aol.de>`_ for German results
+
+However, AOL offers its services only in a few regions:
+
+- en-US: search.aol.com
+- de-DE: suche.aol.de
+- fr-FR: recherche.aol.fr
+- en-GB: search.aol.co.uk
+- en-CA: search.aol.ca
+
+In order to still offer sufficient support for language and region, the `search
+keywords`_ known from Bing, ``language`` and ``loc`` (region), are added to the
+search term (AOL is basically just a proxy for Bing).
+
+.. _search keywords:
+    https://support.microsoft.com/en-us/topic/advanced-search-keywords-ea595928-5d63-4a0b-9c6b-0b769865e78a
+
+"""
+
+from urllib.parse import urlencode, unquote_plus
+import typing as t
+
+from lxml import html
+from dateutil import parser
+
+from searx.result_types import EngineResults
+from searx.utils import eval_xpath_list, eval_xpath, extract_text
+
+if t.TYPE_CHECKING:
+    from searx.extended_types import SXNG_Response
+    from searx.search.processors import OnlineParams
+
+about = {
+    "website": "https://www.aol.com",
+    "wikidata_id": "Q2407",
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": "HTML",
+}
+
+categories = ["general"]
+search_type = "search"  # supported: search, image, video
+
+paging = True
+safesearch = True
+time_range_support = True
+results_per_page = 10  # sent as ``pz`` and used to compute the start offset ``b``
+
+
+base_url = "https://search.aol.com"
+time_range_map = {"day": "1d", "week": "1w", "month": "1m", "year": "1y"}  # presumably AOL's ``age`` values
+safesearch_map = {0: "p", 1: "r", 2: "i"}  # SearXNG safesearch level -> ``vm`` value of the ``sB`` cookie
+
+
+def init(_):
+    if search_type not in ("search", "image", "video"):  # the only types response() has a parser for
+        raise ValueError(f"unsupported search type {search_type}")
+
+
+def request(query: str, params: "OnlineParams") -> None:
+    """Fill ``params`` with the AOL request: URL (query, offset, time range) and safesearch cookie."""
+    language, region = (params["searxng_locale"].split("-") + [None])[:2]
+    if language and language != "all":
+        query = f"{query} language:{language}"
+    if region:
+        query = f"{query} loc:{region}"
+
+    args: dict[str, str | int | None] = {
+        "q": query,
+        "b": (params["pageno"] - 1) * results_per_page + 1,  # 1-based index of the first result, pageno is 1-based
+        "pz": results_per_page,
+    }
+
+    if params["time_range"]:
+        args["fr2"] = "time"
+        args["age"] = time_range_map[params["time_range"]]  # translate day/week/month/year to 1d/1w/1m/1y
+    else:
+        args["fr2"] = "sb-top-search"
+
+    params["cookies"]["sB"] = f"vm={safesearch_map[params['safesearch']]}"
+    params["url"] = f"{base_url}/aol/{search_type}?{urlencode(args)}"
+    logger.debug(params)
+
+
+def _deobfuscate_url(obfuscated_url: str) -> str | None:
+    """Return the target URL from the ``RU=`` segment of an AOL click link.
+
+    E.g. ``https://search.aol.com/click/_ylt=..;_ylu=../RV=2/RU=https%3a%2f%2fexample.org/RK=0/RS=..``;
+    a falsy input gives ``None``, a link without a ``RU=`` segment is returned unchanged."""
+    if not obfuscated_url:
+        return None
+    targets = (seg[3:] for seg in obfuscated_url.split("/") if seg.startswith("RU="))
+    encoded = next(targets, None)
+    return obfuscated_url if encoded is None else unquote_plus(encoded)
+
+
+def _general_results(doc: html.HtmlElement) -> EngineResults:
+    """Collect the web results of ``doc``; items without a usable link are skipped."""
+    results = EngineResults()
+
+    for item in eval_xpath_list(doc, "//div[@id='web']//ol/li[not(contains(@class, 'first'))]"):
+        link = _deobfuscate_url(extract_text(eval_xpath(item, ".//h3/a/@href")) or "")
+        if not link:
+            continue
+        title = extract_text(eval_xpath(item, ".//h3/a"))
+        content = extract_text(eval_xpath(item, ".//div[contains(@class, 'compText')]"))
+        thumbnail = extract_text(eval_xpath(item, ".//a[contains(@class, 'thm')]/img/@data-src"))
+        results.add(
+            results.types.MainResult(
+                url=link,
+                title=title or "",
+                content=content or "",
+                thumbnail=thumbnail or "",
+            )
+        )
+
+    return results
+
+
+def _video_results(doc: html.HtmlElement) -> EngineResults:
+    """Collect the video results of ``doc``; items without a usable link are skipped."""
+    res = EngineResults()
+    for result in eval_xpath_list(doc, "//div[contains(@class, 'results')]//ol/li"):
+        obfuscated_url = extract_text(eval_xpath(result, ".//a/@href"))
+        if not obfuscated_url:
+            continue
+
+        url = _deobfuscate_url(obfuscated_url)
+        if not url:
+            continue
+
+        published_date_raw = extract_text(eval_xpath(result, ".//div[contains(@class, 'v-age')]"))
+        try:
+            published_date = parser.parse(published_date_raw or "")
+        except (parser.ParserError, OverflowError):  # a missing or odd date must not abort the whole page
+            published_date = None
+
+        res.add(
+            res.types.LegacyResult(
+                {
+                    "template": "videos.html",
+                    "url": url,
+                    "title": extract_text(eval_xpath(result, ".//h3")),
+                    "content": extract_text(eval_xpath(result, ".//div[contains(@class, 'compText')]")),
+                    "thumbnail": extract_text(eval_xpath(result, ".//img[contains(@class, 'thm')]/@src")),
+                    "length": extract_text(eval_xpath(result, ".//span[contains(@class, 'v-time')]")),
+                    "publishedDate": published_date,
+                }
+            )
+        )
+
+    return res
+
+
+def _image_results(doc: html.HtmlElement) -> EngineResults:
+    """Collect the image results of ``doc``.
+
+    Items without a usable link are skipped.
+    """
+    results = EngineResults()
+
+    for item in eval_xpath_list(doc, "//section[@id='results']//ul/li"):
+        href = extract_text(eval_xpath(item, "./a/@href"))
+        # the link is decoded the same way as for the other result types
+        img_src = _deobfuscate_url(href) if href else None
+        if not img_src:
+            continue
+
+        entry = {
+            "template": "images.html",
+            # results don't have an extra URL, only the image source, so it
+            # is used for both "url" and "img_src"
+            "url": img_src,
+            "title": extract_text(eval_xpath(item, ".//a/@aria-label")),
+            "thumbnail_src": extract_text(eval_xpath(item, ".//img/@src")),
+            "img_src": img_src,
+        }
+        results.add(results.types.LegacyResult(entry))
+
+    return results
+
+
+def response(resp: "SXNG_Response") -> EngineResults:
+    """Parse the HTML in ``resp`` with the parser of ``search_type`` and add the suggestions."""
+    doc = html.fromstring(resp.text)
+
+    parsers = {"search": _general_results, "image": _image_results, "video": _video_results}
+    parse = parsers.get(search_type)
+    if parse is None:
+        raise ValueError("unsupported search type")
+    results = parse(doc)
+
+    # every anchor matched by this XPath is reported as a suggestion
+    suggestion_xpath = ".//ol[contains(@class, 'searchRightBottom')]//table//a"
+    for link in eval_xpath_list(doc, suggestion_xpath):
+        text = extract_text(link)
+        results.add(results.types.LegacyResult({"suggestion": text}))
+
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 1cf62987d..d6e5e7e61 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -421,6 +421,24 @@ engines:
     shortcut: conda
     disabled: true
 
+  - name: aol
+    engine: aol
+    search_type: search
+    categories: [general]
+    shortcut: aol
+
+  - name: aol images
+    engine: aol
+    search_type: image
+    categories: [images]
+    shortcut: aoli
+
+  - name: aol videos
+    engine: aol
+    search_type: video
+    categories: [videos]
+    shortcut: aolv
+
   - name: arch linux wiki
     engine: archlinux
     shortcut: al