From 220c42c8e95893f9e942fe8eb433a96cf8f01fa9 Mon Sep 17 00:00:00 2001 From: Bnyro Date: Tue, 24 Mar 2026 20:44:15 +0100 Subject: [PATCH] [feat] engines: add support for aol.com (#5882) Co-authored-by: Markus Heiser --- docs/dev/engines/online/aol.rst | 8 ++ searx/engines/aol.py | 208 ++++++++++++++++++++++++++++++++ searx/settings.yml | 18 +++ 3 files changed, 234 insertions(+) create mode 100644 docs/dev/engines/online/aol.rst create mode 100644 searx/engines/aol.py diff --git a/docs/dev/engines/online/aol.rst b/docs/dev/engines/online/aol.rst new file mode 100644 index 000000000..5a6ea7a63 --- /dev/null +++ b/docs/dev/engines/online/aol.rst @@ -0,0 +1,8 @@ +.. _aol engine: + +=== +AOL +=== + +.. automodule:: searx.engines.aol + :members: diff --git a/searx/engines/aol.py b/searx/engines/aol.py new file mode 100644 index 000000000..b63098451 --- /dev/null +++ b/searx/engines/aol.py @@ -0,0 +1,208 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +"""AOL supports WEB, image, and video search. Internally, it uses the Bing +index. + +AOL doesn't seem to support setting the language via request parameters, instead +the results are based on the URL. For example, there is + +- `search.aol.com `_ for English results +- `suche.aol.de `_ for German results + +However, AOL offers its services only in a few regions: + +- en-US: search.aol.com +- de-DE: suche.aol.de +- fr-FR: recherche.aol.fr +- en-GB: search.aol.co.uk +- en-CA: search.aol.ca + +In order to still offer sufficient support for language and region, the `search +keywords`_ known from Bing, ``language`` and ``loc`` (region), are added to the +search term (AOL is basically just a proxy for Bing). + +.. _search keywords: + https://support.microsoft.com/en-us/topic/advanced-search-keywords-ea595928-5d63-4a0b-9c6b-0b769865e78a + +""" + +from urllib.parse import urlencode, unquote_plus +import typing as t + +from lxml import html +from dateutil import parser + +from searx.result_types import EngineResults +from searx.utils import eval_xpath_list, eval_xpath, extract_text + +if t.TYPE_CHECKING: + from searx.extended_types import SXNG_Response + from searx.search.processors import OnlineParams + +about = { + "website": "https://www.aol.com", + "wikidata_id": "Q2407", + "official_api_documentation": None, + "use_official_api": False, + "require_api_key": False, + "results": "HTML", +} + +categories = ["general"] +search_type = "search" # supported: search, image, video + +paging = True +safesearch = True +time_range_support = True +results_per_page = 10 + + +base_url = "https://search.aol.com" +time_range_map = {"day": "1d", "week": "1w", "month": "1m", "year": "1y"} +safesearch_map = {0: "p", 1: "r", 2: "i"} + + +def init(_): + if search_type not in ("search", "image", "video"): + raise ValueError(f"unsupported search type {search_type}") + + +def request(query: str, params: "OnlineParams") -> None: + + language, region = (params["searxng_locale"].split("-") + [None])[:2] + if language and language != "all": + query = f"{query} language:{language}" + if region: + query = f"{query} loc:{region}" + + args: dict[str, str | int | None] = { + "q": query, + "b": params["pageno"] * results_per_page + 1, # page is 1-indexed + "pz": results_per_page, + } + + if params["time_range"]: + args["fr2"] = "time" + args["age"] = params["time_range"] + else: + args["fr2"] = "sb-top-search" + + params["cookies"]["sB"] = f"vm={safesearch_map[params['safesearch']]}" + params["url"] = f"{base_url}/aol/{search_type}?{urlencode(args)}" + logger.debug(params) + + +def _deobfuscate_url(obfuscated_url: str) -> str | None: + # URL looks like "https://search.aol.com/click/_ylt=AwjFSDjd;_ylu=JfsdjDFd/RV=2/RE=1774058166/RO=10/RU=https%3a%2f%2fen.wikipedia.org%2fwiki%2fTree/RK=0/RS=BP2CqeMLjscg4n8cTmuddlEQA2I-" # pylint: disable=line-too-long + if not obfuscated_url: + return None + + for part in obfuscated_url.split("/"): + if part.startswith("RU="): + return unquote_plus(part[3:]) + # pattern for de-obfuscating URL not found, fall back to Yahoo's tracking link + return obfuscated_url + + +def _general_results(doc: html.HtmlElement) -> EngineResults: + res = EngineResults() + + for result in eval_xpath_list(doc, "//div[@id='web']//ol/li[not(contains(@class, 'first'))]"): + obfuscated_url = extract_text(eval_xpath(result, ".//h3/a/@href")) + if not obfuscated_url: + continue + + url = _deobfuscate_url(obfuscated_url) + if not url: + continue + + res.add( + res.types.MainResult( + url=url, + title=extract_text(eval_xpath(result, ".//h3/a")) or "", + content=extract_text(eval_xpath(result, ".//div[contains(@class, 'compText')]")) or "", + thumbnail=extract_text(eval_xpath(result, ".//a[contains(@class, 'thm')]/img/@data-src")) or "", + ) + ) + return res + + +def _video_results(doc: html.HtmlElement) -> EngineResults: + res = EngineResults() + + for result in eval_xpath_list(doc, "//div[contains(@class, 'results')]//ol/li"): + obfuscated_url = extract_text(eval_xpath(result, ".//a/@href")) + if not obfuscated_url: + continue + + url = _deobfuscate_url(obfuscated_url) + if not url: + continue + + published_date_raw = extract_text(eval_xpath(result, ".//div[contains(@class, 'v-age')]")) + try: + published_date = parser.parse(published_date_raw or "") + except parser.ParserError: + published_date = None + + res.add( + res.types.LegacyResult( + { + "template": "videos.html", + "url": url, + "title": extract_text(eval_xpath(result, ".//h3")), + "content": extract_text(eval_xpath(result, ".//div[contains(@class, 'compText')]")), + "thumbnail": extract_text(eval_xpath(result, ".//img[contains(@class, 'thm')]/@src")), + "length": extract_text(eval_xpath(result, ".//span[contains(@class, 'v-time')]")), + "publishedDate": published_date, + } + ) + ) + + return res + + +def _image_results(doc: html.HtmlElement) -> EngineResults: + res = EngineResults() + + for result in eval_xpath_list(doc, "//section[@id='results']//ul/li"): + obfuscated_url = extract_text(eval_xpath(result, "./a/@href")) + if not obfuscated_url: + continue + + url = _deobfuscate_url(obfuscated_url) + if not url: + continue + + res.add( + res.types.LegacyResult( + { + "template": "images.html", + # results don't have an extra URL, only the image source + "url": url, + "title": extract_text(eval_xpath(result, ".//a/@aria-label")), + "thumbnail_src": extract_text(eval_xpath(result, ".//img/@src")), + "img_src": url, + } + ) + ) + + return res + + +def response(resp: "SXNG_Response") -> EngineResults: + doc = html.fromstring(resp.text) + + match search_type: + case "search": + results = _general_results(doc) + case "image": + results = _image_results(doc) + case "video": + results = _video_results(doc) + case _: + raise ValueError("unsupported search type") + + for suggestion in eval_xpath_list(doc, ".//ol[contains(@class, 'searchRightBottom')]//table//a"): + results.add(results.types.LegacyResult({"suggestion": extract_text(suggestion)})) + + return results diff --git a/searx/settings.yml b/searx/settings.yml index 1cf62987d..d6e5e7e61 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -421,6 +421,24 @@ engines: shortcut: conda disabled: true + - name: aol + engine: aol + search_type: search + categories: [general] + shortcut: aol + + - name: aol images + engine: aol + search_type: image + categories: [images] + shortcut: aoli + + - name: aol videos + engine: aol + search_type: video + categories: [videos] + shortcut: aolv + - name: arch linux wiki engine: archlinux shortcut: al