# SPDX-License-Identifier: AGPL-3.0-or-later """AOL supports WEB, image, and video search. Internally, it uses the Bing index. AOL doesn't seem to support setting the language via request parameters, instead the results are based on the URL. For example, there is - `search.aol.com `_ for English results - `suche.aol.de `_ for German results However, AOL offers its services only in a few regions: - en-US: search.aol.com - de-DE: suche.aol.de - fr-FR: recherche.aol.fr - en-GB: search.aol.co.uk - en-CA: search.aol.ca In order to still offer sufficient support for language and region, the `search keywords`_ known from Bing, ``language`` and ``loc`` (region), are added to the search term (AOL is basically just a proxy for Bing). .. _search keywords: https://support.microsoft.com/en-us/topic/advanced-search-keywords-ea595928-5d63-4a0b-9c6b-0b769865e78a """ from urllib.parse import urlencode, unquote_plus import typing as t from lxml import html from dateutil import parser from searx.result_types import EngineResults from searx.utils import eval_xpath_list, eval_xpath, extract_text if t.TYPE_CHECKING: from searx.extended_types import SXNG_Response from searx.search.processors import OnlineParams about = { "website": "https://www.aol.com", "wikidata_id": "Q2407", "official_api_documentation": None, "use_official_api": False, "require_api_key": False, "results": "HTML", } categories = ["general"] search_type = "search" # supported: search, image, video paging = True safesearch = True time_range_support = True results_per_page = 10 base_url = "https://search.aol.com" time_range_map = {"day": "1d", "week": "1w", "month": "1m", "year": "1y"} safesearch_map = {0: "p", 1: "r", 2: "i"} def init(_): if search_type not in ("search", "image", "video"): raise ValueError(f"unsupported search type {search_type}") def request(query: str, params: "OnlineParams") -> None: language, region = (params["searxng_locale"].split("-") + [None])[:2] if language and language != "all": query = f"{query} language:{language}" if region: query = f"{query} loc:{region}" args: dict[str, str | int | None] = { "q": query, "b": params["pageno"] * results_per_page + 1, # page is 1-indexed "pz": results_per_page, } if params["time_range"]: args["fr2"] = "time" args["age"] = params["time_range"] else: args["fr2"] = "sb-top-search" params["cookies"]["sB"] = f"vm={safesearch_map[params['safesearch']]}" params["url"] = f"{base_url}/aol/{search_type}?{urlencode(args)}" logger.debug(params) def _deobfuscate_url(obfuscated_url: str) -> str | None: # URL looks like "https://search.aol.com/click/_ylt=AwjFSDjd;_ylu=JfsdjDFd/RV=2/RE=1774058166/RO=10/RU=https%3a%2f%2fen.wikipedia.org%2fwiki%2fTree/RK=0/RS=BP2CqeMLjscg4n8cTmuddlEQA2I-" # pylint: disable=line-too-long if not obfuscated_url: return None for part in obfuscated_url.split("/"): if part.startswith("RU="): return unquote_plus(part[3:]) # pattern for de-obfuscating URL not found, fall back to Yahoo's tracking link return obfuscated_url def _general_results(doc: html.HtmlElement) -> EngineResults: res = EngineResults() for result in eval_xpath_list(doc, "//div[@id='web']//ol/li[not(contains(@class, 'first'))]"): obfuscated_url = extract_text(eval_xpath(result, ".//h3/a/@href")) if not obfuscated_url: continue url = _deobfuscate_url(obfuscated_url) if not url: continue res.add( res.types.MainResult( url=url, title=extract_text(eval_xpath(result, ".//h3/a")) or "", content=extract_text(eval_xpath(result, ".//div[contains(@class, 'compText')]")) or "", thumbnail=extract_text(eval_xpath(result, ".//a[contains(@class, 'thm')]/img/@data-src")) or "", ) ) return res def _video_results(doc: html.HtmlElement) -> EngineResults: res = EngineResults() for result in eval_xpath_list(doc, "//div[contains(@class, 'results')]//ol/li"): obfuscated_url = extract_text(eval_xpath(result, ".//a/@href")) if not obfuscated_url: continue url = _deobfuscate_url(obfuscated_url) if not url: continue published_date_raw = extract_text(eval_xpath(result, ".//div[contains(@class, 'v-age')]")) try: published_date = parser.parse(published_date_raw or "") except parser.ParserError: published_date = None res.add( res.types.LegacyResult( { "template": "videos.html", "url": url, "title": extract_text(eval_xpath(result, ".//h3")), "content": extract_text(eval_xpath(result, ".//div[contains(@class, 'compText')]")), "thumbnail": extract_text(eval_xpath(result, ".//img[contains(@class, 'thm')]/@src")), "length": extract_text(eval_xpath(result, ".//span[contains(@class, 'v-time')]")), "publishedDate": published_date, } ) ) return res def _image_results(doc: html.HtmlElement) -> EngineResults: res = EngineResults() for result in eval_xpath_list(doc, "//section[@id='results']//ul/li"): obfuscated_url = extract_text(eval_xpath(result, "./a/@href")) if not obfuscated_url: continue url = _deobfuscate_url(obfuscated_url) if not url: continue res.add( res.types.LegacyResult( { "template": "images.html", # results don't have an extra URL, only the image source "url": url, "title": extract_text(eval_xpath(result, ".//a/@aria-label")), "thumbnail_src": extract_text(eval_xpath(result, ".//img/@src")), "img_src": url, } ) ) return res def response(resp: "SXNG_Response") -> EngineResults: doc = html.fromstring(resp.text) match search_type: case "search": results = _general_results(doc) case "image": results = _image_results(doc) case "video": results = _video_results(doc) case _: raise ValueError("unsupported search type") for suggestion in eval_xpath_list(doc, ".//ol[contains(@class, 'searchRightBottom')]//table//a"): results.add(results.types.LegacyResult({"suggestion": extract_text(suggestion)})) return results