[fix] google: switch to using "Google App" for Android useragent (#5892)

I found a bypass using the Android Google App this time. However, unlike the iPhone GSA method, this one does have rate limits, although it took a couple of hundred consecutive requests to trigger them.
This commit is contained in:
mg95
2026-03-23 16:55:48 +03:00
committed by GitHub
parent 6c7e9c1976
commit 2c1ce3bd37
4 changed files with 1548 additions and 61 deletions

View File

@@ -69,7 +69,7 @@ filter_mapping = {0: "off", 1: "medium", 2: "high"}
# Suggestions are links placed in a *card-section*, we extract only the text
# from the links not the links itself.
suggestion_xpath = '//div[contains(@class, "ouy7Mc")]//a'
suggestion_xpath = '//div[contains(@class, "gGQDvd iIWm4b")]//a'
_arcid_range = string.ascii_letters + string.digits + "_-"
@@ -269,6 +269,15 @@ def get_google_info(params: "OnlineParams", eng_traits: EngineTraits) -> dict[st
ret_val["headers"]["Accept"] = "*/*"
ret_val["headers"]["User-Agent"] = gen_gsa_useragent()
# Hardcoded default ENID Header required alongside the Android Google App
# User Agent
ret_val["headers"]["__Secure-ENID"] = (
"28.SE=II9FMkz92GewodDwKRBFsMISph7GsQs8JYLdXmAlprl6UcC02O2p7kfQlAWuwT"
"oygcrqHpmwQSH57b0c2kXfRfo35J8aV5FYSeUzYB67hqZQ2tZB7-o0hlTKwb5qMjn8Cf"
"w_AZ2s_6KIFMAl2goXGcXHSfgu4jwZOqShlHCcag0ppy_NnxJYWxpLkaeuGCICwWoIFJ"
"HP6Gy4BOkIEsl1N_k6F6jMF_OklE9qIubiyKkNaA"
)
# Cookies
# - https://github.com/searxng/searxng/pull/1679#issuecomment-1235432746
@@ -328,14 +337,14 @@ def request(query: str, params: "OnlineParams") -> None:
# regex match to get image map that is found inside the returned javascript:
# (function(){google.ldi={ ... };google.pim={ ... };google.sib=false;google ...
RE_DATA_IMAGE = re.compile(r'"((?:dimg|pimg|tsuid)_[^"]*)":"((?:https?:)?//[^"]*)')
# (function(){var s='...';var i=['...'] ...}
RE_DATA_IMAGE = re.compile(r"(data:image[^']*?)'[^']*?'((?:dimg|pimg|tsuid)[^']*)")
def parse_url_images(text: str):
data_image_map = {}
for img_id, image_url in RE_DATA_IMAGE.findall(text):
for image_url, img_id in RE_DATA_IMAGE.findall(text):
data_image_map[img_id] = image_url.encode('utf-8').decode("unicode-escape")
logger.debug("data:image objects --> %s", list(data_image_map.keys()))
return data_image_map
@@ -353,19 +362,18 @@ def response(resp: "SXNG_Response"):
dom = html.fromstring(resp.text)
# parse results
for result in eval_xpath_list(dom, './/div[contains(@class, "MjjYud")]'):
for result in eval_xpath_list(dom, '//a[@data-ved and not(@class)]'):
# pylint: disable=too-many-nested-blocks
try:
title_tag = eval_xpath_getindex(result, './/div[contains(@role, "link")]', 0, default=None)
title_tag = eval_xpath_getindex(result, './/div[@style]', 0, default=None)
if title_tag is None:
# this not one of the common google results *section*
logger.debug("ignoring item from the result_xpath list: missing title")
continue
title = extract_text(title_tag)
raw_url = eval_xpath_getindex(result, ".//a/@href", 0, None)
raw_url = result.get("href")
if raw_url is None:
logger.debug(
'ignoring item from the result_xpath list: missing url of title "%s"',
@@ -378,15 +386,15 @@ def response(resp: "SXNG_Response"):
else:
url = raw_url
content_nodes = eval_xpath(result, './/div[contains(@data-sncf, "1")]')
content_nodes = eval_xpath(result, '../..//div[contains(@class, "ilUpNd H66NU aSRlid")]')
for item in content_nodes:
for script in item.xpath(".//script"):
script.getparent().remove(script)
content = extract_text(content_nodes)
content = extract_text(content_nodes[0])
# Images that are NOT the favicon
xpath_image = eval_xpath_getindex(result, './/img[not(@class="XNo5Ab")]', index=0, default=None)
xpath_image = eval_xpath_getindex(result, './/img', index=0, default=None)
thumbnail = None
if xpath_image is not None: