[fix] google: switch to using "Google App" for Android useragent (#5892)
I found a bypass using the Android Google App this time. However, unlike the iPhone GSA method, this one does have rate limits, although it took a couple of hundred consecutive requests to trigger them.
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -69,7 +69,7 @@ filter_mapping = {0: "off", 1: "medium", 2: "high"}
|
||||
|
||||
# Suggestions are links placed in a *card-section*, we extract only the text
|
||||
# from the links not the links itself.
|
||||
suggestion_xpath = '//div[contains(@class, "ouy7Mc")]//a'
|
||||
suggestion_xpath = '//div[contains(@class, "gGQDvd iIWm4b")]//a'
|
||||
|
||||
|
||||
_arcid_range = string.ascii_letters + string.digits + "_-"
|
||||
@@ -269,6 +269,15 @@ def get_google_info(params: "OnlineParams", eng_traits: EngineTraits) -> dict[st
|
||||
ret_val["headers"]["Accept"] = "*/*"
|
||||
ret_val["headers"]["User-Agent"] = gen_gsa_useragent()
|
||||
|
||||
# Hardcoded default ENID Header required alongside the Android Google App
|
||||
# User Agent
|
||||
ret_val["headers"]["__Secure-ENID"] = (
|
||||
"28.SE=II9FMkz92GewodDwKRBFsMISph7GsQs8JYLdXmAlprl6UcC02O2p7kfQlAWuwT"
|
||||
"oygcrqHpmwQSH57b0c2kXfRfo35J8aV5FYSeUzYB67hqZQ2tZB7-o0hlTKwb5qMjn8Cf"
|
||||
"w_AZ2s_6KIFMAl2goXGcXHSfgu4jwZOqShlHCcag0ppy_NnxJYWxpLkaeuGCICwWoIFJ"
|
||||
"HP6Gy4BOkIEsl1N_k6F6jMF_OklE9qIubiyKkNaA"
|
||||
)
|
||||
|
||||
# Cookies
|
||||
|
||||
# - https://github.com/searxng/searxng/pull/1679#issuecomment-1235432746
|
||||
@@ -328,14 +337,14 @@ def request(query: str, params: "OnlineParams") -> None:
|
||||
|
||||
|
||||
# regex match to get image map that is found inside the returned javascript:
|
||||
# (function(){google.ldi={ ... };google.pim={ ... };google.sib=false;google ...
|
||||
RE_DATA_IMAGE = re.compile(r'"((?:dimg|pimg|tsuid)_[^"]*)":"((?:https?:)?//[^"]*)')
|
||||
# (function(){var s='...';var i=['...'] ...}
|
||||
RE_DATA_IMAGE = re.compile(r"(data:image[^']*?)'[^']*?'((?:dimg|pimg|tsuid)[^']*)")
|
||||
|
||||
|
||||
def parse_url_images(text: str):
|
||||
data_image_map = {}
|
||||
|
||||
for img_id, image_url in RE_DATA_IMAGE.findall(text):
|
||||
for image_url, img_id in RE_DATA_IMAGE.findall(text):
|
||||
data_image_map[img_id] = image_url.encode('utf-8').decode("unicode-escape")
|
||||
logger.debug("data:image objects --> %s", list(data_image_map.keys()))
|
||||
return data_image_map
|
||||
@@ -353,19 +362,18 @@ def response(resp: "SXNG_Response"):
|
||||
dom = html.fromstring(resp.text)
|
||||
|
||||
# parse results
|
||||
|
||||
for result in eval_xpath_list(dom, './/div[contains(@class, "MjjYud")]'):
|
||||
for result in eval_xpath_list(dom, '//a[@data-ved and not(@class)]'):
|
||||
# pylint: disable=too-many-nested-blocks
|
||||
|
||||
try:
|
||||
title_tag = eval_xpath_getindex(result, './/div[contains(@role, "link")]', 0, default=None)
|
||||
title_tag = eval_xpath_getindex(result, './/div[@style]', 0, default=None)
|
||||
if title_tag is None:
|
||||
# this is not one of the common google results *sections*
|
||||
logger.debug("ignoring item from the result_xpath list: missing title")
|
||||
continue
|
||||
title = extract_text(title_tag)
|
||||
|
||||
raw_url = eval_xpath_getindex(result, ".//a/@href", 0, None)
|
||||
raw_url = result.get("href")
|
||||
if raw_url is None:
|
||||
logger.debug(
|
||||
'ignoring item from the result_xpath list: missing url of title "%s"',
|
||||
@@ -378,15 +386,15 @@ def response(resp: "SXNG_Response"):
|
||||
else:
|
||||
url = raw_url
|
||||
|
||||
content_nodes = eval_xpath(result, './/div[contains(@data-sncf, "1")]')
|
||||
content_nodes = eval_xpath(result, '../..//div[contains(@class, "ilUpNd H66NU aSRlid")]')
|
||||
for item in content_nodes:
|
||||
for script in item.xpath(".//script"):
|
||||
script.getparent().remove(script)
|
||||
|
||||
content = extract_text(content_nodes)
|
||||
content = extract_text(content_nodes[0])
|
||||
|
||||
# Images that are NOT the favicon
|
||||
xpath_image = eval_xpath_getindex(result, './/img[not(@class="XNo5Ab")]', index=0, default=None)
|
||||
xpath_image = eval_xpath_getindex(result, './/img', index=0, default=None)
|
||||
|
||||
thumbnail = None
|
||||
if xpath_image is not None:
|
||||
|
||||
@@ -13,7 +13,7 @@ from collections.abc import MutableMapping, Callable
|
||||
|
||||
from numbers import Number
|
||||
from os.path import splitext, join
|
||||
from random import choice
|
||||
from random import choice, randint
|
||||
from html.parser import HTMLParser
|
||||
from html import escape
|
||||
from urllib.parse import urljoin, urlparse, parse_qs, urlencode
|
||||
@@ -82,11 +82,11 @@ def gen_useragent(os_string: str | None = None) -> str:
|
||||
|
||||
|
||||
def gen_gsa_useragent() -> str:
|
||||
"""Return a random GSA User Agent suitable for Google
|
||||
"""Return a random "Android Google App" User Agent suitable for Google
|
||||
|
||||
See searx/data/gsa_useragents.txt
|
||||
"""
|
||||
return choice(gsa_useragents_loader())
|
||||
return choice(gsa_useragents_loader()) + " GoogleApp/" + str(randint(0, 9))
|
||||
|
||||
|
||||
class HTMLTextExtractor(HTMLParser):
|
||||
|
||||
@@ -26,7 +26,7 @@ def fetch_gsa_useragents() -> list[str]:
|
||||
|
||||
suas: set[str] = set()
|
||||
for ua in loads(decompress(response.content)):
|
||||
if ua["platform"] == "iPhone" and "GSA" in ua["userAgent"]:
|
||||
if "Android" in ua["userAgent"] and "Chrome" in ua["userAgent"] and "Samsung" not in ua["userAgent"]:
|
||||
suas.add(ua["userAgent"])
|
||||
|
||||
luas = list(suas)
|
||||
|
||||
Reference in New Issue
Block a user