[fix] google: switch to using "Google App" for Android useragent (#5892)

I found a bypass using the Android Google App this time. However, unlike the iPhone GSA method, this one does have rate limits, although it took a couple of hundred consecutive requests to trigger them.
This commit is contained in:
mg95
2026-03-23 16:55:48 +03:00
committed by GitHub
parent 6c7e9c1976
commit 2c1ce3bd37
4 changed files with 1548 additions and 61 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -69,7 +69,7 @@ filter_mapping = {0: "off", 1: "medium", 2: "high"}
# Suggestions are links placed in a *card-section*; we extract only the text
# from the links, not the links themselves.
suggestion_xpath = '//div[contains(@class, "ouy7Mc")]//a'
suggestion_xpath = '//div[contains(@class, "gGQDvd iIWm4b")]//a'
_arcid_range = string.ascii_letters + string.digits + "_-"
@@ -269,6 +269,15 @@ def get_google_info(params: "OnlineParams", eng_traits: EngineTraits) -> dict[st
ret_val["headers"]["Accept"] = "*/*"
ret_val["headers"]["User-Agent"] = gen_gsa_useragent()
# Hardcoded default ENID Header required alongside the Android Google App
# User Agent
ret_val["headers"]["__Secure-ENID"] = (
"28.SE=II9FMkz92GewodDwKRBFsMISph7GsQs8JYLdXmAlprl6UcC02O2p7kfQlAWuwT"
"oygcrqHpmwQSH57b0c2kXfRfo35J8aV5FYSeUzYB67hqZQ2tZB7-o0hlTKwb5qMjn8Cf"
"w_AZ2s_6KIFMAl2goXGcXHSfgu4jwZOqShlHCcag0ppy_NnxJYWxpLkaeuGCICwWoIFJ"
"HP6Gy4BOkIEsl1N_k6F6jMF_OklE9qIubiyKkNaA"
)
# Cookies
# - https://github.com/searxng/searxng/pull/1679#issuecomment-1235432746
@@ -328,14 +337,14 @@ def request(query: str, params: "OnlineParams") -> None:
# regex match to get image map that is found inside the returned javascript:
# (function(){google.ldi={ ... };google.pim={ ... };google.sib=false;google ...
RE_DATA_IMAGE = re.compile(r'"((?:dimg|pimg|tsuid)_[^"]*)":"((?:https?:)?//[^"]*)')
# (function(){var s='...';var i=['...'] ...}
RE_DATA_IMAGE = re.compile(r"(data:image[^']*?)'[^']*?'((?:dimg|pimg|tsuid)[^']*)")
def parse_url_images(text: str):
data_image_map = {}
for img_id, image_url in RE_DATA_IMAGE.findall(text):
for image_url, img_id in RE_DATA_IMAGE.findall(text):
data_image_map[img_id] = image_url.encode('utf-8').decode("unicode-escape")
logger.debug("data:image objects --> %s", list(data_image_map.keys()))
return data_image_map
@@ -353,19 +362,18 @@ def response(resp: "SXNG_Response"):
dom = html.fromstring(resp.text)
# parse results
for result in eval_xpath_list(dom, './/div[contains(@class, "MjjYud")]'):
for result in eval_xpath_list(dom, '//a[@data-ved and not(@class)]'):
# pylint: disable=too-many-nested-blocks
try:
title_tag = eval_xpath_getindex(result, './/div[contains(@role, "link")]', 0, default=None)
title_tag = eval_xpath_getindex(result, './/div[@style]', 0, default=None)
if title_tag is None:
# this is not one of the common google results *sections*
logger.debug("ignoring item from the result_xpath list: missing title")
continue
title = extract_text(title_tag)
raw_url = eval_xpath_getindex(result, ".//a/@href", 0, None)
raw_url = result.get("href")
if raw_url is None:
logger.debug(
'ignoring item from the result_xpath list: missing url of title "%s"',
@@ -378,15 +386,15 @@ def response(resp: "SXNG_Response"):
else:
url = raw_url
content_nodes = eval_xpath(result, './/div[contains(@data-sncf, "1")]')
content_nodes = eval_xpath(result, '../..//div[contains(@class, "ilUpNd H66NU aSRlid")]')
for item in content_nodes:
for script in item.xpath(".//script"):
script.getparent().remove(script)
content = extract_text(content_nodes)
content = extract_text(content_nodes[0])
# Images that are NOT the favicon
xpath_image = eval_xpath_getindex(result, './/img[not(@class="XNo5Ab")]', index=0, default=None)
xpath_image = eval_xpath_getindex(result, './/img', index=0, default=None)
thumbnail = None
if xpath_image is not None:

View File

@@ -13,7 +13,7 @@ from collections.abc import MutableMapping, Callable
from numbers import Number
from os.path import splitext, join
from random import choice
from random import choice, randint
from html.parser import HTMLParser
from html import escape
from urllib.parse import urljoin, urlparse, parse_qs, urlencode
@@ -82,11 +82,11 @@ def gen_useragent(os_string: str | None = None) -> str:
def gen_gsa_useragent() -> str:
"""Return a random GSA User Agent suitable for Google
"""Return a random "Android Google App" User Agent suitable for Google
See searx/data/gsa_useragents.txt
"""
return choice(gsa_useragents_loader())
return choice(gsa_useragents_loader()) + " GoogleApp/" + str(randint(0, 9))
class HTMLTextExtractor(HTMLParser):

View File

@@ -26,7 +26,7 @@ def fetch_gsa_useragents() -> list[str]:
suas: set[str] = set()
for ua in loads(decompress(response.content)):
if ua["platform"] == "iPhone" and "GSA" in ua["userAgent"]:
if "Android" in ua["userAgent"] and "Chrome" in ua["userAgent"] and "Samsung" not in ua["userAgent"]:
suas.add(ua["userAgent"])
luas = list(suas)