[fix] google: switch to using "Google App" for Android useragent (#5892)

I found a bypass using the Android Google App this time. However, unlike the iPhone GSA method, this one does have rate limits, although it took a couple of hundred consecutive requests to trigger them.
This commit is contained in:
mg95
2026-03-23 16:55:48 +03:00
committed by GitHub
parent 6c7e9c1976
commit 2c1ce3bd37
4 changed files with 1548 additions and 61 deletions

View File

@@ -69,7 +69,7 @@ filter_mapping = {0: "off", 1: "medium", 2: "high"}
# Suggestions are links placed in a *card-section*, we extract only the text
# from the links not the links itself.
suggestion_xpath = '//div[contains(@class, "ouy7Mc")]//a'
suggestion_xpath = '//div[contains(@class, "gGQDvd iIWm4b")]//a'
_arcid_range = string.ascii_letters + string.digits + "_-"
@@ -269,6 +269,15 @@ def get_google_info(params: "OnlineParams", eng_traits: EngineTraits) -> dict[st
ret_val["headers"]["Accept"] = "*/*"
ret_val["headers"]["User-Agent"] = gen_gsa_useragent()
# Hardcoded default ENID Header required alongside the Android Google App
# User Agent
ret_val["headers"]["__Secure-ENID"] = (
"28.SE=II9FMkz92GewodDwKRBFsMISph7GsQs8JYLdXmAlprl6UcC02O2p7kfQlAWuwT"
"oygcrqHpmwQSH57b0c2kXfRfo35J8aV5FYSeUzYB67hqZQ2tZB7-o0hlTKwb5qMjn8Cf"
"w_AZ2s_6KIFMAl2goXGcXHSfgu4jwZOqShlHCcag0ppy_NnxJYWxpLkaeuGCICwWoIFJ"
"HP6Gy4BOkIEsl1N_k6F6jMF_OklE9qIubiyKkNaA"
)
# Cookies
# - https://github.com/searxng/searxng/pull/1679#issuecomment-1235432746
@@ -328,14 +337,14 @@ def request(query: str, params: "OnlineParams") -> None:
# regex match to get image map that is found inside the returned javascript:
# (function(){google.ldi={ ... };google.pim={ ... };google.sib=false;google ...
RE_DATA_IMAGE = re.compile(r'"((?:dimg|pimg|tsuid)_[^"]*)":"((?:https?:)?//[^"]*)')
# (function(){var s='...';var i=['...'] ...}
RE_DATA_IMAGE = re.compile(r"(data:image[^']*?)'[^']*?'((?:dimg|pimg|tsuid)[^']*)")
def parse_url_images(text: str):
data_image_map = {}
for img_id, image_url in RE_DATA_IMAGE.findall(text):
for image_url, img_id in RE_DATA_IMAGE.findall(text):
data_image_map[img_id] = image_url.encode('utf-8').decode("unicode-escape")
logger.debug("data:image objects --> %s", list(data_image_map.keys()))
return data_image_map
@@ -353,19 +362,18 @@ def response(resp: "SXNG_Response"):
dom = html.fromstring(resp.text)
# parse results
for result in eval_xpath_list(dom, './/div[contains(@class, "MjjYud")]'):
for result in eval_xpath_list(dom, '//a[@data-ved and not(@class)]'):
# pylint: disable=too-many-nested-blocks
try:
title_tag = eval_xpath_getindex(result, './/div[contains(@role, "link")]', 0, default=None)
title_tag = eval_xpath_getindex(result, './/div[@style]', 0, default=None)
if title_tag is None:
# this not one of the common google results *section*
logger.debug("ignoring item from the result_xpath list: missing title")
continue
title = extract_text(title_tag)
raw_url = eval_xpath_getindex(result, ".//a/@href", 0, None)
raw_url = result.get("href")
if raw_url is None:
logger.debug(
'ignoring item from the result_xpath list: missing url of title "%s"',
@@ -378,15 +386,15 @@ def response(resp: "SXNG_Response"):
else:
url = raw_url
content_nodes = eval_xpath(result, './/div[contains(@data-sncf, "1")]')
content_nodes = eval_xpath(result, '../..//div[contains(@class, "ilUpNd H66NU aSRlid")]')
for item in content_nodes:
for script in item.xpath(".//script"):
script.getparent().remove(script)
content = extract_text(content_nodes)
content = extract_text(content_nodes[0])
# Images that are NOT the favicon
xpath_image = eval_xpath_getindex(result, './/img[not(@class="XNo5Ab")]', index=0, default=None)
xpath_image = eval_xpath_getindex(result, './/img', index=0, default=None)
thumbnail = None
if xpath_image is not None: