[fix] google: switch to using "Google App" for Android useragent (#5892)
I found a bypass using the Android Google App this time. However, unlike the iPhone GSA method, this one does have rate limits, although it took a couple of hundred consecutive requests to trigger them.
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -69,7 +69,7 @@ filter_mapping = {0: "off", 1: "medium", 2: "high"}
|
||||
|
||||
# Suggestions are links placed in a *card-section*, we extract only the text
|
||||
# from the links not the links itself.
|
||||
suggestion_xpath = '//div[contains(@class, "ouy7Mc")]//a'
|
||||
suggestion_xpath = '//div[contains(@class, "gGQDvd iIWm4b")]//a'
|
||||
|
||||
|
||||
_arcid_range = string.ascii_letters + string.digits + "_-"
|
||||
@@ -269,6 +269,15 @@ def get_google_info(params: "OnlineParams", eng_traits: EngineTraits) -> dict[st
|
||||
ret_val["headers"]["Accept"] = "*/*"
|
||||
ret_val["headers"]["User-Agent"] = gen_gsa_useragent()
|
||||
|
||||
# Hardcoded default ENID Header required alongside the Android Google App
|
||||
# User Agent
|
||||
ret_val["headers"]["__Secure-ENID"] = (
|
||||
"28.SE=II9FMkz92GewodDwKRBFsMISph7GsQs8JYLdXmAlprl6UcC02O2p7kfQlAWuwT"
|
||||
"oygcrqHpmwQSH57b0c2kXfRfo35J8aV5FYSeUzYB67hqZQ2tZB7-o0hlTKwb5qMjn8Cf"
|
||||
"w_AZ2s_6KIFMAl2goXGcXHSfgu4jwZOqShlHCcag0ppy_NnxJYWxpLkaeuGCICwWoIFJ"
|
||||
"HP6Gy4BOkIEsl1N_k6F6jMF_OklE9qIubiyKkNaA"
|
||||
)
|
||||
|
||||
# Cookies
|
||||
|
||||
# - https://github.com/searxng/searxng/pull/1679#issuecomment-1235432746
|
||||
@@ -328,14 +337,14 @@ def request(query: str, params: "OnlineParams") -> None:
|
||||
|
||||
|
||||
# regex match to get image map that is found inside the returned javascript:
|
||||
# (function(){google.ldi={ ... };google.pim={ ... };google.sib=false;google ...
|
||||
RE_DATA_IMAGE = re.compile(r'"((?:dimg|pimg|tsuid)_[^"]*)":"((?:https?:)?//[^"]*)')
|
||||
# (function(){var s='...';var i=['...'] ...}
|
||||
RE_DATA_IMAGE = re.compile(r"(data:image[^']*?)'[^']*?'((?:dimg|pimg|tsuid)[^']*)")
|
||||
|
||||
|
||||
def parse_url_images(text: str):
|
||||
data_image_map = {}
|
||||
|
||||
for img_id, image_url in RE_DATA_IMAGE.findall(text):
|
||||
for image_url, img_id in RE_DATA_IMAGE.findall(text):
|
||||
data_image_map[img_id] = image_url.encode('utf-8').decode("unicode-escape")
|
||||
logger.debug("data:image objects --> %s", list(data_image_map.keys()))
|
||||
return data_image_map
|
||||
@@ -353,19 +362,18 @@ def response(resp: "SXNG_Response"):
|
||||
dom = html.fromstring(resp.text)
|
||||
|
||||
# parse results
|
||||
|
||||
for result in eval_xpath_list(dom, './/div[contains(@class, "MjjYud")]'):
|
||||
for result in eval_xpath_list(dom, '//a[@data-ved and not(@class)]'):
|
||||
# pylint: disable=too-many-nested-blocks
|
||||
|
||||
try:
|
||||
title_tag = eval_xpath_getindex(result, './/div[contains(@role, "link")]', 0, default=None)
|
||||
title_tag = eval_xpath_getindex(result, './/div[@style]', 0, default=None)
|
||||
if title_tag is None:
|
||||
# this is not one of the common google results *sections*
|
||||
logger.debug("ignoring item from the result_xpath list: missing title")
|
||||
continue
|
||||
title = extract_text(title_tag)
|
||||
|
||||
raw_url = eval_xpath_getindex(result, ".//a/@href", 0, None)
|
||||
raw_url = result.get("href")
|
||||
if raw_url is None:
|
||||
logger.debug(
|
||||
'ignoring item from the result_xpath list: missing url of title "%s"',
|
||||
@@ -378,15 +386,15 @@ def response(resp: "SXNG_Response"):
|
||||
else:
|
||||
url = raw_url
|
||||
|
||||
content_nodes = eval_xpath(result, './/div[contains(@data-sncf, "1")]')
|
||||
content_nodes = eval_xpath(result, '../..//div[contains(@class, "ilUpNd H66NU aSRlid")]')
|
||||
for item in content_nodes:
|
||||
for script in item.xpath(".//script"):
|
||||
script.getparent().remove(script)
|
||||
|
||||
content = extract_text(content_nodes)
|
||||
content = extract_text(content_nodes[0])
|
||||
|
||||
# Images that are NOT the favicon
|
||||
xpath_image = eval_xpath_getindex(result, './/img[not(@class="XNo5Ab")]', index=0, default=None)
|
||||
xpath_image = eval_xpath_getindex(result, './/img', index=0, default=None)
|
||||
|
||||
thumbnail = None
|
||||
if xpath_image is not None:
|
||||
|
||||
@@ -13,7 +13,7 @@ from collections.abc import MutableMapping, Callable
|
||||
|
||||
from numbers import Number
|
||||
from os.path import splitext, join
|
||||
from random import choice
|
||||
from random import choice, randint
|
||||
from html.parser import HTMLParser
|
||||
from html import escape
|
||||
from urllib.parse import urljoin, urlparse, parse_qs, urlencode
|
||||
@@ -82,11 +82,11 @@ def gen_useragent(os_string: str | None = None) -> str:
|
||||
|
||||
|
||||
def gen_gsa_useragent() -> str:
|
||||
"""Return a random GSA User Agent suitable for Google
|
||||
"""Return a random "Android Google App" User Agent suitable for Google
|
||||
|
||||
See searx/data/gsa_useragents.txt
|
||||
"""
|
||||
return choice(gsa_useragents_loader())
|
||||
return choice(gsa_useragents_loader()) + " GoogleApp/" + str(randint(0, 9))
|
||||
|
||||
|
||||
class HTMLTextExtractor(HTMLParser):
|
||||
|
||||
@@ -26,7 +26,7 @@ def fetch_gsa_useragents() -> list[str]:
|
||||
|
||||
suas: set[str] = set()
|
||||
for ua in loads(decompress(response.content)):
|
||||
if ua["platform"] == "iPhone" and "GSA" in ua["userAgent"]:
|
||||
if "Android" in ua["userAgent"] and "Chrome" in ua["userAgent"] and "Samsung" not in ua["userAgent"]:
|
||||
suas.add(ua["userAgent"])
|
||||
|
||||
luas = list(suas)
|
||||
|
||||
Reference in New Issue
Block a user