diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index dcf6543d6..7c2bc1b63 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -2,25 +2,196 @@ """ DuckDuckGo WEB ~~~~~~~~~~~~~~ -""" +DDG's WEB search: + +- DuckDuckGo WEB : ``https://links.duckduckgo.com/d.js?q=..`` (HTTP GET) +- DuckDuckGo WEB no-AI: ``https://noai.duckduckgo.com/`` (HTTP GET) +- DuckDuckGo WEB html : ``https://html.duckduckgo.com/html`` (HTTP POST no-JS / form data) +- DuckDuckGo WEB lite : ``https://lite.duckduckgo.com/lite`` (HTTP POST no-JS / form data) + +DDG's content search / see engine ``duckduckgo_extra.py`` + +- DuckDuckGo Images : ``https://duckduckgo.com/i.js??q=...&vqd=...`` +- DuckDuckGo Videos : ``https://duckduckgo.com/v.js??q=...&vqd=...`` +- DuckDuckGo News : ``https://duckduckgo.com/news.js??q=...&vqd=...`` + +.. hint:: + + For WEB searches and to determine the ``vqd`` value, DDG-html (no-JS) is + used. + +Special features of the no-JS services (DDG-lite & DDG-html): + +- The no-JS clients receive a form that contains all the controlling parameters. +- When the form data is submitted, a real WEB browser sets the HTTP _Sec-Fetch_ headers. + +HTML ``
``, HTTP-Headers & DDG's bot Blocker: + + The HTTP *Sec-Fetch* headers (and the User-Agent_, see below) are generated by + the WEB-client and are checked by DDG's bot blocker. + +At least the follow-up page (content) is accessed by sending form data with +*Sec-Fetch* headers. If those HTTP headers are incorrect or missing, DDG's bot +protection puts the IP on its block list:: + + Sec-Fetch-Dest: document + Sec-Fetch-Mode: navigate + Sec-Fetch-Site: same-origin + Sec-Fetch-User: ?1 + +To simulate the behavior of a real browser session, it might be necessary to +evaluate additional headers. For example, in the response from DDG, the +Referrer-Policy_ is always set to ``origin``. A real browser would then include +the following header in the next request:: + + Referer: https://html.duckduckgo.com/ + +The fields of the html-form are reverse-engineered from DDG-html and may be +subject to additional bot detection mechanisms and breaking changes in the +future. + +Query field: + +Intro page: https://html.duckduckgo.com/html/ + +- ``q`` (str): Search query string +- ``b`` (str): Beginning parameter - empty string for first page requests. If a + second page is requested, this field is not set! + +Search options: + +- ``kl`` (str): Keyboard language/region code (e.g. 'en-us' default: 'wt-wt') +- ``df`` (str): Time filter, maps to values like 'd' (day), 'w' (week), 'm' (month), 'y' (year) + +The key/value pairs ``df`` and ``kl`` are additionally saved in the cookies, +example:: + + Cookie: kl=en-us; df=m + +*next page* form fields: + +- ``nextParams`` (str): Continuation parameters from previous page response, + typically empty string. Opposite of ``b``; this field is not set when + requesting the first result page. 
+ +- ``api`` (str): API endpoint identifier, typically 'd.js' +- ``o`` (str): Output format, typically ``json`` +- ``v`` (str): Typically ``l`` for subsequent pages + + +- ``dc`` (int): Display count - value equal to offset (s) + 1 +- ``s`` (int): Search offset for pagination +- ``vqd`` (str): Validation query digest + +General assumptions regarding DDG's bot blocker: + +- Except ``Cookie: kl=..; df=..`` DDG does not use cookies in any of its + services. + +- DDG does not accept queries with more than 499 chars + +- The ``vqd`` value ("Validation query digest") is needed to pass DDG's bot + protection and is used by all requests to DDG. + +- The ``vqd`` value is generally not needed for the first query (intro); it is + only required when additional pages are accessed (or when new content needs to + be loaded for the query while scrolling). + +- The second page (additional content) for a query cannot be requested without + ``vqd``, as this would lead to an immediate blocking, since such a use-case + does not exist in the process flows provided by DDG (and is a clear indication + of a bot). + +The following HTTP headers are being evaluated (and may possibly be responsible +for issues): + +User-Agent_: + The HTTP User-Agent is also involved in the formation of the vqd value, read + `DuckDuckGo Bot Detection Research & Solution`_. However, it is not checked + whether the UA is a known header. Still, it is possible that certain UA + headers (such as curl) are filtered. + +Sec-Fetch-Mode_: + In the past, Sec-Fetch-Mode had to be set to 'navigate', otherwise there were + problems with the bot blocker. I don't know if DDG still evaluates this + header today. + +Accept-Language_: + DDG-Lite and DDG-HTML TRY to guess user's preferred language from the HTTP + ``Accept-Language``. Optionally, the user can select a region filter (but not a + language). + +In DDG's bot blocker, the IP will be blocked (DDG does not have a client session!) 
+ +- As far as is known, it is possible to unblock an IP by executing a + DDG query in a real web browser over the blocked IP (at least that's my + assumption). + + How exactly the blocking mechanism currently works is not fully known, and + there were also changes to the bot blocker in the period of Q3/Q4 2025: in the + past, the IP blocking was implemented as a 'sliding window' (unblock after + about 1 hour without requests from this IP) + +Terms / phrases that you keep coming across: + +- ``d.js``, ``i.js``, ``v.js``, ``news.js`` are the endpoints of the DDG's web + API through which additional content for a query can be requested (vqd + required) + + The ``*.js`` endpoints return a JSON response and can therefore only be + executed on a JS-capable client. + + The service at https://lite.duckduckgo.com/lite offers general WEB searches + (no news, videos etc). DDG-lite and DDG-html can be used by clients that do + not support JS, aka *no-JS*. + + DDG-lite works a bit differently: here, ``d.js`` is not an endpoint but a + field (``api=d.js``) in a form that is sent to DDG-lite. + +- The request argument ``origin=funnel_home_website`` is often seen in the DDG + services when the category is changed (e.g., from web search to news, images, + or to the video category) + +.. _DuckDuckGo Bot Detection Research & Solution: + https://github.com/ggfevans/searxng/blob/mod-sidecar-harvester/docs/ddg-bot-detection-research.md + +.. _Sec-Fetch-Mode: + https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Sec-Fetch-Mode + +.. _Referrer-Policy: + https://developer.mozilla.org/docs/Web/HTTP/Reference/Headers/Referrer-Policy#directives + +.. _Referer: + https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Referer + +.. _User-Agent: + https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/User-Agent + +.. 
_Accept-Language: + https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Accept-Language + +""" +# pylint: disable=global-statement + +import typing as t import json import re -from urllib.parse import quote_plus - import babel import lxml.html -from searx import ( - locales, - external_bang, -) +from searx import locales + +from searx.external_bang import EXTERNAL_BANGS, get_node # type: ignore + from searx.utils import ( eval_xpath, eval_xpath_getindex, extr, extract_text, + ElementType, + gen_useragent, ) from searx.network import get # see https://github.com/searxng/searxng/issues/762 from searx.enginelib.traits import EngineTraits @@ -28,119 +199,88 @@ from searx.enginelib import EngineCache from searx.exceptions import SearxEngineCaptchaException from searx.result_types import EngineResults -about = { - "website": 'https://lite.duckduckgo.com/lite/', - "wikidata_id": 'Q12805', +if t.TYPE_CHECKING: + from searx.extended_types import SXNG_Response + from searx.search.processors import OnlineParams + +about: dict[str, str | bool] = { + "website": "https://lite.duckduckgo.com/lite/", + "wikidata_id": "Q12805", "use_official_api": False, "require_api_key": False, - "results": 'HTML', + "results": "HTML", } -send_accept_language_header = True -"""DuckDuckGo-Lite tries to guess user's preferred language from the HTTP -``Accept-Language``. Optional the user can select a region filter (but not a -language). 
-""" +categories: list[str] = ["general", "web"] +paging: bool = True +time_range_support: bool = True +safesearch: bool = True +"""DDG-lite: user can't select but the results are filtered.""" -# engine dependent config -categories = ['general', 'web'] -paging = True -time_range_support = True -safesearch = True # user can't select but the results are filtered +ddg_url: str = "https://html.duckduckgo.com/html/" +"""The process flow for determining the ``vqd`` values was implemented for the +no-JS variant (DDG-html)""" -url = "https://html.duckduckgo.com/html/" +time_range_dict: dict[str, str] = {"day": "d", "week": "w", "month": "m", "year": "y"} -time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'} -form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'} - -_CACHE: EngineCache = None # type: ignore +_CACHE: EngineCache = None # pyright: ignore[reportAssignmentType] """Persistent (SQLite) key/value cache that deletes its values after ``expire`` seconds.""" +_HTTP_User_Agent: str = gen_useragent() -def get_cache(): - global _CACHE # pylint: disable=global-statement - if _CACHE is None: - _CACHE = EngineCache("duckduckgo") # type:ignore + +def get_cache() -> EngineCache: + global _CACHE + if _CACHE is None: # pyright: ignore[reportUnnecessaryComparison] + _CACHE = EngineCache("duckduckgo") # pyright: ignore[reportUnreachable] return _CACHE -def get_vqd(query: str, region: str, force_request: bool = False) -> str: - """Returns the ``vqd`` that fits to the *query*. 
- - :param query: The query term - :param region: DDG's region code - :param force_request: force a request to get a vqd value from DDG - - TL;DR; the ``vqd`` value is needed to pass DDG's bot protection and is used - by all request to DDG: - - - DuckDuckGo Lite: ``https://lite.duckduckgo.com/lite`` (POST form data) - - DuckDuckGo Web: ``https://links.duckduckgo.com/d.js?q=...&vqd=...`` - - DuckDuckGo Images: ``https://duckduckgo.com/i.js??q=...&vqd=...`` - - DuckDuckGo Videos: ``https://duckduckgo.com/v.js??q=...&vqd=...`` - - DuckDuckGo News: ``https://duckduckgo.com/news.js??q=...&vqd=...`` - - DDG's bot detection is sensitive to the ``vqd`` value. For some search terms - (such as extremely long search terms that are often sent by bots), no ``vqd`` - value can be determined. - - If SearXNG cannot determine a ``vqd`` value, then no request should go out - to DDG. - - .. attention:: - - A request with a wrong ``vqd`` value leads to DDG temporarily putting - SearXNG's IP on a block list. - - Requests from IPs in this block list run into timeouts. Not sure, but it - seems the block list is a sliding window: to get my IP rid from the bot list - I had to cool down my IP for 1h (send no requests from that IP to DDG). 
- """ +def set_vqd(query: str | int, value: str, params: "OnlineParams") -> None: cache = get_cache() - key = cache.secret_hash(f"{query}//{region}") - value = cache.get(key=key) - if value is not None and not force_request: - logger.debug("vqd: re-use cached value: %s", value) - return value - - logger.debug("vqd: request value from from duckduckgo.com") - resp = get(f'https://duckduckgo.com/?q={quote_plus(query)}') - if resp.status_code == 200: # type: ignore - value = extr(resp.text, 'vqd="', '"') # type: ignore - if value: - logger.debug("vqd value from duckduckgo.com request: '%s'", value) - else: - logger.error("vqd: can't parse value from ddg response (return empty string)") - return "" - else: - logger.error("vqd: got HTTP %s from duckduckgo.com", resp.status_code) - - if value: - cache.set(key=key, value=value) - else: - logger.error("none vqd value from duckduckgo.com: HTTP %s", resp.status_code) - return value - - -def set_vqd(query: str, region: str, value: str): - cache = get_cache() - key = cache.secret_hash(f"{query}//{region}") + key = cache.secret_hash(f"{query}//{params['headers']['User-Agent']}") cache.set(key=key, value=value, expire=3600) -def get_ddg_lang(eng_traits: EngineTraits, sxng_locale, default='en_US'): +def get_vqd( + query: str, + params: "OnlineParams", +) -> str: + """Returns the vqd_ vakue that fits to the *query* (and HTTP User-Agent_ + header). + + :param query: the query term + :param params: request parameters + """ + cache = get_cache() + key = cache.secret_hash(f"{query}//{params['headers']['User-Agent']}") + value: str = cache.get(key=key) or "" + if value: + logger.debug("get_vqd: re-use cached value: %s", value) + return value + + +def get_ddg_lang( + eng_traits: EngineTraits, + sxng_locale: str, + default: str = "en_US", +) -> str | None: """Get DuckDuckGo's language identifier from SearXNG's locale. - DuckDuckGo defines its languages by region codes (see - :py:obj:`fetch_traits`). + .. 
hint:: - To get region and language of a DDG service use: + `DDG-lite `__ and the *no Javascript* + page https://html.duckduckgo.com/html do not offer a language selection + to the user. + + DDG defines its languages by a region code (:py:obj:`fetch_traits`). To + get region and language of a DDG service use: .. code: python - eng_region = traits.get_region(params['searxng_locale'], traits.all_locale) - eng_lang = get_ddg_lang(traits, params['searxng_locale']) + eng_region = traits.get_region(params["searxng_locale"], traits.all_locale) + eng_lang = get_ddg_lang(traits, params["searxng_locale"]) It might confuse, but the ``l`` value of the cookie is what SearXNG calls the *region*: @@ -152,220 +292,225 @@ def get_ddg_lang(eng_traits: EngineTraits, sxng_locale, default='en_US'): params['cookies']['ah'] = eng_region params['cookies']['l'] = eng_region - .. hint:: - - `DDG-lite `__ and the *no Javascript* - page https://html.duckduckgo.com/html do not offer a language selection - to the user, only a region can be selected by the user (``eng_region`` - from the example above). 
DDG-lite and *no Javascript* store the selected - region in a cookie:: - - params['cookies']['kl'] = eng_region # 'ar-es' - """ - return eng_traits.custom['lang_region'].get( # type: ignore - sxng_locale, eng_traits.get_language(sxng_locale, default) - ) + lang: str | None = eng_traits.get_language(sxng_locale, default) + + return eng_traits.custom["lang_region"].get(sxng_locale, lang) or None -ddg_reg_map = { - 'tw-tzh': 'zh_TW', - 'hk-tzh': 'zh_HK', - 'ct-ca': 'skip', # ct-ca and es-ca both map to ca_ES - 'es-ca': 'ca_ES', - 'id-en': 'id_ID', - 'no-no': 'nb_NO', - 'jp-jp': 'ja_JP', - 'kr-kr': 'ko_KR', - 'xa-ar': 'ar_SA', - 'sl-sl': 'sl_SI', - 'th-en': 'th_TH', - 'vn-en': 'vi_VN', +ddg_reg_map: dict[str, str] = { + "tw-tzh": "zh_TW", + "hk-tzh": "zh_HK", + "ct-ca": "skip", # ct-ca and es-ca both map to ca_ES + "es-ca": "ca_ES", + "id-en": "id_ID", + "no-no": "nb_NO", + "jp-jp": "ja_JP", + "kr-kr": "ko_KR", + "xa-ar": "ar_SA", + "sl-sl": "sl_SI", + "th-en": "th_TH", + "vn-en": "vi_VN", } -ddg_lang_map = { +ddg_lang_map: dict[str, str] = { # use ar --> ar_EG (Egypt's arabic) - "ar_DZ": 'lang_region', - "ar_JO": 'lang_region', - "ar_SA": 'lang_region', + "ar_DZ": "lang_region", + "ar_JO": "lang_region", + "ar_SA": "lang_region", # use bn --> bn_BD - 'bn_IN': 'lang_region', + "bn_IN": "lang_region", # use de --> de_DE - 'de_CH': 'lang_region', + "de_CH": "lang_region", # use en --> en_US, - 'en_AU': 'lang_region', - 'en_CA': 'lang_region', - 'en_GB': 'lang_region', + "en_AU": "lang_region", + "en_CA": "lang_region", + "en_GB": "lang_region", # Esperanto - 'eo_XX': 'eo', + "eo_XX": "eo", # use es --> es_ES, - 'es_AR': 'lang_region', - 'es_CL': 'lang_region', - 'es_CO': 'lang_region', - 'es_CR': 'lang_region', - 'es_EC': 'lang_region', - 'es_MX': 'lang_region', - 'es_PE': 'lang_region', - 'es_UY': 'lang_region', - 'es_VE': 'lang_region', + "es_AR": "lang_region", + "es_CL": "lang_region", + "es_CO": "lang_region", + "es_CR": "lang_region", + "es_EC": "lang_region", + 
"es_MX": "lang_region", + "es_PE": "lang_region", + "es_UY": "lang_region", + "es_VE": "lang_region", # use fr --> rf_FR - 'fr_CA': 'lang_region', - 'fr_CH': 'lang_region', - 'fr_BE': 'lang_region', + "fr_CA": "lang_region", + "fr_CH": "lang_region", + "fr_BE": "lang_region", # use nl --> nl_NL - 'nl_BE': 'lang_region', + "nl_BE": "lang_region", # use pt --> pt_PT - 'pt_BR': 'lang_region', + "pt_BR": "lang_region", # skip these languages - 'od_IN': 'skip', - 'io_XX': 'skip', - 'tokipona_XX': 'skip', + "od_IN": "skip", + "io_XX": "skip", + "tokipona_XX": "skip", } -def quote_ddg_bangs(query): - # quote ddg bangs - query_parts = [] +def quote_ddg_bangs(query: str) -> str: + """To avoid a redirect, the !bang directives in the query string are + quoted.""" - # for val in re.split(r'(\s+)', query): - for val in re.split(r'(\s+)', query): + _q: list[str] = [] + + for val in re.split(r"(\s+)", query): if not val.strip(): continue - if val.startswith('!') and external_bang.get_node(external_bang.EXTERNAL_BANGS, val[1:]): + + if val.startswith('!') and get_node(EXTERNAL_BANGS, val[1:]): val = f"'{val}'" - query_parts.append(val) - return ' '.join(query_parts) + _q.append(val) + return " ".join(_q) -def request(query, params): - query = quote_ddg_bangs(query) +def request(query: str, params: "OnlineParams") -> None: if len(query) >= 500: # DDG does not accept queries with more than 499 chars params["url"] = None return - eng_region: str = traits.get_region(params['searxng_locale'], traits.all_locale) # type: ignore + query = quote_ddg_bangs(query) + eng_region: str = traits.get_region( + params["searxng_locale"], + traits.all_locale, + ) # pyright: ignore[reportAssignmentType] - # Note: The API is reverse-engineered from DuckDuckGo's HTML webpage - # (https://html.duckduckgo.com/html/) and may be subject to additional bot detection mechanisms - # and breaking changes in the future. 
- # - # The params['data'] dictionary can have the following key parameters, in this order: - # - q (str): Search query string - # - b (str): Beginning parameter - empty string for first page requests - # - s (int): Search offset for pagination - # - nextParams (str): Continuation parameters from previous page response, typically empty - # - v (str): Typically 'l' for subsequent pages - # - o (str): Output format, typically 'json' - # - dc (int): Display count - value equal to offset (s) + 1 - # - api (str): API endpoint identifier, typically 'd.js' - # - vqd (str): Validation query digest - # - kl (str): Keyboard language/region code (e.g., 'en-us') - # - df (str): Time filter, maps to values like 'd' (day), 'w' (week), 'm' (month), 'y' (year) + # HTTP headers + # ============ - params['data']['q'] = query + headers = params["headers"] - if params['pageno'] == 1: - params['data']['b'] = "" - elif params['pageno'] >= 2: - offset = 10 + (params['pageno'] - 2) * 15 # Page 2 = 10, Page 3+ = 10 + n*15 - params['data']['s'] = offset - params['data']['nextParams'] = form_data.get('nextParams', '') - params['data']['v'] = form_data.get('v', 'l') - params['data']['o'] = form_data.get('o', 'json') - params['data']['dc'] = offset + 1 - params['data']['api'] = form_data.get('api', 'd.js') + # The vqd value is generated from the query and the UA header. To be able to + # reuse the vqd value, the UA header must be static. + headers["User-Agent"] = _HTTP_User_Agent + # Sec-Fetch-* headers are already set by the ``OnlineProcessor``. 
+ # headers["Sec-Fetch-Dest"] = "document" + # headers["Sec-Fetch-Mode"] = "navigate" + # headers["Sec-Fetch-Site"] = "same-origin" + # headers["Sec-Fetch-User"] = "?1" + + headers["Referer"] = "https://html.duckduckgo.com/" + + ui_lang = params["searxng_locale"] + if not headers.get("Accept-Language"): + headers["Accept-Language"] = f"{ui_lang},{ui_lang}-{ui_lang.upper()};q=0.7" + + # DDG search form (POST data) + # =========================== + + # form_data: dict[str,str] = {"v": "l", "api": "d.js", "o": "json"} + # """The WEB-API "endpoint" is ``api``.""" + + data = params["data"] + data["q"] = query + params["url"] = ddg_url + params["method"] = "POST" + + if params["pageno"] == 1: + data["b"] = "" + else: # vqd is required to request other pages after the first one - vqd = get_vqd(query, eng_region, force_request=False) + vqd = get_vqd(query=query, params=params) if vqd: - params['data']['vqd'] = vqd + data["vqd"] = vqd else: - # Don't try to call follow up pages without a vqd value. + # Don"t try to call follow up pages without a vqd value. # DDG recognizes this as a request from a bot. This lowers the # reputation of the SearXNG IP and DDG starts to activate CAPTCHAs. - params["url"] = None - return + # set suspend time to zero is OK --> ddg does not block the IP + raise SearxEngineCaptchaException( + suspended_time=0, message=f"VQD missed (page: {params['pageno']}, locale: {params['searxng_locale']})" + ) - if params['searxng_locale'].startswith("zh"): + if params["searxng_locale"].startswith("zh"): # Some locales (at least China) do not have a "next page" button and DDG # will return a HTTP/2 403 Forbidden for a request of such a page. 
params["url"] = None return - # Put empty kl in form data if language/region set to all + data["nextParams"] = "" + data["api"] = "d.js" + data["o"] = "json" + data["v"] = "l" + + offset = 10 + (params["pageno"] - 2) * 15 # Page 2 = 10, Page 2+n = 10 + n*15 + data["dc"] = offset + 1 + data["s"] = offset + if eng_region == "wt-wt": - params['data']['kl'] = "" + # Put empty kl in form data if language/region set to all + # data["kl"] = "" + data["kl"] = "wt-wt" else: - params['data']['kl'] = eng_region + data["kl"] = eng_region + params["cookies"]["kl"] = eng_region - params['data']['df'] = '' - if params['time_range'] in time_range_dict: - params['data']['df'] = time_range_dict[params['time_range']] - params['cookies']['df'] = time_range_dict[params['time_range']] + t_range: str = time_range_dict.get(str(params["time_range"]), "") + if t_range: + data["df"] = t_range + params["cookies"]["df"] = t_range - params['cookies']['kl'] = eng_region + params["headers"]["Content-Type"] = "application/x-www-form-urlencoded" + params["headers"]["Referer"] = ddg_url - params['url'] = url - params['method'] = 'POST' - - params['headers']['Content-Type'] = 'application/x-www-form-urlencoded' - params['headers']['Referer'] = url - params['headers']['Sec-Fetch-Dest'] = "document" - params['headers']['Sec-Fetch-Mode'] = "navigate" # at least this one is used by ddg's bot detection - params['headers']['Sec-Fetch-Site'] = "same-origin" - params['headers']['Sec-Fetch-User'] = "?1" - - logger.debug("param headers: %s", params['headers']) - logger.debug("param data: %s", params['data']) - logger.debug("param cookies: %s", params['cookies']) + logger.debug("param headers: %s", params["headers"]) + logger.debug("param data: %s", params["data"]) + logger.debug("param cookies: %s", params["cookies"]) -def is_ddg_captcha(dom): +def is_ddg_captcha(dom: ElementType): """In case of CAPTCHA ddg response its own *not a Robot* dialog and is not redirected to a CAPTCHA page.""" return 
bool(eval_xpath(dom, "//form[@id='challenge-form']")) -def response(resp) -> EngineResults: - results = EngineResults() +def response(resp: "SXNG_Response") -> EngineResults: + res = EngineResults() if resp.status_code == 303: - return results + return res doc = lxml.html.fromstring(resp.text) + params = resp.search_params if is_ddg_captcha(doc): # set suspend time to zero is OK --> ddg does not block the IP - raise SearxEngineCaptchaException(suspended_time=0, message=f"CAPTCHA ({resp.search_params['data'].get('kl')})") + raise SearxEngineCaptchaException(suspended_time=0, message=f"CAPTCHA ({params['data'].get('kl')})") form = eval_xpath(doc, '//input[@name="vqd"]/..') + + # Some locales (at least China) do not have a "next page" button and DDG + # will return a HTTP/2 403 Forbidden for a request of such a page. if len(form): - # some locales (at least China) does not have a "next page" button form = form[0] form_vqd = eval_xpath(form, '//input[@name="vqd"]/@value')[0] + q: str = str(params["data"]["q"]) set_vqd( - query=resp.search_params['data']['q'], - region=resp.search_params['data']['kl'], + query=q, value=str(form_vqd), + params=resp.search_params, ) # just select "web-result" and ignore results of class "result--ad result--ad--small" for div_result in eval_xpath(doc, '//div[@id="links"]/div[contains(@class, "web-result")]'): - - item = {} - title = eval_xpath(div_result, './/h2/a') - if not title: - # this is the "No results." 
item in the result list - continue - item["title"] = extract_text(title) - item["url"] = eval_xpath(div_result, './/h2/a/@href')[0] - item["content"] = extract_text( - eval_xpath_getindex(div_result, './/a[contains(@class, "result__snippet")]', 0, []) + _title = eval_xpath(div_result, ".//h2/a") + _content = eval_xpath_getindex(div_result, './/a[contains(@class, "result__snippet")]', 0, []) + res.add( + res.types.MainResult( + title=extract_text(_title) or "", + url=eval_xpath(div_result, ".//h2/a/@href")[0], + content=extract_text(_content) or "", + ) ) - results.append(item) zero_click_info_xpath = '//div[@id="zero_click_abstract"]' zero_click = extract_text(eval_xpath(doc, zero_click_info_xpath)).strip() # type: ignore @@ -375,20 +520,19 @@ def response(resp) -> EngineResults: and "Your user agent:" not in zero_click and "URL Decoded:" not in zero_click ): - results.add( - results.types.Answer( + res.add( + res.types.Answer( answer=zero_click, - url=eval_xpath_getindex(doc, '//div[@id="zero_click_abstract"]/a/@href', 0), # type: ignore + url=eval_xpath_getindex(doc, '//div[@id="zero_click_abstract"]/a/@href', 0), ) ) - - return results + return res def fetch_traits(engine_traits: EngineTraits): """Fetch languages & regions from DuckDuckGo. - SearXNG's ``all`` locale maps DuckDuckGo's "Alle regions" (``wt-wt``). + SearXNG's ``all`` locale maps DuckDuckGo's "All regions" (``wt-wt``). DuckDuckGo's language "Browsers preferred language" (``wt_WT``) makes no sense in a SearXNG request since SearXNG's ``all`` will not add a ``Accept-Language`` HTTP header. The value in ``engine_traits.all_locale`` @@ -416,10 +560,10 @@ def fetch_traits(engine_traits: EngineTraits): # updated from u661.js to u.7669f071a13a7daa57cb / should be updated automatically? 
resp = get('https://duckduckgo.com/dist/util/u.7669f071a13a7daa57cb.js') - if not resp.ok: # type: ignore + if not resp.ok: print("ERROR: response from DuckDuckGo is not OK.") - js_code = extr(resp.text, 'regions:', ',snippetLengths') # type: ignore + js_code = extr(resp.text, 'regions:', ',snippetLengths') regions = json.loads(js_code) for eng_tag, name in regions.items(): @@ -453,7 +597,7 @@ def fetch_traits(engine_traits: EngineTraits): engine_traits.custom['lang_region'] = {} - js_code = extr(resp.text, 'languages:', ',regions') # type: ignore + js_code = extr(resp.text, 'languages:', ',regions') languages: dict[str, str] = js_obj_str_to_python(js_code) for eng_lang, name in languages.items(): diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py index 6d330cebe..bf2168827 100644 --- a/searx/engines/duckduckgo_definitions.py +++ b/searx/engines/duckduckgo_definitions.py @@ -12,69 +12,75 @@ least we could not find out how language support should work. It seems that most of the features are based on English terms. 
""" +import typing as t from urllib.parse import urlencode, urlparse, urljoin from lxml import html from searx.data import WIKIDATA_UNITS from searx.utils import extract_text, html_to_text, get_string_replaces_function -from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom +from searx.external_urls import ( + get_external_url, + get_earth_coordinates_url, + area_to_osm_zoom, +) from searx.result_types import EngineResults -# about +if t.TYPE_CHECKING: + from searx.extended_types import SXNG_Response + from searx.search.processors import OnlineParams + about = { - "website": 'https://duckduckgo.com/', - "wikidata_id": 'Q12805', - "official_api_documentation": 'https://duckduckgo.com/api', + "website": "https://duckduckgo.com/", + "wikidata_id": "Q12805", + "official_api_documentation": "https://duckduckgo.com/api", "use_official_api": True, "require_api_key": False, - "results": 'JSON', + "results": "JSON", } -URL = 'https://api.duckduckgo.com/' + '?{query}&format=json&pretty=0&no_redirect=1&d=1' +URL = "https://api.duckduckgo.com/" + "?{query}&format=json&pretty=0&no_redirect=1&d=1" -WIKIDATA_PREFIX = ['http://www.wikidata.org/entity/', 'https://www.wikidata.org/entity/'] +WIKIDATA_PREFIX = ["http://www.wikidata.org/entity/", "https://www.wikidata.org/entity/"] -replace_http_by_https = get_string_replaces_function({'http:': 'https:'}) +replace_http_by_https = get_string_replaces_function({"http:": "https:"}) -def is_broken_text(text): +def is_broken_text(text: str) -> bool: """duckduckgo may return something like ``http://somewhere Related website`` The href URL is broken, the "Related website" may contains some HTML. The best solution seems to ignore these results. 
""" - return text.startswith('http') and ' ' in text + return text.startswith("http") and " " in text -def result_to_text(text, htmlResult): +def result_to_text(text: str, htmlResult: str) -> str | None: # TODO : remove result ending with "Meaning" or "Category" # pylint: disable=fixme - result = None + result = "" dom = html.fromstring(htmlResult) - a = dom.xpath('//a') + a = dom.xpath("//a") if len(a) >= 1: result = extract_text(a[0]) else: result = text - if not is_broken_text(result): + if result and not is_broken_text(result): return result return None -def request(query, params): - params['url'] = URL.format(query=urlencode({'q': query})) - return params +def request(query: str, params: "OnlineParams") -> None: + params["url"] = URL.format(query=urlencode({"q": query})) -def response(resp) -> EngineResults: +def response(resp: "SXNG_Response") -> EngineResults: # pylint: disable=too-many-locals, too-many-branches, too-many-statements results = EngineResults() + search_res: dict[str, str] = resp.json() - search_res = resp.json() - - # search_res.get('Entity') possible values (not exhaustive) : + # search_res.get("Entity") possible values (not exhaustive) : # * continent / country / department / location / waterfall # * actor / musician / artist # * book / performing art / film / television / media franchise / concert tour / playwright @@ -82,79 +88,82 @@ def response(resp) -> EngineResults: # * website / software / os / programming language / file format / software engineer # * company - content = '' - heading = search_res.get('Heading', '') - attributes = [] - urls = [] + content: str = "" + heading: str = search_res.get("Heading", "") + attributes: list[dict[str, str | dict[str, str]]] = [] + urls: list[dict[str, str | bool]] = [] infobox_id = None - relatedTopics = [] + relatedTopics: list[dict[str, str | list[str]]] = [] # add answer if there is one - answer = search_res.get('Answer', '') + answer: str = search_res.get("Answer", "") if answer: - 
answer_type = search_res.get('AnswerType') - logger.debug('AnswerType="%s" Answer="%s"', answer_type, answer) - if isinstance(answer, str) and answer_type not in ['calc', 'ip']: + answer_type = search_res.get("AnswerType") + logger.debug("AnswerType='%s' Answer='%s'", answer_type, answer) + if isinstance(answer, str) and answer_type not in ["calc", "ip"]: results.add( results.types.Answer( answer=html_to_text(answer), - url=search_res.get('AbstractURL', ''), + url=search_res.get("AbstractURL", ""), ) ) # add infobox - if 'Definition' in search_res: - content = content + search_res.get('Definition', '') + if "Definition" in search_res: + content = content + search_res.get("Definition", "") - if 'Abstract' in search_res: - content = content + search_res.get('Abstract', '') + if "Abstract" in search_res: + content = content + search_res.get("Abstract", "") # image - image = search_res.get('Image') - image = None if image == '' else image - if image is not None and urlparse(image).netloc == '': - image = urljoin('https://duckduckgo.com', image) + image = search_res.get("Image") + image = None if image == "" else image + if image is not None and urlparse(image).netloc == "": + image = urljoin("https://duckduckgo.com", image) - # urls # Official website, Wikipedia page - for ddg_result in search_res.get('Results', []): - firstURL = ddg_result.get('FirstURL') - text = ddg_result.get('Text') + _result_list: list[dict[str, str]] = search_res.get("Results", []) # pyright: ignore[reportAssignmentType] + + for ddg_result in _result_list: + firstURL = ddg_result.get("FirstURL") + text = ddg_result.get("Text") if firstURL is not None and text is not None: - urls.append({'title': text, 'url': firstURL}) - results.append({'title': heading, 'url': firstURL}) + urls.append({"title": text, "url": firstURL}) + results.add(results.types.LegacyResult({"title": heading, "url": firstURL})) # related topics - for ddg_result in search_res.get('RelatedTopics', []): - if 'FirstURL' in 
ddg_result: - firstURL = ddg_result.get('FirstURL') - text = ddg_result.get('Text') + _result_list = search_res.get("RelatedTopics", []) # pyright: ignore[reportAssignmentType] + for ddg_result in _result_list: + if "FirstURL" in ddg_result: + firstURL = ddg_result.get("FirstURL") + text = ddg_result.get("Text", "") if not is_broken_text(text): - suggestion = result_to_text(text, ddg_result.get('Result')) + suggestion = result_to_text(text, ddg_result.get("Result", "")) if suggestion != heading and suggestion is not None: - results.append({'suggestion': suggestion}) - elif 'Topics' in ddg_result: - suggestions = [] - relatedTopics.append({'name': ddg_result.get('Name', ''), 'suggestions': suggestions}) - for topic_result in ddg_result.get('Topics', []): - suggestion = result_to_text(topic_result.get('Text'), topic_result.get('Result')) + results.add(results.types.LegacyResult({"suggestion": suggestion})) + elif "Topics" in ddg_result: + suggestions: list[str] = [] + relatedTopics.append({"name": ddg_result.get("Name", ""), "suggestions": suggestions}) + _topic_results: list[dict[str, str]] = ddg_result.get("Topics", []) # pyright: ignore[reportAssignmentType] + for topic_result in _topic_results: + suggestion = result_to_text(topic_result.get("Text", ""), topic_result.get("Result", "")) if suggestion != heading and suggestion is not None: suggestions.append(suggestion) # abstract - abstractURL = search_res.get('AbstractURL', '') - if abstractURL != '': + abstractURL = search_res.get("AbstractURL", "") + if abstractURL != "": # add as result ? 
problem always in english infobox_id = abstractURL - urls.append({'title': search_res.get('AbstractSource'), 'url': abstractURL, 'official': True}) - results.append({'url': abstractURL, 'title': heading}) + urls.append({"title": search_res.get("AbstractSource", ""), "url": abstractURL, "official": True}) + results.add(results.types.LegacyResult({"url": abstractURL, "title": heading})) # definition - definitionURL = search_res.get('DefinitionURL', '') - if definitionURL != '': + definitionURL = search_res.get("DefinitionURL", "") + if definitionURL != "": # add as result ? as answer ? problem always in english infobox_id = definitionURL - urls.append({'title': search_res.get('DefinitionSource'), 'url': definitionURL}) + urls.append({"title": search_res.get("DefinitionSource", ""), "url": definitionURL}) # to merge with wikidata's infobox if infobox_id: @@ -162,15 +171,15 @@ def response(resp) -> EngineResults: # attributes # some will be converted to urls - if 'Infobox' in search_res: - infobox = search_res.get('Infobox') - if 'content' in infobox: + if "Infobox" in search_res: + infobox: dict[str, t.Any] = search_res.get("Infobox", {}) # pyright: ignore[reportAssignmentType] + if "content" in infobox: osm_zoom = 17 coordinates = None - for info in infobox.get('content'): - data_type = info.get('data_type') - data_label = info.get('label') - data_value = info.get('value') + for info in infobox.get("content", {}): + data_type: str = info.get("data_type", "") + data_label = info.get("label") + data_value = info.get("value") # Workaround: ddg may return a double quote if data_value == '""': @@ -180,77 +189,79 @@ def response(resp) -> EngineResults: # * imdb_id / facebook_profile / youtube_channel / youtube_video / twitter_profile # * instagram_profile / rotten_tomatoes / spotify_artist_id / itunes_artist_id / soundcloud_id # * netflix_id - external_url = get_external_url(data_type, data_value) + external_url: str | None = get_external_url(data_type, data_value) # type: 
ignore if external_url is not None: - urls.append({'title': data_label, 'url': external_url}) - elif data_type in ['instance', 'wiki_maps_trigger', 'google_play_artist_id']: + urls.append({"title": data_label, "url": external_url}) + elif data_type in ["instance", "wiki_maps_trigger", "google_play_artist_id"]: # ignore instance: Wikidata value from "Instance Of" (Qxxxx) # ignore wiki_maps_trigger: reference to a javascript # ignore google_play_artist_id: service shutdown pass - elif data_type == 'string' and data_label == 'Website': + elif data_type == "string" and data_label == "Website": # There is already an URL for the website pass - elif data_type == 'area': - attributes.append({'label': data_label, 'value': area_to_str(data_value), 'entity': 'P2046'}) - osm_zoom = area_to_osm_zoom(data_value.get('amount')) - elif data_type == 'coordinates': - if data_value.get('globe') == 'http://www.wikidata.org/entity/Q2': + elif data_type == "area": + attributes.append({"label": data_label, "value": area_to_str(data_value), "entity": "P2046"}) + osm_zoom = area_to_osm_zoom(data_value.get("amount")) + elif data_type == "coordinates": + if data_value.get("globe") == "http://www.wikidata.org/entity/Q2": # coordinate on Earth # get the zoom information from the area coordinates = info else: # coordinate NOT on Earth - attributes.append({'label': data_label, 'value': data_value, 'entity': 'P625'}) - elif data_type == 'string': - attributes.append({'label': data_label, 'value': data_value}) + attributes.append({"label": data_label, "value": data_value, "entity": "P625"}) + elif data_type == "string": + attributes.append({"label": data_label, "value": data_value}) if coordinates: - data_label = coordinates.get('label') - data_value = coordinates.get('value') - latitude = data_value.get('latitude') - longitude = data_value.get('longitude') - url = get_earth_coordinates_url(latitude, longitude, osm_zoom) - urls.append({'title': 'OpenStreetMap', 'url': url, 'entity': 'P625'}) + 
data_label = coordinates.get("label") + data_value = coordinates.get("value") + latitude = data_value.get("latitude") + longitude = data_value.get("longitude") + _url: str = get_earth_coordinates_url(latitude, longitude, osm_zoom) # type: ignore + urls.append({"title": "OpenStreetMap", "url": _url, "entity": "P625"}) if len(heading) > 0: - # TODO get infobox.meta.value where .label='article_title' # pylint: disable=fixme + # TODO get infobox.meta.value where .label="article_title" # pylint: disable=fixme if image is None and len(attributes) == 0 and len(urls) == 1 and len(relatedTopics) == 0 and len(content) == 0: - results.append({'url': urls[0]['url'], 'title': heading, 'content': content}) + results.add(results.types.LegacyResult({"url": urls[0]["url"], "title": heading, "content": content})) else: - results.append( - { - 'infobox': heading, - 'id': infobox_id, - 'content': content, - 'img_src': image, - 'attributes': attributes, - 'urls': urls, - 'relatedTopics': relatedTopics, - } + results.add( + results.types.LegacyResult( + { + "infobox": heading, + "id": infobox_id, + "content": content, + "img_src": image, + "attributes": attributes, + "urls": urls, + "relatedTopics": relatedTopics, + } + ) ) return results -def unit_to_str(unit): +def unit_to_str(unit: str) -> str: for prefix in WIKIDATA_PREFIX: if unit.startswith(prefix): wikidata_entity = unit[len(prefix) :] real_unit = WIKIDATA_UNITS.get(wikidata_entity) if real_unit is None: return unit - return real_unit['symbol'] + return real_unit["symbol"] return unit -def area_to_str(area): - """parse ``{'unit': 'https://www.wikidata.org/entity/Q712226', 'amount': '+20.99'}``""" - unit = unit_to_str(area.get('unit')) - if unit is not None: +def area_to_str(area: dict[str, str]) -> str: + """parse ``{"unit": "https://www.wikidata.org/entity/Q712226", "amount": "+20.99"}``""" + unit = unit_to_str(area.get("unit", "")) + if unit: try: - amount = float(area.get('amount')) - return '{} {}'.format(amount, unit) + 
amount = float(area.get("amount", "")) + return "{} {}".format(amount, unit) except ValueError: pass - return '{} {}'.format(area.get('amount', ''), area.get('unit', '')) + return "{} {}".format(area.get("amount", ""), area.get("unit", "")) diff --git a/searx/engines/duckduckgo_extra.py b/searx/engines/duckduckgo_extra.py index 47b544e1e..80719a1be 100644 --- a/searx/engines/duckduckgo_extra.py +++ b/searx/engines/duckduckgo_extra.py @@ -4,84 +4,155 @@ DuckDuckGo Extra (images, videos, news) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ """ +import typing as t + from datetime import datetime from urllib.parse import urlencode -from searx.utils import get_embeded_stream_url, html_to_text +from urllib.parse import quote_plus + +from searx.utils import get_embeded_stream_url, html_to_text, gen_useragent, extr +from searx.network import get # see https://github.com/searxng/searxng/issues/762 from searx.engines.duckduckgo import fetch_traits # pylint: disable=unused-import -from searx.engines.duckduckgo import get_ddg_lang, get_vqd +from searx.engines.duckduckgo import get_ddg_lang, get_vqd, set_vqd + +if t.TYPE_CHECKING: + from searx.extended_types import SXNG_Response + from searx.search.processors import OnlineParams # about about = { - "website": 'https://duckduckgo.com/', - "wikidata_id": 'Q12805', + "website": "https://duckduckgo.com/", + "wikidata_id": "Q12805", "use_official_api": False, "require_api_key": False, - "results": 'JSON (site requires js to get images)', + "results": "JSON (site requires js to get images)", } # engine dependent config -categories = ['images', 'web'] -ddg_category = 'images' +categories = [] +ddg_category = "" """The category must be any of ``images``, ``videos`` and ``news`` """ paging = True safesearch = True -send_accept_language_header = True -safesearch_cookies = {0: '-2', 1: None, 2: '1'} -safesearch_args = {0: '1', 1: None, 2: '1'} +safesearch_cookies = {0: "-2", 1: None, 2: "1"} +safesearch_args = {0: "1", 1: None, 2: "1"} 
-search_path_map = {'images': 'i', 'videos': 'v', 'news': 'news'} +search_path_map = {"images": "i", "videos": "v", "news": "news"} +_HTTP_User_Agent: str = gen_useragent() -def request(query, params): - eng_region: str = traits.get_region(params['searxng_locale'], traits.all_locale) # type: ignore +def init(engine_settings: dict[str, t.Any]): - # request needs a vqd argument - vqd = get_vqd(query, eng_region, force_request=True) + if engine_settings["ddg_category"] not in ["images", "videos", "news"]: + raise ValueError(f"Unsupported DuckDuckGo category: {engine_settings['ddg_category']}") - if not vqd: - # some search terms do not have results and therefore no vqd value - params['url'] = None - return params - eng_lang = get_ddg_lang(traits, params['searxng_locale']) +def fetch_vqd( + query: str, + params: "OnlineParams", +): - args = { - 'q': query, - 'o': 'json', - # 'u': 'bing', - 'l': eng_region, - 'f': ',,,,,', - 'vqd': vqd, + logger.debug("fetch_vqd: request value from from duckduckgo.com") + resp = get( + url=f"https://duckduckgo.com/?q={quote_plus(query)}&iar=images&t=h_", + headers=params["headers"], + timeout=2, + ) + + value = "" + if resp.status_code == 200: + value = extr(resp.text, 'vqd="', '"') + if value: + logger.debug("vqd value from duckduckgo.com request: '%s'", value) + else: + logger.error("vqd: can't parse value from ddg response (return empty string)") + return "" + else: + logger.error("vqd: got HTTP %s from duckduckgo.com", resp.status_code) + + if value: + set_vqd(query=query, value=value, params=params) + else: + logger.error("none vqd value from duckduckgo.com: HTTP %s", resp.status_code) + return value + + +def request(query: str, params: "OnlineParams") -> None: + + if len(query) >= 500: + # DDG does not accept queries with more than 499 chars + params["url"] = None + return + + if params["pageno"] > 1 and ddg_category in ["videos", "news"]: + # DDG has limited results for videos and news and we got already all + # results from the 
first request. + params["url"] = None + return + + # HTTP headers + # ============ + + headers = params["headers"] + # The vqd value is generated from the query and the UA header. To be able to + # reuse the vqd value, the UA header must be static. + headers["User-Agent"] = _HTTP_User_Agent + vqd = get_vqd(query=query, params=params) or fetch_vqd(query=query, params=params) + + # Sec-Fetch-* headers are already set by the ``OnlineProcessor``. + # Overwrite the default Sec-Fetch-* headers to fit to a XHTMLRequest + headers["Sec-Fetch-Dest"] = "empty" + headers["Sec-Fetch-Mode"] = "cors" + + headers["Accept"] = "*/*" + headers["Referer"] = "https://duckduckgo.com/" + headers["Host"] = "duckduckgo.com" + # headers["X-Requested-With"] = "XMLHttpRequest" + + # DDG XHTMLRequest + # ================ + + eng_region: str = traits.get_region( + params["searxng_locale"], + traits.all_locale, + ) # pyright: ignore[reportAssignmentType] + + eng_lang: str = get_ddg_lang(traits, params["searxng_locale"]) or "wt-wt" + + args: dict[str, str | int] = { + "o": "json", + "q": query, + "u": "bing", + "l": eng_region, + "bpia": "1", + "vqd": vqd, + "a": "h_", } - if params['pageno'] > 1: - args['s'] = (params['pageno'] - 1) * 100 + params["cookies"]["ad"] = eng_lang # zh_CN + params["cookies"]["ah"] = eng_region # "us-en,de-de" + params["cookies"]["l"] = eng_region # "hk-tzh" - params['cookies']['ad'] = eng_lang # zh_CN - params['cookies']['ah'] = eng_region # "us-en,de-de" - params['cookies']['l'] = eng_region # "hk-tzh" + args["ct"] = "EN" + if params["searxng_locale"] != "all": + args["ct"] = params["searxng_locale"].split("-")[0].upper() - safe_search = safesearch_cookies.get(params['safesearch']) + if params["pageno"] > 1: + args["s"] = (params["pageno"] - 1) * 100 + + safe_search = safesearch_cookies.get(params["safesearch"]) if safe_search is not None: - params['cookies']['p'] = safe_search # "-2", "1" - safe_search = safesearch_args.get(params['safesearch']) - if safe_search is 
not None: - args['p'] = safe_search # "-1", "1" + params["cookies"]["p"] = safe_search # "-2", "1" + args["p"] = safe_search - logger.debug("cookies: %s", params['cookies']) + params["url"] = f"https://duckduckgo.com/{search_path_map[ddg_category]}.js?{urlencode(args)}" - params['url'] = f'https://duckduckgo.com/{search_path_map[ddg_category]}.js?{urlencode(args)}' - - # sending these two headers prevents rate limiting for the query - params['headers'] = { - 'Referer': 'https://duckduckgo.com/', - 'X-Requested-With': 'XMLHttpRequest', - } - - return params + logger.debug("param headers: %s", params["headers"]) + logger.debug("param data: %s", params["data"]) + logger.debug("param cookies: %s", params["cookies"]) def _image_result(result): diff --git a/searx/settings.yml b/searx/settings.yml index ab3eae1e5..af2bad478 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -829,24 +829,21 @@ engines: - name: duckduckgo images engine: duckduckgo_extra - categories: [images, web] + categories: [images] ddg_category: images shortcut: ddi - disabled: true - name: duckduckgo videos engine: duckduckgo_extra - categories: [videos, web] + categories: [videos] ddg_category: videos shortcut: ddv - disabled: true - name: duckduckgo news engine: duckduckgo_extra - categories: [news, web] + categories: [news] ddg_category: news shortcut: ddn - disabled: true - name: duckduckgo weather engine: duckduckgo_weather