# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""This is the implementation of the Bing-WEB engine.  Some of its
implementation is shared by other engines:

- :ref:`bing images engine`
- :ref:`bing news engine`
- :ref:`bing videos engine`

On the `preference page`_ Bing offers a lot of languages and regions (see
section 'Search results languages' and 'Country/region').  However, the
abundant choice does not correspond to reality, where Bing has a full-text
indexer only for a limited number of languages.  For example: you can select a
language like Māori but you will never get a result in this language.

What comes a bit closer to the truth are the `search-APIs`_ but they don't seem
to be completely correct either (if you take a closer look you will find some
inaccuracies there too):

- :py:obj:`searx.engines.bing.bing_traits_url`
- :py:obj:`searx.engines.bing_videos.bing_traits_url`
- :py:obj:`searx.engines.bing_images.bing_traits_url`
- :py:obj:`searx.engines.bing_news.bing_traits_url`

.. _preference page: https://www.bing.com/account/general
.. _search-APIs: https://learn.microsoft.com/en-us/bing/search-apis/

"""
# pylint: disable=too-many-branches, invalid-name

from typing import TYPE_CHECKING
import datetime
import re
import uuid
from urllib.parse import urlencode
from lxml import html
import babel
import babel.languages

from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_getindex
from searx import network
from searx.locales import language_tag, region_tag
from searx.enginelib.traits import EngineTraits

if TYPE_CHECKING:
    import logging

    logger: logging.Logger

traits: EngineTraits

about = {
    "website": 'https://www.bing.com',
    "wikidata_id": 'Q182496',
    "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-web-search-api',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

send_accept_language_header = True
"""Bing tries to guess the user's language and territory from the HTTP
Accept-Language header.  Optionally the user can select a search language
(which can differ from the UI language) and a region (market code)."""

# engine dependent config
categories = ['general', 'web']
paging = True
time_range_support = True
safesearch = True
safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'}  # cookie: ADLT=STRICT

base_url = 'https://www.bing.com/search'
"""Bing (Web) search URL"""

bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/reference/market-codes'
"""Bing (Web) search API description"""


def _get_offset_from_pageno(pageno):
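    """Return the offset for Bing's 'first' URL argument: pageno 1 -> 1,
    pageno 2 -> 11, pageno 3 -> 21, ... (ten results per page)."""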
    return (pageno - 1) * 10 + 1


def set_bing_cookies(params, engine_language, engine_region, SID):

    # set cookies
    # -----------

    params['cookies']['_EDGE_V'] = '1'

    # _EDGE_S: F=1&SID=3A5253BD6BCA609509B741876AF961CA&mkt=zh-tw
    _EDGE_S = [
        'F=1',
        'SID=%s' % SID,
        'mkt=%s' % engine_region.lower(),
        'ui=%s' % engine_language.lower(),
    ]
    params['cookies']['_EDGE_S'] = '&'.join(_EDGE_S)
    logger.debug("cookie _EDGE_S=%s", params['cookies']['_EDGE_S'])

    # "_EDGE_CD": "m=zh-tw",

    _EDGE_CD = [  # pylint: disable=invalid-name
        'm=%s' % engine_region.lower(),  # search region: zh-cn
        'u=%s' % engine_language.lower(),  # UI: en-us
    ]

    params['cookies']['_EDGE_CD'] = '&'.join(_EDGE_CD) + ';'
    logger.debug("cookie _EDGE_CD=%s", params['cookies']['_EDGE_CD'])

    SRCHHPGUSR = [  # pylint: disable=invalid-name
        'SRCHLANG=%s' % engine_language,
        # Trying to set ADLT cookie here seems not to have any effect, I assume
        # there is some age verification by a cookie (and/or session ID) needed,
        # to disable the SafeSearch.
        'ADLT=%s' % safesearch_types.get(params['safesearch'], 'DEMOTE'),
    ]
    params['cookies']['SRCHHPGUSR'] = '&'.join(SRCHHPGUSR)
    logger.debug("cookie SRCHHPGUSR=%s", params['cookies']['SRCHHPGUSR'])
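
    # For illustration, with engine_region='fr-CH' and engine_language='fr'
    # the cookies assembled above look like:
    #   _EDGE_S    = "F=1&SID=<32 hex digits>&mkt=fr-ch&ui=fr"
    #   _EDGE_CD   = "m=fr-ch&u=fr;"
    #   SRCHHPGUSR = "SRCHLANG=fr&ADLT=DEMOTE"   (safesearch level 1)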


def request(query, params):
    """Assemble a Bing-Web request."""

    engine_region = traits.get_region(params['searxng_locale'], 'en-US')
    engine_language = traits.get_language(params['searxng_locale'], 'en')

    SID = uuid.uuid1().hex.upper()
    CVID = uuid.uuid1().hex.upper()

    set_bing_cookies(params, engine_language, engine_region, SID)

    # build URL query
    # ---------------

    # query term
    page = int(params.get('pageno', 1))
    query_params = {
        # fmt: off
        'q': query,
        'pq': query,
        'cvid': CVID,
        'qs': 'n',
        'sp': '-1'
        # fmt: on
    }
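    # For illustration, urlencode(query_params) yields a query string such as
    # "q=searxng&pq=searxng&cvid=<32 hex digits>&qs=n&sp=-1" (parameter order
    # follows the dict above; 'first' and 'FORM' are added below).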

    # page
    if page > 1:
        referer = base_url + '?' + urlencode(query_params)
        params['headers']['Referer'] = referer
        logger.debug("headers.Referer --> %s", referer)

    query_params['first'] = _get_offset_from_pageno(page)

    if page == 2:
        query_params['FORM'] = 'PERE'
    elif page > 2:
        query_params['FORM'] = 'PERE%s' % (page - 2)

    filters = ''
    if params['time_range']:
        query_params['filt'] = 'custom'

        if params['time_range'] == 'day':
            filters = 'ex1:"ez1"'
        elif params['time_range'] == 'week':
            filters = 'ex1:"ez2"'
        elif params['time_range'] == 'month':
            filters = 'ex1:"ez3"'
        elif params['time_range'] == 'year':
            epoch_1970 = datetime.date(1970, 1, 1)
            today_no = (datetime.date.today() - epoch_1970).days
            filters = 'ex1:"ez5_%s_%s"' % (today_no - 365, today_no)
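            # For illustration: the day numbers count days since 1970-01-01,
            # so the filter reads e.g. 'ex1:"ez5_19361_19726"' when "today"
            # is day 19726 (exact values depend on the current date).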

    params['url'] = base_url + '?' + urlencode(query_params)
    if filters:
        params['url'] = params['url'] + '&filters=' + filters
    return params


def response(resp):
    results = []
    result_len = 0

    dom = html.fromstring(resp.text)

    # parse results again if nothing is found yet

    url_to_resolve = []
    url_to_resolve_index = []
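    # These two lists collect Bing redirect URLs (https://www.bing.com/ck/a?...)
    # together with the index of the corresponding entry in `results`, so the
    # real target URLs can be resolved in parallel after the loop below.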
    i = 0
    for result in eval_xpath_list(dom, '//ol[@id="b_results"]/li[contains(@class, "b_algo")]'):

        link = eval_xpath_getindex(result, './/h2/a', 0, None)
        if link is None:
            continue
        url = link.attrib.get('href')
        title = extract_text(link)

        content = eval_xpath(result, '(.//p)[1]')
        for p in content:
            # Make sure that the element is free of <a href> links
            for e in p.xpath('.//a'):
                e.getparent().remove(e)
        content = extract_text(content)

        # get the real URL either using the URL shown to user or following the Bing URL
        if url.startswith('https://www.bing.com/ck/a?'):
            url_cite = extract_text(eval_xpath(result, './/div[@class="b_attribution"]/cite'))
            # Bing can shorten the URL either at the end or in the middle of the string
            if (
                url_cite
                and url_cite.startswith('https://')
                and '…' not in url_cite
                and '...' not in url_cite
                and '›' not in url_cite
            ):
                # no need for an additional HTTP request
                url = url_cite
            else:
                # resolve the URL with an additional HTTP request
                url_to_resolve.append(url.replace('&ntb=1', '&ntb=F'))
                url_to_resolve_index.append(i)
                url = None  # remove the result if the HTTP Bing redirect raises an exception

        # append result
        results.append({'url': url, 'title': title, 'content': content})
        # increment result pointer for the next iteration in this loop
        i += 1

    # resolve all Bing redirections in parallel
    request_list = [
        network.Request.get(u, allow_redirects=False, headers=resp.search_params['headers']) for u in url_to_resolve
    ]
    response_list = network.multi_requests(request_list)
    for i, redirect_response in enumerate(response_list):
        if not isinstance(redirect_response, Exception):
            results[url_to_resolve_index[i]]['url'] = redirect_response.headers['location']

    # get number_of_results
    try:
        result_len_container = "".join(eval_xpath(dom, '//span[@class="sb_count"]//text()'))
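        # The sb_count span holds a text like "1-10 of 2,450,000 results"
        # (wording and number formatting depend on the UI language); only the
        # total count is extracted below.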
        if "-" in result_len_container:

            # Remove the part "from-to" for paginated request ...
            result_len_container = result_len_container[result_len_container.find("-") * 2 + 2 :]

        result_len_container = re.sub('[^0-9]', '', result_len_container)

        if len(result_len_container) > 0:
            result_len = int(result_len_container)

    except Exception as e:  # pylint: disable=broad-except
        logger.debug('result error :\n%s', e)

    if result_len and _get_offset_from_pageno(resp.search_params.get("pageno", 0)) > result_len:
        return []

    results.append({'number_of_results': result_len})
    return results


def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages and regions from Bing-Web."""

    xpath_market_codes = '//table[1]/tbody/tr/td[3]'
    # xpath_country_codes = '//table[2]/tbody/tr/td[2]'
    xpath_language_codes = '//table[3]/tbody/tr/td[2]'

    _fetch_traits(engine_traits, bing_traits_url, xpath_language_codes, xpath_market_codes)


def _fetch_traits(engine_traits: EngineTraits, url: str, xpath_language_codes: str, xpath_market_codes: str):

    # insert alias to map from a language (zh) to a language + script (zh_Hans)
    engine_traits.languages['zh'] = 'zh-hans'

    resp = network.get(url)

    if not resp.ok:
        print("ERROR: response from Bing is not OK.")

    dom = html.fromstring(resp.text)

    map_lang = {'jp': 'ja'}
    for td in eval_xpath(dom, xpath_language_codes):
        eng_lang = td.text

        if eng_lang in ('en-gb', 'pt-br'):
            # language 'en' is already in the list and a language 'en-gb' can't
            # be handled in SearXNG, same with pt-br which is covered by pt-pt.
            continue

        babel_lang = map_lang.get(eng_lang, eng_lang).replace('-', '_')
        try:
            sxng_tag = language_tag(babel.Locale.parse(babel_lang))
        except babel.UnknownLocaleError:
            print("ERROR: language (%s) is unknown by babel" % (eng_lang))
            continue
        conflict = engine_traits.languages.get(sxng_tag)
        if conflict:
            if conflict != eng_lang:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_lang))
            continue
        engine_traits.languages[sxng_tag] = eng_lang

    map_region = {
        'en-ID': 'id_ID',
        'no-NO': 'nb_NO',
    }

    for td in eval_xpath(dom, xpath_market_codes):
        eng_region = td.text
        babel_region = map_region.get(eng_region, eng_region).replace('-', '_')

        if eng_region == 'en-WW':
            engine_traits.all_locale = eng_region
            continue

        try:
            sxng_tag = region_tag(babel.Locale.parse(babel_region))
        except babel.UnknownLocaleError:
            print("ERROR: region (%s) is unknown by babel" % (eng_region))
            continue
        conflict = engine_traits.regions.get(sxng_tag)
        if conflict:
            if conflict != eng_region:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_region))
            continue
        engine_traits.regions[sxng_tag] = eng_region