# main.py
"""FastAPI service that scrapes Bing web, image, news and suggestion results."""
import json
import logging
import threading
import time
from typing import Any, Dict, List, Optional
from urllib.parse import urlencode, urlparse, parse_qs

from bs4 import BeautifulSoup
from curl_cffi.requests import Session
from fastapi import FastAPI, HTTPException, Query, Request
from fastapi.concurrency import run_in_threadpool
from pydantic import BaseModel, Field

logger = logging.getLogger(__name__)


# 1. Pydantic Models for API Responses
class BingSearchResult(BaseModel):
    url: str = Field(..., description="The URL of the search result.")
    title: str = Field(..., description="The title of the search result.")
    description: str = Field(..., description="A brief description or snippet from the result page.")


class BingImageResult(BaseModel):
    title: str = Field(..., description="The title or caption of the image.")
    image: str = Field(..., description="The direct URL to the full-resolution image.")
    thumbnail: str = Field(..., description="The URL to the thumbnail of the image.")
    url: str = Field(..., description="The URL of the webpage where the image was found.")
    source: str = Field(..., description="The source domain of the image.")


class BingNewsResult(BaseModel):
    title: str = Field(..., description="The title of the news article.")
    url: str = Field(..., description="The URL to the full news article.")
    description: str = Field(..., description="A snippet from the news article.")
    source: str = Field(..., description="The publisher or source of the news article.")


# 2. FastAPI Application Setup
app = FastAPI(
    title="Definitive Fast Bing Search API",
    description="Returns correct, non-localized search results from Bing using advanced techniques.",
    version="9.0.0-complete"
)


# 3. Middleware to Add Custom Headers
@app.middleware("http")
async def add_custom_headers(request: Request, call_next):
    """Attach the processing time and a branding header to every response."""
    # perf_counter is monotonic, so the measured duration cannot go negative
    # or jump when the wall clock is adjusted.
    start_time = time.perf_counter()
    response = await call_next(request)
    process_time = time.perf_counter() - start_time
    response.headers["X-Process-Time"] = f"{process_time:.4f} seconds"
    response.headers["X-Powered-By"] = "NiansuhAI"
    return response


# 4. The Definitive Bing Search Class

# Maps the public safesearch option to the value sent in the `safeSearch`
# query parameter; unknown options fall back to "Moderate".
_SAFESEARCH_LEVELS = {"on": "Strict", "moderate": "Moderate", "off": "Off"}

# Step applied to the `first` query parameter between text-search pages.
_TEXT_PAGE_SIZE = 10


class BingSearchError(Exception):
    """Raised when a Bing page cannot be fetched or parsed."""


class BingSearch:
    """Bing scraper that pins the preferred market through a session cookie."""

    def __init__(
        self,
        proxies: Optional[Dict[str, str]] = None,
        timeout: int = 15,
        impersonate: str = "chrome110",
        verify: bool = True,
    ):
        """Create the underlying HTTP session.

        Args:
            proxies: Optional proxy mapping handed to the session.
            timeout: Request timeout passed to the session.
            impersonate: Browser fingerprint the session impersonates.
            verify: Whether TLS certificates are verified. Defaults to True;
                pass False only when a trusted intercepting proxy requires it.
        """
        self.session = Session(
            proxies=proxies or {},
            timeout=timeout,
            impersonate=impersonate,
            verify=verify,
        )
        self._base_url = "https://www.bing.com"
        # The region cookie lives on the shared session, so setting it and
        # issuing the request that relies on it must happen atomically when
        # several worker threads use the same instance.
        self._request_lock = threading.Lock()
        self.session.headers.update({
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36'
        })

    def _update_session_for_region(self, region: str = "en-US"):
        """Set the cookie that tells Bing which market we prefer."""
        self.session.cookies.set("SRCHHPGUSR", f"SRCHLANG=en&MKT={region}", domain=".bing.com")

    @staticmethod
    def _safesearch_level(safesearch: str) -> str:
        """Translate 'on'/'moderate'/'off' (case-insensitive) to Bing's value."""
        return _SAFESEARCH_LEVELS.get(safesearch.lower(), "Moderate")

    def _fetch_soup(self, path: str, params: Dict[str, Any], region: str) -> BeautifulSoup:
        """GET ``path`` on bing.com for ``region`` and return the parsed HTML.

        Raises whatever the session raises for transport errors or non-2xx
        responses; callers decide how to report it.
        """
        with self._request_lock:
            self._update_session_for_region(region)
            resp = self.session.get(f"{self._base_url}{path}", params=params)
        resp.raise_for_status()
        return BeautifulSoup(resp.text, "html.parser")

    def text(
        self,
        keywords: str,
        max_results: int,
        region: str,
        safesearch: str
    ) -> List[BingSearchResult]:
        """Return up to ``max_results`` web results, paging as needed.

        Raises:
            BingSearchError: if a page fails before any result was collected.
                A failure on a later page is logged and the results gathered
                so far are returned.
        """
        safe = self._safesearch_level(safesearch)
        fetched_results: List[BingSearchResult] = []
        seen_urls = set()
        page = 1
        while len(fetched_results) < max_results:
            params = {
                "q": keywords,
                "first": (page - 1) * _TEXT_PAGE_SIZE + 1,
                "safeSearch": safe
            }
            try:
                soup = self._fetch_soup("/search", params, region)
            except Exception as e:
                if not fetched_results:
                    # Nothing to return: surface the failure like images()/news()
                    # do instead of reporting an empty, successful search.
                    raise BingSearchError(f"Bing text search failed: {e}") from e
                logger.warning("Error fetching text search page: %s", e)
                break

            result_blocks = soup.select("li.b_algo")
            if not result_blocks:
                break

            added = 0
            for result in result_blocks:
                link_tag = result.select_one("h2 a")
                desc_tag = result.select_one(".b_caption p")
                if not (link_tag and desc_tag and link_tag.get('href')):
                    continue
                url = link_tag['href']
                if url in seen_urls:
                    continue
                seen_urls.add(url)
                fetched_results.append(BingSearchResult(
                    url=url,
                    title=link_tag.get_text(strip=True),
                    description=desc_tag.get_text(strip=True)))
                added += 1
                if len(fetched_results) >= max_results:
                    break

            if not added:
                # A page that contributes nothing new would otherwise keep the
                # loop requesting further pages without bound.
                break
            page += 1
        return fetched_results[:max_results]

    def images(
        self,
        keywords: str,
        max_results: int,
        region: str,
        safesearch: str
    ) -> List[BingImageResult]:
        """Return up to ``max_results`` image results from the first page.

        Raises:
            BingSearchError: if the page cannot be fetched.
        """
        params = {
            "q": keywords,
            "safeSearch": self._safesearch_level(safesearch),
            "form": "HDRSC2",
        }
        try:
            soup = self._fetch_soup("/images/search", params, region)
        except Exception as e:
            raise BingSearchError(f"Bing image search failed: {e}") from e

        results: List[BingImageResult] = []
        for item in soup.select("a.iusc"):
            if len(results) >= max_results:
                break
            try:
                # The `m` attribute carries a JSON blob describing the image.
                m_data = json.loads(item.get("m", "{}"))
                if isinstance(m_data, dict) and m_data.get("murl"):
                    results.append(BingImageResult(
                        title=m_data.get("t", ""),
                        image=m_data.get("murl"),
                        thumbnail=m_data.get("turl", ""),
                        url=m_data.get("purl", ""),
                        source=m_data.get("surl", "")))
            except (TypeError, ValueError):
                # Malformed JSON or fields that fail model validation:
                # skip this item and keep the rest of the page.
                continue
        return results

    def news(
        self,
        keywords: str,
        max_results: int,
        region: str,
        safesearch: str
    ) -> List[BingNewsResult]:
        """Return up to ``max_results`` news results from the first page.

        Raises:
            BingSearchError: if the page cannot be fetched.
        """
        params = {
            "q": keywords,
            "safeSearch": self._safesearch_level(safesearch),
            "form": "QBNH",
        }
        try:
            soup = self._fetch_soup("/news/search", params, region)
        except Exception as e:
            raise BingSearchError(f"Bing news search failed: {e}") from e

        results: List[BingNewsResult] = []
        for item in soup.select("div.news-card"):
            if len(results) >= max_results:
                break
            a_tag = item.find("a", class_="title")
            snippet = item.find("div", class_="snippet")
            source = item.find("div", class_="source")
            if a_tag and a_tag.get('href'):
                results.append(BingNewsResult(
                    title=a_tag.get_text(strip=True),
                    url=a_tag['href'],
                    description=snippet.get_text(strip=True) if snippet else "",
                    source=source.get_text(strip=True) if source else ""))
        return results

    def suggestions(self, query: str, region: str = "en-US") -> List[str]:
        """Return autocomplete suggestions for ``query``; empty list on failure."""
        # The suggestions endpoint is an API and takes the market via 'mkt'.
        params = {"query": query, "mkt": region}
        url = f"https://api.bing.com/osjson.aspx?{urlencode(params)}"
        try:
            resp = self.session.get(url)
            resp.raise_for_status()
            data = resp.json()
        except Exception as e:
            # Best effort: suggestions are optional, so report and degrade.
            logger.warning("Bing suggestions request failed: %s", e)
            return []
        return data[1] if isinstance(data, list) and len(data) > 1 else []


# 5. API Endpoints
# IMPORTANT: For guaranteed results from a specific country (e.g., en-US),
# you MUST use a proxy server from that country.
#
# Example proxy setup:
# proxies = {
#     "http": "http://USERNAME:PASSWORD@us-residential-proxy.com:PORT",
#     "https": "http://USERNAME:PASSWORD@us-residential-proxy.com:PORT",
# }
# bing = BingSearch(proxies=proxies)
bing = BingSearch()  # Without a proxy, results may still be localized.


@app.get("/search", response_model=List[BingSearchResult], summary="Perform a Bing text search")
async def text_search(
    keywords: str = Query(..., description="The search query."),
    max_results: int = Query(10, ge=1, le=50, description="Maximum number of results."),
    region: str = Query("en-US", description="Market to search in (e.g., 'en-US'). A proxy is recommended."),
    safesearch: str = Query("moderate", description="Safe search level: 'on', 'moderate', or 'off'.")
):
    """Run a text search; responds with HTTP 500 when the scrape fails."""
    try:
        # The scraper does blocking network I/O, so run it on a worker thread
        # rather than stalling the event loop.
        return await run_in_threadpool(bing.text, keywords, max_results, region, safesearch)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@app.get("/images", response_model=List[BingImageResult], summary="Perform a Bing image search")
async def image_search(
    keywords: str = Query(..., description="The image search query."),
    max_results: int = Query(10, ge=1, le=50, description="Maximum number of image results."),
    region: str = Query("en-US", description="Market to search in (e.g., 'en-US'). A proxy is recommended."),
    safesearch: str = Query("moderate", description="Safe search level: 'on', 'moderate', or 'off'.")
):
    """Run an image search; responds with HTTP 500 when the scrape fails."""
    try:
        return await run_in_threadpool(bing.images, keywords, max_results, region, safesearch)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@app.get("/news", response_model=List[BingNewsResult], summary="Perform a Bing news search")
async def news_search(
    keywords: str = Query(..., description="The news search query."),
    max_results: int = Query(10, ge=1, le=50, description="Maximum number of news results."),
    region: str = Query("en-US", description="Market to search in (e.g., 'en-US'). A proxy is recommended."),
    safesearch: str = Query("moderate", description="Safe search level: 'on', 'moderate', or 'off'.")
):
    """Run a news search; responds with HTTP 500 when the scrape fails."""
    try:
        return await run_in_threadpool(bing.news, keywords, max_results, region, safesearch)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@app.get("/suggestions", response_model=List[str], summary="Get Bing search suggestions")
async def get_suggestions(
    query: str = Query(..., description="The query to get suggestions for."),
    region: str = Query("en-US", description="Market for suggestions (e.g., 'en-US').")
):
    """Return autocomplete suggestions for the query."""
    try:
        return await run_in_threadpool(bing.suggestions, query, region)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)