import json
import time
from typing import Dict, List, Optional
from urllib.parse import urlencode

from bs4 import BeautifulSoup
from curl_cffi.requests import Session
from fastapi import FastAPI, HTTPException, Query, Request
from pydantic import BaseModel, Field
|
|
class BingSearchResult(BaseModel):
    url: str = Field(..., description="The URL of the search result.")
    title: str = Field(..., description="The title of the search result.")
    description: str = Field(..., description="A brief description or snippet from the result page.")


class BingImageResult(BaseModel):
    title: str = Field(..., description="The title or caption of the image.")
    image: str = Field(..., description="The direct URL to the full-resolution image.")
    thumbnail: str = Field(..., description="The URL to the thumbnail of the image.")
    url: str = Field(..., description="The URL of the webpage where the image was found.")
    source: str = Field(..., description="The source domain of the image.")


class BingNewsResult(BaseModel):
    title: str = Field(..., description="The title of the news article.")
    url: str = Field(..., description="The URL to the full news article.")
    description: str = Field(..., description="A snippet from the news article.")
    source: str = Field(..., description="The publisher or source of the news article.")
|
|
app = FastAPI(
    title="Definitive Fast Bing Search API",
    description="Returns correct, non-localized search results from Bing using advanced techniques.",
    version="9.0.0-complete",
)
|
|
@app.middleware("http")
async def add_custom_headers(request: Request, call_next):
    """Stamp every response with its processing time and a branding header."""
    start_time = time.time()
    response = await call_next(request)
    process_time = time.time() - start_time
    response.headers["X-Process-Time"] = f"{process_time:.4f} seconds"
    response.headers["X-Powered-By"] = "NiansuhAI"
    return response
|
|
class BingSearch:
    """The definitive Bing search scraper that counters aggressive localization."""

    def __init__(
        self,
        proxies: Optional[Dict[str, str]] = None,
        timeout: int = 15,
        impersonate: str = "chrome110",
    ):
        self.session = Session(
            proxies=proxies or {},
            timeout=timeout,
            impersonate=impersonate,
            verify=False,  # note: disables TLS certificate verification
        )
        self._base_url = "https://www.bing.com"
        self.session.headers.update({
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
        })
|
    def _update_session_for_region(self, region: str = "en-US"):
        """The crucial fix: set a cookie that explicitly tells Bing our preferred market."""
        self.session.cookies.set("SRCHHPGUSR", f"SRCHLANG=en&MKT={region}", domain=".bing.com")
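    # Assumption for illustration: with region="en-US", subsequent requests carry a header like
    #   Cookie: SRCHHPGUSR=SRCHLANG=en&MKT=en-US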
|
    def text(
        self, keywords: str, max_results: int, region: str, safesearch: str
    ) -> List[BingSearchResult]:
        self._update_session_for_region(region)
        safe_map = {"on": "Strict", "moderate": "Moderate", "off": "Off"}
        safe = safe_map.get(safesearch.lower(), "Moderate")

        fetched_results = []
        page = 1
        while len(fetched_results) < max_results:
            # "first" is the 1-based index of the first result on the requested page.
            params = {"q": keywords, "first": (page - 1) * 10 + 1, "safeSearch": safe}
            try:
                resp = self.session.get(self._base_url + "/search", params=params)
                resp.raise_for_status()
                soup = BeautifulSoup(resp.text, "html.parser")
            except Exception as e:
                print(f"Error fetching text search page: {e}")
                break

            result_blocks = soup.select("li.b_algo")
            if not result_blocks:
                break

            for result in result_blocks:
                link_tag = result.select_one("h2 a")
                desc_tag = result.select_one(".b_caption p")
                if link_tag and desc_tag and link_tag.get('href'):
                    fetched_results.append(BingSearchResult(
                        url=link_tag['href'],
                        title=link_tag.get_text(strip=True),
                        description=desc_tag.get_text(strip=True),
                    ))
                if len(fetched_results) >= max_results:
                    break
            page += 1
        return fetched_results[:max_results]
|
    def images(
        self, keywords: str, max_results: int, region: str, safesearch: str
    ) -> List[BingImageResult]:
        self._update_session_for_region(region)
        safe_map = {"on": "Strict", "moderate": "Moderate", "off": "Off"}
        safe = safe_map.get(safesearch.lower(), "Moderate")
        params = {"q": keywords, "safeSearch": safe, "form": "HDRSC2"}
        try:
            resp = self.session.get(f"{self._base_url}/images/search", params=params)
            resp.raise_for_status()
            soup = BeautifulSoup(resp.text, "html.parser")
        except Exception as e:
            raise Exception(f"Bing image search failed: {e}")

        results = []
        # Each result anchor carries its metadata as JSON in the "m" attribute.
        for item in soup.select("a.iusc"):
            if len(results) >= max_results:
                break
            try:
                m_data = json.loads(item.get("m", "{}"))
                if m_data and m_data.get("murl"):
                    results.append(BingImageResult(
                        title=m_data.get("t", ""),
                        image=m_data.get("murl"),
                        thumbnail=m_data.get("turl", ""),
                        url=m_data.get("purl", ""),
                        source=m_data.get("surl", ""),
                    ))
            except Exception:
                continue
        return results
|
    def news(
        self, keywords: str, max_results: int, region: str, safesearch: str
    ) -> List[BingNewsResult]:
        self._update_session_for_region(region)
        safe_map = {"on": "Strict", "moderate": "Moderate", "off": "Off"}
        safe = safe_map.get(safesearch.lower(), "Moderate")
        params = {"q": keywords, "safeSearch": safe, "form": "QBNH"}
        try:
            resp = self.session.get(f"{self._base_url}/news/search", params=params)
            resp.raise_for_status()
            soup = BeautifulSoup(resp.text, "html.parser")
        except Exception as e:
            raise Exception(f"Bing news search failed: {e}")

        results = []
        for item in soup.select("div.news-card"):
            if len(results) >= max_results:
                break
            a_tag = item.find("a", class_="title")
            snippet = item.find("div", class_="snippet")
            source = item.find("div", class_="source")
            if a_tag and a_tag.get('href'):
                results.append(BingNewsResult(
                    title=a_tag.get_text(strip=True),
                    url=a_tag['href'],
                    description=snippet.get_text(strip=True) if snippet else "",
                    source=source.get_text(strip=True) if source else "",
                ))
        return results
|
    def suggestions(self, query: str, region: str = "en-US") -> List[str]:
        """Fetch autocomplete suggestions from Bing's OpenSearch JSON endpoint."""
        params = {"query": query, "mkt": region}
        url = f"https://api.bing.com/osjson.aspx?{urlencode(params)}"
        try:
            resp = self.session.get(url)
            resp.raise_for_status()
            # The OSJSON payload is [query, [suggestion, ...], ...]; index 1 holds the suggestions.
            data = resp.json()
            return data[1] if isinstance(data, list) and len(data) > 1 else []
        except Exception:
            return []
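
# Direct (non-HTTP) usage sketch of the scraper, assuming network access; values are illustrative:
#   searcher = BingSearch()
#   for r in searcher.text("fastapi tutorial", max_results=5, region="en-US", safesearch="moderate"):
#       print(r.url, "-", r.title)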
|
|
bing = BingSearch() |
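# A proxy can help avoid IP-based localization (the Query docs below recommend one).
# Illustrative, requests-style proxies dict; "user:pass@host:port" is a placeholder:
# bing = BingSearch(proxies={"http": "http://user:pass@host:port", "https": "http://user:pass@host:port"})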
|
|
@app.get("/search", response_model=List[BingSearchResult], summary="Perform a Bing text search")
async def text_search(
    keywords: str = Query(..., description="The search query."),
    max_results: int = Query(10, ge=1, le=50, description="Maximum number of results."),
    region: str = Query("en-US", description="Market to search in (e.g., 'en-US'). A proxy is recommended."),
    safesearch: str = Query("moderate", description="Safe search level: 'on', 'moderate', or 'off'.")
):
    try:
        return bing.text(keywords, max_results, region, safesearch)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
|
@app.get("/images", response_model=List[BingImageResult], summary="Perform a Bing image search")
async def image_search(
    keywords: str = Query(..., description="The image search query."),
    max_results: int = Query(10, ge=1, le=50, description="Maximum number of image results."),
    region: str = Query("en-US", description="Market to search in (e.g., 'en-US'). A proxy is recommended."),
    safesearch: str = Query("moderate", description="Safe search level: 'on', 'moderate', or 'off'.")
):
    try:
        return bing.images(keywords, max_results, region, safesearch)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
|
@app.get("/news", response_model=List[BingNewsResult], summary="Perform a Bing news search")
async def news_search(
    keywords: str = Query(..., description="The news search query."),
    max_results: int = Query(10, ge=1, le=50, description="Maximum number of news results."),
    region: str = Query("en-US", description="Market to search in (e.g., 'en-US'). A proxy is recommended."),
    safesearch: str = Query("moderate", description="Safe search level: 'on', 'moderate', or 'off'.")
):
    try:
        return bing.news(keywords, max_results, region, safesearch)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
|
@app.get("/suggestions", response_model=List[str], summary="Get Bing search suggestions")
async def get_suggestions(
    query: str = Query(..., description="The query to get suggestions for."),
    region: str = Query("en-US", description="Market for suggestions (e.g., 'en-US').")
):
    try:
        return bing.suggestions(query, region)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
|
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
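
# Example requests once the server is running (endpoints and parameters as defined above):
#   curl "http://localhost:8000/search?keywords=python&max_results=5"
#   curl "http://localhost:8000/images?keywords=sunset&safesearch=on"
#   curl "http://localhost:8000/news?keywords=ai&region=en-GB"
#   curl "http://localhost:8000/suggestions?query=pyth"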