import json
import time
from typing import Dict, List, Optional
from urllib.parse import urlencode

from bs4 import BeautifulSoup
from curl_cffi.requests import Session
from fastapi import FastAPI, HTTPException, Query, Request
from pydantic import BaseModel, Field
|
|
class BingSearchResult(BaseModel):
    url: str = Field(..., description="The URL of the search result.")
    title: str = Field(..., description="The title of the search result.")
    description: str = Field(..., description="A brief description or snippet from the result page.")


class BingImageResult(BaseModel):
    title: str = Field(..., description="The title or caption of the image.")
    image: str = Field(..., description="The direct URL to the full-resolution image.")
    thumbnail: str = Field(..., description="The URL to the thumbnail of the image.")
    url: str = Field(..., description="The URL of the webpage where the image was found.")
    source: str = Field(..., description="The source domain of the image.")


class BingNewsResult(BaseModel):
    title: str = Field(..., description="The title of the news article.")
    url: str = Field(..., description="The URL to the full news article.")
    description: str = Field(..., description="A snippet from the news article.")
    source: str = Field(..., description="The publisher or source of the news article.")
|
|
app = FastAPI(
    title="Definitive Fast Bing Search API",
    description="Returns correct, non-localized search results from Bing using advanced techniques.",
    version="9.0.0-complete",
)
|
|
@app.middleware("http")
async def add_custom_headers(request: Request, call_next):
    """Stamp every response with its processing time and a branding header."""
    start_time = time.time()
    response = await call_next(request)
    process_time = time.time() - start_time
    response.headers["X-Process-Time"] = f"{process_time:.4f} seconds"
    response.headers["X-Powered-By"] = "NiansuhAI"
    return response
|
|
class BingSearch:
    """The definitive Bing search scraper that counters aggressive localization."""

    def __init__(
        self,
        proxies: Optional[Dict[str, str]] = None,
        timeout: int = 15,
        impersonate: str = "chrome110",
    ):
        self.session = Session(
            proxies=proxies or {},
            timeout=timeout,
            impersonate=impersonate,
            verify=False,  # note: disables TLS certificate verification
        )
        self._base_url = "https://www.bing.com"
        self.session.headers.update({
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
        })
|
    def _update_session_for_region(self, region: str = "en-US"):
        """The crucial fix: set a cookie that explicitly tells Bing our preferred market."""
        self.session.cookies.set("SRCHHPGUSR", f"SRCHLANG=en&MKT={region}", domain=".bing.com")
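    # Assumption for illustration: with region="en-US", subsequent requests carry a header like
    #   Cookie: SRCHHPGUSR=SRCHLANG=en&MKT=en-US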
|
    def text(
        self, keywords: str, max_results: int, region: str, safesearch: str
    ) -> List[BingSearchResult]:
        self._update_session_for_region(region)
        safe_map = {"on": "Strict", "moderate": "Moderate", "off": "Off"}
        safe = safe_map.get(safesearch.lower(), "Moderate")

        fetched_results = []
        page = 1
        while len(fetched_results) < max_results:
            # "first" is the 1-based index of the first result on the requested page.
            params = {"q": keywords, "first": (page - 1) * 10 + 1, "safeSearch": safe}
            try:
                resp = self.session.get(self._base_url + "/search", params=params)
                resp.raise_for_status()
                soup = BeautifulSoup(resp.text, "html.parser")
            except Exception as e:
                print(f"Error fetching text search page: {e}")
                break

            result_blocks = soup.select("li.b_algo")
            if not result_blocks:
                break

            for result in result_blocks:
                link_tag = result.select_one("h2 a")
                desc_tag = result.select_one(".b_caption p")
                if link_tag and desc_tag and link_tag.get('href'):
                    fetched_results.append(BingSearchResult(
                        url=link_tag['href'],
                        title=link_tag.get_text(strip=True),
                        description=desc_tag.get_text(strip=True),
                    ))
                if len(fetched_results) >= max_results:
                    break
            page += 1
        return fetched_results[:max_results]
|
    def images(
        self, keywords: str, max_results: int, region: str, safesearch: str
    ) -> List[BingImageResult]:
        self._update_session_for_region(region)
        safe_map = {"on": "Strict", "moderate": "Moderate", "off": "Off"}
        safe = safe_map.get(safesearch.lower(), "Moderate")
        params = {"q": keywords, "safeSearch": safe, "form": "HDRSC2"}
        try:
            resp = self.session.get(f"{self._base_url}/images/search", params=params)
            resp.raise_for_status()
            soup = BeautifulSoup(resp.text, "html.parser")
        except Exception as e:
            raise Exception(f"Bing image search failed: {e}")

        results = []
        # Each result anchor carries its metadata as JSON in the "m" attribute.
        for item in soup.select("a.iusc"):
            if len(results) >= max_results:
                break
            try:
                m_data = json.loads(item.get("m", "{}"))
                if m_data and m_data.get("murl"):
                    results.append(BingImageResult(
                        title=m_data.get("t", ""),
                        image=m_data.get("murl"),
                        thumbnail=m_data.get("turl", ""),
                        url=m_data.get("purl", ""),
                        source=m_data.get("surl", ""),
                    ))
            except Exception:
                continue
        return results
|
    def news(
        self, keywords: str, max_results: int, region: str, safesearch: str
    ) -> List[BingNewsResult]:
        self._update_session_for_region(region)
        safe_map = {"on": "Strict", "moderate": "Moderate", "off": "Off"}
        safe = safe_map.get(safesearch.lower(), "Moderate")
        params = {"q": keywords, "safeSearch": safe, "form": "QBNH"}
        try:
            resp = self.session.get(f"{self._base_url}/news/search", params=params)
            resp.raise_for_status()
            soup = BeautifulSoup(resp.text, "html.parser")
        except Exception as e:
            raise Exception(f"Bing news search failed: {e}")

        results = []
        for item in soup.select("div.news-card"):
            if len(results) >= max_results:
                break
            a_tag = item.find("a", class_="title")
            snippet = item.find("div", class_="snippet")
            source = item.find("div", class_="source")
            if a_tag and a_tag.get('href'):
                results.append(BingNewsResult(
                    title=a_tag.get_text(strip=True),
                    url=a_tag['href'],
                    description=snippet.get_text(strip=True) if snippet else "",
                    source=source.get_text(strip=True) if source else "",
                ))
        return results
|
    def suggestions(self, query: str, region: str = "en-US") -> List[str]:
        """Fetch autocomplete suggestions from Bing's OpenSearch JSON endpoint."""
        params = {"query": query, "mkt": region}
        url = f"https://api.bing.com/osjson.aspx?{urlencode(params)}"
        try:
            resp = self.session.get(url)
            resp.raise_for_status()
            # The OSJSON payload is [query, [suggestion, ...], ...]; index 1 holds the suggestions.
            data = resp.json()
            return data[1] if isinstance(data, list) and len(data) > 1 else []
        except Exception:
            return []
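
# Direct (non-HTTP) usage sketch of the scraper, assuming network access; values are illustrative:
#   searcher = BingSearch()
#   for r in searcher.text("fastapi tutorial", max_results=5, region="en-US", safesearch="moderate"):
#       print(r.url, "-", r.title)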
|
|
bing = BingSearch() |
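# A proxy can help avoid IP-based localization (the Query docs below recommend one).
# Illustrative, requests-style proxies dict; "user:pass@host:port" is a placeholder:
# bing = BingSearch(proxies={"http": "http://user:pass@host:port", "https": "http://user:pass@host:port"})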
|
|
@app.get("/search", response_model=List[BingSearchResult], summary="Perform a Bing text search")
async def text_search(
    keywords: str = Query(..., description="The search query."),
    max_results: int = Query(10, ge=1, le=50, description="Maximum number of results."),
    region: str = Query("en-US", description="Market to search in (e.g., 'en-US'). A proxy is recommended."),
    safesearch: str = Query("moderate", description="Safe search level: 'on', 'moderate', or 'off'.")
):
    try:
        return bing.text(keywords, max_results, region, safesearch)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
|
@app.get("/images", response_model=List[BingImageResult], summary="Perform a Bing image search")
async def image_search(
    keywords: str = Query(..., description="The image search query."),
    max_results: int = Query(10, ge=1, le=50, description="Maximum number of image results."),
    region: str = Query("en-US", description="Market to search in (e.g., 'en-US'). A proxy is recommended."),
    safesearch: str = Query("moderate", description="Safe search level: 'on', 'moderate', or 'off'.")
):
    try:
        return bing.images(keywords, max_results, region, safesearch)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
|
@app.get("/news", response_model=List[BingNewsResult], summary="Perform a Bing news search")
async def news_search(
    keywords: str = Query(..., description="The news search query."),
    max_results: int = Query(10, ge=1, le=50, description="Maximum number of news results."),
    region: str = Query("en-US", description="Market to search in (e.g., 'en-US'). A proxy is recommended."),
    safesearch: str = Query("moderate", description="Safe search level: 'on', 'moderate', or 'off'.")
):
    try:
        return bing.news(keywords, max_results, region, safesearch)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
|
@app.get("/suggestions", response_model=List[str], summary="Get Bing search suggestions")
async def get_suggestions(
    query: str = Query(..., description="The query to get suggestions for."),
    region: str = Query("en-US", description="Market for suggestions (e.g., 'en-US').")
):
    try:
        return bing.suggestions(query, region)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
|
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
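
# Example requests once the server is running (endpoints and parameters as defined above):
#   curl "http://localhost:8000/search?keywords=python&max_results=5"
#   curl "http://localhost:8000/images?keywords=sunset&safesearch=on"
#   curl "http://localhost:8000/news?keywords=ai&region=en-GB"
#   curl "http://localhost:8000/suggestions?query=pyth"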