Spaces:
Sleeping
Sleeping
File size: 2,792 Bytes
e01c07b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
"""Base classes for image search engines."""
import logging
from abc import ABC, abstractmethod
from dataclasses import dataclass
from enum import Enum
from typing import Dict, List, Optional, Set
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
class SearchPlatform(Enum):
    """Platforms that an image search can be run against.

    Every member's value is the lowercase string form of its name, so a
    member can be recovered from that string with ``SearchPlatform(value)``.
    """

    # Members named after a specific site.
    PINTEREST = "pinterest"
    INSTAGRAM = "instagram"
    REDDIT = "reddit"
    FLICKR = "flickr"
    DEVIANTART = "deviantart"

    # Generic entry that is not named after a particular site.
    GENERAL = "general"
@dataclass
class ImageResult:
    """One image returned by a search.

    Attributes:
        url: URL of the image.
        platform: The ``SearchPlatform`` this result is associated with.
        quality_score: Ranking score for the result; defaults to ``0.0``.
        width: Image width, or ``None`` when not known.
        height: Image height, or ``None`` when not known.
        title: Optional title for the image.
        source_url: Optional second URL (presumably the page the image was
            found on -- confirm against the engines that populate it).
    """

    url: str
    platform: SearchPlatform
    quality_score: float = 0.0
    width: Optional[int] = None
    height: Optional[int] = None
    title: Optional[str] = None
    source_url: Optional[str] = None

    @property
    def resolution_score(self) -> float:
        """Score the image by its pixel count (``width * height``).

        Returns:
            ``0.5`` when either dimension is missing or zero; otherwise
            ``1.0`` for at least 1,000,000 pixels, ``0.8`` for at least
            500,000, ``0.6`` for at least 250,000, and ``0.3`` below that.
        """
        # A missing (None) or zero dimension means the size is unknown, so
        # fall back to the neutral mid-range score.
        if not (self.width and self.height):
            return 0.5

        pixel_count = self.width * self.height

        # Tiers are listed from largest to smallest; the first floor the
        # pixel count reaches decides the score.
        tiers = (
            (1000000, 1.0),  # 1MP+
            (500000, 0.8),   # 0.5MP+
            (250000, 0.6),   # 0.25MP+
        )
        for floor, tier_score in tiers:
            if pixel_count >= floor:
                return tier_score
        return 0.3
@dataclass
class SearchResult:
    """Container for all search results from multiple platforms.

    Attributes:
        images: The image results that were collected.
        total_found: Number of results found, as reported by whoever builds
            this object (it is not derived from ``images`` here).
        platforms_used: The set of platforms that took part in the search.
        search_duration: How long the search took (unit is not fixed by this
            class -- presumably seconds; confirm with the producer).
    """

    images: List[ImageResult]
    total_found: int
    platforms_used: Set[SearchPlatform]
    search_duration: float

    def get_top_results(self, limit: int = 50) -> List[ImageResult]:
        """Return at most ``limit`` images, highest ``quality_score`` first.

        Images with equal scores keep their relative order from
        ``self.images`` (the sort is stable). ``self.images`` itself is not
        modified.

        Args:
            limit: Maximum number of images to return. Zero or a negative
                value yields an empty list.

        Returns:
            A new list holding the best-scoring images.
        """
        # Guard against negative limits: a plain ``[:limit]`` slice would
        # interpret e.g. -1 as "everything except the last item" and hand
        # back almost the whole list instead of nothing.
        if limit <= 0:
            return []

        ranked = sorted(
            self.images,
            key=lambda image: image.quality_score,
            reverse=True,
        )
        return ranked[:limit]
class BaseSearchEngine(ABC):
    """Abstract base for platform-specific image search engines.

    Concrete engines must implement :meth:`search` and :meth:`is_valid_url`;
    :meth:`get_quality_score` provides a URL-based default heuristic.
    """

    def __init__(self, platform: SearchPlatform):
        """Remember the target platform and set up a per-class logger.

        Args:
            platform: The platform this engine searches.
        """
        self.platform = platform
        # The logger name is "<module>.<ConcreteClassName>", so every engine
        # subclass logs under its own name.
        engine_name = self.__class__.__name__
        self.logger = logging.getLogger(f"{__name__}.{engine_name}")

    @abstractmethod
    def search(self, query: str, max_results: int = 20) -> List[ImageResult]:
        """Search the platform for images.

        Args:
            query: The search query.
            max_results: Requested cap on the number of results.

        Returns:
            The image results produced by the concrete engine.
        """

    @abstractmethod
    def is_valid_url(self, url: str) -> bool:
        """Report whether ``url`` is valid for this platform."""

    def get_quality_score(self, url: str, **kwargs) -> float:
        """Estimate a quality score for ``url`` in the range 0.0 to 1.0.

        Starts from a base of 0.5, subtracts a penalty for very long URLs
        (0.1 above 300 characters, 0.2 above 500), and adds 0.1 when the URL
        contains a known image extension anywhere in it (case-insensitive).

        Args:
            url: The URL to score.
            **kwargs: Accepted but not used by this base implementation.

        Returns:
            The score, clamped to ``[0.0, 1.0]``.
        """
        # Very long URLs are often broken, so they lose points.
        url_length = len(url)
        if url_length > 500:
            length_penalty = 0.2
        elif url_length > 300:
            length_penalty = 0.1
        else:
            length_penalty = 0.0

        # Lowercase once, then look for any image extension as a substring.
        lowered_url = url.lower()
        known_extensions = ('.jpg', '.jpeg', '.png', '.webp')
        has_image_extension = any(ext in lowered_url for ext in known_extensions)
        extension_bonus = 0.1 if has_image_extension else 0.0

        raw_score = 0.5 - length_penalty + extension_bonus
        capped = min(1.0, raw_score)
        return max(0.0, capped)