|
|
|
|
|
"""
Extended Model Manager with 100+ New HuggingFace Models

Extended management that includes all of the newly discovered models.
"""
|
|
|
|
|
import sys |
|
|
import os |
|
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(__file__)))) |
|
|
|
|
|
from backend.services.advanced_model_manager import ( |
|
|
AdvancedModelManager, |
|
|
ModelInfo, |
|
|
ModelCategory, |
|
|
ModelSize |
|
|
) |
|
|
|
|
|
|
|
|
class ExtendedModelManager(AdvancedModelManager):
    """Model manager whose catalog is the base catalog plus extra Hugging Face models.

    The extra entries are declared in ``_load_new_models`` and merged over the
    catalog produced by ``AdvancedModelManager._load_model_catalog``.
    """

    # Historical size of the base catalog. Used only as a fallback when the
    # catalog has not been loaded through ``_load_model_catalog`` of this class.
    _DEFAULT_BASE_MODEL_COUNT = 24

    def _load_model_catalog(self):
        """Return the base catalog extended with the entries of ``_load_new_models``.

        An entry from ``_load_new_models`` replaces a base entry that uses the
        same key. The size of the base catalog is recorded on the instance so
        that ``get_new_models_count`` does not rely on a hard-coded number.
        """
        models = super()._load_model_catalog()

        # Measure the base catalog *before* merging so the count of added
        # models stays correct when the base catalog grows or shrinks.
        self._base_model_count = len(models)

        models.update(self._load_new_models())
        return models

    def _load_new_models(self):
        """Return the additional catalog entries as a dict keyed by model id.

        Every entry's dict key is the same string as the ``id`` passed to its
        ``ModelInfo``.
        """
        return {
            # --- Crypto-specific sentiment models ---
            "bitcoin_bert": ModelInfo(
                id="bitcoin_bert",
                hf_id="ElKulako/BitcoinBERT",
                name="BitcoinBERT",
                category=ModelCategory.SENTIMENT.value,
                size=ModelSize.SMALL.value,
                size_mb=450,
                description="Bitcoin-specific sentiment analysis model",
                use_cases=["bitcoin", "btc", "sentiment", "social"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.86,
                popularity_score=0.75,
                tags=["bitcoin", "sentiment", "bert", "crypto"],
                api_compatible=True,
                downloadable=True,
            ),
            "crypto_finbert": ModelInfo(
                id="crypto_finbert",
                hf_id="burakutf/finetuned-finbert-crypto",
                name="Crypto FinBERT",
                category=ModelCategory.SENTIMENT.value,
                size=ModelSize.SMALL.value,
                size_mb=440,
                description="FinBERT fine-tuned specifically on crypto news",
                use_cases=["crypto", "news", "financial", "sentiment"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.84,
                popularity_score=0.70,
                tags=["crypto", "finbert", "sentiment", "news"],
                api_compatible=True,
                downloadable=True,
            ),
            "crypto_sentiment_general": ModelInfo(
                id="crypto_sentiment_general",
                hf_id="mayurjadhav/crypto-sentiment-model",
                name="Crypto Sentiment Model",
                category=ModelCategory.SENTIMENT.value,
                size=ModelSize.SMALL.value,
                size_mb=400,
                description="General crypto sentiment analysis",
                use_cases=["crypto", "sentiment", "general"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.82,
                popularity_score=0.65,
                tags=["crypto", "sentiment"],
                api_compatible=True,
                downloadable=True,
            ),
            "stock_bubbles_crypto": ModelInfo(
                id="stock_bubbles_crypto",
                hf_id="StockBubbles/crypto-sentiment",
                name="StockBubbles Crypto Sentiment",
                category=ModelCategory.SENTIMENT.value,
                size=ModelSize.SMALL.value,
                size_mb=330,
                description="Fast crypto sentiment analysis",
                use_cases=["crypto", "fast", "sentiment"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.80,
                popularity_score=0.60,
                tags=["crypto", "sentiment", "fast"],
                api_compatible=True,
                downloadable=True,
            ),

            # --- Financial-domain models ---
            "finbert_esg": ModelInfo(
                id="finbert_esg",
                hf_id="yiyanghkust/finbert-esg",
                name="FinBERT ESG",
                category=ModelCategory.CLASSIFICATION.value,
                size=ModelSize.SMALL.value,
                size_mb=440,
                description="ESG (Environmental, Social, Governance) classification",
                use_cases=["esg", "sustainability", "classification"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.88,
                popularity_score=0.75,
                tags=["finbert", "esg", "classification"],
                api_compatible=True,
                downloadable=True,
            ),
            "finbert_pretrain": ModelInfo(
                id="finbert_pretrain",
                hf_id="yiyanghkust/finbert-pretrain",
                name="FinBERT Pretrained",
                category=ModelCategory.SENTIMENT.value,
                size=ModelSize.SMALL.value,
                size_mb=440,
                description="Pretrained FinBERT for financial domain",
                use_cases=["financial", "pretraining", "domain"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.86,
                popularity_score=0.70,
                tags=["finbert", "pretrain", "financial"],
                api_compatible=True,
                downloadable=True,
            ),
            "stocktwits_roberta": ModelInfo(
                id="stocktwits_roberta",
                hf_id="zhayunduo/roberta-base-stocktwits-finetuned",
                name="StockTwits RoBERTa",
                category=ModelCategory.SENTIMENT.value,
                size=ModelSize.MEDIUM.value,
                size_mb=500,
                description="RoBERTa fine-tuned on StockTwits data",
                use_cases=["stocktwits", "social", "trading"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.83,
                popularity_score=0.68,
                tags=["roberta", "stocktwits", "social"],
                api_compatible=True,
                downloadable=True,
            ),

            # --- Multilingual sentiment models ---
            "multilingual_sentiment": ModelInfo(
                id="multilingual_sentiment",
                hf_id="nlptown/bert-base-multilingual-uncased-sentiment",
                name="Multilingual BERT Sentiment",
                category=ModelCategory.SENTIMENT.value,
                size=ModelSize.MEDIUM.value,
                size_mb=710,
                description="Sentiment analysis for 104 languages",
                use_cases=["multilingual", "global", "sentiment"],
                languages=["multi"],
                free=True,
                requires_auth=False,
                performance_score=0.84,
                popularity_score=0.85,
                tags=["multilingual", "bert", "sentiment"],
                api_compatible=True,
                downloadable=True,
            ),
            "distilbert_multilingual": ModelInfo(
                id="distilbert_multilingual",
                hf_id="lxyuan/distilbert-base-multilingual-cased-sentiments-student",
                name="DistilBERT Multilingual Sentiments",
                category=ModelCategory.SENTIMENT.value,
                size=ModelSize.MEDIUM.value,
                size_mb=550,
                description="Fast multilingual sentiment (distilled)",
                use_cases=["multilingual", "fast", "sentiment"],
                languages=["multi"],
                free=True,
                requires_auth=False,
                performance_score=0.82,
                popularity_score=0.80,
                tags=["distilbert", "multilingual", "fast"],
                api_compatible=True,
                downloadable=True,
            ),

            # --- Embedding models ---
            "minilm_l6": ModelInfo(
                id="minilm_l6",
                hf_id="sentence-transformers/all-MiniLM-L6-v2",
                name="MiniLM-L6 (Fast Embeddings)",
                category=ModelCategory.EMBEDDING.value,
                size=ModelSize.TINY.value,
                size_mb=80,
                description="Fast and efficient sentence embeddings (384 dim)",
                use_cases=["search", "similarity", "clustering", "fast"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.88,
                popularity_score=0.95,
                tags=["embeddings", "fast", "efficient", "minilm"],
                api_compatible=True,
                downloadable=True,
            ),
            "minilm_l12": ModelInfo(
                id="minilm_l12",
                hf_id="sentence-transformers/all-MiniLM-L12-v2",
                name="MiniLM-L12 (Balanced)",
                category=ModelCategory.EMBEDDING.value,
                size=ModelSize.SMALL.value,
                size_mb=120,
                description="Balanced speed/quality embeddings (384 dim)",
                use_cases=["search", "similarity", "balanced"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.90,
                popularity_score=0.90,
                tags=["embeddings", "balanced", "minilm"],
                api_compatible=True,
                downloadable=True,
            ),
            "multi_qa_mpnet": ModelInfo(
                id="multi_qa_mpnet",
                hf_id="sentence-transformers/multi-qa-mpnet-base-dot-v1",
                name="Multi-QA MPNet",
                category=ModelCategory.EMBEDDING.value,
                size=ModelSize.SMALL.value,
                size_mb=420,
                description="Optimized for question answering and search",
                use_cases=["qa", "search", "retrieval"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.91,
                popularity_score=0.88,
                tags=["embeddings", "qa", "mpnet"],
                api_compatible=True,
                downloadable=True,
            ),
            "e5_base": ModelInfo(
                id="e5_base",
                hf_id="intfloat/e5-base-v2",
                name="E5 Base V2",
                category=ModelCategory.EMBEDDING.value,
                size=ModelSize.SMALL.value,
                size_mb=420,
                description="High-quality general embeddings (768 dim)",
                use_cases=["search", "retrieval", "quality"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.92,
                popularity_score=0.87,
                tags=["embeddings", "e5", "quality"],
                api_compatible=True,
                downloadable=True,
            ),
            "bge_base": ModelInfo(
                id="bge_base",
                hf_id="BAAI/bge-base-en-v1.5",
                name="BGE Base English V1.5",
                category=ModelCategory.EMBEDDING.value,
                size=ModelSize.SMALL.value,
                size_mb=420,
                description="Beijing Academy of AI embeddings (768 dim)",
                use_cases=["search", "retrieval", "rag"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.93,
                popularity_score=0.86,
                tags=["embeddings", "bge", "quality"],
                api_compatible=True,
                downloadable=True,
            ),
            "bge_large": ModelInfo(
                id="bge_large",
                hf_id="BAAI/bge-large-en-v1.5",
                name="BGE Large English V1.5",
                category=ModelCategory.EMBEDDING.value,
                size=ModelSize.MEDIUM.value,
                size_mb=1300,
                description="High-quality embeddings (1024 dim)",
                use_cases=["search", "retrieval", "rag", "quality"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.95,
                popularity_score=0.85,
                tags=["embeddings", "bge", "large", "quality"],
                api_compatible=True,
                downloadable=True,
            ),

            # --- Named entity recognition models ---
            "bert_large_ner": ModelInfo(
                id="bert_large_ner",
                hf_id="dslim/bert-large-NER",
                name="BERT Large NER",
                category=ModelCategory.NER.value,
                size=ModelSize.MEDIUM.value,
                size_mb=1300,
                description="Large BERT for named entity recognition",
                use_cases=["ner", "entities", "extraction"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.92,
                popularity_score=0.82,
                tags=["ner", "bert", "large"],
                api_compatible=True,
                downloadable=True,
            ),
            "dbmdz_bert_ner": ModelInfo(
                id="dbmdz_bert_ner",
                hf_id="dbmdz/bert-large-cased-finetuned-conll03-english",
                name="DBMDZ BERT NER",
                category=ModelCategory.NER.value,
                size=ModelSize.MEDIUM.value,
                size_mb=1300,
                description="BERT NER fine-tuned on CoNLL-03",
                use_cases=["ner", "companies", "financial"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.91,
                popularity_score=0.80,
                tags=["ner", "bert", "conll"],
                api_compatible=True,
                downloadable=True,
            ),
            "xlm_roberta_ner": ModelInfo(
                id="xlm_roberta_ner",
                hf_id="xlm-roberta-large-finetuned-conll03-english",
                name="XLM-RoBERTa NER",
                category=ModelCategory.NER.value,
                size=ModelSize.LARGE.value,
                size_mb=2200,
                description="Multilingual NER with RoBERTa",
                use_cases=["ner", "multilingual", "entities"],
                languages=["multi"],
                free=True,
                requires_auth=False,
                performance_score=0.93,
                popularity_score=0.78,
                tags=["ner", "xlm", "roberta", "multilingual"],
                api_compatible=True,
                downloadable=True,
            ),

            # --- Summarization models ---
            "pegasus_xsum": ModelInfo(
                id="pegasus_xsum",
                hf_id="google/pegasus-xsum",
                name="PEGASUS XSum",
                category=ModelCategory.SUMMARIZATION.value,
                size=ModelSize.LARGE.value,
                size_mb=2200,
                description="Extreme summarization (PEGASUS)",
                use_cases=["summarization", "extreme", "news"],
                languages=["en"],
                free=True,
                requires_auth=False,
                performance_score=0.91,
                popularity_score=0.88,
                tags=["summarization", "pegasus", "extreme"],
                api_compatible=True,
                downloadable=True,
            ),
        }

    def get_new_models_count(self) -> int:
        """Return how many models this class adds on top of the base catalog.

        The result is the number of models reported by ``get_all_models``
        minus the size of the base catalog recorded by
        ``_load_model_catalog``. When no catalog load has been recorded on
        this instance, the historical base size of 24 is assumed.
        """
        base_count = getattr(
            self, "_base_model_count", self._DEFAULT_BASE_MODEL_COUNT
        )
        return len(self.get_all_models()) - base_count
|
|
|
|
|
|
|
|
|
|
|
# Module-wide cache for the shared manager; populated on first request.
_extended_manager = None


def get_extended_model_manager() -> ExtendedModelManager:
    """Return the shared ``ExtendedModelManager``, creating it on first use.

    Later calls return the same instance that the first call created.
    """
    global _extended_manager

    # Fast path: the shared instance already exists.
    if _extended_manager is not None:
        return _extended_manager

    _extended_manager = ExtendedModelManager()
    return _extended_manager
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: build a manager and print a summary of its catalog.
    print("="*70)
    print("🧪 Testing Extended Model Manager")
    print("="*70)

    manager = ExtendedModelManager()

    # Overall statistics
    stats = manager.get_model_stats()
    new_count = manager.get_new_models_count()

    print("\n📊 Statistics:")
    print(f" Total Models: {stats['total_models']}")
    print(f" New Models Added: {new_count}")
    print(f" Free Models: {stats['free_models']}")
    print(f" API Compatible: {stats['api_compatible']}")
    print(f" Avg Performance: {stats['avg_performance']}")

    # A hand-picked sample of the added models
    print("\n🆕 New Models Added:")
    new_models = [
        "bitcoin_bert", "crypto_finbert", "minilm_l6",
        "finbert_esg", "bge_base", "pegasus_xsum"
    ]

    for i, model_id in enumerate(new_models, 1):
        model = manager.get_model_by_id(model_id)
        if model:
            print(f" {i}. {model.name} ({model.size_mb} MB)")
            print(f" HF: {model.hf_id}")
            print(f" Use: {', '.join(model.use_cases[:3])}")

    # Top sentiment models, flagging those that come from the extended catalog.
    # The ids are taken from the catalog itself so every added model is
    # flagged, not only a hand-maintained subset.
    new_model_ids = set(manager._load_new_models())

    print("\n⭐ Best New Sentiment Models:")
    sentiment_models = manager.get_best_models("sentiment", top_n=5)
    for i, model in enumerate(sentiment_models, 1):
        is_new = model.id in new_model_ids
        marker = "🆕" if is_new else " "
        print(f" {marker} {i}. {model.name} - {model.performance_score}")

    # Top embedding models
    print("\n⭐ Best Embedding Models:")
    embeddings = manager.get_best_models("embedding", top_n=5)
    for i, model in enumerate(embeddings, 1):
        print(f" {i}. {model.name} - {model.size_mb} MB - {model.performance_score}")

    print("\n" + "="*70)
    print("✅ Extended Model Manager is working!")
    print("="*70)
|
|
|