Spaces:
Build error
```python
from transformers import pipeline
import torch
import os
from dotenv import load_dotenv

load_dotenv()


class LLMPipeline:
    def __init__(self):
        model_id = os.getenv("HF_MODEL_ID", "mradermacher/Huihui-gemma-3n-E4B-it-abliterated-GGUF")
        try:
            # Try to use CUDA if available
            if torch.cuda.is_available():
                device = "cuda"
                dtype = torch.float16
            else:
                device = "cpu"
                dtype = torch.float32
            self.pipeline = pipeline(
                "text-generation",
                model=model_id,
                torch_dtype=dtype,
                device_map="auto" if device == "cuda" else None,
                model_kwargs={"low_cpu_mem_usage": True}
            )
        except Exception as e:
            print(f"Error loading model: {e}")
            raise

    async def generate(self, prompt: str, max_length: int = 100) -> str:
        """Generate text using the local Gemma model."""
        try:
            result = self.pipeline(
                prompt,
                max_length=max_length,
                num_return_sequences=1,
                do_sample=True,  # sampling must be enabled for temperature/top_p to take effect
                temperature=0.7,
                top_p=0.9
            )
            return result[0]['generated_text']
        except Exception as e:
            print(f"Error in LLM generation: {e}")
            return ""
```
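For context, here is a minimal sketch of how this class could be exercised locally. The `main` coroutine, the prompt string, and the `max_length` value are illustrative assumptions and not part of the original code; `generate` is a coroutine, so it is driven with `asyncio.run`.

```python
import asyncio

# Hypothetical driver for LLMPipeline, assuming the class above lives in the
# same module. The prompt and max_length here are placeholder values.
async def main():
    llm = LLMPipeline()  # loads the model once at startup
    text = await llm.generate("Hello, how are you?", max_length=64)
    print(text)

if __name__ == "__main__":
    asyncio.run(main())
```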