Spaces:
Build error
```python
from transformers import pipeline
import torch
import os
from dotenv import load_dotenv

load_dotenv()


class LLMPipeline:
    def __init__(self):
        model_id = os.getenv("HF_MODEL_ID", "mradermacher/Huihui-gemma-3n-E4B-it-abliterated-GGUF")
        try:
            # Try to use CUDA if available
            if torch.cuda.is_available():
                device = "cuda"
                dtype = torch.float16
            else:
                device = "cpu"
                dtype = torch.float32
            self.pipeline = pipeline(
                "text-generation",
                model=model_id,
                torch_dtype=dtype,
                device_map="auto" if device == "cuda" else None,
                model_kwargs={"low_cpu_mem_usage": True}
            )
        except Exception as e:
            print(f"Error loading model: {e}")
            raise

    async def generate(self, prompt: str, max_length: int = 100) -> str:
        """Generate text using the local Gemma model."""
        try:
            result = self.pipeline(
                prompt,
                max_length=max_length,
                num_return_sequences=1,
                do_sample=True,  # sampling must be enabled for temperature/top_p to take effect
                temperature=0.7,
                top_p=0.9
            )
            return result[0]['generated_text']
        except Exception as e:
            print(f"Error in LLM generation: {e}")
            return ""
```
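For context, here is a minimal sketch of how this class could be exercised locally. The `main` coroutine, the prompt string, and the `max_length` value are illustrative assumptions and not part of the original code; `generate` is a coroutine, so it is driven with `asyncio.run`.

```python
import asyncio

# Hypothetical driver for LLMPipeline, assuming the class above lives in the
# same module. The prompt and max_length here are placeholder values.
async def main():
    llm = LLMPipeline()  # loads the model once at startup
    text = await llm.generate("Hello, how are you?", max_length=64)
    print(text)

if __name__ == "__main__":
    asyncio.run(main())
```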