import spaces
import json
import subprocess
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider
import gradio as gr
from huggingface_hub import hf_hub_download
import logging
import time

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
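
# A heavily quantised (IQ1_M) GGUF build of Llama-3.1-70B-Instruct, downloaded once at startup.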
repo_id = "MaziyarPanahi/Meta-Llama-3.1-70B-Instruct-GGUF"
filename = "Meta-Llama-3.1-70B-Instruct.IQ1_M.gguf"

def chunk_text(text, chunk_size=5000):
    """
    Splits the input text into chunks of the specified size.

    Args:
        text (str): The input text to be chunked.
        chunk_size (int): The size of each chunk in words.

    Returns:
        list: A list of text chunks.
    """
    words = text.split()
    chunks = [' '.join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
    return chunks
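
# Illustration: chunk_text("a b c d e", chunk_size=2) -> ["a b", "c d", "e"]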

try:
    start_time = time.time()
    logger.info("Downloading model....")
    hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        local_dir="./model"
    )
    end_time = time.time()
    logger.info(f"Download complete. Time taken: {end_time - start_time} seconds.")
except Exception as e:
    logger.error(f"Unable to download model: {e}")
    raise

llm = None  # model handle; loaded lazily inside respond() on the first request
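
# Assumption: the Space targets ZeroGPU hardware (hence the `spaces` import above).
# ZeroGPU only grants a GPU to functions decorated with @spaces.GPU; on other
# hardware the decorator is a no-op, so it is safe either way.
@spaces.GPU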
def respond(message, history, temperature, max_tokens):
    """
    Generate a streaming response using the Llama-3.1-70B-Instruct model with chunking.

    Args:
        message (str): The input message.
        history (list): The conversation history used by ChatInterface - not used here.
        temperature (float): The temperature for generating the response.
        max_tokens (int): The maximum number of new tokens to generate.

    Yields:
        str: The response generated so far.
    """
    chat_template = MessagesFormatterType.LLAMA_3
    global llm

    start_time = time.time()
    logger.info("Loading model...")
    if llm is None:
        llm = Llama(
            model_path=f"model/{filename}",
            flash_attn=True,
            n_gpu_layers=-1,
            n_batch=1,
            n_ctx=8192,
            last_n_tokens_size=0  # window of recent tokens kept for repetition penalties
        )
    end_time = time.time()
    logger.info(f"Model loaded. Time taken: {end_time - start_time} seconds.")

    start_time = time.time()
    logger.info("Loading provider and agent for the Llama model....")
    provider = LlamaCppPythonProvider(llm)

    SYS_PROMPT = """
Extract the following information from the given text:
Identify the specific areas where the work needs to be done and add the furniture that has to be changed.
Do not specify the work that has to be done.
Format the extracted information in the following JSON structure:
{
    "Area Type 1": [
        "Furniture 1",
        "Furniture 2",
        ...
    ],
    "Area Type 2": [
        "Furniture 1",
        "Furniture 2",
        ...
    ]
}
Requirements:
1. Each area type (e.g., lobby, bar, etc.) should have its own key.
2. List the furniture on which the work needs to be performed without specifying the work or units of items.
3. Ignore any personal information or irrelevant details.
4. Follow the JSON pattern strictly and ensure clarity and accuracy in the extracted information.
Example:
Given the paragraph: "In the lobby, replace 5 light fixtures and remove 2 old carpets. In the bar,
install 3 new tables and remove 4 broken chairs."
The JSON output should be:
{
    "Lobby": [
        "Light fixtures",
        "Old carpets"
    ],
    "Bar": [
        "New tables",
        "Broken chairs"
    ]
}
Please ensure that the output JSON is well-structured and includes only relevant details about the work to be done.
"""

    agent = LlamaCppAgent(
        provider,
        system_prompt=SYS_PROMPT,
        predefined_messages_formatter_type=chat_template,
        debug_output=False
    )

    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.max_tokens = max_tokens
    settings.stream = True
    end_time = time.time()
    logger.info(f"Provider settings updated. Prompt loaded. Time taken: {end_time - start_time} seconds.")

    chunks = chunk_text(message)
    responses = []
    start_time = time.time()
    logger.info("Generating responses...")
    for chunk in chunks:
        response = agent.get_chat_response(
            chunk,
            llm_sampling_settings=settings,
            returns_streaming_generator=True,  # return a token-streaming generator
            print_output=False
        )
        responses.append(response)
    logger.info(f"Streaming generators created. Time taken: {time.time() - start_time} seconds.")

    # Accumulate the streamed tokens and yield the running output to the ChatInterface.
    output = ""
    for response in responses:
        for text in response:
            output += text
            yield output
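
# Illustration only (hypothetical call, outside Gradio): the generator can be consumed directly, e.g.
#   for partial in respond("In the lobby, replace 5 light fixtures.", [], 0.7, 512):
#       print(partial)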

DESCRIPTION = '''
<div>
<h1 style="text-align: center;">ContenteaseAI custom trained model</h1>
</div>
'''

LICENSE = """
<p/>
---
For more information, visit our [website](https://contentease.ai).
"""

PLACEHOLDER = """
<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
    <h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">ContenteaseAI custom trained model</h1>
    <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Enter the text extracted from the PDF:</p>
</div>
"""

css = """
h1 {
    text-align: center;
    display: block;
}
"""

# Gradio block
chatbot = gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='Gradio ChatInterface')
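# ChatInterface calls respond() with (message, history) followed by the values of the
# two sliders registered below as additional_inputs.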
with gr.Blocks(fill_height=True, css=css) as demo:
    gr.Markdown(DESCRIPTION)
    gr.ChatInterface(
        fn=respond,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
        additional_inputs=[
            gr.Slider(minimum=0, maximum=1, step=0.1, value=0.90, label="Temperature", render=False),
            gr.Slider(minimum=128, maximum=2000, step=1, value=1500, label="Max new tokens", render=False),
        ]
    )
    gr.Markdown(LICENSE)

if __name__ == "__main__":
    try:
        demo.launch(show_error=True, debug=True)
    except Exception as e:
        logger.error(f"Error launching Gradio demo: {e}")