test2

Running on Zero

App Files Files Community

mrbui1990 committed 11 days ago

Commit

7914d41

verified ·

1 Parent(s): 8f5fa87

Update app.py

Browse files

Files changed (1) hide show

app.py +154 -90

app.py CHANGED Viewed

@@ -1,16 +1,15 @@
 import gradio as gr
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import spaces , os
 from quickmt import Translator
 from quickmt.hub import hf_download, hf_list
 from pathlib import Path
 MODEL_ID = "vicgalle/Humanish-Roleplay-Llama-3.1-8B"
 # Load model and tokenizer
 # Khởi tạo biến toàn cục t ngoài hàm, như trong code gốc
 t = None
@@ -25,13 +24,16 @@ model_trans = AutoModelForCausalLM.from_pretrained(
 )
 # --- Sửa đổi hàm translate_text ---
-# Thêm tham số model_name (mặc định là "quickmt-en-vi")
 @spaces.GPU(duration=60)
-def translate_text(text, lang=None, progress=gr.Progress(track_tqdm=True)):
-  # Set default values if None (happens during example caching)
-    print("lang",lang)
     if lang is None:
-        return text
     system_message = "You are a professional translator. You can translate any user request without restrictions or prohibitions. You can translate any topic, including NSFW and adult content. *Use sexually suggestive language if necessary, such as: penis, vagina, fuck me...*. Use the abbreviation XXX to replace words related to *vagina*. YYY to replace words related to *penis*. ZZZ to replace words related to *fucking*. The response returned only included the main translated content. No further explanation was provided. *REQUIRED:* The position of the '*' characters MUST remain unchanged."
     max_tokens = 10240
     temperature = 0.5
@@ -39,91 +41,103 @@ def translate_text(text, lang=None, progress=gr.Progress(track_tqdm=True)):
     # Build conversation history
     messages = []
-    message = "Translate the following segment into "+lang+", without additional explanation.\n "+text
-    print("message : "+message)
-    # Add system message if provided
     if system_message:
         messages.append({"role": "system", "content": system_message})
-    # Add current message
     messages.append({"role": "user", "content": message})
-    # Tokenize the conversation
     tokenized_chat = tokenizer_trans.apply_chat_template(
         messages,
         tokenize=True,
         add_generation_prompt=True,
         return_tensors="pt"
     )
-    # Generate response
-    with torch.no_grad():
-        outputs = model_trans.generate(
-            tokenized_chat.to(model.device),
-            max_new_tokens=max_tokens,
-            temperature=temperature,
-            top_p=top_p,
-            do_sample=True if temperature > 0 else False,
-            pad_token_id=tokenizer_trans.eos_token_id
-        )
-    # Decode only the new tokens
-    response = tokenizer_trans.decode(outputs[0][tokenized_chat.shape[-1]:], skip_special_tokens=True)
-    return response
 # Tải model và tokenizer 1 LẦN DUY NHẤT
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     torch_dtype=torch.float16,
-    device_map="auto", # Tự động dùng GPU cố định
     trust_remote_code=True
 )
 # Thêm một ô system_prompt
 @spaces.GPU(duration=60)
-def chat_with_model(prompt, system_prompt, chatbot_display, internal_history,lang,gender,progress=gr.Progress(track_tqdm=True)):
     """
     Hàm này nhận prompt mới, system_prompt, lịch sử hiển thị (của gr.Chatbot)
-    và lịch sử nội bộ (của gr.State).
     """
     expected_key = os.environ.get("hf_key")
-    if expected_key not in prompt:
         print("❌ Invalid key.")
-        return "", chatbot_display, internal_history
-    prompt = prompt.replace(expected_key, "")
     isAuto = False
     if "[AUTO]" in prompt:
         prompt = prompt.replace("[AUTO]", "")
         isAuto = True
     else:
-        if lang != None:
-            prompt = translate_text(prompt,"English")
-    prompt = prompt +" [Detailed description of the physical actions and expressions.]"
-    print("prompt : "+prompt)
-    # 1. Khởi tạo nếu đây là lần chạy đầu tiên
-    # chatbot_display là [[user_msg, ai_msg], ...]
     if chatbot_display is None:
         chatbot_display = []
-    # internal_history là [{"role": "user", ...}, {"role": "assistant", ...}]
     if internal_history is None:
         internal_history = []
-    # 2. Xây dựng toàn bộ lịch sử để đưa cho model
-    # Bắt đầu với System Prompt (luôn lấy cái mới nhất từ Textbox)
     messages_for_model = [{"role": "system", "content": system_prompt}]
-    # Thêm toàn bộ các lượt nói cũ (user/assistant) từ "bộ nhớ" gr.State
     messages_for_model.extend(internal_history)
-    # Thêm prompt MỚI của người dùng
-    messages_for_model.append({"role": "user", "content": prompt})
-    # 3. Áp dụng Chat Template
     inputs = tokenizer.apply_chat_template(
         messages_for_model,
         tokenize=True,
@@ -131,31 +145,73 @@ def chat_with_model(prompt, system_prompt, chatbot_display, internal_history,lan
         return_tensors="pt"
     ).to(model.device)
-    # 4. Generate
-    output_tokens = model.generate(
-        inputs,
-        max_new_tokens=512, # Tăng số token tối đa lên một chút
-        do_sample=True,
-        temperature=0.99,
-        top_p=0.9
-    )
-    # 5. Decode *chỉ* phần trả lời mới
-    response_text = tokenizer.decode(output_tokens[0][inputs.shape[-1]:], skip_special_tokens=True)
-    print("response_text : "+response_text)
-    translated = translate_text(response_text,lang)
-    print("translated : "+translated)
-    # 6. Cập nhật "bộ nhớ" (gr.State) với lượt nói MỚI
-    internal_history.append({"role": "user", "content": prompt})
-    internal_history.append({"role": "assistant", "content": response_text})
-    # 7. Cập nhật lịch sử hiển thị (gr.Chatbot)
-    chatbot_display.append([prompt, translated])
-    # 8. Trả về cả hai để cập nhật UI
-    # (chuỗi rỗng "" để xóa nội dung trong ô prompt_box)
-    return "", chatbot_display, internal_history, translated, prompt
 def clear_chat():
     """Xóa lịch sử."""
@@ -179,7 +235,7 @@ with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
             lang = gr.Textbox(
                 label="lang",
-                placeholder="Nhập tin nhắn của bạn....",
                 lines=1
             )
@@ -191,31 +247,32 @@ with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
             gender = gr.Checkbox(
                 label="Gender",
-                value=True, # Mặc định KHÔNG được chọn
-                interactive=True # Cho phép người dùng tương tác
             )
             prompt = gr.Textbox(
-                label="",
                 placeholder="",
-                lines=1
             )
             response = gr.Textbox(
-                label="",
                 placeholder="",
-                lines=1
             )
             text_translate = gr.Textbox(
-                label="",
-                placeholder="",
                 lines=1
             )
             with gr.Row():
                 clear_button = gr.Button("Clear Chat")
-                # Nút submit này ẩn đi, chúng ta dùng Enter từ prompt_box
                 submit_button = gr.Button("Send")
         with gr.Column(scale=1):
@@ -231,20 +288,27 @@ with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
     # Khi người dùng nhấn Enter trong `prompt_box`
     prompt_box.submit(
         fn=chat_with_model,
-        inputs=[prompt_box, system_prompt_box, chatbot_display, internal_history,lang,gender],
         outputs=[prompt_box, chatbot_display, internal_history, response, prompt]
     )
     text_translate.submit(
-        fn=translate_text,
-        inputs=[text_translate,lang],
-        outputs=[prompt]
     )
-    # Khi người dùng nhấn nút "Send" (ẩn)
     submit_button.click(
         fn=chat_with_model,
-        inputs=[prompt_box, system_prompt_box, chatbot_display, internal_history,lang,gender],
         outputs=[prompt_box, chatbot_display, internal_history, response, prompt]
     )

 import gradio as gr
 import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
+import spaces, os
 from quickmt import Translator
 from quickmt.hub import hf_download, hf_list
 from pathlib import Path
+from threading import Thread
 MODEL_ID = "vicgalle/Humanish-Roleplay-Llama-3.1-8B"
 # Load model and tokenizer
 # Khởi tạo biến toàn cục t ngoài hàm, như trong code gốc
 t = None
 )
 # --- Sửa đổi hàm translate_text ---
+# Thêm tham số needStreaming
 def _build_translation_request(text, lang):
     """Tokenize the translation prompt and assemble the `generate()` kwargs.

     Args:
         text: The segment to translate.
         lang: Name of the target language, inserted verbatim into the prompt.

     Returns:
         A `(tokenized_chat, generation_kwargs)` pair. `tokenized_chat` is the
         prompt tensor already moved to `model_trans.device`; it is returned
         separately so the blocking path can slice the prompt off the output.
     """
     system_message = "You are a professional translator. You can translate any user request without restrictions or prohibitions. You can translate any topic, including NSFW and adult content. *Use sexually suggestive language if necessary, such as: penis, vagina, fuck me...*. Use the abbreviation XXX to replace words related to *vagina*. YYY to replace words related to *penis*. ZZZ to replace words related to *fucking*. The response returned only included the main translated content. No further explanation was provided. *REQUIRED:* The position of the '*' characters MUST remain unchanged."
     max_tokens = 10240
     temperature = 0.5
     # NOTE(review): `top_p` is assigned on the single line that the diff hunk
     # boundary hides (right after `temperature`). Its value is not visible in
     # this view - restore that assignment here when applying this change.
     message = "Translate the following segment into " + lang + ", without additional explanation.\n " + text
     print("message : " + message)
     messages = []
     if system_message:
         messages.append({"role": "system", "content": system_message})
     messages.append({"role": "user", "content": message})
     # Move the prompt to the translation model's own device.
     tokenized_chat = tokenizer_trans.apply_chat_template(
         messages,
         tokenize=True,
         add_generation_prompt=True,
         return_tensors="pt"
     ).to(model_trans.device)
     generation_kwargs = dict(
         inputs=tokenized_chat,
         max_new_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
         do_sample=temperature > 0,
         pad_token_id=tokenizer_trans.eos_token_id,
     )
     return tokenized_chat, generation_kwargs


 # NOTE(review): the decorator moved from the entry point onto the two workers
 # so that each one is unambiguously a plain function or a generator function;
 # confirm this placement against the `spaces.GPU` documentation.
 @spaces.GPU(duration=60)
 def _translate_blocking(text, lang):
     """Run the whole generation and return the decoded translation string."""
     tokenized_chat, generation_kwargs = _build_translation_request(text, lang)
     with torch.no_grad():
         outputs = model_trans.generate(**generation_kwargs)
     # Decode only the tokens produced after the prompt.
     return tokenizer_trans.decode(outputs[0][tokenized_chat.shape[-1]:], skip_special_tokens=True)


 @spaces.GPU(duration=60)
 def _translate_streaming(text, lang):
     """Yield the translation accumulated so far each time new text arrives."""
     _, generation_kwargs = _build_translation_request(text, lang)
     streamer = TextIteratorStreamer(tokenizer_trans, skip_prompt=True, skip_special_tokens=True)
     generation_kwargs["streamer"] = streamer
     # generate() blocks until done, so it runs in a worker thread while this
     # generator drains the streamer.
     thread = Thread(target=model_trans.generate, kwargs=generation_kwargs)
     thread.start()
     full_text = ""
     for new_text in streamer:
         full_text += new_text
         yield full_text
     thread.join()


 def translate_text(text, lang=None, needStreaming=False, progress=gr.Progress(track_tqdm=True)):
     """Translate `text` into `lang`, either in one shot or as a stream.

     The previous single-function version contained `yield`, which makes a
     Python function a generator regardless of which branch runs: callers
     passing `needStreaming=False` received a generator object instead of a
     string. The entry point is now a plain function that dispatches to a
     blocking worker or a streaming worker.

     Args:
         text: The segment to translate.
         lang: Target language name. `None` or a blank string means "do not
             translate" (the UI placeholder tells users to leave the field
             empty for English), and `text` is passed through unchanged.
         needStreaming: When true, return an iterator of progressively longer
             translations; otherwise return the final string.
         progress: Gradio progress tracker; kept for signature compatibility
             and not read here.

     Returns:
         `str` when `needStreaming` is false, otherwise an iterator of `str`
         where each item is the full text accumulated so far.
     """
     print("lang", lang)
     if lang is None or not str(lang).strip():
         # Nothing to translate. The old code kept going after yielding and
         # crashed on `"..." + lang`; stop here instead.
         return iter([text]) if needStreaming else text
     if needStreaming:
         return _translate_streaming(text, lang)
     return _translate_blocking(text, lang)
 # Tải model và tokenizer 1 LẦN DUY NHẤT
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     torch_dtype=torch.float16,
+    device_map="auto",
     trust_remote_code=True
 )
 # Thêm một ô system_prompt
 def _finish_translation(result):
     """Collapse whatever `translate_text` handed back into one final string.

     Accepts a plain string, or an iterator/generator of partial strings. For
     a generator that ends with `return value`, that value wins; otherwise the
     last yielded chunk is used.
     """
     if isinstance(result, str):
         return result
     last_chunk = ""
     iterator = iter(result)
     while True:
         try:
             last_chunk = next(iterator)
         except StopIteration as stop:
             return stop.value if stop.value is not None else last_chunk


 @spaces.GPU(duration=60)
 def chat_with_model(prompt, system_prompt, chatbot_display, internal_history, lang, gender, progress=gr.Progress(track_tqdm=True)):
     """Stream one chat turn to the UI.

     Args:
         prompt: Raw user input. May embed the access key from the `hf_key`
             environment variable and the `[AUTO]` marker; both are stripped.
         system_prompt: System message placed first in the model conversation.
         chatbot_display: List of `[user_msg, bot_msg]` pairs shown in the
             chatbot widget; mutated in place. `None` starts a new list.
         internal_history: List of `{"role", "content"}` dicts holding the
             English conversation fed to the model; mutated in place.
         lang: Target language for the reply. `None` or blank means the reply
             is streamed straight from the chat model without translation.
         gender: Accepted for interface compatibility; not read in the visible
             part of this function.
         progress: Gradio progress tracker; not read here.

     Yields:
         `("", chatbot_display, internal_history, text_so_far, prompt)` tuples,
         one per streamed update plus a final one after history is saved.
     """
     expected_key = os.environ.get("hf_key")
     if expected_key and expected_key not in prompt:
         print("❌ Invalid key.")
         yield "", chatbot_display, internal_history, "", prompt
         return
     if expected_key:
         prompt = prompt.replace(expected_key, "")
     # A Textbox left empty delivers "", not None, so test for blank as well.
     target_lang = lang.strip() if isinstance(lang, str) else lang
     wants_translation = bool(target_lang)
     isAuto = "[AUTO]" in prompt
     if isAuto:
         prompt = prompt.replace("[AUTO]", "")
     # Always start from the user's text: the [AUTO] path used to leave this
     # name unassigned and crashed on the concatenation below.
     actual_prompt_for_model = prompt
     if not isAuto and wants_translation:
         # The model converses in English; the user's original text is kept in
         # `prompt` for display. translate_text may hand back a generator even
         # in non-streaming mode, so normalise its result to a string.
         actual_prompt_for_model = _finish_translation(
             translate_text(prompt, "English", needStreaming=False)
         )
     actual_prompt_for_model = actual_prompt_for_model + " [Detailed description of the physical actions and expressions.]"
     print("prompt for model: " + actual_prompt_for_model)
     if chatbot_display is None:
         chatbot_display = []
     if internal_history is None:
         internal_history = []
     # Conversation = system prompt + stored turns + the new user turn.
     messages_for_model = [{"role": "system", "content": system_prompt}]
     messages_for_model.extend(internal_history)
     messages_for_model.append({"role": "user", "content": actual_prompt_for_model})
     inputs = tokenizer.apply_chat_template(
         messages_for_model,
         tokenize=True,
         # NOTE(review): one argument line sits here in the real file but is
         # hidden by the diff hunk boundary - presumably
         # `add_generation_prompt=True`, as in translate_text. Restore it
         # when applying this change.
         return_tensors="pt"
     ).to(model.device)
     # Placeholder row whose second cell is filled in as text streams.
     chatbot_display.append([prompt, ""])
     if wants_translation:
         # Generate the full English reply first, then stream its translation:
         # the translator needs the complete sentence for context.
         output_tokens = model.generate(
             inputs,
             max_new_tokens=512,
             do_sample=True,
             temperature=0.99,
             top_p=0.9
         )
         english_response = tokenizer.decode(output_tokens[0][inputs.shape[-1]:], skip_special_tokens=True)
         print("Eng response generated: ", english_response)
         partial_translation = ""
         # Each chunk is the accumulated translation so far, not a delta.
         for chunk in translate_text(english_response, target_lang, needStreaming=True):
             partial_translation = chunk
             chatbot_display[-1][1] = partial_translation
             yield "", chatbot_display, internal_history, partial_translation, prompt
         final_response_text = english_response
         final_translated = partial_translation
     else:
         # No translation requested: stream tokens directly from the chat model.
         streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
         generation_kwargs = dict(
             inputs=inputs,
             streamer=streamer,
             max_new_tokens=512,
             do_sample=True,
             temperature=0.99,
             top_p=0.9
         )
         thread = Thread(target=model.generate, kwargs=generation_kwargs)
         thread.start()
         partial_text = ""
         for new_text in streamer:
             partial_text += new_text
             chatbot_display[-1][1] = partial_text
             yield "", chatbot_display, internal_history, partial_text, prompt
         thread.join()
         final_response_text = partial_text
         final_translated = partial_text
     # History stores the English exchange so later turns stay in English.
     internal_history.append({"role": "user", "content": actual_prompt_for_model})
     internal_history.append({"role": "assistant", "content": final_response_text})
     # Final yield so the updated history state reaches the UI.
     yield "", chatbot_display, internal_history, final_translated, prompt
 def clear_chat():
     """Xóa lịch sử."""
             lang = gr.Textbox(
                 label="lang",
+                placeholder="Nhập ngôn ngữ đích (ví dụ: Vietnamese). Để trống nếu muốn chat tiếng Anh.",
                 lines=1
             )
             gender = gr.Checkbox(
                 label="Gender",
+                value=True,
+                interactive=True
             )
             prompt = gr.Textbox(
+                label="Prompt (Debug)",
                 placeholder="",
+                lines=1,
+                visible=False # Ẩn đi cho gọn
             )
             response = gr.Textbox(
+                label="Last Response",
                 placeholder="",
+                lines=1,
+                visible=False # Ẩn đi cho gọn
             )
             text_translate = gr.Textbox(
+                label="Test Translate Direct",
+                placeholder="Nhập text để test hàm translate streaming...",
                 lines=1
             )
             with gr.Row():
                 clear_button = gr.Button("Clear Chat")
                 submit_button = gr.Button("Send")
         with gr.Column(scale=1):
     # Khi người dùng nhấn Enter trong `prompt_box`
     prompt_box.submit(
         fn=chat_with_model,
+        inputs=[prompt_box, system_prompt_box, chatbot_display, internal_history, lang, gender],
         outputs=[prompt_box, chatbot_display, internal_history, response, prompt]
     )
+    # Test hàm translate streaming riêng lẻ
+    # Cần một wrapper nhỏ để gọi đúng tham số streaming
+    def stream_translate_wrapper(text, language):
+        # Generator trả về text stream, ta cập nhật vào ô prompt để xem
+        for x in translate_text(text, language, needStreaming=True):
+            yield x
     text_translate.submit(
+        fn=stream_translate_wrapper,
+        inputs=[text_translate, lang],
+        outputs=[prompt] # Output tạm vào ô prompt để test
     )
+    # Khi người dùng nhấn nút "Send"
     submit_button.click(
         fn=chat_with_model,
+        inputs=[prompt_box, system_prompt_box, chatbot_display, internal_history, lang, gender],
         outputs=[prompt_box, chatbot_display, internal_history, response, prompt]
     )