Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import sentencepiece as spm | |
| import ctranslate2 | |
| from huggingface_hub import hf_hub_download | |
| import os | |
# Display name -> language code for every language offered in the UI.
# The display names feed the dropdowns and (lower-cased) the model repo id;
# the codes are used to build the SentencePiece model file names.
languages = {
    "Kurdish": "ku",
    "Samoan": "sm",
    "Xhosa": "xh",
    "Lao": "lo",
    "Corsican": "co",
    "Cebuano": "ceb",
    "Galician": "gl",
    "Yiddish": "yi",
    "Swahili": "sw",
    "Yoruba": "yo",
    "English": "en",
}
def get_repo_id(src_lang, tgt_lang):
    """Return the Lingvanex model repository id for a language pair.

    Args:
        src_lang: Display name of the source language (e.g. ``"English"``).
        tgt_lang: Display name of the target language (e.g. ``"Kurdish"``).

    Returns:
        A string of the form ``lingvanex/<src>-to-<tgt>-translation`` with
        both language names lower-cased.
    """
    source_slug = src_lang.lower()
    target_slug = tgt_lang.lower()
    return "lingvanex/" + source_slug + "-to-" + target_slug + "-translation"
def download_models(src_lang, tgt_lang):
    """Fetch every file of the translation model for a language pair.

    Args:
        src_lang: Display name of the source language; must be a key of
            ``languages``.
        tgt_lang: Display name of the target language; must be a key of
            ``languages``.

    Returns:
        A dict mapping ``"src_spm"``, ``"tgt_spm"``, ``"model"``,
        ``"source_vocab"``, ``"target_vocab"`` and ``"config"`` to the value
        returned by ``hf_hub_download`` for the corresponding file.

    Raises:
        KeyError: If either language name is not in ``languages``.
    """
    repo_id = get_repo_id(src_lang, tgt_lang)

    def fetch(filename):
        # Every file lives in the same repository; only the name differs.
        return hf_hub_download(repo_id=repo_id, filename=filename)

    models = {}
    # Tokenizer models are named after the language codes.
    models["src_spm"] = fetch(f"{languages[src_lang]}.spm.model")
    models["tgt_spm"] = fetch(f"{languages[tgt_lang]}.spm.model")
    # The remaining files have fixed names.
    fixed_files = (
        ("model", "model.bin"),
        ("source_vocab", "source_vocabulary.txt"),
        ("target_vocab", "target_vocabulary.txt"),
        ("config", "config.json"),
    )
    for key, filename in fixed_files:
        models[key] = fetch(filename)
    return models
def translate(text, src_lang, tgt_lang):
    """Translate *text* from *src_lang* to *tgt_lang*.

    The model files for the pair are fetched with ``download_models`` and the
    SentencePiece tokenizers and CTranslate2 translator are loaded on every
    call.

    Args:
        text: The text to translate.
        src_lang: Display name of the source language (a key of ``languages``).
        tgt_lang: Display name of the target language (a key of ``languages``).

    Returns:
        The translated text. If both languages are the same, *text* is
        returned unchanged. If *text* is empty or whitespace-only, an empty
        string is returned without loading any model.

    Raises:
        gr.Error: If a language is unknown, or if neither language is
            English (only English <-> other-language models are available).
    """
    if src_lang == tgt_lang:
        return text

    # Nothing to translate: skip the downloads and model loading entirely.
    if not text or not text.strip():
        return ""

    # Fail with a readable UI message instead of a raw KeyError or a
    # "repository not found" error from the Hub.
    for name in (src_lang, tgt_lang):
        if name not in languages:
            raise gr.Error(f"Unsupported language: {name}")
    if "English" not in (src_lang, tgt_lang):
        raise gr.Error(
            "Translations are only supported to or from English."
        )

    models = download_models(src_lang, tgt_lang)
    spm_encoder = spm.SentencePieceProcessor(models["src_spm"])
    spm_decoder = spm.SentencePieceProcessor(models["tgt_spm"])

    # The translator is given the directory holding model.bin rather than
    # the file itself.
    model_dir = os.path.dirname(models["model"])
    translator = ctranslate2.Translator(model_dir, device="cpu")

    tokens = spm_encoder.encode(text, out_type=str)
    result = translator.translate_batch([tokens])
    # Use the first hypothesis of the single input in the batch.
    output = spm_decoder.decode(result[0].hypotheses[0], out_type=str)
    return output
def update_target_lang(src_lang):
    """Restrict the target-language dropdown based on the chosen source.

    Args:
        src_lang: Display name currently selected in the source dropdown.

    Returns:
        A ``(dropdown_update, note)`` tuple. For an English source every
        language is offered and the note is empty. For any other source the
        only choice is English, the selection is moved to English, and the
        note explains the restriction.
    """
    if src_lang == "English":
        return gr.Dropdown(choices=sorted(languages.keys())), ""

    # Also set the value: narrowing the choices alone would leave a stale
    # selection (e.g. "Kurdish") that is no longer a valid choice.
    return (
        gr.Dropdown(choices=["English"], value="English"),
        "Note: Translations are only supported from this language to English.",
    )
# Build the Gradio interface: language pickers, input/output text boxes, a
# translate button, and two clickable examples.
# NOTE(review): the original indentation was lost in SOURCE; the Row is
# assumed to contain only the two language dropdowns - confirm.
with gr.Blocks() as demo:
    gr.Markdown("# Multilingual Translation with Lingvanex")
    gr.Markdown("""
This translator allows you to translate text between English and a variety of other languages. Please note that translations are supported only in the following directions:
- From English to the target language (e.g., English → Kurdish).
- From the source language to English (e.g., Kurdish → English).
For this demo, language-specific model pairs are utilized, so translations between two non-English languages are not supported at this time.
""")

    with gr.Row():
        src_lang = gr.Dropdown(
            choices=sorted(languages.keys()),
            label="Source Language",
            value="English",
        )
        tgt_lang = gr.Dropdown(
            choices=sorted(languages.keys()),
            label="Target Language",
            value="Kurdish",
        )

    # Placeholder that update_target_lang fills with a restriction notice.
    note = gr.Markdown("")
    src_lang.change(
        update_target_lang,
        inputs=src_lang,
        outputs=[tgt_lang, note],
    )

    text_input = gr.Textbox(
        label="Input Text",
        placeholder="Enter text to translate...",
    )
    text_output = gr.Textbox(label="Translated Text")

    translate_btn = gr.Button("Translate")
    translate_btn.click(
        translate,
        inputs=[text_input, src_lang, tgt_lang],
        outputs=text_output,
    )

    # Each example row fills the text box and both language dropdowns.
    examples = gr.Examples(
        examples=[
            ["Hello, how are you?", "English", "Kurdish"],
            ["Silav halê we çawa ye?", "Kurdish", "English"],
        ],
        inputs=[text_input, src_lang, tgt_lang],
    )

demo.launch(share=True)