Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,20 +4,20 @@ import torch
|
|
| 4 |
from transformers import T5Tokenizer, T5ForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, AutoModel, pipeline, logging
|
| 5 |
import languagecodes
|
| 6 |
import requests, os
|
| 7 |
-
|
| 8 |
|
| 9 |
logging.set_verbosity_error()
|
| 10 |
favourite_langs = {"German": "de", "Romanian": "ro", "English": "en", "-----": "-----"}
|
| 11 |
-
df =
|
| 12 |
non_empty_isos = df.slice(1).filter(pl.col("ISO639-1") != "").rows()
|
| 13 |
-
# all_langs = languagecodes.
|
| 14 |
all_langs = {iso[0]: (iso[1], iso[2], iso[3]) for iso in non_empty_isos}
|
| 15 |
|
| 16 |
# Language options as list, add favourite languages first
|
| 17 |
options = list(favourite_langs.keys())
|
| 18 |
options.extend(list(all_langs.keys()))
|
| 19 |
|
| 20 |
-
models = ["Helsinki-NLP",
|
| 21 |
"facebook/nllb-200-distilled-600M", "facebook/nllb-200-distilled-1.3B", "facebook/nllb-200-1.3B", "facebook/nllb-200-3.3B",
|
| 22 |
"facebook/mbart-large-50-many-to-many-mmt", "facebook/mbart-large-50-one-to-many-mmt", "facebook/mbart-large-50-many-to-one-mmt",
|
| 23 |
"facebook/m2m100_418M", "facebook/m2m100_1.2B",
|
|
@@ -88,7 +88,7 @@ class Translators:
|
|
| 88 |
import argostranslate.translate, argostranslate.package
|
| 89 |
# Translate
|
| 90 |
try:
|
| 91 |
-
download_argos_model(self.sl, self.tl)
|
| 92 |
translated_text = argostranslate.translate.translate(self.input_text, self.sl, self.tl)
|
| 93 |
except StopIteration:
|
| 94 |
# packages_info = ', '.join(f"{pkg.get_description()}->{str(pkg.links)} {str(pkg.source_languages)}" for pkg in argostranslate.package.get_available_packages())
|
|
@@ -97,6 +97,17 @@ class Translators:
|
|
| 97 |
except Exception as error:
|
| 98 |
translated_text = error
|
| 99 |
return translated_text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
def HelsinkiNLP(self):
|
| 102 |
try: # Standard bilingual model
|
|
@@ -111,15 +122,7 @@ class Translators:
|
|
| 111 |
translation = pipe(self.input_text)
|
| 112 |
return translation[0]['translation_text'], f'Translated from {self.sl} to {self.tl} with {model_name}.'
|
| 113 |
except EnvironmentError as error:
|
| 114 |
-
|
| 115 |
-
model_name = "Helsinki-NLP/opus-mt-tc-bible-big-mul-mul"
|
| 116 |
-
pipe = pipeline("translation", model=model_name)
|
| 117 |
-
iso1_dict = {iso[1]: (iso[0], iso[2], iso[3]) for iso in non_empty_isos}
|
| 118 |
-
iso3tl = iso1_dict.get(self.tl)[2] # 'deu', 'ron', 'eng', 'fra'
|
| 119 |
-
translation = pipe(f'>>{iso3tl}<< {self.input_text}')
|
| 120 |
-
return translation[0]['translation_text'], f'Translated from {self.sl} to {self.tl} with {model_name}.'
|
| 121 |
-
except Exception as error:
|
| 122 |
-
return f"Error translating with model: {model_name}! Try other available language combination.", error
|
| 123 |
except KeyError as error:
|
| 124 |
return f"Error: Translation direction {self.sl} to {self.tl} is not supported by Helsinki Translation Models", error
|
| 125 |
|
|
@@ -359,7 +362,10 @@ def translate_text(input_text: str, s_language: str, t_language: str, model_name
|
|
| 359 |
tl = all_langs[t_language][0]
|
| 360 |
message_text = f'Translated from {s_language} to {t_language} with {model_name}'
|
| 361 |
try:
|
| 362 |
-
if model_name
|
|
|
|
|
|
|
|
|
|
| 363 |
translated_text, message_text = Translators(model_name, sl, tl, input_text).HelsinkiNLP()
|
| 364 |
|
| 365 |
elif model_name == 'Argos':
|
|
|
|
| 4 |
from transformers import T5Tokenizer, T5ForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, AutoModel, pipeline, logging
|
| 5 |
import languagecodes
|
| 6 |
import requests, os
|
| 7 |
+
from polars import read_parquet
|
| 8 |
|
| 9 |
logging.set_verbosity_error()
|
| 10 |
favourite_langs = {"German": "de", "Romanian": "ro", "English": "en", "-----": "-----"}
|
| 11 |
+
df = read_parquet("isolanguages.parquet")
|
| 12 |
non_empty_isos = df.slice(1).filter(pl.col("ISO639-1") != "").rows()
|
| 13 |
+
# all_langs = languagecodes.iso_languages_byname
|
| 14 |
all_langs = {iso[0]: (iso[1], iso[2], iso[3]) for iso in non_empty_isos}
|
| 15 |
|
| 16 |
# Language options as list, add favourite languages first
|
| 17 |
options = list(favourite_langs.keys())
|
| 18 |
options.extend(list(all_langs.keys()))
|
| 19 |
|
| 20 |
+
models = ["Helsinki-NLP", "Helsinki-NLP/opus-mt-tc-bible-big-mul-mul",
|
| 21 |
"facebook/nllb-200-distilled-600M", "facebook/nllb-200-distilled-1.3B", "facebook/nllb-200-1.3B", "facebook/nllb-200-3.3B",
|
| 22 |
"facebook/mbart-large-50-many-to-many-mmt", "facebook/mbart-large-50-one-to-many-mmt", "facebook/mbart-large-50-many-to-one-mmt",
|
| 23 |
"facebook/m2m100_418M", "facebook/m2m100_1.2B",
|
|
|
|
| 88 |
import argostranslate.translate, argostranslate.package
|
| 89 |
# Translate
|
| 90 |
try:
|
| 91 |
+
self.download_argos_model(self.sl, self.tl)
|
| 92 |
translated_text = argostranslate.translate.translate(self.input_text, self.sl, self.tl)
|
| 93 |
except StopIteration:
|
| 94 |
# packages_info = ', '.join(f"{pkg.get_description()}->{str(pkg.links)} {str(pkg.source_languages)}" for pkg in argostranslate.package.get_available_packages())
|
|
|
|
| 97 |
except Exception as error:
|
| 98 |
translated_text = error
|
| 99 |
return translated_text
|
| 100 |
+
|
| 101 |
+
def HelsinkiNLP_mulmul(self):
    """Translate ``self.input_text`` with the multilingual Helsinki-NLP model.

    The target language is selected by prefixing the input with a
    ``>>code<<`` token, where ``code`` is the last column stored for
    ``self.tl`` in the module-level ``non_empty_isos`` rows.

    Returns:
        tuple: ``(translated_text, message)`` on success. On failure the
        first element is an error description and the second element is the
        exception that caused it (same shape as the success path, so callers
        can keep unpacking two values).
    """
    model_name = "Helsinki-NLP/opus-mt-tc-bible-big-mul-mul"
    try:
        # iso[1] is the value translate_text passes in as ``tl``
        # (all_langs[name][0]); iso[3] is the code used in the >>..<< token.
        target_codes = {iso[1]: iso[3] for iso in non_empty_isos}
        if self.tl not in target_codes:
            # Fail before the model is loaded, and with a readable cause
            # instead of "'NoneType' object is not subscriptable".
            raise KeyError(f"No language code found for target language {self.tl!r}")
        iso3tl = target_codes[self.tl]  # 'deu', 'ron', 'eng', 'fra'
        pipe = pipeline("translation", model=model_name)
        translation = pipe(f'>>{iso3tl}<< {self.input_text}')
        return translation[0]['translation_text'], f'Translated from {self.sl} to {self.tl} with {model_name}.'
    except Exception as error:
        # Broad catch kept on purpose: the UI shows the message instead of crashing.
        return f"Error translating with model: {model_name}! Try other available language combination.", error
|
| 111 |
|
| 112 |
def HelsinkiNLP(self):
|
| 113 |
try: # Standard bilingual model
|
|
|
|
| 122 |
translation = pipe(self.input_text)
|
| 123 |
return translation[0]['translation_text'], f'Translated from {self.sl} to {self.tl} with {model_name}.'
|
| 124 |
except EnvironmentError as error:
|
| 125 |
+
return self.HelsinkiNLP_mulmul() # Last resort: multi to multi
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
except KeyError as error:
|
| 127 |
return f"Error: Translation direction {self.sl} to {self.tl} is not supported by Helsinki Translation Models", error
|
| 128 |
|
|
|
|
| 362 |
tl = all_langs[t_language][0]
|
| 363 |
message_text = f'Translated from {s_language} to {t_language} with {model_name}'
|
| 364 |
try:
|
| 365 |
+
if model_name == "Helsinki-NLP/opus-mt-tc-bible-big-mul-mul":
|
| 366 |
+
translated_text, message_text = Translators(model_name, sl, tl, input_text).HelsinkiNLP_mulmul()
|
| 367 |
+
|
| 368 |
+
elif model_name.startswith("Helsinki-NLP"):
|
| 369 |
translated_text, message_text = Translators(model_name, sl, tl, input_text).HelsinkiNLP()
|
| 370 |
|
| 371 |
elif model_name == 'Argos':
|