I wrote a Python script so that I could enjoy this work myself: it translates all of the text into Ukrainian. If you replace the model, it can translate into any other language (if your language is not written in the Latin script, you must also replace the original fancy fonts with more common ones).
import re
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import sentencepiece
import torch
# Name of the pretrained checkpoint used for translation.
model_name = "facebook/nllb-200-distilled-600M"
# Module-level tokenizer and seq2seq model; translate_text() reads both globals.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
# Print the loaded model's configuration.
print(model.config)
def is_file_reference(line):
    """Return True when *line* contains a quoted media file name.

    A match needs an opening quote (single or double) somewhere before a
    ``.mp3``, ``.jpg``, ``.png``, ``.wav`` or ``.ogg`` extension that is
    immediately followed by a quote.  The extension is matched
    case-insensitively so that names such as ``"BG.PNG"`` are recognised
    too and are not handed to the translator.

    Args:
        line: Text to inspect; the quotes must be part of it.

    Returns:
        bool: True if a quoted media file name was found, otherwise False.
    """
    return re.search(
        r'["\'].*\.(mp3|jpg|png|wav|ogg)["\']',
        line,
        re.IGNORECASE,
    ) is not None
def translate_text(text, src_lang="eng_Latn", tgt_lang="ukr_Cyrl"):
    """Translate *text* with the module-level ``tokenizer`` and ``model``.

    Args:
        text: Text to translate.  Input longer than 512 tokens is truncated
            by the tokenizer call below.
        src_lang: Language code of *text*, assigned to ``tokenizer.src_lang``
            before tokenizing.
        tgt_lang: Language code of the translation; its token id is forced
            as the first generated token.

    Returns:
        str: The decoded translation with special tokens removed.  Empty or
        whitespace-only *text* is returned unchanged without running the
        model.
    """
    # Nothing to translate; do not ask the model to generate from blank input.
    if not text.strip():
        return text

    # Apply the requested source language before tokenizing.
    tokenizer.src_lang = src_lang
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    translated_tokens = model.generate(
        **inputs,
        # The language code is a token of the vocabulary; look its id up
        # directly instead of through the tokenizer's lang_code_to_id mapping.
        forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang),
    )
    return tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
def process_line(line, i):
    """Translate every quoted string in *line* and report progress.

    Each single- or double-quoted string has the ``{i}``, ``{/i}``,
    ``{FF3366}`` and ``{/FF3366}`` tags removed, is passed to
    ``translate_text`` and is put back between the same quotes.  Strings
    that are empty (or blank once the tags are removed) are left untouched.

    Args:
        line: One line of the script.
        i: Line number, used only in the progress output.

    Returns:
        str: The line with its quoted strings replaced by translations.
    """
    def translate_quoted(match):
        quote, content = match.group(1), match.group(2)
        cleaned = re.sub(r'{(?:i|/i|FF3366|/FF3366)}', '', content)
        if not cleaned.strip():
            # Nothing translatable inside the quotes; keep the original text.
            return match.group(0)
        translated_text = translate_text(cleaned)
        # Format the number explicitly: concatenating str and int raises TypeError.
        print(f" {i}{translated_text}")
        return f'{quote}{translated_text}{quote}'

    # A single substitution pass touches each quoted string exactly once, so
    # already translated text is never rewritten by a later replacement.
    return re.sub(r'(["\'])(.*?)\1', translate_quoted, line)
def process_line_(line):
    """Translate the quoted strings in *line*, keeping ``{i}``/``{/i}`` tags.

    Variant of ``process_line`` that preserves italic tags and skips quoted
    media file names.  Each quoted string is split at its ``{i}`` and
    ``{/i}`` tags; the text between tags is translated piece by piece with
    ``translate_text`` and the tags are copied through unchanged, so every
    piece of text is translated exactly once.

    Args:
        line: One line of the script.

    Returns:
        str: The line with its quoted strings replaced by translations.
    """
    italic_tag = re.compile(r'({/?i})')

    def translate_segment(segment):
        # Tags and blank runs are copied through untouched.
        if italic_tag.fullmatch(segment) or not segment.strip():
            return segment
        # Keep the surrounding whitespace so words next to a tag stay separated.
        lead = segment[:len(segment) - len(segment.lstrip())]
        trail = segment[len(segment.rstrip()):]
        return f'{lead}{translate_text(segment.strip())}{trail}'

    def translate_quoted(match):
        quote, content = match.group(1), match.group(2)
        if not content.strip():
            return match.group(0)
        # is_file_reference() looks for the surrounding quotes, so hand it the
        # quoted form rather than the bare content.
        if is_file_reference(f'{quote}{content}{quote}'):
            return match.group(0)
        pieces = italic_tag.split(content)
        return quote + ''.join(translate_segment(piece) for piece in pieces) + quote

    # One substitution pass: each quoted string is visited exactly once.
    return re.sub(r'(["\'])(.*?)\1', translate_quoted, line)
def translate_script(input_file, output_file):
    """Translate the script at *input_file* and write it to *output_file*.

    Lines that ``is_file_reference`` flags are copied through unchanged;
    every other line is passed to ``process_line`` together with its
    1-based line number.  Both files are opened as UTF-8.

    Args:
        input_file: Path of the script to read.
        output_file: Path of the translated script to write.
    """
    with open(input_file, "r", encoding="utf-8") as source:
        original_lines = source.readlines()

    result = [
        raw if is_file_reference(raw) else process_line(raw, number)
        for number, raw in enumerate(original_lines, start=1)
    ]

    with open(output_file, "w", encoding="utf-8") as target:
        target.writelines(result)
# Specify the files to process
input_file = "script.rpy"
output_file = "translated_script.rpy"
# Translate input_file and write the result to output_file.
translate_script(input_file, output_file)