| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- 批量翻译 i18n/zh 下的所有文本文件到其他语言目录。
- - 保持目录结构,遇到代码块自动跳过翻译。
- - 目标语言集合:自动读取 i18n 下的子目录,排除 zh。
- 用法:
- python bulk_translate.py --src-root ../../i18n/zh --dst-root ../../i18n --src-lang zh-CN
- 可选:
- --langs en es ... # 指定目标语言;默认自动扫描
- --overwrite # 允许覆盖已有文件
- """
- import argparse
- import os
- import sys
- from pathlib import Path
- from typing import Iterable, List
- try:
- from deep_translator import GoogleTranslator
- except ImportError:
- sys.stderr.write("[错误] 缺少 deep-translator,请先 pip install -r requirements.txt\n")
- sys.exit(1)
def _fence_run(line: str) -> str:
    """Return the fence marker that opens ``line``, or ``""`` if it is no fence.

    A fence is a run of three or more backticks or tildes at the start of the
    line (surrounding whitespace is ignored). A backtick run followed by more
    backticks later on the same line is an inline code span, not a fence, so
    it is rejected.
    """
    stripped = line.strip()
    for char in ("`", "~"):
        if stripped.startswith(char * 3):
            run = stripped[: len(stripped) - len(stripped.lstrip(char))]
            if char == "`" and "`" in stripped[len(run):]:
                return ""
            return run
    return ""


def _closes_fence(line: str, opener: str) -> bool:
    """Tell whether ``line`` closes the code block opened by ``opener``.

    The closing line must consist solely of the opener's fence character and
    be at least as long as the opener.
    """
    stripped = line.strip()
    return len(stripped) >= len(opener) and stripped == opener[0] * len(stripped)


def translate_blocks(text: str, translator: "GoogleTranslator") -> str:
    """Translate ``text`` paragraph by paragraph, leaving fenced code untouched.

    Consecutive non-blank lines outside code fences are joined and sent to
    ``translator.translate`` as one chunk. Blank lines, fence lines and
    everything between an opening and closing fence are copied verbatim.

    Args:
        text: Source document. Line endings are normalised to ``\\n`` and a
            trailing newline is not preserved.
        translator: Any object exposing ``translate(str) -> str``. The
            annotation is quoted so this function does not require the
            third-party name at definition time.

    Returns:
        The translated document joined with ``\\n``.
    """
    lines = text.splitlines()
    translated: List[str] = []
    buffer: List[str] = []
    # Marker of the fence currently open; empty string while outside code.
    open_fence = ""

    def flush_buffer() -> None:
        """Translate the buffered paragraph and append it to the output."""
        if not buffer:
            return
        chunk = "\n".join(buffer)
        try:
            result = translator.translate(chunk)
        except Exception:
            result = None
        if result:
            translated.extend(result.split("\n"))
        else:
            # Fallback: translate line by line so one bad chunk (or an empty
            # response) does not drop or lose the whole paragraph.
            for line in buffer:
                try:
                    res_line = translator.translate(line) or line
                except Exception:
                    # Keep the source text; left for manual proofreading.
                    res_line = line
                translated.append(res_line)
        buffer.clear()

    for line in lines:
        if open_fence:
            # Inside a code block: only a matching fence can end it.
            if _closes_fence(line, open_fence):
                open_fence = ""
            translated.append(line)
            continue
        marker = _fence_run(line)
        if marker:
            flush_buffer()
            open_fence = marker
            translated.append(line)
            continue
        if not line.strip():
            # A blank line ends the current paragraph.
            flush_buffer()
            translated.append(line)
            continue
        buffer.append(line)
    flush_buffer()
    return "\n".join(translated)
def iter_source_files(src_root: Path) -> Iterable[Path]:
    """Yield every regular file found anywhere beneath ``src_root``.

    Directories are descended into recursively but are never yielded
    themselves.
    """
    yield from (entry for entry in src_root.rglob('*') if entry.is_file())
def main() -> int:
    """Translate every file under the source tree into each target language.

    Parses the command line, determines the target languages (from
    ``--langs`` or by scanning the sub-directories of ``--dst-root``), and
    writes one translated copy of each source file to
    ``<dst-root>/<lang>/<relative path>``. Existing files are skipped unless
    ``--overwrite`` is given. A failure on one language or one file is
    reported and does not stop the rest of the batch.

    Returns:
        ``1`` when the source directory does not exist or no target language
        could be determined, otherwise ``0``.
    """
    parser = argparse.ArgumentParser(description="批量翻译 i18n/zh -> 其他语言")
    parser.add_argument('--src-root', default='../../i18n/zh', help='源语言根目录')
    parser.add_argument('--dst-root', default='../../i18n', help='目标语言根目录集合')
    parser.add_argument('--src-lang', default='zh-CN', help='源语言代码')
    parser.add_argument('--langs', nargs='*', help='指定目标语言,不含源语言')
    parser.add_argument('--overwrite', action='store_true', help='允许覆盖已有文件')
    args = parser.parse_args()

    src_root = Path(args.src_root).resolve()
    dst_root = Path(args.dst_root).resolve()
    if not src_root.exists():
        sys.stderr.write(f"[错误] 源目录不存在: {src_root}\n")
        return 1

    # Directory name -> language code handed to the translator.
    code_overrides = {
        'zh': 'zh-CN',
        'zh-CN': 'zh-CN',
        'he': 'iw',  # Google uses the legacy code
    }

    def map_code(lang: str) -> str:
        return code_overrides.get(lang, lang)

    # Normalise the source language the same way as the targets; otherwise
    # `--src-lang zh` neither excludes the `zh` directory below nor matches
    # the `zh-CN` code that the targets are compared against.
    src_lang = map_code(args.src_lang)

    if args.langs:
        target_langs = [lang for lang in args.langs if map_code(lang) != src_lang]
    elif dst_root.is_dir():
        # Sorted so runs are reproducible regardless of directory order.
        target_langs = sorted(
            p.name
            for p in dst_root.iterdir()
            if p.is_dir() and map_code(p.name) != src_lang
        )
    else:
        target_langs = []
    if not target_langs:
        sys.stderr.write("[错误] 无目标语言目录\n")
        return 1

    for lang in target_langs:
        try:
            translator = GoogleTranslator(source=src_lang, target=map_code(lang))
        except Exception as exc:
            # e.g. a sub-directory whose name is not a supported language
            # code; skip it instead of aborting every other language.
            print(f"[失败] 无法初始化翻译器 -> {lang}: {exc}")
            continue
        print(f"==== 开始翻译 -> {lang} ====")
        for src_file in iter_source_files(src_root):
            rel_path = src_file.relative_to(src_root)
            dst_file = dst_root / lang / rel_path
            if dst_file.exists() and not args.overwrite:
                print(f"跳过已存在 {dst_file}")
                continue
            try:
                # Reading is inside the try: a binary or non-UTF-8 file must
                # not abort the whole batch.
                with open(src_file, 'r', encoding='utf-8') as f:
                    content = f.read()
                translated = translate_blocks(content, translator)
            except Exception as exc:
                print(f"[失败] {src_file} -> {dst_file}: {exc}")
                continue
            dst_file.parent.mkdir(parents=True, exist_ok=True)
            with open(dst_file, 'w', encoding='utf-8') as f:
                f.write(translated + '\n')
            print(f"[OK] {src_file} -> {dst_file}")
    print("全部翻译完成")
    return 0
# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
|