bulk_translate.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
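#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Bulk-translate every text file under i18n/zh into the other language directories.

- The directory structure is preserved; fenced code blocks are left untranslated.
- Target languages: by default, every sub-directory of i18n except the source one.

Usage:
    python bulk_translate.py --src-root ../../i18n/zh --dst-root ../../i18n --src-lang zh-CN

Optional:
    --langs en es ...   # explicit target languages; default is to auto-scan
    --overwrite         # allow overwriting files that already exist
"""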
  13. import argparse
  14. import os
  15. import sys
  16. from pathlib import Path
  17. from typing import Iterable, List
  18. try:
  19. from deep_translator import GoogleTranslator
  20. except ImportError:
  21. sys.stderr.write("[错误] 缺少 deep-translator,请先 pip install -r requirements.txt\n")
  22. sys.exit(1)
  23. def translate_blocks(text: str, translator: GoogleTranslator) -> str:
  24. lines = text.splitlines()
  25. translated: List[str] = []
  26. in_code = False
  27. buffer: List[str] = []
  28. def flush_buffer():
  29. if not buffer:
  30. return
  31. chunk = "\n".join(buffer)
  32. result = translator.translate(chunk)
  33. translated.extend(result.split("\n"))
  34. buffer.clear()
  35. for line in lines:
  36. if line.strip().startswith("```"):
  37. flush_buffer()
  38. in_code = not in_code
  39. translated.append(line)
  40. continue
  41. if in_code:
  42. translated.append(line)
  43. continue
  44. if not line.strip():
  45. flush_buffer()
  46. translated.append(line)
  47. continue
  48. buffer.append(line)
  49. flush_buffer()
  50. return "\n".join(translated)
  51. def iter_source_files(src_root: Path) -> Iterable[Path]:
  52. for path in src_root.rglob('*'):
  53. if path.is_file():
  54. yield path
  55. def main() -> int:
  56. parser = argparse.ArgumentParser(description="批量翻译 i18n/zh -> 其他语言")
  57. parser.add_argument('--src-root', default='../../i18n/zh', help='源语言根目录')
  58. parser.add_argument('--dst-root', default='../../i18n', help='目标语言根目录集合')
  59. parser.add_argument('--src-lang', default='zh-CN', help='源语言代码')
  60. parser.add_argument('--langs', nargs='*', help='指定目标语言,不含源语言')
  61. parser.add_argument('--overwrite', action='store_true', help='允许覆盖已有文件')
  62. args = parser.parse_args()
  63. src_root = Path(args.src_root).resolve()
  64. dst_root = Path(args.dst_root).resolve()
  65. if not src_root.exists():
  66. sys.stderr.write(f"[错误] 源目录不存在: {src_root}\n")
  67. return 1
  68. code_overrides = {
  69. 'zh': 'zh-CN',
  70. 'zh-CN': 'zh-CN',
  71. 'he': 'iw', # Google 使用旧代码
  72. }
  73. def map_code(lang: str) -> str:
  74. return code_overrides.get(lang, lang)
  75. if args.langs:
  76. target_langs = [lang for lang in args.langs if map_code(lang) != args.src_lang]
  77. else:
  78. target_langs = [p.name for p in dst_root.iterdir() if p.is_dir() and map_code(p.name) != args.src_lang]
  79. if not target_langs:
  80. sys.stderr.write("[错误] 无目标语言目录\n")
  81. return 1
  82. for lang in target_langs:
  83. translator = GoogleTranslator(source=args.src_lang, target=map_code(lang))
  84. print(f"==== 开始翻译 -> {lang} ====")
  85. for src_file in iter_source_files(src_root):
  86. rel_path = src_file.relative_to(src_root)
  87. dst_file = dst_root / lang / rel_path
  88. dst_file.parent.mkdir(parents=True, exist_ok=True)
  89. if dst_file.exists() and not args.overwrite:
  90. print(f"跳过已存在 {dst_file}")
  91. continue
  92. with open(src_file, 'r', encoding='utf-8') as f:
  93. content = f.read()
  94. try:
  95. translated = translate_blocks(content, translator)
  96. except Exception as exc:
  97. print(f"[失败] {src_file} -> {dst_file}: {exc}")
  98. continue
  99. with open(dst_file, 'w', encoding='utf-8') as f:
  100. f.write(translated + '\n')
  101. print(f"[OK] {src_file} -> {dst_file}")
  102. print("全部翻译完成")
  103. return 0
  104. if __name__ == '__main__':
  105. sys.exit(main())