#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Bulk-translate every text file under i18n/zh into the other language directories.

- The directory structure is preserved; code blocks are skipped (left untranslated).
- Target languages: the subdirectories of i18n are read automatically, excluding zh.

Usage:
    python bulk_translate.py --src-root ../../i18n/zh --dst-root ../../i18n --src-lang zh

Options:
    --langs en es ...   # explicit target languages; scanned automatically by default
    --overwrite         # allow overwriting existing files
"""
import argparse
import os
import re
import sys
from pathlib import Path
from typing import Iterable, List

try:
    from deep_translator import GoogleTranslator
except ImportError:
    sys.stderr.write("[错误] 缺少 deep-translator,请先 pip install -r requirements.txt\n")
    sys.exit(1)
  23. def translate_blocks(text: str, translator: GoogleTranslator) -> str:
  24. lines = text.splitlines()
  25. translated: List[str] = []
  26. in_code = False
  27. buffer: List[str] = []
  28. def flush_buffer():
  29. if not buffer:
  30. return
  31. chunk = "\n".join(buffer)
  32. try:
  33. result = translator.translate(chunk)
  34. if result is None:
  35. raise RuntimeError("翻译返回空结果")
  36. translated.extend(result.split("\n"))
  37. except Exception:
  38. # 兜底:按行逐条翻译,避免整段失败
  39. for line in buffer:
  40. try:
  41. res_line = translator.translate(line) or line
  42. except Exception:
  43. res_line = line # 保留原文,留待人工校对
  44. translated.append(res_line)
  45. buffer.clear()
  46. for line in lines:
  47. if line.strip().startswith("```"):
  48. flush_buffer()
  49. in_code = not in_code
  50. translated.append(line)
  51. continue
  52. if in_code:
  53. translated.append(line)
  54. continue
  55. if not line.strip():
  56. flush_buffer()
  57. translated.append(line)
  58. continue
  59. buffer.append(line)
  60. flush_buffer()
  61. return "\n".join(translated)
  62. def iter_source_files(src_root: Path) -> Iterable[Path]:
  63. for path in src_root.rglob('*'):
  64. if path.is_file():
  65. yield path
  66. def main() -> int:
  67. parser = argparse.ArgumentParser(description="批量翻译 i18n/zh -> 其他语言")
  68. parser.add_argument('--src-root', default='../../i18n/zh', help='源语言根目录')
  69. parser.add_argument('--dst-root', default='../../i18n', help='目标语言根目录集合')
  70. parser.add_argument('--src-lang', default='zh-CN', help='源语言代码')
  71. parser.add_argument('--langs', nargs='*', help='指定目标语言,不含源语言')
  72. parser.add_argument('--overwrite', action='store_true', help='允许覆盖已有文件')
  73. args = parser.parse_args()
  74. src_root = Path(args.src_root).resolve()
  75. dst_root = Path(args.dst_root).resolve()
  76. if not src_root.exists():
  77. sys.stderr.write(f"[错误] 源目录不存在: {src_root}\n")
  78. return 1
  79. code_overrides = {
  80. 'zh': 'zh-CN',
  81. 'zh-CN': 'zh-CN',
  82. 'he': 'iw', # Google 使用旧代码
  83. }
  84. def map_code(lang: str) -> str:
  85. return code_overrides.get(lang, lang)
  86. if args.langs:
  87. target_langs = [lang for lang in args.langs if map_code(lang) != args.src_lang]
  88. else:
  89. target_langs = [p.name for p in dst_root.iterdir() if p.is_dir() and map_code(p.name) != args.src_lang]
  90. if not target_langs:
  91. sys.stderr.write("[错误] 无目标语言目录\n")
  92. return 1
  93. for lang in target_langs:
  94. translator = GoogleTranslator(source=args.src_lang, target=map_code(lang))
  95. print(f"==== 开始翻译 -> {lang} ====")
  96. for src_file in iter_source_files(src_root):
  97. rel_path = src_file.relative_to(src_root)
  98. dst_file = dst_root / lang / rel_path
  99. dst_file.parent.mkdir(parents=True, exist_ok=True)
  100. if dst_file.exists() and not args.overwrite:
  101. print(f"跳过已存在 {dst_file}")
  102. continue
  103. with open(src_file, 'r', encoding='utf-8') as f:
  104. content = f.read()
  105. try:
  106. translated = translate_blocks(content, translator)
  107. except Exception as exc:
  108. print(f"[失败] {src_file} -> {dst_file}: {exc}")
  109. continue
  110. with open(dst_file, 'w', encoding='utf-8') as f:
  111. f.write(translated + '\n')
  112. print(f"[OK] {src_file} -> {dst_file}")
  113. print("全部翻译完成")
  114. return 0
  115. if __name__ == '__main__':
  116. sys.exit(main())