translate.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. 轻量级批量翻译工具:将 Markdown/文本从一种语言翻译到另一种语言。
  5. 默认使用 deep-translator 封装的 Google Translator,自带简单的代码块保护。
  6. 用法示例:
  7. python translate.py --input ../../i18n/zh/README.md --src-lang zh --tgt-lang en --output ../../i18n/en/README.md
  8. """
  9. import argparse
  10. import os
  11. import sys
  12. from typing import List
  13. try:
  14. from deep_translator import GoogleTranslator
  15. except ImportError as exc: # pragma: no cover - 运行前请安装依赖
  16. sys.stderr.write("[错误] 缺少依赖 deep-translator,请先 `pip install -r requirements.txt`\n")
  17. raise
  18. def translate_blocks(lines: List[str], src: str, tgt: str) -> List[str]:
  19. """逐段翻译,保持代码块原样,减少上下文丢失。"""
  20. translated: List[str] = []
  21. in_code = False
  22. buffer: List[str] = []
  23. translator = GoogleTranslator(source=src, target=tgt)
  24. def flush_buffer():
  25. if not buffer:
  26. return
  27. text = "\n".join(buffer)
  28. try:
  29. result = translator.translate(text)
  30. except Exception as exc: # pragma: no cover
  31. raise RuntimeError(f"翻译失败: {exc}") from exc
  32. translated.extend(result.split("\n"))
  33. buffer.clear()
  34. for line in lines:
  35. if line.strip().startswith("```"):
  36. flush_buffer()
  37. in_code = not in_code
  38. translated.append(line)
  39. continue
  40. if in_code:
  41. translated.append(line)
  42. continue
  43. # 空行作为段落分割,先刷新再保留空行
  44. if not line.strip():
  45. flush_buffer()
  46. translated.append(line)
  47. continue
  48. buffer.append(line)
  49. flush_buffer()
  50. return translated
  51. def main() -> int:
  52. parser = argparse.ArgumentParser(description="批量翻译文本/Markdown,保护代码块")
  53. parser.add_argument("--input", required=True, help="源文件路径")
  54. parser.add_argument("--output", required=True, help="目标文件路径")
  55. parser.add_argument("--src-lang", required=True, help="源语言代码,如 zh")
  56. parser.add_argument("--tgt-lang", required=True, help="目标语言代码,如 en")
  57. parser.add_argument("--overwrite", action="store_true", help="允许覆盖已有输出文件")
  58. args = parser.parse_args()
  59. if not os.path.isfile(args.input):
  60. sys.stderr.write(f"[错误] 源文件不存在: {args.input}\n")
  61. return 1
  62. if os.path.exists(args.output) and not args.overwrite:
  63. sys.stderr.write(f"[错误] 目标文件已存在,加 --overwrite 才会覆盖: {args.output}\n")
  64. return 1
  65. with open(args.input, "r", encoding="utf-8") as f:
  66. lines = f.read().splitlines()
  67. translated = translate_blocks(lines, args.src_lang, args.tgt_lang)
  68. os.makedirs(os.path.dirname(args.output), exist_ok=True)
  69. with open(args.output, "w", encoding="utf-8") as f:
  70. f.write("\n".join(translated) + "\n")
  71. print(f"[完成] {args.input} -> {args.output} ({args.src_lang} -> {args.tgt_lang})")
  72. return 0
  73. if __name__ == "__main__": # pragma: no cover
  74. sys.exit(main())