translate_files.py 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. import os
  2. import re
  3. import json
  4. from pathlib import Path
  5. # Load PATH_TRANSLATION_MAP from JSON
  6. # Ensure the path is relative to the script's location or absolute
  7. script_dir = Path(__file__).parent
  8. path_translation_map_path = script_dir / 'path_translation_map.json'
  9. with open(path_translation_map_path, 'r', encoding='utf-8') as f:
  10. PATH_TRANSLATION_MAP = json.load(f)
  11. def translate_path_component(component):
  12. if component in PATH_TRANSLATION_MAP:
  13. return PATH_TRANSLATION_MAP[component]
  14. # Handle numeric prefixes like (3,1)_#_
  15. if re.match(r"^\(\d+,\d+\)_#?_", component):
  16. cleaned_component = re.sub(r"^\(\d+,\d+\)_#?_", "", component).replace("_", " ")
  17. # Try to match cleaned component against known translations
  18. for k, v in PATH_TRANSLATION_MAP.items():
  19. if cleaned_component in k or k in cleaned_component:
  20. return v.replace(" ", "_") # Return simplified and underscored version
  21. # Fallback for complex patterns not in map
  22. return re.sub(r"[^a-zA-Z0-9]+", "_", cleaned_component).strip("_")
  23. # If it's a very long Chinese filename that might have specific terms
  24. # These were added to PATH_TRANSLATION_MAP now, so this generic logic might not be hit as often
  25. if "代码组织" == component: # Exact match for a known common Chinese filename part
  26. return "Code_Organization"
  27. if "编程书籍推荐" == component:
  28. return "Recommended_Programming_Books"
  29. if "通用项目架构模板" == component:
  30. return "General_Project_Architecture_Template"
  31. if "工具集" == component:
  32. return "Tool_Set"
  33. if "系统提示词构建原则" == component:
  34. return "System_Prompt_Construction_Principles"
  35. if "胶水编程" == component:
  36. return "Glue_Programming"
  37. if "vibe-coding-经验收集" == component:
  38. return "vibe-coding-Experience_Collection"
  39. if "开发经验" == component:
  40. return "Development_Experience"
  41. if "学习经验" == component:
  42. return "Learning_Experience"
  43. if "编程之道" == component:
  44. return "The_Way_of_Programming"
  45. if "客观分析" == component:
  46. return "Objective_Analysis"
  47. if "精华技术文档生成提示词" == component:
  48. return "Essential_Technical_Document_Generation_Prompt"
  49. if "智能需求理解与研发导航引擎" == component:
  50. return "Intelligent_Requirement_Understanding_and_R_D_Navigation_Engine"
  51. if "软件工程分析" == component:
  52. return "Software_Engineering_Analysis"
  53. if "系统架构可视化生成Mermaid":
  54. return "System_Architecture_Visualization_Generation_Mermaid"
  55. if "系统架构":
  56. return "System_Architecture"
  57. if "简易提示词优化器":
  58. return "Simple_Prompt_Optimizer"
  59. if "提示工程师任务说明":
  60. return "Prompt_Engineer_Task_Description"
  61. if "高质量代码开发专家":
  62. return "High_Quality_Code_Development_Expert"
  63. if "标准项目目录结构":
  64. return "Standard_Project_Directory_Structure"
  65. if "分析1":
  66. return "Analysis_1"
  67. if "分析2":
  68. return "Analysis_2"
  69. if "执行纯净性检测":
  70. return "Perform_Purity_Test"
  71. if "标准化流程":
  72. return "Standardized_Process"
  73. if "项目上下文文档生成":
  74. return "Project_Context_Document_Generation"
  75. if "人机对齐":
  76. return "Human_AI_Alignment"
  77. if "plan提示词":
  78. return "Plan_Prompt"
  79. if "Claude Code 八荣八耻":
  80. return "Claude_Code_Eight_Honors_and_Eight_Shames"
  81. if "任务描述,分析与补全任务":
  82. return "Task_Description_Analysis_and_Completion"
  83. if "前端设计":
  84. return "Frontend_Design"
  85. if "输入简单的日常行为的研究报告摘要":
  86. return "Summary_of_Research_Report_on_Simple_Daily_Behaviors"
  87. if "胶水开发":
  88. return "Glue_Development"
  89. if "sh控制面板生成":
  90. return "SH_Control_Panel_Generation"
  91. if "角色定义":
  92. return "Role_Definition"
  93. if "CLAUDE 记忆":
  94. return "CLAUDE_Memory"
  95. if "Docs文件夹中文命名提示词":
  96. return "Docs_Folder_Chinese_Naming_Prompt"
  97. if "通用项目架构综合分析与优化框架":
  98. return "General_Project_Architecture_Comprehensive_Analysis_and_Optimization_Framework"
  99. if "执行📘_文件头注释规范(用于所有代码文件最上方)" == component:
  100. return "Execute_File_Header_Comment_Specification_for_All_Code_Files"
  101. if "数据管道" == component:
  102. return "Data_Pipeline"
  103. if "项目变量与工具统一维护" == component:
  104. return "Unified_Management_of_Project_Variables_and_Tools"
  105. if "ASCII图生成" == component:
  106. return "ASCII_Art_Generation"
  107. if "Kobe's Diary of Saving Mother, Father, Fiancee, and In-laws × OTE Model Trading Mode × M.I.T White Professor (Accused of Sexual H_arassment by Female Student) v2" == component:
  108. return "Kobe_s_Diary_of_Saving_Mother_Father_Fiancee_and_In_laws_OTE_Model_Trading_Mode_M_I_T_White_Professor_Accused_of_Sexual_Harassment_by_Female_Student_v2" # Simplified for filename
  109. if "动态视图对齐实现文档" == component:
  110. return "Dynamic_View_Alignment_Implementation_Document"
  111. if "Telegram_Bot_按钮和键盘实现模板" == component:
  112. return "Telegram_Bot_Button_and_Keyboard_Implementation_Template"
  113. if "README" == component:
  114. return "README" # Keep README as is
  115. # Default: simply replace spaces with underscores and remove problematic characters for filenames
  116. # For demonstration, a placeholder translation for unseen Chinese
  117. return re.sub(r"[^a-zA-Z0-9]+", "_", component).strip("_")
  118. def get_translated_path(chinese_path_str): # Accept string
  119. parts = Path(chinese_path_str).parts # Use pathlib to split path
  120. translated_parts = []
  121. # Handle the 'i18n/zh' to 'i18n/en' conversion at the root
  122. if parts[0] == "i18n" and parts[1] == "zh":
  123. translated_parts.append("i18n")
  124. translated_parts.append("en")
  125. remaining_parts = parts[2:]
  126. else:
  127. remaining_parts = parts
  128. for i, part in enumerate(remaining_parts):
  129. base, ext = os.path.splitext(part)
  130. translated_base = translate_path_component(base)
  131. translated_parts.append(translated_base + ext)
  132. return Path(*translated_parts) # Reconstruct path using pathlib
  133. # Load chinese_files from JSON
  134. chinese_files_list_path = script_dir / 'chinese_files_list.json'
  135. with open(chinese_files_list_path, 'r', encoding='utf-8') as f:
  136. chinese_files_str_list = json.load(f)
  137. files_to_translate_content = []
  138. for chinese_file_path_str in chinese_files_str_list:
  139. english_file_path = get_translated_path(chinese_file_path_str) # Get translated Path object
  140. # Read the content of the English placeholder file
  141. try:
  142. with english_file_path.open('r', encoding='utf-8') as f:
  143. content = f.read()
  144. if content.startswith("TRANSLATED CONTENT:\n"):
  145. chinese_content = content.replace("TRANSLATED CONTENT:\n", "")
  146. files_to_translate_content.append({
  147. "chinese_content": chinese_content,
  148. "english_target_path": str(english_file_path) # Store as string for easy display
  149. })
  150. except FileNotFoundError:
  151. # This can happen if the previous script run failed for this file
  152. print(f"Warning: English placeholder file not found for {english_file_path}. Skipping content extraction for this file.")
  153. continue
  154. except Exception as e:
  155. print(f"Error reading {english_file_path} for content extraction: {e}. Skipping.")
  156. continue
  157. # Output the list of files to translate content for
  158. print("--- Files for Content Translation ---")
  159. for item in files_to_translate_content:
  160. print(f"Target Path: {item['english_target_path']}")
  161. print(f"Chinese Content:\n```markdown\n{item['chinese_content'].strip()}\n```\n{'='*50}\n")
  162. print(f"Total files requiring content translation: {len(files_to_translate_content)}")