main.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. r"""
  4. main.py
  5. Unified controller for prompt-library conversions.
  6. 支持的转换模式
  7. ==============
  8. 1. Excel → Docs : 将 Excel 工作簿转换为 Markdown 文档目录
  9. 2. Docs → Excel : 将 Markdown 文档目录还原为 Excel 工作簿
  10. 3. Docs → JSONL : 将 Markdown 文档转换为 JSONL 格式(保留完整元信息)
  11. 4. JSONL → Excel : 将 JSONL 转换为 Excel(单元格存储 JSON 对象)
  12. 数据格式规范
  13. ============
  14. Excel 结构:
  15. - 每个工作表(sheet) = 一个分类(category)
  16. - 行(row) = 不同提示词
  17. - 列(col) = 版本迭代
  18. Docs 结构:
  19. - prompts/(N)_分类名/ # N = category_id
  20. - prompts/(N)_分类名/(r,c)_标题.md # r=row, c=col
  21. JSONL 格式 (每行一个 JSON 对象):
  22. {
  23. "category_id": 2, # 分类编号
  24. "category": "元提示词", # 分类名称
  25. "row": 1, # 原 Excel 行号
  26. "col": 1, # 原 Excel 列号(版本号)
  27. "title": "...", # 标题(截断80字符)
  28. "content": "..." # 完整内容
  29. }
  30. JSONL → Excel 单元格格式:
  31. {"title": "...", "content": "..."} # 只保留 title 和 content
  32. 目录约定
  33. ========
  34. - Excel 源文件: ./prompt_excel/
  35. - Docs 源目录: ./prompt_docs/
  36. - JSONL 文件: ./prompt_jsonl/
  37. - 输出:
  38. - Excel→Docs: ./prompt_docs/prompt_docs_YYYY_MMDD_HHMMSS/
  39. - Docs→Excel: ./prompt_excel/prompt_excel_YYYY_MMDD_HHMMSS/rebuilt.xlsx
  40. - Docs→JSONL: ./prompt_jsonl/{docs_name}.jsonl
  41. - JSONL→Excel: ./prompt_excel/{jsonl_name}.xlsx
  42. 使用示例
  43. ========
  44. # 交互式选择
  45. python3 main.py
  46. # Excel → Docs
  47. python3 main.py --select "prompt_excel/prompt.xlsx"
  48. # Docs → Excel
  49. python3 main.py --select "prompt_docs/prompt_docs_2025_1222"
  50. # Docs → JSONL
  51. python3 main.py --select "prompt_docs/prompt_docs_2025_1222" --mode docs2jsonl
  52. # JSONL → Excel
  53. python3 main.py --select "prompt_jsonl/prompt_docs.jsonl"
  54. """
  55. from __future__ import annotations
  56. import argparse
  57. import os
  58. import subprocess
  59. import sys
  60. from dataclasses import dataclass
  61. from pathlib import Path
  62. from typing import List, Optional, Sequence, Tuple
  63. # Optional Rich UI imports (fallback to plain if unavailable)
  64. try:
  65. from rich.console import Console
  66. from rich.layout import Layout
  67. from rich.panel import Panel
  68. from rich.table import Table
  69. from rich.text import Text
  70. from rich import box
  71. from rich.prompt import IntPrompt
  72. _RICH_AVAILABLE = True
  73. except Exception: # pragma: no cover
  74. _RICH_AVAILABLE = False
  75. # Optional InquirerPy for arrow-key selection
  76. try:
  77. from InquirerPy import inquirer as _inq
  78. _INQUIRER_AVAILABLE = True
  79. except Exception: # pragma: no cover
  80. _INQUIRER_AVAILABLE = False
  81. @dataclass
  82. class Candidate:
  83. index: int
  84. kind: str # "excel" | "docs" | "docs2jsonl" | "jsonl"
  85. path: Path
  86. label: str
  87. def get_repo_root() -> Path:
  88. return Path(__file__).resolve().parent
  89. def list_excel_files(excel_dir: Path) -> List[Path]:
  90. if not excel_dir.exists():
  91. return []
  92. return sorted([p for p in excel_dir.iterdir() if p.is_file() and p.suffix.lower() == ".xlsx"], key=lambda p: p.stat().st_mtime)
  93. def has_prompt_files(directory: Path) -> bool:
  94. if not directory.exists():
  95. return False
  96. # Detect files like "(r,c)_*.md" anywhere under the directory
  97. for file_path in directory.rglob("*.md"):
  98. name = file_path.name
  99. if name.startswith("(") and ")_" in name:
  100. return True
  101. return False
  102. def list_doc_sets(docs_dir: Path) -> List[Path]:
  103. results: List[Path] = []
  104. if not docs_dir.exists():
  105. return results
  106. # If the docs_dir itself looks like a set, include it
  107. if has_prompt_files(docs_dir):
  108. results.append(docs_dir)
  109. # Also include any immediate children that look like a docs set
  110. for child in sorted(docs_dir.iterdir()):
  111. if child.is_dir() and has_prompt_files(child):
  112. results.append(child)
  113. return results
  114. def run_start_convert(start_convert: Path, mode: str, project_root: Path, select_path: Optional[Path] = None, excel_dir: Optional[Path] = None, docs_dir: Optional[Path] = None) -> int:
  115. """Delegate to scripts/start_convert.py with appropriate flags."""
  116. python_exe = sys.executable
  117. cmd: List[str] = [python_exe, str(start_convert), "--mode", mode]
  118. if select_path is not None:
  119. # Always pass as repo-root-relative or absolute string
  120. cmd.extend(["--select", str(select_path)])
  121. if excel_dir is not None:
  122. cmd.extend(["--excel-dir", str(excel_dir)])
  123. if docs_dir is not None:
  124. cmd.extend(["--docs-dir", str(docs_dir)])
  125. # Execute in repo root to ensure relative defaults resolve correctly
  126. proc = subprocess.run(cmd, cwd=str(project_root))
  127. return proc.returncode
  128. def run_docs_to_jsonl(docs_path: Path, project_root: Path) -> int:
  129. """Convert docs folder to JSONL format."""
  130. import json
  131. import re
  132. prompts_dir = docs_path / "prompts"
  133. if not prompts_dir.exists():
  134. print(f"❌ 找不到 prompts 目录: {prompts_dir}")
  135. return 1
  136. output_dir = project_root / "prompt_jsonl"
  137. output_dir.mkdir(parents=True, exist_ok=True)
  138. output_file = output_dir / f"{docs_path.name}.jsonl"
  139. records = []
  140. for category_dir in sorted(prompts_dir.iterdir()):
  141. if not category_dir.is_dir():
  142. continue
  143. m = re.match(r'\((\d+)\)_(.+)', category_dir.name)
  144. cat_id, cat_name = (m.groups() if m else (0, category_dir.name))
  145. for md_file in sorted(category_dir.glob("*.md")):
  146. if md_file.name == "index.md":
  147. continue
  148. fm = re.match(r'\((\d+),(\d+)\)_(.+)\.md', md_file.name)
  149. if not fm:
  150. continue
  151. row, col, title = fm.groups()
  152. content = md_file.read_text(encoding='utf-8')
  153. records.append({
  154. "category_id": int(cat_id),
  155. "category": cat_name,
  156. "row": int(row),
  157. "col": int(col),
  158. "title": title[:80],
  159. "content": content
  160. })
  161. with open(output_file, 'w', encoding='utf-8') as f:
  162. for r in records:
  163. f.write(json.dumps(r, ensure_ascii=False) + '\n')
  164. print(f"✅ Docs→JSONL OK: {docs_path.name} → {output_file.relative_to(project_root)}")
  165. return 0
  166. def list_jsonl_files(jsonl_dir: Path) -> List[Path]:
  167. if not jsonl_dir.exists():
  168. return []
  169. return sorted([p for p in jsonl_dir.iterdir() if p.is_file() and p.suffix.lower() == ".jsonl"], key=lambda p: p.stat().st_mtime)
  170. def run_jsonl_to_excel(jsonl_path: Path, project_root: Path) -> int:
  171. """Convert JSONL to Excel, each cell contains the full JSON object as string."""
  172. import json
  173. from collections import defaultdict
  174. try:
  175. import pandas as pd
  176. except ImportError:
  177. print("❌ 需要 pandas: pip install pandas openpyxl")
  178. return 1
  179. records = []
  180. with open(jsonl_path, 'r', encoding='utf-8') as f:
  181. for line in f:
  182. if line.strip():
  183. records.append(json.loads(line))
  184. if not records:
  185. print(f"❌ JSONL 文件为空: {jsonl_path}")
  186. return 1
  187. # category -> {row -> {col -> json_string}}
  188. sheets_data: dict = defaultdict(lambda: defaultdict(dict))
  189. cat_id_map = {}
  190. for r in records:
  191. cat_name = r["category"]
  192. cat_id_map[r["category_id"]] = cat_name
  193. # 单元格内容只保留 title 和 content
  194. cell_data = {"title": r["title"], "content": r["content"]}
  195. sheets_data[cat_name][r["row"]][r["col"]] = json.dumps(cell_data, ensure_ascii=False)
  196. output_dir = project_root / "prompt_excel"
  197. output_dir.mkdir(parents=True, exist_ok=True)
  198. output_file = output_dir / f"{jsonl_path.stem}.xlsx"
  199. sorted_cats = sorted(cat_id_map.items(), key=lambda x: x[0])
  200. with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
  201. for cat_id, cat_name in sorted_cats:
  202. row_data = sheets_data[cat_name]
  203. if not row_data:
  204. continue
  205. max_row = max(row_data.keys())
  206. max_col = max(c for cols in row_data.values() for c in cols.keys())
  207. data = []
  208. for row_idx in range(1, max_row + 1):
  209. row_list = []
  210. for col_idx in range(1, max_col + 1):
  211. row_list.append(row_data.get(row_idx, {}).get(col_idx, ""))
  212. data.append(row_list)
  213. df = pd.DataFrame(data)
  214. sheet_name = cat_name[:31]
  215. df.to_excel(writer, sheet_name=sheet_name, index=False, header=False)
  216. print(f"✅ JSONL→Excel OK: {jsonl_path.name} → {output_file.relative_to(project_root)} ({len(sorted_cats)} 个工作表)")
  217. return 0
  218. def build_candidates(project_root: Path, excel_dir: Path, docs_dir: Path) -> List[Candidate]:
  219. candidates: List[Candidate] = []
  220. idx = 1
  221. jsonl_dir = project_root / "prompt_jsonl"
  222. for path in list_excel_files(excel_dir):
  223. label = f"{path.name}"
  224. candidates.append(Candidate(index=idx, kind="excel", path=path, label=label))
  225. idx += 1
  226. for path in list_doc_sets(docs_dir):
  227. display = path.relative_to(project_root) if path.is_absolute() else path
  228. # Docs → Excel
  229. candidates.append(Candidate(index=idx, kind="docs", path=path, label=f"{display}"))
  230. idx += 1
  231. # Docs → JSONL
  232. candidates.append(Candidate(index=idx, kind="docs2jsonl", path=path, label=f"{display}"))
  233. idx += 1
  234. for path in list_jsonl_files(jsonl_dir):
  235. label = f"{path.name}"
  236. candidates.append(Candidate(index=idx, kind="jsonl", path=path, label=label))
  237. idx += 1
  238. return candidates
  239. def select_interactively(candidates: Sequence[Candidate]) -> Optional[Candidate]:
  240. if not candidates:
  241. print("没有可用的 Excel 或 Docs 源。请将 .xlsx 放到 prompt_excel/ 或将文档放到 prompt_docs/ 下。")
  242. return None
  243. # Prefer arrow-key selection if available
  244. if _INQUIRER_AVAILABLE:
  245. try:
  246. choices = [
  247. {"name": f"[{c.kind.upper()}] {c.label}", "value": c.index}
  248. for c in candidates
  249. ]
  250. selection = _inq.select(
  251. message="选择要转换的源(上下箭头,回车确认,Ctrl+C 取消):",
  252. choices=choices,
  253. default=choices[0]["value"],
  254. ).execute()
  255. match = next((c for c in candidates if c.index == selection), None)
  256. return match
  257. except KeyboardInterrupt:
  258. return None
  259. if _RICH_AVAILABLE:
  260. console = Console()
  261. layout = Layout()
  262. layout.split_column(
  263. Layout(name="header", size=3),
  264. Layout(name="list"),
  265. Layout(name="footer", size=3),
  266. )
  267. header = Panel(Text("提示词库转换器", style="bold cyan"), subtitle="选择一个源开始转换", box=box.ROUNDED)
  268. table = Table(box=box.SIMPLE_HEAVY)
  269. table.add_column("编号", style="bold yellow", justify="right", width=4)
  270. table.add_column("类型", style="magenta", width=12)
  271. table.add_column("路径/名称", style="white")
  272. kind_labels = {"excel": "Excel→Docs", "docs": "Docs→Excel", "docs2jsonl": "Docs→JSONL", "jsonl": "JSONL→Excel"}
  273. for c in candidates:
  274. table.add_row(str(c.index), kind_labels.get(c.kind, c.kind), c.label)
  275. layout["header"].update(header)
  276. layout["list"].update(Panel(table, title="可选源", border_style="cyan"))
  277. layout["footer"].update(Panel(Text("输入编号并回车(0 退出)", style="bold"), box=box.ROUNDED))
  278. console.print(layout)
  279. while True:
  280. try:
  281. choice = IntPrompt.ask("编号", default=0)
  282. except Exception:
  283. return None
  284. if choice == 0:
  285. return None
  286. match = next((c for c in candidates if c.index == choice), None)
  287. if match is not None:
  288. return match
  289. console.print("[red]编号不存在,请重试[/red]")
  290. # Plain fallback
  291. kind_labels = {"excel": "Excel→Docs", "docs": "Docs→Excel", "docs2jsonl": "Docs→JSONL", "jsonl": "JSONL→Excel"}
  292. print("请选择一个源进行转换:")
  293. for c in candidates:
  294. print(f" {c.index:2d}. [{kind_labels.get(c.kind, c.kind)}] {c.label}")
  295. print(" 0. 退出")
  296. while True:
  297. try:
  298. raw = input("输入编号后回车:").strip()
  299. except EOFError:
  300. return None
  301. if not raw:
  302. continue
  303. if raw == "0":
  304. return None
  305. if not raw.isdigit():
  306. print("请输入有效数字。")
  307. continue
  308. choice = int(raw)
  309. match = next((c for c in candidates if c.index == choice), None)
  310. if match is None:
  311. print("编号不存在,请重试。")
  312. continue
  313. return match
  314. def parse_args() -> argparse.Namespace:
  315. p = argparse.ArgumentParser(description="prompt-library conversion controller")
  316. p.add_argument("--excel-dir", type=str, default="prompt_excel", help="Excel sources directory (default: prompt_excel)")
  317. p.add_argument("--docs-dir", type=str, default="prompt_docs", help="Docs sources directory (default: prompt_docs)")
  318. p.add_argument("--select", type=str, default=None, help="Path to a specific .xlsx file or a docs folder")
  319. p.add_argument("--mode", type=str, choices=["excel2docs", "docs2excel", "docs2jsonl", "jsonl2excel"], default=None, help="Conversion mode (auto-detect if not specified)")
  320. p.add_argument("--non-interactive", action="store_true", help="Do not prompt; require --select or exit")
  321. return p.parse_args()
  322. def main() -> int:
  323. repo_root = get_repo_root()
  324. start_convert = repo_root / "scripts" / "start_convert.py"
  325. if not start_convert.exists():
  326. print("找不到 scripts/start_convert.py。")
  327. return 1
  328. args = parse_args()
  329. excel_dir = (repo_root / args.excel_dir).resolve() if not Path(args.excel_dir).is_absolute() else Path(args.excel_dir).resolve()
  330. docs_dir = (repo_root / args.docs_dir).resolve() if not Path(args.docs_dir).is_absolute() else Path(args.docs_dir).resolve()
  331. # Non-interactive path with explicit selection
  332. if args.non_interactive or args.select:
  333. if not args.select:
  334. print("--non-interactive 需要配合 --select 使用。")
  335. return 2
  336. selected = Path(args.select)
  337. if not selected.is_absolute():
  338. selected = (repo_root / selected).resolve()
  339. if not selected.exists():
  340. print(f"选择的路径不存在: {selected}")
  341. return 2
  342. if selected.is_file() and selected.suffix.lower() == ".xlsx":
  343. return run_start_convert(start_convert, mode="excel2docs", project_root=repo_root, select_path=selected, excel_dir=excel_dir)
  344. if selected.is_file() and selected.suffix.lower() == ".jsonl":
  345. return run_jsonl_to_excel(selected, repo_root)
  346. if selected.is_dir():
  347. # Check mode or default to docs2excel
  348. if args.mode == "docs2jsonl":
  349. return run_docs_to_jsonl(selected, repo_root)
  350. return run_start_convert(start_convert, mode="docs2excel", project_root=repo_root, select_path=selected, docs_dir=docs_dir)
  351. print("无法识别的选择类型。")
  352. return 2
  353. # Interactive selection
  354. candidates = build_candidates(repo_root, excel_dir, docs_dir)
  355. chosen = select_interactively(candidates)
  356. if chosen is None:
  357. return 0
  358. if chosen.kind == "excel":
  359. return run_start_convert(start_convert, mode="excel2docs", project_root=repo_root, select_path=chosen.path, excel_dir=excel_dir)
  360. elif chosen.kind == "docs2jsonl":
  361. return run_docs_to_jsonl(chosen.path, repo_root)
  362. elif chosen.kind == "jsonl":
  363. return run_jsonl_to_excel(chosen.path, repo_root)
  364. else:
  365. return run_start_convert(start_convert, mode="docs2excel", project_root=repo_root, select_path=chosen.path, docs_dir=docs_dir)
  366. if __name__ == "__main__":
  367. sys.exit(main())