main.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. r"""
  4. main.py
  5. Unified controller for prompt-library conversions.
  6. Capabilities
  7. - Scan default folders and let user select a source to convert
  8. - If you select an Excel file (.xlsx), it will convert Excel → Docs
  9. - If you select a prompt docs folder, it will convert Docs → Excel
  10. - Fully non-interactive CLI flags are also supported (automation-friendly)
  11. Conventions (relative to repository root = this file's parent)
  12. - Excel sources under: ./prompt_excel/
  13. - Docs sources under: ./prompt_docs/
  14. - Outputs:
  15. - Excel→Docs: ./prompt_docs/prompt_docs_YYYY_MMDD_HHMMSS/{prompts,docs}
  16. - Docs→Excel: ./prompt_excel/prompt_excel_YYYY_MMDD_HHMMSS/rebuilt.xlsx
  17. Examples
  18. # Interactive selection
  19. python3 main.py
  20. # Non-interactive: choose one Excel file
  21. python3 main.py --select "prompt_excel/prompt (3).xlsx"
  22. # Non-interactive: choose one docs set directory
  23. python3 main.py --select "prompt_docs/prompt_docs_2025_0903_055708"
  24. Notes
  25. - This script is a thin orchestrator that delegates actual work to
  26. scripts/start_convert.py to ensure a single source of truth.
  27. """
  28. from __future__ import annotations
  29. import argparse
  30. import os
  31. import subprocess
  32. import sys
  33. from dataclasses import dataclass
  34. from pathlib import Path
  35. from typing import List, Optional, Sequence, Tuple
  36. # Optional Rich UI imports (fallback to plain if unavailable)
  37. try:
  38. from rich.console import Console
  39. from rich.layout import Layout
  40. from rich.panel import Panel
  41. from rich.table import Table
  42. from rich.text import Text
  43. from rich import box
  44. from rich.prompt import IntPrompt
  45. _RICH_AVAILABLE = True
  46. except Exception: # pragma: no cover
  47. _RICH_AVAILABLE = False
  48. # Optional InquirerPy for arrow-key selection
  49. try:
  50. from InquirerPy import inquirer as _inq
  51. _INQUIRER_AVAILABLE = True
  52. except Exception: # pragma: no cover
  53. _INQUIRER_AVAILABLE = False
  54. @dataclass
  55. class Candidate:
  56. index: int
  57. kind: str # "excel" | "docs" | "docs2jsonl" | "jsonl"
  58. path: Path
  59. label: str
  60. def get_repo_root() -> Path:
  61. return Path(__file__).resolve().parent
  62. def list_excel_files(excel_dir: Path) -> List[Path]:
  63. if not excel_dir.exists():
  64. return []
  65. return sorted([p for p in excel_dir.iterdir() if p.is_file() and p.suffix.lower() == ".xlsx"], key=lambda p: p.stat().st_mtime)
  66. def has_prompt_files(directory: Path) -> bool:
  67. if not directory.exists():
  68. return False
  69. # Detect files like "(r,c)_*.md" anywhere under the directory
  70. for file_path in directory.rglob("*.md"):
  71. name = file_path.name
  72. if name.startswith("(") and ")_" in name:
  73. return True
  74. return False
  75. def list_doc_sets(docs_dir: Path) -> List[Path]:
  76. results: List[Path] = []
  77. if not docs_dir.exists():
  78. return results
  79. # If the docs_dir itself looks like a set, include it
  80. if has_prompt_files(docs_dir):
  81. results.append(docs_dir)
  82. # Also include any immediate children that look like a docs set
  83. for child in sorted(docs_dir.iterdir()):
  84. if child.is_dir() and has_prompt_files(child):
  85. results.append(child)
  86. return results
  87. def run_start_convert(start_convert: Path, mode: str, project_root: Path, select_path: Optional[Path] = None, excel_dir: Optional[Path] = None, docs_dir: Optional[Path] = None) -> int:
  88. """Delegate to scripts/start_convert.py with appropriate flags."""
  89. python_exe = sys.executable
  90. cmd: List[str] = [python_exe, str(start_convert), "--mode", mode]
  91. if select_path is not None:
  92. # Always pass as repo-root-relative or absolute string
  93. cmd.extend(["--select", str(select_path)])
  94. if excel_dir is not None:
  95. cmd.extend(["--excel-dir", str(excel_dir)])
  96. if docs_dir is not None:
  97. cmd.extend(["--docs-dir", str(docs_dir)])
  98. # Execute in repo root to ensure relative defaults resolve correctly
  99. proc = subprocess.run(cmd, cwd=str(project_root))
  100. return proc.returncode
  101. def run_docs_to_jsonl(docs_path: Path, project_root: Path) -> int:
  102. """Convert docs folder to JSONL format."""
  103. import json
  104. import re
  105. prompts_dir = docs_path / "prompts"
  106. if not prompts_dir.exists():
  107. print(f"❌ 找不到 prompts 目录: {prompts_dir}")
  108. return 1
  109. output_dir = project_root / "prompt_jsonl"
  110. output_dir.mkdir(parents=True, exist_ok=True)
  111. output_file = output_dir / f"{docs_path.name}.jsonl"
  112. records = []
  113. for category_dir in sorted(prompts_dir.iterdir()):
  114. if not category_dir.is_dir():
  115. continue
  116. m = re.match(r'\((\d+)\)_(.+)', category_dir.name)
  117. cat_id, cat_name = (m.groups() if m else (0, category_dir.name))
  118. for md_file in sorted(category_dir.glob("*.md")):
  119. if md_file.name == "index.md":
  120. continue
  121. fm = re.match(r'\((\d+),(\d+)\)_(.+)\.md', md_file.name)
  122. if not fm:
  123. continue
  124. row, col, title = fm.groups()
  125. content = md_file.read_text(encoding='utf-8')
  126. records.append({
  127. "category_id": int(cat_id),
  128. "category": cat_name,
  129. "row": int(row),
  130. "col": int(col),
  131. "title": title[:80],
  132. "content": content
  133. })
  134. with open(output_file, 'w', encoding='utf-8') as f:
  135. for r in records:
  136. f.write(json.dumps(r, ensure_ascii=False) + '\n')
  137. print(f"✅ Docs→JSONL OK: {docs_path.name} → {output_file.relative_to(project_root)}")
  138. return 0
  139. def list_jsonl_files(jsonl_dir: Path) -> List[Path]:
  140. if not jsonl_dir.exists():
  141. return []
  142. return sorted([p for p in jsonl_dir.iterdir() if p.is_file() and p.suffix.lower() == ".jsonl"], key=lambda p: p.stat().st_mtime)
  143. def run_jsonl_to_excel(jsonl_path: Path, project_root: Path) -> int:
  144. """Convert JSONL to Excel, each cell contains the full JSON object as string."""
  145. import json
  146. from collections import defaultdict
  147. try:
  148. import pandas as pd
  149. except ImportError:
  150. print("❌ 需要 pandas: pip install pandas openpyxl")
  151. return 1
  152. records = []
  153. with open(jsonl_path, 'r', encoding='utf-8') as f:
  154. for line in f:
  155. if line.strip():
  156. records.append(json.loads(line))
  157. if not records:
  158. print(f"❌ JSONL 文件为空: {jsonl_path}")
  159. return 1
  160. # category -> {row -> {col -> json_string}}
  161. sheets_data: dict = defaultdict(lambda: defaultdict(dict))
  162. cat_id_map = {}
  163. for r in records:
  164. cat_name = r["category"]
  165. cat_id_map[r["category_id"]] = cat_name
  166. # 单元格内容只保留 title 和 content
  167. cell_data = {"title": r["title"], "content": r["content"]}
  168. sheets_data[cat_name][r["row"]][r["col"]] = json.dumps(cell_data, ensure_ascii=False)
  169. output_dir = project_root / "prompt_excel"
  170. output_dir.mkdir(parents=True, exist_ok=True)
  171. output_file = output_dir / f"{jsonl_path.stem}.xlsx"
  172. sorted_cats = sorted(cat_id_map.items(), key=lambda x: x[0])
  173. with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
  174. for cat_id, cat_name in sorted_cats:
  175. row_data = sheets_data[cat_name]
  176. if not row_data:
  177. continue
  178. max_row = max(row_data.keys())
  179. max_col = max(c for cols in row_data.values() for c in cols.keys())
  180. data = []
  181. for row_idx in range(1, max_row + 1):
  182. row_list = []
  183. for col_idx in range(1, max_col + 1):
  184. row_list.append(row_data.get(row_idx, {}).get(col_idx, ""))
  185. data.append(row_list)
  186. df = pd.DataFrame(data)
  187. sheet_name = cat_name[:31]
  188. df.to_excel(writer, sheet_name=sheet_name, index=False, header=False)
  189. print(f"✅ JSONL→Excel OK: {jsonl_path.name} → {output_file.relative_to(project_root)} ({len(sorted_cats)} 个工作表)")
  190. return 0
  191. def build_candidates(project_root: Path, excel_dir: Path, docs_dir: Path) -> List[Candidate]:
  192. candidates: List[Candidate] = []
  193. idx = 1
  194. jsonl_dir = project_root / "prompt_jsonl"
  195. for path in list_excel_files(excel_dir):
  196. label = f"{path.name}"
  197. candidates.append(Candidate(index=idx, kind="excel", path=path, label=label))
  198. idx += 1
  199. for path in list_doc_sets(docs_dir):
  200. display = path.relative_to(project_root) if path.is_absolute() else path
  201. # Docs → Excel
  202. candidates.append(Candidate(index=idx, kind="docs", path=path, label=f"{display}"))
  203. idx += 1
  204. # Docs → JSONL
  205. candidates.append(Candidate(index=idx, kind="docs2jsonl", path=path, label=f"{display}"))
  206. idx += 1
  207. for path in list_jsonl_files(jsonl_dir):
  208. label = f"{path.name}"
  209. candidates.append(Candidate(index=idx, kind="jsonl", path=path, label=label))
  210. idx += 1
  211. return candidates
  212. def select_interactively(candidates: Sequence[Candidate]) -> Optional[Candidate]:
  213. if not candidates:
  214. print("没有可用的 Excel 或 Docs 源。请将 .xlsx 放到 prompt_excel/ 或将文档放到 prompt_docs/ 下。")
  215. return None
  216. # Prefer arrow-key selection if available
  217. if _INQUIRER_AVAILABLE:
  218. try:
  219. choices = [
  220. {"name": f"[{c.kind.upper()}] {c.label}", "value": c.index}
  221. for c in candidates
  222. ]
  223. selection = _inq.select(
  224. message="选择要转换的源(上下箭头,回车确认,Ctrl+C 取消):",
  225. choices=choices,
  226. default=choices[0]["value"],
  227. ).execute()
  228. match = next((c for c in candidates if c.index == selection), None)
  229. return match
  230. except KeyboardInterrupt:
  231. return None
  232. if _RICH_AVAILABLE:
  233. console = Console()
  234. layout = Layout()
  235. layout.split_column(
  236. Layout(name="header", size=3),
  237. Layout(name="list"),
  238. Layout(name="footer", size=3),
  239. )
  240. header = Panel(Text("提示词库转换器", style="bold cyan"), subtitle="选择一个源开始转换", box=box.ROUNDED)
  241. table = Table(box=box.SIMPLE_HEAVY)
  242. table.add_column("编号", style="bold yellow", justify="right", width=4)
  243. table.add_column("类型", style="magenta", width=12)
  244. table.add_column("路径/名称", style="white")
  245. kind_labels = {"excel": "Excel→Docs", "docs": "Docs→Excel", "docs2jsonl": "Docs→JSONL", "jsonl": "JSONL→Excel"}
  246. for c in candidates:
  247. table.add_row(str(c.index), kind_labels.get(c.kind, c.kind), c.label)
  248. layout["header"].update(header)
  249. layout["list"].update(Panel(table, title="可选源", border_style="cyan"))
  250. layout["footer"].update(Panel(Text("输入编号并回车(0 退出)", style="bold"), box=box.ROUNDED))
  251. console.print(layout)
  252. while True:
  253. try:
  254. choice = IntPrompt.ask("编号", default=0)
  255. except Exception:
  256. return None
  257. if choice == 0:
  258. return None
  259. match = next((c for c in candidates if c.index == choice), None)
  260. if match is not None:
  261. return match
  262. console.print("[red]编号不存在,请重试[/red]")
  263. # Plain fallback
  264. kind_labels = {"excel": "Excel→Docs", "docs": "Docs→Excel", "docs2jsonl": "Docs→JSONL", "jsonl": "JSONL→Excel"}
  265. print("请选择一个源进行转换:")
  266. for c in candidates:
  267. print(f" {c.index:2d}. [{kind_labels.get(c.kind, c.kind)}] {c.label}")
  268. print(" 0. 退出")
  269. while True:
  270. try:
  271. raw = input("输入编号后回车:").strip()
  272. except EOFError:
  273. return None
  274. if not raw:
  275. continue
  276. if raw == "0":
  277. return None
  278. if not raw.isdigit():
  279. print("请输入有效数字。")
  280. continue
  281. choice = int(raw)
  282. match = next((c for c in candidates if c.index == choice), None)
  283. if match is None:
  284. print("编号不存在,请重试。")
  285. continue
  286. return match
  287. def parse_args() -> argparse.Namespace:
  288. p = argparse.ArgumentParser(description="prompt-library conversion controller")
  289. p.add_argument("--excel-dir", type=str, default="prompt_excel", help="Excel sources directory (default: prompt_excel)")
  290. p.add_argument("--docs-dir", type=str, default="prompt_docs", help="Docs sources directory (default: prompt_docs)")
  291. p.add_argument("--select", type=str, default=None, help="Path to a specific .xlsx file or a docs folder")
  292. p.add_argument("--mode", type=str, choices=["excel2docs", "docs2excel", "docs2jsonl", "jsonl2excel"], default=None, help="Conversion mode (auto-detect if not specified)")
  293. p.add_argument("--non-interactive", action="store_true", help="Do not prompt; require --select or exit")
  294. return p.parse_args()
  295. def main() -> int:
  296. repo_root = get_repo_root()
  297. start_convert = repo_root / "scripts" / "start_convert.py"
  298. if not start_convert.exists():
  299. print("找不到 scripts/start_convert.py。")
  300. return 1
  301. args = parse_args()
  302. excel_dir = (repo_root / args.excel_dir).resolve() if not Path(args.excel_dir).is_absolute() else Path(args.excel_dir).resolve()
  303. docs_dir = (repo_root / args.docs_dir).resolve() if not Path(args.docs_dir).is_absolute() else Path(args.docs_dir).resolve()
  304. # Non-interactive path with explicit selection
  305. if args.non_interactive or args.select:
  306. if not args.select:
  307. print("--non-interactive 需要配合 --select 使用。")
  308. return 2
  309. selected = Path(args.select)
  310. if not selected.is_absolute():
  311. selected = (repo_root / selected).resolve()
  312. if not selected.exists():
  313. print(f"选择的路径不存在: {selected}")
  314. return 2
  315. if selected.is_file() and selected.suffix.lower() == ".xlsx":
  316. return run_start_convert(start_convert, mode="excel2docs", project_root=repo_root, select_path=selected, excel_dir=excel_dir)
  317. if selected.is_file() and selected.suffix.lower() == ".jsonl":
  318. return run_jsonl_to_excel(selected, repo_root)
  319. if selected.is_dir():
  320. # Check mode or default to docs2excel
  321. if args.mode == "docs2jsonl":
  322. return run_docs_to_jsonl(selected, repo_root)
  323. return run_start_convert(start_convert, mode="docs2excel", project_root=repo_root, select_path=selected, docs_dir=docs_dir)
  324. print("无法识别的选择类型。")
  325. return 2
  326. # Interactive selection
  327. candidates = build_candidates(repo_root, excel_dir, docs_dir)
  328. chosen = select_interactively(candidates)
  329. if chosen is None:
  330. return 0
  331. if chosen.kind == "excel":
  332. return run_start_convert(start_convert, mode="excel2docs", project_root=repo_root, select_path=chosen.path, excel_dir=excel_dir)
  333. elif chosen.kind == "docs2jsonl":
  334. return run_docs_to_jsonl(chosen.path, repo_root)
  335. elif chosen.kind == "jsonl":
  336. return run_jsonl_to_excel(chosen.path, repo_root)
  337. else:
  338. return run_start_convert(start_convert, mode="docs2excel", project_root=repo_root, select_path=chosen.path, docs_dir=docs_dir)
  339. if __name__ == "__main__":
  340. sys.exit(main())