start_convert.py 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. start_convert.py
  5. Launcher that orchestrates conversions between Excel workbooks and prompt documents
  6. using the following conventions:
  7. Input locations (relative to repo root):
  8. - ./prompt_excel/ # place .xlsx files here for Excel → Docs
  9. - ./prompt_docs/ # place prompt folders here for Docs → Excel
  10. Output locations (under repo root, named by the source file/folder creation time, falling back to mtime when no birth time is available):
  11. - ./prompt_docs/prompt_docs_YYYY_MMDD_HHMMSS/ # Excel → Docs results (copies of prompts/*)
  12. - ./prompt_excel/prompt_excel_YYYY_MMDD_HHMMSS/ # Docs → Excel results (rebuilt.xlsx)
  13. Usage:
  14. # Auto mode: if there are .xlsx under prompt_excel, run Excel→Docs;
  15. # if there is a docs set under prompt_docs, run Docs→Excel.
  16. python prompt-library/scripts/start_convert.py
  17. # Force a mode:
  18. python prompt-library/scripts/start_convert.py --mode excel2docs
  19. python prompt-library/scripts/start_convert.py --mode docs2excel
  20. Notes:
  21. - No interactive prompts; behavior is driven by the file presence and CLI flags
  22. - Requires pandas, openpyxl, PyYAML (see scripts/requirements.txt)
  23. """
  24. from __future__ import annotations
  25. import argparse
  26. import importlib.util
  27. import shutil
  28. import sys
  29. from datetime import datetime
  30. from pathlib import Path
  31. from typing import List
  32. def ts_from_path(p: Path) -> str:
  33. st = p.stat()
  34. # Prefer creation/birth time when available; fall back to mtime
  35. ts = getattr(st, "st_birthtime", None)
  36. if ts is None:
  37. # On Windows, st_ctime is creation; on Linux it's inode change time
  38. # We still prefer mtime for consistency if birthtime is unavailable.
  39. ts = st.st_mtime
  40. # Format: YYYY_MMDD_HHMMSS per requirement example 2025_0102_2309
  41. return datetime.fromtimestamp(ts).strftime("%Y_%m%d_%H%M%S")
  42. def load_module(py_path: Path, module_name: str):
  43. spec = importlib.util.spec_from_file_location(module_name, str(py_path))
  44. if spec is None or spec.loader is None:
  45. raise RuntimeError(f"Unable to load module: {py_path}")
  46. module = importlib.util.module_from_spec(spec)
  47. sys.modules[module_name] = module
  48. spec.loader.exec_module(module) # type: ignore
  49. return module
  50. def run_excel_to_docs_for_file(excel_path: Path, prompt_library_dir: Path, out_root: Path) -> Path:
  51. convert_path = prompt_library_dir / "scripts" / "convert_local.py"
  52. mod = load_module(convert_path, "convert_local")
  53. project_root = prompt_library_dir.parent
  54. # Prepare snapshot output directory under repo_root/prompt_docs/
  55. base_dir = out_root / "prompt_docs"
  56. base_dir.mkdir(parents=True, exist_ok=True)
  57. out_dir = base_dir / f"prompt_docs_{ts_from_path(excel_path)}"
  58. if out_dir.exists():
  59. shutil.rmtree(out_dir)
  60. out_dir.mkdir(parents=True, exist_ok=True)
  61. converter = mod.ExcelPromptConverter(
  62. project_root=project_root,
  63. prompt_library_dir=prompt_library_dir,
  64. excel_path=excel_path,
  65. category_name="prompt-category",
  66. config_path=None,
  67. output_root=out_dir,
  68. )
  69. converter.convert()
  70. return out_dir
  71. def run_docs_to_excel_for_dir(prompts_dir: Path, scripts_dir: Path, out_root: Path) -> Path:
  72. docs2excel_path = scripts_dir / "docs_to_excel.py"
  73. mod = load_module(docs2excel_path, "docs_to_excel")
  74. # Determine timestamp from folder creation (or mtime fallback)
  75. base_dir = out_root / "prompt_excel"
  76. base_dir.mkdir(parents=True, exist_ok=True)
  77. ts_fmt = ts_from_path(prompts_dir)
  78. out_dir = base_dir / f"prompt_excel_{ts_fmt}"
  79. out_dir.mkdir(parents=True, exist_ok=True)
  80. out_path = out_dir / "rebuilt.xlsx"
  81. # Resolve actual prompts root (support either the prompts/ subfolder or direct sheet folders)
  82. prompts_root = prompts_dir / "prompts" if (prompts_dir / "prompts").exists() else prompts_dir
  83. # Invoke module's main via argparse emulation
  84. sys.argv = [str(docs2excel_path), "--prompts-dir", str(prompts_root), "--out", str(out_path)]
  85. mod.main() # type: ignore
  86. return out_dir
  87. def find_xlsx_files(input_excel_dir: Path) -> List[Path]:
  88. if not input_excel_dir.exists():
  89. return []
  90. return sorted([p for p in input_excel_dir.iterdir() if p.is_file() and p.suffix.lower() in {".xlsx"}], key=lambda p: p.stat().st_mtime)
  91. def has_prompt_files(input_docs_dir: Path) -> bool:
  92. if not input_docs_dir.exists():
  93. return False
  94. for p in input_docs_dir.rglob("*.md"):
  95. if p.name.startswith("(") and ")_" in p.name:
  96. return True
  97. return False
  98. def main() -> None:
  99. parser = argparse.ArgumentParser(description="Start conversion between Excel and prompt docs")
  100. parser.add_argument("--mode", choices=["auto", "excel2docs", "docs2excel"], default="auto")
  101. parser.add_argument("--excel-dir", default="prompt_excel", help="Input directory containing .xlsx files")
  102. parser.add_argument("--docs-dir", default="prompt_docs", help="Input directory containing prompt folders")
  103. parser.add_argument("--select", type=str, default=None, help="Optional path to a specific Excel file or prompts folder to convert")
  104. args = parser.parse_args()
  105. script_path = Path(__file__).resolve()
  106. prompt_library_dir = script_path.parent.parent # repo root (prompt-library)
  107. project_root = prompt_library_dir # use prompt-library as root for I/O
  108. input_excel_dir = (prompt_library_dir / args.excel_dir).resolve()
  109. input_docs_dir = (prompt_library_dir / args.docs_dir).resolve()
  110. ran_any = False
  111. if args.mode in ("auto", "excel2docs"):
  112. # If user explicitly selected a file, prefer it
  113. if args.select:
  114. sel = Path(args.select)
  115. if not sel.is_absolute():
  116. sel = (project_root / sel).resolve()
  117. if sel.is_file() and sel.suffix.lower() == ".xlsx":
  118. out_dir = run_excel_to_docs_for_file(sel, prompt_library_dir, project_root)
  119. rel = out_dir.relative_to(prompt_library_dir)
  120. print(f"✅ Excel→Docs OK: {sel.name} → {rel}")
  121. ran_any = True
  122. else:
  123. xlsx_files = find_xlsx_files(input_excel_dir)
  124. for xlsx in xlsx_files:
  125. out_dir = run_excel_to_docs_for_file(xlsx, prompt_library_dir, project_root)
  126. rel = out_dir.relative_to(prompt_library_dir)
  127. print(f"✅ Excel→Docs OK: {xlsx.name} → {rel}")
  128. ran_any = True
  129. if args.mode in ("auto", "docs2excel"):
  130. if args.select:
  131. sel = Path(args.select)
  132. if not sel.is_absolute():
  133. sel = (project_root / sel).resolve()
  134. if sel.exists() and sel.is_dir():
  135. out_dir = run_docs_to_excel_for_dir(sel, prompt_library_dir / "scripts", project_root)
  136. rel = out_dir.relative_to(prompt_library_dir)
  137. # show sel relative as well when possible
  138. try:
  139. sel_rel = Path(sel).relative_to(prompt_library_dir)
  140. except Exception:
  141. sel_rel = Path(sel)
  142. print(f"✅ Docs→Excel OK: {sel_rel} → {rel}")
  143. ran_any = True
  144. else:
  145. if has_prompt_files(input_docs_dir):
  146. out_dir = run_docs_to_excel_for_dir(input_docs_dir, prompt_library_dir / "scripts", project_root)
  147. rel = out_dir.relative_to(prompt_library_dir)
  148. print(f"✅ Docs→Excel OK: {args.docs_dir} → {rel}")
  149. ran_any = True
  150. if not ran_any:
  151. print("ℹ️ Nothing to do. Place .xlsx under ./prompt_excel or prompt docs under ./prompt_docs, or use --mode to force.")
  152. if __name__ == "__main__":
  153. main()