#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ convert_local.py Reads a local Excel file and converts its contents into a structured prompt library under `prompt-library/` per the development guide. It generates: - prompts// (one file per non-empty cell across columns for each prompt row) - prompts/index.json (summary + traceability) - prompts//index.md (table + version matrix) - docs/tools.md, docs/support.md, docs/excel-data.md - README.md (top-level for prompt-library) Usage: python prompt-library/scripts/convert_local.py \ [--excel "/absolute/or/relative/path/to/prompt (2).xlsx"] \ [--config prompt-library/scripts/config.yaml] \ [--category-name prompt-category] If no arguments are provided, it will: - load config from prompt-library/scripts/config.yaml (if present) - resolve Excel path from config.source.excel_file relative to project root - default category to "prompt-category" Dependencies: pandas, openpyxl, PyYAML """ from __future__ import annotations import argparse import json import re from dataclasses import dataclass from datetime import datetime from pathlib import Path from typing import Dict, List, Optional, Tuple import pandas as pd try: import yaml # type: ignore except Exception: # pragma: no cover yaml = None # Optional; script still works without YAML if no config provided @dataclass class RowClassification: row_index: int # zero-based excel index kind: str # prompt|tool|social|wallet_header|wallet|warning|other data: Dict class ExcelPromptConverter: def __init__( self, project_root: Path, prompt_library_dir: Path, excel_path: Path, category_name: str = "prompt-category", config_path: Optional[Path] = None, output_root: Optional[Path] = None, ) -> None: self.project_root = project_root self.prompt_library_dir = prompt_library_dir # If an output_root is provided, write into that snapshot directory # rather than the in-repo prompts/docs locations. if output_root is not None: self.output_root = output_root self.prompts_dir = output_root / "prompts" self.docs_dir = output_root / "docs" self.readme_target_root = output_root else: self.output_root = None self.prompts_dir = prompt_library_dir / "prompts" self.docs_dir = prompt_library_dir / "docs" self.readme_target_root = prompt_library_dir self.scripts_dir = prompt_library_dir / "scripts" self.category_name = category_name # fallback if single sheet self.category_dir = self.prompts_dir / self.category_name self.excel_path = excel_path self.config_path = config_path self.config = self._load_config(config_path) self.now = datetime.now() # Per-sheet prompts map: {sheet_name: {excel_row -> {title, versions{col->file}}}} self.prompts_info_by_sheet: Dict[str, Dict[int, Dict]] = {} self.tools: List[Dict] = [] self.social: List[Dict] = [] self.wallets: Dict[str, Dict] = {} self.misc: List[Dict] = [] self.total_rows = 0 self.total_cols = 0 self.sheet_names_order: List[str] = [] def _load_config(self, config_path: Optional[Path]) -> Dict: if config_path and config_path.exists() and yaml is not None: with config_path.open("r", encoding="utf-8") as f: return yaml.safe_load(f) or {} return {} def _sanitize_filename(self, text: str, max_length: int = 60) -> str: if not text: return "untitled" text = str(text).strip() text = re.sub(r"[\\/:*?\"<>|\r\n]+", "", text) text = text.replace(" ", "_") if len(text) > max_length: text = text[:max_length].rstrip("_-") return text or "untitled" def _extract_title(self, contents: List[str]) -> str: for c in contents: if c and c.strip(): first_line = c.strip().splitlines()[0] words = first_line.split() candidate = " ".join(words[:6]) return self._sanitize_filename(candidate) return "untitled" def _read_excel_sheets(self) -> Dict[str, pd.DataFrame]: # Read all sheets; if workbook has single sheet, still returns dict with one entry sheets: Dict[str, pd.DataFrame] = pd.read_excel(self.excel_path, header=None, engine="openpyxl", sheet_name=None) # type: ignore normalized: Dict[str, pd.DataFrame] = {} for sheet_name, df in sheets.items(): try: df = df.map(lambda v: v.strip() if isinstance(v, str) else v) # pandas >=2.1 except Exception: df = df.applymap(lambda v: v.strip() if isinstance(v, str) else v) # fallback normalized[sheet_name] = df # preserve order of sheets self.sheet_names_order = list(normalized.keys()) # set global rows/cols to first sheet for summary; detailed per-sheet handled later if normalized: any_df = normalized[self.sheet_names_order[0]] self.total_rows, self.total_cols = any_df.shape return normalized def _classify_rows(self, df: pd.DataFrame) -> List[RowClassification]: classifications: List[RowClassification] = [] wallet_mode = False for r in range(df.shape[0]): row_vals = [df.iloc[r, c] if c < df.shape[1] else None for c in range(df.shape[1])] non_empty = [v for v in row_vals if isinstance(v, str) and v.strip()] any_http = any(isinstance(v, str) and v.startswith("http") for v in row_vals) if not non_empty: classifications.append(RowClassification(r, "other", {"empty": True})) continue # Wallet header detection (e.g., contains "网络" and a label like "礼貌要饭地址") joined = " ".join([v for v in non_empty]) if any(k in joined for k in ["网络", "网络名称"]) and any( k in joined for k in ["礼貌要饭地址", "钱包", "地址"] ): wallet_mode = True classifications.append(RowClassification(r, "wallet_header", {"raw": row_vals})) continue if wallet_mode: # If the row still looks like wallet data (two columns: network, address) first, second = row_vals[0] if len(row_vals) > 0 else None, row_vals[1] if len(row_vals) > 1 else None if (first and isinstance(first, str)) and (second and isinstance(second, str)): classifications.append( RowClassification( r, "wallet", { "network": first, "address": second, "raw": row_vals, }, ) ) continue else: wallet_mode = False # end wallet section if pattern breaks # Tools and social heuristics if any_http: url = next(v for v in row_vals if isinstance(v, str) and v.startswith("http")) desc = None for v in row_vals: if v and isinstance(v, str) and not v.startswith("http"): desc = v break kind = "social" if ("x.com" in url or "twitter.com" in url) else "tool" classifications.append(RowClassification(r, kind, {"url": url, "description": desc or "", "raw": row_vals})) continue # Warnings or misc markers if any("广告位" in v for v in non_empty if isinstance(v, str)): classifications.append(RowClassification(r, "warning", {"content": joined, "raw": row_vals})) continue # Placeholder rows to ignore as prompts if any(v in {"...", "….", "...."} for v in non_empty): classifications.append(RowClassification(r, "other", {"placeholder": True, "raw": row_vals})) continue # Otherwise: treat as prompt row (one logical prompt per row with multiple versions across columns) prompt_versions: Dict[int, str] = {} for c in range(df.shape[1]): cell = df.iloc[r, c] if c < df.shape[1] else None if isinstance(cell, str) and cell.strip(): prompt_versions[c + 1] = cell.strip() if prompt_versions: classifications.append(RowClassification(r, "prompt", {"versions": prompt_versions})) else: classifications.append(RowClassification(r, "other", {"raw": row_vals})) return classifications def _ensure_dirs(self) -> None: self.prompts_dir.mkdir(parents=True, exist_ok=True) self.category_dir.mkdir(parents=True, exist_ok=True) self.docs_dir.mkdir(parents=True, exist_ok=True) def _write_prompt_file(self, row_num: int, col_num: int, title: str, content: str, versions_in_row: List[int]) -> str: """Write a prompt file containing ONLY the prompt text, nothing else.""" row_col = f"({row_num},{col_num})" filename = f"{row_col}_{title}.md" filepath = self.category_dir / filename # Ensure content ends with newline and contains no surrounding fences/headers added by us pure = (content or "").rstrip("\n") + "\n" filepath.write_text(pure, encoding="utf-8") return filename def _generate_category_index(self, sheet_name: str, category_dir: Path, prompts_info: Dict[int, Dict]) -> None: index_path = category_dir / "index.md" total_prompts = len(prompts_info) total_versions = sum(len(meta["versions"]) for meta in prompts_info.values()) avg_versions = total_versions / total_prompts if total_prompts else 0 lines: List[str] = [] lines.append(f"# 📂 提示词分类 - {sheet_name}(基于Excel原始数据)\n") lines.append(f"最后同步: {self.now.strftime('%Y-%m-%d %H:%M:%S')}\n") lines.append("\n## 📊 统计\n") lines.append(f"- 提示词总数: {total_prompts}\n") lines.append(f"- 版本总数: {total_versions} \n") lines.append(f"- 平均版本数: {avg_versions:.1f}\n\n") lines.append("## 📋 提示词列表\n") lines.append("\n| 序号 | 标题 | 版本数 | 查看 |\n|------|------|--------|------|\n") for row in sorted(prompts_info.keys()): info = prompts_info[row] title = info["title"] versions = info["versions"] links = " / ".join([f"[v{v}](./({row},{v})_{title}.md)" for v in sorted(versions.keys())]) lines.append(f"| {row} | {title} | {len(versions)} | {links} |\n") # Version matrix max_col = 0 for info in prompts_info.values(): if info["versions"]: max_col = max(max_col, max(info["versions"].keys())) lines.append("\n## 🗂️ 版本矩阵\n") header = ["行"] + [f"v{i}" for i in range(1, max_col + 1)] + ["备注"] lines.append("\n| " + " | ".join(header) + " |\n" + "|" + "---|" * len(header) + "\n") for row in sorted(prompts_info.keys()): info = prompts_info[row] row_cells = [str(row)] for c in range(1, max_col + 1): row_cells.append("✅" if c in info["versions"] else "—") row_cells.append("") lines.append("| " + " | ".join(row_cells) + " |\n") index_path.write_text("\n".join(lines), encoding="utf-8") def _generate_prompts_index_json(self) -> None: index_json_path = self.prompts_dir / "index.json" total_prompts = sum(len(p) for p in self.prompts_info_by_sheet.values()) total_versions = sum(sum(len(meta["versions"]) for meta in p.values()) for p in self.prompts_info_by_sheet.values()) stats = { "sheets": len(self.prompts_info_by_sheet), "prompts": total_prompts, "versions": total_versions, "tools": len(self.tools) if self.tools else 0, "social_accounts": len(self.social) if self.social else 0, "crypto_wallets": len(self.wallets) if self.wallets else 0, } categories = [] for sheet_name in self.sheet_names_order: prompts_info = self.prompts_info_by_sheet.get(sheet_name, {}) categories.append( { "name": sheet_name, "prompt_count": len(prompts_info), "version_count": sum(len(meta["versions"]) for meta in prompts_info.values()), "prompts": [ { "row": row, "title": info["title"], "versions": sorted(list(info["versions"].keys())), "files": [info["versions"][v] for v in sorted(info["versions"].keys())], } for row, info in sorted(prompts_info.items()) ], } ) excel_data = { "total_rows": self.total_rows, "total_cols": self.total_cols, "sheets": list(self.prompts_info_by_sheet.keys()), } tools = {} if self.tools: for t in self.tools: name = t.get("name") or "tool" tools[name] = {k: v for k, v in t.items() if k != "name"} social_media = {} if self.social: for s in self.social: name = s.get("name") or "social" social_media[name] = {k: v for k, v in s.items() if k != "name"} support = { "description": "礼貌要饭地址", "crypto_wallets": self.wallets, } data = { "last_updated": self.now.strftime("%Y-%m-%dT%H:%M:%S"), "source": self.excel_path.name, "stats": stats, "categories": categories, "excel_data": excel_data, "tools": tools, "social_media": social_media, "support": support, "misc": self.misc, } index_json_path.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8") def _generate_docs(self, sheets: Dict[str, pd.DataFrame]) -> None: # docs/excel-data.md (full table) excel_doc_path = self.docs_dir / "excel-data.md" lines: List[str] = [] lines.append("# 📊 Excel原始数据完整记录\n") lines.append("## 数据来源\n") lines.append(f"- **文件**: {self.excel_path.name}\n") lines.append(f"- **处理时间**: {self.now.strftime('%Y-%m-%d')}\n") lines.append(f"- **工作表数量**: {len(sheets)}\n\n") for sheet_name, df in sheets.items(): rows, cols = df.shape lines.append(f"## 工作表: {sheet_name} ({rows}行×{cols}列)\n") lines.append("\n| 行号 | 列1 | 列2 | 列3 |\n|-----:|-----|-----|-----|\n") for r in range(rows): c1 = df.iloc[r, 0] if cols > 0 else "" c2 = df.iloc[r, 1] if cols > 1 else "" c3 = df.iloc[r, 2] if cols > 2 else "" def fmt(x) -> str: try: if x is None or (isinstance(x, float) and pd.isna(x)) or (hasattr(pd, 'isna') and pd.isna(x)): return "" except Exception: pass s = str(x) return s.replace("|", "\\|") lines.append(f"| {r} | {fmt(c1)} | {fmt(c2)} | {fmt(c3)} |\n") lines.append("\n") lines.append("\n---\n*完整数据提取自 {0}*\n".format(self.excel_path.name)) excel_doc_path.write_text("\n".join(lines), encoding="utf-8") # docs/tools.md tools_path = self.docs_dir / "tools.md" t_lines: List[str] = [] t_lines.append("# 🛠️ 工具与资源(从Excel提取)\n") if self.tools: t_lines.append("\n## AI优化工具\n") for t in self.tools: t_lines.append("\n### {0}\n- **URL**: {1}\n- **描述**: {2}\n- **数据来源**: Excel表格第{3}行\n".format( t.get("name") or "工具", t.get("url", ""), t.get("description", ""), (t.get("excel_row") or 0) + 1, )) if self.social: t_lines.append("\n## 社交媒体\n") for s in self.social: t_lines.append("\n### {0}\n- **URL**: {1}\n- **描述**: {2}\n- **数据来源**: Excel表格第{3}行\n".format( s.get("name") or "社交账号", s.get("url", ""), s.get("description", ""), (s.get("excel_row") or 0) + 1, )) t_lines.append("\n## 使用建议\n\n1. **OpenAI优化器**: 可以用来测试和改进本库中的提示词\n2. **社交媒体**: 关注获取项目更新和使用技巧\n3. **集成方式**: 可以将这些工具集成到自动化工作流中\n\n---\n*数据来源: {0}*\n".format(self.excel_path.name)) tools_path.write_text("\n".join(t_lines), encoding="utf-8") # docs/support.md support_path = self.docs_dir / "support.md" s_lines: List[str] = [] s_lines.append("# 💰 项目支持(从Excel提取)\n") s_lines.append("\n## 支持说明\n**礼貌要饭地址** - 如果这个项目对您有帮助,欢迎通过以下方式支持\n") if self.wallets: s_lines.append("\n## 加密货币钱包地址\n\n### 主流网络支持\n") s_lines.append("\n| 网络名称 | 钱包地址 | Excel行号 |\n|----------|----------|-----------|\n") for net, data in self.wallets.items(): s_lines.append("| **{0}** | `{1}` | 第{2}行 |\n".format(net.upper(), data.get("address", ""), (data.get("excel_row") or 0) + 1)) if self.misc: for m in self.misc: if m.get("type") == "warning" or "广告位" in m.get("content", ""): s_lines.append("\n⚠️ **重要提醒**: {0}\n".format(m.get("content"))) s_lines.append("\n### 使用建议\n1. 请确认钱包地址的准确性\n2. 建议小额测试后再进行大额转账\n3. 不同网络的转账费用不同,请选择合适的网络\n\n---\n*钱包地址来源: {0}*\n".format(self.excel_path.name)) support_path.write_text("\n".join(s_lines), encoding="utf-8") def _generate_readme(self) -> None: readme_path = self.readme_target_root / "README.md" total_prompts = sum(len(p) for p in self.prompts_info_by_sheet.values()) total_versions = sum(sum(len(meta["versions"]) for meta in p.values()) for p in self.prompts_info_by_sheet.values()) readme = [] readme.append("# 📚 提示词库(Excel转换版)\n") readme.append("![同步状态](https://img.shields.io/badge/status-synced-green)") readme.append(f"![提示词数量](https://img.shields.io/badge/prompts-{total_prompts}-blue)") readme.append(f"![版本总数](https://img.shields.io/badge/versions-{total_versions}-orange)") readme.append(f"![数据来源](https://img.shields.io/badge/source-Excel-yellow)\n") readme.append(f"最后更新: {self.now.strftime('%Y-%m-%d %H:%M:%S')}\n") readme.append("\n## 📊 总览\n") readme.append(f"- **数据来源**: {self.excel_path.name}\n") readme.append(f"- **分类数量**: {len(self.prompts_info_by_sheet)} \n- **提示词总数**: {total_prompts}\n- **版本总数**: {total_versions}\n") readme.append("\n## 📂 分类导航\n") for i, sheet_name in enumerate(self.sheet_names_order, start=1): prompts_info = self.prompts_info_by_sheet.get(sheet_name, {}) folder = f"({i})_{self._sanitize_filename(sheet_name)}" ver_count = sum(len(meta["versions"]) for meta in prompts_info.values()) readme.append(f"- [{sheet_name}](./prompts/{folder}/) - {len(prompts_info)} 个提示词, {ver_count} 个版本\n") readme.append("\n## 🔄 同步信息\n") readme.append(f"- **数据源**: {self.excel_path.name}\n- **处理时间**: {self.now.strftime('%Y-%m-%d %H:%M:%S')}\n") readme.append("\n## 📝 许可证\n本项目采用 MIT 许可证\n") readme.append("\n---\n*完全基于 Excel 表格自动生成*\n") readme_path.write_text("\n".join(readme), encoding="utf-8") def convert(self) -> None: self._ensure_dirs() sheets = self._read_excel_sheets() # If no sheets returned (shouldn't happen), fallback to empty for idx, sheet_name in enumerate(self.sheet_names_order, start=1): df = sheets[sheet_name] # Prepare per-sheet folder folder_name = f"({idx})_{self._sanitize_filename(sheet_name)}" category_dir = self.prompts_dir / folder_name category_dir.mkdir(parents=True, exist_ok=True) # Classify rows rows = self._classify_rows(df) prompts_info: Dict[int, Dict] = {} # Build prompt files for this sheet for rc in rows: if rc.kind == "prompt": excel_row_number = rc.row_index + 1 versions: Dict[int, str] = rc.data["versions"] title = self._extract_title(list(versions.values())) prompts_info[excel_row_number] = {"title": title, "versions": {}} # Rewrite files directly into category_dir for col_num, content in versions.items(): row_col = f"({excel_row_number},{col_num})" filename = f"{row_col}_{title}.md" (category_dir / filename).write_text((content or "").rstrip("\n") + "\n", encoding="utf-8") prompts_info[excel_row_number]["versions"][col_num] = filename elif rc.kind == "tool": url = rc.data.get("url", "") self.tools.append({ "name": "OpenAI 提示词优化平台" if "openai" in url else "工具", "url": url, "description": rc.data.get("description", ""), "excel_row": rc.row_index, "sheet": sheet_name, }) elif rc.kind == "social": url = rc.data.get("url", "") name = "Twitter/X 账号" if ("x.com" in url or "twitter.com" in url) else "社交账号" self.social.append({ "name": name, "url": url, "description": rc.data.get("description", ""), "excel_row": rc.row_index, "sheet": sheet_name, }) elif rc.kind == "wallet": network = str(rc.data.get("network", "")).strip() address = str(rc.data.get("address", "")).strip() if network and address: self.wallets[network.lower()] = { "address": address, "excel_row": rc.row_index, "sheet": sheet_name, } elif rc.kind == "warning": self.misc.append({"type": "warning", "excel_row": rc.row_index, "content": rc.data.get("content", ""), "sheet": sheet_name}) # Save per-sheet prompts map and index self.prompts_info_by_sheet[sheet_name] = prompts_info self._generate_category_index(sheet_name, category_dir, prompts_info) # Global indices and docs self._generate_prompts_index_json() self._generate_docs(sheets) self._generate_readme() def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser(description="Convert local Excel into prompt library structure") parser.add_argument("--excel", type=str, default=None, help="Path to the Excel file (default from config)") parser.add_argument("--config", type=str, default=None, help="Path to config.yaml (optional)") parser.add_argument("--category-name", type=str, default="prompt-category", help="Output category folder name") parser.add_argument("--out-dir", type=str, default=None, help="Optional snapshot output root. If set, writes to /prompts and /docs") return parser.parse_args() def main() -> None: args = parse_args() script_path = Path(__file__).resolve() prompt_library_dir = script_path.parent.parent project_root = prompt_library_dir.parent config_path = Path(args.config).resolve() if args.config else (prompt_library_dir / "scripts" / "config.yaml") # Resolve Excel path if args.excel: excel_path = Path(args.excel) if not excel_path.is_absolute(): excel_path = (project_root / excel_path).resolve() else: # Try config cfg_excel = None if config_path.exists() and yaml is not None: with config_path.open("r", encoding="utf-8") as f: cfg = yaml.safe_load(f) or {} cfg_excel = ((cfg.get("source") or {}).get("excel_file") or None) excel_path = (project_root / cfg_excel).resolve() if cfg_excel else (project_root / "prompt (2).xlsx").resolve() if not excel_path.exists(): raise FileNotFoundError(f"Excel file not found: {excel_path}") out_dir = Path(args.out_dir).resolve() if args.out_dir else None converter = ExcelPromptConverter( project_root=project_root, prompt_library_dir=prompt_library_dir, excel_path=excel_path, category_name=args.category_name, config_path=config_path if config_path.exists() else None, output_root=out_dir, ) converter.convert() target = out_dir if out_dir else prompt_library_dir print(f"✅ Conversion complete. Output under: {target}") if __name__ == "__main__": main()