| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- convert_local.py
- Reads a local Excel file and converts its contents into a structured prompt library
- under `prompt-library/` per the development guide. It generates:
- - prompts/<category>/ (one file per non-empty cell across columns for each prompt row)
- - prompts/index.json (summary + traceability)
- - prompts/<category>/index.md (table + version matrix)
- - docs/tools.md, docs/support.md, docs/excel-data.md
- - README.md (top-level for prompt-library)
- Usage:
- python prompt-library/scripts/convert_local.py \
- [--excel "/absolute/or/relative/path/to/prompt (2).xlsx"] \
- [--config prompt-library/scripts/config.yaml] \
- [--category-name prompt-category]
- If no arguments are provided, it will:
- - load config from prompt-library/scripts/config.yaml (if present)
- - resolve Excel path from config.source.excel_file relative to project root
- - default category to "prompt-category"
- Dependencies: pandas, openpyxl, PyYAML
- """
- from __future__ import annotations
- import argparse
- import json
- import re
- from dataclasses import dataclass
- from datetime import datetime
- from pathlib import Path
- from typing import Dict, List, Optional, Tuple
- import pandas as pd
- try:
- import yaml # type: ignore
- except Exception: # pragma: no cover
- yaml = None # Optional; script still works without YAML if no config provided
- @dataclass
- class RowClassification:
- row_index: int # zero-based excel index
- kind: str # prompt|tool|social|wallet_header|wallet|warning|other
- data: Dict
- class ExcelPromptConverter:
- def __init__(
- self,
- project_root: Path,
- prompt_library_dir: Path,
- excel_path: Path,
- category_name: str = "prompt-category",
- config_path: Optional[Path] = None,
- output_root: Optional[Path] = None,
- ) -> None:
- self.project_root = project_root
- self.prompt_library_dir = prompt_library_dir
- # If an output_root is provided, write into that snapshot directory
- # rather than the in-repo prompts/docs locations.
- if output_root is not None:
- self.output_root = output_root
- self.prompts_dir = output_root / "prompts"
- self.docs_dir = output_root / "docs"
- self.readme_target_root = output_root
- else:
- self.output_root = None
- self.prompts_dir = prompt_library_dir / "prompts"
- self.docs_dir = prompt_library_dir / "docs"
- self.readme_target_root = prompt_library_dir
- self.scripts_dir = prompt_library_dir / "scripts"
- self.category_name = category_name # fallback if single sheet
- self.category_dir = self.prompts_dir / self.category_name
- self.excel_path = excel_path
- self.config_path = config_path
- self.config = self._load_config(config_path)
- self.now = datetime.now()
- # Per-sheet prompts map: {sheet_name: {excel_row -> {title, versions{col->file}}}}
- self.prompts_info_by_sheet: Dict[str, Dict[int, Dict]] = {}
- self.tools: List[Dict] = []
- self.social: List[Dict] = []
- self.wallets: Dict[str, Dict] = {}
- self.misc: List[Dict] = []
- self.total_rows = 0
- self.total_cols = 0
- self.sheet_names_order: List[str] = []
- def _load_config(self, config_path: Optional[Path]) -> Dict:
- if config_path and config_path.exists() and yaml is not None:
- with config_path.open("r", encoding="utf-8") as f:
- return yaml.safe_load(f) or {}
- return {}
- def _sanitize_filename(self, text: str, max_length: int = 60) -> str:
- if not text:
- return "untitled"
- text = str(text).strip()
- text = re.sub(r"[\\/:*?\"<>|\r\n]+", "", text)
- text = text.replace(" ", "_")
- if len(text) > max_length:
- text = text[:max_length].rstrip("_-")
- return text or "untitled"
- def _extract_title(self, contents: List[str]) -> str:
- for c in contents:
- if c and c.strip():
- first_line = c.strip().splitlines()[0]
- words = first_line.split()
- candidate = " ".join(words[:6])
- return self._sanitize_filename(candidate)
- return "untitled"
- def _read_excel_sheets(self) -> Dict[str, pd.DataFrame]:
- # Read all sheets; if workbook has single sheet, still returns dict with one entry
- sheets: Dict[str, pd.DataFrame] = pd.read_excel(self.excel_path, header=None, engine="openpyxl", sheet_name=None) # type: ignore
- normalized: Dict[str, pd.DataFrame] = {}
- for sheet_name, df in sheets.items():
- try:
- df = df.map(lambda v: v.strip() if isinstance(v, str) else v) # pandas >=2.1
- except Exception:
- df = df.applymap(lambda v: v.strip() if isinstance(v, str) else v) # fallback
- normalized[sheet_name] = df
- # preserve order of sheets
- self.sheet_names_order = list(normalized.keys())
- # set global rows/cols to first sheet for summary; detailed per-sheet handled later
- if normalized:
- any_df = normalized[self.sheet_names_order[0]]
- self.total_rows, self.total_cols = any_df.shape
- return normalized
- def _classify_rows(self, df: pd.DataFrame) -> List[RowClassification]:
- classifications: List[RowClassification] = []
- wallet_mode = False
- for r in range(df.shape[0]):
- row_vals = [df.iloc[r, c] if c < df.shape[1] else None for c in range(df.shape[1])]
- non_empty = [v for v in row_vals if isinstance(v, str) and v.strip()]
- any_http = any(isinstance(v, str) and v.startswith("http") for v in row_vals)
- if not non_empty:
- classifications.append(RowClassification(r, "other", {"empty": True}))
- continue
- # Wallet header detection (e.g., contains "网络" and a label like "礼貌要饭地址")
- joined = " ".join([v for v in non_empty])
- if any(k in joined for k in ["网络", "网络名称"]) and any(
- k in joined for k in ["礼貌要饭地址", "钱包", "地址"]
- ):
- wallet_mode = True
- classifications.append(RowClassification(r, "wallet_header", {"raw": row_vals}))
- continue
- if wallet_mode:
- # If the row still looks like wallet data (two columns: network, address)
- first, second = row_vals[0] if len(row_vals) > 0 else None, row_vals[1] if len(row_vals) > 1 else None
- if (first and isinstance(first, str)) and (second and isinstance(second, str)):
- classifications.append(
- RowClassification(
- r,
- "wallet",
- {
- "network": first,
- "address": second,
- "raw": row_vals,
- },
- )
- )
- continue
- else:
- wallet_mode = False # end wallet section if pattern breaks
- # Tools and social heuristics
- if any_http:
- url = next(v for v in row_vals if isinstance(v, str) and v.startswith("http"))
- desc = None
- for v in row_vals:
- if v and isinstance(v, str) and not v.startswith("http"):
- desc = v
- break
- kind = "social" if ("x.com" in url or "twitter.com" in url) else "tool"
- classifications.append(RowClassification(r, kind, {"url": url, "description": desc or "", "raw": row_vals}))
- continue
- # Warnings or misc markers
- if any("广告位" in v for v in non_empty if isinstance(v, str)):
- classifications.append(RowClassification(r, "warning", {"content": joined, "raw": row_vals}))
- continue
- # Placeholder rows to ignore as prompts
- if any(v in {"...", "….", "...."} for v in non_empty):
- classifications.append(RowClassification(r, "other", {"placeholder": True, "raw": row_vals}))
- continue
- # Otherwise: treat as prompt row (one logical prompt per row with multiple versions across columns)
- prompt_versions: Dict[int, str] = {}
- for c in range(df.shape[1]):
- cell = df.iloc[r, c] if c < df.shape[1] else None
- if isinstance(cell, str) and cell.strip():
- prompt_versions[c + 1] = cell.strip()
- if prompt_versions:
- classifications.append(RowClassification(r, "prompt", {"versions": prompt_versions}))
- else:
- classifications.append(RowClassification(r, "other", {"raw": row_vals}))
- return classifications
- def _ensure_dirs(self) -> None:
- self.prompts_dir.mkdir(parents=True, exist_ok=True)
- self.category_dir.mkdir(parents=True, exist_ok=True)
- self.docs_dir.mkdir(parents=True, exist_ok=True)
- def _write_prompt_file(self, row_num: int, col_num: int, title: str, content: str, versions_in_row: List[int]) -> str:
- """Write a prompt file containing ONLY the prompt text, nothing else."""
- row_col = f"({row_num},{col_num})"
- filename = f"{row_col}_{title}.md"
- filepath = self.category_dir / filename
- # Ensure content ends with newline and contains no surrounding fences/headers added by us
- pure = (content or "").rstrip("\n") + "\n"
- filepath.write_text(pure, encoding="utf-8")
- return filename
- def _generate_category_index(self, sheet_name: str, category_dir: Path, prompts_info: Dict[int, Dict]) -> None:
- index_path = category_dir / "index.md"
- total_prompts = len(prompts_info)
- total_versions = sum(len(meta["versions"]) for meta in prompts_info.values())
- avg_versions = total_versions / total_prompts if total_prompts else 0
- lines: List[str] = []
- lines.append(f"# 📂 提示词分类 - {sheet_name}(基于Excel原始数据)\n")
- lines.append(f"最后同步: {self.now.strftime('%Y-%m-%d %H:%M:%S')}\n")
- lines.append("\n## 📊 统计\n")
- lines.append(f"- 提示词总数: {total_prompts}\n")
- lines.append(f"- 版本总数: {total_versions} \n")
- lines.append(f"- 平均版本数: {avg_versions:.1f}\n\n")
- lines.append("## 📋 提示词列表\n")
- lines.append("\n| 序号 | 标题 | 版本数 | 查看 |\n|------|------|--------|------|\n")
- for row in sorted(prompts_info.keys()):
- info = prompts_info[row]
- title = info["title"]
- versions = info["versions"]
- links = " / ".join([f"[v{v}](./({row},{v})_{title}.md)" for v in sorted(versions.keys())])
- lines.append(f"| {row} | {title} | {len(versions)} | {links} |\n")
- # Version matrix
- max_col = 0
- for info in prompts_info.values():
- if info["versions"]:
- max_col = max(max_col, max(info["versions"].keys()))
- lines.append("\n## 🗂️ 版本矩阵\n")
- header = ["行"] + [f"v{i}" for i in range(1, max_col + 1)] + ["备注"]
- lines.append("\n| " + " | ".join(header) + " |\n" + "|" + "---|" * len(header) + "\n")
- for row in sorted(prompts_info.keys()):
- info = prompts_info[row]
- row_cells = [str(row)]
- for c in range(1, max_col + 1):
- row_cells.append("✅" if c in info["versions"] else "—")
- row_cells.append("")
- lines.append("| " + " | ".join(row_cells) + " |\n")
- index_path.write_text("\n".join(lines), encoding="utf-8")
- def _generate_prompts_index_json(self) -> None:
- index_json_path = self.prompts_dir / "index.json"
- total_prompts = sum(len(p) for p in self.prompts_info_by_sheet.values())
- total_versions = sum(sum(len(meta["versions"]) for meta in p.values()) for p in self.prompts_info_by_sheet.values())
- stats = {
- "sheets": len(self.prompts_info_by_sheet),
- "prompts": total_prompts,
- "versions": total_versions,
- "tools": len(self.tools) if self.tools else 0,
- "social_accounts": len(self.social) if self.social else 0,
- "crypto_wallets": len(self.wallets) if self.wallets else 0,
- }
- categories = []
- for sheet_name in self.sheet_names_order:
- prompts_info = self.prompts_info_by_sheet.get(sheet_name, {})
- categories.append(
- {
- "name": sheet_name,
- "prompt_count": len(prompts_info),
- "version_count": sum(len(meta["versions"]) for meta in prompts_info.values()),
- "prompts": [
- {
- "row": row,
- "title": info["title"],
- "versions": sorted(list(info["versions"].keys())),
- "files": [info["versions"][v] for v in sorted(info["versions"].keys())],
- }
- for row, info in sorted(prompts_info.items())
- ],
- }
- )
- excel_data = {
- "total_rows": self.total_rows,
- "total_cols": self.total_cols,
- "sheets": list(self.prompts_info_by_sheet.keys()),
- }
- tools = {}
- if self.tools:
- for t in self.tools:
- name = t.get("name") or "tool"
- tools[name] = {k: v for k, v in t.items() if k != "name"}
- social_media = {}
- if self.social:
- for s in self.social:
- name = s.get("name") or "social"
- social_media[name] = {k: v for k, v in s.items() if k != "name"}
- support = {
- "description": "礼貌要饭地址",
- "crypto_wallets": self.wallets,
- }
- data = {
- "last_updated": self.now.strftime("%Y-%m-%dT%H:%M:%S"),
- "source": self.excel_path.name,
- "stats": stats,
- "categories": categories,
- "excel_data": excel_data,
- "tools": tools,
- "social_media": social_media,
- "support": support,
- "misc": self.misc,
- }
- index_json_path.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
- def _generate_docs(self, sheets: Dict[str, pd.DataFrame]) -> None:
- # docs/excel-data.md (full table)
- excel_doc_path = self.docs_dir / "excel-data.md"
- lines: List[str] = []
- lines.append("# 📊 Excel原始数据完整记录\n")
- lines.append("## 数据来源\n")
- lines.append(f"- **文件**: {self.excel_path.name}\n")
- lines.append(f"- **处理时间**: {self.now.strftime('%Y-%m-%d')}\n")
- lines.append(f"- **工作表数量**: {len(sheets)}\n\n")
- for sheet_name, df in sheets.items():
- rows, cols = df.shape
- lines.append(f"## 工作表: {sheet_name} ({rows}行×{cols}列)\n")
- lines.append("\n| 行号 | 列1 | 列2 | 列3 |\n|-----:|-----|-----|-----|\n")
- for r in range(rows):
- c1 = df.iloc[r, 0] if cols > 0 else ""
- c2 = df.iloc[r, 1] if cols > 1 else ""
- c3 = df.iloc[r, 2] if cols > 2 else ""
- def fmt(x) -> str:
- try:
- if x is None or (isinstance(x, float) and pd.isna(x)) or (hasattr(pd, 'isna') and pd.isna(x)):
- return ""
- except Exception:
- pass
- s = str(x)
- return s.replace("|", "\\|")
- lines.append(f"| {r} | {fmt(c1)} | {fmt(c2)} | {fmt(c3)} |\n")
- lines.append("\n")
- lines.append("\n---\n*完整数据提取自 {0}*\n".format(self.excel_path.name))
- excel_doc_path.write_text("\n".join(lines), encoding="utf-8")
- # docs/tools.md
- tools_path = self.docs_dir / "tools.md"
- t_lines: List[str] = []
- t_lines.append("# 🛠️ 工具与资源(从Excel提取)\n")
- if self.tools:
- t_lines.append("\n## AI优化工具\n")
- for t in self.tools:
- t_lines.append("\n### {0}\n- **URL**: {1}\n- **描述**: {2}\n- **数据来源**: Excel表格第{3}行\n".format(
- t.get("name") or "工具",
- t.get("url", ""),
- t.get("description", ""),
- (t.get("excel_row") or 0) + 1,
- ))
- if self.social:
- t_lines.append("\n## 社交媒体\n")
- for s in self.social:
- t_lines.append("\n### {0}\n- **URL**: {1}\n- **描述**: {2}\n- **数据来源**: Excel表格第{3}行\n".format(
- s.get("name") or "社交账号",
- s.get("url", ""),
- s.get("description", ""),
- (s.get("excel_row") or 0) + 1,
- ))
- t_lines.append("\n## 使用建议\n\n1. **OpenAI优化器**: 可以用来测试和改进本库中的提示词\n2. **社交媒体**: 关注获取项目更新和使用技巧\n3. **集成方式**: 可以将这些工具集成到自动化工作流中\n\n---\n*数据来源: {0}*\n".format(self.excel_path.name))
- tools_path.write_text("\n".join(t_lines), encoding="utf-8")
- # docs/support.md
- support_path = self.docs_dir / "support.md"
- s_lines: List[str] = []
- s_lines.append("# 💰 项目支持(从Excel提取)\n")
- s_lines.append("\n## 支持说明\n**礼貌要饭地址** - 如果这个项目对您有帮助,欢迎通过以下方式支持\n")
- if self.wallets:
- s_lines.append("\n## 加密货币钱包地址\n\n### 主流网络支持\n")
- s_lines.append("\n| 网络名称 | 钱包地址 | Excel行号 |\n|----------|----------|-----------|\n")
- for net, data in self.wallets.items():
- s_lines.append("| **{0}** | `{1}` | 第{2}行 |\n".format(net.upper(), data.get("address", ""), (data.get("excel_row") or 0) + 1))
- if self.misc:
- for m in self.misc:
- if m.get("type") == "warning" or "广告位" in m.get("content", ""):
- s_lines.append("\n⚠️ **重要提醒**: {0}\n".format(m.get("content")))
- s_lines.append("\n### 使用建议\n1. 请确认钱包地址的准确性\n2. 建议小额测试后再进行大额转账\n3. 不同网络的转账费用不同,请选择合适的网络\n\n---\n*钱包地址来源: {0}*\n".format(self.excel_path.name))
- support_path.write_text("\n".join(s_lines), encoding="utf-8")
- def _generate_readme(self) -> None:
- readme_path = self.readme_target_root / "README.md"
- total_prompts = sum(len(p) for p in self.prompts_info_by_sheet.values())
- total_versions = sum(sum(len(meta["versions"]) for meta in p.values()) for p in self.prompts_info_by_sheet.values())
- readme = []
- readme.append("# 📚 提示词库(Excel转换版)\n")
- readme.append("")
- readme.append(f"")
- readme.append(f"")
- readme.append(f"\n")
- readme.append(f"最后更新: {self.now.strftime('%Y-%m-%d %H:%M:%S')}\n")
- readme.append("\n## 📊 总览\n")
- readme.append(f"- **数据来源**: {self.excel_path.name}\n")
- readme.append(f"- **分类数量**: {len(self.prompts_info_by_sheet)} \n- **提示词总数**: {total_prompts}\n- **版本总数**: {total_versions}\n")
- readme.append("\n## 📂 分类导航\n")
- for i, sheet_name in enumerate(self.sheet_names_order, start=1):
- prompts_info = self.prompts_info_by_sheet.get(sheet_name, {})
- folder = f"({i})_{self._sanitize_filename(sheet_name)}"
- ver_count = sum(len(meta["versions"]) for meta in prompts_info.values())
- readme.append(f"- [{sheet_name}](./prompts/{folder}/) - {len(prompts_info)} 个提示词, {ver_count} 个版本\n")
- readme.append("\n## 🔄 同步信息\n")
- readme.append(f"- **数据源**: {self.excel_path.name}\n- **处理时间**: {self.now.strftime('%Y-%m-%d %H:%M:%S')}\n")
- readme.append("\n## 📝 许可证\n本项目采用 MIT 许可证\n")
- readme.append("\n---\n*完全基于 Excel 表格自动生成*\n")
- readme_path.write_text("\n".join(readme), encoding="utf-8")
- def convert(self) -> None:
- self._ensure_dirs()
- sheets = self._read_excel_sheets()
- # If no sheets returned (shouldn't happen), fallback to empty
- for idx, sheet_name in enumerate(self.sheet_names_order, start=1):
- df = sheets[sheet_name]
- # Prepare per-sheet folder
- folder_name = f"({idx})_{self._sanitize_filename(sheet_name)}"
- category_dir = self.prompts_dir / folder_name
- category_dir.mkdir(parents=True, exist_ok=True)
- # Classify rows
- rows = self._classify_rows(df)
- prompts_info: Dict[int, Dict] = {}
- # Build prompt files for this sheet
- for rc in rows:
- if rc.kind == "prompt":
- excel_row_number = rc.row_index + 1
- versions: Dict[int, str] = rc.data["versions"]
- title = self._extract_title(list(versions.values()))
- prompts_info[excel_row_number] = {"title": title, "versions": {}}
- # Rewrite files directly into category_dir
- for col_num, content in versions.items():
- row_col = f"({excel_row_number},{col_num})"
- filename = f"{row_col}_{title}.md"
- (category_dir / filename).write_text((content or "").rstrip("\n") + "\n", encoding="utf-8")
- prompts_info[excel_row_number]["versions"][col_num] = filename
- elif rc.kind == "tool":
- url = rc.data.get("url", "")
- self.tools.append({
- "name": "OpenAI 提示词优化平台" if "openai" in url else "工具",
- "url": url,
- "description": rc.data.get("description", ""),
- "excel_row": rc.row_index,
- "sheet": sheet_name,
- })
- elif rc.kind == "social":
- url = rc.data.get("url", "")
- name = "Twitter/X 账号" if ("x.com" in url or "twitter.com" in url) else "社交账号"
- self.social.append({
- "name": name,
- "url": url,
- "description": rc.data.get("description", ""),
- "excel_row": rc.row_index,
- "sheet": sheet_name,
- })
- elif rc.kind == "wallet":
- network = str(rc.data.get("network", "")).strip()
- address = str(rc.data.get("address", "")).strip()
- if network and address:
- self.wallets[network.lower()] = {
- "address": address,
- "excel_row": rc.row_index,
- "sheet": sheet_name,
- }
- elif rc.kind == "warning":
- self.misc.append({"type": "warning", "excel_row": rc.row_index, "content": rc.data.get("content", ""), "sheet": sheet_name})
- # Save per-sheet prompts map and index
- self.prompts_info_by_sheet[sheet_name] = prompts_info
- self._generate_category_index(sheet_name, category_dir, prompts_info)
- # Global indices and docs
- self._generate_prompts_index_json()
- self._generate_docs(sheets)
- self._generate_readme()
- def parse_args() -> argparse.Namespace:
- parser = argparse.ArgumentParser(description="Convert local Excel into prompt library structure")
- parser.add_argument("--excel", type=str, default=None, help="Path to the Excel file (default from config)")
- parser.add_argument("--config", type=str, default=None, help="Path to config.yaml (optional)")
- parser.add_argument("--category-name", type=str, default="prompt-category", help="Output category folder name")
- parser.add_argument("--out-dir", type=str, default=None, help="Optional snapshot output root. If set, writes to <out-dir>/prompts and <out-dir>/docs")
- return parser.parse_args()
- def main() -> None:
- args = parse_args()
- script_path = Path(__file__).resolve()
- prompt_library_dir = script_path.parent.parent
- project_root = prompt_library_dir.parent
- config_path = Path(args.config).resolve() if args.config else (prompt_library_dir / "scripts" / "config.yaml")
- # Resolve Excel path
- if args.excel:
- excel_path = Path(args.excel)
- if not excel_path.is_absolute():
- excel_path = (project_root / excel_path).resolve()
- else:
- # Try config
- cfg_excel = None
- if config_path.exists() and yaml is not None:
- with config_path.open("r", encoding="utf-8") as f:
- cfg = yaml.safe_load(f) or {}
- cfg_excel = ((cfg.get("source") or {}).get("excel_file") or None)
- excel_path = (project_root / cfg_excel).resolve() if cfg_excel else (project_root / "prompt (2).xlsx").resolve()
- if not excel_path.exists():
- raise FileNotFoundError(f"Excel file not found: {excel_path}")
- out_dir = Path(args.out_dir).resolve() if args.out_dir else None
- converter = ExcelPromptConverter(
- project_root=project_root,
- prompt_library_dir=prompt_library_dir,
- excel_path=excel_path,
- category_name=args.category_name,
- config_path=config_path if config_path.exists() else None,
- output_root=out_dir,
- )
- converter.convert()
- target = out_dir if out_dir else prompt_library_dir
- print(f"✅ Conversion complete. Output under: {target}")
- if __name__ == "__main__":
- main()
|