# jsonl_to_excel.py
  1. import json
  2. import pandas as pd
  3. input_file = "prompt_jsonl/prompt_docs_refactored.jsonl"
  4. output_file = "prompt_excel/prompt_docs_refactored.xlsx"
  5. def process():
  6. data_by_cat = {}
  7. with open(input_file, 'r', encoding='utf-8') as f:
  8. for line in f:
  9. if not line.strip(): continue
  10. item = json.loads(line)
  11. cat = item['category']
  12. if cat not in data_by_cat:
  13. data_by_cat[cat] = []
  14. # Reconstruct the JSON string for the cell as it was in original Excel
  15. cell_data = {
  16. "title": item.get('title', ''),
  17. "content": item.get('content', '')
  18. }
  19. data_by_cat[cat].append(json.dumps(cell_data, ensure_ascii=False))
  20. with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
  21. # Sort categories to keep a consistent order
  22. sorted_cats = sorted(data_by_cat.keys())
  23. for cat in sorted_cats:
  24. items = data_by_cat[cat]
  25. # Each item in its own row, column 0
  26. df = pd.DataFrame(items)
  27. df.to_excel(writer, sheet_name=cat, index=False, header=False)
  28. print(f"Excel created: {output_file}")
  29. if __name__ == "__main__":
  30. process()