快速备份.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265
  1. #!/usr/bin/env python3
  2. """
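Quick project backup tool.

Reads the rules in .gitignore and packs the project files into an archive,
leaving out every file that matches a rule.

File location:
    backups/快速备份.py

Tools in the backups/ directory:
    - 快速备份.py - core backup engine (7.3 KB)
    - 一键备份.sh - one-step launcher script (2.4 KB)

Usage:
    $ bash backups/一键备份.sh
    $ python3 backups/快速备份.py

Backup output:
    backups/gz/备份_YYYYMMDD_HHMMSS.tar.gz

Applicable projects:
    Any project that contains a .gitignore file (rules are read
    automatically and matching files are excluded).

Dependencies:
    None beyond the Python standard library.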
  20. """
  21. import os
  22. import tarfile
  23. import fnmatch
  24. from pathlib import Path
  25. from datetime import datetime
  26. import argparse
  27. import sys
  28. class GitignoreFilter:
  29. """解析 .gitignore 文件并过滤文件"""
  30. def __init__(self, gitignore_path: Path, project_root: Path):
  31. self.project_root = project_root
  32. # 规则按照出现顺序存储,支持取反(!)语义,后匹配覆盖前匹配
  33. # 每项: {"pattern": str, "dir_only": bool, "negate": bool, "has_slash": bool}
  34. self.rules = []
  35. self.load_gitignore(gitignore_path)
  36. def load_gitignore(self, gitignore_path: Path):
  37. """加载并解析 .gitignore 文件"""
  38. if not gitignore_path.exists():
  39. print(f"⚠️ 警告: {gitignore_path} 不存在,将不应用任何过滤规则")
  40. return
  41. try:
  42. with open(gitignore_path, 'r', encoding='utf-8') as f:
  43. for line in f:
  44. line = line.strip()
  45. # 跳过空行和注释
  46. if not line or line.startswith('#'):
  47. continue
  48. negate = line.startswith('!')
  49. if negate:
  50. line = line[1:].lstrip()
  51. if not line:
  52. continue
  53. dir_only = line.endswith('/')
  54. has_slash = '/' in line.rstrip('/')
  55. self.rules.append({
  56. "pattern": line,
  57. "dir_only": dir_only,
  58. "negate": negate,
  59. "has_slash": has_slash,
  60. })
  61. print(f"✓ 已加载 {len(self.rules)} 条规则(含取反)")
  62. except Exception as e:
  63. print(f"❌ 读取 .gitignore 失败: {e}")
  64. sys.exit(1)
  65. def _match_rule(self, rule: dict, relative_path_str: str, is_dir: bool) -> bool:
  66. """按规则匹配路径,返回是否命中"""
  67. pattern = rule["pattern"]
  68. dir_only = rule["dir_only"]
  69. has_slash = rule["has_slash"]
  70. # 目录规则:匹配目录自身或其子路径
  71. if dir_only:
  72. normalized = pattern.rstrip('/')
  73. if relative_path_str == normalized or relative_path_str.startswith(normalized + '/'):
  74. return True
  75. return False
  76. # 带路径分隔的规则:按相对路径匹配
  77. if has_slash:
  78. return fnmatch.fnmatch(relative_path_str, pattern)
  79. # 无斜杠:匹配任意层级的基本名
  80. if fnmatch.fnmatch(Path(relative_path_str).name, pattern):
  81. return True
  82. # 额外处理目录命中:无通配符时,若任一父级目录名等于 pattern 也视为命中
  83. if pattern.isalpha() and pattern in relative_path_str.split('/'):
  84. return True
  85. return False
  86. def should_exclude(self, path: Path, is_dir: bool = False) -> bool:
  87. """
  88. 判断路径是否应该被排除(支持 ! 取反,后匹配覆盖前匹配)
  89. 返回 True 表示应该排除(不备份)
  90. """
  91. try:
  92. # 统一使用 POSIX 路径风格进行匹配
  93. relative_path_str = path.relative_to(self.project_root).as_posix()
  94. except ValueError:
  95. return False # 不在项目根目录内,不处理
  96. # Git 风格:从上到下最后一次匹配决定去留
  97. matched = None
  98. for rule in self.rules:
  99. if self._match_rule(rule, relative_path_str, is_dir):
  100. matched = not rule["negate"] # negate 表示显式允许
  101. return bool(matched)
  102. def create_backup(project_root: Path, output_file: Path, filter_obj: GitignoreFilter):
  103. """创建备份压缩包"""
  104. # 统计信息
  105. total_files = 0
  106. excluded_files = 0
  107. included_files = 0
  108. print(f"\n{'='*60}")
  109. print(f"开始备份项目: {project_root}")
  110. print(f"输出文件: {output_file}")
  111. print(f"{'='*60}\n")
  112. try:
  113. with tarfile.open(output_file, 'w:gz') as tar:
  114. # 使用 os.walk 可在目录层级提前剪枝,避免进入已忽略目录
  115. for root, dirs, files in os.walk(project_root, topdown=True):
  116. root_path = Path(root)
  117. # 目录剪枝:命中忽略规则或 .git 时不再深入
  118. pruned_dirs = []
  119. for d in dirs:
  120. dir_path = root_path / d
  121. if d == '.git' or filter_obj.should_exclude(dir_path, is_dir=True):
  122. print(f" 排除目录: {dir_path.relative_to(project_root)}")
  123. excluded_files += 1
  124. continue
  125. pruned_dirs.append(d)
  126. dirs[:] = pruned_dirs
  127. for name in files:
  128. path = root_path / name
  129. total_files += 1
  130. # 文件忽略判定
  131. if '.git' in path.parts or filter_obj.should_exclude(path):
  132. excluded_files += 1
  133. print(f" 排除: {path.relative_to(project_root)}")
  134. continue
  135. arcname = path.relative_to(project_root)
  136. tar.add(path, arcname=arcname)
  137. included_files += 1
  138. print(f" 备份: {arcname}")
  139. print(f"\n{'='*60}")
  140. print("备份完成!")
  141. print(f"{'='*60}")
  142. print(f"总文件数: {total_files}")
  143. print(f"已备份: {included_files} 个文件")
  144. print(f"已排除: {excluded_files} 个文件/目录")
  145. print(f"压缩包大小: {output_file.stat().st_size / 1024 / 1024:.2f} MB")
  146. print(f"{'='*60}")
  147. return True
  148. except Exception as e:
  149. print(f"\n❌ 备份失败: {e}")
  150. import traceback
  151. traceback.print_exc()
  152. return False
  153. def main():
  154. parser = argparse.ArgumentParser(
  155. description='快速备份项目(根据 .gitignore 排除文件)',
  156. formatter_class=argparse.RawDescriptionHelpFormatter,
  157. epilog="""
  158. 使用示例:
  159. # 基本用法(备份到 backups/gz/ 目录)
  160. python backups/快速备份.py
  161. # 指定输出文件
  162. python backups/快速备份.py -o my_backup.tar.gz
  163. # 指定项目根目录
  164. python backups/快速备份.py -p /path/to/project
  165. """
  166. )
  167. parser.add_argument(
  168. '-p', '--project',
  169. type=str,
  170. default='.',
  171. help='项目根目录路径(默认: 当前目录)'
  172. )
  173. parser.add_argument(
  174. '-o', '--output',
  175. type=str,
  176. help='输出文件路径(默认: backups/备份_YYYYMMDD_HHMMSS.tar.gz)'
  177. )
  178. parser.add_argument(
  179. '-g', '--gitignore',
  180. type=str,
  181. default='.gitignore',
  182. help='.gitignore 文件路径(默认: .gitignore)'
  183. )
  184. args = parser.parse_args()
  185. # 解析路径
  186. project_root = Path(args.project).resolve()
  187. gitignore_path = Path(args.gitignore).resolve()
  188. if not project_root.exists():
  189. print(f"❌ 错误: 项目目录不存在: {project_root}")
  190. sys.exit(1)
  191. # 确定输出文件路径
  192. if args.output:
  193. output_file = Path(args.output).resolve()
  194. else:
  195. # 默认输出到 backups/gz/ 目录
  196. backup_dir = project_root / 'backups' / 'gz'
  197. backup_dir.mkdir(parents=True, exist_ok=True)
  198. timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
  199. output_file = backup_dir / f'备份_{timestamp}.tar.gz'
  200. # 确保输出目录存在
  201. output_file.parent.mkdir(parents=True, exist_ok=True)
  202. # 创建过滤器
  203. filter_obj = GitignoreFilter(gitignore_path, project_root)
  204. # 执行备份
  205. success = create_backup(project_root, output_file, filter_obj)
  206. sys.exit(0 if success else 1)
  207. if __name__ == '__main__':
  208. main()