| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265 |
- #!/usr/bin/env python3
- """
- 快速备份项目工具
- 读取 .gitignore 规则并打包项目文件(排除匹配的文件)
- bash backups/一键备份.sh
- 文件位置:
- backups/快速备份.py
- 工具清单(backups/目录):
- • 快速备份.py - 核心备份引擎(7.3 KB)
- • 一键备份.sh - 一键执行脚本(2.4 KB)
- 使用方法:
- $ bash backups/一键备份.sh
- 或
- $ python3 backups/快速备份.py
- 备份输出:
- backups/gz/备份_YYYYMMDD_HHMMSS.tar.gz
- 适用项目:
- 任何包含 .gitignore 文件的项目(自动读取规则并排除匹配文件)
- 依赖:
- 无需额外安装包,仅使用Python内置模块
- """
- import os
- import tarfile
- import fnmatch
- from pathlib import Path
- from datetime import datetime
- import argparse
- import sys
- class GitignoreFilter:
- """解析 .gitignore 文件并过滤文件"""
- def __init__(self, gitignore_path: Path, project_root: Path):
- self.project_root = project_root
- # 规则按照出现顺序存储,支持取反(!)语义,后匹配覆盖前匹配
- # 每项: {"pattern": str, "dir_only": bool, "negate": bool, "has_slash": bool}
- self.rules = []
- self.load_gitignore(gitignore_path)
- def load_gitignore(self, gitignore_path: Path):
- """加载并解析 .gitignore 文件"""
- if not gitignore_path.exists():
- print(f"⚠️ 警告: {gitignore_path} 不存在,将不应用任何过滤规则")
- return
- try:
- with open(gitignore_path, 'r', encoding='utf-8') as f:
- for line in f:
- line = line.strip()
- # 跳过空行和注释
- if not line or line.startswith('#'):
- continue
- negate = line.startswith('!')
- if negate:
- line = line[1:].lstrip()
- if not line:
- continue
- dir_only = line.endswith('/')
- has_slash = '/' in line.rstrip('/')
- self.rules.append({
- "pattern": line,
- "dir_only": dir_only,
- "negate": negate,
- "has_slash": has_slash,
- })
- print(f"✓ 已加载 {len(self.rules)} 条规则(含取反)")
- except Exception as e:
- print(f"❌ 读取 .gitignore 失败: {e}")
- sys.exit(1)
- def _match_rule(self, rule: dict, relative_path_str: str, is_dir: bool) -> bool:
- """按规则匹配路径,返回是否命中"""
- pattern = rule["pattern"]
- dir_only = rule["dir_only"]
- has_slash = rule["has_slash"]
- # 目录规则:匹配目录自身或其子路径
- if dir_only:
- normalized = pattern.rstrip('/')
- if relative_path_str == normalized or relative_path_str.startswith(normalized + '/'):
- return True
- return False
- # 带路径分隔的规则:按相对路径匹配
- if has_slash:
- return fnmatch.fnmatch(relative_path_str, pattern)
- # 无斜杠:匹配任意层级的基本名
- if fnmatch.fnmatch(Path(relative_path_str).name, pattern):
- return True
- # 额外处理目录命中:无通配符时,若任一父级目录名等于 pattern 也视为命中
- if pattern.isalpha() and pattern in relative_path_str.split('/'):
- return True
- return False
- def should_exclude(self, path: Path, is_dir: bool = False) -> bool:
- """
- 判断路径是否应该被排除(支持 ! 取反,后匹配覆盖前匹配)
- 返回 True 表示应该排除(不备份)
- """
- try:
- # 统一使用 POSIX 路径风格进行匹配
- relative_path_str = path.relative_to(self.project_root).as_posix()
- except ValueError:
- return False # 不在项目根目录内,不处理
- # Git 风格:从上到下最后一次匹配决定去留
- matched = None
- for rule in self.rules:
- if self._match_rule(rule, relative_path_str, is_dir):
- matched = not rule["negate"] # negate 表示显式允许
- return bool(matched)
- def create_backup(project_root: Path, output_file: Path, filter_obj: GitignoreFilter):
- """创建备份压缩包"""
- # 统计信息
- total_files = 0
- excluded_files = 0
- included_files = 0
- print(f"\n{'='*60}")
- print(f"开始备份项目: {project_root}")
- print(f"输出文件: {output_file}")
- print(f"{'='*60}\n")
- try:
- with tarfile.open(output_file, 'w:gz') as tar:
- # 使用 os.walk 可在目录层级提前剪枝,避免进入已忽略目录
- for root, dirs, files in os.walk(project_root, topdown=True):
- root_path = Path(root)
- # 目录剪枝:命中忽略规则或 .git 时不再深入
- pruned_dirs = []
- for d in dirs:
- dir_path = root_path / d
- if d == '.git' or filter_obj.should_exclude(dir_path, is_dir=True):
- print(f" 排除目录: {dir_path.relative_to(project_root)}")
- excluded_files += 1
- continue
- pruned_dirs.append(d)
- dirs[:] = pruned_dirs
- for name in files:
- path = root_path / name
- total_files += 1
- # 文件忽略判定
- if '.git' in path.parts or filter_obj.should_exclude(path):
- excluded_files += 1
- print(f" 排除: {path.relative_to(project_root)}")
- continue
- arcname = path.relative_to(project_root)
- tar.add(path, arcname=arcname)
- included_files += 1
- print(f" 备份: {arcname}")
- print(f"\n{'='*60}")
- print("备份完成!")
- print(f"{'='*60}")
- print(f"总文件数: {total_files}")
- print(f"已备份: {included_files} 个文件")
- print(f"已排除: {excluded_files} 个文件/目录")
- print(f"压缩包大小: {output_file.stat().st_size / 1024 / 1024:.2f} MB")
- print(f"{'='*60}")
- return True
- except Exception as e:
- print(f"\n❌ 备份失败: {e}")
- import traceback
- traceback.print_exc()
- return False
- def main():
- parser = argparse.ArgumentParser(
- description='快速备份项目(根据 .gitignore 排除文件)',
- formatter_class=argparse.RawDescriptionHelpFormatter,
- epilog="""
- 使用示例:
- # 基本用法(备份到 backups/gz/ 目录)
- python backups/快速备份.py
- # 指定输出文件
- python backups/快速备份.py -o my_backup.tar.gz
- # 指定项目根目录
- python backups/快速备份.py -p /path/to/project
- """
- )
- parser.add_argument(
- '-p', '--project',
- type=str,
- default='.',
- help='项目根目录路径(默认: 当前目录)'
- )
- parser.add_argument(
- '-o', '--output',
- type=str,
- help='输出文件路径(默认: backups/备份_YYYYMMDD_HHMMSS.tar.gz)'
- )
- parser.add_argument(
- '-g', '--gitignore',
- type=str,
- default='.gitignore',
- help='.gitignore 文件路径(默认: .gitignore)'
- )
- args = parser.parse_args()
- # 解析路径
- project_root = Path(args.project).resolve()
- gitignore_path = Path(args.gitignore).resolve()
- if not project_root.exists():
- print(f"❌ 错误: 项目目录不存在: {project_root}")
- sys.exit(1)
- # 确定输出文件路径
- if args.output:
- output_file = Path(args.output).resolve()
- else:
- # 默认输出到 backups/gz/ 目录
- backup_dir = project_root / 'backups' / 'gz'
- backup_dir.mkdir(parents=True, exist_ok=True)
- timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
- output_file = backup_dir / f'备份_{timestamp}.tar.gz'
- # 确保输出目录存在
- output_file.parent.mkdir(parents=True, exist_ok=True)
- # 创建过滤器
- filter_obj = GitignoreFilter(gitignore_path, project_root)
- # 执行备份
- success = create_backup(project_root, output_file, filter_obj)
- sys.exit(0 if success else 1)
- if __name__ == '__main__':
- main()
|