Posted on August 23

import os
import sys
import time
import re
from collections import defaultdict

# Add the project path to the system path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from zipcracker_utils import extract_hash_safe, get_logger


def group_split_files(files):
    """Group files by split volume; returns {group key: [file list]}."""
    groups = defaultdict(list)
    for file_path in files:
        filename = os.path.basename(file_path)
        # Common split-volume naming schemes:
        #   xxx.part1.rar / xxx.part01.rar
        #   xxx.001 / xxx.002
        #   xxx.z01 / xxx.z02
        # Pattern 1: split marker before the archive extension (xxx.part1.rar)
        match = re.match(r"(.+?)\.(part\d+|\d{2,3}|z\d+)\.(rar|zip|7z)$", filename, re.I)
        if match:
            base = match.group(1)
            ext = match.group(3)
            group_key = f"{base}.{ext}".lower()
            groups[group_key].append(file_path)
            continue
        # Pattern 2: split marker at the very end (xxx.zip.001, xxx.001, xxx.z01)
        match = re.match(r"(.+?)\.(\d{2,3}|z\d+)$", filename, re.I)
        if match:
            groups[match.group(1).lower()].append(file_path)
            continue
        # Regular (non-split) files each get their own group
        groups[filename.lower()].append(file_path)
    return groups


def detect_file_ext(main_file):
    """Correct the extension based on the split-volume file name."""
    f = main_file.lower()
    # Check the more specific suffixes first so the .7z.001 case is reachable
    if f.endswith(".zip.001"):
        return "zip"
    elif f.endswith(".7z.001"):
        return "7z"
    elif f.endswith((".rar.part1", ".part1.rar")):
        return "rar"
    elif f.endswith(".001"):
        # Bare .001 volume with no inner extension: default to zip
        return "zip"
    else:
        # Regular file extension
        return os.path.splitext(main_file)[1].lower().lstrip('.')


def batch_extract_hash(test_dir, output_dir):
    """Batch-extract hashes into per-file outputs (supports split volumes + a combined file + resume)."""
    logger = get_logger()
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Collect every file under the input directory
    files = []
    for root, dirs, filenames in os.walk(test_dir):
        for filename in filenames:
            file_path = os.path.join(root, filename)
            files.append(file_path)
    print(f"Found {len(files)} files, grouping...")

    john_path = r"D:\Desktop\tools\john-1.9.0-jumbo-1-win64"

    # Group split volumes together
    groups = group_split_files(files)
    print(f"Identified {len(groups)} file groups")

    all_hashes_path = os.path.join(output_dir, "all_hashes.txt")
    with open(all_hashes_path, "a", encoding="utf-8") as all_f:
        for idx, (group_key, group_files) in enumerate(groups.items(), 1):
            # Build the per-file hash output path
            base_name = os.path.splitext(os.path.basename(group_key))[0]
            hash_out_path = os.path.join(output_dir, f"{base_name}.hash")

            # Resume support: skip groups whose hash file already exists
            if os.path.exists(hash_out_path):
                print(f"[{idx}/{len(groups)}] already exists, skipping: {hash_out_path}")
                continue

            print(f"\n[{idx}/{len(groups)}] file group: {group_key}")
            for f in group_files:
                print(f"  - {f}")

            # Use the first volume as the main file (sort by name so .001 / part1 comes first)
            main_file = sorted(group_files)[0]

            # Correct the extension for split volumes
            file_ext = detect_file_ext(main_file)

            start_time = time.time()
            try:
                hash_value, hash_file, status_msg = extract_hash_safe(
                    john_path, main_file, file_ext
                )
                elapsed_time = time.time() - start_time
                if hash_value:
                    # Write the per-file hash
                    with open(hash_out_path, "w", encoding="utf-8") as f:
                        f.write(hash_value.strip() + "\n")
                    # Append to the combined summary file
                    all_f.write(hash_value.strip() + "\n")
                    print(f"  extracted OK ({elapsed_time:.2f}s), saved to: {hash_out_path}")
                else:
                    print(f"  extraction failed: {status_msg}")
            except Exception as e:
                print(f"  unexpected error: {str(e)}")

    print(f"\nAll hashes extracted; combined summary file: {all_hashes_path}")


if __name__ == "__main__":
    test_dir = r"D:\Desktop\wenjian"    # input directory
    output_dir = r"D:\Desktop\hashes"   # output directory for hash files
    print(f"Starting extraction from directory: {test_dir}")
    batch_extract_hash(test_dir, output_dir)
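The script imports extract_hash_safe and get_logger from zipcracker_utils, which is not shown in the post. For readers who want to try the batch script on its own, here is a minimal sketch of what those two helpers could look like, assuming extract_hash_safe simply shells out to John the Ripper's zip2john.exe / rar2john.exe converters (real tools in the jumbo build); the run\ subfolder location, the timeout, and the three-value return shape are assumptions inferred from how the main script calls it, and 7z volumes (handled by a separate Perl script, 7z2john, in the jumbo release) are left out here.

# Hypothetical zipcracker_utils.py -- a sketch, not the actual module from the post
import logging
import os
import subprocess

def get_logger(name="zipcracker"):
    """Assumed helper: return a named logger with basic console output."""
    logging.basicConfig(level=logging.INFO)  # no-op after the first call
    return logging.getLogger(name)

def extract_hash_safe(john_path, archive_path, file_ext, timeout=300):
    """Assumed helper: run the matching *2john converter and return (hash, hash_file, status)."""
    tool_by_ext = {"zip": "zip2john.exe", "rar": "rar2john.exe"}
    tool = tool_by_ext.get(file_ext)
    if tool is None:
        return None, None, f"unsupported extension: {file_ext}"
    tool_path = os.path.join(john_path, "run", tool)  # assumption: converters live under run\
    try:
        result = subprocess.run(
            [tool_path, archive_path],
            capture_output=True, text=True, timeout=timeout,
        )
    except (OSError, subprocess.TimeoutExpired) as exc:
        return None, None, str(exc)
    hash_value = result.stdout.strip()
    if not hash_value:
        return None, None, result.stderr.strip() or "empty output"
    return hash_value, None, "ok"

The second element of the returned tuple (hash_file) is never used by the batch script, so the sketch returns None for it.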