# Merge two share-list files of the same kind and drop duplicate entries.
#
# Process 115 files:
#   python compare_files.py --pan115 "115share_list.txt" "115share_list [tg].txt"
# Process Quark files:
#   python compare_files.py --quark "quarkshare_list.txt" "quarkshare_list [tg].txt"
# Process both kinds in one run:
#   python compare_files.py --pan115 "115share_list.txt" "115share_list [tg].txt" --quark "quarkshare_list.txt" "quarkshare_list [tg].txt"

import argparse
import os
import sys
from typing import Dict, List, Optional, Tuple

# Share type -> substrings that identify it in a (lower-cased) file name.
# Order matters: "115" is tested before "quark".
_SHARE_PATTERNS = (
    ("115", ("115share", "115code", "115pan")),
    ("quark", ("quarkshare", "quarkcode", "quarkpan")),
)


def read_file(filename: str) -> List[str]:
    """Return the lines of *filename*, decoded as UTF-8.

    A leading UTF-8 byte-order mark is stripped so it cannot become part of
    the first line's category and defeat duplicate detection.

    Raises:
        FileNotFoundError: if the file does not exist.
        UnicodeDecodeError: if the content is not valid UTF-8.
    """
    with open(filename, 'rb') as f:
        return f.read().decode('utf-8-sig').splitlines()


def _match_share_type(text: str) -> Optional[str]:
    """Return the share type whose pattern occurs in *text*, or None."""
    lowered = text.lower()
    for share_type, patterns in _SHARE_PATTERNS:
        if any(pattern in lowered for pattern in patterns):
            return share_type
    return None


def get_share_type(filename: str) -> str:
    """Determine the share type ("115" or "quark") from a file name.

    The base name is checked first so that a directory component such as
    ``115share/`` cannot misclassify a Quark file; the full path is only used
    as a fallback, which keeps previously accepted paths working.

    Raises:
        ValueError: if no known pattern is found.
    """
    share_type = (
        _match_share_type(os.path.basename(filename))
        or _match_share_type(filename)
    )
    if share_type is None:
        raise ValueError(
            f"无法判断文件类型: {filename}\n"
            "文件名需包含 '115share/115code/115pan' 或 'quarkshare/quarkcode/quarkpan'"
        )
    return share_type


def verify_file_types(file1: str, file2: str) -> str:
    """Check that both files have the same share type and return that type.

    Raises:
        ValueError: if either type cannot be determined or the types differ.
    """
    type1 = get_share_type(file1)
    type2 = get_share_type(file2)
    if type1 != type2:
        raise ValueError(f"文件类型不匹配:\n{file1} 是 {type1} 类型\n{file2} 是 {type2} 类型")
    return type1


def extract_share_id(line: str, share_type: str) -> Optional[str]:
    """Extract the share ID from a whitespace-separated line.

    Returns None when the line has fewer than three fields or when
    *share_type* is neither "115" nor "quark".
    """
    parts = line.strip().split()
    if len(parts) >= 3:
        if share_type == "115":
            return parts[1]  # 115 format: the second field is the share ID
        if share_type == "quark":
            return f"{parts[1]}_{parts[2]}"  # Quark format: second and third fields combined
    return None


def get_category(line: str) -> str:
    """Return the first whitespace-separated field of *line* ("" if none)."""
    parts = line.strip().split()
    return parts[0] if parts else ""


def merge_files(file1: str, file2: str) -> None:
    """Merge two share lists of the same type, dropping duplicate entries.

    Entries are identified by (category, share ID). The first occurrence of
    an entry is kept, in input order (all of *file1*, then *file2*); every
    later occurrence, from either file, is recorded as removed. Blank lines
    and lines from which no share ID can be extracted are skipped.

    Writes ``merged_<type>share_list.txt`` and
    ``removed_<type>share_items.txt`` into the current working directory.

    Raises:
        ValueError: if the file types cannot be determined or do not match.
        FileNotFoundError: if either input file is missing.
    """
    share_type = verify_file_types(file1, file2)

    output_file = f"merged_{share_type}share_list.txt"
    removed_file = f"removed_{share_type}share_items.txt"

    # Keyed by a tuple rather than a joined string, so that category "a_b"
    # with ID "c" cannot collide with category "a" with ID "b_c".
    share_dict: Dict[Tuple[str, str], str] = {}
    removed_lines: List[str] = []

    # Both inputs are fully read before any output file is opened.
    for filename in (file1, file2):
        for line in read_file(filename):
            if not line.strip():
                continue
            share_id = extract_share_id(line, share_type)
            if not share_id:
                continue
            key = (get_category(line), share_id)
            if key in share_dict:
                removed_lines.append(line)
            else:
                share_dict[key] = line

    # Write the de-duplicated list.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(line + '\n' for line in share_dict.values())

    # Write the list of dropped duplicates.
    with open(removed_file, 'w', encoding='utf-8') as f:
        f.writelines(line + '\n' for line in removed_lines)

    print("处理完成!")
    print(f"合并后的文件:{output_file}")
    print(f"重复项清单:{removed_file}")


def main() -> None:
    """Parse the command line and merge each requested pair of files.

    The share type is derived from the file names by ``merge_files``; the
    option used only selects the label shown in error messages.
    """
    parser = argparse.ArgumentParser(description='合并并去重分享链接文件')
    parser.add_argument('--pan115', nargs=2, metavar=('FILE1', 'FILE2'),
                        help='115网盘的两个文件')
    parser.add_argument('--quark', nargs=2, metavar=('FILE1', 'FILE2'),
                        help='夸克网盘的两个文件')
    args = parser.parse_args()

    jobs = (("115网盘", args.pan115), ("夸克网盘", args.quark))

    # Nothing requested: show usage instead of exiting silently.
    if not any(files for _, files in jobs):
        parser.print_help()
        return

    for label, files in jobs:
        if not files:
            continue
        try:
            merge_files(files[0], files[1])
        except FileNotFoundError:
            print(f"未找到{label}分享文件: {files}")
        except ValueError as e:
            print(f"处理{label}文件时出错: {e}")


if __name__ == "__main__":
    main()