# Merge two share-list files of the same kind and drop duplicated entries.
#
# Process 115 files:
#   python compare_files.py --pan115 "115share_list.txt" "115share_list [tg].txt"
# Process Quark files:
#   python compare_files.py --quark "quarkshare_list.txt" "quarkshare_list [tg].txt"
# Process both kinds in one run:
#   python compare_files.py --pan115 "115share_list.txt" "115share_list [tg].txt" --quark "quarkshare_list.txt" "quarkshare_list [tg].txt"

import sys
import os
import argparse

# Share type -> substrings that identify it in a (lower-cased) file name.
_SHARE_PATTERNS = (
    ("115", ("115share", "115code", "115pan")),
    ("quark", ("quarkshare", "quarkcode", "quarkpan")),
)


def read_file(filename):
    """Return the lines of a UTF-8 text file.

    The file is decoded with ``utf-8-sig`` so that a leading byte-order mark
    is dropped instead of being glued to the first field of the first line
    (which would make that line never match its duplicate).

    Args:
        filename: Path of the file to read.

    Returns:
        The file content split into lines, without line terminators.

    Exits the process with status 1 (after printing a message) when the file
    cannot be read or is not valid UTF-8.
    """
    try:
        with open(filename, 'rb') as f:
            return f.read().decode('utf-8-sig').splitlines()
    except UnicodeDecodeError:
        print(f"文件 {filename} 编码错误,请确保是UTF-8编码")
        sys.exit(1)
    except OSError as e:
        print(f"读取文件 {filename} 时出错: {e}")
        sys.exit(1)


def _match_share_type(text):
    """Return the share type whose pattern occurs in *text*, or None."""
    for share_type, patterns in _SHARE_PATTERNS:
        if any(pattern in text for pattern in patterns):
            return share_type
    return None


def get_share_type(filename):
    """Determine the share type ("115" or "quark") from a file name.

    The base name is checked first so that a directory called e.g.
    ``115share/`` cannot misclassify ``quarkshare_list.txt``. The full path
    is still used as a fallback so paths that were recognised only through a
    directory name keep working.

    Args:
        filename: File name or path.

    Returns:
        "115" or "quark".

    Raises:
        ValueError: If no known pattern occurs in the name.
    """
    filename_lower = filename.lower()
    for candidate in (os.path.basename(filename_lower), filename_lower):
        share_type = _match_share_type(candidate)
        if share_type is not None:
            return share_type
    raise ValueError(f"无法判断文件类型: {filename}\n文件名需包含 '115share/115code/115pan' 或 'quarkshare/quarkcode/quarkpan'")


def verify_file_types(file1, file2):
    """Check that both files are of the same share type and return it.

    Raises:
        ValueError: If either type cannot be determined or the types differ.
    """
    type1 = get_share_type(file1)
    type2 = get_share_type(file2)

    if type1 != type2:
        raise ValueError(f"文件类型不匹配:\n{file1} 是 {type1} 类型\n{file2} 是 {type2} 类型")

    return type1


def extract_share_id(line, share_type):
    """Extract the share ID from one whitespace-separated line.

    Args:
        line: A line of a share-list file.
        share_type: "115" or "quark".

    Returns:
        For "115" the second field; for "quark" the second and third fields
        joined by "_". None when the line has fewer than three fields or the
        share type is unknown.
    """
    parts = line.split()
    if len(parts) < 3:
        return None
    if share_type == "115":
        return parts[1]
    if share_type == "quark":
        return f"{parts[1]}_{parts[2]}"
    return None


def get_category(line):
    """Return the category path (first field) of a line, or "" if it is blank."""
    parts = line.split()
    return parts[0] if parts else ""


def _write_lines(path, lines):
    """Write *lines* to *path* as UTF-8, one per line with "\\n" endings."""
    with open(path, 'w', encoding='utf-8', newline='\n') as f:
        f.writelines(line + '\n' for line in lines)


def merge_files(file1, file2):
    """Merge two share-list files of the same type, dropping duplicates.

    Two lines are duplicates when both their category and their share ID are
    equal. Every usable line of ``file1`` is taken; a line of ``file2`` is
    added only when its key is not present yet, otherwise it is recorded in
    the removed-items file. Lines without a share ID (fewer than three
    fields) are left out of both outputs. When ``file1`` itself repeats a
    key, the later line replaces the earlier one and is not recorded.

    The results are written to the current working directory as
    ``merged_<type>share_list.txt`` and ``removed_<type>share_items.txt``.

    Args:
        file1: Path of the primary list.
        file2: Path of the list to merge into it.

    Raises:
        FileNotFoundError: If either input file does not exist.
        ValueError: If the share type cannot be determined or differs.
    """
    for path in (file1, file2):
        if not os.path.exists(path):
            raise FileNotFoundError(f"文件不存在: {path}")

    share_type = verify_file_types(file1, file2)

    output_prefix = f"merged_{share_type}"
    output_file = f"{output_prefix}share_list.txt"
    removed_file = f"removed_{share_type}share_items.txt"

    for path in (output_file, removed_file):
        if os.path.exists(path):
            print(f"警告:文件 {path} 已存在,将被覆盖")

    lines1 = [line for line in read_file(file1) if line.strip()]
    lines2 = [line for line in read_file(file2) if line.strip()]

    # Keyed by (category, share ID). A tuple is used rather than a joined
    # string so that e.g. ("/a_b", "c") and ("/a", "b_c") stay distinct.
    share_dict = {}
    removed_lines = []

    for line in lines1:
        share_id = extract_share_id(line, share_type)
        if share_id:
            share_dict[(get_category(line), share_id)] = line

    for line in lines2:
        share_id = extract_share_id(line, share_type)
        if not share_id:
            continue
        key = (get_category(line), share_id)
        if key in share_dict:
            removed_lines.append(line)
        else:
            share_dict[key] = line

    _write_lines(output_file, share_dict.values())
    _write_lines(removed_file, removed_lines)

    print("处理完成!")
    print(f"合并后的文件:{output_file}")
    print(f"重复项清单:{removed_file}")


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.

    Returns:
        0 when every requested merge succeeded, 1 when at least one failed.
        Exits with status 1 after printing the help text when neither
        ``--pan115`` nor ``--quark`` is given.
    """
    parser = argparse.ArgumentParser(description='合并并去重分享链接文件')
    parser.add_argument('--pan115', nargs=2, metavar=('FILE1', 'FILE2'),
                        help='115网盘的两个文件')
    parser.add_argument('--quark', nargs=2, metavar=('FILE1', 'FILE2'),
                        help='夸克网盘的两个文件')

    args = parser.parse_args(argv)

    if not args.pan115 and not args.quark:
        parser.print_help()
        sys.exit(1)

    exit_code = 0
    for label, files in (("115网盘", args.pan115), ("夸克网盘", args.quark)):
        if not files:
            continue
        try:
            merge_files(files[0], files[1])
        except OSError as e:
            # Covers missing inputs as well as failures writing the outputs.
            print(f"错误: {e}")
            exit_code = 1
        except ValueError as e:
            print(f"处理{label}文件时出错: {e}")
            exit_code = 1

    return exit_code


if __name__ == "__main__":
    sys.exit(main())