xiaoya_compare_files.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
# Process 115 share-list files:
# python compare_files.py --pan115 "115share_list.txt" "115share_list [tg].txt"
# Process Quark share-list files:
# python compare_files.py --quark "quarkshare_list.txt" "quarkshare_list [tg].txt"
# Process both kinds in one run:
# python compare_files.py --pan115 "115share_list.txt" "115share_list [tg].txt" --quark "quarkshare_list.txt" "quarkshare_list [tg].txt"
  7. import sys
  8. import os
  9. import argparse
  10. def read_file(filename):
  11. try:
  12. with open(filename, 'rb') as f:
  13. return f.read().decode('utf-8').splitlines()
  14. except UnicodeDecodeError:
  15. print(f"文件 {filename} 编码错误,请确保是UTF-8编码")
  16. sys.exit(1)
  17. except Exception as e:
  18. print(f"读取文件 {filename} 时出错: {e}")
  19. sys.exit(1)
  20. def get_share_type(filename):
  21. """判断分享文件类型"""
  22. filename_lower = filename.lower()
  23. # 115网盘相关的文件名模式
  24. if any(pattern in filename_lower for pattern in ['115share', '115code', '115pan']):
  25. return "115"
  26. # 夸克网盘相关的文件名模式
  27. elif any(pattern in filename_lower for pattern in ['quarkshare', 'quarkcode', 'quarkpan']):
  28. return "quark"
  29. else:
  30. raise ValueError(f"无法判断文件类型: {filename}\n文件名需包含 '115share/115code/115pan' 或 'quarkshare/quarkcode/quarkpan'")
  31. def verify_file_types(file1, file2):
  32. """验证两个文件是否属于同一类型"""
  33. type1 = get_share_type(file1)
  34. type2 = get_share_type(file2)
  35. if type1 != type2:
  36. raise ValueError(f"文件类型不匹配:\n{file1} 是 {type1} 类型\n{file2} 是 {type2} 类型")
  37. return type1
  38. def extract_share_id(line, share_type):
  39. """根据不同类型提取分享ID"""
  40. parts = line.strip().split()
  41. if len(parts) >= 3:
  42. if share_type == "115":
  43. return parts[1].strip() # 115格式:第二部分是分享ID
  44. elif share_type == "quark":
  45. return f"{parts[1].strip()}_{parts[2].strip()}" # 夸克格式:第二和第三部分组合
  46. return None
  47. def get_category(line):
  48. """获取分类路径(第一个部分)"""
  49. parts = line.strip().split()
  50. return parts[0].strip() if parts else ""
  51. def merge_files(file1, file2):
  52. """根据文件名判断类型并合并文件"""
  53. # 检查文件是否存在
  54. for file in [file1, file2]:
  55. if not os.path.exists(file):
  56. raise FileNotFoundError(f"文件不存在: {file}")
  57. # 判断并验证文件类型
  58. share_type = verify_file_types(file1, file2)
  59. output_prefix = f"merged_{share_type}"
  60. output_file = f"{output_prefix}share_list.txt"
  61. removed_file = f"removed_{share_type}share_items.txt"
  62. # 检查输出文件是否已存在
  63. for file in [output_file, removed_file]:
  64. if os.path.exists(file):
  65. print(f"警告:文件 {file} 已存在,将被覆盖")
  66. # 读取并合并所有有效行
  67. lines1 = [line for line in read_file(file1) if line.strip()]
  68. lines2 = [line for line in read_file(file2) if line.strip()]
  69. # 使用字典来存储,键为"分类+分享ID",值为完整行
  70. share_dict = {}
  71. removed_lines = []
  72. # 处理第一个文件
  73. for line in lines1:
  74. share_id = extract_share_id(line, share_type)
  75. if share_id:
  76. category = get_category(line)
  77. key = f"{category}_{share_id}"
  78. share_dict[key] = line
  79. # 处理第二个文件,记录重复项
  80. for line in lines2:
  81. share_id = extract_share_id(line, share_type)
  82. if share_id:
  83. category = get_category(line)
  84. key = f"{category}_{share_id}"
  85. if key in share_dict:
  86. removed_lines.append(line)
  87. else:
  88. share_dict[key] = line
  89. # 写入去重后的文件
  90. with open(output_file, 'w', encoding='utf-8', newline='\n') as f:
  91. for line in share_dict.values():
  92. f.write(line + '\n')
  93. # 写入删除项清单
  94. with open(removed_file, 'w', encoding='utf-8', newline='\n') as f:
  95. for line in removed_lines:
  96. f.write(line + '\n')
  97. print(f"处理完成!")
  98. print(f"合并后的文件:{output_file}")
  99. print(f"重复项清单:{removed_file}")
  100. def main():
  101. """主函数,处理不同类型的文件"""
  102. parser = argparse.ArgumentParser(description='合并并去重分享链接文件')
  103. parser.add_argument('--pan115', nargs=2, metavar=('FILE1', 'FILE2'),
  104. help='115网盘的两个文件')
  105. parser.add_argument('--quark', nargs=2, metavar=('FILE1', 'FILE2'),
  106. help='夸克网盘的两个文件')
  107. args = parser.parse_args()
  108. if not args.pan115 and not args.quark:
  109. parser.print_help()
  110. sys.exit(1)
  111. # 处理115网盘文件
  112. if args.pan115:
  113. try:
  114. merge_files(args.pan115[0], args.pan115[1])
  115. except FileNotFoundError as e:
  116. print(f"错误: {e}")
  117. except ValueError as e:
  118. print(f"处理115网盘文件时出错: {e}")
  119. # 处理夸克网盘文件
  120. if args.quark:
  121. try:
  122. merge_files(args.quark[0], args.quark[1])
  123. except FileNotFoundError as e:
  124. print(f"错误: {e}")
  125. except ValueError as e:
  126. print(f"处理夸克网盘文件时出错: {e}")
  127. if __name__ == "__main__":
  128. main()