xiaoya_compare_files.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
# Process 115 share files:
#   python compare_files.py --pan115 "115share_list.txt" "115share_list [tg].txt"
# Process Quark share files:
#   python compare_files.py --quark "quarkshare_list.txt" "quarkshare_list [tg].txt"
# Process both kinds in one run:
#   python compare_files.py --pan115 "115share_list.txt" "115share_list [tg].txt" --quark "quarkshare_list.txt" "quarkshare_list [tg].txt"
  7. import sys
  8. import argparse
  9. def read_file(filename):
  10. with open(filename, 'rb') as f:
  11. return f.read().decode('utf-8').splitlines()
  12. def get_share_type(filename):
  13. """判断分享文件类型"""
  14. filename_lower = filename.lower()
  15. # 115网盘相关的文件名模式
  16. if any(pattern in filename_lower for pattern in ['115share', '115code', '115pan']):
  17. return "115"
  18. # 夸克网盘相关的文件名模式
  19. elif any(pattern in filename_lower for pattern in ['quarkshare', 'quarkcode', 'quarkpan']):
  20. return "quark"
  21. else:
  22. raise ValueError(f"无法判断文件类型: {filename}\n文件名需包含 '115share/115code/115pan' 或 'quarkshare/quarkcode/quarkpan'")
  23. def verify_file_types(file1, file2):
  24. """验证两个文件是否属于同一类型"""
  25. type1 = get_share_type(file1)
  26. type2 = get_share_type(file2)
  27. if type1 != type2:
  28. raise ValueError(f"文件类型不匹配:\n{file1} 是 {type1} 类型\n{file2} 是 {type2} 类型")
  29. return type1
  30. def extract_share_id(line, share_type):
  31. """根据不同类型提取分享ID"""
  32. parts = line.strip().split()
  33. if len(parts) >= 3:
  34. if share_type == "115":
  35. return parts[1] # 115格式:第二部分是分享ID
  36. elif share_type == "quark":
  37. return f"{parts[1]}_{parts[2]}" # 夸克格式:第二和第三部分组合
  38. return None
  39. def get_category(line):
  40. """获取分类路径(第一个部分)"""
  41. parts = line.strip().split()
  42. return parts[0] if parts else ""
  43. def merge_files(file1, file2):
  44. """根据文件名判断类型并合并文件"""
  45. # 判断并验证文件类型
  46. share_type = verify_file_types(file1, file2)
  47. output_prefix = f"merged_{share_type}"
  48. output_file = f"{output_prefix}share_list.txt"
  49. removed_file = f"removed_{share_type}share_items.txt"
  50. # 读取并合并所有有效行
  51. lines1 = [line for line in read_file(file1) if line.strip()]
  52. lines2 = [line for line in read_file(file2) if line.strip()]
  53. # 使用字典来存储,键为"分类+分享ID",值为完整行
  54. share_dict = {}
  55. removed_lines = []
  56. # 处理第一个文件
  57. for line in lines1:
  58. share_id = extract_share_id(line, share_type)
  59. if share_id:
  60. category = get_category(line)
  61. key = f"{category}_{share_id}"
  62. share_dict[key] = line
  63. # 处理第二个文件,记录重复项
  64. for line in lines2:
  65. share_id = extract_share_id(line, share_type)
  66. if share_id:
  67. category = get_category(line)
  68. key = f"{category}_{share_id}"
  69. if key in share_dict:
  70. removed_lines.append(line)
  71. else:
  72. share_dict[key] = line
  73. # 写入去重后的文件
  74. with open(output_file, 'w', encoding='utf-8') as f:
  75. for line in share_dict.values():
  76. f.write(line + '\n')
  77. # 写入删除项清单
  78. with open(removed_file, 'w', encoding='utf-8') as f:
  79. f.writelines(line + '\n' for line in removed_lines)
  80. print(f"处理完成!")
  81. print(f"合并后的文件:{output_file}")
  82. print(f"重复项清单:{removed_file}")
  83. def main():
  84. """主函数,处理不同类型的文件"""
  85. parser = argparse.ArgumentParser(description='合并并去重分享链接文件')
  86. parser.add_argument('--pan115', nargs=2, metavar=('FILE1', 'FILE2'),
  87. help='115网盘的两个文件')
  88. parser.add_argument('--quark', nargs=2, metavar=('FILE1', 'FILE2'),
  89. help='夸克网盘的两个文件')
  90. args = parser.parse_args()
  91. # 处理115网盘文件
  92. if args.pan115:
  93. try:
  94. merge_files(args.pan115[0], args.pan115[1])
  95. except FileNotFoundError:
  96. print(f"未找到115网盘分享文件: {args.pan115}")
  97. except ValueError as e:
  98. print(f"处理115网盘文件时出错: {e}")
  99. # 处理夸克网盘文件
  100. if args.quark:
  101. try:
  102. merge_files(args.quark[0], args.quark[1])
  103. except FileNotFoundError:
  104. print(f"未找到夸克网盘分享文件: {args.quark}")
  105. except ValueError as e:
  106. print(f"处理夸克网盘文件时出错: {e}")
  107. if __name__ == "__main__":
  108. main()