| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152 |
# 处理115文件
# python compare_files.py --pan115 "115share_list.txt" "115share_list [tg].txt"
# 处理夸克文件
# python compare_files.py --quark "quarkshare_list.txt" "quarkshare_list [tg].txt"
# 同时处理两种文件
# python compare_files.py --pan115 "115share_list.txt" "115share_list [tg].txt" --quark "quarkshare_list.txt" "quarkshare_list [tg].txt"
- import sys
- import os
- import argparse
def read_file(filename):
    """Read a UTF-8 text file and return its lines without line terminators.

    The content is decoded with ``utf-8-sig`` rather than plain ``utf-8`` so
    that a leading byte-order mark is dropped. With plain ``utf-8`` the BOM
    stays attached to the first field of the first line, which makes that
    line compare as different from an otherwise identical line elsewhere.
    Files without a BOM decode exactly as before.

    Args:
        filename: Path of the file to read.

    Returns:
        A list of strings produced by ``str.splitlines()`` on the decoded
        content.

    Exits:
        Prints a message and calls ``sys.exit(1)`` if the file is not valid
        UTF-8 or any other error occurs while reading it.
    """
    try:
        with open(filename, 'rb') as f:
            # 'utf-8-sig' behaves like 'utf-8' but skips an initial BOM.
            return f.read().decode('utf-8-sig').splitlines()
    except UnicodeDecodeError:
        print(f"文件 {filename} 编码错误,请确保是UTF-8编码")
        sys.exit(1)
    except Exception as e:
        print(f"读取文件 {filename} 时出错: {e}")
        sys.exit(1)
def get_share_type(filename):
    """Classify a share-list file as ``"115"`` or ``"quark"`` from its name.

    The base name of the path is examined first, so a marker that only
    appears in a parent directory (for example ``115pan_backup/``) cannot
    override what the file name itself says. If the base name contains no
    marker at all, the whole path is searched as a fallback, which keeps
    every path that was accepted before still accepted.

    Matching is case-insensitive. Within one candidate string the 115
    markers are tested before the Quark markers.

    Args:
        filename: File name or path to classify.

    Returns:
        ``"115"`` or ``"quark"``.

    Raises:
        ValueError: If neither the base name nor the full path contains a
            known marker.
    """
    markers = (
        # 115 netdisk file-name patterns
        ("115", ('115share', '115code', '115pan')),
        # Quark netdisk file-name patterns
        ("quark", ('quarkshare', 'quarkcode', 'quarkpan')),
    )
    # Base name first; full path only as a backward-compatible fallback.
    candidates = (os.path.basename(filename).lower(), filename.lower())
    for text in candidates:
        for share_type, patterns in markers:
            if any(pattern in text for pattern in patterns):
                return share_type
    raise ValueError(f"无法判断文件类型: {filename}\n文件名需包含 '115share/115code/115pan' 或 'quarkshare/quarkcode/quarkpan'")
def verify_file_types(file1, file2):
    """Return the share type that both files have in common.

    Each path is classified with ``get_share_type`` (first ``file1``, then
    ``file2``); a ``ValueError`` raised there propagates unchanged.

    Raises:
        ValueError: If the two files are classified as different types.
    """
    first_kind, second_kind = [get_share_type(path) for path in (file1, file2)]
    if first_kind == second_kind:
        return first_kind
    raise ValueError(f"文件类型不匹配:\n{file1} 是 {first_kind} 类型\n{file2} 是 {second_kind} 类型")
def extract_share_id(line, share_type):
    """Pull the share identifier out of one whitespace-separated list line.

    Args:
        line: A single line of a share list.
        share_type: ``"115"`` or ``"quark"``.

    Returns:
        For ``"115"`` the second field; for ``"quark"`` the second and third
        fields joined by ``_``. ``None`` when the line has fewer than three
        fields or ``share_type`` is neither of the two known values.
    """
    fields = line.split()
    if len(fields) < 3:
        return None
    if share_type == "115":
        # 115 format: the second field is the share ID.
        return fields[1]
    if share_type == "quark":
        # Quark format: second and third fields together identify the share.
        return "_".join(fields[1:3])
    return None
def get_category(line):
    """Return the category path, i.e. the first whitespace-separated field.

    An empty string is returned when the line contains no fields.
    """
    return next(iter(line.split()), "")
def _share_key(line, share_type):
    """Return the ``(category, share_id)`` de-duplication key for *line*.

    ``None`` is returned when no share ID can be extracted from the line.
    """
    share_id = extract_share_id(line, share_type)
    if not share_id:
        return None
    return (get_category(line), share_id)


def merge_files(file1, file2):
    """Merge two share lists of the same type and drop duplicate entries.

    The share type is derived from the file names. Two files are written,
    with names relative to the current working directory:

    * ``merged_<type>share_list.txt`` - one line per unique
      (category, share ID) pair.
    * ``removed_<type>share_items.txt`` - every line that was discarded as a
      duplicate.

    Duplicate handling:

    * A line in ``file2`` whose key already exists is discarded and reported.
    * Within ``file1`` a later line with the same key replaces the earlier
      one (it keeps the earlier one's position in the output); the replaced
      line is reported, so the report accounts for every dropped entry.

    Blank lines are ignored. Lines from which no share ID can be extracted
    are left out of both files; their number is printed so the omission is
    visible.

    Args:
        file1: Path of the primary list.
        file2: Path of the list merged into it.

    Raises:
        FileNotFoundError: If either input path does not exist.
        ValueError: If the type of a file cannot be determined or the two
            files are of different types.
    """
    # Make sure both inputs exist before doing anything else.
    for path in (file1, file2):
        if not os.path.exists(path):
            raise FileNotFoundError(f"文件不存在: {path}")

    # Determine and validate the share type.
    share_type = verify_file_types(file1, file2)
    output_file = f"merged_{share_type}share_list.txt"
    removed_file = f"removed_{share_type}share_items.txt"

    # Warn before overwriting existing output.
    for path in (output_file, removed_file):
        if os.path.exists(path):
            print(f"警告:文件 {path} 已存在,将被覆盖")

    # Keep only non-blank lines.
    lines1 = [line for line in read_file(file1) if line.strip()]
    lines2 = [line for line in read_file(file2) if line.strip()]

    # Tuple keys keep category and share ID apart; a joined string would let
    # ("a_b", "c") and ("a", "b_c") collide.
    share_dict = {}
    removed_lines = []
    skipped = 0

    # First file: a later duplicate replaces the earlier line.
    for line in lines1:
        key = _share_key(line, share_type)
        if key is None:
            skipped += 1
            continue
        if key in share_dict:
            removed_lines.append(share_dict[key])
        share_dict[key] = line

    # Second file: entries already present are discarded and recorded.
    for line in lines2:
        key = _share_key(line, share_type)
        if key is None:
            skipped += 1
            continue
        if key in share_dict:
            removed_lines.append(line)
        else:
            share_dict[key] = line

    # Write the de-duplicated list.
    with open(output_file, 'w', encoding='utf-8', newline='\n') as f:
        f.writelines(line + '\n' for line in share_dict.values())

    # Write the list of discarded lines.
    with open(removed_file, 'w', encoding='utf-8', newline='\n') as f:
        f.writelines(line + '\n' for line in removed_lines)

    if skipped:
        print(f"警告:跳过 {skipped} 行无法提取分享ID的内容")
    print("处理完成!")
    print(f"合并后的文件:{output_file}")
    print(f"重复项清单:{removed_file}")
def main():
    """Parse the command line and merge the requested pairs of share lists.

    ``--pan115 FILE1 FILE2`` and ``--quark FILE1 FILE2`` may be given alone
    or together. When neither is given, the help text is printed and the
    process exits with status 1.

    A ``FileNotFoundError`` or ``ValueError`` raised while merging one pair
    is printed and does not prevent the other pair from being processed.
    If any pair failed, the process exits with status 1 afterwards so that
    callers such as shell scripts can detect the failure.
    """
    parser = argparse.ArgumentParser(description='合并并去重分享链接文件')
    parser.add_argument('--pan115', nargs=2, metavar=('FILE1', 'FILE2'),
                        help='115网盘的两个文件')
    parser.add_argument('--quark', nargs=2, metavar=('FILE1', 'FILE2'),
                        help='夸克网盘的两个文件')

    args = parser.parse_args()

    if not args.pan115 and not args.quark:
        parser.print_help()
        sys.exit(1)

    # (label used in the error message, file pair) in processing order.
    jobs = (("115网盘", args.pan115), ("夸克网盘", args.quark))
    failed = False
    for label, files in jobs:
        if not files:
            continue
        try:
            merge_files(files[0], files[1])
        except FileNotFoundError as e:
            print(f"错误: {e}")
            failed = True
        except ValueError as e:
            print(f"处理{label}文件时出错: {e}")
            failed = True

    # Report failure through the exit status, not only through the text.
    if failed:
        sys.exit(1)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|