commit 055d0530ab23645e472f85a67e337bb170fcfbc8
Author: zzh <838331105@qq.com>
Date:   Mon Dec 8 11:25:24 2025 +0800

    初始提交

diff --git a/batch_import.py b/batch_import.py
new file mode 100644
index 0000000..8ca3d9f
--- /dev/null
+++ b/batch_import.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python3
+"""Bulk-import batch-number -> SN/MAC mappings from Excel into a Redis hash.
+
+Usage: batch_import.py {pdd,yt,tx}
+
+A row is skipped, and appended to a report workbook in XLSX_DIR, when its
+SN/MAC has the wrong length, occurs more than once in the sheet, or is
+already stored as a value in the target Redis hash.
+"""
+import pandas as pd
+import redis
+from tqdm import tqdm
+import argparse
+import os
+
+XLSX_DIR = '/home/hyx/work/batch_import_xlsx'
+BATCH_COL = '批次号'
+FLUSH_EVERY = 100  # HSET commands queued per pipeline round trip
+
+# target -> (input workbook, Redis hash, SN/MAC column, required SN/MAC length)
+# NOTE(review): this commit adds sn_test_yt.xls and sn_test_pdd.xls, not .xlsx;
+# confirm the .xlsx inputs named here really exist in XLSX_DIR.
+TARGETS = {
+    'yt': ('sn_test_yt.xlsx', 'batch_sn_mapping_yt', 'MAC', 12),
+    'pdd': ('sn_test_pdd.xlsx', 'batch_sn_mapping_pdd', 'MAC', 12),
+    'tx': ('sn_test_tx.xlsx', 'batch_sn_mapping', 'SN_MAC', 27),
+}
+
+
+def append_report(rows, columns, out_path):
+    """Append rows to the workbook at out_path, creating it if missing."""
+    report = pd.DataFrame(rows, columns=columns)
+    if os.path.exists(out_path):
+        report = pd.concat([pd.read_excel(out_path), report], ignore_index=True)
+    report.to_excel(out_path, index=False)
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument("type", choices=["pdd", "yt", "tx"], help="目标: pdd/yt/tx")
+args = parser.parse_args()
+
+# Connect to Redis. The REDIS_* environment variables override the defaults.
+# NOTE(review): the fallback password is a secret committed to source
+# control; rotate it and drop the fallback once REDIS_PASSWORD is deployed.
+r = redis.Redis(
+    host=os.environ.get('REDIS_HOST', '180.163.74.83'),
+    port=int(os.environ.get('REDIS_PORT', '6379')),
+    password=os.environ.get('REDIS_PASSWORD', 'Zzh08165511'),
+    decode_responses=True,
+)
+
+# Read the Excel file. Both key columns are read as text so pandas does not
+# infer a numeric dtype for them (which can stringify as e.g. "123.0").
+file_name, pool, mac_col, expected_len = TARGETS[args.type]
+excel_path = f'{XLSX_DIR}/{file_name}'
+df = pd.read_excel(excel_path, dtype={mac_col: str, BATCH_COL: str})
+macs = df[mac_col].astype(str).str.strip()
+batch_nos = df[BATCH_COL].astype(str).str.strip()
+
+# Index what Redis already holds: SN/MAC -> batch numbers stored under it.
+mac_to_batches = {}
+for b, m in r.hgetall(pool).items():
+    mac_to_batches.setdefault(m, []).append(b)
+
+# SN/MACs that occur more than once in this sheet; none of them is imported.
+dup_keys = set(macs[macs.duplicated(keep=False)])
+
+# Bulk import.
+pipe = r.pipeline()
+pending = 0
+inserted_count = 0
+duplicates = []          # (sn_mac, batch number already holding it in Redis)
+invalids = []            # (sn_mac, batch_no) rows with a wrong-length SN/MAC
+duplicates_current = {}  # sn_mac -> set of batch numbers within this sheet
+dup_current_count = 0
+for batch_no, sn_mac in tqdm(zip(batch_nos, macs), total=len(df)):
+    if len(sn_mac) != expected_len:
+        invalids.append((sn_mac, batch_no))
+        continue
+
+    if sn_mac in dup_keys:
+        duplicates_current.setdefault(sn_mac, set()).add(batch_no)
+        dup_current_count += 1
+        continue
+
+    if sn_mac in mac_to_batches:
+        duplicates.extend((sn_mac, b) for b in mac_to_batches[sn_mac])
+        continue
+
+    pipe.hset(pool, batch_no, sn_mac)
+    inserted_count += 1
+    pending += 1
+
+    # Flush on the number of queued commands, not on the row index, so that
+    # skipped rows cannot let a batch grow past FLUSH_EVERY.
+    if pending >= FLUSH_EVERY:
+        pipe.execute()
+        pipe = r.pipeline()
+        pending = 0
+
+pipe.execute()
+print(f"成功导入 {inserted_count} 条数据,数据库重复跳过 {len(duplicates)} 条,当前批次重复跳过 {dup_current_count} 条,长度错误跳过 {len(invalids)} 条")
+if duplicates:
+    for mac, b in duplicates:
+        print(f"重复: {mac} 已存在于批次号 {b}")
+    out_path = f"{XLSX_DIR}/duplicates_{args.type}.xlsx"
+    append_report(duplicates, [mac_col, BATCH_COL], out_path)
+    print(f"重复数据已导出: {out_path}")
+if duplicates_current:
+    cur_rows = [(mac, b) for mac, bs in duplicates_current.items() for b in bs]
+    for mac, b in cur_rows:
+        print(f"重复: {mac} 当前批次号 {b}")
+    out_path_cur = f"{XLSX_DIR}/duplicates_current_{args.type}.xlsx"
+    append_report(cur_rows, [mac_col, BATCH_COL], out_path_cur)
+    print(f"当前批次重复数据已导出: {out_path_cur}")
+if invalids:
+    for mac, b in invalids:
+        print(f"长度错误: {mac} 批次号 {b}")
+    out_path_inv = f"{XLSX_DIR}/invalid_{args.type}.xlsx"
+    append_report(invalids, [mac_col, BATCH_COL], out_path_inv)
+    print(f"长度错误数据已导出: {out_path_inv}")
diff --git a/check_excel.py b/check_excel.py
new file mode 100644
index 0000000..6b110a4
--- /dev/null
+++ b/check_excel.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+"""Print sheet names, size and the first ten rows of the tx workbook."""
+import pandas as pd
+import openpyxl
+import warnings
+from itertools import islice
+
+# Suppress UserWarnings raised from openpyxl (cross-platform compatibility noise).
+warnings.filterwarnings('ignore', category=UserWarning, module='openpyxl')
+
+file_path = '/home/hyx/work/batch_import_xlsx/sn_test_tx.xlsx'
+
+print("检查Excel文件信息...")
+
+try:
+    # Inspect the worksheets with openpyxl (for Excel files moved Windows -> Mac).
+    wb = openpyxl.load_workbook(file_path, data_only=True)
+    try:
+        print(f"工作表数量: {len(wb.sheetnames)}")
+        print(f"工作表名称: {wb.sheetnames}")
+
+        if wb.sheetnames:
+            ws = wb.active
+            print(f"活动工作表: {ws.title}")
+            print(f"最大行数: {ws.max_row}")
+            print(f"最大列数: {ws.max_column}")
+
+            # Show the first few rows; islice stops reading after the tenth.
+            print("\n前10行数据:")
+            for i, row in enumerate(islice(ws.iter_rows(values_only=True), 10), 1):
+                print(f"第{i}行: {row}")
+    finally:
+        wb.close()  # release the workbook even when inspection raised
+
+except Exception as e:
+    print(f"openpyxl错误: {e}")
+    print("提示: 这可能是Windows到Mac的Excel文件兼容性问题")
+
+try:
+    # Inspect with pandas; the with-block closes the file handle afterwards.
+    print("\n使用pandas检查...")
+    with pd.ExcelFile(file_path) as xl_file:
+        print(f"pandas检测到的工作表: {xl_file.sheet_names}")
+
+except Exception as e:
+    print(f"pandas错误: {e}")
diff --git a/duplicates_current_pdd.xlsx b/duplicates_current_pdd.xlsx
new file mode 100644
index 0000000..d2f9645
Binary files /dev/null and b/duplicates_current_pdd.xlsx differ
diff --git a/duplicates_pdd.xlsx b/duplicates_pdd.xlsx
new file mode 100644
index 0000000..f70e817
Binary files /dev/null and b/duplicates_pdd.xlsx differ
diff --git a/duplicates_tx.xlsx b/duplicates_tx.xlsx
new file mode 100644
index 0000000..71ad14f
Binary files /dev/null and b/duplicates_tx.xlsx differ
diff --git a/duplicates_yt.xlsx b/duplicates_yt.xlsx
new file mode 100644
index 0000000..14498f9
Binary files /dev/null and b/duplicates_yt.xlsx differ
diff --git a/invalid_pdd.xlsx b/invalid_pdd.xlsx
new file mode 100644
index 0000000..3540a47
Binary files /dev/null and b/invalid_pdd.xlsx differ
diff --git a/sn_test_pdd.xls b/sn_test_pdd.xls
new file mode 100644
index 0000000..a32d178
Binary files /dev/null and b/sn_test_pdd.xls differ
diff --git a/sn_test_tx.xlsx b/sn_test_tx.xlsx
new file mode 100644
index 0000000..8bdfda3
Binary files /dev/null and b/sn_test_tx.xlsx differ
diff --git a/sn_test_yt.xls b/sn_test_yt.xls
new file mode 100644
index 0000000..6440c9c
Binary files /dev/null and b/sn_test_yt.xls differ