#!/usr/bin/env python3
"""Match phone numbers to account IDs and report the match rate.

Phone numbers are read from an xlsx workbook, encrypted with
``phone_encrypt.encrypt_phone`` (XXTEA) and looked up against the
``tel_encrypt`` column of ``bi_vala_app_account``. The result is printed as a
short summary and written to a timestamped xlsx report under
``WORKSPACE_DIR/output``.
"""
import datetime
import os
import re
import sys
from typing import Any, Dict, List, Optional, Tuple

import openpyxl
import psycopg2
from openpyxl.styles import Alignment, Font, PatternFill

WORKSPACE_DIR = "/root/.openclaw/workspace"
SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
# phone_encrypt lives next to this script, so make that directory importable
# before importing it.
sys.path.insert(0, SCRIPTS_DIR)

from phone_encrypt import encrypt_phone  # noqa: E402

SECRETS_FILE = os.path.join(WORKSPACE_DIR, "secrets.env")
DB_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com"
DB_PORT = "28591"
DB_USER = "ai_member"
DB_NAME = "vala_bi"

# A phone number is exactly 11 digits starting with "1".
_PHONE_RE = re.compile(r'^1\d{10}$')


def load_pg_password() -> Optional[str]:
    """Return the value of ``PG_ONLINE_PASSWORD`` from ``SECRETS_FILE``.

    Surrounding whitespace and single/double quotes are stripped from the
    value. Returns ``None`` when no line starts with ``PG_ONLINE_PASSWORD=``.
    """
    with open(SECRETS_FILE) as f:
        for line in f:
            if line.startswith("PG_ONLINE_PASSWORD="):
                return line.split("=", 1)[1].strip().strip("'\"")
    return None


def extract_phones(file_path: str) -> List[str]:
    """Extract phone numbers from every sheet, de-duplicated in first-seen order.

    Every non-empty cell of every worksheet is inspected. Floats that hold a
    whole number are rendered as integers (``13800138000.0`` ->
    ``"13800138000"``); any other value is converted with ``str`` and
    stripped. Only values matching ``_PHONE_RE`` are kept.
    """
    phones = []
    seen = set()
    wb = openpyxl.load_workbook(file_path, read_only=True)
    try:
        for ws in wb:
            for row in ws.iter_rows(values_only=True):
                for cell in row:
                    if cell is None:
                        continue
                    # Only integral floats are converted through int(); this
                    # avoids truncating fractional values into a "valid"
                    # number and avoids int() raising on NaN/inf.
                    if isinstance(cell, float) and cell.is_integer():
                        val = str(int(cell))
                    else:
                        val = str(cell).strip()
                    if _PHONE_RE.match(val) and val not in seen:
                        seen.add(val)
                        phones.append(val)
    finally:
        # Read-only workbooks keep the file handle open until closed.
        wb.close()
    return phones


def _percent(part: int, total: int) -> float:
    """Return ``part / total`` as a percentage, or 0.0 when ``total`` is 0."""
    return part / total * 100 if total else 0.0


def _query_matches(
    encrypt_to_phone: Dict[str, str], batch_size: int = 500
) -> Dict[str, Tuple[Any, str]]:
    """Look up encrypted phones in ``bi_vala_app_account``.

    Args:
        encrypt_to_phone: Mapping of ciphertext -> plaintext phone number.
        batch_size: Maximum number of ciphertexts per ``IN (...)`` query.

    Returns:
        Mapping of plaintext phone -> ``(account_id, tel_encrypt)`` for every
        row with ``status = 1`` and ``deleted_at IS NULL`` whose
        ``tel_encrypt`` is one of the given ciphertexts. If several rows share
        a ciphertext, the last one fetched wins.
    """
    matched = {}
    if not encrypt_to_phone:
        # Nothing to look up; do not open a connection at all.
        return matched

    encrypted_list = list(encrypt_to_phone)
    pw = load_pg_password()
    conn = psycopg2.connect(
        host=DB_HOST, port=DB_PORT, user=DB_USER, password=pw, dbname=DB_NAME
    )
    try:
        with conn.cursor() as cur:
            for i in range(0, len(encrypted_list), batch_size):
                batch = encrypted_list[i:i + batch_size]
                # Only the placeholder list is interpolated; the values are
                # passed as bound parameters.
                placeholders = ','.join(['%s'] * len(batch))
                sql = f"""
                    SELECT id, tel_encrypt
                    FROM bi_vala_app_account
                    WHERE tel_encrypt IN ({placeholders})
                      AND status = 1
                      AND deleted_at IS NULL
                """
                cur.execute(sql, batch)
                for account_id, tel_encrypt in cur.fetchall():
                    if tel_encrypt in encrypt_to_phone:
                        phone = encrypt_to_phone[tel_encrypt]
                        matched[phone] = (account_id, tel_encrypt)
    finally:
        # Close the connection even if a query fails.
        conn.close()
    return matched


def _write_report(
    phones: List[str], matched: Dict[str, Tuple[Any, str]], output_path: str
) -> None:
    """Write the match report to ``output_path`` as an xlsx workbook.

    Matched phones are listed first, then unmatched ones (highlighted), each
    group in input order, followed by a summary section.
    """
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = "匹配结果"

    header_font = Font(bold=True, size=11)
    no_match_fill = PatternFill(
        start_color="FFF2CC", end_color="FFF2CC", fill_type="solid"
    )

    # Header row.
    headers = ["序号", "手机号", "匹配状态", "账号ID(account_id)", "加密手机号(tel_encrypt)"]
    for col, h in enumerate(headers, 1):
        cell = ws.cell(row=1, column=col, value=h)
        cell.font = header_font
        cell.alignment = Alignment(horizontal="center")

    row = 2
    matched_count = 0

    # Matched phones first. The serial number in column 1 is row - 1 because
    # row 1 holds the header.
    for phone in phones:
        if phone not in matched:
            continue
        aid, tel_m = matched[phone]
        ws.cell(row=row, column=1, value=row - 1)
        ws.cell(row=row, column=2, value=phone)
        ws.cell(row=row, column=3, value="已匹配")
        ws.cell(row=row, column=4, value=aid)
        # Use this phone's own ciphertext, not a leftover query loop variable.
        ws.cell(row=row, column=5, value=tel_m)
        matched_count += 1
        row += 1

    unmatched_count = len(phones) - matched_count

    # Then the unmatched phones, highlighted across all five columns.
    for phone in phones:
        if phone in matched:
            continue
        ws.cell(row=row, column=1, value=row - 1)
        ws.cell(row=row, column=2, value=phone)
        ws.cell(row=row, column=3, value="未匹配")
        for col in range(1, 6):
            ws.cell(row=row, column=col).fill = no_match_fill
        row += 1

    # Summary section, separated from the data by one blank row.
    total = len(phones)
    row += 1
    ws.cell(row=row, column=2, value="统计汇总").font = Font(bold=True)
    row += 1
    ws.cell(row=row, column=2, value=f"总手机号数: {total}")
    row += 1
    ws.cell(
        row=row,
        column=2,
        value=f"匹配成功: {matched_count} ({_percent(matched_count, total):.1f}%)",
    )
    row += 1
    ws.cell(
        row=row,
        column=2,
        value=f"未匹配: {unmatched_count} ({_percent(unmatched_count, total):.1f}%)",
    )

    # Column widths.
    ws.column_dimensions['A'].width = 8
    ws.column_dimensions['B'].width = 16
    ws.column_dimensions['C'].width = 12
    ws.column_dimensions['D'].width = 22
    ws.column_dimensions['E'].width = 18

    # The output directory may not exist yet on a fresh workspace.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    wb.save(output_path)


def main(argv: Optional[List[str]] = None) -> None:
    """Run the match: extract phones, query the database, write the report.

    Args:
        argv: Argument vector; defaults to ``sys.argv``. ``argv[1]`` is the
            path of the input xlsx file.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit("Usage: python3 phone_match_userid.py <input.xlsx>")
    input_file = argv[1]

    # 1. Extract phone numbers.
    print("📱 提取手机号...")
    phones = extract_phones(input_file)
    print(f" 共提取 {len(phones)} 个不重复手机号")

    # 2. Encrypt each phone once; ciphertext -> plaintext, used both for the
    #    SQL IN query and to map result rows back to the input phone.
    encrypt_to_phone = {encrypt_phone(p): p for p in phones}

    # 3. Query the database in batches.
    print("🔗 连接数据库...")
    matched = _query_matches(encrypt_to_phone)

    # 4. Print the statistics.
    total = len(phones)
    hit = len(matched)
    miss = total - hit
    print("\n📊 匹配结果:")
    print(f" 输入手机号: {total} 个")
    print(f" 匹配成功: {hit} 个 ({_percent(hit, total):.1f}%)")
    print(f" 未匹配: {miss} 个 ({_percent(miss, total):.1f}%)")

    # 5. Write the xlsx report.
    ts = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    output_path = os.path.join(
        WORKSPACE_DIR, "output", f"phone_match_result_{ts}.xlsx"
    )
    _write_report(phones, matched, output_path)
    print(f"\n✅ 结果已保存: {output_path}")


if __name__ == "__main__":
    main()