#!/usr/bin/env python3
"""
Batch phone-number -> user-ID matching (XXTEA-encrypted match on tel_encrypt).

Input:  /tmp/sheet_id_data.json    (raw Feishu sheet data)
Output: /tmp/sheet_id_results.json (data to write back to the sheet)
"""

import json
import re
import os
import sys
import psycopg2
from datetime import datetime

SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, SCRIPTS_DIR)
from phone_encrypt import encrypt_phone

DB_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com"
DB_PORT = 28591
DB_USER = "ai_member"
DB_NAME = "vala_bi"

# Default locations of the sheet dump that is read and the result file written.
INPUT_PATH = '/tmp/sheet_id_data.json'
OUTPUT_PATH = '/tmp/sheet_id_results.json'

# Upper bound on the number of encrypted values placed in one IN (...) list.
DEFAULT_BATCH_SIZE = 500


def get_password():
    """Return the database password.

    The ``PG_ONLINE_PASSWORD`` environment variable is used when it is set
    and non-empty. Otherwise ``../secrets.env`` (relative to this script) is
    scanned for a ``PG_ONLINE_PASSWORD=...`` line, and the value is returned
    with surrounding single or double quotes removed.

    Raises:
        RuntimeError: if neither source provides the password.
    """
    pw = os.environ.get("PG_ONLINE_PASSWORD", "")
    if pw:
        return pw

    secrets_path = os.path.join(SCRIPTS_DIR, "..", "secrets.env")
    if os.path.exists(secrets_path):
        with open(secrets_path, encoding="utf-8") as f:
            for line in f:
                if line.startswith("PG_ONLINE_PASSWORD="):
                    return line.strip().split("=", 1)[1].strip("'\"")
    raise RuntimeError("PG_ONLINE_PASSWORD not found")


def _normalize_phone(value):
    """Return a sheet cell value as a stripped phone string ('' if empty).

    String cells are simply stripped. Numeric cells are stringified instead
    of crashing on ``.strip()``; an integral float such as ``13800000000.0``
    is rendered without the trailing ``.0``.
    NOTE(review): assumes non-string cells, if any, are plain numbers --
    confirm against the actual sheet export.
    """
    if not value:
        return ''
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    return str(value).strip()


def match_phones_to_accounts(phones, conn, batch_size=DEFAULT_BATCH_SIZE):
    """Match phone numbers against ``bi_vala_app_account.tel_encrypt``.

    Each phone is stringified, stripped and passed through ``encrypt_phone``;
    the encrypted values are then looked up in batches. Only rows with
    ``status = 1`` and ``deleted_at IS NULL`` are considered.

    Args:
        phones: iterable of phone numbers (any type accepted by ``str()``).
            Values that are blank after stripping are skipped.
        conn: an open database connection providing ``cursor()``.
        batch_size: maximum number of encrypted values per query (>= 1).

    Returns:
        dict mapping each normalized (stripped string) phone to its account
        id. Phones without a matching account are absent. An empty dict is
        returned for empty input.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")
    if not phones:
        return {}

    # encrypted value -> every input phone that encrypts to it
    enc_to_phones = {}
    for raw in phones:
        phone = str(raw).strip()
        if not phone:
            continue
        enc_to_phones.setdefault(encrypt_phone(phone), []).append(phone)

    enc_list = list(enc_to_phones)
    phone_to_account = {}

    for start in range(0, len(enc_list), batch_size):
        batch = enc_list[start:start + batch_size]
        # Only "%s" markers are interpolated into the SQL text; the values
        # themselves are passed as bound parameters.
        placeholders = ",".join(["%s"] * len(batch))
        with conn.cursor() as cur:
            cur.execute(f"""
                SELECT id AS account_id, tel_encrypt
                FROM bi_vala_app_account
                WHERE tel_encrypt IN ({placeholders})
                  AND status = 1
                  AND deleted_at IS NULL
            """, batch)
            rows = cur.fetchall()

        # Map only the rows that came back instead of re-encrypting every
        # phone per batch. If several accounts share one tel_encrypt, the
        # row returned last wins (the query has no ORDER BY).
        for account_id, tel_encrypt in rows:
            for phone in enc_to_phones.get(tel_encrypt, ()):
                phone_to_account[phone] = account_id

    return phone_to_account


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Resolve user IDs for the sheet rows marked '待查询ID'.

    Reads the sheet dump at ``input_path``, collects the rows whose fifth
    column equals '待查询ID', looks up their phone numbers (second column)
    in the database and writes one result record per pending row, plus
    summary statistics, to ``output_path`` as JSON.
    """
    # Load the sheet dump.
    with open(input_path, encoding='utf-8') as f:
        data = json.load(f)

    rows = data['data']['valueRange']['values']

    # Collect the rows still waiting for an ID. rows[0] is the header, so the
    # first data row is numbered 2 (1-based sheet row numbers).
    pending_rows = []
    for sheet_row, row in enumerate(rows[1:], start=2):
        if len(row) > 4 and row[4] == '待查询ID':
            pending_rows.append({
                'row_idx': sheet_row,
                'phone': _normalize_phone(row[1]),
                'sales': row[0],
                'month': row[2],
                'query_key': row[3],
                'notes': row[8] if len(row) > 8 else '',
            })

    print(f"待查询ID记录: {len(pending_rows)}")

    # Distinct, non-empty phone numbers.
    phones = sorted({r['phone'] for r in pending_rows if r['phone']})
    print(f"唯一手机号: {len(phones)}")

    # Look the phones up; skip the database entirely when there is nothing
    # to query, and always release the connection.
    if phones:
        conn = psycopg2.connect(host=DB_HOST, port=DB_PORT, user=DB_USER,
                                password=get_password(), dbname=DB_NAME,
                                connect_timeout=30)
        try:
            phone_to_account = match_phones_to_accounts(phones, conn)
        finally:
            conn.close()
    else:
        phone_to_account = {}

    matched = sum(1 for p in phones if p in phone_to_account)
    unmatched = len(phones) - matched
    print(f"匹配成功: {matched}, 未匹配: {unmatched}")

    # Build one result record per pending row.
    now_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    results = []
    stats = {'matched': 0, 'unmatched': 0}

    for r in pending_rows:
        account_id = phone_to_account.get(r['phone'])
        if account_id is not None:
            results.append({
                'row_idx': r['row_idx'],
                'user_id': str(account_id),
                'status': '已回填',
                'update_time': now_str,
            })
            stats['matched'] += 1
        else:
            results.append({
                'row_idx': r['row_idx'],
                'user_id': '',
                'status': '未查到',
                'update_time': now_str,
            })
            stats['unmatched'] += 1

    # Save the results. UTF-8 is explicit because ensure_ascii=False writes
    # the Chinese status strings verbatim.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump({'results': results, 'stats': stats, 'total': len(results)},
                  f, ensure_ascii=False, indent=2)

    print(f"\n结果统计: 匹配 {stats['matched']}, 未查到 {stats['unmatched']}")
    print(f"结果已保存到 {output_path}")


if __name__ == "__main__":
    main()