# ai_member_xiaoxi/scripts/batch_phone_to_id.py

#!/usr/bin/env python3
"""
批量手机号→用户ID匹配（XXTEA加密匹配 tel_encrypt）
输入: /tmp/sheet_id_data.json (飞书sheet原始数据)
输出: /tmp/sheet_id_results.json (回填数据)
"""
import json
import re
import os
import sys
import psycopg2
from datetime import datetime

SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, SCRIPTS_DIR)
from phone_encrypt import encrypt_phone

# Connection parameters for the PostgreSQL database queried by this script;
# main() passes them to psycopg2.connect(). The password is not stored here:
# it is resolved at runtime by get_password().
DB_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com"
DB_PORT = 28591
DB_USER = "ai_member"
DB_NAME = "vala_bi"

def get_password():
    """Return the database password.

    Lookup order:
      1. The ``PG_ONLINE_PASSWORD`` environment variable, when it is non-empty.
      2. The first line starting with ``PG_ONLINE_PASSWORD=`` in the
         ``secrets.env`` file located one directory above this script; leading
         and trailing quote characters are stripped from the value.

    Raises:
        RuntimeError: if neither source yields a password.
    """
    from_env = os.environ.get("PG_ONLINE_PASSWORD", "")
    if from_env:
        return from_env

    script_dir = os.path.dirname(os.path.abspath(__file__))
    env_file = os.path.join(script_dir, "..", "secrets.env")
    if os.path.exists(env_file):
        with open(env_file) as fh:
            for raw_line in fh:
                if not raw_line.startswith("PG_ONLINE_PASSWORD="):
                    continue
                # Everything after the first "=" is the value.
                _, _, value = raw_line.strip().partition("=")
                return value.strip("'\"")

    raise RuntimeError("PG_ONLINE_PASSWORD not found")

def match_phones_to_accounts(phones, conn):
    """Map plaintext phone numbers to account IDs via their encrypted form.

    Each phone is converted to ``str``, stripped, and encrypted with
    ``encrypt_phone``; the ciphertexts are then looked up in
    ``bi_vala_app_account.tel_encrypt`` (rows with ``status = 1`` and
    ``deleted_at IS NULL`` only), in batches of at most 500 values per query.

    Args:
        phones: iterable of phone numbers (any value accepted by ``str()``).
        conn: an open DB-API connection (``conn.cursor()`` is used).

    Returns:
        dict mapping each stripped phone string that was found to its
        ``account_id``. Phones without a matching row are absent. An empty
        or ``None`` input yields an empty dict.
    """
    if not phones:
        # Always return a dict so callers can use `phone in result` uniformly.
        return {}

    # ciphertext -> list of plaintext phones that produced it
    encrypt_to_phones = {}
    for p in phones:
        p = str(p).strip()
        encrypt_to_phones.setdefault(encrypt_phone(p), []).append(p)

    enc_list = list(encrypt_to_phones)
    phone_to_account = {}
    batch_size = 500  # keep the IN (...) list bounded

    for start in range(0, len(enc_list), batch_size):
        batch = enc_list[start:start + batch_size]
        # Only "%s" markers are interpolated into the SQL text; the values
        # themselves are passed as bound parameters.
        placeholders = ",".join(["%s"] * len(batch))
        cur = conn.cursor()
        try:
            cur.execute(f"""
            SELECT id AS account_id, tel_encrypt
            FROM bi_vala_app_account
            WHERE tel_encrypt IN ({placeholders})
              AND status = 1
              AND deleted_at IS NULL
        """, batch)
            rows = cur.fetchall()
        finally:
            # Release the cursor even if the query fails.
            cur.close()

        # Resolve each returned ciphertext back to its plaintext phone(s)
        # through the map built above instead of re-encrypting every phone.
        # If several rows share a ciphertext, the last one returned wins.
        for account_id, tel_encrypt in rows:
            for p in encrypt_to_phones.get(tel_encrypt, ()):
                phone_to_account[p] = account_id

    return phone_to_account

def _cell_to_phone(cell):
    """Convert a raw sheet cell into a trimmed phone string ('' when empty)."""
    if not cell:
        return ''
    # A JSON number has no .strip(); an integer-valued float such as
    # 13800138000.0 must not keep its ".0" suffix when stringified.
    if isinstance(cell, float) and cell.is_integer():
        cell = int(cell)
    return str(cell).strip()


def _collect_pending_rows(rows):
    """Return one record per data row whose fifth column is '待查询ID'."""
    pending = []
    # rows[0] is the header row, so the first data row is sheet row 2.
    for sheet_row, row in enumerate(rows[1:], start=2):
        if len(row) > 4 and row[4] == '待查询ID':
            pending.append({
                'row_idx': sheet_row,  # 1-based row number in the sheet
                'phone': _cell_to_phone(row[1]),
                'sales': row[0],
                'month': row[2],
                'query_key': row[3],
                'notes': row[8] if len(row) > 8 else '',
            })
    return pending


def main(input_path='/tmp/sheet_id_data.json',
         output_path='/tmp/sheet_id_results.json'):
    """Match pending sheet rows to account IDs and write the back-fill data.

    Reads the sheet dump at *input_path* (rows are taken from
    ``data.valueRange.values``; the first row is treated as the header),
    selects rows whose fifth column equals '待查询ID', looks their phone
    numbers up with ``match_phones_to_accounts`` and writes a JSON document
    with ``results``, ``stats`` and ``total`` to *output_path*.

    Args:
        input_path: path of the sheet dump to read.
        output_path: path of the result file to write.

    The database is only contacted when at least one non-empty phone number
    needs to be looked up.
    """
    # Both files carry Chinese text; pin UTF-8 instead of the locale default.
    with open(input_path, encoding='utf-8') as f:
        data = json.load(f)
    rows = data['data']['valueRange']['values']

    pending_rows = _collect_pending_rows(rows)
    print(f"待查询ID记录: {len(pending_rows)}")

    # De-duplicate while keeping first-seen order (deterministic batches).
    phones = list(dict.fromkeys(r['phone'] for r in pending_rows if r['phone']))
    print(f"唯一手机号: {len(phones)}")

    phone_to_account = {}
    if phones:
        conn = psycopg2.connect(host=DB_HOST, port=DB_PORT, user=DB_USER,
                                password=get_password(), dbname=DB_NAME,
                                connect_timeout=30)
        try:
            phone_to_account = match_phones_to_accounts(phones, conn)
        finally:
            # Close the connection even if the lookup raises.
            conn.close()

    matched = sum(1 for p in phones if p in phone_to_account)
    unmatched = len(phones) - matched
    print(f"匹配成功: {matched}, 未匹配: {unmatched}")

    # One result entry per pending row (not per unique phone).
    now_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    results = []
    stats = {'matched': 0, 'unmatched': 0}

    for r in pending_rows:
        phone = r['phone']
        if phone in phone_to_account:
            user_id, status, bucket = str(phone_to_account[phone]), '已回填', 'matched'
        else:
            user_id, status, bucket = '', '未查到', 'unmatched'
        results.append({
            'row_idx': r['row_idx'],
            'user_id': user_id,
            'status': status,
            'update_time': now_str,
        })
        stats[bucket] += 1

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump({'results': results, 'stats': stats, 'total': len(results)},
                  f, ensure_ascii=False, indent=2)

    print(f"\n结果统计: 匹配 {stats['matched']}, 未查到 {stats['unmatched']}")
    print(f"结果已保存到 {output_path}")

# Run the batch match only when executed as a script, not when imported.
if __name__ == "__main__":
    main()