# ai_member_xiaoxi/scripts/process_xiaolong_dij.py

#!/usr/bin/env python3
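"""
Fill columns D, I and J of the 小龙 sheet from the BI database.

Column H (user ID) is expected to be written already. For every row whose
phone number in column E matches an account, this script writes D (trial
counts), I (registration dates) and J (download channels). Rows that already
have a user ID in H but an empty D also get their trial count filled in.
"""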
import sys
sys.path.insert(0, '/root/.openclaw/workspace/scripts')
from phone_encrypt import encrypt_phone
import psycopg2
import requests
import time

# Keyword arguments handed to psycopg2.connect() for the BI database.
# NOTE(review): these credentials are committed in plain text — consider
# loading them from the environment or a secrets store instead.
PG_CONFIG = dict(
    host='bj-postgres-16pob4sg.sql.tencentcdb.com',
    port=28591,
    user='ai_member',
    password='LdfjdjL83h3h3^$&**YGG*',
    database='vala_bi',
)

# Identifiers interpolated into the Feishu sheets API URLs:
# the sheet (tab) ID and the token of the spreadsheet that contains it.
SHEET_ID = 'dff8c7'
SPREADSHEET_TOKEN = 'DU4dsUOJThfbPStMcgBcsMH5nyb'

def get_token():
    """Request a Feishu tenant access token for the internal app.

    Returns:
        The ``tenant_access_token`` string from the auth response.

    Raises:
        requests.RequestException: on a network failure or timeout.
        RuntimeError: if the response carries no token (for example when the
            API rejects the app credentials).
    """
    # NOTE(review): the app credentials are hard-coded — consider moving them
    # to the environment or a secrets store.
    resp = requests.post(
        'https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal',
        json={'app_id': 'cli_a929ae22e0b8dcc8', 'app_secret': 'OtFjMy7p3qE3VvLbMdcWidwgHOnGD4FJ'},
        timeout=30,  # requests waits indefinitely unless a timeout is given
    )
    payload = resp.json()
    token = payload.get('tenant_access_token')
    if not token:
        # Surface the API's own code/msg instead of a bare KeyError.
        raise RuntimeError(
            f"Failed to obtain tenant_access_token (HTTP {resp.status_code}): {payload}"
        )
    return token

def read_sheet(token):
    """Read range A3:J1142 of the target sheet.

    The request asks for ``valueRenderOption=ToString``.

    Args:
        token: Feishu tenant access token, sent as the Bearer credential.

    Returns:
        The ``values`` list from the response's ``valueRange``; an empty list
        when the response has no such data (an API error is printed first).

    Raises:
        requests.RequestException: on a network failure or timeout.
    """
    url = f'https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values/{SHEET_ID}!A3:J1142?valueRenderOption=ToString'
    resp = requests.get(
        url,
        headers={'Authorization': f'Bearer {token}'},
        timeout=30,  # requests waits indefinitely unless a timeout is given
    )
    payload = resp.json()
    if payload.get('code', 0) != 0:
        # Make a failed read visible instead of silently reporting 0 rows.
        print(f"  ERROR reading sheet: {payload}")
    # `or` guards against keys that are present but null.
    value_range = (payload.get('data') or {}).get('valueRange') or {}
    return value_range.get('values') or []

def write_batch(token, range_str, values, max_retries=3):
    """PUT one rectangular block of values into the sheet, retrying on transient failures.

    Args:
        token: Feishu tenant access token, sent as the Bearer credential.
        range_str: A1-style range inside the sheet, e.g. ``'D5:D9'``.
        values: List of rows (each a list of cell values) to write.
        max_retries: Maximum number of attempts.

    Returns:
        True if the API answered with ``code == 0``; False on a
        non-retryable API error or once all attempts are used up.
    """
    url = f'https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values'
    body = {"valueRange": {"range": f"{SHEET_ID}!{range_str}", "values": values}}
    headers = {
        'Authorization': f'Bearer {token}',
        'Content-Type': 'application/json'
    }
    for attempt in range(max_retries):
        try:
            resp = requests.put(url, headers=headers, json=body, timeout=30)
            result = resp.json()
        except (requests.RequestException, ValueError) as exc:
            # Network failure, timeout, or a non-JSON body (e.g. a gateway
            # error page): treat as transient and retry. The PUT writes the
            # same values to the same range, so repeating it is harmless.
            reason = f"Request error ({exc})"
        else:
            code = result.get('code', -1)
            if code == 0:
                return True
            if code != 90217:  # anything but "too many request" is final
                print(f"  ERROR {range_str}: {result}")
                return False
            reason = "Rate limited"

        if attempt + 1 < max_retries:
            wait = 1.0 * (attempt + 1)  # linear back-off
            print(f"  {reason} on {range_str}, waiting {wait}s...")
            time.sleep(wait)
        else:
            # No point sleeping when there is no attempt left.
            print(f"  {reason} on {range_str}")
    print(f"  FAILED after {max_retries} retries: {range_str}")
    return False

def _group_consecutive(writes):
    """Group (index, value) pairs into runs of consecutive indices.

    Returns a list of ``(start_index, [[value], ...])`` tuples in ascending
    index order. The input sequence is not modified.
    """
    runs = []
    prev_idx = None
    for idx, value in sorted(writes, key=lambda w: w[0]):
        if prev_idx is not None and idx == prev_idx + 1:
            runs[-1][1].append([value])
        else:
            runs.append((idx, [[value]]))
        prev_idx = idx
    return runs


def batch_writes(token, col, writes, first_row=3):
    """Write single-column values to the sheet, one request per run of adjacent rows.

    Args:
        token: Feishu tenant access token passed through to ``write_batch``.
        col: Column letter to write into, e.g. ``'D'``.
        writes: Iterable of ``(row_index, value)`` pairs, where ``row_index``
            is 0-based relative to ``first_row``. Left unmodified.
        first_row: Sheet row number that index 0 maps to. The default of 3
            matches the ``A3:...`` range the sheet is read from.

    Returns:
        Number of cells in the batches that were written successfully.
    """
    if not writes:
        return 0

    written = 0
    for start_idx, vals in _group_consecutive(writes):
        start_row = start_idx + first_row
        end_row = start_row + len(vals) - 1
        range_str = f'{col}{start_row}:{col}{end_row}'
        if write_batch(token, range_str, vals):
            written += len(vals)
            print(f"  {range_str} ({len(vals)} cells)")
        time.sleep(0.08)  # small pause between requests to stay under the rate limit
    return written

def _existing_uid(row):
    """Return the numeric user ID found in column H of *row*, or None.

    The value is normalized through ``int`` so that it compares equal to the
    ``str(account_id)`` keys built from database results.
    """
    if len(row) > 7 and row[7]:
        h_val = str(row[7]).strip()
        # isdecimal() only accepts characters that int() can parse.
        if h_val.isdecimal():
            return str(int(h_val))
    return None


def _fetch_trial_counts(cur, uids, chunk_size=500):
    """Count bi_user_course_detail rows with no expire_time per account.

    Args:
        cur: Open database cursor.
        uids: Iterable of numeric user-ID strings.
        chunk_size: Maximum number of IDs per ``IN (...)`` query.

    Returns:
        Dict mapping ``str(account_id)`` to its row count. Accounts without
        any matching row are absent.
    """
    counts = {}
    uid_list = [int(u) for u in uids]
    for start in range(0, len(uid_list), chunk_size):
        chunk = uid_list[start:start + chunk_size]
        placeholders = ','.join(['%s'] * len(chunk))
        cur.execute(f"""
            SELECT account_id, COUNT(*)
            FROM bi_user_course_detail
            WHERE account_id IN ({placeholders})
            AND expire_time IS NULL AND deleted_at IS NULL
            GROUP BY account_id
        """, chunk)
        for account_id, count in cur.fetchall():
            counts[str(account_id)] = count
    return counts


def main():
    """Read the sheet, look up accounts by phone, and write columns D, I and J.

    Steps:
      1. Read the sheet and encrypt every valid 11-digit phone in column E.
      2. Look up matching active accounts (ID, created_at, download_channel).
      3. Fetch trial counts for matched accounts and for UIDs already in H.
      4. Write D (trial count), I (registration date) and J (channel).
    """
    token = get_token()
    print("Reading sheet...")
    rows = read_sheet(token)
    print(f"  {len(rows)} rows")

    # Extract phones
    print("\nEncrypting phones...")
    # encrypted phone -> indices of every row carrying it, so that rows
    # sharing a phone number are all filled rather than only the last one.
    phone_rows = {}
    for i, row in enumerate(rows):
        if len(row) > 4 and row[4]:
            phone = str(row[4]).strip()
            if len(phone) == 11 and phone.isdigit():
                phone_rows.setdefault(encrypt_phone(phone), []).append(i)
    print(f"  {len(phone_rows)} phones")

    # User IDs already present in column H, one entry per row (None if absent).
    sheet_uids = [_existing_uid(row) for row in rows]
    existing_uids = {uid for uid in sheet_uids if uid is not None}

    # Query DB
    print("\nQuerying PostgreSQL...")
    phone_to_account = {}  # encrypted phone -> (uid, created_at, channel)
    conn = psycopg2.connect(**PG_CONFIG)
    try:
        with conn.cursor() as cur:
            enc_list = list(phone_rows)
            for chunk_start in range(0, len(enc_list), 500):
                chunk = enc_list[chunk_start:chunk_start + 500]
                placeholders = ','.join(['%s'] * len(chunk))
                cur.execute(f"""
                    SELECT id, tel_encrypt, created_at, download_channel
                    FROM bi_vala_app_account
                    WHERE tel_encrypt IN ({placeholders})
                    AND status = 1 AND deleted_at IS NULL
                """, chunk)
                for uid, enc, created, channel in cur.fetchall():
                    phone_to_account[enc] = (str(uid), created, channel or '')

            print(f"  Matched {len(phone_to_account)} UIDs")

            # Trial counts
            matched_uids = {account[0] for account in phone_to_account.values()}
            uid_to_trial = _fetch_trial_counts(cur, matched_uids)
            print(f"  Trial counts for {len(uid_to_trial)} users")

            # UIDs already in H that the phone lookup did not cover. Subtract
            # everything already queried, not just the UIDs that had a count,
            # so zero-trial accounts are not queried twice.
            missing_uids = existing_uids - matched_uids
            if missing_uids:
                print(f"  Querying trial counts for {len(missing_uids)} existing UIDs...")
                extra = _fetch_trial_counts(cur, missing_uids)
                uid_to_trial.update(extra)
                print(f"  Added {len(extra)} more")
    finally:
        # Always release the connection, even if a query fails.
        conn.close()

    # Build writes
    row_data = {}
    for enc, row_indices in phone_rows.items():
        account = phone_to_account.get(enc)
        if account is None:
            continue
        uid, created, channel = account
        trial = uid_to_trial.get(uid, 0)
        entry = {
            'D': str(trial) if trial > 0 else '',
            'I': created.strftime('%Y-%m-%d') if created else '',
            'J': channel,
        }
        for row_idx in row_indices:
            row_data[row_idx] = dict(entry)

    # Also fill D for rows with existing H UID but empty D
    for i, row in enumerate(rows):
        if i in row_data:
            continue
        uid = sheet_uids[i]
        if uid is None:
            continue
        d_empty = len(row) <= 3 or not row[3]
        if d_empty:
            trial = uid_to_trial.get(uid, 0)
            if trial > 0:
                row_data[i] = {'D': str(trial)}

    # Prepare column writes (empty values are skipped, not written as blanks)
    d_writes = [(idx, data['D']) for idx, data in row_data.items() if data.get('D')]
    i_writes = [(idx, data['I']) for idx, data in row_data.items() if data.get('I')]
    j_writes = [(idx, data['J']) for idx, data in row_data.items() if data.get('J')]

    print(f"\nPrepared: D={len(d_writes)}, I={len(i_writes)}, J={len(j_writes)}")

    # Write D
    print("\n=== Writing D column ===")
    d_written = batch_writes(token, 'D', d_writes)

    # Write I
    print("\n=== Writing I column ===")
    i_written = batch_writes(token, 'I', i_writes)

    # Write J
    print("\n=== Writing J column ===")
    j_written = batch_writes(token, 'J', j_writes)

    # Report
    print("\n" + "="*60)
    print("FINAL REPORT")
    print("="*60)
    print(f"Phones matched to UIDs: {len(phone_to_account)}")
    print(f"D column written: {d_written}")
    print(f"I column written: {i_written}")
    print(f"J column written: {j_written}")

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()