#!/usr/bin/env python3
"""
Process the Xiaolong (小龙) sheet: fill D (trial counts), I (registration
dates) and J (download channels).

Column H (user IDs) is assumed to be already written; this script fills
D/I/J for rows whose phone number (column E) matches an account, and fills
D for rows that already carry a UID in H but have an empty D.
"""
import sys
sys.path.insert(0, '/root/.openclaw/workspace/scripts')
from phone_encrypt import encrypt_phone
import psycopg2
import requests
import time

# NOTE(review): database and Feishu credentials are hard-coded in this file.
# They are kept as-is to preserve behavior, but should be moved to environment
# variables or a secret store and rotated.
PG_CONFIG = {
    'host': 'bj-postgres-16pob4sg.sql.tencentcdb.com',
    'port': 28591,
    'user': 'ai_member',
    'password': 'LdfjdjL83h3h3^$&**YGG*',
    'database': 'vala_bi',
}

SPREADSHEET_TOKEN = 'DU4dsUOJThfbPStMcgBcsMH5nyb'
SHEET_ID = 'dff8c7'

# First/last sheet rows that are read; row index 0 in `rows` is FIRST_DATA_ROW.
FIRST_DATA_ROW = 3
LAST_DATA_ROW = 1142

# Maximum number of values bound into a single SQL "IN (...)" list.
DB_CHUNK_SIZE = 500

# Seconds before an HTTP call is abandoned; requests has no default timeout,
# so without this a stalled connection would hang the script indefinitely.
REQUEST_TIMEOUT = 30


def get_token():
    """Fetch a Feishu tenant access token.

    Returns:
        The ``tenant_access_token`` string from the auth endpoint.

    Raises:
        RuntimeError: if the response carries no token (e.g. bad app
            credentials); the full response payload is included.
    """
    resp = requests.post(
        'https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal',
        json={
            'app_id': 'cli_a929ae22e0b8dcc8',
            'app_secret': 'OtFjMy7p3qE3VvLbMdcWidwgHOnGD4FJ',
        },
        timeout=REQUEST_TIMEOUT,
    )
    payload = resp.json()
    token = payload.get('tenant_access_token')
    if not token:
        raise RuntimeError(f"Failed to obtain tenant_access_token: {payload}")
    return token


def read_sheet(token):
    """Read columns A..J of the data rows and return them as a list of rows.

    Args:
        token: Feishu tenant access token.

    Returns:
        A list of row lists (possibly ragged); an empty list when the
        response contains no values. An API-level error is printed rather
        than raised, and also yields an empty list.
    """
    url = (
        f'https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/'
        f'{SPREADSHEET_TOKEN}/values/'
        f'{SHEET_ID}!A{FIRST_DATA_ROW}:J{LAST_DATA_ROW}'
        f'?valueRenderOption=ToString'
    )
    resp = requests.get(
        url,
        headers={'Authorization': f'Bearer {token}'},
        timeout=REQUEST_TIMEOUT,
    )
    payload = resp.json()
    if payload.get('code', 0) != 0:
        # Surface the failure instead of silently reporting "0 rows".
        print(f"  ERROR reading sheet: {payload}")
    data = payload.get('data') or {}
    value_range = data.get('valueRange') or {}
    return value_range.get('values') or []


def write_batch(token, range_str, values, max_retries=3):
    """Write a block of values to the sheet, retrying when rate limited.

    Args:
        token: Feishu tenant access token.
        range_str: A1-style range without the sheet id, e.g. ``"D5:D9"``.
        values: 2-D list of cell values matching ``range_str``.
        max_retries: Maximum number of attempts.

    Returns:
        True if the API reported success (code 0), False otherwise.
    """
    url = f'https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values'
    body = {"valueRange": {"range": f"{SHEET_ID}!{range_str}", "values": values}}
    headers = {
        'Authorization': f'Bearer {token}',
        'Content-Type': 'application/json',
    }

    for attempt in range(max_retries):
        try:
            resp = requests.put(
                url, headers=headers, json=body, timeout=REQUEST_TIMEOUT,
            )
            result = resp.json()
        except (requests.RequestException, ValueError) as exc:
            # Transport failure or non-JSON body: report it like any other
            # failed batch so the remaining batches can still be written.
            print(f"  ERROR {range_str}: {exc}")
            return False

        code = result.get('code', -1)
        if code == 0:
            return True
        if code == 90217:  # too many request
            wait = 1.0 * (attempt + 1)  # linear back-off
            print(f"  Rate limited on {range_str}, waiting {wait}s...")
            time.sleep(wait)
        else:
            print(f"  ERROR {range_str}: {result}")
            return False

    print(f"  FAILED after {max_retries} retries: {range_str}")
    return False


def batch_writes(token, col, writes):
    """Write single-column cell updates, grouping consecutive rows per call.

    Args:
        token: Feishu tenant access token.
        col: Column letter to write, e.g. ``"D"``.
        writes: Iterable of ``(row_idx, value)`` pairs, where ``row_idx`` is
            the 0-based index into the rows returned by ``read_sheet``.
            The argument is not modified.

    Returns:
        Number of cells successfully written.
    """
    if not writes:
        return 0

    ordered = sorted(writes, key=lambda item: item[0])

    # Collapse runs of consecutive row indices into (start_idx, values).
    batches = []
    batch_start = ordered[0][0]
    batch_vals = [[ordered[0][1]]]
    for (prev_idx, _), (idx, value) in zip(ordered, ordered[1:]):
        if idx == prev_idx + 1:
            batch_vals.append([value])
        else:
            batches.append((batch_start, batch_vals))
            batch_start = idx
            batch_vals = [[value]]
    batches.append((batch_start, batch_vals))

    written = 0
    for start_idx, vals in batches:
        start_row = start_idx + FIRST_DATA_ROW
        end_row = start_row + len(vals) - 1
        range_str = f'{col}{start_row}:{col}{end_row}'
        if write_batch(token, range_str, vals):
            written += len(vals)
            print(f"  {range_str} ({len(vals)} cells)")
        time.sleep(0.08)  # brief pause between calls to stay under the rate limit

    return written


def _cell_text(row, idx):
    """Return cell ``idx`` of ``row`` as stripped text, or '' if absent/empty.

    Cells are coerced with ``str`` because the API may return non-string
    values (e.g. numbers) for some cells; integral floats are rendered
    without the trailing ``.0`` so numeric phone/UID cells still match.
    """
    if len(row) <= idx or not row[idx]:
        return ''
    value = row[idx]
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    return str(value).strip()


def _chunks(items, size):
    """Yield successive slices of ``items`` holding at most ``size`` elements."""
    for start in range(0, len(items), size):
        yield items[start:start + size]


def _query_trial_counts(cur, uids):
    """Count ``bi_user_course_detail`` rows per account for the given UIDs.

    Only rows with ``expire_time IS NULL`` and ``deleted_at IS NULL`` are
    counted; the script treats this count as the trial count.

    Args:
        cur: Open database cursor.
        uids: Sequence of account ids as digit strings.

    Returns:
        Dict mapping ``str(account_id)`` to its row count. Accounts with no
        matching rows are absent from the dict.
    """
    counts = {}
    for chunk in _chunks(uids, DB_CHUNK_SIZE):
        placeholders = ','.join(['%s'] * len(chunk))
        cur.execute(f"""
            SELECT account_id, COUNT(*)
            FROM bi_user_course_detail
            WHERE account_id IN ({placeholders})
              AND expire_time IS NULL
              AND deleted_at IS NULL
            GROUP BY account_id
        """, [int(x) for x in chunk])
        for account_id, count in cur.fetchall():
            counts[str(account_id)] = count
    return counts


def main():
    """Read the sheet, look up accounts/trial counts, and fill D/I/J."""
    token = get_token()

    print("Reading sheet...")
    rows = read_sheet(token)
    print(f"  {len(rows)} rows")

    # Extract phones (column E). One phone may appear on several rows, so
    # every row index is kept rather than only the last one seen.
    print("\nEncrypting phones...")
    phone_rows = {}  # encrypted phone -> [row_idx, ...]
    for i, row in enumerate(rows):
        phone = _cell_text(row, 4)
        if len(phone) == 11 and phone.isdigit():
            phone_rows.setdefault(encrypt_phone(phone), []).append(i)
    print(f"  {len(phone_rows)} phones")

    # UIDs already present in column H.
    existing_uids = set()
    for row in rows:
        h_val = _cell_text(row, 7)
        if h_val.isdigit():
            existing_uids.add(h_val)

    # Query DB
    print("\nQuerying PostgreSQL...")
    accounts = {}  # encrypted phone -> (uid, created_at, channel)
    conn = psycopg2.connect(**PG_CONFIG)
    try:
        with conn.cursor() as cur:
            for chunk in _chunks(list(phone_rows), DB_CHUNK_SIZE):
                placeholders = ','.join(['%s'] * len(chunk))
                cur.execute(f"""
                    SELECT id, tel_encrypt, created_at, download_channel
                    FROM bi_vala_app_account
                    WHERE tel_encrypt IN ({placeholders})
                      AND status = 1
                      AND deleted_at IS NULL
                """, chunk)
                for uid, enc, created, channel in cur.fetchall():
                    accounts[enc] = (str(uid), created, channel or '')
            print(f"  Matched {len(accounts)} UIDs")

            # Trial counts for accounts matched by phone
            matched_uids = {uid for uid, _, _ in accounts.values()}
            uid_to_trial = _query_trial_counts(cur, list(matched_uids))
            print(f"  Trial counts for {len(uid_to_trial)} users")

            # Trial counts for UIDs in H that were not queried above.
            missing_uids = existing_uids - matched_uids
            if missing_uids:
                print(f"  Querying trial counts for {len(missing_uids)} existing UIDs...")
                extra = _query_trial_counts(cur, list(missing_uids))
                uid_to_trial.update(extra)
                print(f"  Added {len(extra)} more")
    finally:
        # Always release the connection, even if a query fails.
        conn.close()

    # Build writes: row_idx -> {'D': ..., 'I': ..., 'J': ...}
    row_data = {}
    for enc, row_indices in phone_rows.items():
        account = accounts.get(enc)
        if not account:
            continue
        uid, created, channel = account
        trial = uid_to_trial.get(uid, 0)
        cells = {
            'D': str(trial) if trial > 0 else '',
            'I': created.strftime('%Y-%m-%d') if created else '',
            'J': channel,
        }
        for row_idx in row_indices:
            row_data[row_idx] = dict(cells)

    # Also fill D for rows with existing H UID but empty D
    for i, row in enumerate(rows):
        if i in row_data:
            continue
        h_val = _cell_text(row, 7)
        if not h_val.isdigit():
            continue
        if len(row) > 3 and row[3]:
            continue  # D already has a value
        trial = uid_to_trial.get(h_val, 0)
        if trial > 0:
            row_data[i] = {'D': str(trial)}

    # Prepare column writes; empty values are skipped so cells stay untouched.
    column_writes = {
        col: [(idx, data[col]) for idx, data in row_data.items() if data.get(col)]
        for col in ('D', 'I', 'J')
    }
    print(
        f"\nPrepared: D={len(column_writes['D'])}, "
        f"I={len(column_writes['I'])}, J={len(column_writes['J'])}"
    )

    written = {}
    for col in ('D', 'I', 'J'):
        print(f"\n=== Writing {col} column ===")
        written[col] = batch_writes(token, col, column_writes[col])

    # Report
    print("\n" + "=" * 60)
    print("FINAL REPORT")
    print("=" * 60)
    print(f"Phones matched to UIDs: {len(accounts)}")
    print(f"D column written: {written['D']}")
    print(f"I column written: {written['I']}")
    print(f"J column written: {written['J']}")


if __name__ == '__main__':
    main()