#!/usr/bin/env python3
"""
Process the "小龙" (Xiaolong) sheet: match phones to UIDs, fill trial counts,
registration dates, download channels.

Columns touched (0-based index within the A..J range that is read):
    D (3) trial lesson count   - written
    E (4) phone number         - read
    H (7) account UID          - read / written
    I (8) registration date    - written
    J (9) download channel     - written
"""
import json
import sys
import time

# Must run before the project-local import below so `phone_encrypt` resolves.
sys.path.insert(0, '/root/.openclaw/workspace/scripts')

import psycopg2  # noqa: E402
import requests  # noqa: E402
from phone_encrypt import encrypt_phone  # noqa: E402

# === Config ===
# SECURITY NOTE(review): the database password and the Feishu app secret are
# hard-coded in this file. They should be moved to environment variables or a
# secret store and rotated; values are left untouched here to keep behavior.
PG_CONFIG = {
    'host': 'bj-postgres-16pob4sg.sql.tencentcdb.com',
    'port': 28591,
    'user': 'ai_member',
    'password': 'LdfjdjL83h3h3^$&**YGG*',
    'database': 'vala_bi',
}

FEISHU_TOKEN = None  # cached tenant access token, filled by get_feishu_token()
SPREADSHEET_TOKEN = 'DU4dsUOJThfbPStMcgBcsMH5nyb'
SHEET_ID = 'dff8c7'

FIRST_DATA_ROW = 3        # sheet row number of rows[0]
DATA_RANGE = 'A3:J1142'   # range read from the sheet
COL_TRIAL = 3             # D
COL_PHONE = 4             # E
COL_UID = 7               # H
QUERY_CHUNK = 500         # max number of values per SQL IN (...) list
HTTP_TIMEOUT = 30         # seconds; without it a stalled request hangs forever


def get_feishu_token():
    """Return the Feishu tenant access token, fetching it once and caching it.

    Raises:
        RuntimeError: if the auth response carries no token.
    """
    global FEISHU_TOKEN
    if FEISHU_TOKEN:
        return FEISHU_TOKEN
    resp = requests.post(
        'https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal',
        json={'app_id': 'cli_a929ae22e0b8dcc8',
              'app_secret': 'OtFjMy7p3qE3VvLbMdcWidwgHOnGD4FJ'},
        timeout=HTTP_TIMEOUT,
    )
    payload = resp.json()
    token = payload.get('tenant_access_token')
    if not token:
        # Surface the API's own error instead of an opaque KeyError.
        raise RuntimeError(f"Feishu auth failed: {payload}")
    FEISHU_TOKEN = token
    return FEISHU_TOKEN


def read_sheet():
    """Read all data from the sheet.

    Returns:
        The list of row lists for DATA_RANGE, or [] when the response has no
        values (an error message is printed if the API reports a failure).
    """
    token = get_feishu_token()
    url = (f'https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/'
           f'{SPREADSHEET_TOKEN}/values/{SHEET_ID}!{DATA_RANGE}'
           f'?valueRenderOption=ToString')
    resp = requests.get(url, headers={'Authorization': f'Bearer {token}'},
                        timeout=HTTP_TIMEOUT)
    data = resp.json()
    if data.get('code') != 0:
        # Otherwise a failed read is indistinguishable from an empty sheet.
        print(f"  ERROR reading {DATA_RANGE}: {data}")
    value_range = (data.get('data') or {}).get('valueRange') or {}
    return value_range.get('values') or []


def write_batch(range_str, values):
    """Write a batch of values to the sheet.

    Args:
        range_str: A1-style range without the sheet id, e.g. ``'H3:H10'``.
        values: list of rows, each a list of cell values.

    Returns:
        True when the API reports success (``code == 0``), False otherwise.
    """
    token = get_feishu_token()
    url = (f'https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/'
           f'{SPREADSHEET_TOKEN}/values')
    body = {"valueRange": {"range": f"{SHEET_ID}!{range_str}", "values": values}}
    resp = requests.put(url, headers={
        'Authorization': f'Bearer {token}',
        'Content-Type': 'application/json'
    }, json=body, timeout=HTTP_TIMEOUT)
    result = resp.json()
    if result.get('code') != 0:
        print(f"  ERROR writing {range_str}: {result}")
        return False
    return True


def _cell(row, col):
    """Return the stripped text of ``row[col]``, or '' if missing/empty."""
    if len(row) > col and row[col]:
        # str() guards against non-string cell payloads.
        return str(row[col]).strip()
    return ''


def _chunks(items, size):
    """Yield successive slices of ``items`` with at most ``size`` elements."""
    for start in range(0, len(items), size):
        yield items[start:start + size]


def _fetch_accounts(cur, enc_list):
    """Map encrypted phone -> (uid_str, created_at, download_channel)."""
    accounts = {}
    for chunk in _chunks(enc_list, QUERY_CHUNK):
        # Only the number of placeholders is interpolated; values are bound.
        placeholders = ','.join(['%s'] * len(chunk))
        cur.execute(f"""
            SELECT id, tel_encrypt, created_at, download_channel
            FROM bi_vala_app_account
            WHERE tel_encrypt IN ({placeholders})
              AND status = 1
              AND deleted_at IS NULL
        """, chunk)
        for uid, enc, created, channel in cur.fetchall():
            accounts[enc] = (str(uid), created, channel or '')
    return accounts


def _fetch_trial_counts(cur, uids):
    """Map uid_str -> number of matching bi_user_course_detail rows."""
    counts = {}
    for chunk in _chunks(uids, QUERY_CHUNK):
        placeholders = ','.join(['%s'] * len(chunk))
        cur.execute(f"""
            SELECT account_id, COUNT(*) as trial_count
            FROM bi_user_course_detail
            WHERE account_id IN ({placeholders})
              AND expire_time IS NULL
              AND deleted_at IS NULL
            GROUP BY account_id
        """, [int(x) for x in chunk])
        for account_id, trial_count in cur.fetchall():
            counts[str(account_id)] = trial_count
    return counts


def _group_consecutive(writes):
    """Group (row_idx, value) pairs into runs of consecutive row indexes.

    Returns a list of ``(start_idx, [[value], ...])`` sorted by ``start_idx``,
    so each run can be written with a single range request.
    """
    batches = []
    prev_idx = None
    for row_idx, value in sorted(writes, key=lambda w: w[0]):
        if prev_idx is not None and row_idx == prev_idx + 1:
            batches[-1][1].append([value])
        else:
            batches.append((row_idx, [[value]]))
        prev_idx = row_idx
    return batches


def _write_column(column, row_data):
    """Write every non-empty ``column`` value in ``row_data``; return cells written."""
    writes = [(idx, data[column]) for idx, data in row_data.items()
              if data.get(column)]
    written = 0
    for start_idx, vals in _group_consecutive(writes):
        start_row = start_idx + FIRST_DATA_ROW
        end_row = start_row + len(vals) - 1
        range_str = f'{column}{start_row}:{column}{end_row}'
        if write_batch(range_str, vals):
            written += len(vals)
            print(f"  Wrote {range_str} ({len(vals)} cells)")
        time.sleep(0.05)  # small pause between write requests
    return written


def main():
    """Run the full read -> match -> query -> write -> report pipeline."""
    # 1. Read sheet data
    print("Reading sheet data...")
    rows = read_sheet()
    print(f"  Got {len(rows)} rows")

    # 2. Extract phones and encrypt
    print("\nExtracting and encrypting phones...")
    phone_map = {}   # encrypted -> phone
    phone_rows = {}  # encrypted -> [row_idx, ...]; the same phone may repeat
    existing_uid_rows = {}  # row_idx -> UID already in H while D is empty
    for i, row in enumerate(rows):
        phone = _cell(row, COL_PHONE)
        # Only process 11-digit phone numbers
        if len(phone) == 11 and phone.isdigit():
            enc = encrypt_phone(phone)
            phone_map[enc] = phone
            phone_rows.setdefault(enc, []).append(i)

        h_val = _cell(row, COL_UID)
        d_empty = len(row) <= COL_TRIAL or not row[COL_TRIAL]
        # isdecimal() guarantees int() succeeds; normalise so the key matches
        # the str(account_id) keys built from the database result.
        if d_empty and h_val.isdecimal():
            existing_uid_rows[i] = str(int(h_val))
    print(f"  Found {len(phone_map)} valid 11-digit phones")

    # 3. Query PostgreSQL for phone matching
    print("\nQuerying PostgreSQL for phone matching...")
    conn = psycopg2.connect(**PG_CONFIG)
    try:
        cur = conn.cursor()
        try:
            accounts = _fetch_accounts(cur, list(phone_map))
            print(f"  Matched {len(accounts)} phones to UIDs")

            # 4. Query trial lesson counts for matched UIDs, plus UIDs that
            #    were already present in H (otherwise their lookup below could
            #    never find a count).
            print("\nQuerying trial lesson counts...")
            all_uids = sorted(
                {uid for uid, _, _ in accounts.values()}
                | set(existing_uid_rows.values()),
                key=int,
            )
            uid_to_trial_count = _fetch_trial_counts(cur, all_uids)
            print(f"  Found trial counts for {len(uid_to_trial_count)} users")
        finally:
            cur.close()
    finally:
        # Always release the connection, even if a query fails.
        conn.close()

    # 5. Prepare writes
    print("\nPreparing writes...")
    # row_idx -> {H: uid, D: trial_count, I: reg_date, J: channel}
    row_data = {}
    for enc, row_idxs in phone_rows.items():
        account = accounts.get(enc)
        if account is None:
            continue
        uid, created, channel = account
        trial_count = uid_to_trial_count.get(uid, 0)
        entry = {
            'H': uid,
            'D': str(trial_count) if trial_count > 0 else '',
            'I': created.strftime('%Y-%m-%d') if created else '',
            'J': channel,
        }
        # Fill every row that carries this phone, not just the last one.
        for row_idx in row_idxs:
            row_data[row_idx] = dict(entry)

    # Also fill D for rows where H already has a valid UID but D is empty
    print("  Checking rows with existing UIDs but empty D column...")
    for row_idx, uid in existing_uid_rows.items():
        if row_idx in row_data:
            continue  # Already processed above
        trial_count = uid_to_trial_count.get(uid, 0)
        if trial_count > 0:
            row_data[row_idx] = {'D': str(trial_count)}
    # TODO: I/J are not filled for these rows; that needs an account lookup
    # by id, which this script does not perform.

    # 6-9. Write H, D, I, J columns
    columns = (
        ('H', 'UIDs'),
        ('D', 'trial counts'),
        ('I', 'registration dates'),
        ('J', 'download channels'),
    )
    written = {}
    for column, label in columns:
        print(f"\nWriting {column} column ({label})...")
        written[column] = _write_column(column, row_data)

    # 10. Report
    print("\n" + "="*60)
    print("FINAL REPORT")
    print("="*60)
    print(f"Total rows processed: {len(rows)}")
    print(f"Phones extracted: {len(phone_map)}")
    print(f"Phones matched to UIDs: {len(accounts)}")
    print(f"Phones NOT matched: {len(phone_map) - len(accounts)}")
    print(f"H column (UIDs) written: {written['H']}")
    print(f"D column (trial counts) written: {written['D']}")
    print(f"I column (reg dates) written: {written['I']}")
    print(f"J column (channels) written: {written['J']}")

    # Show unmatched phones
    unmatched = []
    for enc, phone in phone_map.items():
        if enc not in accounts:
            row_label = ', '.join(
                str(i + FIRST_DATA_ROW) for i in phone_rows[enc])
            unmatched.append(f"  Row {row_label}: {phone}")
    if unmatched:
        print(f"\nUnmatched phones ({len(unmatched)}):")
        for u in unmatched[:20]:
            print(u)
        if len(unmatched) > 20:
            print(f"  ... and {len(unmatched)-20} more")


if __name__ == '__main__':
    main()