#!/usr/bin/env python3
"""Update the "小龙" sheet with UID, trial count, registration date, and download channel.

Workflow (see ``main``):

1. Read rows A3:J1142 of the Feishu sheet.
2. For rows that have an 11-digit phone but no UID in column H, look the
   UID up in MySQL by the masked phone number (first 3 + last 4 digits).
3. For every known UID, fetch registration date, download channel and
   trial-course count from PostgreSQL.
4. Write columns H (UID), D (trial count), I (registration date) and
   J (download channel) back to the sheet in contiguous ranges.
"""

import json
import os
import re
import sys
import time
from contextlib import closing

import mysql.connector
import psycopg2
import requests

# SECURITY: the literal credentials below are kept only as fallbacks so the
# script keeps working unchanged.  They are committed in source and should be
# rotated; supply the real values through the environment variables instead.
FEISHU_TOKEN = os.environ.get(
    "FEISHU_TOKEN", "t-g10464c0UK5L67JVXSDDT3EWM4DPLSDY5C7R7NS6"
)
SPREADSHEET_TOKEN = "DU4dsUOJThfbPStMcgBcsMH5nyb"
SHEET_ID = "dff8c7"

# DB connections
MYSQL_CONFIG = {
    "host": "bj-cdb-dh2fkqa0.sql.tencentcdb.com",
    "port": 27751,
    "user": "read_only",
    "password": os.environ.get("VALA_MYSQL_PASSWORD", "fsdo45ijfmfmuu77$%^&"),
    "database": "vala_user",
}

PG_CONFIG = {
    "host": "bj-postgres-16pob4sg.sql.tencentcdb.com",
    "port": 28591,
    "user": "ai_member",
    "password": os.environ.get("VALA_PG_PASSWORD", "LdfjdjL83h3h3^$&**YGG*"),
    "database": "vala_bi",
}

# Sheet layout: data occupies rows 3..1142, columns A..J.
FIRST_DATA_ROW = 3
LAST_DATA_ROW = 1142

# 0-based column indices inside a row returned by read_sheet().
COL_TRIAL_COUNT = 3  # D
COL_PHONE = 4  # E
COL_UID = 7  # H
COL_REG_DATE = 8  # I
COL_CHANNEL = 9  # J

# Columns are written in this order: H first, then D, I, J.
WRITE_ORDER = (COL_UID, COL_TRIAL_COUNT, COL_REG_DATE, COL_CHANNEL)

# Marker found in column H for accounts that were not registered at the time
# the sheet was filled in; such rows are looked up again.
UNREGISTERED = "未注册"

PHONE_RE = re.compile(r"\d{11}", re.ASCII)

HTTP_TIMEOUT = 30  # seconds; without a timeout requests can block forever
THROTTLE_SECONDS = 0.05  # pause between consecutive queries / writes
MYSQL_CHUNK_SIZE = 50
PG_CHUNK_SIZE = 100


def _feishu_json(response):
    """Decode a Feishu response, mapping a non-JSON body to an error dict."""
    try:
        return response.json()
    except ValueError:
        return {
            "code": -1,
            "msg": f"HTTP {response.status_code}: {response.text[:200]}",
        }


def read_sheet():
    """Read all data rows (A..J) from the sheet.

    Returns the list of rows from the API's ``valueRange.values``.  Prints
    the error and exits with status 1 if the request fails or the API
    reports a non-zero ``code``.
    """
    url = (
        f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}"
        f"/values/{SHEET_ID}!A{FIRST_DATA_ROW}:J{LAST_DATA_ROW}"
        "?valueRenderOption=ToString"
    )
    headers = {"Authorization": f"Bearer {FEISHU_TOKEN}"}
    try:
        r = requests.get(url, headers=headers, timeout=HTTP_TIMEOUT)
    except requests.RequestException as exc:
        print(f"Error reading sheet: {exc}")
        sys.exit(1)
    data = _feishu_json(r)
    if data.get("code") != 0:
        print(f"Error reading sheet: {data}")
        sys.exit(1)
    return data["data"]["valueRange"]["values"]


def _column_letter(col):
    """Return the sheet column letter for *col* (a letter or 0-based index)."""
    if isinstance(col, str):
        return col
    letters = ""
    n = col + 1
    while n > 0:
        n, rem = divmod(n - 1, 26)
        letters = chr(ord("A") + rem) + letters
    return letters


def _contiguous_runs(cells):
    """Yield ``(start_row, end_row, values)`` for runs of consecutive rows.

    *cells* is a list of ``(row_num, value)`` sorted by ``row_num``;
    ``values`` is shaped ``[[v], [v], ...]`` as write_range() expects.
    """
    run_start = prev_row = None
    values = []
    for row_num, value in cells:
        if values and row_num != prev_row + 1:
            yield run_start, prev_row, values
            values = []
        if not values:
            run_start = row_num
        values.append([value])
        prev_row = row_num
    if values:
        yield run_start, prev_row, values


def write_batch(rows_data):
    """Write a batch of cells to the sheet.

    rows_data is a list of (row_num, col, value) tuples, where ``row_num``
    is the 1-based sheet row and ``col`` is a column letter or a 0-based
    column index.  Cells are grouped by column (columns are processed in
    order of first appearance) and each column is written as a series of
    contiguous ranges.

    Returns the number of cells that were written successfully.
    """
    by_column = {}
    for row_num, col, value in rows_data:
        by_column.setdefault(_column_letter(col), []).append((row_num, value))

    written = 0
    for col_letter, cells in by_column.items():
        cells.sort(key=lambda cell: cell[0])
        for start_row, end_row, values in _contiguous_runs(cells):
            if write_range(start_row, end_row, col_letter, values):
                written += len(values)
            time.sleep(THROTTLE_SECONDS)
    return written


def write_range(start_row, end_row, col_letter, values):
    """Write values to a column range.

    start_row and end_row are 1-based sheet row numbers; ``values`` holds
    one single-element list per row.  Returns True on success, False (after
    printing the error) if the request fails or the API reports an error.
    """
    range_str = f"{SHEET_ID}!{col_letter}{start_row}:{col_letter}{end_row}"
    url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values"
    headers = {
        "Authorization": f"Bearer {FEISHU_TOKEN}",
        "Content-Type": "application/json",
    }
    body = {
        "valueRange": {
            "range": range_str,
            "values": values,
        }
    }
    try:
        r = requests.put(url, headers=headers, json=body, timeout=HTTP_TIMEOUT)
    except requests.RequestException as exc:
        print(f"Error writing {range_str}: {exc}")
        return False
    data = _feishu_json(r)
    if data.get("code") != 0:
        print(f"Error writing {range_str}: {data}")
        return False
    print(f"  Wrote {range_str}: {len(values)} cells")
    return True


def _cell_text(row, index):
    """Return the stripped text of ``row[index]``.

    Short rows and ``None`` cells yield ``""``; non-string cells are
    converted with ``str`` so that ``.strip()`` can never fail.
    """
    if index >= len(row) or row[index] is None:
        return ""
    return str(row[index]).strip()


def _is_uid(text):
    """Return True if *text* looks like a UID (non-empty ASCII digits)."""
    return text.isascii() and text.isdigit()


def _chunks(items, size):
    """Yield consecutive slices of *items* with at most *size* elements."""
    for start in range(0, len(items), size):
        yield items[start:start + size]


def _mask_key(phone):
    """Return the (first 3, last 4) digits that survive phone masking."""
    return phone[:3], phone[-4:]


def _lookup_uids_by_phone(phones):
    """Map each phone in *phones* to an account UID via MySQL.

    ``vala_app_account.tel`` is masked like "138****3774", so accounts can
    only be matched on the first 3 and last 4 digits.  Every phone sharing
    a mask receives the same UID; when several accounts share a mask the
    one with the highest id is used.  Both situations are inherently
    ambiguous and are reported on stdout.
    """
    phones_by_mask = {}
    for phone in phones:
        bucket = phones_by_mask.setdefault(_mask_key(phone), [])
        if phone not in bucket:
            bucket.append(phone)

    accounts_by_mask = {}
    with closing(mysql.connector.connect(**MYSQL_CONFIG)) as conn:
        with closing(conn.cursor()) as cur:
            for chunk in _chunks(list(phones_by_mask), MYSQL_CHUNK_SIZE):
                conditions = " OR ".join(["tel LIKE %s"] * len(chunk))
                # ORDER BY id makes "last account wins" deterministic.
                query = (
                    "SELECT id, tel FROM vala_app_account "
                    f"WHERE ({conditions}) AND deleted_at IS NULL ORDER BY id"
                )
                patterns = [f"{first3}%{last4}" for first3, last4 in chunk]
                cur.execute(query, patterns)
                for uid, tel in cur.fetchall():
                    accounts_by_mask.setdefault(_mask_key(tel), []).append(str(uid))
                time.sleep(THROTTLE_SECONDS)

    phone_to_uid = {}
    multi_account = 0
    multi_phone = 0
    for mask, uids in accounts_by_mask.items():
        matched_phones = phones_by_mask.get(mask, [])
        if not matched_phones:
            continue
        multi_account += len(set(uids)) > 1
        multi_phone += len(matched_phones) > 1
        for phone in matched_phones:
            phone_to_uid[phone] = uids[-1]

    if multi_account:
        print(f"WARNING: {multi_account} phone masks matched more than one account")
    if multi_phone:
        print(f"WARNING: {multi_phone} phone masks are shared by several sheet phones")
    return phone_to_uid


def _fetch_account_stats(uids):
    """Fetch registration info and trial counts for *uids* from PostgreSQL.

    Returns ``(uid_reg_info, uid_trial_count)`` where ``uid_reg_info`` maps
    uid -> {"created_at", "download_channel"} and ``uid_trial_count`` maps
    uid -> number of matching ``bi_user_course_detail`` rows.  UIDs without
    matching rows are absent from the respective dict.
    """
    uid_reg_info = {}
    uid_trial_count = {}
    uid_list = sorted(uids)

    with closing(psycopg2.connect(**PG_CONFIG)) as conn:
        with closing(conn.cursor()) as cur:
            # Query bi_vala_app_account for reg info
            for chunk in _chunks(uid_list, PG_CHUNK_SIZE):
                placeholders = ",".join(["%s"] * len(chunk))
                query = (
                    "SELECT id, created_at::date, download_channel "
                    f"FROM bi_vala_app_account WHERE id IN ({placeholders}) "
                    "AND status=1 AND deleted_at IS NULL"
                )
                cur.execute(query, chunk)
                for uid, created_at, download_channel in cur.fetchall():
                    uid_reg_info[str(uid)] = {
                        "created_at": str(created_at) if created_at else "",
                        "download_channel": download_channel or "",
                    }
                time.sleep(THROTTLE_SECONDS)

            # Query bi_user_course_detail for trial count
            for chunk in _chunks(uid_list, PG_CHUNK_SIZE):
                placeholders = ",".join(["%s"] * len(chunk))
                query = (
                    "SELECT account_id, COUNT(*) FROM bi_user_course_detail "
                    f"WHERE account_id IN ({placeholders}) "
                    "AND expire_time IS NULL AND deleted_at IS NULL "
                    "GROUP BY account_id"
                )
                cur.execute(query, chunk)
                for uid, cnt in cur.fetchall():
                    uid_trial_count[str(uid)] = cnt
                time.sleep(THROTTLE_SECONDS)

    return uid_reg_info, uid_trial_count


def _build_updates(rows, phone_to_uid, uid_reg_info, uid_trial_count):
    """Return the list of ``(row_num, col_index, value)`` cells to write."""
    updates = []
    for i, row in enumerate(rows):
        row_num = i + FIRST_DATA_ROW
        phone = _cell_text(row, COL_PHONE)
        h_val = _cell_text(row, COL_UID)

        # A UID already present in H is authoritative; the phone lookup is
        # only a masked-number guess, so it must not override it.
        if _is_uid(h_val):
            uid = h_val
        elif PHONE_RE.fullmatch(phone):
            uid = phone_to_uid.get(phone)
        else:
            uid = None
        if not uid:
            continue

        # H column: write UID only where H was empty or marked unregistered
        if not h_val or h_val == UNREGISTERED:
            updates.append((row_num, COL_UID, uid))

        # D, I, J columns: write if we have data
        if uid in uid_trial_count:
            updates.append((row_num, COL_TRIAL_COUNT, str(uid_trial_count[uid])))
        info = uid_reg_info.get(uid)
        if info:
            if info["created_at"]:
                updates.append((row_num, COL_REG_DATE, info["created_at"]))
            if info["download_channel"]:
                updates.append((row_num, COL_CHANNEL, info["download_channel"]))
    return updates


def main():
    """Run the full read -> look up -> write-back update of the sheet."""
    print("Reading sheet data...")
    rows = read_sheet()
    print(f"Got {len(rows)} rows")

    # Step 1: Identify rows needing phone lookup
    # Columns: A=0(sheet), B=1(name), C=2(date), D=3(trial_count), E=4(phone),
    # F=5(grade), G=6(notes), H=7(UID), I=8(reg_date), J=9(channel)
    lookup_phones = []  # one entry per row that needs a UID
    existing_uids = set()  # UIDs already in H column
    for row in rows:
        phone = _cell_text(row, COL_PHONE)
        h_val = _cell_text(row, COL_UID)
        if _is_uid(h_val):
            existing_uids.add(h_val)
        if PHONE_RE.fullmatch(phone) and (not h_val or h_val == UNREGISTERED):
            lookup_phones.append(phone)

    print(f"Rows with 11-digit phone and empty H: {len(lookup_phones)}")
    print(f"Existing UIDs in H column: {len(existing_uids)}")

    # Step 2: Query MySQL for phone -> UID mapping
    phone_to_uid = _lookup_uids_by_phone(lookup_phones) if lookup_phones else {}
    print(f"Phone -> UID matches found: {len(phone_to_uid)}")

    # Step 3: Collect all UIDs to query PostgreSQL
    all_uids = existing_uids | set(phone_to_uid.values())
    print(f"Total unique UIDs to query: {len(all_uids)}")

    # Step 4: Query PostgreSQL for registration info and trial count
    if all_uids:
        uid_reg_info, uid_trial_count = _fetch_account_stats(all_uids)
    else:
        uid_reg_info, uid_trial_count = {}, {}
    print(f"UIDs with reg info: {len(uid_reg_info)}")
    print(f"UIDs with trial count: {len(uid_trial_count)}")

    # Step 5: Build the update data
    updates = _build_updates(rows, phone_to_uid, uid_reg_info, uid_trial_count)
    print(f"Total updates to write: {len(updates)}")

    # Step 6: Write updates column by column (H first, then D, I, J).
    # The sort is stable, so rows stay in sheet order within each column.
    ordered = sorted(updates, key=lambda upd: WRITE_ORDER.index(upd[1]))
    total_written = write_batch(ordered)

    # Summary
    per_column = {col: 0 for col in WRITE_ORDER}
    for _, col_idx, _ in updates:
        per_column[col_idx] += 1

    print("\n=== SUMMARY ===")
    print(f"Phones matched in MySQL: {len(phone_to_uid)}")
    print(f"H column (UID) written: {per_column[COL_UID]}")
    print(f"D column (trial count) written: {per_column[COL_TRIAL_COUNT]}")
    print(f"I column (reg date) written: {per_column[COL_REG_DATE]}")
    print(f"J column (channel) written: {per_column[COL_CHANNEL]}")
    print(f"Total cells written: {total_written}")


if __name__ == "__main__":
    main()