#!/usr/bin/env python3
"""Update the 小龙 sheet with UID, trial count, registration date, and download channel.

V2: Fixed phone matching and batch writes.

Steps performed by ``main``:

1. Read rows A..J of the Feishu sheet.
2. For rows that have an 11-digit phone number and no UID yet, look the UID
   up in MySQL (``vala_app_account``).
3. For every known UID, fetch registration date / download channel and the
   trial-course count from PostgreSQL.
4. Write columns H (UID), D (trial count), I (registration date) and
   J (download channel) back, one request per column.
"""
import json
import os
import re
import sys
import time
from contextlib import closing

import mysql.connector
import psycopg2
import requests

# NOTE(review): credentials are committed in this file. The environment
# variables below take precedence when set; the literals are kept only so
# existing invocations keep working. Rotate them and drop the fallbacks.
FEISHU_TOKEN = os.environ.get("FEISHU_TOKEN", "t-g10464c0UK5L67JVXSDDT3EWM4DPLSDY5C7R7NS6")
SPREADSHEET_TOKEN = "DU4dsUOJThfbPStMcgBcsMH5nyb"
SHEET_ID = "dff8c7"

MYSQL_CONFIG = {
    "host": "bj-cdb-dh2fkqa0.sql.tencentcdb.com",
    "port": 27751,
    "user": "read_only",
    "password": os.environ.get("VALA_MYSQL_PASSWORD", "fsdo45ijfmfmuu77$%^&"),
    "database": "vala_user",
}

PG_CONFIG = {
    "host": "bj-postgres-16pob4sg.sql.tencentcdb.com",
    "port": 28591,
    "user": "ai_member",
    "password": os.environ.get("VALA_PG_PASSWORD", "LdfjdjL83h3h3^$&**YGG*"),
    "database": "vala_bi",
}

# Sheet layout: data occupies rows FIRST_ROW..LAST_ROW of columns A..J.
FIRST_ROW = 3
LAST_ROW = 1142
N_COLS = 10
# Zero-based column indexes inside a row read from A..J.
COL_D = 3      # trial count
COL_PHONE = 4  # column E
COL_H = 7      # UID, or the "not registered" marker
COL_I = 8      # registration date
COL_J = 9      # download channel

UNREGISTERED = "未注册"
PHONE_RE = re.compile(r"\d{11}")

HTTP_TIMEOUT = 30  # seconds; without it a stalled connection blocks forever
MYSQL_CHUNK = 50
PG_CHUNK = 100


def read_sheet() -> list:
    """Fetch the data rows (A..J) of the sheet as rendered strings.

    Returns:
        The list of rows from the API response; each row is a list of cell
        values. An empty list if the response carries no values.

    Exits the process with status 1 if the API response code is not 0.
    """
    url = (
        f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}"
        f"/values/{SHEET_ID}!A{FIRST_ROW}:J{LAST_ROW}?valueRenderOption=ToString"
    )
    r = requests.get(
        url,
        headers={"Authorization": f"Bearer {FEISHU_TOKEN}"},
        timeout=HTTP_TIMEOUT,
    )
    data = r.json()
    if data.get("code") != 0:
        print(f"Error reading sheet: {data}")
        sys.exit(1)
    return data["data"]["valueRange"].get("values") or []


def write_range(range_str, values) -> bool:
    """Write ``values`` (a list of rows) into ``range_str`` of the sheet.

    Args:
        range_str: Target range in ``<sheetId>!<A1 range>`` form.
        values: Two-dimensional list of cell values for that range.

    Returns:
        True if the API reported code 0, False otherwise (the error payload
        is printed).
    """
    url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values"
    headers = {
        "Authorization": f"Bearer {FEISHU_TOKEN}",
        "Content-Type": "application/json",
    }
    body = {"valueRange": {"range": range_str, "values": values}}
    r = requests.put(url, headers=headers, json=body, timeout=HTTP_TIMEOUT)
    data = r.json()
    if data.get("code") != 0:
        print(f"  ERROR {range_str}: {data}")
        return False
    print(f"  OK {range_str}: {len(values)} cells")
    return True


def _cell_text(value) -> str:
    """Return a cell value as stripped text; None becomes the empty string."""
    return "" if value is None else str(value).strip()


def _chunks(items, size):
    """Yield consecutive slices of ``items`` holding at most ``size`` elements."""
    for start in range(0, len(items), size):
        yield items[start:start + size]


def _parse_rows(rows) -> list:
    """Pad each row to N_COLS cells and extract the phone and H-column text."""
    parsed = []
    for idx, raw in enumerate(rows):
        cells = list(raw or [])
        cells.extend([""] * (N_COLS - len(cells)))
        parsed.append({
            "idx": idx,
            "row_num": idx + FIRST_ROW,
            "phone": _cell_text(cells[COL_PHONE]),
            "h_val": _cell_text(cells[COL_H]),
            "cells": cells,  # raw values, used to keep untouched cells intact
        })
    return parsed


def _lookup_uids_by_phone(phone_rows) -> dict:
    """Map phone numbers to account ids (as strings) using MySQL.

    Matching uses only the first three and last four digits of the phone
    number. NOTE(review): presumably ``tel`` is stored masked, which is why
    the middle digits are ignored -- confirm. Numbers that share those seven
    digits cannot be told apart: within a chunk the first such phone gets the
    match, and if several accounts match, the last row returned wins.
    """
    phone_to_uid = {}
    if not phone_rows:
        return phone_to_uid

    with closing(mysql.connector.connect(**MYSQL_CONFIG)) as conn, \
            closing(conn.cursor()) as cur:
        for chunk in _chunks(phone_rows, MYSQL_CHUNK):
            # (prefix, suffix) -> first phone of the chunk with that key.
            by_key = {}
            for p in chunk:
                by_key.setdefault((p["phone"][:3], p["phone"][-4:]), p["phone"])

            # Patterns are passed as bound parameters, never spliced into SQL.
            patterns = [f"{p['phone'][:3]}%{p['phone'][-4:]}" for p in chunk]
            where = " OR ".join(["tel LIKE %s"] * len(patterns))
            cur.execute(
                f"SELECT id, tel FROM vala_app_account WHERE ({where}) AND deleted_at IS NULL",
                patterns,
            )
            for uid, tel in cur.fetchall():
                phone = by_key.get((tel[:3], tel[-4:]))
                if phone is not None:
                    phone_to_uid[phone] = str(uid)
            time.sleep(0.05)
    return phone_to_uid


def _fetch_pg_info(all_uids):
    """Fetch registration info and trial counts for ``all_uids`` from PostgreSQL.

    Returns:
        ``(uid_reg, uid_trial)`` where ``uid_reg`` maps uid ->
        ``{"created_at": str, "download_channel": str}`` and ``uid_trial``
        maps uid -> row count from ``bi_user_course_detail``. Keys are
        strings.
    """
    uid_reg = {}
    uid_trial = {}
    if not all_uids:
        return uid_reg, uid_trial

    uid_list = list(all_uids)
    with closing(psycopg2.connect(**PG_CONFIG)) as conn, \
            closing(conn.cursor()) as cur:
        for chunk in _chunks(uid_list, PG_CHUNK):
            ph = ",".join(["%s"] * len(chunk))
            cur.execute(
                f"SELECT id, created_at::date, download_channel FROM bi_vala_app_account "
                f"WHERE id IN ({ph}) AND status=1 AND deleted_at IS NULL",
                chunk,
            )
            for uid, ca, dc in cur.fetchall():
                uid_reg[str(uid)] = {
                    "created_at": str(ca) if ca else "",
                    "download_channel": dc or "",
                }
            time.sleep(0.05)

        for chunk in _chunks(uid_list, PG_CHUNK):
            ph = ",".join(["%s"] * len(chunk))
            cur.execute(
                f"SELECT account_id, COUNT(*) FROM bi_user_course_detail "
                f"WHERE account_id IN ({ph}) AND expire_time IS NULL AND deleted_at IS NULL "
                f"GROUP BY account_id",
                chunk,
            )
            for uid, cnt in cur.fetchall():
                uid_trial[str(uid)] = cnt
            time.sleep(0.05)
    return uid_reg, uid_trial


def _build_column_updates(parsed, phone_to_uid, uid_reg, uid_trial):
    """Build the full H/D/I/J column values plus per-column update counts.

    Each column starts from the values currently in the sheet, so rows that
    receive no new value are written back as they were read instead of being
    blanked. Because the sheet is read with ``valueRenderOption=ToString``,
    those cells are written back as their rendered text.
    """
    def seed(col):
        return ["" if p["cells"][col] is None else p["cells"][col] for p in parsed]

    columns = {"H": seed(COL_H), "D": seed(COL_D), "I": seed(COL_I), "J": seed(COL_J)}
    counts = {"H": 0, "D": 0, "I": 0, "J": 0}

    for p in parsed:
        i = p["idx"]
        h_val = p["h_val"]

        # A UID found via the phone number takes precedence over one already
        # present in column H.
        matched_uid = phone_to_uid.get(p["phone"])
        uid = matched_uid or (h_val if h_val.isdigit() else None)
        if not uid:
            continue

        # H: only fill in a UID where the cell was empty or "not registered".
        if matched_uid is not None and h_val in ("", UNREGISTERED):
            columns["H"][i] = uid
            counts["H"] += 1

        # D: trial count
        if uid in uid_trial:
            columns["D"][i] = str(uid_trial[uid])
            counts["D"] += 1

        reg = uid_reg.get(uid)
        # I: registration date
        if reg and reg["created_at"]:
            columns["I"][i] = reg["created_at"]
            counts["I"] += 1
        # J: download channel
        if reg and reg["download_channel"]:
            columns["J"][i] = reg["download_channel"]
            counts["J"] += 1

    return columns, counts


def main():
    """Run the read -> lookup -> write pipeline and print a summary.

    Exits with status 1 if the sheet cannot be read or if any column write
    is rejected by the API.
    """
    print("=== Reading sheet ===")
    rows = read_sheet()
    print(f"Rows: {len(rows)}")

    parsed = _parse_rows(rows)

    # Rows with a valid phone whose H cell is empty or marked unregistered.
    phone_rows = [
        p for p in parsed
        if PHONE_RE.fullmatch(p["phone"]) and p["h_val"] in ("", UNREGISTERED)
    ]
    existing_uids = {p["h_val"] for p in parsed if p["h_val"].isdigit()}
    print(f"Need phone lookup: {len(phone_rows)}, existing UIDs: {len(existing_uids)}")

    # Step 2: MySQL phone -> UID
    phone_to_uid = _lookup_uids_by_phone(phone_rows)
    print(f"Phone->UID matches: {len(phone_to_uid)}")

    # Step 3: Collect all UIDs
    all_uids = existing_uids | set(phone_to_uid.values())
    print(f"Total UIDs to query PG: {len(all_uids)}")

    # Step 4: PostgreSQL queries
    uid_reg, uid_trial = _fetch_pg_info(all_uids)
    print(f"Reg info: {len(uid_reg)}, Trial counts: {len(uid_trial)}")

    # Step 5: Build full column arrays for batch write
    columns, counts = _build_column_updates(parsed, phone_to_uid, uid_reg, uid_trial)

    # Step 6: Write in full column batches; a column with no updates is skipped.
    last_row = FIRST_ROW - 1 + len(parsed)
    failed = []
    for letter in ("H", "D", "I", "J"):
        if counts[letter]:
            range_str = f"{SHEET_ID}!{letter}{FIRST_ROW}:{letter}{last_row}"
            if not write_range(range_str, [[v] for v in columns[letter]]):
                failed.append(letter)
        time.sleep(0.1)

    print("\n=== SUMMARY ===")
    print(f"Phones matched in MySQL: {len(phone_to_uid)}")
    print(f"H column (UID) written: {counts['H']}")
    print(f"D column (trial count) written: {counts['D']}")
    print(f"I column (reg date) written: {counts['I']}")
    print(f"J column (channel) written: {counts['J']}")

    if failed:
        # The counts above describe intended updates; these columns were
        # rejected by the API and not written.
        print(f"FAILED column writes: {', '.join(failed)}")
        sys.exit(1)


if __name__ == "__main__":
    main()