#!/usr/bin/env python3
"""Refresh the "Bot 小龙" Feishu sheet columns H, D, I and J.

Columns: H (UID), D (trial count), I (registration date), J (download channel).

D, I and J are only written where the cell is currently empty. H is written
whenever a phone number in column E resolves to a UID that differs from the
value already in H.
"""

import json
import os
import re
import sys
import time
from contextlib import closing

import mysql.connector
import psycopg2
import requests

# Seconds to wait on any Feishu HTTP call before giving up; without a timeout
# `requests` blocks forever on a stalled connection.
HTTP_TIMEOUT = 30

# NOTE(review): the credentials below are committed in source. They are kept as
# fallbacks so the script keeps working unchanged, but each one can now be
# overridden through the environment. Rotate them and drop the literals.
FEISHU_APP_ID = os.environ.get("FEISHU_APP_ID", "cli_a929ae22e0b8dcc8")
FEISHU_APP_SECRET = os.environ.get("FEISHU_APP_SECRET", "OtFjMy7p3qE3VvLbMdcWidwgHOnGD4FJ")


def get_token():
    """Request a Feishu tenant access token and return it.

    Prints the response and exits with status 1 when the response carries no
    token (for example on bad credentials).
    """
    r = requests.post(
        'https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal',
        json={"app_id": FEISHU_APP_ID, "app_secret": FEISHU_APP_SECRET},
        timeout=HTTP_TIMEOUT,
    )
    data = r.json()
    token = data.get('tenant_access_token')
    if not token:
        print(f"Error getting tenant access token: {data}")
        sys.exit(1)
    return token


# Fetched once at import time; every sheet request below reuses it.
TOKEN = get_token()
SPREADSHEET_TOKEN = "NoZqsFi47hIOHEt9j8WcfRtbnug"
SHEET_ID = "qJF4I"

# Sheet rows covered by the refresh. FIRST_DATA_ROW is also the offset used to
# turn a 0-based index into a sheet row number, so the two cannot drift apart.
FIRST_DATA_ROW = 3
LAST_DATA_ROW = 2514

MYSQL_CONFIG = {
    "host": "bj-cdb-dh2fkqa0.sql.tencentcdb.com",
    "port": 27751,
    "user": "read_only",
    "password": os.environ.get("VALA_MYSQL_PASSWORD", "fsdo45ijfmfmuu77$%^&"),
    "database": "vala_user",
}

PG_CONFIG = {
    "host": "bj-postgres-16pob4sg.sql.tencentcdb.com",
    "port": 28591,
    "user": "ai_member",
    "password": os.environ.get("VALA_PG_PASSWORD", "LdfjdjL83h3h3^$&**YGG*"),
    "database": "vala_bi",
}

# Raw download_channel value -> short label written to column J. Values that
# are not listed here are written unchanged.
CHANNEL_MAP = {
    "Apple App Store": "苹果",
    "科大讯飞学习机": "讯飞",
    "学而思学习机": "学而思",
    "华为应用市场": "华为",
    "小米应用市场": "小米",
    "应用宝应用市场": "应用宝",
    "希沃学习机": "希沃",
    "荣耀应用市场": "荣耀",
    "小度学习机": "小度",
    "oppo应用市场": "OPPO",
    "vivo应用市场": "VIVO",
    "京东方学习机": "京东方",
    "步步高学习机": "步步高",
    "作业帮学习机": "作业帮",
    "魅族应用市场": "魅族",
    "官网": "官网",
}

# A phone cell counts as a phone number only when it is exactly 11 digits.
_PHONE_RE = re.compile(r'^\d{11}$')


def read_sheet():
    """Return the values of columns A-J for the configured row range.

    Prints the response and exits with status 1 when Feishu reports a
    non-zero ``code``.
    """
    url = (
        f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}"
        f"/values/{SHEET_ID}!A{FIRST_DATA_ROW}:J{LAST_DATA_ROW}?valueRenderOption=ToString"
    )
    r = requests.get(url, headers={"Authorization": f"Bearer {TOKEN}"}, timeout=HTTP_TIMEOUT)
    data = r.json()
    if data.get("code") != 0:
        print(f"Error reading sheet: {data}")
        sys.exit(1)
    return data["data"]["valueRange"]["values"]


def write_range(range_str, values):
    """Write ``values`` (a list of rows) to ``range_str``.

    Returns True on success, False (after printing the response) when Feishu
    reports a non-zero ``code``.
    """
    url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values"
    headers = {"Authorization": f"Bearer {TOKEN}", "Content-Type": "application/json"}
    body = {"valueRange": {"range": range_str, "values": values}}
    r = requests.put(url, headers=headers, json=body, timeout=HTTP_TIMEOUT)
    data = r.json()
    if data.get("code") != 0:
        print(f"  ERROR {range_str}: {data}")
        return False
    print(f"  OK {range_str}: {len(values)} cells")
    return True


def write_contiguous(col_letter, items):
    """Write items [(row_num, value), ...] in contiguous batches.

    Consecutive row numbers are grouped into one range write each. Returns the
    number of cells in the batches that were written successfully. The
    caller's list is not modified.
    """
    if not items:
        return 0
    # Sort a copy: the caller still uses its list afterwards.
    ordered = sorted(items, key=lambda x: x[0])
    total = 0
    i = 0
    while i < len(ordered):
        start_row = ordered[i][0]
        values = [[ordered[i][1]]]
        j = i + 1
        # Extend the batch while each row number directly follows the previous.
        while j < len(ordered) and ordered[j][0] == ordered[j - 1][0] + 1:
            values.append([ordered[j][1]])
            j += 1
        end_row = ordered[j - 1][0]
        range_str = f"{SHEET_ID}!{col_letter}{start_row}:{col_letter}{end_row}"
        if write_range(range_str, values):
            total += len(values)
        time.sleep(0.05)
        i = j
    return total


def _cell(row, idx):
    """Return cell ``idx`` of ``row`` as a stripped string ("" for None)."""
    value = row[idx]
    return str(value).strip() if value is not None else ""


def _parse_rows(rows):
    """Turn raw sheet rows into dicts holding the columns this script uses."""
    parsed = []
    for i, row in enumerate(rows):
        # Short rows are padded to 10 columns (A-J) so indexing never fails.
        padded = list(row or []) + [""] * (10 - len(row or []))
        parsed.append({
            "idx": i,
            "row_num": i + FIRST_DATA_ROW,
            "phone": _cell(padded, 4),   # column E
            "h_val": _cell(padded, 7),   # column H
            "d_val": _cell(padded, 3),   # column D
            "i_val": _cell(padded, 8),   # column I
            "j_val": _cell(padded, 9),   # column J
        })
    return parsed


def _lookup_uids_by_phone(phone_rows):
    """Map each sheet phone number to an account id found in MySQL.

    Accounts are matched on the first 3 and last 4 digits of the phone only.
    NOTE(review): presumably ``tel`` is stored masked in the middle - confirm.
    With this scheme two sheet phones sharing prefix and suffix inside one
    chunk cannot be told apart: only the first one in the chunk gets the UID,
    and when several accounts match the same phone the last row returned wins.
    """
    phone_to_uid = {}
    if not phone_rows:
        return phone_to_uid
    # closing() guarantees the cursor and connection are released even when a
    # query raises.
    with closing(mysql.connector.connect(**MYSQL_CONFIG)) as conn, \
            closing(conn.cursor()) as cur:
        for cs in range(0, len(phone_rows), 50):
            chunk = phone_rows[cs:cs + 50]
            patterns = tuple(f"{p['phone'][:3]}%{p['phone'][-4:]}" for p in chunk)
            conditions = " OR ".join(["tel LIKE %s"] * len(patterns))
            query = (
                f"SELECT id, tel FROM vala_app_account "
                f"WHERE ({conditions}) AND deleted_at IS NULL"
            )
            cur.execute(query, patterns)
            for uid, tel in cur.fetchall():
                for p in chunk:
                    if p["phone"][:3] == tel[:3] and p["phone"][-4:] == tel[-4:]:
                        phone_to_uid[p["phone"]] = str(uid)
                        break
            time.sleep(0.05)
    return phone_to_uid


def _fetch_pg_info(all_uids):
    """Fetch registration info and trial counts for ``all_uids`` from PG.

    Returns ``(uid_reg, uid_trial)``: ``uid_reg`` maps uid -> {"created_at",
    "download_channel"} and ``uid_trial`` maps uid -> row count. Both are keyed
    by the uid as a string.
    """
    uid_reg = {}
    uid_trial = {}
    if not all_uids:
        return uid_reg, uid_trial
    uid_list = list(all_uids)
    with closing(psycopg2.connect(**PG_CONFIG)) as conn, closing(conn.cursor()) as cur:
        for start in range(0, len(uid_list), 100):
            chunk = uid_list[start:start + 100]
            ph = ",".join(["%s"] * len(chunk))
            cur.execute(
                f"SELECT id, created_at::date, download_channel FROM bi_vala_app_account "
                f"WHERE id IN ({ph}) AND status=1 AND deleted_at IS NULL",
                chunk,
            )
            for uid, ca, dc in cur.fetchall():
                uid_reg[str(uid)] = {
                    "created_at": str(ca) if ca else "",
                    "download_channel": dc or "",
                }
            time.sleep(0.05)
        for start in range(0, len(uid_list), 100):
            chunk = uid_list[start:start + 100]
            ph = ",".join(["%s"] * len(chunk))
            cur.execute(
                f"SELECT account_id, COUNT(*) FROM bi_user_course_detail "
                f"WHERE account_id IN ({ph}) AND expire_time IS NULL AND deleted_at IS NULL "
                f"GROUP BY account_id",
                chunk,
            )
            for uid, cnt in cur.fetchall():
                uid_trial[str(uid)] = cnt
            time.sleep(0.05)
    return uid_reg, uid_trial


def _build_updates(parsed, phone_to_uid, uid_reg, uid_trial):
    """Return ``(h, d, i, j)`` lists of ``(row_num, value)`` cells to write."""
    h_updates = []  # UID where the phone matched and H holds something else
    d_updates = []  # trial count where D is empty
    i_updates = []  # registration date where I is empty
    j_updates = []  # channel label where J is empty
    for p in parsed:
        row_num = p["row_num"]
        phone = p["phone"]
        h_val = p["h_val"]

        # A phone match takes precedence over whatever is already in H.
        phone_uid = phone_to_uid.get(phone) if _PHONE_RE.match(phone) else None
        if phone_uid is not None:
            uid = phone_uid
        elif h_val.isdigit():
            uid = h_val
        else:
            uid = None
        if not uid:
            continue

        if phone_uid is not None and h_val != phone_uid:
            h_updates.append((row_num, phone_uid))

        if uid in uid_trial and not p["d_val"]:
            d_updates.append((row_num, str(uid_trial[uid])))

        reg = uid_reg.get(uid)
        if reg is not None:
            if reg["created_at"] and not p["i_val"]:
                i_updates.append((row_num, reg["created_at"]))
            if reg["download_channel"] and not p["j_val"]:
                raw_ch = reg["download_channel"]
                j_updates.append((row_num, CHANNEL_MAP.get(raw_ch, raw_ch)))
    return h_updates, d_updates, i_updates, j_updates


def main():
    """Read the sheet, resolve UIDs and account data, and write the gaps."""
    print("=== Reading sheet ===")
    rows = read_sheet()
    print(f"Rows: {len(rows)}")

    parsed = _parse_rows(rows)

    # Step 1: all rows with an 11-digit phone -> MySQL lookup
    all_phone_rows = [p for p in parsed if _PHONE_RE.match(p["phone"])]
    print(f"Rows with 11-digit phone: {len(all_phone_rows)}")
    phone_to_uid = _lookup_uids_by_phone(all_phone_rows)
    print(f"Phone->UID matches: {len(phone_to_uid)}")

    # Step 2: collect all UIDs (existing numeric H values + phone matches)
    all_uids = {p["h_val"] for p in parsed if p["h_val"].isdigit()}
    all_uids.update(phone_to_uid.values())
    print(f"Total UIDs to query PG: {len(all_uids)}")

    # Step 3: PG queries
    uid_reg, uid_trial = _fetch_pg_info(all_uids)
    print(f"Reg info: {len(uid_reg)}, Trial counts: {len(uid_trial)}")

    # Step 4: build updates
    h_updates, d_updates, i_updates, j_updates = _build_updates(
        parsed, phone_to_uid, uid_reg, uid_trial
    )
    print(f"H updates: {len(h_updates)}, D updates: {len(d_updates)}, I updates: {len(i_updates)}, J updates: {len(j_updates)}")

    # Step 5: write. Keep the per-column counts actually written so the
    # summary does not report failed batches as written.
    h_written = write_contiguous("H", h_updates)
    d_written = write_contiguous("D", d_updates)
    i_written = write_contiguous("I", i_updates)
    j_written = write_contiguous("J", j_updates)
    total = h_written + d_written + i_written + j_written

    print("\n=== SUMMARY ===")
    print(f"Phones matched in MySQL: {len(phone_to_uid)}")
    print(f"H column (UID) written: {h_written}")
    print(f"D column (trial count) written: {d_written}")
    print(f"I column (reg date) written: {i_written}")
    print(f"J column (channel) written: {j_written}")
    print(f"Total cells written: {total}")


if __name__ == "__main__":
    main()