#!/usr/bin/env python3
"""
Process the 吴迪 sheet: match phones to account UIDs and fill columns H/D/I/J.

Usage:
    python3 process_wudi_sheet.py <feishu_token>

Pipeline:
    1. Read rows A3:J8016 of the Feishu sheet.
    2. Pick rows that have an 11-digit phone (E) but no numeric UID (H), and
       rows that have a numeric UID (H) but an empty D.
    3. Encrypt the phones with the project's ``encrypt_phone`` helper (XXTEA,
       per the original header) and look the accounts up in PostgreSQL.
    4. Work out the H/D/I/J cell values for every affected row.
    5. Write the cells back, one request per run of consecutive rows.
"""

import json
import os
import sys
import time
import urllib.error
import urllib.request

# --- Config ---
# Read at import time so feishu_get/feishu_put can use it; main() rejects None.
FEISHU_TOKEN = sys.argv[1] if len(sys.argv) > 1 else None

SPREADSHEET_TOKEN = "NoZqsFi47hIOHEt9j8WcfRtbnug"
SHEET_ID = "f975f0"

# Sheet window that is read and the column letters it covers.
FIRST_DATA_ROW = 3
LAST_DATA_ROW = 8016
COLUMNS = "ABCDEFGHIJ"

# Directory that holds the project-local phone_encrypt module.
SCRIPTS_DIR = '/root/.openclaw/workspace/scripts'

# Seconds before a Feishu HTTP call is abandoned instead of hanging forever.
HTTP_TIMEOUT = 60

# Number of values bound into a single SQL "IN (...)" list.
DB_BATCH_SIZE = 500

# Upper bound on rows sent in one write request.
# NOTE(review): Feishu documents a 5000-row cap per write - confirm.
MAX_ROWS_PER_WRITE = 5000

# PostgreSQL config
# SECURITY: a credential is committed to source here. It is kept only as a
# fallback so existing runs keep working; set VALA_BI_PG_PASSWORD instead and
# rotate/remove the literal.
PG_CONFIG = {
    "host": "bj-postgres-16pob4sg.sql.tencentcdb.com",
    "port": 28591,
    "user": "ai_member",
    "password": os.environ.get("VALA_BI_PG_PASSWORD", "LdfjdjL83h3h3^$&**YGG*"),
    "database": "vala_bi",
}


def _feishu_call(req):
    """Add the auth/content headers to *req*, send it and decode the JSON reply."""
    req.add_header("Authorization", f"Bearer {FEISHU_TOKEN}")
    req.add_header("Content-Type", "application/json")
    with urllib.request.urlopen(req, timeout=HTTP_TIMEOUT) as resp:
        return json.loads(resp.read())


def feishu_get(url):
    """GET *url* with the bearer token and return the decoded JSON body."""
    return _feishu_call(urllib.request.Request(url))


def feishu_put(url, body):
    """PUT *body* (JSON-encoded) to *url* and return the decoded JSON body."""
    data = json.dumps(body).encode()
    return _feishu_call(urllib.request.Request(url, data=data, method="PUT"))


def parse_rows(rows, first_row=FIRST_DATA_ROW):
    """Turn raw sheet rows into dicts keyed by column letter.

    Each result holds ``excel_row`` (``first_row`` plus the row's index) and
    one stripped string per letter in ``COLUMNS``. Short rows are padded with
    empty strings; falsy cells (``None``, ``""``, ``0``) become ``""``.
    """
    parsed = []
    for offset, row in enumerate(rows):
        padded = list(row) + [''] * (len(COLUMNS) - len(row))
        record = {"excel_row": first_row + offset}
        for col, cell in zip(COLUMNS, padded):
            record[col] = str(cell).strip() if cell else ''
        parsed.append(record)
    return parsed


def classify_rows(parsed):
    """Split parsed rows into the two kinds of work.

    Returns ``(phones_to_match, rows_need_dij)``:

    * ``phones_to_match`` - ``(excel_row, phone)`` for rows whose E is an
      11-digit number and whose H is not a numeric UID.
    * ``rows_need_dij`` - ``(excel_row, uid_str)`` for rows whose H is a
      numeric UID and whose D is empty.
    """
    phones_to_match = []
    rows_need_dij = []
    for p in parsed:
        phone, h_val, d_val = p["E"], p["H"], p["D"]
        # isdecimal(), not isdigit(): the UID is later passed to int(), which
        # rejects characters such as '²' that isdigit() accepts.
        has_uid = h_val.isdecimal()
        if len(phone) == 11 and phone.isdigit() and not has_uid:
            phones_to_match.append((p["excel_row"], phone))
        if has_uid and not d_val:
            rows_need_dij.append((p["excel_row"], h_val))
    return phones_to_match, rows_need_dij


def chunked(items, size=DB_BATCH_SIZE):
    """Yield successive slices of *items* holding at most *size* elements."""
    for start in range(0, len(items), size):
        yield items[start:start + size]


def fetch_phone_matches(cur, enc_to_phone):
    """Look up account ids for the encrypted phones.

    *cur* is an open DB cursor and *enc_to_phone* maps encrypted phone ->
    plain phone. Returns ``{phone: account_id_as_str}``. If several accounts
    share a phone, the last row returned wins.
    """
    phone_matches = {}
    for batch_no, batch in enumerate(chunked(list(enc_to_phone)), start=1):
        placeholders = ','.join(['%s'] * len(batch))
        cur.execute(
            f"SELECT id, tel_encrypt FROM bi_vala_app_account WHERE tel_encrypt IN ({placeholders}) AND status=1 AND deleted_at IS NULL",
            batch
        )
        # Fetch exactly once: a second fetchall() on the same result is empty.
        results = cur.fetchall()
        for account_id, tel_enc in results:
            phone = enc_to_phone.get(tel_enc)
            if phone:
                phone_matches[phone] = str(account_id)
        print(f"  Batch {batch_no}: {len(results)} results from {len(batch)} phones")
    return phone_matches


def fetch_uid_info(cur, uids):
    """Collect the D/I/J source data for *uids*.

    Returns ``{uid: info}`` where ``info`` may contain ``d_count`` (row count
    in ``bi_user_course_detail``; the trial-lesson count according to the
    original comments), ``created_at`` (date string) and ``download_channel``.
    UIDs with no matching rows are simply absent.
    """
    uid_info = {}
    uid_list = sorted(uids)

    # D column: per-account row count.
    for batch in chunked(uid_list):
        placeholders = ','.join(['%s'] * len(batch))
        cur.execute(
            f"SELECT account_id, COUNT(*) FROM bi_user_course_detail WHERE account_id IN ({placeholders}) AND expire_time IS NULL AND deleted_at IS NULL GROUP BY account_id",
            batch
        )
        for account_id, count in cur.fetchall():
            uid_info.setdefault(account_id, {})['d_count'] = count

    # I/J columns: registration date and download channel.
    for batch in chunked(uid_list):
        placeholders = ','.join(['%s'] * len(batch))
        cur.execute(
            f"SELECT id, created_at, download_channel FROM bi_vala_app_account WHERE id IN ({placeholders}) AND status=1 AND deleted_at IS NULL",
            batch
        )
        for account_id, created_at, download_channel in cur.fetchall():
            info = uid_info.setdefault(account_id, {})
            info['created_at'] = str(created_at.date()) if created_at else ''
            info['download_channel'] = download_channel or ''
    return uid_info


def build_writes(phones_to_match, rows_need_dij, phone_matches, uid_info):
    """Compute the cells to write, as ``{excel_row: {column_letter: value}}``.

    Rows whose phone was matched get H (the UID). Every row that ends up with
    a UID - freshly matched or already present - gets D (``'0'`` when the UID
    has no count), and I/J only when a non-empty value is known.
    """
    writes = {}
    rows_for_dij = {excel_row: int(uid_str) for excel_row, uid_str in rows_need_dij}

    for excel_row, phone in phones_to_match:
        uid = phone_matches.get(phone)
        if uid:
            writes.setdefault(excel_row, {})['H'] = uid
            rows_for_dij[excel_row] = int(uid)

    for excel_row, uid in rows_for_dij.items():
        info = uid_info.get(uid, {})
        cells = writes.setdefault(excel_row, {})
        cells['D'] = str(info.get('d_count', 0))
        created_at = info.get('created_at', '')
        if created_at:
            cells['I'] = created_at
        download_channel = info.get('download_channel', '')
        if download_channel:
            cells['J'] = download_channel
    return writes


def column_writes(writes, col):
    """Return ``[(excel_row, value), ...]`` for *col*, sorted by row number."""
    return [(r, writes[r][col]) for r in sorted(writes) if col in writes[r]]


def consecutive_runs(row_values, max_rows):
    """Yield slices of *row_values* whose row numbers are consecutive.

    *row_values* is a row-sorted list of ``(excel_row, value)``. A run is cut
    when the next row number is not the previous one plus one, or when the
    run already holds *max_rows* entries.
    """
    total = len(row_values)
    start = 0
    while start < total:
        end = start + 1
        while (
            end < total
            and end - start < max_rows
            and row_values[end][0] == row_values[end - 1][0] + 1
        ):
            end += 1
        yield row_values[start:end]
        start = end


def write_batch(col_letter, row_values, col_name, max_rows=MAX_ROWS_PER_WRITE):
    """Write one column's values to the sheet, one request per consecutive run.

    Args:
        col_letter: Sheet column letter, e.g. ``'H'``.
        row_values: Row-sorted list of ``(excel_row, value)``.
        col_name: Label used in the progress output.
        max_rows: Largest number of rows sent in a single request.

    Returns:
        Number of cells in requests the API acknowledged with ``code == 0``.
        Failed requests are reported and skipped; they do not abort the run.
    """
    if not row_values:
        print(f"  {col_name} ({col_letter}): nothing to write")
        return 0

    url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values"
    written = 0
    for batch in consecutive_runs(row_values, max_rows):
        start_row = batch[0][0]
        end_row = batch[-1][0]
        values = [[value] for _, value in batch]
        range_str = f"{SHEET_ID}!{col_letter}{start_row}:{col_letter}{end_row}"
        body = {"valueRange": {"range": range_str, "values": values}}

        try:
            result = feishu_put(url, body)
        except Exception as e:
            # Deliberately broad: this is a best-effort boundary, one failed
            # request must not stop the remaining writes.
            print(f"  {col_name} rows {start_row}-{end_row}: EXCEPTION {e}")
        else:
            code = result.get("code", -1)
            if code == 0:
                written += len(batch)
                print(f"  {col_name} rows {start_row}-{end_row}: OK ({len(batch)} cells)")
            else:
                print(f"  {col_name} rows {start_row}-{end_row}: ERROR code={code} msg={result.get('msg','')}")

        # Small pause between requests to stay gentle on the API.
        time.sleep(0.05)
    return written


def main():
    """Run the read -> match -> query -> write pipeline and print a summary."""
    if not FEISHU_TOKEN:
        print("Usage: python3 process_wudi_sheet.py <feishu_token>")
        sys.exit(1)

    # Project-local / third-party imports are resolved here, before any
    # network traffic, so a missing dependency fails fast.
    sys.path.insert(0, SCRIPTS_DIR)
    from phone_encrypt import encrypt_phone
    import psycopg2

    # --- Step 1: Read all sheet data ---
    print("Step 1: Reading sheet data...")
    url = (
        f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}"
        f"/values/{SHEET_ID}!A{FIRST_DATA_ROW}:J{LAST_DATA_ROW}?valueRenderOption=ToString"
    )
    result = feishu_get(url)
    if result.get("code", 0) != 0:
        # Without this check an API error looks like an empty sheet.
        print(f"  ERROR reading sheet: code={result.get('code')} msg={result.get('msg', '')}")
        sys.exit(1)
    rows = result.get("data", {}).get("valueRange", {}).get("values", [])
    print(f"Read {len(rows)} rows")
    parsed = parse_rows(rows)

    # --- Step 2: Identify rows needing processing ---
    phones_to_match, rows_need_dij = classify_rows(parsed)
    print(f"\nStep 2: Analysis")
    print(f"  Phones to match (H empty): {len(phones_to_match)}")
    print(f"  Rows with UID but D empty: {len(rows_need_dij)}")

    # --- Step 3: Encrypt phones and query PostgreSQL ---
    print("\nStep 3: Encrypting phones and querying DB...")
    phone_to_enc = {}
    for _, phone in phones_to_match:
        if phone not in phone_to_enc:
            phone_to_enc[phone] = encrypt_phone(phone)
    enc_to_phone = {enc: phone for phone, enc in phone_to_enc.items()}

    conn = psycopg2.connect(**PG_CONFIG)
    try:
        cur = conn.cursor()
        try:
            phone_matches = fetch_phone_matches(cur, enc_to_phone)
            print(f"  Total phone matches: {len(phone_matches)}")

            # D/I/J are needed for freshly matched UIDs and pre-existing ones.
            all_uids = {int(uid) for uid in phone_matches.values()}
            all_uids.update(int(uid) for _, uid in rows_need_dij)
            print(f"\n  Fetching D/I/J for {len(all_uids)} unique UIDs...")
            uid_info = fetch_uid_info(cur, all_uids)
        finally:
            cur.close()
    finally:
        conn.close()
    print(f"  Fetched info for {len(uid_info)} UIDs")

    # --- Step 4: Prepare writes ---
    print("\nStep 4: Preparing writes...")
    writes = build_writes(phones_to_match, rows_need_dij, phone_matches, uid_info)
    print(f"  Total rows to write: {len(writes)}")

    # --- Step 5: Execute writes in batches ---
    print("\nStep 5: Writing to sheet...")
    h_count = write_batch('H', column_writes(writes, 'H'), 'H(UID)')
    d_count = write_batch('D', column_writes(writes, 'D'), 'D(Trial)')
    i_count = write_batch('I', column_writes(writes, 'I'), 'I(RegDate)')
    j_count = write_batch('J', column_writes(writes, 'J'), 'J(Channel)')

    # --- Summary ---
    print(f"\n{'='*60}")
    print(f"SUMMARY")
    print(f"{'='*60}")
    print(f"  Phones matched to UID: {len(phone_matches)}")
    print(f"  H (UID) written: {h_count}")
    print(f"  D (Trial count) written: {d_count}")
    print(f"  I (Reg date) written: {i_count}")
    print(f"  J (Channel) written: {j_count}")
    print(f"  Total rows updated: {len(writes)}")

    # Show unmatched phones
    unmatched = [(r, p) for r, p in phones_to_match if p not in phone_matches]
    if unmatched:
        print(f"\n  Unmatched phones ({len(unmatched)}):")
        for r, p in unmatched[:20]:
            print(f"    Row {r}: {p}")
        if len(unmatched) > 20:
            print(f"    ... and {len(unmatched) - 20} more")


if __name__ == "__main__":
    main()