#!/usr/bin/env python3
"""Fix process data: cumulative lesson completion by sales.

Pipeline (runs top to bottom when the script is executed):

1. Read the three sales sheets from the Feishu spreadsheet and build
   phone -> sales and user_id -> sales lookup tables.
2. Query PostgreSQL for accounts (phone, registration month), the highest
   completed lesson per user, and the set of paying accounts.
3. Attribute each registered user to a sales person (user id first, then
   exact phone, then a guarded substring phone match).
4. Aggregate, per registration month and sales person, how many users
   reached at least lesson L01..L05 and how many of those paid.
5. Write the counts and rate formulas into columns H..AA of the process
   data sheet, one row per (month, sales) pair.
"""
import json
import os
import re
import time
from collections import defaultdict
from contextlib import closing
from datetime import datetime, timedelta

import psycopg2
import requests

CRED_DIR = "/root/.openclaw/credentials/xiaoxi"
SPREADSHEET_TOKEN = "NoZqsFi47hIOHEt9j8WcfRtbnug"

# Sheet that receives the aggregated numbers in step 5.
PROCESS_SHEET_ID = "3aOvV6"

# Substring phone matching is only attempted when both numbers have at least
# this many characters; shorter fragments (e.g. a stray "1" typed into a sales
# sheet) would otherwise be "contained" in almost every real phone number.
MIN_PARTIAL_PHONE_LEN = 7

# Parse KEY=VALUE lines from the secrets file, skipping blanks and comments.
# Surrounding single quotes around a value are removed.
secrets = {}
with open("/root/.openclaw/workspace/secrets.env", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if line and not line.startswith("#") and "=" in line:
            k, v = line.split("=", 1)
            secrets[k.strip()] = v.strip().strip("'")


def get_token():
    """Return a Feishu tenant access token for the first app in config.json.

    Raises:
        RuntimeError: if the response carries no ``tenant_access_token``.
    """
    with open(os.path.join(CRED_DIR, "config.json"), encoding="utf-8") as f:
        cfg = json.load(f)
    app = cfg["apps"][0]
    resp = requests.post(
        "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
        json={"app_id": app["appId"], "app_secret": app["appSecret"]},
        timeout=15)
    body = resp.json()
    access_token = body.get("tenant_access_token")
    if not access_token:
        # Surface the API's own error instead of an opaque KeyError.
        raise RuntimeError(
            f"Feishu token request failed: code={body.get('code')} msg={body.get('msg')}")
    return access_token


token = get_token()


def read_sheet(sheet_id, range_str):
    """Return the cell values (list of rows) of ``sheet_id!range_str``.

    Raises:
        RuntimeError: if the API reports a non-zero ``code``.
    """
    resp = requests.get(
        f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values/{sheet_id}!{range_str}",
        headers={"Authorization": f"Bearer {token}"},
        timeout=30)
    body = resp.json()
    if body.get("code") != 0:
        raise RuntimeError(
            f"Feishu read of {sheet_id}!{range_str} failed: "
            f"code={body.get('code')} msg={body.get('msg')}")
    return body["data"]["valueRange"]["values"]


def write_values(sheet_id, range_str, values):
    """Write ``values`` (a list of rows) into ``range_str``; return True on success.

    ``range_str`` must already include the sheet id prefix; ``sheet_id`` is
    accepted for call-site symmetry with ``read_sheet`` but is not used.
    Failures are reported on stdout and signalled by returning False rather
    than by raising.
    """
    resp = requests.put(
        f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values",
        headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"},
        json={"valueRange": {"range": range_str, "values": values}},
        params={"valueInputOption": "USER_ENTERED"},
        timeout=60)
    result = resp.json()
    ok = result.get("code") == 0
    if not ok:
        print(f" ❌ {range_str}: code={result.get('code')} msg={result.get('msg')}")
    else:
        print(f" ✅ {range_str}")
    return ok


# ============================================================
# Step 1: Parse sales sheets → phone → sales mapping
# ============================================================
print("=== Step 1: Parse sales sheets ===")

# Substring to look for in the first column -> canonical sales name.
CS_MAP = {"吴迪": "吴迪", "小龙": "小龙", "Tom": "Tom", "Bob": "Bob"}


def _cell_to_str(value):
    """Normalise a sheet cell to a string; numeric cells lose their fraction."""
    if not value:
        return ""
    if isinstance(value, (int, float)):
        # Numeric cells may arrive as floats (e.g. 13800138000.0).
        return str(int(value))
    return str(value).strip()


def parse_sales_sheet(data):
    """Return a list of ``{"sales", "phone", "user_id"}`` dicts.

    The first two rows of ``data`` are treated as headers. Rows with fewer
    than five cells, or whose first cell contains none of the ``CS_MAP``
    keys, are skipped. The phone is read from the 5th cell and the user id
    from the 8th cell (when present); both default to "".
    """
    results = []
    for row in data[2:]:  # skip header rows
        if not row or len(row) < 5:
            continue
        sales_raw = str(row[0]).strip() if row[0] else ""
        # First CS_MAP key contained in the cell wins.
        sales = next((v for k, v in CS_MAP.items() if k in sales_raw), None)
        if not sales:
            continue
        results.append({
            "sales": sales,
            "phone": _cell_to_str(row[4]),
            "user_id": _cell_to_str(row[7] if len(row) > 7 else None),
        })
    return results


wudi = read_sheet("f975f0", "A1:K700")
xl = read_sheet("qJF4I", "A1:K1200")
cd = read_sheet("qJF4J", "A1:K2500")

wudi_entries = parse_sales_sheet(wudi)
xl_entries = parse_sales_sheet(xl)
cd_entries = parse_sales_sheet(cd)
all_entries = wudi_entries + xl_entries + cd_entries
print(f" 吴迪: {len(wudi_entries)}, 小龙: {len(xl_entries)}, 成都: {len(cd_entries)}, 总计: {len(all_entries)}")

# Build phone→sales and user_id→sales mappings (dedup: first match wins).
phone_to_sales = {}
uid_to_sales = {}
for e in all_entries:
    if e["phone"]:
        phone_to_sales.setdefault(e["phone"], e["sales"])
    # Only purely numeric user ids are trusted.
    if e["user_id"] and e["user_id"].isdigit():
        uid_to_sales.setdefault(e["user_id"], e["sales"])
print(f" phone→sales: {len(phone_to_sales)}, uid→sales: {len(uid_to_sales)}")

# ============================================================
# Step 2: Query database
# ============================================================
print("\n=== Step 2: Query database ===")

# closing() guarantees the cursor and connection are released even when a
# query raises.
with closing(psycopg2.connect(
        host="bj-postgres-16pob4sg.sql.tencentcdb.com",
        port=28591,
        user="ai_member",
        password=secrets["PG_ONLINE_PASSWORD"],
        dbname="vala_bi")) as conn, closing(conn.cursor()) as cur:
    # Users with phone
    cur.execute(
        "SELECT id, tel FROM bi_vala_app_account "
        "WHERE status = 1 AND deleted_at IS NULL")
    user_phone = {}
    for row in cur.fetchall():
        uid = row[0]
        tel = (row[1] or "").strip()
        if tel:
            user_phone[uid] = tel

    # User reg month
    cur.execute("""
        SELECT id, DATE_TRUNC('month', created_at)::date as reg_month
        FROM bi_vala_app_account
        WHERE status = 1 AND deleted_at IS NULL
          AND created_at >= '2026-03-01' AND created_at < '2026-07-01'
    """)
    user_reg = {row[0]: row[1].strftime("%Y-%m") for row in cur.fetchall()}

    # Max lesson per user
    cur.execute("""
        SELECT ucp.user_id, MAX(blu.course_lesson) as max_lesson
        FROM bi_user_chapter_play_record_0 ucp
        JOIN bi_level_unit_lesson blu ON ucp.chapter_id = blu.id
        WHERE ucp.play_status = 1
          AND blu.course_season = 'S0'
          AND blu.course_unit = 'U00'
          AND blu.course_level IN ('L1', 'L2')
        GROUP BY ucp.user_id
    """)
    user_lesson = {row[0]: row[1] for row in cur.fetchall()}

    # Paid users
    cur.execute("""
        SELECT DISTINCT account_id
        FROM bi_vala_order
        WHERE pay_success_date IS NOT NULL
          AND order_status IN (3, 4)
          AND key_from IN ('app-active-h5-0-0', 'app-sales-bj-qhm-0')
    """)
    paid = {row[0] for row in cur.fetchall()}

print(f" Users with phone: {len(user_phone)}, reg: {len(user_reg)}, lessons: {len(user_lesson)}, paid: {len(paid)}")

# ============================================================
# Step 3: Match users to sales
# ============================================================
print("\n=== Step 3: Match users to sales ===")
user_to_sales = {}
matched_by_uid = 0
matched_by_phone = 0

# Sales-sheet phones long enough to take part in substring matching; built
# once instead of being re-filtered for every user.
partial_candidates = [
    (phone, sales) for phone, sales in phone_to_sales.items()
    if len(phone) >= MIN_PARTIAL_PHONE_LEN
]

for uid in user_reg:
    # Try user_id match first
    if str(uid) in uid_to_sales:
        user_to_sales[uid] = uid_to_sales[str(uid)]
        matched_by_uid += 1
        continue

    tel = user_phone.get(uid, "")
    if not tel:
        continue

    # Try exact phone match
    if tel in phone_to_sales:
        user_to_sales[uid] = phone_to_sales[tel]
        matched_by_phone += 1
        continue

    # Try partial phone match (one number contained in the other); the first
    # candidate in sheet order wins.
    if len(tel) >= MIN_PARTIAL_PHONE_LEN:
        for phone, sales in partial_candidates:
            if tel in phone or phone in tel:
                user_to_sales[uid] = sales
                matched_by_phone += 1
                break

print(f" Matched: uid={matched_by_uid}, phone={matched_by_phone}, total={len(user_to_sales)}")

# ============================================================
# Step 4: Aggregate
# ============================================================
print("\n=== Step 4: Aggregate ===")
LESSONS = ["L01", "L02", "L03", "L04", "L05"]
MONTHS = ["2026-03", "2026-04", "2026-05", "2026-06"]
SALES_MAP = {
    "2026-03": ["合计", "小龙", "Bob", "Tom"],
    "2026-04": ["合计", "小龙", "吴迪", "Bob", "Tom"],
    "2026-05": ["合计", "小龙", "吴迪", "Bob", "Tom"],
    "2026-06": ["合计", "小龙", "吴迪", "Bob", "Tom"],
}

# results[month][sales] holds "gte_<lesson>" (users whose max lesson is at
# least <lesson>) and "cv_<lesson>" (those of them who paid).
results = {}
for m in MONTHS:
    results[m] = {}
    for s in SALES_MAP[m]:
        results[m][s] = {f"gte_{lesson}": 0 for lesson in LESSONS}
        results[m][s].update({f"cv_{lesson}": 0 for lesson in LESSONS})

for uid, reg_month in user_reg.items():
    if reg_month not in results:
        continue
    max_l = user_lesson.get(uid)
    if not max_l:
        # No completed lesson: contributes to no counter.
        continue
    is_paid = uid in paid
    sales = user_to_sales.get(uid)

    # Every user counts towards the total row ("合计"); attributed users
    # additionally count towards their sales row when that sales person is
    # reported for the month.
    buckets = [results[reg_month]["合计"]]
    if sales and sales in results[reg_month]:
        buckets.append(results[reg_month][sales])

    for lesson in LESSONS:
        if max_l >= lesson:
            for bucket in buckets:
                bucket[f"gte_{lesson}"] += 1
                if is_paid:
                    bucket[f"cv_{lesson}"] += 1

for m in MONTHS:
    print(f"\n {m}:")
    for s in SALES_MAP[m]:
        r = results[m][s]
        print(f" {s}: L01={r['gte_L01']} L02={r['gte_L02']} L03={r['gte_L03']} L04={r['gte_L04']} L05={r['gte_L05']} | cv: {r['cv_L01']} {r['cv_L02']} {r['cv_L03']} {r['cv_L04']} {r['cv_L05']}")

# ============================================================
# Step 5: Write to process data
# ============================================================
print("\n=== Step 5: Write to process data ===")
# (sheet row, registration month, sales) for every output row.
row_map = [
    (2, "2026-03", "合计"), (3, "2026-03", "小龙"), (4, "2026-03", "Bob"), (5, "2026-03", "Tom"),
    (6, "2026-04", "合计"), (7, "2026-04", "小龙"), (8, "2026-04", "吴迪"), (9, "2026-04", "Bob"), (10, "2026-04", "Tom"),
    (11, "2026-05", "合计"), (12, "2026-05", "小龙"), (13, "2026-05", "吴迪"), (14, "2026-05", "Bob"), (15, "2026-05", "Tom"),
    (16, "2026-06", "合计"), (17, "2026-06", "小龙"), (18, "2026-06", "吴迪"), (19, "2026-06", "Bob"), (20, "2026-06", "Tom"),
]

# Column mapping (four consecutive columns per lesson, H..AA overall):
#   H=lesson-1 count (>=L01)  I=lesson-1 rate  J=lesson-1 paid count (>=L01 + paid)  K=lesson-1 conversion rate
#   L=lesson-2 count (>=L02)  M=lesson-2 rate  N=lesson-2 paid count (>=L02 + paid)  O=lesson-2 conversion rate
#   P=lesson-3 count (>=L03)  Q=lesson-3 rate  R=lesson-3 paid count (>=L03 + paid)  S=lesson-3 conversion rate
#   T=lesson-4 count (>=L04)  U=lesson-4 rate  V=lesson-4 paid count (>=L04 + paid)  W=lesson-4 conversion rate
#   X=lesson-5 count (>=L05)  Y=lesson-5 rate  Z=lesson-5 paid count (>=L05 + paid)  AA=lesson-5 conversion rate
# Each entry: (lesson, count column, paid-count column). The rate columns are
# not listed because the row is written as one contiguous H..AA range.
LESSON_COLUMNS = [
    ("L01", "H", "J"),
    ("L02", "L", "N"),
    ("L03", "P", "R"),
    ("L04", "T", "V"),
    ("L05", "X", "Z"),
]
FIRST_COL, LAST_COL = "H", "AA"


def _rate_formula(col, row_num):
    """Return the sheet formula dividing ``col`` by column C of the same row.

    NOTE(review): column C is presumably the row's user total — confirm
    against the sheet layout.
    """
    return f'=IFERROR({col}{row_num}/C{row_num},"")'


failed_rows = []
for row_num, month, sales in row_map:
    r = results[month][sales]
    row_values = []
    for lesson, count_col, cv_col in LESSON_COLUMNS:
        row_values.extend([
            r[f"gte_{lesson}"],
            _rate_formula(count_col, row_num),
            r[f"cv_{lesson}"],
            _rate_formula(cv_col, row_num),
        ])
    # One request per row instead of one per cell: H..AA is contiguous, so
    # the 20 cells can be sent as a single range.
    target = f"{PROCESS_SHEET_ID}!{FIRST_COL}{row_num}:{LAST_COL}{row_num}"
    if not write_values(PROCESS_SHEET_ID, target, [row_values]):
        failed_rows.append(row_num)
    time.sleep(0.1)

if failed_rows:
    # Do not claim success when some rows were rejected by the API.
    print(f"\n❌ Process data update incomplete, failed rows: {failed_rows}")
    raise SystemExit(1)

print("\n✅ Process data updated!")