#!/usr/bin/env python3
"""Fix process data: cumulative lesson completion + May Tom row.

Pipeline:

1. Load lead data: the Weiban Excel export and three sales sheets read from a
   Feishu spreadsheet.
2. Query PostgreSQL for accounts, the highest trial lesson each user finished,
   paid accounts and each account's registration month.
3. Attribute users to a sales rep by phone number.
4. Count, per registration month and sales rep, how many users reached at
   least lesson N and how many of those paid.
5. Write the counts (plus rate formulas) to columns H..AA of the process-data
   sheet.
"""
import json
import os
import sys
import time
from collections import defaultdict  # noqa: F401  (kept from the original import list)
from contextlib import closing
from datetime import datetime, timedelta
from typing import Any, Dict, Iterable, List, Mapping, Optional, Set, Tuple

# NOTE: requests, pandas and psycopg2 are imported inside the functions that
# need them, so the pure parsing/aggregation helpers below can be imported
# (and unit-tested) on a machine without network/DB drivers installed.

CRED_DIR = "/root/.openclaw/credentials/xiaoxi"
SPREADSHEET_TOKEN = "NoZqsFi47hIOHEt9j8WcfRtbnug"
SECRETS_FILE = "/root/.openclaw/workspace/secrets.env"
WB_FILE = "/root/.openclaw/media/inbound/å¾_ä¼_-å_æ_å_è_æ_æ_å_¼å_º_3---79d6aba9-7cd8-4e99-9b4f-dd981a5ca639.xlsx"
FEISHU_API = "https://open.feishu.cn/open-apis"

# Connection settings; the password comes from SECRETS_FILE.
PG_CONFIG = {
    "host": "bj-postgres-16pob4sg.sql.tencentcdb.com",
    "port": 28591,
    "user": "ai_member",
    "dbname": "vala_bi",
}

# (sheet name, Feishu sheet id, range) for each sales sheet, in merge order:
# later sheets override earlier ones when the same phone appears twice.
SALES_SHEETS = [
    ("吴迪", "f975f0", "A1:K700"),
    ("小龙", "qJF4I", "A1:K1200"),
    ("成都", "qJF4J", "A1:K2500"),
]

# Substring of the customer-service cell -> sales rep name.
CS_MAP = {"吴迪": "吴迪", "益达老师": "小龙", "瓦拉英语-Tom老师": "Tom", "瓦拉英语-Bob老师": "Bob"}

# Sales rep assumed when a row of that sheet has no recognisable CS value.
SHEET_DEFAULT_SALES = {"吴迪": "吴迪", "小龙": "小龙"}

LESSON_ORDER = ["L01", "L02", "L03", "L04", "L05"]
MONTHS = ["2026-03", "2026-04", "2026-05", "2026-06"]
SALES_LIST = {
    "2026-03": ["合计", "小龙", "Bob", "Tom"],
    "2026-04": ["合计", "小龙", "吴迪", "Bob", "Tom"],
    "2026-05": ["合计", "小龙", "吴迪", "Bob", "Tom"],
    "2026-06": ["合计", "小龙", "吴迪", "Bob", "Tom"],
}

PROCESS_SHEET_ID = "3aOvV6"

# Target columns, left to right. For each lesson N there are four columns:
# users who reached >= N, its rate, paid users who reached >= N, its rate.
# Both rate columns are formulas dividing the count by column C of the row.
COLUMNS = ["H", "I", "J", "K", "L", "M", "N", "O", "P", "Q",
           "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "AA"]

# (sheet row, registration month, sales bucket). "合计" is the monthly total.
ROW_MAP = [
    (2, "2026-03", "合计"), (3, "2026-03", "小龙"), (4, "2026-03", "Bob"), (5, "2026-03", "Tom"),
    (6, "2026-04", "合计"), (7, "2026-04", "小龙"), (8, "2026-04", "吴迪"), (9, "2026-04", "Bob"), (10, "2026-04", "Tom"),
    (11, "2026-05", "合计"), (12, "2026-05", "小龙"), (13, "2026-05", "吴迪"), (14, "2026-05", "Bob"), (15, "2026-05", "Tom"),
    (16, "2026-06", "合计"), (17, "2026-06", "小龙"), (18, "2026-06", "吴迪"), (19, "2026-06", "Bob"), (20, "2026-06", "Tom"),
]

USERS_SQL = """
    SELECT id, tel, tel_encrypt
    FROM bi_vala_app_account
    WHERE status = 1 AND deleted_at IS NULL
"""

LESSONS_SQL = """
    SELECT ucp.user_id, MAX(blu.course_lesson) as max_lesson
    FROM bi_user_chapter_play_record_0 ucp
    JOIN bi_level_unit_lesson blu ON ucp.chapter_id = blu.id
    WHERE ucp.play_status = 1
      AND blu.course_season = 'S0'
      AND blu.course_unit = 'U00'
      AND blu.course_level IN ('L1', 'L2')
    GROUP BY ucp.user_id
"""

PAID_SQL = """
    SELECT DISTINCT account_id
    FROM bi_vala_order
    WHERE pay_success_date IS NOT NULL
      AND order_status IN (3, 4)
      AND key_from IN ('app-active-h5-0-0', 'app-sales-bj-qhm-0')
"""

REG_MONTH_SQL = """
    SELECT id, DATE_TRUNC('month', created_at)::date as reg_month
    FROM bi_vala_app_account
    WHERE status = 1 AND deleted_at IS NULL
      AND created_at >= '2026-03-01' AND created_at < '2026-07-01'
"""


def load_secrets(path: str) -> Dict[str, str]:
    """Parse a ``KEY=VALUE`` env file into a dict.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    ignored. Surrounding whitespace and single quotes are stripped from
    values.
    """
    secrets = {}
    with open(path, encoding="utf-8") as f:
        for raw in f:
            line = raw.strip()
            if line and not line.startswith("#") and "=" in line:
                key, value = line.split("=", 1)
                secrets[key.strip()] = value.strip().strip("'")
    return secrets


def get_feishu_token() -> str:
    """Request a Feishu tenant access token using the first app in config.json.

    Raises:
        RuntimeError: if the response carries no ``tenant_access_token``.
    """
    import requests

    with open(os.path.join(CRED_DIR, "config.json"), encoding="utf-8") as f:
        app = json.load(f)["apps"][0]
    resp = requests.post(
        f"{FEISHU_API}/auth/v3/tenant_access_token/internal",
        json={"app_id": app["appId"], "app_secret": app["appSecret"]},
        timeout=15)
    payload = resp.json()
    token = payload.get("tenant_access_token")
    if not token:
        raise RuntimeError(
            f"Feishu auth failed: code={payload.get('code')} msg={payload.get('msg')}")
    return token


def read_sheet(sheet_id: str, range_str: str, token: Optional[str] = None) -> List[list]:
    """Read a cell range from the spreadsheet and return its rows.

    Args:
        sheet_id: Feishu sheet id inside ``SPREADSHEET_TOKEN``.
        range_str: A1-style range without the sheet prefix, e.g. ``"A1:K700"``.
        token: tenant access token; fetched via ``get_feishu_token`` if omitted.

    Returns:
        The list of rows; an empty list when the API reports no values.

    Raises:
        RuntimeError: if the response does not contain a value range.
    """
    import requests

    resp = requests.get(
        f"{FEISHU_API}/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values/{sheet_id}!{range_str}",
        headers={"Authorization": f"Bearer {token or get_feishu_token()}"},
        timeout=30)
    payload = resp.json()
    try:
        values = payload["data"]["valueRange"]["values"]
    except (KeyError, TypeError):
        raise RuntimeError(
            f"Feishu read {sheet_id}!{range_str} failed: "
            f"code={payload.get('code')} msg={payload.get('msg')}") from None
    return values or []


def write_values(sheet_id: str, range_str: str, values: List[list],
                 token: Optional[str] = None) -> bool:
    """Write ``values`` to ``range_str`` and report the outcome on stdout.

    Args:
        sheet_id: kept for call compatibility; the request only uses
            ``range_str``, which must already carry the ``sheet!`` prefix.
        range_str: full range, e.g. ``"3aOvV6!H2:AA2"``.
        values: rows of cell values; strings starting with ``=`` are sent
            with ``valueInputOption=USER_ENTERED``.
        token: tenant access token; fetched via ``get_feishu_token`` if omitted.

    Returns:
        True when the API answers with ``code == 0``, False otherwise.
    """
    import requests

    resp = requests.put(
        f"{FEISHU_API}/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values",
        headers={"Authorization": f"Bearer {token or get_feishu_token()}",
                 "Content-Type": "application/json"},
        json={"valueRange": {"range": range_str, "values": values}},
        params={"valueInputOption": "USER_ENTERED"},
        timeout=60)
    try:
        result = resp.json()
    except ValueError:
        # A non-JSON body (proxy error page, empty reply) counts as a failure.
        result = {"code": None, "msg": f"HTTP {resp.status_code}, non-JSON response"}
    ok = result.get("code") == 0
    if not ok:
        print(f" ❌ {range_str}: code={result.get('code')} msg={result.get('msg')}")
    else:
        print(f" ✅ {range_str}")
    return ok


def normalize_phone(value: Any) -> str:
    """Return ``value`` as a clean phone string, or ``""`` when it is missing.

    pandas reads a numeric column containing blanks as floats, so a phone
    arrives as ``13800000000.0`` and a blank as NaN; both are normalised here
    so that exact dictionary lookups work.
    """
    if value is None:
        return ""
    if isinstance(value, float):
        if value != value:  # NaN is the only float unequal to itself
            return ""
        if value.is_integer():
            value = int(value)
    text = str(value).strip()
    if text.lower() in ("nan", "<na>", "none"):
        return ""
    if text.endswith(".0") and text[:-2].isdigit():
        return text[:-2]
    return text


def map_customer_service(value: Any) -> Optional[str]:
    """Map a customer-service cell to a sales rep via ``CS_MAP`` (substring match)."""
    if not value:
        return None
    text = str(value).strip()
    for needle, sales in CS_MAP.items():
        if needle in text:
            return sales
    return None


def _find_columns(header: list, sheet_name: str) -> Tuple[Optional[int], ...]:
    """Locate the date, nickname, phone and CS columns in a header row.

    When several headers contain the same keyword the last one wins. If no
    header contains the CS keyword, a header containing ``sheet_name`` is used
    as the CS column instead.
    """
    date_col = name_col = phone_col = cs_col = None
    for idx, cell in enumerate(header):
        if not cell:
            continue
        text = str(cell)
        if "日期" in text:
            date_col = idx
        if "昵称" in text:
            name_col = idx
        if "手机" in text:
            phone_col = idx
        if "客服" in text:
            cs_col = idx
    if cs_col is None:
        for idx, cell in enumerate(header):
            if cell and sheet_name in str(cell):
                cs_col = idx
    return date_col, name_col, phone_col, cs_col


def parse_sales_sheet(data, sheet_name):
    """Parse sales sheet, return list of {date, name, phone, sales}.

    Args:
        data: rows as returned by ``read_sheet``; the first row is the header.
        sheet_name: sheet label, used to find the CS column as a fallback and
            to pick a default sales rep (see ``SHEET_DEFAULT_SALES``).

    Returns:
        One dict per usable row. Rows that are empty, shorter than the
        right-most recognised column, or without a date are skipped. Numeric
        dates are converted from spreadsheet serial days (epoch 1899-12-30)
        to ``YYYY-MM-DD``; other dates are kept as stripped text. ``sales``
        is None when no rep can be determined.
    """
    if not data:
        return []

    date_col, name_col, phone_col, cs_col = _find_columns(data[0], sheet_name)
    # Rows must reach the right-most recognised column. ``default=0`` covers
    # headers where nothing was found, or only column 0 was, which used to
    # raise ValueError from max() on an empty sequence.
    min_len = max((c for c in (date_col, name_col, phone_col, cs_col) if c), default=0)

    def cell(row, col):
        return row[col] if col is not None and col < len(row) else None

    results = []
    for row in data[1:]:
        if not row or len(row) <= min_len:
            continue
        date_val = cell(row, date_col)
        if not date_val:
            continue
        if isinstance(date_val, (int, float)):
            date_str = (datetime(1899, 12, 30) + timedelta(days=int(date_val))).strftime("%Y-%m-%d")
        else:
            date_str = str(date_val).strip()

        name_val = cell(row, name_col)
        sales = map_customer_service(cell(row, cs_col)) or SHEET_DEFAULT_SALES.get(sheet_name)
        results.append({
            "date": date_str,
            "name": str(name_val).strip() if name_val else "",
            "phone": normalize_phone(cell(row, phone_col)),
            "sales": sales,
        })
    return results


def build_wb_phone_map(rows: Iterable[Mapping[str, Any]]) -> Dict[str, str]:
    """Build phone -> sales from Weiban rows (anything with ``.get``).

    Rows without a phone or without a CS value known to ``CS_MAP`` are skipped.
    """
    mapping = {}
    for row in rows:
        phone = normalize_phone(row.get("手机号", ""))
        sales = map_customer_service(row.get("客服", ""))
        if phone and sales:
            mapping[phone] = sales
    return mapping


def build_sales_phone_map(entries: Iterable[Mapping[str, Any]]) -> Dict[str, str]:
    """Build phone -> sales from parsed sales-sheet entries; later entries win."""
    return {e["phone"]: e["sales"] for e in entries if e["phone"] and e["sales"]}


def _lookup_sales(tel: str, sources: Tuple[Mapping[str, str], ...]) -> Optional[str]:
    """Resolve ``tel`` against the sources: exact match first, then substring."""
    for mapping in sources:
        if tel in mapping:
            return mapping[tel]
    # NOTE(review): substring matching tolerates prefixes such as a country
    # code, but a very short phone fragment in a lead table would match almost
    # every user, and the scan is O(users x phones). Confirm this fallback is
    # still wanted now that numeric phones are normalised.
    for mapping in sources:
        for phone, sales in mapping.items():
            if tel in phone or phone in tel:
                return sales
    return None


def match_users_to_sales(users: Mapping[Any, Mapping[str, Any]],
                         wb_phone_to_sales: Mapping[str, str],
                         sales_phone_to_sales: Mapping[str, str]) -> Tuple[Dict[Any, str], int]:
    """Attribute each user to a sales rep by phone.

    Lookup priority: exact match in the Weiban map, exact match in the
    sales-sheet map, substring match in the Weiban map, substring match in the
    sales-sheet map.

    Returns:
        ``(user_id -> sales, unmatched_count)``; users without a phone count
        as unmatched.
    """
    sources = (wb_phone_to_sales, sales_phone_to_sales)
    user_to_sales = {}
    unmatched = 0
    for uid, uinfo in users.items():
        tel = normalize_phone(uinfo.get("tel"))
        sales = _lookup_sales(tel, sources) if tel else None
        if sales is None:
            unmatched += 1
        else:
            user_to_sales[uid] = sales
    return user_to_sales, unmatched


def load_db_data(password: str):
    """Run the four read-only queries on one connection.

    Returns:
        ``(users, user_lessons, paid_users, user_reg_month)`` where ``users``
        maps id -> {tel, tel_encrypt}, ``user_lessons`` maps user id -> highest
        completed lesson, ``paid_users`` is a set of account ids and
        ``user_reg_month`` maps id -> ``"YYYY-MM"``.
    """
    import psycopg2

    # closing() guarantees the connection is released even if a query fails;
    # psycopg2's own ``with conn`` only ends the transaction.
    with closing(psycopg2.connect(password=password, **PG_CONFIG)) as conn:
        cur = conn.cursor()

        cur.execute(USERS_SQL)
        users = {row[0]: {"tel": row[1], "tel_encrypt": row[2]} for row in cur.fetchall()}
        print(f" Users: {len(users)}")

        cur.execute(LESSONS_SQL)
        user_lessons = {row[0]: row[1] for row in cur.fetchall()}
        print(f" Users with lessons: {len(user_lessons)}")

        cur.execute(PAID_SQL)
        paid_users = {row[0] for row in cur.fetchall()}
        print(f" Paid users: {len(paid_users)}")

        cur.execute(REG_MONTH_SQL)
        user_reg_month = {row[0]: row[1].strftime("%Y-%m") for row in cur.fetchall()}

    return users, user_lessons, paid_users, user_reg_month


def aggregate(user_reg_month: Mapping[Any, str],
              user_to_sales: Mapping[Any, str],
              user_lessons: Mapping[Any, Any],
              paid_users: Set[Any]) -> Dict[str, Dict[str, Dict[str, int]]]:
    """Count cumulative lesson completion per registration month and sales rep.

    Returns:
        ``{month: {sales: {"gte_Lxx": n, "cv_Lxx": n}}}`` for every month in
        ``MONTHS`` and bucket in ``SALES_LIST``. ``gte_Lxx`` counts users whose
        highest completed lesson is at least ``Lxx``; ``cv_Lxx`` counts those
        of them who paid. Every counted user goes into "合计", and additionally
        into their rep's bucket when that rep is listed for the month.
    """
    results = {
        month: {
            sales: {f"{prefix}_{lesson}": 0 for prefix in ("gte", "cv") for lesson in LESSON_ORDER}
            for sales in SALES_LIST[month]
        }
        for month in MONTHS
    }
    for uid, reg_month in user_reg_month.items():
        month_results = results.get(reg_month)
        max_lesson = user_lessons.get(uid)
        if month_results is None or not max_lesson:
            continue

        buckets = [month_results["合计"]]
        sales = user_to_sales.get(uid)
        if sales and sales in month_results:
            buckets.append(month_results[sales])

        is_paid = uid in paid_users
        for lesson in LESSON_ORDER:
            # NOTE(review): plain string comparison; assumes course_lesson
            # values share the zero-padded "L01" format of LESSON_ORDER.
            if max_lesson >= lesson:
                for bucket in buckets:
                    bucket[f"gte_{lesson}"] += 1
                    if is_paid:
                        bucket[f"cv_{lesson}"] += 1
    return results


def build_row_values(row_num: int, counts: Mapping[str, int]) -> list:
    """Return the 20 cell values for columns H..AA of sheet row ``row_num``.

    Each count is followed by a formula dividing that count's cell by column C
    of the same row.
    """
    keys = [f"{prefix}_{lesson}" for lesson in LESSON_ORDER for prefix in ("gte", "cv")]
    values = []
    for col, key in zip(COLUMNS[0::2], keys):  # count columns: H, J, L, ..., Z
        values.append(counts[key])
        values.append(f'=IFERROR({col}{row_num}/C{row_num},"")')
    return values


def main() -> int:
    """Run the whole pipeline; return 0 on success, 1 if any row failed to write."""
    secrets = load_secrets(SECRETS_FILE)
    pg_password = secrets["PG_ONLINE_PASSWORD"]  # fail fast, before any network call

    # ------------------------------------------------------------
    # Step 1: load lead data (Weiban export + sales sheets)
    # ------------------------------------------------------------
    print("=== Step 1: Loading 线索 data ===")
    import pandas as pd

    wb_df = pd.read_excel(WB_FILE)
    print(f" 微伴: {len(wb_df)} rows, cols: {list(wb_df.columns)}")

    token = get_feishu_token()
    sheets = [(name, read_sheet(sheet_id, rng, token=token)) for name, sheet_id, rng in SALES_SHEETS]
    print(" " + ", ".join(f"{name}: {len(rows)} rows" for name, rows in sheets))

    parsed = [(name, parse_sales_sheet(rows, name)) for name, rows in sheets]
    print(" " + ", ".join(f"{name} entries: {len(entries)}" for name, entries in parsed))

    # ------------------------------------------------------------
    # Step 2: query the database
    # ------------------------------------------------------------
    print("\n=== Step 2: Querying database ===")
    users, user_lessons, paid_users, user_reg_month = load_db_data(pg_password)

    # ------------------------------------------------------------
    # Step 3: match users to sales reps through the lead data
    # ------------------------------------------------------------
    print("\n=== Step 3: Matching users to sales ===")
    wb_phone_to_sales = build_wb_phone_map(row for _, row in wb_df.iterrows())
    sales_phone_to_sales = build_sales_phone_map(
        entry for _, entries in parsed for entry in entries)
    print(f" 微伴 phone→sales: {len(wb_phone_to_sales)}")
    print(f" 销售表 phone→sales: {len(sales_phone_to_sales)}")

    user_to_sales, unmatched = match_users_to_sales(users, wb_phone_to_sales, sales_phone_to_sales)
    print(f" Matched users: {len(user_to_sales)}, unmatched: {unmatched}")

    # ------------------------------------------------------------
    # Step 4: aggregate by month and sales rep
    # ------------------------------------------------------------
    print("\n=== Step 4: Aggregating ===")
    results = aggregate(user_reg_month, user_to_sales, user_lessons, paid_users)
    for month in MONTHS:
        print(f"\n {month}:")
        for sales in SALES_LIST[month]:
            r = results[month][sales]
            print(f" {sales}: ≥L01={r['gte_L01']} ≥L02={r['gte_L02']} ≥L03={r['gte_L03']} ≥L04={r['gte_L04']} ≥L05={r['gte_L05']} | cv: {r['cv_L01']} {r['cv_L02']} {r['cv_L03']} {r['cv_L04']} {r['cv_L05']}")

    # ------------------------------------------------------------
    # Step 5: write to the process-data sheet
    # ------------------------------------------------------------
    print("\n=== Step 5: Writing to process data ===")
    failed = 0
    for row_num, month, sales in ROW_MAP:
        # One request per sheet row (H..AA) instead of one per cell.
        ok = write_values(
            PROCESS_SHEET_ID,
            f"{PROCESS_SHEET_ID}!{COLUMNS[0]}{row_num}:{COLUMNS[-1]}{row_num}",
            [build_row_values(row_num, results[month][sales])],
            token=token)
        if not ok:
            failed += 1
        time.sleep(0.15)  # stay under the API rate limit

    if failed:
        print(f"\n❌ {failed} of {len(ROW_MAP)} rows failed to update")
        return 1
    print("\n✅ Process data updated!")
    return 0


if __name__ == "__main__":
    sys.exit(main())