ai_member_xiaoxi/scripts/fix_process_data.py
2026-06-03 08:00:01 +08:00

397 lines
15 KiB
Python

#!/usr/bin/env python3
"""Fix process data: cumulative lesson completion + 5月 Tom row"""
import json, requests, os, time, sys
import pandas as pd
import psycopg2
from collections import defaultdict
CRED_DIR = "/root/.openclaw/credentials/xiaoxi"
SPREADSHEET_TOKEN = "NoZqsFi47hIOHEt9j8WcfRtbnug"

# Load secrets from a KEY=VALUE file. Blank lines, lines starting with '#'
# and lines without '=' are skipped; the value is everything after the first
# '=', trimmed of whitespace and of surrounding single quotes.
secrets = {}
with open("/root/.openclaw/workspace/secrets.env", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        k, v = line.split("=", 1)
        secrets[k.strip()] = v.strip().strip("'")
def get_feishu_token():
    """Fetch a Feishu tenant access token for the first configured app.

    Reads ``config.json`` from ``CRED_DIR`` and posts the ``appId`` /
    ``appSecret`` of ``cfg["apps"][0]`` to the Feishu internal
    tenant-access-token endpoint.

    Returns:
        The ``tenant_access_token`` value from the JSON response.

    Raises:
        RuntimeError: If the response carries no ``tenant_access_token``;
            the message includes the HTTP status and the API ``code``/``msg``
            so an auth failure is diagnosable.
    """
    with open(os.path.join(CRED_DIR, "config.json"), encoding="utf-8") as f:
        cfg = json.load(f)
    app = cfg["apps"][0]
    resp = requests.post(
        "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
        json={"app_id": app["appId"], "app_secret": app["appSecret"]},
        timeout=15)
    payload = resp.json()
    access_token = payload.get("tenant_access_token")
    if not access_token:
        # Without this check a rejected login surfaced as a bare KeyError.
        raise RuntimeError(
            f"Feishu auth failed: HTTP {resp.status_code} "
            f"code={payload.get('code')} msg={payload.get('msg')}")
    return access_token
# ============================================================
# Step 1: Load lead data (Weiban export + sales sheets)
# ============================================================
print("=== Step 1: Loading 线索 data ===")
# Load the Weiban (微伴) export; the workbook path is assigned on the next line.
wb_file = "/root/.openclaw/media/inbound/å¾_ä¼_-å_æ_å_è_æ_æ_å_¼å_º_3---79d6aba9-7cd8-4e99-9b4f-dd981a5ca639.xlsx"
# Read the Weiban export workbook into a DataFrame and report its size/columns.
wb_df = pd.read_excel(wb_file)
print(f" 微伴: {len(wb_df)} rows, cols: {list(wb_df.columns)}")
# Load the sales sheets from Feishu: first obtain the tenant access token that
# read_sheet() and write_values() send as the Bearer credential.
token = get_feishu_token()
def read_sheet(sheet_id, range_str):
    """Read a cell range from one sheet of the Feishu spreadsheet.

    Args:
        sheet_id: Sheet identifier inside ``SPREADSHEET_TOKEN``.
        range_str: A1-style range without the sheet prefix, e.g. ``"A1:K700"``.

    Returns:
        The ``values`` list of the response's ``valueRange`` (a list of rows).

    Raises:
        RuntimeError: If the API response ``code`` is not 0 (same success
            criterion that ``write_values`` uses); the message carries the
            requested range plus the API ``code``/``msg``.
    """
    resp = requests.get(
        f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values/{sheet_id}!{range_str}",
        headers={"Authorization": f"Bearer {token}"}, timeout=30)
    payload = resp.json()
    if payload.get("code") != 0:
        # Error payloads have no usable "data"; fail with the API's own message
        # instead of an opaque KeyError/TypeError on the lookup below.
        raise RuntimeError(
            f"Feishu read failed for {sheet_id}!{range_str}: "
            f"code={payload.get('code')} msg={payload.get('msg')}")
    return payload["data"]["valueRange"]["values"]
# Wu Di (吴迪) sheet: columns A-K, rows 1-700
wudi_data = read_sheet("f975f0", "A1:K700")
# Xiaolong (小龙) sheet: columns A-K, rows 1-1200
xl_data = read_sheet("qJF4I", "A1:K1200")
# Chengdu (成都) sheet: columns A-K, rows 1-2500
cd_data = read_sheet("qJF4J", "A1:K2500")
print(f" 吴迪: {len(wudi_data)} rows, 小龙: {len(xl_data)} rows, 成都: {len(cd_data)} rows")
# Parse the sales sheets.
# Maps a substring of the customer-service cell to the canonical sales name.
CS_MAP = {"吴迪": "吴迪", "益达老师": "小龙", "瓦拉英语-Tom老师": "Tom", "瓦拉英语-Bob老师": "Bob"}


def _phone_cell_text(value):
    """Return a phone cell as stripped text; empty/None becomes ''.

    Integral floats are rendered without the trailing ``.0`` so that a number
    delivered as ``13800138000.0`` still compares equal to ``"13800138000"``.
    """
    if not value:
        return ""
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    return str(value).strip()


def parse_sales_sheet(data, sheet_name):
    """Parse a sales sheet into a list of lead records.

    The first row of ``data`` is the header. Columns are located by header
    substring: "日期" (date), "昵称" (nickname), "手机" (phone) and "客服"
    (customer service); if several headers match, the last one wins. When no
    "客服" header exists, a header containing ``sheet_name`` is used instead.

    Args:
        data: List of rows (lists of cell values); may be empty.
        sheet_name: Sheet owner name. Also the fallback sales name for rows
            with no recognised customer-service value when it is "吴迪" or
            "小龙"; for any other sheet such rows get ``sales=None``.

    Returns:
        A list of ``{"date", "name", "phone", "sales"}`` dicts, one per row
        that has a non-empty date cell and is long enough to contain every
        located column. Numeric dates are treated as spreadsheet serial day
        numbers (epoch 1899-12-30) and formatted as ``YYYY-MM-DD``; other
        dates are kept as stripped text. Returns ``[]`` when ``data`` is
        empty or no date column is found.
    """
    from datetime import datetime, timedelta

    if not data:
        return []
    header = data[0]

    # Find columns
    date_col = name_col = phone_col = cs_col = None
    for i, h in enumerate(header):
        if not h:
            continue
        label = str(h)
        if "日期" in label:
            date_col = i
        if "昵称" in label:
            name_col = i
        if "手机" in label:
            phone_col = i
        if "客服" in label:
            cs_col = i
    if cs_col is None:
        # Try to find the CS column by the sheet owner's name
        for i, h in enumerate(header):
            if h and sheet_name in str(h):
                cs_col = i

    if date_col is None:
        # Every row would be dropped for lack of a date anyway.
        return []

    # Highest located column index. Column 0 is a valid index and must not be
    # discarded as "falsy" (that made max() fail on an empty sequence).
    last_col = max(c for c in (date_col, name_col, phone_col, cs_col) if c is not None)

    results = []
    for row in data[1:]:
        if not row or len(row) <= last_col:
            continue
        date_val = row[date_col]
        if not date_val:
            continue
        name_val = row[name_col] if name_col is not None else None
        phone_val = row[phone_col] if phone_col is not None else None
        cs_val = row[cs_col] if cs_col is not None else None

        # Parse date
        if isinstance(date_val, (int, float)):
            date_str = (datetime(1899, 12, 30) + timedelta(days=int(date_val))).strftime("%Y-%m-%d")
        else:
            date_str = str(date_val).strip()

        # Map CS: first CS_MAP key contained in the cell text wins
        sales = None
        if cs_val:
            cs_str = str(cs_val).strip()
            for k, v in CS_MAP.items():
                if k in cs_str:
                    sales = v
                    break
        if not sales:
            if sheet_name == "吴迪":
                sales = "吴迪"
            elif sheet_name == "小龙":
                sales = "小龙"

        results.append({
            "date": date_str,
            "name": str(name_val).strip() if name_val else "",
            "phone": _phone_cell_text(phone_val),
            "sales": sales,
        })
    return results
# Parse each downloaded sheet; the second argument is the sheet owner name that
# parse_sales_sheet uses for header lookup and as the fallback sales name.
wudi_entries = parse_sales_sheet(wudi_data, "吴迪")
xl_entries = parse_sales_sheet(xl_data, "小龙")
cd_entries = parse_sales_sheet(cd_data, "成都")
print(f" 吴迪 entries: {len(wudi_entries)}, 小龙 entries: {len(xl_entries)}, 成都 entries: {len(cd_entries)}")
# ============================================================
# Step 2: Query database for user lesson completion
# ============================================================
print("\n=== Step 2: Querying database ===")

# SQL statements are module-level constants so their text stays exactly as
# written while the code that runs them sits inside try/finally.

# All accounts with status = 1 that are not soft-deleted.
_ACTIVE_USERS_SQL = """
SELECT id, tel, tel_encrypt
FROM bi_vala_app_account
WHERE status = 1 AND deleted_at IS NULL
"""

# Highest course_lesson per user among play records with play_status = 1,
# restricted to season S0 / unit U00 / levels L1 and L2.
# NOTE(review): presumably play_status = 1 means "finished" - confirm.
_MAX_LESSON_SQL = """
SELECT ucp.user_id, MAX(blu.course_lesson) as max_lesson
FROM bi_user_chapter_play_record_0 ucp
JOIN bi_level_unit_lesson blu ON ucp.chapter_id = blu.id
WHERE ucp.play_status = 1
AND blu.course_season = 'S0'
AND blu.course_unit = 'U00'
AND blu.course_level IN ('L1', 'L2')
GROUP BY ucp.user_id
"""

# Accounts with a paid order (order_status 3 or 4) from the two listed channels.
# NOTE(review): the meaning of order_status 3/4 is not visible here - confirm.
_PAID_USERS_SQL = """
SELECT DISTINCT account_id
FROM bi_vala_order
WHERE pay_success_date IS NOT NULL
AND order_status IN (3, 4)
AND key_from IN ('app-active-h5-0-0', 'app-sales-bj-qhm-0')
"""

conn = psycopg2.connect(
    host="bj-postgres-16pob4sg.sql.tencentcdb.com",
    port=28591,
    user="ai_member",
    password=secrets["PG_ONLINE_PASSWORD"],
    dbname="vala_bi"
)
try:
    cur = conn.cursor()

    # Get all users with their phone numbers: {id: {"tel", "tel_encrypt"}}
    cur.execute(_ACTIVE_USERS_SQL)
    users = {row[0]: {"tel": row[1], "tel_encrypt": row[2]} for row in cur.fetchall()}
    print(f" Users: {len(users)}")

    # Get cumulative lesson completion per user: {user_id: max_lesson}
    cur.execute(_MAX_LESSON_SQL)
    user_lessons = {row[0]: row[1] for row in cur.fetchall()}
    print(f" Users with lessons: {len(user_lessons)}")

    # Get paid users as a set of account ids
    cur.execute(_PAID_USERS_SQL)
    paid_users = set(row[0] for row in cur.fetchall())
    print(f" Paid users: {len(paid_users)}")
finally:
    # Close the connection even when a query fails, so it is never leaked.
    conn.close()
# ============================================================
# Step 3: Match users to sales via lead data
# ============================================================
print("\n=== Step 3: Matching users to sales ===")


def _weiban_cell_text(value):
    """Return a DataFrame cell as stripped text ('' for None or NaN).

    pandas yields NaN for blank cells and floats for numeric columns that
    contain blanks; without this, those became the keys "nan" and
    "13800138000.0", which can never equal a stored phone number.
    """
    if value is None:
        return ""
    if isinstance(value, float):
        if value != value:  # NaN is the only value that differs from itself
            return ""
        if value.is_integer():
            value = int(value)
    return str(value).strip()


def _find_sales(tel, phone_tables):
    """Resolve ``tel`` to a sales name, or None when nothing matches.

    Exact key matches are tried in table order first. Only if none hits is a
    substring match attempted (either string containing the other), again in
    table order, returning the first hit in dict iteration order.
    """
    for table in phone_tables:
        if tel in table:
            return table[tel]
    # NOTE(review): substring matching is O(users x phones) and a very short
    # phone entry would match many users - consider a minimum length.
    for table in phone_tables:
        for phone, sales_name in table.items():
            if tel in phone or phone in tel:
                return sales_name
    return None


# Build phone -> sales mappings from the lead data
# Weiban export: phone -> sales, via the first CS_MAP key found in the CS cell
wb_phone_to_sales = {}
for _, row in wb_df.iterrows():
    phone = _weiban_cell_text(row.get("手机号", ""))
    cs = _weiban_cell_text(row.get("客服", ""))
    if phone and cs:
        for k, v in CS_MAP.items():
            if k in cs:
                wb_phone_to_sales[phone] = v
                break

# Sales sheets: phone -> sales (later entries overwrite earlier ones)
sales_phone_to_sales = {}
for entry in wudi_entries + xl_entries + cd_entries:
    if entry["phone"] and entry["sales"]:
        sales_phone_to_sales[entry["phone"]] = entry["sales"]

print(f" 微伴 phone→sales: {len(wb_phone_to_sales)}")
print(f" 销售表 phone→sales: {len(sales_phone_to_sales)}")

# Match users to sales via phone: Weiban takes priority over the sales sheets,
# and exact matches take priority over substring matches.
user_to_sales = {}
unmatched = 0
phone_tables = (wb_phone_to_sales, sales_phone_to_sales)
for uid, uinfo in users.items():
    tel = (uinfo.get("tel") or "").strip()
    matched_sales = _find_sales(tel, phone_tables) if tel else None
    if matched_sales is None:
        # Users without a phone number count as unmatched too.
        unmatched += 1
    else:
        user_to_sales[uid] = matched_sales
print(f" Matched users: {len(user_to_sales)}, unmatched: {unmatched}")
# ============================================================
# Step 4: Aggregate by month and sales
# ============================================================
print("\n=== Step 4: Aggregating ===")

# Registration month (first day of the month of created_at) for active,
# non-deleted accounts created from 2026-03-01 up to, not including, 2026-07-01.
_REG_MONTH_SQL = """
SELECT id, DATE_TRUNC('month', created_at)::date as reg_month
FROM bi_vala_app_account
WHERE status = 1 AND deleted_at IS NULL
AND created_at >= '2026-03-01' AND created_at < '2026-07-01'
"""

# Get user registration month
conn = psycopg2.connect(
    host="bj-postgres-16pob4sg.sql.tencentcdb.com",
    port=28591,
    user="ai_member",
    password=secrets["PG_ONLINE_PASSWORD"],
    dbname="vala_bi"
)
try:
    cur = conn.cursor()
    cur.execute(_REG_MONTH_SQL)
    # {account id: "YYYY-MM"}
    user_reg_month = {row[0]: row[1].strftime("%Y-%m") for row in cur.fetchall()}
finally:
    # Close the connection even when the query fails, so it is never leaked.
    conn.close()
# For each (month, sales) bucket, count users whose highest completed lesson
# reaches each lesson in LESSON_ORDER, plus how many of those users have paid.
LESSON_ORDER = ["L01", "L02", "L03", "L04", "L05"]
MONTHS = ["2026-03", "2026-04", "2026-05", "2026-06"]
SALES_LIST = {
    "2026-03": ["合计", "小龙", "Bob", "Tom"],
    "2026-04": ["合计", "小龙", "吴迪", "Bob", "Tom"],
    "2026-05": ["合计", "小龙", "吴迪", "Bob", "Tom"],
    "2026-06": ["合计", "小龙", "吴迪", "Bob", "Tom"],
}

# results[month][sales] = {"gte_<lesson>": users at or beyond that lesson,
#                          "cv_<lesson>": the paid subset of those users}
results = {}
for month in MONTHS:
    results[month] = {}
    for sales in SALES_LIST[month]:
        counters = {f"gte_{l}": 0 for l in LESSON_ORDER}
        counters.update({f"cv_{l}": 0 for l in LESSON_ORDER})
        results[month][sales] = counters

for uid, reg_month in user_reg_month.items():
    month_buckets = results.get(reg_month)
    if month_buckets is None:
        continue
    highest_lesson = user_lessons.get(uid, None)
    if not highest_lesson:
        # Users without any completed lesson contribute to no counter.
        continue

    # Every user feeds the month total ("合计"); users matched to a sales
    # name that has a row in this month also feed that row.
    targets = [month_buckets["合计"]]
    owner = user_to_sales.get(uid, None)
    if owner and owner in month_buckets:
        targets.append(month_buckets[owner])

    has_paid = uid in paid_users
    # Lesson codes compare as strings, as the max lesson value does.
    reached = [lesson for lesson in LESSON_ORDER if highest_lesson >= lesson]
    for bucket in targets:
        for lesson in reached:
            bucket[f"gte_{lesson}"] += 1
            if has_paid:
                bucket[f"cv_{lesson}"] += 1
# Dump the aggregated counters, one line per sales row, grouped by month
for month in MONTHS:
    print(f"\n {month}:")
    month_rows = results[month]
    for sales in SALES_LIST[month]:
        r = month_rows[sales]
        print(f" {sales}: ≥L01={r['gte_L01']} ≥L02={r['gte_L02']} ≥L03={r['gte_L03']} ≥L04={r['gte_L04']} ≥L05={r['gte_L05']} | cv: {r['cv_L01']} {r['cv_L02']} {r['cv_L03']} {r['cv_L04']} {r['cv_L05']}")

# ============================================================
# Step 5: Write to process data sheet
# ============================================================
print("\n=== Step 5: Writing to process data ===")
def write_values(sheet_id, range_str, values):
    """Write ``values`` into one range of the Feishu spreadsheet.

    Args:
        sheet_id: Accepted for call-site symmetry but not used; the target
            sheet is taken from the prefix of ``range_str``.
        range_str: Full range including the sheet prefix,
            e.g. ``"3aOvV6!H2:H2"``.
        values: 2-D list (rows of cells) to write into the range.

    Returns:
        True when the API answers with ``code == 0``, otherwise False. The
        range is printed on success; the range plus the failure detail is
        printed otherwise.
    """
    resp = requests.put(
        f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values",
        headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"},
        json={"valueRange": {"range": range_str, "values": values}},
        params={"valueInputOption": "USER_ENTERED"},
        timeout=60)
    try:
        result = resp.json()
    except ValueError:
        # A non-JSON body (e.g. a gateway error page) is a failed write,
        # reported through the return value like any other failure.
        print(f"{range_str}: HTTP {resp.status_code}, non-JSON response")
        return False
    ok = result.get("code") == 0
    if not ok:
        print(f"{range_str}: code={result.get('code')} msg={result.get('msg')}")
    else:
        print(f"{range_str}")
    return ok
# Layout of the process-data sheet written below.
# Columns H..AA form five groups of four cells, one group per lesson L01..L05:
#   [users at or beyond the lesson, rate formula,
#    paid users at or beyond the lesson, paid-rate formula]
# Both rate formulas divide the count cell to their left-hand side of the pair
# by column C of the same row.
# NOTE(review): column C is presumably the lead/registration base - confirm.
# Rows 2..20 hold one (month, sales) pair each, exactly as listed in row_map;
# March has no 吴迪 row, so it has four rows and the other months have five.
row_map = [
    (2, "2026-03", "合计"), (3, "2026-03", "小龙"), (4, "2026-03", "Bob"), (5, "2026-03", "Tom"),
    (6, "2026-04", "合计"), (7, "2026-04", "小龙"), (8, "2026-04", "吴迪"), (9, "2026-04", "Bob"), (10, "2026-04", "Tom"),
    (11, "2026-05", "合计"), (12, "2026-05", "小龙"), (13, "2026-05", "吴迪"), (14, "2026-05", "Bob"), (15, "2026-05", "Tom"),
    (16, "2026-06", "合计"), (17, "2026-06", "小龙"), (18, "2026-06", "吴迪"), (19, "2026-06", "Bob"), (20, "2026-06", "Tom"),
]
PROCESS_COLUMNS = [
    "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q",
    "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "AA",
]

failed_ranges = []
for row_num, month, sales in row_map:
    r = results[month][sales]

    # Build the 20 cell values for H..AA: counts and their rate formulas.
    row_values = []
    for group, lesson in enumerate(("L01", "L02", "L03", "L04", "L05")):
        count_col = PROCESS_COLUMNS[4 * group]
        paid_col = PROCESS_COLUMNS[4 * group + 2]
        row_values.extend([
            r[f"gte_{lesson}"],
            f'=IFERROR({count_col}{row_num}/C{row_num},"")',
            r[f"cv_{lesson}"],
            f'=IFERROR({paid_col}{row_num}/C{row_num},"")',
        ])

    # One request per cell, throttled by a short sleep between requests.
    # NOTE(review): a single H{row}:AA{row} write per row would cut the
    # request count 20x - confirm the sheet accepts it before switching.
    for col, value in zip(PROCESS_COLUMNS, row_values):
        cell_range = f"3aOvV6!{col}{row_num}:{col}{row_num}"
        if not write_values("3aOvV6", cell_range, [[value]]):
            failed_ranges.append(cell_range)
        time.sleep(0.15)

if failed_ranges:
    # Do not claim success when the API rejected some of the writes.
    print(f"\n❌ {len(failed_ranges)} cell write(s) failed, first: {failed_ranges[0]}")
    sys.exit(1)
print("\n✅ Process data updated!")