315 lines
11 KiB
Python
315 lines
11 KiB
Python
#!/usr/bin/env python3
|
|
"""Fix process data: cumulative lesson completion by sales, using sales sheet phone→user matching"""
|
|
|
|
import json, requests, os, time, re
|
|
import psycopg2
|
|
from collections import defaultdict
|
|
from datetime import datetime, timedelta
|
|
|
|
# Directory holding the Feishu app credentials; get_token() reads config.json from it.
CRED_DIR = "/root/.openclaw/credentials/xiaoxi"
# Token of the Feishu spreadsheet that read_sheet() / write_values() address.
SPREADSHEET_TOKEN = "NoZqsFi47hIOHEt9j8WcfRtbnug"


def _parse_env_line(raw_line):
    """Parse one ``KEY=VALUE`` line of an env file.

    Returns:
        A ``(key, value)`` tuple, or ``None`` for blank lines, ``#`` comments
        and lines without an ``=``.
    """
    text = raw_line.strip()
    if not text or text.startswith("#") or "=" not in text:
        return None
    key, value = text.split("=", 1)
    value = value.strip()
    if len(value) >= 2 and value[0] == value[-1] == '"':
        # A value wrapped in double quotes would otherwise keep its quotes.
        value = value[1:-1]
    else:
        # Original behaviour: drop any leading/trailing single quotes.
        value = value.strip("'")
    return key.strip(), value


# KEY -> value pairs loaded from the workspace env file (e.g. PG_ONLINE_PASSWORD).
secrets = {}
# Explicit encoding so decoding does not depend on the host locale.
with open("/root/.openclaw/workspace/secrets.env", encoding="utf-8") as f:
    for line in f:
        parsed = _parse_env_line(line)
        if parsed is not None:
            k, v = parsed
            secrets[k] = v
|
|
|
|
def get_token():
    """Request a Feishu tenant access token for the first app in config.json.

    Reads ``config.json`` from ``CRED_DIR`` and posts the first app's
    ``appId`` / ``appSecret`` to the internal tenant-token endpoint.

    Returns:
        The ``tenant_access_token`` string from the response body.

    Raises:
        RuntimeError: if the response is not JSON or carries no token
            (the API ``code`` / ``msg`` are included in the message).
    """
    with open(os.path.join(CRED_DIR, "config.json"), encoding="utf-8") as f:
        cfg = json.load(f)
    app = cfg["apps"][0]
    resp = requests.post(
        "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
        json={"app_id": app["appId"], "app_secret": app["appSecret"]},
        timeout=15,
    )
    try:
        payload = resp.json()
    except ValueError as err:
        raise RuntimeError(
            f"Feishu auth returned a non-JSON response (HTTP {resp.status_code})"
        ) from err
    access_token = payload.get("tenant_access_token")
    if not access_token:
        # Surface the API diagnostics instead of a bare KeyError.
        raise RuntimeError(
            f"Feishu auth failed: code={payload.get('code')} msg={payload.get('msg')}"
        )
    return access_token


# Module-level token used by read_sheet() and write_values() for the whole run.
token = get_token()
|
|
|
|
def read_sheet(sheet_id, range_str):
    """Read a cell range from one sheet of the spreadsheet ``SPREADSHEET_TOKEN``.

    Args:
        sheet_id: Sheet identifier placed before the ``!`` in the range path.
        range_str: Cell range such as ``"A1:K700"``.

    Returns:
        The ``data.valueRange.values`` payload of the response (rows of cells).

    Raises:
        RuntimeError: if the response is not JSON or its ``code`` is not 0.
    """
    resp = requests.get(
        f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values/{sheet_id}!{range_str}",
        headers={"Authorization": f"Bearer {token}"},
        timeout=30,
    )
    try:
        payload = resp.json()
    except ValueError as err:
        raise RuntimeError(
            f"read_sheet {sheet_id}!{range_str}: non-JSON response (HTTP {resp.status_code})"
        ) from err
    # Same success criterion write_values() uses; fail with the API message
    # rather than an opaque KeyError on the missing "data" payload.
    if payload.get("code") != 0:
        raise RuntimeError(
            f"read_sheet {sheet_id}!{range_str}: code={payload.get('code')} msg={payload.get('msg')}"
        )
    return payload["data"]["valueRange"]["values"]
|
|
|
|
def write_values(sheet_id, range_str, values):
    """Write ``values`` into ``range_str`` of the spreadsheet and report the outcome.

    Args:
        sheet_id: Not used by the request; the target sheet is taken from the
            prefix of ``range_str``. Kept so existing call sites stay valid.
        range_str: Fully qualified range, e.g. ``"<sheet>!H2:H2"``.
        values: Rows of cell values to store.

    Returns:
        True when the response ``code`` is 0, otherwise False. One status
        line is printed in both cases.
    """
    endpoint = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values"
    request_headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
    body = {"valueRange": {"range": range_str, "values": values}}

    resp = requests.put(
        endpoint,
        headers=request_headers,
        json=body,
        params={"valueInputOption": "USER_ENTERED"},
        timeout=60,
    )
    result = resp.json()

    if result.get("code") == 0:
        print(f" ✅ {range_str}")
        return True

    print(f" ❌ {range_str}: code={result.get('code')} msg={result.get('msg')}")
    return False
|
|
|
|
# ============================================================
# Step 1: Parse sales sheets → phone → sales mapping
# ============================================================
print("=== Step 1: Parse sales sheets ===")

# Substring searched for in the sales column → canonical sales name.
CS_MAP = {"吴迪": "吴迪", "小龙": "小龙", "Tom": "Tom", "Bob": "Bob"}


def _cell_to_str(cell):
    """Normalise a sheet cell to a stripped string.

    Empty/falsy cells and non-finite floats become ``""``; numeric cells are
    rendered as integers (``13800000000.0`` → ``"13800000000"``).
    """
    if not cell:
        return ""
    if isinstance(cell, float) and (cell != cell or cell in (float("inf"), float("-inf"))):
        # int(nan) / int(inf) would raise; treat such cells as empty.
        return ""
    if isinstance(cell, (int, float)):
        return str(int(cell))
    return str(cell).strip()


def parse_sales_sheet(data):
    """Extract sales/phone/user_id records from the raw rows of a sales sheet.

    The first two rows are treated as headers. A row is used only when it has
    at least five cells and its first cell contains one of the ``CS_MAP``
    keys. Column 0 is the sales name, column 4 the phone and column 7 (when
    present) the user id.

    Args:
        data: List of rows (lists of cell values); ``None`` is treated as empty.

    Returns:
        A list of ``{"sales": str, "phone": str, "user_id": str}`` dicts in
        row order; ``phone`` / ``user_id`` are ``""`` when the cell is empty.
    """
    results = []
    for row in (data or [])[2:]:  # skip header rows
        if not row or len(row) < 5:
            continue

        sales_raw = str(row[0]).strip() if row[0] else ""
        # First CS_MAP key contained in the cell wins (dict order).
        sales = next((name for key, name in CS_MAP.items() if key in sales_raw), None)
        if not sales:
            continue

        user_id_raw = row[7] if len(row) > 7 else None
        results.append({
            "sales": sales,
            "phone": _cell_to_str(row[4]),
            "user_id": _cell_to_str(user_id_raw),
        })
    return results
|
|
|
|
# Raw rows of the three sales sheets (all reads happen before any parsing).
wudi = read_sheet("f975f0", "A1:K700")
xl = read_sheet("qJF4I", "A1:K1200")
cd = read_sheet("qJF4J", "A1:K2500")

wudi_entries, xl_entries, cd_entries = (
    parse_sales_sheet(sheet_rows) for sheet_rows in (wudi, xl, cd)
)
all_entries = [*wudi_entries, *xl_entries, *cd_entries]
print(f" 吴迪: {len(wudi_entries)}, 小龙: {len(xl_entries)}, 成都: {len(cd_entries)}, 总计: {len(all_entries)}")

# Lookup tables phone→sales and user_id→sales; the first entry seen for a key
# wins, and only purely numeric user ids are indexed.
phone_to_sales = {}
uid_to_sales = {}
for entry in all_entries:
    entry_phone = entry["phone"]
    entry_uid = entry["user_id"]
    if entry_phone:
        phone_to_sales.setdefault(entry_phone, entry["sales"])
    if entry_uid and entry_uid.isdigit():
        uid_to_sales.setdefault(entry_uid, entry["sales"])

print(f" phone→sales: {len(phone_to_sales)}, uid→sales: {len(uid_to_sales)}")
|
|
|
|
# ============================================================
# Step 2: Query database
# ============================================================
print("\n=== Step 2: Query database ===")

conn = psycopg2.connect(
    host="bj-postgres-16pob4sg.sql.tencentcdb.com", port=28591,
    user="ai_member", password=secrets["PG_ONLINE_PASSWORD"], dbname="vala_bi")
# try/finally so the connection is released even when a query raises.
try:
    cur = conn.cursor()

    # Users with phone: account id → stripped tel, for rows with a non-empty tel.
    cur.execute("SELECT id, tel FROM bi_vala_app_account WHERE status = 1 AND deleted_at IS NULL")
    user_phone = {}
    for row in cur.fetchall():
        uid = row[0]
        tel = (row[1] or "").strip()
        if tel:
            user_phone[uid] = tel

    # User reg month: account id → "YYYY-MM" of created_at.
    # NOTE: the date window below has to stay in sync with the MONTHS list
    # used by the aggregation step.
    cur.execute("""
        SELECT id, DATE_TRUNC('month', created_at)::date as reg_month
        FROM bi_vala_app_account WHERE status = 1 AND deleted_at IS NULL
        AND created_at >= '2026-03-01' AND created_at < '2026-07-01'
    """)
    user_reg = {row[0]: row[1].strftime("%Y-%m") for row in cur.fetchall()}

    # Max lesson per user: user id → MAX(course_lesson) over the play records
    # selected by the filters in the query.
    cur.execute("""
        SELECT ucp.user_id, MAX(blu.course_lesson) as max_lesson
        FROM bi_user_chapter_play_record_0 ucp
        JOIN bi_level_unit_lesson blu ON ucp.chapter_id = blu.id
        WHERE ucp.play_status = 1
        AND blu.course_season = 'S0' AND blu.course_unit = 'U00'
        AND blu.course_level IN ('L1', 'L2')
        GROUP BY ucp.user_id
    """)
    user_lesson = {row[0]: row[1] for row in cur.fetchall()}

    # Paid users: distinct account ids of orders matching the filters below.
    cur.execute("""
        SELECT DISTINCT account_id FROM bi_vala_order
        WHERE pay_success_date IS NOT NULL AND order_status IN (3, 4)
        AND key_from IN ('app-active-h5-0-0', 'app-sales-bj-qhm-0')
    """)
    paid = set(row[0] for row in cur.fetchall())
finally:
    conn.close()

print(f" Users with phone: {len(user_phone)}, reg: {len(user_reg)}, lessons: {len(user_lesson)}, paid: {len(paid)}")
|
|
|
|
# ============================================================
# Step 3: Match users to sales
# ============================================================
print("\n=== Step 3: Match users to sales ===")

# Shortest string allowed to take part in the substring fallback. Without a
# floor, a stray sheet value such as "1" is a substring of almost every phone
# number and would assign unrelated users to a sales rep.
# NOTE(review): 7 is a judgement call; tune it if legitimate partial numbers
# in the sheets are shorter.
MIN_PARTIAL_PHONE_LEN = 7

user_to_sales = {}
matched_by_uid = 0
matched_by_phone = 0

# Sheet phones eligible for substring matching, in original insertion order
# so the "first match wins" behaviour is preserved.
partial_candidates = [
    (phone, sales) for phone, sales in phone_to_sales.items()
    if len(phone) >= MIN_PARTIAL_PHONE_LEN
]

for uid in user_reg:
    # 1) Exact user_id match takes priority.
    uid_key = str(uid)
    if uid_key in uid_to_sales:
        user_to_sales[uid] = uid_to_sales[uid_key]
        matched_by_uid += 1
        continue

    tel = user_phone.get(uid, "")
    if not tel:
        continue

    # 2) Exact phone match.
    if tel in phone_to_sales:
        user_to_sales[uid] = phone_to_sales[tel]
        matched_by_phone += 1
        continue

    # 3) Partial phone match: one number contained in the other (for example
    #    a prefixed or truncated number), only between sufficiently long strings.
    if len(tel) < MIN_PARTIAL_PHONE_LEN:
        continue
    for phone, sales in partial_candidates:
        if tel in phone or phone in tel:
            user_to_sales[uid] = sales
            matched_by_phone += 1
            break

print(f" Matched: uid={matched_by_uid}, phone={matched_by_phone}, total={len(user_to_sales)}")
|
|
|
|
# ============================================================
# Step 4: Aggregate
# ============================================================
print("\n=== Step 4: Aggregate ===")

LESSONS = ["L01", "L02", "L03", "L04", "L05"]
MONTHS = ["2026-03", "2026-04", "2026-05", "2026-06"]
# Rows reported for each month; "合计" is the total row.
SALES_MAP = {
    "2026-03": ["合计", "小龙", "Bob", "Tom"],
    "2026-04": ["合计", "小龙", "吴迪", "Bob", "Tom"],
    "2026-05": ["合计", "小龙", "吴迪", "Bob", "Tom"],
    "2026-06": ["合计", "小龙", "吴迪", "Bob", "Tom"],
}

# results[month][sales] holds, per lesson, "gte_<lesson>" (users whose max
# lesson is >= that lesson) and "cv_<lesson>" (the paid subset of those users).
results = {
    month: {
        name: {
            **{f"gte_{lesson}": 0 for lesson in LESSONS},
            **{f"cv_{lesson}": 0 for lesson in LESSONS},
        }
        for name in SALES_MAP[month]
    }
    for month in MONTHS
}

for uid, reg_month in user_reg.items():
    month_buckets = results.get(reg_month)
    if month_buckets is None:
        continue

    max_l = user_lesson.get(uid)
    if not max_l:
        # Users without a recorded lesson contribute to no counter.
        continue

    sales = user_to_sales.get(uid)
    is_paid = uid in paid

    # Every user counts towards the total row, and additionally towards the
    # matched sales row when that row exists for the month.
    targets = [month_buckets["合计"]]
    if sales and sales in month_buckets:
        targets.append(month_buckets[sales])

    for bucket in targets:
        for lesson in LESSONS:
            if max_l >= lesson:  # lesson codes are compared as strings
                bucket[f"gte_{lesson}"] += 1
                if is_paid:
                    bucket[f"cv_{lesson}"] += 1

for m in MONTHS:
    print(f"\n {m}:")
    for s in SALES_MAP[m]:
        r = results[m][s]
        gte_part = " ".join(f"{lesson}={r[f'gte_{lesson}']}" for lesson in LESSONS)
        cv_part = " ".join(str(r[f"cv_{lesson}"]) for lesson in LESSONS)
        print(f" {s}: {gte_part} | cv: {cv_part}")
|
|
|
|
# ============================================================
# Step 5: Write to process data
# ============================================================
print("\n=== Step 5: Write to process data ===")

# (sheet row number, month, sales row) for every line of the report.
row_map = [
    (2, "2026-03", "合计"), (3, "2026-03", "小龙"), (4, "2026-03", "Bob"), (5, "2026-03", "Tom"),
    (6, "2026-04", "合计"), (7, "2026-04", "小龙"), (8, "2026-04", "吴迪"), (9, "2026-04", "Bob"), (10, "2026-04", "Tom"),
    (11, "2026-05", "合计"), (12, "2026-05", "小龙"), (13, "2026-05", "吴迪"), (14, "2026-05", "Bob"), (15, "2026-05", "Tom"),
    (16, "2026-06", "合计"), (17, "2026-06", "小龙"), (18, "2026-06", "吴迪"), (19, "2026-06", "Bob"), (20, "2026-06", "Tom"),
]

# Column mapping:
# H=first-lesson count (≥L01)   I=first-lesson rate   J=one-lesson conversions (≥L01+paid)    K=one-lesson conversion rate
# L=second-lesson count (≥L02)  M=second-lesson rate  N=two-lesson conversions (≥L02+paid)    O=two-lesson conversion rate
# P=third-lesson count (≥L03)   Q=third-lesson rate   R=three-lesson conversions (≥L03+paid)  S=three-lesson conversion rate
# T=fourth-lesson count (≥L04)  U=fourth-lesson rate  V=four-lesson conversions (≥L04+paid)   W=four-lesson conversion rate
# X=fifth-lesson count (≥L05)   Y=fifth-lesson rate   Z=five-lesson conversions (≥L05+paid)   AA=five-lesson conversion rate

# (count column, rate column, count key, conversion key) per lesson.
col_pairs = [
    ("H", "I", "gte_L01", "cv_L01"),  # first lesson
    ("L", "M", "gte_L02", "cv_L02"),  # second lesson
    ("P", "Q", "gte_L03", "cv_L03"),  # third lesson
    ("T", "U", "gte_L04", "cv_L04"),  # fourth lesson
    ("X", "Y", "gte_L05", "cv_L05"),  # fifth lesson
]

# Conversion columns J/K, N/O, R/S, V/W, Z/AA: (count column, rate column, key).
cv_col_pairs = [
    ("J", "K", "cv_L01"),
    ("N", "O", "cv_L02"),
    ("R", "S", "cv_L03"),
    ("V", "W", "cv_L04"),
    ("Z", "AA", "cv_L05"),
]

# Number of cell writes write_values() reported as failed.
failed_writes = 0


def _write_cell(col, row_num, value):
    """Write one cell of sheet 3aOvV6, pause briefly, and return the write status."""
    ok = write_values("3aOvV6", f"3aOvV6!{col}{row_num}:{col}{row_num}", [[value]])
    time.sleep(0.1)  # small delay between consecutive API calls
    return ok


for row_num, month, sales in row_map:
    r = results[month][sales]

    # Lesson counts and their rate formulas (count / column C of the same row).
    for count_col, rate_col, key, _cv_key in col_pairs:
        if not _write_cell(count_col, row_num, r[key]):
            failed_writes += 1
        if not _write_cell(rate_col, row_num, f'=IFERROR({count_col}{row_num}/C{row_num},"")'):
            failed_writes += 1

    # Conversion counts and their rate formulas.
    for cv_col, cv_rate_col, cv_key in cv_col_pairs:
        if not _write_cell(cv_col, row_num, r[cv_key]):
            failed_writes += 1
        if not _write_cell(cv_rate_col, row_num, f'=IFERROR({cv_col}{row_num}/C{row_num},"")'):
            failed_writes += 1

# Only claim success when every write was accepted.
if failed_writes:
    print(f"\n❌ Process data update finished with {failed_writes} failed write(s)")
else:
    print("\n✅ Process data updated!")
|