#!/usr/bin/env python3 """ 北京试点线索刷新 — 从北京工作簿 LP 表 + DB 重建 4koH9C 数据源: - LP 慧萌 kkzS48 / LP 虹茗 62366f (北京工作簿 FPYMsatUPhCpy5trDKdcfNH2nvM) - 成单 qX7oJ6 (北京工作簿,销售「萌」= 慧萌) - DB (vala_bi): 注册信息 + 订单 + 退款 + 行课 写入目标: 4koH9C (A-Z, 同销售三表结构) """ import json, requests, os, sys, psycopg2, time from datetime import datetime, timedelta from collections import defaultdict SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__)) sys.path.insert(0, SCRIPTS_DIR) from phone_encrypt import encrypt_phone # ── 配置 ── PG_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com" PG_PORT = 28591 PG_USER = "ai_member" PG_DB = "vala_bi" TARGET_SPREADSHEET = "NoZqsFi47hIOHEt9j8WcfRtbnug" TARGET_SHEET = "4koH9C" BEIJING_SPREADSHEET = "FPYMsatUPhCpy5trDKdcfNH2nvM" LP_SHEETS = [ ("kkzS48", "慧萌"), ("62366f", "虹茗"), ] CHENGDAN_SHEET = "qX7oJ6" CRED_DIR = "/root/.openclaw/credentials/xiaoxi" LOG_FILE = "/var/log/xiaoxi_beijing_leads_refresh.log" GOODS_NAMES = { 57: "瓦拉英语level1·单季", 60: "瓦拉英语level1", 63: "瓦拉英语level1·单季", 31: "瓦拉英语年包", 32: "瓦拉英语单季度包", 33: "瓦拉英语level2", 54: "瓦拉英语季度包", 61: "瓦拉英语level1+2", } def log(msg): ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S") line = f"[{ts}] {msg}" print(line) with open(LOG_FILE, "a") as f: f.write(line + "\n") def get_pg_password(): with open(os.path.join(SCRIPTS_DIR, "..", "secrets.env")) as f: for line in f: if line.startswith("PG_ONLINE_PASSWORD="): return line.strip().split("=", 1)[1].strip("'\"") def get_fs_token(): with open(os.path.join(CRED_DIR, "config.json")) as f: cfg = json.load(f) resp = requests.post( "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal", json={"app_id": cfg["apps"][0]["appId"], "app_secret": cfg["apps"][0]["appSecret"]}, timeout=15 ) return resp.json()["tenant_access_token"] def read_sheet(token, spreadsheet, sheet_id, range_str=None): url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{spreadsheet}/values/{sheet_id}" if range_str: url += f"!{range_str}" resp = requests.get(url, headers={"Authorization": f"Bearer {token}"}, timeout=30) data = resp.json() if data.get("code") != 0: raise RuntimeError(f"读取失败 {sheet_id}: {data}") return data["data"]["valueRange"]["values"] def put_values(token, sheet_id, range_str, values): url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{TARGET_SPREADSHEET}/values" body = {"valueRange": {"range": f"{sheet_id}!{range_str}", "values": values}} resp = requests.put(url, headers={ "Authorization": f"Bearer {token}", "Content-Type": "application/json" }, json=body, timeout=30) r = resp.json() if r.get("code") != 0: log(f" ❌ {range_str}: {r.get('code')} {r.get('msg')}") return False return True def excel_serial_to_date(serial): """Excel serial number → YYYY-MM-DD""" if not serial: return None try: s = int(float(serial)) return (datetime(1899, 12, 30) + timedelta(days=s)).strftime("%Y-%m-%d") except: return None def excel_serial_to_md(serial): """Excel serial → M月D日 格式""" d = excel_serial_to_date(serial) if not d: return "" dt = datetime.strptime(d, "%Y-%m-%d") return f"{dt.month}月{dt.day}日" def classify_sales_channel(key_from): if not key_from: return "直购" kf = key_from.strip() if kf in ('app-active-h5-0-0', 'app-sales-bj-qhm-0', 'app-sales-bj-wd-0'): return "端内" if kf.startswith('sales-adp-'): return "销转" if kf.startswith('newmedia-daren-') or kf == 'newmedia-dianpu-wwxx-0-0': return "达人" return "直购" def batch_in(cur, sql_tpl, params, chunk=500): results = [] for i in range(0, len(params), chunk): batch = params[i:i+chunk] if not batch: break ph = ",".join(["%s"] * len(batch)) cur.execute(sql_tpl % ph, batch) results.extend(cur.fetchall()) return results # ── Step 1: 解析 LP 表 ── def parse_lp_sheets(token): """返回 [{sales, nickname, date_str, exp_lessons, phone, grade, followup, lp_uid}, ...]""" all_leads = [] for sid, sales_name in LP_SHEETS: rows = read_sheet(token, BEIJING_SPREADSHEET, sid) log(f" {sales_name} ({sid}): {len(rows)} rows total") for idx, row in enumerate(rows[2:], start=3): if not row or len(row) < 6: continue # A: 是否下单 (col 0) # B: 序列号 (col 1) # C: 进线索日期 (col 2) - Excel serial date_serial = row[2] if len(row) > 2 else None date_str = excel_serial_to_md(date_serial) if date_serial else "" # E: 微信昵称 (col 4) nickname = str(row[4]).strip() if len(row) > 4 and row[4] else "" # F: 手机号 (col 5) phone = "" if len(row) > 5 and row[5]: try: phone = str(int(float(row[5]))) except: phone = str(row[5]).strip() # G: 孩子年龄/年级 (col 6) grade = str(row[6]).strip() if len(row) > 6 and row[6] else "" # H: 英语基础和在学课程 (col 7) → 课史/跟进 followup = str(row[7]).strip() if len(row) > 7 and row[7] else "" # N: 用户ID (col 13) - LP 自带的 lp_uid = "" if len(row) > 13 and row[13]: try: lp_uid = str(int(float(row[13]))) except: pass # T: U0行课进度 (col 18) → 体验节数 u0_progress = str(row[18]).strip() if len(row) > 18 and row[18] else "" exp_lessons = u0_progress if u0_progress else "" if not phone or len(phone) != 11: continue # skip rows without valid phone all_leads.append({ "sales": sales_name, "nickname": nickname, "date_str": date_str, "date_serial": date_serial, "exp_lessons": exp_lessons, "phone": phone, "grade": grade, "followup": followup, "lp_uid": lp_uid, }) log(f" {sales_name}: {len([l for l in all_leads if l['sales']==sales_name])} valid leads") log(f" 共 {len(all_leads)} 条有效线索") return all_leads # ── Step 2: DB 批量查询 ── def query_db(conn, leads): """查询注册信息、订单、退款、行课""" cur = conn.cursor() # 加密所有手机号 phone_enc_map = {} for lead in leads: enc = encrypt_phone(lead["phone"]) phone_enc_map[enc] = lead["phone"] lead["tel_encrypt"] = enc enc_list = list(phone_enc_map.keys()) # 2a. 注册信息 log(" 查询注册信息...") reg_info = batch_in(cur, "SELECT id, tel_encrypt, created_at, download_channel FROM bi_vala_app_account WHERE tel_encrypt IN (%s) AND status=1 AND deleted_at IS NULL", enc_list ) tel_to_account = {} for aid, tel_enc, created_at, dc in reg_info: tel_to_account[tel_enc] = { "account_id": aid, "reg_date": created_at.strftime("%Y-%m-%d") if created_at else "", "download_channel": dc or "", } # 填充到 leads account_ids = set() for lead in leads: acc = tel_to_account.get(lead["tel_encrypt"], {}) lead["account_id"] = acc.get("account_id") lead["reg_date"] = acc.get("reg_date", "") lead["download_channel"] = acc.get("download_channel", "") if lead["account_id"]: account_ids.add(lead["account_id"]) aid_list = list(account_ids) log(f" 匹配到 account_id: {len(aid_list)}") # 2b. 订单信息 log(" 查询订单信息...") lead["orders"] = [] lead["valid_order"] = None if aid_list: orders = batch_in(cur, "SELECT account_id, trade_no, pay_success_date, key_from, goods_id, pay_amount_int, order_status FROM bi_vala_order WHERE account_id IN (%s) AND pay_success_date IS NOT NULL AND order_status IN (3,4) ORDER BY pay_success_date DESC", aid_list ) # 按 account_id 分组 aid_orders = defaultdict(list) for o in orders: aid_orders[o[0]].append(o) # 退款 trade_nos = [o[1] for o in orders if o[1]] refund_map = defaultdict(int) if trade_nos: refunds = batch_in(cur, "SELECT trade_no, refund_amount_int FROM bi_refund_order WHERE trade_no IN (%s) AND status=3", trade_nos ) for tn, amt in refunds: refund_map[tn] += amt # SUM 多笔退费 # 为每个 account 找有效订单 for lead in leads: aid = lead.get("account_id") if not aid: continue olist = aid_orders.get(aid, []) lead["all_orders"] = olist # 找有效订单: GSV>0, 非全额退, 下单日期≥进线日期 lead_date = excel_serial_to_date(lead.get("date_serial")) for o in olist: trade_no = o[1] pay_date = o[2] key_from = o[3] goods_id = o[4] gmv = o[5] order_status = o[6] total_refund = refund_map.get(trade_no, 0) gsv = gmv - total_refund if gsv <= 0: continue # 全额退或 GSV≤0 if gmv == total_refund: continue # 全额退 pay_date_str = pay_date.strftime("%Y-%m-%d") if pay_date else "" if lead_date and pay_date_str < lead_date: continue # 下单早于进线 lead["valid_order"] = { "trade_no": trade_no, "pay_date": pay_date_str, "pay_date_md": f"{pay_date.month}月{pay_date.day}日" if pay_date else "", "key_from": key_from or "", "goods_id": goods_id, "gmv": gmv / 100.0, "refund": total_refund / 100.0, "gsv": gsv / 100.0, "product": GOODS_NAMES.get(goods_id, f"商品{goods_id}"), "channel_class": classify_sales_channel(key_from), } break # 取第一个符合条件的(最新) # 2c. 激活课程 log(" 查询激活课程...") if aid_list: try: activations = batch_in(cur, "SELECT account_id, season_package_level FROM bi_vala_seasonal_ticket WHERE account_id IN (%s) AND status=1 AND deleted_at IS NULL AND season_package_level IN ('A1','A2')", aid_list ) aid_activation = {} for aid, lvl in activations: aid_activation[aid] = lvl for lead in leads: aid = lead.get("account_id") if aid: lead["activation"] = aid_activation.get(aid, "") else: lead["activation"] = "" except Exception as e: log(f" 激活查询异常: {e}") for lead in leads: lead["activation"] = "" # 2d. 角色 + 行课 log(" 查询角色信息...") lead["lesson_progress"] = "" lead["lesson_time"] = "" lead["lesson_minutes"] = 0 if aid_list: char_info = batch_in(cur, "SELECT account_id, id FROM bi_vala_app_character WHERE account_id IN (%s) AND deleted_at IS NULL", aid_list ) account_chars = defaultdict(list) char_to_account = {} for aid, cid in char_info: account_chars[aid].append(cid) char_to_account[cid] = aid char_ids = list(char_to_account.keys()) log(f" 角色数: {len(char_ids)}") # 课程映射 cur.execute("SELECT id, course_level, course_season, course_unit, course_lesson FROM bi_level_unit_lesson") chapter_map = {} for ch_id, cl, cs, cu, cl2 in cur.fetchall(): chapter_map[ch_id] = (cl or "", cs or "", cu or "", cl2 or "") # 课时完成记录 log(" 查询课时完成记录...") char_plays = defaultdict(lambda: {"latest_time": None, "latest_chapter": None}) for tbl_idx in range(8): table = f"bi_user_chapter_play_record_{tbl_idx}" try: cur.execute( f"SELECT user_id, chapter_id, created_at FROM {table} WHERE play_status=1 AND deleted_at IS NULL AND user_id = ANY(%s)", (char_ids,) ) for uid, ch_id, created_at in cur.fetchall(): ch_data = chapter_map.get(ch_id) if not ch_data: continue rec = char_plays[uid] if rec["latest_time"] is None or created_at > rec["latest_time"]: rec["latest_time"] = created_at rec["latest_chapter"] = ch_data except Exception as e: log(f" 警告 {table}: {e}") # 学习总耗时 log(" 查询学习耗时...") for tbl_idx in range(8): table = f"bi_user_component_play_record_{tbl_idx}" try: cur.execute( f"SELECT user_id, SUM(COALESCE(interval_time,0)) FROM {table} WHERE user_id = ANY(%s) AND deleted_at IS NULL GROUP BY user_id", (char_ids,) ) for uid, total_ms in cur.fetchall(): if uid in char_plays: char_plays[uid]["total_ms"] = char_plays[uid].get("total_ms", 0) + (total_ms or 0) except Exception as e: log(f" 警告 {table}: {e}") # 汇总到 account 级别 for lead in leads: aid = lead.get("account_id") if not aid: continue chars = account_chars.get(aid, []) best_time = None best_ch = None total_ms = 0 for cid in chars: play = char_plays.get(cid) if not play: continue if play.get("latest_chapter"): if best_time is None or play["latest_time"] > best_time: best_time = play["latest_time"] best_ch = play["latest_chapter"] total_ms += play.get("total_ms", 0) if best_ch: cl, cs, cu, cl2 = best_ch lead["lesson_progress"] = f"{cl}-{cs}-{cu}-{cl2}" if best_time: lead["lesson_time"] = best_time.strftime("%Y-%m-%d") lead["lesson_minutes"] = round(total_ms / 60000, 1) if total_ms > 0 else 0 cur.close() log(" DB 查询完成") return leads # ── Step 3: 写入目标表 ── def write_target_sheet(token, leads): """Clear A3:Z500, 写入所有线索行""" log(" 写入 4koH9C...") # 先清空 A1 的迁移提示 put_values(token, TARGET_SHEET, "A1:A1", [[""]]) # 恢复标准表头 r1 header = [["销售归属", "微信昵称", "进线日期", "体验节数", "手机号", "用户年级", "课史/跟进", "用户ID", "注册日期", "下载渠道", "下单日期", "成交渠道", "产品", "下单金额(GMV)", "退款金额", "实际收入(GSV)", "激活课程", "当前行课进度", "最近行课时间", "累计学习时长(min)", "更新时间", "微伴补充", "进线早于下单", "订单号", "有效订单", "渠道归属"]] put_values(token, TARGET_SHEET, "A1:Z1", header) # Clear r2 put_values(token, TARGET_SHEET, "A2:Z2", [[""] * 26]) # 构建数据行 update_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") rows = [] for lead in leads: vo = lead.get("valid_order") row = [ lead["sales"], # A: 销售归属 lead["nickname"], # B: 微信昵称 lead["date_str"], # C: 进线日期 lead["exp_lessons"], # D: 体验节数 lead["phone"], # E: 手机号 lead["grade"], # F: 用户年级 lead["followup"], # G: 课史/跟进 lead.get("account_id", "") or "", # H: 用户ID lead.get("reg_date", ""), # I: 注册日期 lead.get("download_channel", ""), # J: 下载渠道 vo["pay_date_md"] if vo else "", # K: 下单日期 vo["key_from"] if vo else "", # L: 成交渠道 vo["product"] if vo else "", # M: 产品 vo["gmv"] if vo else "", # N: GMV vo["refund"] if vo else "", # O: 退款金额 vo["gsv"] if vo else "", # P: GSV lead.get("activation", ""), # Q: 激活课程 lead.get("lesson_progress", ""), # R: 当前行课进度 lead.get("lesson_time", ""), # S: 最近行课时间 lead.get("lesson_minutes", 0) or "", # T: 累计学习时长 update_time, # U: 更新时间 "", # V: 微伴补充 (不填) "", # W: 进线早于下单 (Cursor 填) vo["trade_no"] if vo else "", # X: 订单号 1 if vo else 0, # Y: 有效订单 vo["channel_class"] if vo else "", # Z: 渠道归属 ] rows.append(row) total = len(rows) log(f" 共 {total} 行,Y=1: {sum(1 for r in rows if r[24]==1)}") # 分批写入 (每批最多 20 行 × 26 列 = 520 格,远低于 4400) for batch_start in range(0, total, 20): batch = rows[batch_start:batch_start+20] sr = 3 + batch_start er = sr + len(batch) - 1 put_values(token, TARGET_SHEET, f"A{sr}:Z{er}", batch) time.sleep(0.3) # 清除多余旧行 if total < 498: clear_start = 3 + total clear_end = 500 empty_rows = [[""] * 26 for _ in range(clear_end - clear_start + 1)] put_values(token, TARGET_SHEET, f"A{clear_start}:Z{clear_end}", empty_rows) log(f" 清除多余行 A{clear_start}:Z{clear_end}") log(f" 写入完成") # ── Main ── def main(): log("=" * 50) log("北京试点线索刷新 启动") try: token = get_fs_token() conn = psycopg2.connect( host=PG_HOST, port=PG_PORT, user=PG_USER, password=get_pg_password(), dbname=PG_DB, connect_timeout=30 ) # Step 1: 解析 LP 表 log("Step 1: 解析 LP 表") leads = parse_lp_sheets(token) # Step 2: DB 查询 log("Step 2: DB 查询")