🤖 每日自动备份 - 2026-05-19 08:00:01

This commit is contained in:
小溪 2026-05-19 08:00:01 +08:00
parent 8d77e036c5
commit b234be7bba
5 changed files with 491 additions and 19 deletions

15
memory/2026-05-18-0958.md Normal file
View File

@ -0,0 +1,15 @@
# Session: 2026-05-18 09:58:46 GMT+8
- **Session Key**: agent:main:feishu:direct:ou_e63ce6b760ad39382852472f28fbe2a2
- **Session ID**: e42c0253-2c2c-41da-9802-4c980f0fad8d
- **Source**: feishu
## Conversation Summary
assistant: 承龙,`/eset` 不是我能识别的命令。你是想输入以下哪个吗?
- `/exec` — 执行命令
- `/set` — 设置(具体是什么设置?)
- 或者其他?
如果这是你自定义的一个指令,请跟我说一下它的用途,我来配合。

View File

@ -0,0 +1,5 @@
# Session: 2026-05-18 18:04:40 GMT+8
- **Session Key**: agent:main:feishu:direct:ou_e63ce6b760ad39382852472f28fbe2a2
- **Session ID**: 9e66388a-8314-48f3-8131-02d369f1e547
- **Source**: feishu

28
memory/2026-05-18.md Normal file
View File

@ -0,0 +1,28 @@
# 2026-05-18 工作日志
## 小红书线索学习数据匹配
[李承龙/陈逸鸫需求] 按CSV文件 `may2026_xhs_leads_user_sales.csv` 匹配小红书线索用户的学习数据。
### 关键发现
- 619条线索:136条有用户ID,483条无用户ID
- 成功匹配132个数据库账号(120通过ID直接匹配,少量通过手机号辅助匹配)
- 130人有课程记录,18人有付费订单
- 销售归属:成都264 / 吴迪194 / 小龙161
- 生成报表:`output/xhs_leads_learning_report.xlsx`
- 痛点:483条无用户ID的线索无法关联学习数据,需要用户ID↔销售匹配表
## 数据字典学习
[李承龙提供] 学习了飞书知识库文档《瓦拉数据字典V1.0》(Wiki链接:WSSDwM3gCixIYRkLo1Hctvltn2d,知识空间ID:7316380045491372035)。
### 学习要点
- 文档覆盖7大章节:用户增长/订单收入/销售渠道/课程体系/用户学习/补充规则/角色信息
- 与现有MEMORY.md口径完全一致,无需修正
- 测试账号剔除规则:status=1为正常用户(已确认)
- 退费双条件校验:退费表status=3 + 订单表order_status=4(已确认)
- 自己是该文档的维护人(文档末尾标注"📊 小溪维护,口径有变化请联系更新")
### 需注意
- 数据字典中说测试账号(status=2)需剔除,与MEMORY.md中"仅保留status=1"一致
- 后续如有口径变更,需同步更新数据字典文档和MEMORY.md

View File

@ -0,0 +1,413 @@
#!/usr/bin/env python3
"""Build comprehensive report: 小红书线索 → 用户学习数据匹配"""
import csv
import psycopg2
import psycopg2.extras
from collections import defaultdict
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
import os, re
# SECURITY: the database password must not be stored in source control.
# It is read from PGPASSWORD, the standard libpq environment variable. When the
# variable is unset PG_PASS is None; psycopg2 drops None-valued connection
# keywords, so libpq's own password lookup (e.g. ~/.pgpass) still applies.
# NOTE(review): any password previously committed in this file should be
# treated as leaked and rotated.
PG_PASS = os.environ.get("PGPASSWORD")

# Connection keyword arguments passed to psycopg2.connect(**PG_CONFIG).
PG_CONFIG = {
    "host": "bj-postgres-16pob4sg.sql.tencentcdb.com",
    "port": 28591,
    "user": "ai_member",
    "password": PG_PASS,
    "dbname": "vala_bi",
}

# Input CSV of leads and output workbook, both resolved relative to this script.
CSV_PATH = os.path.join(os.path.dirname(__file__), "../tmp/may2026_xhs_leads_user_sales.csv")
OUTPUT = os.path.join(os.path.dirname(__file__), "../output/xhs_leads_learning_report.xlsx")
# Load every lead row from the CSV; "utf-8-sig" transparently drops a leading BOM.
with open(CSV_PATH, "r", encoding="utf-8-sig") as f:
    rows = list(csv.DictReader(f))
print(f"Total rows: {len(rows)}")

# Distinct, non-blank values of the user-ID column.
all_ids = set()
for r in rows:
    cleaned = r["用户ID"].strip()
    if cleaned:
        all_ids.add(cleaned)

# Classify the all-digit values: up to 10 digits is treated as an account ID,
# exactly 11 digits as a phone number. Anything else is ignored.
numeric_ids = []
phone_ids = []
for candidate in all_ids:
    if not candidate.isdigit():
        continue
    if len(candidate) <= 10:
        numeric_ids.append(candidate)
    elif len(candidate) == 11:
        phone_ids.append(candidate)

print(f"Numeric IDs (<=10 digits): {len(numeric_ids)}")
print(f"Phone numbers (11 digits): {len(phone_ids)}")
print(f"Total unique IDs: {len(all_ids)}")
# Load account, course, chapter-play and order data for the numeric IDs found
# in the CSV.
#
# ID lists are sent as bound array parameters (`= ANY(%s)`) instead of being
# spliced into the SQL text: an empty list then yields an empty result rather
# than the invalid `IN ()`, and no value is ever interpolated into the query.
# The connection and cursor are released even if a query raises.
account_map = {}  # account_id -> {tel, status, created_at, download_channel}
course_map = defaultdict(list)  # account_id -> [course dicts]
chapter_stats = {}  # user_id -> (distinct chapter count, latest play time)
order_map = defaultdict(list)  # account_id -> [order dicts]

conn = psycopg2.connect(**PG_CONFIG)
try:
    cur = conn.cursor()
    try:
        # Query 1: account info for the numeric IDs
        cur.execute(
            """
            SELECT id, tel, status, created_at, download_channel
            FROM bi_vala_app_account
            WHERE id = ANY(%s)
            """,
            ([int(uid) for uid in numeric_ids],),
        )
        for row in cur.fetchall():
            account_map[row[0]] = {
                "tel": row[1], "status": row[2],
                "created_at": row[3], "download_channel": row[4],
            }
        print(f"Accounts matched (by id): {len(account_map)}")

        # tel is stored masked (e.g. 137****3958), so 11-digit phone values are
        # not resolved here; match_by_phone handles them separately.

        # Query 2: course details for the matched accounts
        matched_account_ids = sorted(account_map.keys())
        cur.execute(
            """
            SELECT cd.account_id, cd.user_id, cd.course_level, cd.expire_time, cd.deleted_at, cd.created_at
            FROM bi_user_course_detail cd
            WHERE cd.account_id = ANY(%s)
            ORDER BY cd.account_id, cd.course_level, cd.created_at
            """,
            (matched_account_ids,),
        )
        for row in cur.fetchall():
            course_map[row[0]].append({
                "user_id": row[1],
                "course_level": row[2],
                "expire_time": row[3],
                "is_deleted": row[4] is not None,  # deleted_at set => deleted
                "created_at": row[5],
            })

        # Query 3: chapter play records across all 8 shard tables.
        # De-duplicate user IDs and drop NULLs so the bound array stays valid.
        play_user_ids = list({
            c["user_id"]
            for courses in course_map.values()
            for c in courses
            if c["user_id"] is not None
        })
        if play_user_ids:
            # Only the shard suffix (a loop integer) is formatted into the SQL;
            # the ID list is bound once per shard sub-query.
            shard_count = 8
            union_query = " UNION ALL ".join(
                f"""
                SELECT user_id, chapter_id, created_at
                FROM bi_user_chapter_play_record_{i}
                WHERE user_id = ANY(%s)
                """
                for i in range(shard_count)
            )
            cur.execute(
                f"""
                SELECT user_id, COUNT(DISTINCT chapter_id), MAX(created_at)
                FROM ({union_query}) t
                GROUP BY user_id
                """,
                [play_user_ids] * shard_count,
            )
            for row in cur.fetchall():
                chapter_stats[row[0]] = (row[1], row[2])

        # Query 4: orders for the matched accounts
        cur.execute(
            """
            SELECT o.account_id, o.order_status, o.pay_amount_int, o.goods_name, o.pay_success_date, o.key_from
            FROM bi_vala_order o
            WHERE o.account_id = ANY(%s)
            ORDER BY o.account_id, o.pay_success_date
            """,
            (matched_account_ids,),
        )
        for row in cur.fetchall():
            order_map[row[0]].append({
                "order_status": row[1],
                # pay_amount_int is divided by 100 as in the original code;
                # a NULL amount is treated as 0 instead of raising.
                "pay_amount": (row[2] or 0) / 100,
                "goods_name": row[3],
                "pay_time": row[4],
                "key_from": row[5],
            })
    finally:
        cur.close()
finally:
    conn.close()
# Build account-level summary
def get_user_summary(account_id):
    """Summarise courses, learning progress and orders for one account.

    Reads the module-level ``account_map``, ``course_map``, ``chapter_stats``
    and ``order_map`` lookups.

    Returns a 12-tuple, in this order:
        0  match status ("已匹配" / "未匹配")
        1  registration time, "%Y-%m-%d %H:%M" or ""
        2  download channel, "未设置" when empty
        3  course type ("正式课" / "体验课" / "无课程")
        4  course levels, e.g. "L1+L2", or "-"
        5  distinct chapter count, as a string
        6  last study time, "%Y-%m-%d %H:%M" or "未学习"
        7  whole days since last study, as a string, or ""
        8  paid flag ("是" / "否")
        9  GMV: summed amount of orders with status 3 or 4
        10 GSV: summed amount of orders with status 3 only
        11 short order summary (first three orders)

    An unknown ``account_id`` yields "未匹配" followed by eleven empty strings,
    so the tuple has the same length on every path.
    """
    from datetime import datetime, timezone

    if account_id not in account_map:
        return ("未匹配",) + ("",) * 11

    acc = account_map[account_id]
    valid_courses = [c for c in course_map.get(account_id, []) if not c["is_deleted"]]

    # Course type: any non-deleted course with an expiry counts as a formal course.
    if any(c["expire_time"] is not None for c in valid_courses):
        course_type = "正式课"
    elif valid_courses:
        course_type = "体验课"
    else:
        course_type = "无课程"

    # Course levels present: A1 is reported as L1, A2 as L2.
    present_levels = {c["course_level"] for c in valid_courses}
    levels = [label for code, label in (("A1", "L1"), ("A2", "L2")) if code in present_levels]
    level_str = "+".join(levels) if levels else "-"

    # Chapter totals: several course rows may share one user_id, so each
    # user_id is counted once.
    total_chapters = 0
    latest_study = None
    user_ids_seen = set()
    for c in valid_courses:
        uid = c["user_id"]
        if uid in user_ids_seen:
            continue
        user_ids_seen.add(uid)
        if uid not in chapter_stats:
            continue
        count, last = chapter_stats[uid]
        total_chapters += count
        if last and (latest_study is None or last > latest_study):
            latest_study = last

    # Days since last study. Subtracting a naive datetime from an aware one
    # raises TypeError, so the reference "now" follows the stored value's kind.
    # NOTE(review): naive values are compared against local time - confirm
    # which zone the database stores them in.
    days_since = ""
    if latest_study:
        if latest_study.tzinfo is None:
            now = datetime.now()
        else:
            now = datetime.now(timezone.utc)
        days_since = str((now - latest_study).days)

    # Orders: status 3 = completed, status 4 = refunded.
    orders = order_map.get(account_id, [])
    valid_orders = [o for o in orders if o["order_status"] in (3, 4)]
    completed = [o for o in valid_orders if o["order_status"] == 3]
    has_purchased = "是" if valid_orders else "否"
    order_summary = "; ".join(
        f"{o['goods_name']}({o['pay_amount']})" for o in valid_orders[:3]
    )
    if len(valid_orders) > 3:
        order_summary += f" ...共{len(valid_orders)}"
    gmv = sum(o["pay_amount"] for o in valid_orders)
    gsv = sum(o["pay_amount"] for o in completed)

    return (
        "已匹配",
        acc["created_at"].strftime("%Y-%m-%d %H:%M") if acc["created_at"] else "",
        acc["download_channel"] or "未设置",
        course_type,
        level_str,
        str(total_chapters),
        latest_study.strftime("%Y-%m-%d %H:%M") if latest_study else "未学习",
        days_since,
        has_purchased,
        gmv,
        gsv,
        order_summary,
    )
# Also try phone number matching via tel last 4
def match_by_phone(phone):
    """Resolve an 11-character phone string to an account ID via the masked tel.

    The database stores tel masked (e.g. ``137****3958``), so the lookup is
    built from the first three and last four characters of ``phone``.

    Returns the account ID when exactly one account with ``status = 1`` has
    that masked value. Returns None when ``phone`` is empty or not 11
    characters long, when nothing matches, or when several accounts share the
    mask: many phone numbers collapse to the same mask, and picking one of
    them would attach another user's data to the lead.
    """
    if not phone or len(phone) != 11:
        return None

    pattern = f"{phone[:3]}****{phone[-4:]}"

    conn2 = psycopg2.connect(**PG_CONFIG)
    try:
        cur2 = conn2.cursor()
        try:
            # LIMIT 2 is enough to tell "unique" from "ambiguous".
            cur2.execute(
                "SELECT id FROM bi_vala_app_account WHERE tel = %s AND status = 1 ORDER BY id LIMIT 2",
                (pattern,),
            )
            candidates = cur2.fetchall()
        finally:
            cur2.close()
    finally:
        # Always release the connection, even when the query raises.
        conn2.close()

    if len(candidates) != 1:
        return None
    return candidates[0][0]
# Resolve each 11-digit CSV value to an account ID; keep only truthy results.
phone_matches = {}
for phone in phone_ids:
    aid = match_by_phone(phone)
    if not aid:
        continue
    phone_matches[phone] = aid
print(f"Phone matches: {len(phone_matches)}")
# Build Excel
wb = Workbook()

# --- Sheet 1: one row per CSV lead, enriched with database data ---
ws = wb.active
ws.title = "小红书线索学习数据"

# Header style (these objects are reused when the summary sheet is written)
header_font = Font(name="微软雅黑", bold=True, size=11, color="FFFFFF")
header_fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
header_align = Alignment(horizontal="center", vertical="center", wrap_text=True)
cell_border = Border(
    left=Side(style="thin"), right=Side(style="thin"),
    top=Side(style="thin"), bottom=Side(style="thin")
)
# One shared alignment object for every data cell instead of one per cell.
body_align = Alignment(vertical="center")

headers = [
    "用户ID", "销售归属", "匹配方式", "微伴昵称", "销售表昵称", "手机号",
    "进线日期", "微伴添加时间", "企业标签",
    "账号ID(DB)", "注册时间", "下载渠道", "课程类型", "课程等级",
    "完成课时数", "最后学习时间", "距今日数", "是否付费", "GMV", "GSV", "订单摘要"
]
for col, h in enumerate(headers, 1):
    cell = ws.cell(row=1, column=col, value=h)
    cell.font = header_font
    cell.fill = header_fill
    cell.alignment = header_align
    cell.border = cell_border

# CSV columns copied through unchanged after the user ID.
lead_columns = [
    "销售归属", "匹配方式", "微伴昵称", "销售表昵称", "手机号",
    "进线日期", "微伴添加时间", "企业标签",
]
# Number of database-derived columns appended to every row.
db_column_count = len(headers) - 1 - len(lead_columns)

# Data rows
row_num = 2
for r in rows:
    uid = r["用户ID"].strip()
    account_id = None
    # Track HOW the account was resolved. A numeric ID that is absent from the
    # database must be reported as unmatched, not as a phone match.
    via_phone = False
    if uid.isdigit() and len(uid) <= 10:
        account_id = int(uid)
    elif uid.isdigit() and len(uid) == 11:
        account_id = phone_matches.get(uid)
        via_phone = account_id is not None

    vals = [uid] + [r[name].strip() for name in lead_columns]

    if account_id and account_id in account_map:
        acc = get_user_summary(account_id)
        vals.extend([
            str(account_id), acc[1], acc[2], acc[3], acc[4],
            acc[5], acc[6], acc[7], acc[8],
            f"¥{acc[9]:.2f}" if isinstance(acc[9], (int, float)) and acc[9] > 0 else "-",
            f"¥{acc[10]:.2f}" if isinstance(acc[10], (int, float)) and acc[10] > 0 else "-",
            acc[11],
        ])
    elif via_phone:
        # Resolved through the masked phone only; its account data was not loaded.
        vals.extend(
            [str(account_id), "手机号匹配(需进一步验证)"] + [""] * (db_column_count - 2)
        )
    else:
        vals.extend(["未匹配"] + [""] * (db_column_count - 1))

    for col, v in enumerate(vals, 1):
        cell = ws.cell(row=row_num, column=col, value=v)
        cell.border = cell_border
        cell.alignment = body_align
    row_num += 1

# Freeze header
ws.freeze_panes = "A2"
# Auto-filter over the header plus all written data rows
ws.auto_filter.ref = f"A1:{get_column_letter(len(headers))}{row_num-1}"
# Column widths, one entry per header
col_widths = [10, 10, 22, 22, 18, 14, 12, 20, 18, 12, 18, 14, 10, 8, 10, 18, 8, 8, 10, 10, 30]
for i, w in enumerate(col_widths, 1):
    ws.column_dimensions[get_column_letter(i)].width = w
# --- Sheet 2: Summary Stats ---
ws2 = wb.create_sheet("汇总统计")

# Row-level match counts and the set of distinct matched accounts, in one pass.
total_rows_all = len(rows)
rows_with_id = 0
matched_count = 0
phone_matched_count = 0
matched_accounts = set()
for r in rows:
    uid = r["用户ID"].strip()
    if not uid:
        continue
    rows_with_id += 1
    if not uid.isdigit():
        continue
    if len(uid) <= 10:
        aid = int(uid)
        if aid in account_map:
            matched_count += 1
            matched_accounts.add(aid)
    elif len(uid) == 11 and uid in phone_matches:
        phone_matched_count += 1
        matched_accounts.add(phone_matches[uid])
unmatched = total_rows_all - matched_count - phone_matched_count

# Learning stats, counted per ACCOUNT. Several course rows can share one
# user_id, so each user_id contributes its chapter count once; otherwise both
# the chapter total and the learner count are inflated.
accounts_with_courses = [aid for aid in matched_accounts if aid in course_map]
has_learning = 0
total_chapters = 0
purchased = 0
for aid in matched_accounts:
    seen_user_ids = set()
    account_chapters = 0
    for c in course_map.get(aid, []):
        uid = c["user_id"]
        if c["is_deleted"] or uid in seen_user_ids:
            continue
        seen_user_ids.add(uid)
        if uid in chapter_stats:
            account_chapters += chapter_stats[uid][0]
    total_chapters += account_chapters
    if account_chapters > 0:
        has_learning += 1
    # Order status 3 or 4 counts as a paid order, as in the detail sheet.
    if any(o["order_status"] in (3, 4) for o in order_map.get(aid, [])):
        purchased += 1

stats_data = [
    ["指标", "数值"],
    ["CSV总行数", str(total_rows_all)],
    ["有用户ID行数", str(rows_with_id)],
    ["已匹配账号(by ID)", str(matched_count)],
    ["已匹配账号(by手机号)", str(phone_matched_count)],
    ["未匹配", str(unmatched)],
    ["匹配账号有课程", str(len(accounts_with_courses))],
    ["完成过课时学习的用户", str(has_learning)],
    ["有付费订单的用户", str(purchased)],
    ["总完成课时数", str(total_chapters)],
]

# Sales breakdown (per lead row; only ID-matched accounts are attributed here)
sales_stats = defaultdict(lambda: {"total": 0, "matched": 0, "with_course": 0, "with_learning": 0, "purchased": 0})
for r in rows:
    sales = r["销售归属"].strip()
    bucket = sales_stats[sales]
    bucket["total"] += 1
    uid = r["用户ID"].strip()
    if not (uid.isdigit() and len(uid) <= 10):
        continue
    aid = int(uid)
    if aid not in account_map:
        continue
    bucket["matched"] += 1
    if aid in course_map:
        bucket["with_course"] += 1
        if any(
            not c["is_deleted"]
            and c["user_id"] in chapter_stats
            and chapter_stats[c["user_id"]][0] > 0
            for c in course_map[aid]
        ):
            bucket["with_learning"] += 1
    if any(o["order_status"] in (3, 4) for o in order_map.get(aid, [])):
        bucket["purchased"] += 1

stats_data.append([])
stats_data.append(["销售归属", "线索总数", "匹配账号", "有课程", "有学习记录", "有付费"])
# The three known owners keep their fixed order; any other owner found in the
# CSV is appended so that no lead silently disappears from the breakdown.
known_sales = ["成都", "小龙", "吴迪"]
extra_sales = sorted(name for name in sales_stats if name not in known_sales)
for sales in known_sales + extra_sales:
    s = sales_stats[sales]
    stats_data.append([sales, s["total"], s["matched"], s["with_course"], s["with_learning"], s["purchased"]])

for row_idx, row_data in enumerate(stats_data, 1):
    for col_idx, val in enumerate(row_data, 1):
        cell = ws2.cell(row=row_idx, column=col_idx, value=val)
        if row_idx == 1:
            cell.font = header_font
            cell.fill = header_fill
            cell.alignment = header_align
        cell.border = cell_border

for column_letter, width in (("A", 25), ("B", 15), ("C", 12), ("D", 10), ("E", 12), ("F", 10)):
    ws2.column_dimensions[column_letter].width = width

# Make sure the output directory exists before saving.
output_dir = os.path.dirname(OUTPUT)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
wb.save(OUTPUT)
print(f"\nReport saved to: {OUTPUT}")
print(f"Total matched accounts: {len(matched_accounts)}")
print(f"With courses: {len(accounts_with_courses)}")
print(f"With learning: {has_learning}")
print(f"With purchases: {purchased}")

View File

@ -184,40 +184,51 @@ def step2_query_chapter_play(account_ids, pg_password):
SHARD_COUNT, SHARD_COUNT,
) )
comp_union = build_union_sql( # 为每个分表生成带过滤的 component_play_record 查询
"bi_user_component_play_record", comp_parts = []
"chapter_unique_id, interval_time", for i in range(SHARD_COUNT):
"", comp_parts.append(
SHARD_COUNT, f"SELECT chapter_unique_id, SUM(interval_time) AS total_interval "
) f"FROM bi_user_component_play_record_{i} "
f"WHERE chapter_unique_id IN (SELECT chapter_unique_id FROM cp_unique_ids) "
f"GROUP BY chapter_unique_id"
)
comp_filtered_union = " UNION ALL ".join(comp_parts)
sql = f""" sql = f"""
WITH chapter_play AS ( WITH chapter_play AS (
{chapter_union} {chapter_union}
), ),
filtered_play AS (
SELECT cp.user_id, cp.chapter_id, cp.chapter_unique_id, cp.finish_date,
ROW_NUMBER() OVER (PARTITION BY cp.user_id, cp.chapter_id ORDER BY cp.finish_date) AS rn
FROM chapter_play cp
JOIN bi_vala_app_character c ON cp.user_id = c.id
JOIN bi_vala_app_account a ON c.account_id = a.id
WHERE a.id IN ({aid_list})
),
cp_unique_ids AS (
SELECT DISTINCT chapter_unique_id FROM filtered_play WHERE rn = 1
),
comp_time AS ( comp_time AS (
SELECT chapter_unique_id, SUM(interval_time) AS total_interval SELECT chapter_unique_id, SUM(total_interval) AS total_interval
FROM ({comp_union}) t FROM ({comp_filtered_union}) t
GROUP BY chapter_unique_id GROUP BY chapter_unique_id
), ),
course_detail AS ( course_detail AS (
SELECT SELECT
cp.user_id, fp.user_id,
cp.chapter_id, fp.chapter_id,
FORMAT('%s-%s-%s-%s', l.course_level, l.course_season, l.course_unit, l.course_lesson) AS course_id, FORMAT('%s-%s-%s-%s', l.course_level, l.course_season, l.course_unit, l.course_lesson) AS course_id,
cp.finish_date, fp.finish_date,
FORMAT('%s:%s', FORMAT('%s:%s',
FLOOR(ct.total_interval / 1000 / 60), FLOOR(ct.total_interval / 1000 / 60),
LPAD(CAST(MOD(ct.total_interval / 1000, 60) AS TEXT), 2, '0') LPAD(CAST(MOD(ct.total_interval / 1000, 60) AS TEXT), 2, '0')
) AS finish_time ) AS finish_time
FROM ( FROM filtered_play fp
SELECT user_id, chapter_id, chapter_unique_id, finish_date, LEFT JOIN bi_level_unit_lesson l ON fp.chapter_id = l.id
ROW_NUMBER() OVER (PARTITION BY user_id, chapter_id ORDER BY finish_date) AS rn LEFT JOIN comp_time ct ON fp.chapter_unique_id = ct.chapter_unique_id
FROM chapter_play WHERE fp.rn = 1
) cp
LEFT JOIN bi_level_unit_lesson l ON cp.chapter_id = l.id
LEFT JOIN comp_time ct ON cp.chapter_unique_id = ct.chapter_unique_id
WHERE cp.rn = 1
) )
SELECT SELECT