# ai_member_xiaoxi/scripts/april_refund_analysis.py

#!/usr/bin/env python3
"""
四月份退费用户学习进度分析 - 生成Excel表格
表头：账号ID、购课渠道、购课时间、退费时间、用户类型、
      L1U0L1~L2U0L5 各课时的（是否进入、进入时间、是否完成、完成时间）
"""

import psycopg2
import pandas as pd
from datetime import datetime
import os

# Connection settings for the online PostgreSQL database; the mapping is
# unpacked straight into psycopg2.connect().
# NOTE(review): when PG_ONLINE_PASSWORD is unset the password falls back to a
# literal stored in this file — consider rotating it and dropping the fallback.
PG_ONLINE = dict(
    host='bj-postgres-16pob4sg.sql.tencentcdb.com',
    port=28591,
    user='ai_member',
    password=os.environ.get('PG_ONLINE_PASSWORD', 'LdfjdjL83h3h3^$&**YGG*'),
    dbname='vala_bi',
)

# Chapter-id -> lesson-code lookup: L1U0L1~L1U0L5 (A1) and L2U0L1~L2U0L5 (A2).
# Note that L1U0L5 maps to id 348; id 347 is not part of the mapping.
CHAPTER_MAP = {
    343: 'L1U0L1',
    344: 'L1U0L2',
    345: 'L1U0L3',
    346: 'L1U0L4',
    348: 'L1U0L5',
    55: 'L2U0L1',
    56: 'L2U0L2',
    57: 'L2U0L3',
    58: 'L2U0L4',
    59: 'L2U0L5',
}
# Chapter ids in declaration order, used to filter the play-record queries.
CHAPTER_IDS = [*CHAPTER_MAP]
# Lesson codes in the order their column groups appear in the exported sheet.
LESSON_ORDER = [
    f'L{level}U0L{lesson}'
    for level in (1, 2)
    for lesson in range(1, 6)
]

def get_connection():
    """Open and return a new psycopg2 connection built from ``PG_ONLINE``."""
    connection = psycopg2.connect(**PG_ONLINE)
    return connection

def fetch_refund_users(conn):
    """Fetch the April refund users together with their order information.

    The query works in three steps (one CTE each):

    * ``april_refund_accounts`` - accounts (``status = 1``) that own an order
      with ``order_status = 4`` whose ``created_at`` and ``updated_at`` both
      fall in [2026-04-01, 2026-05-01).
    * ``user_all_orders`` - per account, counts of April-created orders with
      ``order_status`` 3 or 4 (``total_orders``), of which status 4 is counted
      as ``refunded_orders`` and status 3 as ``active_orders``.
    * ``latest_order`` - per account, the status-4 order with the latest
      ``created_at``; its ``created_at`` is exposed as ``purchase_time`` and
      its ``updated_at`` as ``refund_time``.

    NOTE(review): only the first CTE restricts ``updated_at`` to April; the
    other two filter on ``created_at`` alone, so a status-4 order created in
    April but updated later can still be counted or picked as the latest
    order - confirm this is intended.

    Args:
        conn: an open database connection exposing ``cursor()``.

    Returns:
        A ``pandas.DataFrame`` with one row per account, ordered by
        ``account_id``, with columns ``account_id``, ``key_from``,
        ``sale_channel``, ``purchase_time``, ``refund_time``,
        ``total_orders``, ``refunded_orders`` and ``active_orders``.
    """
    # The date window is hard-coded in the SQL text below.
    sql = """
    WITH april_refund_accounts AS (
        SELECT DISTINCT o.account_id
        FROM bi_vala_order o
        JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1
        WHERE o.order_status = 4
          AND o.updated_at >= '2026-04-01' AND o.updated_at < '2026-05-01'
          AND o.created_at >= '2026-04-01' AND o.created_at < '2026-05-01'
    ),
    -- 每个用户所有四月订单
    user_all_orders AS (
        SELECT o.account_id,
               COUNT(DISTINCT o.id) AS total_orders,
               COUNT(DISTINCT CASE WHEN o.order_status = 4 THEN o.id END) AS refunded_orders,
               COUNT(DISTINCT CASE WHEN o.order_status = 3 THEN o.id END) AS active_orders
        FROM bi_vala_order o
        JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1
        WHERE o.account_id IN (SELECT account_id FROM april_refund_accounts)
          AND o.order_status IN (3, 4)
          AND o.created_at >= '2026-04-01' AND o.created_at < '2026-05-01'
        GROUP BY o.account_id
    ),
    -- 取最晚购课时间的退费订单
    latest_order AS (
        SELECT DISTINCT ON (o.account_id)
               o.account_id,
               o.key_from,
               o.sale_channel,
               o.created_at AS purchase_time,
               o.updated_at AS refund_time
        FROM bi_vala_order o
        JOIN bi_vala_app_account a ON o.account_id = a.id AND a.status = 1
        WHERE o.account_id IN (SELECT account_id FROM april_refund_accounts)
          AND o.order_status = 4
          AND o.created_at >= '2026-04-01' AND o.created_at < '2026-05-01'
        ORDER BY o.account_id, o.created_at DESC
    )
    SELECT lo.account_id, lo.key_from, lo.sale_channel, lo.purchase_time, lo.refund_time,
           uo.total_orders, uo.refunded_orders, uo.active_orders
    FROM latest_order lo
    JOIN user_all_orders uo ON lo.account_id = uo.account_id
    ORDER BY lo.account_id;
    """
    with conn.cursor() as cur:
        cur.execute(sql)
        # Column names come from the cursor metadata, so they follow the SELECT list.
        columns = [desc[0] for desc in cur.description]
        rows = cur.fetchall()
    return pd.DataFrame(rows, columns=columns)

def classify_user(row):
    """Return the user-type label for one refund user.

    ``row`` is any mapping-like object with ``total_orders`` and
    ``active_orders`` entries. Exactly one order yields '单订单退费'; with
    several orders the label depends on whether any active order remains.
    """
    if row['total_orders'] == 1:
        return '单订单退费'
    still_has_active_order = row['active_orders'] > 0
    return '多订单未全退' if still_has_active_order else '多订单全退'

def get_channel_name(key_from, sale_channel):
    """Return the raw ``key_from`` value, or an empty string when it is falsy.

    ``sale_channel`` is accepted but not used.
    """
    if key_from:
        return key_from
    return ''

def _summarize_play_records(records, char_to_account):
    """Fold raw play-record rows into per-account, per-chapter progress."""
    progress = {}
    for user_id, chapter_id, play_status, created_at in records:
        account_id = char_to_account.get(user_id)
        if account_id is None:
            continue

        chapters = progress.setdefault(account_id, {})
        entry = chapters.get(chapter_id)
        if entry is None:
            # Any record at all means the chapter was entered.
            entry = chapters[chapter_id] = {
                'entered': True,
                'enter_time': created_at,
                'completed': False,
                'complete_time': None,
            }
        elif created_at < entry['enter_time']:
            # Keep the earliest enter time.
            entry['enter_time'] = created_at

        # play_status == 1 marks a completed play; keep the earliest such time.
        if play_status == 1 and (
            not entry['completed'] or created_at < entry['complete_time']
        ):
            entry['completed'] = True
            entry['complete_time'] = created_at

    return progress


def fetch_chapter_records(conn, account_ids):
    """Fetch the chapter play records of the given accounts.

    Characters are looked up in ``bi_vala_app_character``; each character's
    records are then read from the sharded table
    ``bi_user_chapter_play_record_<character_id % 8>``, restricted to the
    chapters listed in ``CHAPTER_IDS``. Records of all characters belonging
    to one account are merged.

    Args:
        conn: an open database connection exposing ``cursor()``.
        account_ids: iterable of account ids; they are interpolated into the
            SQL text unquoted, so they must be numeric.

    Returns:
        ``{account_id: {chapter_id: {'entered': True, 'enter_time': ...,
        'completed': bool, 'complete_time': ... or None}}}``. Accounts or
        chapters without any record are absent. An empty ``account_ids``
        yields ``{}`` without touching the database.
    """
    account_ids = list(account_ids)
    if not account_ids:
        # "IN ()" is not valid SQL, so there is nothing to query.
        return {}

    # Map character ids back to their owning account.
    acct_str = ','.join(str(a) for a in account_ids)
    sql_char = f"""
    SELECT id AS character_id, account_id FROM bi_vala_app_character
    WHERE account_id IN ({acct_str})
    """
    with conn.cursor() as cur:
        cur.execute(sql_char)
        char_rows = cur.fetchall()

    if not char_rows:
        return {}

    char_to_account = {}
    char_ids_by_table = {}
    for cid, aid in char_rows:
        char_to_account[cid] = aid
        # Play records are split over 8 tables keyed by character id modulo 8.
        char_ids_by_table.setdefault(cid % 8, []).append(cid)

    # Collect the play records from every shard that holds one of the characters.
    chapter_ids_str = ','.join(str(c) for c in CHAPTER_IDS)
    all_records = []
    for table_idx, cids in char_ids_by_table.items():
        cids_str = ','.join(str(c) for c in cids)
        sql_rec = f"""
        SELECT user_id, chapter_id, play_status, created_at
        FROM bi_user_chapter_play_record_{table_idx}
        WHERE user_id IN ({cids_str})
          AND chapter_id IN ({chapter_ids_str})
        ORDER BY user_id, chapter_id, created_at
        """
        with conn.cursor() as cur:
            cur.execute(sql_rec)
            all_records.extend(cur.fetchall())

    return _summarize_play_records(all_records, char_to_account)

def format_time(t):
    """Render a timestamp for the exported sheet.

    Args:
        t: ``None``, ``pandas.NaT``, a ``datetime`` (including
            ``pandas.Timestamp``), or any other value.

    Returns:
        '' for ``None`` / ``NaT``, ``'YYYY-MM-DD HH:MM:SS'`` for datetimes,
        and ``str(t)`` for anything else.
    """
    # NaT is a datetime instance whose strftime() raises, so it must be
    # filtered out before the isinstance check below.
    if t is None or t is pd.NaT:
        return ''
    if isinstance(t, datetime):
        return t.strftime('%Y-%m-%d %H:%M:%S')
    return str(t)

def main():
    """Build the April refund learning-progress sheet and write it to Excel.

    Steps: load the refund users and their order info, load their chapter
    play records, assemble one row per account with four columns per lesson
    in ``LESSON_ORDER``, then export to
    ``/tmp/openclaw/四月退费用户学习进度分析.xlsx`` and print a summary.

    The database connection is closed even when a step fails. When no refund
    user is found, a message is printed and no file is written.
    """
    conn = get_connection()
    try:
        print("1. 获取退费用户订单信息...")
        df_users = fetch_refund_users(conn)
        print(f"   共{len(df_users)}个退费用户")

        if df_users.empty:
            # Row-wise apply() and the "IN (...)" queries both break on an
            # empty user list, so stop here.
            print("   没有符合条件的退费用户，未生成表格")
            return

        print("2. 获取课时学习记录...")
        account_ids = df_users['account_id'].tolist()
        chapter_records = fetch_chapter_records(conn, account_ids)
        print(f"   共{len(chapter_records)}个用户有学习记录")
    finally:
        conn.close()

    # Derived columns: user type and purchase channel.
    df_users['用户类型'] = df_users.apply(classify_user, axis=1)
    df_users['购课渠道'] = df_users.apply(
        lambda r: get_channel_name(r['key_from'], r['sale_channel']), axis=1
    )

    print("3. 组装表格...")
    # Reverse lookup built once instead of scanning CHAPTER_MAP per lesson.
    lesson_to_chapter = {lesson: ch_id for ch_id, lesson in CHAPTER_MAP.items()}

    rows = []
    for _, user in df_users.iterrows():
        aid = user['account_id']
        row = {
            '账号ID': aid,
            '购课渠道': user['购课渠道'],
            '购课时间': format_time(user['purchase_time']),
            '退费时间': format_time(user['refund_time']),
            '用户类型': user['用户类型'],
        }

        user_chapters = chapter_records.get(aid, {})
        for lesson in LESSON_ORDER:
            ch_data = user_chapters.get(lesson_to_chapter[lesson], {})
            row[f'{lesson}-是否进入'] = '是' if ch_data.get('entered', False) else '否'
            row[f'{lesson}-进入时间'] = format_time(ch_data.get('enter_time'))
            row[f'{lesson}-是否完成'] = '是' if ch_data.get('completed', False) else '否'
            row[f'{lesson}-完成时间'] = format_time(ch_data.get('complete_time'))

        rows.append(row)

    df_result = pd.DataFrame(rows)

    # Export to Excel.
    output_path = '/tmp/openclaw/四月退费用户学习进度分析.xlsx'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
        df_result.to_excel(writer, index=False, sheet_name='退费用户学习进度')

        # Fit each column to its longest cell, capped at 25 characters.
        ws = writer.sheets['退费用户学习进度']
        for col in ws.columns:
            max_length = max(len(str(cell.value or '')) for cell in col)
            col_letter = col[0].column_letter
            ws.column_dimensions[col_letter].width = min(max_length + 2, 25)

    print(f"4. 导出完成: {output_path}")
    print(f"   总行数: {len(df_result)}")
    print("   用户类型分布:")
    print(df_result['用户类型'].value_counts().to_string())

# Run the export only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()