# ai_member_xiaoxi/scripts/generate_report.py

import pandas as pd
import psycopg2

# 1. Load the latest order export that already carries the deal-attribution tag
order_df = pd.read_csv('2026年3月1日至今订单_含正确成交标记.csv')
print(f"订单总数：{len(order_df)}")

# 2. Derive GMV and the refund-related columns
# pay_amount_int is divided by 100 -- presumably it is stored in cents; TODO confirm.
order_df['GMV'] = order_df['pay_amount_int'] / 100
# order_status == 4 is what this script treats as a refunded order.
refunded = order_df['order_status'] == 4
order_df['is_refund'] = refunded.astype(int)
# GSV is 0 for refunded orders and equals GMV otherwise; refund_amount is the
# mirror image. Column-wise where() replaces the row-wise apply(axis=1): it is
# much faster on large exports and still yields a proper column when the
# export is empty (apply(axis=1) on an empty frame does not return a Series).
order_df['GSV'] = order_df['GMV'].where(~refunded, 0)
order_df['refund_amount'] = order_df['GMV'].where(refunded, 0)

# 3. Map each deal-attribution tag to its top-level channel
def map_channel(tag):
    """Return the top-level channel name for a deal-attribution tag.

    Args:
        tag: A value from the '成交标记' column. Normally a string; missing
            values (None / NaN) are accepted as well.

    Returns:
        One of '销转', 'App转化', '达播', '班主任', '店铺直购', or '其他' for any
        tag that matches none of the known channels, including non-string
        input.
    """
    # A missing cell arrives as float NaN (or None), which has no
    # .startswith(); classify it as "other" instead of raising.
    if not isinstance(tag, str):
        return '其他'
    if tag in ('销转', '销转-小龙'):
        return '销转'
    if tag in ('端内直购', '端内销转'):
        return 'App转化'
    if tag == '达播':
        return '达播'
    if tag.startswith('班主任-'):
        return '班主任'
    if tag == '店铺直购':
        return '店铺直购'
    return '其他'

# Tag every order with its top-level channel
order_df['渠道大类'] = order_df['成交标记'].apply(map_channel)

# 4. Aggregate per top-level channel
# Output column name -> (source column, aggregation)
channel_aggs = {
    '订单数': ('id', 'count'),
    'GMV': ('GMV', 'sum'),
    '已退款金额': ('refund_amount', 'sum'),
    'GSV': ('GSV', 'sum'),
    '退款订单数': ('is_refund', 'sum'),
    '客单价': ('GMV', 'mean'),
}
channel_stats = order_df.groupby('渠道大类').agg(**channel_aggs).reset_index()
# Refund rate = refunded orders / all orders, shown as a one-decimal percentage string
refund_pct = channel_stats['退款订单数'] / channel_stats['订单数'] * 100
channel_stats['退费率'] = refund_pct.round(1).astype(str) + '%'
# Monetary columns are reported with two decimals
for money_col in ('GMV', 'GSV', '已退款金额', '客单价'):
    channel_stats[money_col] = channel_stats[money_col].round(2)

# 5. Forecast GSV per channel, taken from the original forecast sheet
forecast_targets = (
    ('销转', 100000),
    ('App转化', 20000),
    ('达播', 250000),
    ('班主任', 10000),
)
pred_data = [
    {'渠道大类': channel, '预测GSV': gsv} for channel, gsv in forecast_targets
]
pred_df = pd.DataFrame(pred_data)

# 6. Merge actuals with the forecast
# Left join so every forecast channel appears even if it has no orders yet.
report_df = pd.merge(pred_df, channel_stats, on='渠道大类', how='left')
# A forecast channel with no orders comes out of the join as NaN. Report it as
# zero orders / zero revenue so its completion rate reads "0.0%" rather than
# "nan%"; its refund rate is undefined, so show "-".
zero_fill_cols = ['订单数', 'GMV', '已退款金额', 'GSV', '退款订单数']
report_df[zero_fill_cols] = report_df[zero_fill_cols].fillna(0)
report_df['退费率'] = report_df['退费率'].fillna('-')
# Shop direct purchases have no forecast but are still listed in the report.
shop_stats = channel_stats[channel_stats['渠道大类'] == '店铺直购']
report_df = pd.concat([report_df, shop_stats], ignore_index=True)
# Grand-total row.
# NOTE(review): the actuals below are summed over every row of channel_stats
# (including '其他' and '店铺直购'), while the forecast total only covers the
# channels listed in pred_df, so the total completion rate compares different
# scopes -- confirm this is intended.
total_orders = channel_stats['订单数'].sum()
total_gmv = channel_stats['GMV'].sum()
total_refund_orders = channel_stats['退款订单数'].sum()
if total_orders:
    # Rounded to two decimals to match the per-channel 客单价 values.
    total_avg_price = (total_gmv / total_orders).round(2)
    total_refund_rate = str((total_refund_orders / total_orders * 100).round(1)) + '%'
else:
    # No orders at all: both ratios are undefined, avoid dividing by zero.
    total_avg_price = float('nan')
    total_refund_rate = '-'
total = pd.DataFrame({
    '渠道大类': ['总计'],
    '预测GSV': [pred_df['预测GSV'].sum()],
    '订单数': [total_orders],
    'GMV': [total_gmv],
    '已退款金额': [channel_stats['已退款金额'].sum()],
    'GSV': [channel_stats['GSV'].sum()],
    '退款订单数': [total_refund_orders],
    '客单价': [total_avg_price],
    '退费率': [total_refund_rate],
})
report_df = pd.concat([report_df, total], ignore_index=True)


def _completion_rate(row):
    """Return actual GSV as a percentage of forecast GSV, or '-' without a usable forecast."""
    target = row['预测GSV']
    if pd.isna(target) or target == 0:
        return '-'
    return str(round(row['GSV'] / target * 100, 1)) + '%'


report_df['完成率'] = report_df.apply(_completion_rate, axis=1)

# 7. Write the report workbook
output_file = '2026年3月收入预测报表_最新版.xlsx'


def _refund_rate_label(flags):
    """Format the share of refunded orders in *flags* as a one-decimal percentage string."""
    return str((flags.sum() / flags.count() * 100).round(1)) + '%'


# Shared aggregation spec for both detail sheets:
# output column name -> (source column, aggregation)
detail_aggs = {
    '订单数': ('id', 'count'),
    'GMV': ('GMV', 'sum'),
    'GSV': ('GSV', 'sum'),
    '退费率': ('is_refund', _refund_rate_label),
}

with pd.ExcelWriter(output_file) as writer:
    # Sheet 1: overall statistics per channel
    report_df.to_excel(writer, sheet_name='整体统计', index=False)
    # Sheet 2: 达播 orders broken down by key_from
    dabo_orders = order_df[order_df['渠道大类'] == '达播']
    dabo_df = dabo_orders.groupby('key_from').agg(**detail_aggs).reset_index()
    dabo_df.to_excel(writer, sheet_name='达播达人明细', index=False)
    # Sheet 3: breakdown by raw deal-attribution tag
    tag_df = order_df.groupby('成交标记').agg(**detail_aggs).reset_index()
    tag_df.to_excel(writer, sheet_name='成交标记明细', index=False)

# Console summary: where the workbook was written, then the headline columns
summary_cols = ['渠道大类', '预测GSV', 'GSV', '完成率', '订单数', 'GMV', '退费率']
print(f"\n最新3月收入预测报表已生成：{output_file}")
print("\n整体统计结果：")
print(report_df[summary_cols])