ai_member_xiaoxi/scripts/generate_report.py
2026-04-02 08:00:01 +08:00

100 lines
4.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
import psycopg2
# 1. Load the latest order export, which already carries the deal-tag column.
order_df = pd.read_csv('2026年3月1日至今订单_含正确成交标记.csv')
print(f"订单总数:{len(order_df)}")

# 2. Derive GMV and the refund-related columns.
# GMV is pay_amount_int / 100 (presumably cents -> currency units; confirm
# against the export schema).
order_df['GMV'] = order_df['pay_amount_int'] / 100
# This script treats order_status == 4 as "refunded".
refunded = order_df['order_status'] == 4
order_df['is_refund'] = refunded.astype(int)
# GSV is 0 for refunded orders and GMV otherwise; refund_amount is the mirror
# image. These are computed column-wise instead of with a row-wise
# DataFrame.apply(axis=1): it is much faster and, unlike apply, still yields a
# single column when the export contains no rows.
order_df['GSV'] = order_df['GMV'].mask(refunded, 0)
order_df['refund_amount'] = order_df['GMV'].where(refunded, 0)
# 3. Map each deal tag onto a top-level channel.
def map_channel(tag):
    """Return the top-level channel name for a single deal tag.

    Args:
        tag: Value of the deal-tag column for one order. Normally a string,
            but a blank CSV cell arrives as NaN (a float) and is accepted too.

    Returns:
        One of '销转', 'App转化', '达播', '班主任', '店铺直购', or '其他' for
        anything unrecognised, including missing / non-string tags.
    """
    # Missing values have no ``startswith``; classify them as "other" instead
    # of raising AttributeError half-way through the report.
    if not isinstance(tag, str):
        return '其他'

    exact_matches = {
        '销转': '销转',
        '销转-小龙': '销转',
        '端内直购': 'App转化',
        '端内销转': 'App转化',
        '达播': '达播',
        '店铺直购': '店铺直购',
    }
    if tag in exact_matches:
        return exact_matches[tag]
    # Every tag carrying the "班主任-" prefix rolls up into one channel.
    if tag.startswith('班主任-'):
        return '班主任'
    return '其他'
order_df['渠道大类'] = order_df['成交标记'].map(map_channel)

# 4. Aggregate per top-level channel: order count, GMV, refunded amount, GSV,
# refunded-order count and average order value.
channel_stats = (
    order_df.groupby('渠道大类')
    .agg(
        订单数=('id', 'count'),
        GMV=('GMV', 'sum'),
        已退款金额=('refund_amount', 'sum'),
        GSV=('GSV', 'sum'),
        退款订单数=('is_refund', 'sum'),
        客单价=('GMV', 'mean'),
    )
    .reset_index()
)

# Refund rate = refunded orders / all orders, rendered as a one-decimal
# percentage string.
refund_pct = channel_stats['退款订单数'] / channel_stats['订单数'] * 100
channel_stats['退费率'] = refund_pct.round(1).astype(str) + '%'

# Monetary columns are reported with two decimals.
for money_col in ('GMV', 'GSV', '已退款金额', '客单价'):
    channel_stats[money_col] = channel_stats[money_col].round(2)
# 5. Forecast GSV per channel, taken from the original forecast sheet.
pred_data = [
    {'渠道大类': '销转', '预测GSV': 100000},
    {'渠道大类': 'App转化', '预测GSV': 20000},
    {'渠道大类': '达播', '预测GSV': 250000},
    {'渠道大类': '班主任', '预测GSV': 10000},
]
pred_df = pd.DataFrame(pred_data)

# 6. Join the actual figures onto the forecast. A left join keeps every
# forecast channel even when it has no orders yet.
report_df = pd.merge(pred_df, channel_stats, on='渠道大类', how='left')
# A forecast channel without orders comes back as NaN; report it as zero
# activity so its completion rate reads "0.0%" rather than "nan%".
zero_fill_cols = ['订单数', 'GMV', '已退款金额', 'GSV', '退款订单数']
report_df[zero_fill_cols] = report_df[zero_fill_cols].fillna(0)

# Append the shop-direct channel, which has actuals but no forecast.
# NOTE(review): orders mapped to '其他' get no row of their own, yet they are
# included in the total row below -- confirm this is intended.
shop_stats = channel_stats[channel_stats['渠道大类'] == '店铺直购']
report_df = pd.concat([report_df, shop_stats], ignore_index=True)

# Append the grand-total row, computed over every channel in channel_stats.
total_orders = channel_stats['订单数'].sum()
total_gmv = channel_stats['GMV'].sum()
total_refund_orders = channel_stats['退款订单数'].sum()
total = pd.DataFrame({
    '渠道大类': ['总计'],
    '预测GSV': [pred_df['预测GSV'].sum()],
    '订单数': [total_orders],
    'GMV': [total_gmv],
    '已退款金额': [channel_stats['已退款金额'].sum()],
    'GSV': [channel_stats['GSV'].sum()],
    '退款订单数': [total_refund_orders],
    # Rounded to two decimals to match the per-channel average order value.
    '客单价': [round(total_gmv / total_orders, 2)],
    '退费率': [str((total_refund_orders / total_orders * 100).round(1)) + '%'],
})
report_df = pd.concat([report_df, total], ignore_index=True)


def _completion_rate(row):
    """Return actual GSV as a percentage string of forecast GSV, or '-'."""
    forecast = row['预测GSV']
    # No forecast (e.g. shop-direct) or a zero forecast: a ratio is undefined.
    if pd.isna(forecast) or forecast == 0:
        return '-'
    actual = 0 if pd.isna(row['GSV']) else row['GSV']
    return str(round(actual / forecast * 100, 1)) + '%'


# NOTE(review): the total row compares GSV from all channels against a
# forecast that only covers the four forecast channels.
report_df['完成率'] = report_df.apply(_completion_rate, axis=1)
# 7. Write the report workbook (three sheets) and echo a summary to stdout.
def _refund_rate_label(refund_flags):
    """Format the share of refunded orders in a group as a 'x.y%' string."""
    pct = refund_flags.sum() / refund_flags.count() * 100
    return str(pct.round(1)) + '%'


def _breakdown(frame, key):
    """Aggregate order count, GMV, GSV and refund rate of `frame` per `key`."""
    grouped = frame.groupby(key).agg(
        订单数=('id', 'count'),
        GMV=('GMV', 'sum'),
        GSV=('GSV', 'sum'),
        退费率=('is_refund', _refund_rate_label),
    )
    return grouped.reset_index()


output_file = '2026年3月收入预测报表_最新版.xlsx'
with pd.ExcelWriter(output_file) as writer:
    # Sheet 1: overall figures per channel.
    report_df.to_excel(writer, sheet_name='整体统计', index=False)

    # Sheet 2: the '达播' channel broken down by key_from.
    dabo_orders = order_df[order_df['渠道大类'] == '达播']
    dabo_df = _breakdown(dabo_orders, 'key_from')
    dabo_df.to_excel(writer, sheet_name='达播达人明细', index=False)

    # Sheet 3: breakdown by raw deal tag.
    tag_df = _breakdown(order_df, '成交标记')
    tag_df.to_excel(writer, sheet_name='成交标记明细', index=False)

print(f"\n最新3月收入预测报表已生成{output_file}")
print("\n整体统计结果:")
summary_cols = ['渠道大类', '预测GSV', 'GSV', '完成率', '订单数', 'GMV', '退费率']
print(report_df[summary_cols])