# Original listing metadata: 100 lines, 4.2 KiB, Python.
import pandas as pd
|
||
import psycopg2
|
||
|
||
# 1. Load the latest order export that already carries the deal-tag column.
order_df = pd.read_csv('2026年3月1日至今订单_含正确成交标记.csv')
print(f"订单总数:{len(order_df)}")

# 2. Derive GMV and the refund-related columns.
# NOTE(review): dividing by 100 presumably converts an integer amount in
# cents to yuan -- confirm against the export schema.
order_df['GMV'] = order_df['pay_amount_int'] / 100

# order_status == 4 is the status this script treats as "refunded".
# Build the mask once and reuse it for every derived column.
refunded = order_df['order_status'] == 4
order_df['is_refund'] = refunded.astype(int)

# GSV is 0 for refunded orders and GMV otherwise; refund_amount is the
# mirror image. Vectorized mask/where replaces the row-wise apply(axis=1),
# which re-evaluated the status test in Python for every row.
order_df['GSV'] = order_df['GMV'].mask(refunded, 0.0)
order_df['refund_amount'] = order_df['GMV'].where(refunded, 0.0)
# 3. Map each deal tag to its top-level channel.
def map_channel(tag):
    """Return the top-level channel for a deal tag.

    Args:
        tag: Value from the ``成交标记`` column. Non-string values (for
            example the float NaN pandas produces for an empty CSV cell)
            are accepted.

    Returns:
        One of ``'销转'``, ``'App转化'``, ``'达播'``, ``'班主任'``,
        ``'店铺直购'`` or ``'其他'``.
    """
    # A missing cell is a float NaN, which has no ``startswith``; bucket
    # anything that is not a string as "other" instead of raising.
    if not isinstance(tag, str):
        return '其他'
    if tag in ('销转', '销转-小龙'):
        return '销转'
    if tag in ('端内直购', '端内销转'):
        return 'App转化'
    if tag == '达播':
        return '达播'
    if tag.startswith('班主任-'):
        return '班主任'
    if tag == '店铺直购':
        return '店铺直购'
    return '其他'
order_df['渠道大类'] = order_df['成交标记'].apply(map_channel)

# 4. Aggregate per top-level channel.
# Output column -> (source column, aggregation); dict order is column order.
channel_agg_spec = {
    '订单数': ('id', 'count'),
    'GMV': ('GMV', 'sum'),
    '已退款金额': ('refund_amount', 'sum'),
    'GSV': ('GSV', 'sum'),
    '退款订单数': ('is_refund', 'sum'),
    '客单价': ('GMV', 'mean'),
}
orders_by_channel = order_df.groupby('渠道大类')
channel_stats = orders_by_channel.agg(**channel_agg_spec).reset_index()

# Refund rate = refunded orders / all orders, shown as a one-decimal percent.
refund_pct = channel_stats['退款订单数'] / channel_stats['订单数'] * 100
channel_stats['退费率'] = refund_pct.round(1).astype(str) + '%'

# Amount columns are reported with two decimals.
amount_cols = ['GMV', 'GSV', '已退款金额', '客单价']
channel_stats[amount_cols] = channel_stats[amount_cols].round(2)
# 5. Forecast GSV per channel, as given in the original forecast sheet.
pred_data = [
    {'渠道大类': channel, '预测GSV': forecast_gsv}
    for channel, forecast_gsv in (
        ('销转', 100000),
        ('App转化', 20000),
        ('达播', 250000),
        ('班主任', 10000),
    )
]
pred_df = pd.DataFrame(pred_data)
# 6. Merge forecast and actuals into the report table.
report_df = pd.merge(pred_df, channel_stats, on='渠道大类', how='left')

# A forecast channel with no orders comes out of the left merge as NaN.
# Report it as zero activity so its completion rate reads "0.0%" instead of
# "nan%"; without orders there is no refund rate, so that cell shows "-".
# 客单价 is left as NaN for such a row (no orders, no average).
actual_cols = ['订单数', 'GMV', '已退款金额', 'GSV', '退款订单数']
report_df[actual_cols] = report_df[actual_cols].fillna(0)
report_df['退费率'] = report_df['退费率'].fillna('-')

# 店铺直购 has no forecast, so it is appended as an actuals-only row.
# Skip the concat when the channel is absent: concatenating an empty frame
# adds nothing and triggers pandas' empty-frame deprecation warning.
shop_stats = channel_stats[channel_stats['渠道大类'] == '店铺直购']
if not shop_stats.empty:
    report_df = pd.concat([report_df, shop_stats], ignore_index=True)

# Grand-total row.
# NOTE(review): totals are taken over every row of channel_stats, so a
# channel that has no row of its own in the report still counts toward
# 总计, while 预测GSV only sums the forecast channels -- confirm intended.
total_orders = channel_stats['订单数'].sum()
total_refund_orders = channel_stats['退款订单数'].sum()
total_gmv = float(channel_stats['GMV'].sum())

if total_orders:
    total_avg_order = round(total_gmv / float(total_orders), 2)
    total_refund_rate = (
        str(round(float(total_refund_orders) / float(total_orders) * 100, 1))
        + '%'
    )
else:
    # No orders at all: the ratios are undefined rather than "nan%".
    total_avg_order = float('nan')
    total_refund_rate = '-'

total = pd.DataFrame({
    '渠道大类': ['总计'],
    '预测GSV': [pred_df['预测GSV'].sum()],
    '订单数': [total_orders],
    # Re-round the sums so float noise from adding rounded values
    # (e.g. 123.45000000000002) does not leak into the report.
    'GMV': [round(total_gmv, 2)],
    '已退款金额': [round(float(channel_stats['已退款金额'].sum()), 2)],
    'GSV': [round(float(channel_stats['GSV'].sum()), 2)],
    '退款订单数': [total_refund_orders],
    '客单价': [total_avg_order],
    '退费率': [total_refund_rate],
})
report_df = pd.concat([report_df, total], ignore_index=True)


def _completion_rate(row):
    """Format GSV / forecast GSV as a percent string, or '-' if undefined."""
    forecast = row['预测GSV']
    actual = row['GSV']
    # Rows without a forecast (or with a zero/missing operand) have no
    # meaningful completion rate.
    if pd.isna(forecast) or pd.isna(actual) or forecast == 0:
        return '-'
    return str(round(actual / forecast * 100, 1)) + '%'


report_df['完成率'] = report_df.apply(_completion_rate, axis=1)
# 7. Write the report workbook and echo a summary to the console.
output_file = '2026年3月收入预测报表_最新版.xlsx'


def _refund_rate_label(refund_flags):
    """Return the share of refunded orders in a group as a percent string."""
    share = refund_flags.sum() / refund_flags.count() * 100
    return str(share.round(1)) + '%'


# Both detail sheets use the same aggregation: output column -> (source, func).
detail_agg_spec = {
    '订单数': ('id', 'count'),
    'GMV': ('GMV', 'sum'),
    'GSV': ('GSV', 'sum'),
    '退费率': ('is_refund', _refund_rate_label),
}

with pd.ExcelWriter(output_file) as writer:
    report_df.to_excel(writer, sheet_name='整体统计', index=False)

    # 达播 channel broken down by key_from (per-influencer detail).
    dabo_orders = order_df[order_df['渠道大类'] == '达播']
    dabo_df = dabo_orders.groupby('key_from').agg(**detail_agg_spec).reset_index()
    dabo_df.to_excel(writer, sheet_name='达播达人明细', index=False)

    # Breakdown by raw deal tag.
    tag_df = order_df.groupby('成交标记').agg(**detail_agg_spec).reset_index()
    tag_df.to_excel(writer, sheet_name='成交标记明细', index=False)

print(f"\n最新3月收入预测报表已生成:{output_file}")
print("\n整体统计结果:")
summary_cols = ['渠道大类', '预测GSV', 'GSV', '完成率', '订单数', 'GMV', '退费率']
print(report_df[summary_cols])