"""Compare actual March 2026 order revenue with the per-channel GSV forecast."""

import pandas as pd
import psycopg2  # NOTE(review): not referenced in the visible script; kept as-is.

# Value of `order_status` that this report treats as a refunded order.
REFUNDED_STATUS = 4

# Deal tags (成交标记) that map to a top-level channel by exact match.
_EXACT_TAG_CHANNELS = {
    '销转': '销转',
    '销转-小龙': '销转',
    '端内直购': 'App转化',
    '端内销转': 'App转化',
    '达播': '达播',
    '店铺直购': '店铺直购',
}

# 1. Load the latest order export that already carries the deal tag column.
order_df = pd.read_csv('2026年3月1日至今订单_含正确成交标记.csv')
print(f"订单总数:{len(order_df)}")

# 2. Derive GMV and the refund-related columns.
# pay_amount_int is scaled by 1/100 (presumably stored in cents — confirm).
order_df['GMV'] = order_df['pay_amount_int'] / 100
_is_refunded = order_df['order_status'] == REFUNDED_STATUS
order_df['is_refund'] = _is_refunded.astype(int)
# GSV is 0 for refunded orders and equal to GMV otherwise; refund_amount is the
# complement. Vectorised so that an empty export does not break the assignment
# (row-wise apply returns a DataFrame, not a Series, on an empty frame).
order_df['GSV'] = order_df['GMV'].where(~_is_refunded, 0.0)
order_df['refund_amount'] = order_df['GMV'].where(_is_refunded, 0.0)


# 3. Map each deal tag to its top-level channel.
def map_channel(tag):
    """Map a deal tag (成交标记) to its top-level channel (渠道大类).

    Args:
        tag: Value from the 成交标记 column. Missing cells arrive from
            ``read_csv`` as NaN (a float), not as a string.

    Returns:
        '销转', 'App转化', '达播', '班主任' or '店铺直购' for recognised tags,
        and '其他' for anything else, including missing/non-string values.
    """
    # Guard first: calling .startswith on a NaN float raises AttributeError.
    if not isinstance(tag, str):
        return '其他'
    if tag in _EXACT_TAG_CHANNELS:
        return _EXACT_TAG_CHANNELS[tag]
    if tag.startswith('班主任-'):
        return '班主任'
    return '其他'


order_df['渠道大类'] = order_df['成交标记'].apply(map_channel)

# 4. Aggregate per top-level channel.
channel_stats = order_df.groupby('渠道大类').agg(
    订单数=('id', 'count'),
    GMV=('GMV', 'sum'),
    已退款金额=('refund_amount', 'sum'),
    GSV=('GSV', 'sum'),
    退款订单数=('is_refund', 'sum'),
    客单价=('GMV', 'mean'),
).reset_index()
# Refund rate = refunded orders / orders, shown as a one-decimal percentage.
channel_stats['退费率'] = (
    channel_stats['退款订单数'] / channel_stats['订单数'] * 100
).round(1).astype(str) + '%'
_money_columns = ['GMV', 'GSV', '已退款金额', '客单价']
channel_stats[_money_columns] = channel_stats[_money_columns].round(2)

# 5. Forecast GSV per channel, taken from the original forecast sheet.
pred_data = [
    {'渠道大类': '销转', '预测GSV': 100000},
    {'渠道大类': 'App转化', '预测GSV': 20000},
    {'渠道大类': '达播', '预测GSV': 250000},
    {'渠道大类': '班主任', '预测GSV': 10000},
]
pred_df = pd.DataFrame(pred_data)
# 6. Merge the forecast with the actual per-channel figures.

# Columns that are plain counts/sums: for a channel with no orders their true
# value is 0, not "missing".
_ADDITIVE_COLUMNS = ['订单数', 'GMV', '已退款金额', 'GSV', '退款订单数']


def _refund_rate(flags):
    """Format the share of refunded orders in a 0/1 Series as 'x.x%'."""
    return str((flags.sum() / flags.count() * 100).round(1)) + '%'


def _completion_rate(row):
    """Return actual GSV as a percentage of forecast GSV, or '-' if undefined.

    '-' is used for rows without a forecast (or with a zero forecast), where a
    completion rate has no meaning.
    """
    forecast = row['预测GSV']
    if pd.isna(forecast) or forecast == 0:
        return '-'
    return str(round(row['GSV'] / forecast * 100, 1)) + '%'


def _summarize_orders(orders, key):
    """Aggregate `orders` by column `key`: order count, GMV, GSV, refund rate."""
    return orders.groupby(key).agg(
        订单数=('id', 'count'),
        GMV=('GMV', 'sum'),
        GSV=('GSV', 'sum'),
        退费率=('is_refund', _refund_rate),
    ).reset_index()


report_df = pd.merge(pred_df, channel_stats, on='渠道大类', how='left')
# A forecast channel missing from channel_stats had no orders in the export, so
# its counts and sums are 0; leaving them NaN made 完成率 render as 'nan%'.
report_df[_ADDITIVE_COLUMNS] = report_df[_ADDITIVE_COLUMNS].fillna(0)

# The shop-direct channel has no forecast but is still listed in the report.
shop_stats = channel_stats[channel_stats['渠道大类'] == '店铺直购']

# Grand-total row.
# NOTE(review): totals are taken over every channel in channel_stats, including
# '其他', which has no row of its own in the report, and the total 完成率
# divides that all-channel GSV by a forecast covering only four channels —
# confirm this is intended.
_total_orders = channel_stats['订单数'].sum()
_total_gmv = channel_stats['GMV'].sum()
_total_refunded_orders = channel_stats['退款订单数'].sum()
total = pd.DataFrame({
    '渠道大类': ['总计'],
    '预测GSV': [pred_df['预测GSV'].sum()],
    '订单数': [_total_orders],
    # Money totals are rounded like the per-channel rows to avoid float noise.
    'GMV': [round(_total_gmv, 2)],
    '已退款金额': [round(channel_stats['已退款金额'].sum(), 2)],
    'GSV': [round(channel_stats['GSV'].sum(), 2)],
    '退款订单数': [_total_refunded_orders],
    '客单价': [round(_total_gmv / _total_orders, 2)],
    '退费率': [str((_total_refunded_orders / _total_orders * 100).round(1)) + '%'],
})

report_df = pd.concat([report_df, shop_stats, total], ignore_index=True)
report_df['完成率'] = report_df.apply(_completion_rate, axis=1)

# 7. Write the report workbook.
output_file = '2026年3月收入预测报表_最新版.xlsx'
with pd.ExcelWriter(output_file) as writer:
    report_df.to_excel(writer, sheet_name='整体统计', index=False)

    # 达播 channel broken down per 达人 (grouped by key_from).
    dabo_df = _summarize_orders(order_df[order_df['渠道大类'] == '达播'], 'key_from')
    dabo_df.to_excel(writer, sheet_name='达播达人明细', index=False)

    # Breakdown by the fine-grained deal tag.
    tag_df = _summarize_orders(order_df, '成交标记')
    tag_df.to_excel(writer, sheet_name='成交标记明细', index=False)

print(f"\n最新3月收入预测报表已生成:{output_file}")
print("\n整体统计结果:")
print(report_df[['渠道大类', '预测GSV', 'GSV', '完成率', '订单数', 'GMV', '退费率']])