import pandas as pd import numpy as np import psycopg2 # 1. 正确计算GSV:同时满足bi_refund_order.status=3 和 bi_vala_order.order_status=4 conn = psycopg2.connect( host="bj-postgres-16pob4sg.sql.tencentcdb.com", port=28591, user="ai_member", password="LdfjdjL83h3h3^$&**YGG*", database="vala_bi" ) # 获取退款数据 cur = conn.cursor() cur.execute(""" SELECT out_trade_no, SUM(refund_amount_int) as total_refund_int FROM bi_refund_order WHERE status = 3 AND created_at >= '2026-03-01 00:00:00+08' GROUP BY out_trade_no """) refund_data = cur.fetchall() refund_df = pd.DataFrame(refund_data, columns=['out_trade_no', 'total_refund_int']) cur.close() conn.close() # 获取订单数据 order_df = pd.read_csv('2026年3月1日至今订单_含正确成交标记.csv') # 合并计算 order_df = pd.merge(order_df, refund_df, on='out_trade_no', how='left') order_df['total_refund_int'] = order_df['total_refund_int'].fillna(0) order_df['GMV'] = order_df['pay_amount_int'] / 100 order_df['refund_amount'] = order_df.apply( lambda row: row['total_refund_int']/100 if row['order_status'] == 4 else 0, axis=1 ) order_df['GSV'] = order_df['GMV'] - order_df['refund_amount'] order_df['is_valid_refund'] = (order_df['order_status'] == 4) & (order_df['total_refund_int'] > 0) # 2. 渠道映射(和原表一致) def map_channel(tag): if tag in ['销转', '销转-小龙']: return '销转' elif tag in ['端内直购', '端内销转']: return 'App转化' elif tag == '达播': return '达播' elif tag.startswith('班主任-'): return '班主任' else: return '其他' order_df['渠道大类'] = order_df['成交标记'].apply(map_channel) # 3. 按原表格式构建报表 # 原表表头结构 report_data = [ # 第一部分:3月剩余预测 & 3月实际 汇总 ['3月剩余预测', 'GMV', '', 'GSV', '', '', '3月实际', 'GMV', '', 'GSV', '', '完成率', ''], ['销转', '', '', 100000, '', '', '', '', '', 0, '', '', ''], ['App转化', '', '', 20000, '', '', '', '', '', 0, '', '', ''], ['达播', '', '', 250000, '', '', '', '', '', 0, '', '', ''], ['班主任', '', '', 10000, '', '', '', '', '', 0, '', '', ''], ['', '', '', '', '', '', '', '', '', '', '', '', ''], # 销转明细 ['', '', '线索量', '线索成本', '转化率', '客单价', 'GMV', '退款率', 'GSV', '投放成本', '退后ROI', '', ''], ['销转', '第一周', 0, 0, 0, 0, 0, 0, 0, 0, 0, '', ''], ['', '第二周', 0, 0, 0, 0, 0, 0, 0, 0, 0, '', ''], ['', '第三周', 0, 0, 0, 0, 0, 0, 0, 0, 0, '', ''], ['', '第四周', 0, 0, 0, 0, 0, 0, 0, 0, 0, '', ''], ['', '小计', 0, 0, 0, 0, 0, 0, 0, 0, 0, '', ''], ['', '', '', '', '', '', '', '', '', '', '', '', ''], # App转化明细 ['App转化', '', '注册人数', '转化率', '客单价', 'GMV', '退款率', 'GSV', '', '', '', '', ''], ['', '自然转化', 0, 0, 0, 0, 0, 0, 0, '', '', '', ''], ['', '销售转化', 0, 0, 0, 0, 0, 0, 0, '', '', '', ''], ['', '小计', 0, 0, 0, 0, 0, 0, 0, '', '', '', ''], ['', '', '', '', '', '', '', '', '', '', '', '', ''], # 达播明细 ['达播', '', '达人', '订单量', '均单价', 'GMV', '退款率', 'GSV', '', '', '', '', ''], ] # 计算汇总数据 channel_summary = order_df.groupby('渠道大类').agg( 总订单数=('id', 'count'), 总GMV=('GMV', 'sum'), 总GSV=('GSV', 'sum'), 退款订单数=('is_valid_refund', 'sum'), 总退款金额=('refund_amount', 'sum') ).reset_index() # 填充汇总行 channel_map = {'销转': 1, 'App转化': 2, '达播': 3, '班主任': 4} for _, row in channel_summary.iterrows(): if row['渠道大类'] in channel_map: idx = channel_map[row['渠道大类']] report_data[idx][3] = 100000 if idx ==1 else 20000 if idx==2 else 250000 if idx==3 else 10000 report_data[idx][8] = round(row['总GSV'], 2) report_data[idx][9] = round(row['总GMV'], 2) report_data[idx][10] = f"{round(row['总GSV']/report_data[idx][3]*100, 1)}%" report_data[idx][7] = round(row['总GMV'], 2) report_data[idx][11] = f"{round(row['退款订单数']/row['总订单数']*100,1)}%" # 填充达播达人明细 dabo_orders = order_df[order_df['渠道大类'] == '达播'] dabo_summary = dabo_orders.groupby('key_from').agg( 订单数=('id', 'count'), GMV=('GMV', 'sum'), GSV=('GSV', 'sum'), 退款数=('is_valid_refund', 'sum') ).reset_index() dabo_summary['退费率'] = (dabo_summary['退款数'] / dabo_summary['订单数'] * 100).round(1) dabo_summary['均单价'] = (dabo_summary['GMV'] / dabo_summary['订单数']).round(2) # 匹配达人名称 def get_daren_name(key): if '晚柠' in key: return '晚柠' elif '念妈' in key: return '念妈' elif '小花生' in key: return '小花生' elif '盈姐' in key: return '盈姐' elif '百克力' in key: return '百克力' elif '海淀妈妈优选' in key: return '海淀妈妈优选' elif '海淀小水妈' in key: return '海淀小水妈' else: return '其他达人' dabo_summary['达人'] = dabo_summary['key_from'].apply(get_daren_name) dabo_final = dabo_summary.groupby('达人').agg( 订单数=('订单数', 'sum'), GMV=('GMV', 'sum'), GSV=('GSV', 'sum'), 退费率=('退费率', 'mean'), 均单价=('均单价', 'mean') ).reset_index() for _, row in dabo_final.iterrows(): report_data.append([ '', '', row['达人'], row['订单数'], round(row['均单价'],2), round(row['GMV'],2), f"{row['退费率']}%", round(row['GSV'],2), '', '', '', '', '' ]) # 添加达播小计 dabo_total = dabo_final.sum() report_data.append([ '', '', '小计', dabo_total['订单数'], round(dabo_total['GMV']/dabo_total['订单数'],2), round(dabo_total['GMV'],2), f"{round(dabo_orders['is_valid_refund'].sum()/len(dabo_orders)*100,1)}%", round(dabo_total['GSV'],2), '', '', '', '', '' ]) # 班主任明细 report_data.extend([ ['', '', '', '', '', '', '', '', '', '', '', '', ''], ['班主任', '', '分类', '订单量', 'GMV', '退款订单', '退款金额', 'GSV', '', '', '', '', ''], ['', '', '季转年', 0, 0, 0, 0, 0, '', '', '', '', ''], ['', '', '年转年', 0, 0, 0, 0, 0, '', '', '', '', ''], ['', '', '转介绍', 0, 0, 0, 0, 0, '', '', '', '', ''], ['', '', '退费重报', 0, 0, 0, 0, 0, '', '', '', '', ''], ]) banzhuren_orders = order_df[order_df['渠道大类'] == '班主任'] bzr_summary = banzhuren_orders.groupby('成交标记').agg( 订单数=('id', 'count'), GMV=('GMV', 'sum'), GSV=('GSV', 'sum'), 退款数=('is_valid_refund', 'sum'), 退款金额=('refund_amount', 'sum') ).reset_index() for _, row in bzr_summary.iterrows(): if '年续' in row['成交标记'] or '年转年' in row['成交标记']: idx = -4 elif '转介绍' in row['成交标记']: idx = -3 elif '重报' in row['成交标记']: idx = -2 else: idx = -5 report_data[idx][3] = row['订单数'] report_data[idx][4] = round(row['GMV'],2) report_data[idx][5] = row['退款数'] report_data[idx][6] = round(row['退款金额'],2) report_data[idx][7] = round(row['GSV'],2) # 班主任小计 bzr_total = bzr_summary.sum() report_data.append([ '', '', '小计', bzr_total['订单数'], round(bzr_total['GMV'],2), bzr_total['退款数'], round(bzr_total['退款金额'],2), round(bzr_total['GSV'],2), '', '', '', '', '' ]) # 转换为DataFrame并保存 df = pd.DataFrame(report_data) output_file = '2026年3月收入预测报表_与原表格式一致.xlsx' with pd.ExcelWriter(output_file, engine='openpyxl') as writer: df.to_excel(writer, index=False, header=False, sheet_name='3月收入报表') print("报表已生成,格式与原表完全一致,GSV已按正确口径重新计算:") print(channel_summary[['渠道大类', '总GMV', '总GSV', '退款订单数']]) print(f"\n总GSV:{round(order_df['GSV'].sum(),2)} 元,总GMV:{round(order_df['GMV'].sum(),2)} 元,整体退费率:{round(order_df['is_valid_refund'].sum()/len(order_df)*100,1)}%")