#!/usr/bin/env python3 """生成剔除数据汇总 Excel:被剔除的低价订单 + 重复 trade_no""" import csv CHEAP_FILE = '/root/.openclaw/workspace/output/welfare_excluded_cheap.txt' FINAL_OUTPUT = '/root/.openclaw/workspace/output/剔除数据明细.xlsx' # 读取低价订单(可能无表头) cheap_orders = [] with open(CHEAP_FILE, 'r') as f: lines = [l.strip() for l in f.readlines() if l.strip()] # 检查第一行是否是表头 # 表头: account_id|trade_no|goods_id|goods_name|key_from|to_char|?column?|order_status reader = csv.DictReader(lines, delimiter='|') for row in reader: cheap_orders.append({ '用户ID': str(row.get('account_id') or ''), '交易号': str(row.get('trade_no') or ''), '商品ID': str(row.get('goods_id') or ''), '商品名称': (row.get('goods_name') or '').strip(), '渠道': (row.get('key_from') or '').strip(), '购课日期': str(row.get('to_char') or ''), '支付金额(元)': str(row.get('?column?') or '0'), '订单状态': str(row.get('order_status') or ''), }) print(f"低价订单数: {len(cheap_orders)}") # 重复 trade_no 数据 dup_orders = [ {'用户ID': '14459', '交易号': '440002908556965', '商品ID': '31', '商品名称': '瓦拉英语年包', '渠道': 'app-active-h5-0-0', '购课日期': '2026-02-05 14:23:58', '支付金额(元)': '1999.0', '订单状态': '3(已完成)', '原因': '主键ID=1969, 与ID=1970重复'}, {'用户ID': '14459', '交易号': '440002908556965', '商品ID': '31', '商品名称': '瓦拉英语年包', '渠道': 'app-active-h5-0-0', '购课日期': '2026-02-05 14:23:58', '支付金额(元)': '1999.0', '订单状态': '4(已退款)', '原因': '主键ID=1970, 状态为已退款已保留此条(去重时保留ID更小的)'}, ] # 生成 Excel from openpyxl import Workbook from openpyxl.styles import Font, PatternFill, Alignment, Border, Side wb = Workbook() header_fill = PatternFill(start_color='4472C4', end_color='4472C4', fill_type='solid') header_font = Font(color='FFFFFF', bold=True, size=11) warn_fill = PatternFill(start_color='FFC000', end_color='FFC000', fill_type='solid') thin_border = Border( left=Side(style='thin'), right=Side(style='thin'), top=Side(style='thin'), bottom=Side(style='thin') ) # Sheet 1: 低价订单 ws1 = wb.active ws1.title = "被剔除的低价订单" cheap_headers = ['用户ID', '交易号', '商品ID', '商品名称', '渠道', '购课日期', '支付金额(元)', '订单状态', '剔除原因'] for col, h in enumerate(cheap_headers, 1): cell = ws1.cell(row=1, column=col, value=h) cell.fill = header_fill cell.font = header_font cell.alignment = Alignment(horizontal='center', vertical='center') cell.border = thin_border # 订单状态映射 status_map = {'3': '3(已完成)', '4': '4(已退款)'} for i, row in enumerate(cheap_orders, 2): amount = float(row['支付金额(元)']) if row['支付金额(元)'] else 0 status_raw = row.get('订单状态', '').strip() status_label = status_map.get(status_raw, status_raw) values = [ row['用户ID'], row['交易号'], row['商品ID'], row['商品名称'].strip() if '商品名称' in row else '', row['渠道'].strip() if row.get('渠道') else '', row['购课日期'] if row.get('购课日期') else '', amount, status_label, f'金额过低({amount}元),非正常售价', ] for col, val in enumerate(values, 1): cell = ws1.cell(row=i, column=col, value=val) cell.border = thin_border cell.alignment = Alignment(horizontal='center') if col == 5: cell.alignment = Alignment(horizontal='left') # 金额分布统计 from collections import Counter amount_dist = Counter(float(r['支付金额(元)']) for r in cheap_orders if r.get('支付金额(元)')) ws1.cell(row=len(cheap_orders)+3, column=1, value="金额分布").font = Font(bold=True) for j, (amt, cnt) in enumerate(sorted(amount_dist.items()), len(cheap_orders)+4): ws1.cell(row=j, column=1, value=f"{amt} 元") ws1.cell(row=j, column=2, value=cnt) col_widths = [10, 24, 8, 20, 36, 22, 14, 14, 26] for i, w in enumerate(col_widths, 1): from openpyxl.utils import get_column_letter ws1.column_dimensions[get_column_letter(i)].width = w ws1.freeze_panes = 'A2' # Sheet 2: 重复 trade_no ws2 = wb.create_sheet("重复trade_no") dup_headers = ['用户ID', '交易号', '商品ID', '商品名称', '渠道', '购课日期', '支付金额(元)', '订单状态', '重复原因'] for col, h in enumerate(dup_headers, 1): cell = ws2.cell(row=1, column=col, value=h) cell.fill = header_fill cell.font = header_font cell.alignment = Alignment(horizontal='center', vertical='center') cell.border = thin_border for i, row in enumerate(dup_orders, 2): for col, val in enumerate(row.values(), 1): cell = ws2.cell(row=i, column=col, value=val) cell.border = thin_border cell.alignment = Alignment(horizontal='center') if col == 5: cell.alignment = Alignment(horizontal='left') for i, w in enumerate(col_widths, 1): ws2.column_dimensions[get_column_letter(i)].width = w ws2.freeze_panes = 'A2' # Sheet 3: 说明 ws3 = wb.create_sheet("剔除说明") ws3.column_dimensions['A'].width = 80 notes = [ "剔除数据说明", "", "一、被剔除的低价订单(共 {} 条)".format(len(cheap_orders)), " • 筛选条件:goods_id IN (31,60,61),已完成或已退款,pay_amount_int < 100,000分(<1,000元)", " • 这些订单的支付金额远低于正常售价(level1/level2 = 1,999元,level1+2 = 3,598元)", " • 来源渠道多为 newmedia-dianpu-*(店铺渠道),怀疑为测试/内部订单", " • 若计入这些订单会产生大量虚假的「两个年包」用户(如 account 6691 共 9 笔 5 元订单)", "", "二、重复 trade_no(共 1 条)", " • trade_no: 440002908556965,account_id: 14459", " • bi_vala_order 表中存在两条记录(id=1969 和 id=1970)", " • id=1969: order_status=3(已完成); id=1970: order_status=4(已退款)", " • 去重逻辑:按 (account_id, trade_no) 去重,保留先出现的记录", "", "三、筛选阈值说明", " • 最低金额阈值设为 100,000 分(1,000 元),正常年包价格 ≥ 1,999 元", ] for i, note in enumerate(notes, 1): cell = ws3.cell(row=i, column=1, value=note) if i == 1: cell.font = Font(bold=True, size=14) wb.save(FINAL_OUTPUT) print(f"✅ 已保存: {FINAL_OUTPUT}")