#!/usr/bin/env python3
"""Build the exclusion-summary Excel workbook: excluded low-price orders + duplicate trade_no rows."""

import csv
from collections import Counter

from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter

# Input: pipe-delimited text dump of the excluded low-price orders.
CHEAP_FILE = '/root/.openclaw/workspace/output/welfare_excluded_cheap.txt'
# Output: path of the workbook written at the end of the script.
FINAL_OUTPUT = '/root/.openclaw/workspace/output/剔除数据明细.xlsx'

# Load the excluded low-price orders. The dump is pipe-delimited and may or
# may not start with a header line.

# Documented column order of the dump; used as field names when the file has
# no header line of its own.
CHEAP_COLUMNS = [
    'account_id', 'trade_no', 'goods_id', 'goods_name',
    'key_from', 'to_char', '?column?', 'order_status',
]


def _cell(row, key, default=''):
    """Return ``row[key]`` without surrounding whitespace, or *default* when missing or blank."""
    return (row.get(key) or '').strip() or default


# Explicit encoding so the read does not depend on the process locale
# (assumes the dump is UTF-8 -- TODO confirm against the export step).
with open(CHEAP_FILE, 'r', encoding='utf-8') as f:
    lines = [line.strip() for line in f if line.strip()]

# Only treat the first line as a header when it actually names the columns;
# otherwise the first data row would be consumed as field names and every
# lookup below would come back empty.
first_cells = [c.strip() for c in lines[0].split('|')] if lines else []
if 'account_id' in first_cells or 'trade_no' in first_cells:
    fieldnames, data_lines = first_cells, lines[1:]
else:
    fieldnames, data_lines = CHEAP_COLUMNS, lines

reader = csv.DictReader(data_lines, fieldnames=fieldnames, delimiter='|')
cheap_orders = [
    {
        '用户ID': _cell(row, 'account_id'),
        '交易号': _cell(row, 'trade_no'),
        '商品ID': _cell(row, 'goods_id'),
        '商品名称': _cell(row, 'goods_name'),
        '渠道': _cell(row, 'key_from'),
        '购课日期': _cell(row, 'to_char'),
        # Kept as a string here; converted to float when the sheet is written.
        '支付金额(元)': _cell(row, '?column?', '0'),
        '订单状态': _cell(row, 'order_status'),
    }
    for row in reader
]

print(f"低价订单数: {len(cheap_orders)}")

# Hand-maintained records for the duplicated trade_no: two rows that share
# the same account and trade number but differ in order status.
# Key order matters: it mirrors the column order of the duplicate sheet.
# NOTE(review): the reason text of the id=1970 record says both "this record
# was kept" and "dedup keeps the smaller ID" -- these contradict each other;
# confirm the intended wording with the data owner before changing it.
dup_orders = [
    {
        '用户ID': '14459', '交易号': '440002908556965', '商品ID': '31',
        '商品名称': '瓦拉英语年包', '渠道': 'app-active-h5-0-0',
        '购课日期': '2026-02-05 14:23:58', '支付金额(元)': '1999.0',
        '订单状态': '3(已完成)', '原因': '主键ID=1969, 与ID=1970重复',
    },
    {
        '用户ID': '14459', '交易号': '440002908556965', '商品ID': '31',
        '商品名称': '瓦拉英语年包', '渠道': 'app-active-h5-0-0',
        '购课日期': '2026-02-05 14:23:58', '支付金额(元)': '1999.0',
        '订单状态': '4(已退款)', '原因': '主键ID=1970, 状态为已退款已保留此条(去重时保留ID更小的)',
    },
]

# Build the workbook and the styles shared by all sheets.
wb = Workbook()
# Header row: white bold text on a blue fill.
header_fill = PatternFill(start_color='4472C4', end_color='4472C4', fill_type='solid')
header_font = Font(color='FFFFFF', bold=True, size=11)
# Amber highlight fill.
warn_fill = PatternFill(start_color='FFC000', end_color='FFC000', fill_type='solid')
# Thin border on all four sides, applied to every table cell.
thin_border = Border(
    left=Side(style='thin'), right=Side(style='thin'),
    top=Side(style='thin'), bottom=Side(style='thin'),
)

# Sheet 1: excluded low-price orders. Reuses the workbook's default sheet.
ws1 = wb.active
ws1.title = "被剔除的低价订单"

cheap_headers = ['用户ID', '交易号', '商品ID', '商品名称', '渠道', '购课日期',
                 '支付金额(元)', '订单状态', '剔除原因']
# One alignment object shared by every header cell.
header_alignment = Alignment(horizontal='center', vertical='center')
for col, h in enumerate(cheap_headers, 1):
    cell = ws1.cell(row=1, column=col, value=h)
    cell.fill = header_fill
    cell.font = header_font
    cell.alignment = header_alignment
    cell.border = thin_border

# Raw order-status code -> label shown in the sheet. Codes that are not in
# the map are written through unchanged.
status_map = {'3': '3(已完成)', '4': '4(已退款)'}

# Alignment objects are built once and shared by all data cells.
center_alignment = Alignment(horizontal='center')
left_alignment = Alignment(horizontal='left')

# Data rows start on row 2, directly below the header row.
for i, row in enumerate(cheap_orders, 2):
    amount = float(row['支付金额(元)']) if row['支付金额(元)'] else 0
    status_raw = row.get('订单状态', '').strip()
    status_label = status_map.get(status_raw, status_raw)

    values = [
        row['用户ID'],
        row['交易号'],
        row['商品ID'],
        row['商品名称'].strip() if '商品名称' in row else '',
        row['渠道'].strip() if row.get('渠道') else '',
        row['购课日期'] if row.get('购课日期') else '',
        amount,
        status_label,
        f'金额过低({amount}元),非正常售价',
    ]
    for col, val in enumerate(values, 1):
        cell = ws1.cell(row=i, column=col, value=val)
        cell.border = thin_border
        # Column 5 (channel) is left-aligned; every other column is centred.
        cell.alignment = left_alignment if col == 5 else center_alignment

# Amount distribution: number of low-price orders per distinct paid amount,
# written below the data table with one blank row in between.
amount_dist = Counter(float(r['支付金额(元)']) for r in cheap_orders if r.get('支付金额(元)'))
ws1.cell(row=len(cheap_orders) + 3, column=1, value="金额分布").font = Font(bold=True)
# One line per amount, in ascending order, starting right under the caption.
for j, (amt, cnt) in enumerate(sorted(amount_dist.items()), len(cheap_orders) + 4):
    ws1.cell(row=j, column=1, value=f"{amt} 元")
    ws1.cell(row=j, column=2, value=cnt)

# Column widths for the nine table columns, in column order. The same list
# is applied to the duplicate-trade_no sheet further down.
col_widths = [10, 24, 8, 20, 36, 22, 14, 14, 26]
for i, w in enumerate(col_widths, 1):
    ws1.column_dimensions[get_column_letter(i)].width = w
# Keep the header row visible while scrolling.
ws1.freeze_panes = 'A2'

# Sheet 2: the duplicated trade_no records.
ws2 = wb.create_sheet("重复trade_no")
dup_headers = ['用户ID', '交易号', '商品ID', '商品名称', '渠道', '购课日期',
               '支付金额(元)', '订单状态', '重复原因']
# Record keys in header order. Only the last column differs: the header
# '重复原因' is backed by the record key '原因'. Writing by key keeps each
# value under its own header regardless of dict insertion order.
dup_keys = dup_headers[:-1] + ['原因']

header_alignment = Alignment(horizontal='center', vertical='center')
for col, h in enumerate(dup_headers, 1):
    cell = ws2.cell(row=1, column=col, value=h)
    cell.fill = header_fill
    cell.font = header_font
    cell.alignment = header_alignment
    cell.border = thin_border

center_alignment = Alignment(horizontal='center')
left_alignment = Alignment(horizontal='left')
# Data rows start on row 2, directly below the header row.
for i, row in enumerate(dup_orders, 2):
    for col, key in enumerate(dup_keys, 1):
        cell = ws2.cell(row=i, column=col, value=row.get(key, ''))
        cell.border = thin_border
        # Column 5 (channel) is left-aligned; every other column is centred.
        cell.alignment = left_alignment if col == 5 else center_alignment

# Same column widths as the low-price sheet.
for i, w in enumerate(col_widths, 1):
    ws2.column_dimensions[get_column_letter(i)].width = w
# Keep the header row visible while scrolling.
ws2.freeze_panes = 'A2'

# Sheet 3: free-text explanation of what was excluded and why.
ws3 = wb.create_sheet("剔除说明")
ws3.column_dimensions['A'].width = 80

# Number of distinct duplicated trade numbers, derived from the records so
# the note stays correct if dup_orders is edited.
dup_trade_no_count = len({order['交易号'] for order in dup_orders})

notes = [
    "剔除数据说明",
    "",
    "一、被剔除的低价订单(共 {} 条)".format(len(cheap_orders)),
    " • 筛选条件:goods_id IN (31,60,61),已完成或已退款,pay_amount_int < 100,000分(<1,000元)",
    " • 这些订单的支付金额远低于正常售价(level1/level2 = 1,999元,level1+2 = 3,598元)",
    " • 来源渠道多为 newmedia-dianpu-*(店铺渠道),怀疑为测试/内部订单",
    " • 若计入这些订单会产生大量虚假的「两个年包」用户(如 account 6691 共 9 笔 5 元订单)",
    "",
    "二、重复 trade_no(共 {} 条)".format(dup_trade_no_count),
    " • trade_no: 440002908556965,account_id: 14459",
    " • bi_vala_order 表中存在两条记录(id=1969 和 id=1970)",
    " • id=1969: order_status=3(已完成); id=1970: order_status=4(已退款)",
    " • 去重逻辑:按 (account_id, trade_no) 去重,保留先出现的记录",
    "",
    "三、筛选阈值说明",
    " • 最低金额阈值设为 100,000 分(1,000 元),正常年包价格 ≥ 1,999 元",
]
# One note per row in column A; the first row is the title.
for i, note in enumerate(notes, 1):
    cell = ws3.cell(row=i, column=1, value=note)
    if i == 1:
        cell.font = Font(bold=True, size=14)

wb.save(FINAL_OUTPUT)
print(f"✅ 已保存: {FINAL_OUTPUT}")