226 lines
7.5 KiB
Python
226 lines
7.5 KiB
Python
#!/usr/bin/env python3
|
||
"""合并订单数据 + parent_address,生成最终 Excel"""
|
||
import csv
|
||
import os
|
||
from collections import Counter
|
||
|
||
# 文件路径
|
||
ORDER_FILE = '/root/.openclaw/workspace/output/welfare_step1_result.txt'
|
||
ADDR_FILE = '/root/.openclaw/workspace/output/parent_address_result.txt'
|
||
FINAL_OUTPUT = '/root/.openclaw/workspace/output/福利品用户名单.xlsx'
|
||
|
||
# 读取订单数据
|
||
orders = []
|
||
with open(ORDER_FILE, 'r') as f:
|
||
reader = csv.DictReader(f)
|
||
for row in reader:
|
||
orders.append(row)
|
||
|
||
print(f"读取订单数: {len(orders)}")
|
||
|
||
# 按 (account_id, trade_no) 去重
|
||
seen = set()
|
||
orders_dedup = []
|
||
dup_count = 0
|
||
for row in orders:
|
||
key = (row['用户ID'], row['交易号'])
|
||
if key in seen:
|
||
dup_count += 1
|
||
continue
|
||
seen.add(key)
|
||
orders_dedup.append(row)
|
||
|
||
print(f"去重后订单数: {len(orders_dedup)} (移除 {dup_count} 条重复)")
|
||
|
||
# 读取地址数据(跳过非 CSV 行如 CREATE TABLE / COPY)
|
||
addr_map = {}
|
||
with open(ADDR_FILE, 'r') as f:
|
||
lines = f.readlines()
|
||
header_idx = 0
|
||
for i, line in enumerate(lines):
|
||
if line.startswith('account_id'):
|
||
header_idx = i
|
||
break
|
||
reader = csv.DictReader(lines[header_idx:])
|
||
for row in reader:
|
||
addr_map[row['account_id']] = row
|
||
|
||
print(f"读取地址记录数: {len(addr_map)}")
|
||
|
||
# 合并
|
||
matched = 0
|
||
unmatched = 0
|
||
for row in orders_dedup:
|
||
aid = row['用户ID']
|
||
if aid in addr_map:
|
||
row['收件人'] = addr_map[aid].get('name', '')
|
||
row['电话'] = addr_map[aid].get('phone_number', '')
|
||
row['区域'] = addr_map[aid].get('region', '')
|
||
row['地址'] = addr_map[aid].get('address', '')
|
||
matched += 1
|
||
else:
|
||
row['收件人'] = ''
|
||
row['电话'] = ''
|
||
row['区域'] = ''
|
||
row['地址'] = ''
|
||
unmatched += 1
|
||
|
||
print(f"有地址: {matched}, 无地址: {unmatched}")
|
||
|
||
# 情况分布
|
||
case_counts = Counter(row['来源情况'] for row in orders_dedup)
|
||
print("\n=== 情况分布(按订单行)===")
|
||
for k, v in case_counts.most_common():
|
||
print(f" {k}: {v}")
|
||
|
||
# 唯一用户数
|
||
unique_users = set(row['用户ID'] for row in orders_dedup)
|
||
print(f"\n唯一用户数: {len(unique_users)}")
|
||
# 有地址的唯一用户数
|
||
users_with_addr = set(row['用户ID'] for row in orders_dedup if row['收件人'])
|
||
print(f"有地址的唯一用户数: {len(users_with_addr)}")
|
||
|
||
# 情况分布(按用户)
|
||
user_case = {}
|
||
for row in orders_dedup:
|
||
uid = row['用户ID']
|
||
if uid not in user_case:
|
||
user_case[uid] = set()
|
||
user_case[uid].add(row['来源情况'])
|
||
|
||
case_user_counts = Counter('+'.join(sorted(v)) for v in user_case.values())
|
||
print("\n=== 情况分布(按用户数)===")
|
||
for k, v in case_user_counts.most_common():
|
||
print(f" {k}: {v}")
|
||
|
||
# 生成 Excel
|
||
try:
|
||
from openpyxl import Workbook
|
||
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
|
||
from openpyxl.utils import get_column_letter
|
||
except ImportError:
|
||
os.system('pip install openpyxl -q')
|
||
from openpyxl import Workbook
|
||
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
|
||
from openpyxl.utils import get_column_letter
|
||
|
||
wb = Workbook()
|
||
ws = wb.active
|
||
ws.title = "福利品用户名单"
|
||
|
||
# 表头
|
||
headers = ['用户ID', '交易号', '商品ID', '商品名称', '渠道(key_from)', '购课日期',
|
||
'支付金额(元)', '退款金额(元)', '退费状态', '来源情况',
|
||
'收件人', '电话', '区域', '地址']
|
||
header_fill = PatternFill(start_color='4472C4', end_color='4472C4', fill_type='solid')
|
||
header_font = Font(color='FFFFFF', bold=True, size=11)
|
||
thin_border = Border(
|
||
left=Side(style='thin'), right=Side(style='thin'),
|
||
top=Side(style='thin'), bottom=Side(style='thin')
|
||
)
|
||
|
||
for col, h in enumerate(headers, 1):
|
||
cell = ws.cell(row=1, column=col, value=h)
|
||
cell.fill = header_fill
|
||
cell.font = header_font
|
||
cell.alignment = Alignment(horizontal='center', vertical='center')
|
||
cell.border = thin_border
|
||
|
||
# 数据行
|
||
for i, row in enumerate(orders_dedup, 2):
|
||
values = [
|
||
int(row['用户ID']),
|
||
row['交易号'],
|
||
row['商品ID'],
|
||
row['商品名称'].strip(),
|
||
row['渠道'].strip(),
|
||
row['购课日期'],
|
||
float(row['支付金额(元)']),
|
||
float(row['退款金额(元)']) if row['退款金额(元)'] else '',
|
||
row['退费状态'],
|
||
row['来源情况'],
|
||
row['收件人'],
|
||
row['电话'],
|
||
row['区域'],
|
||
row['地址'],
|
||
]
|
||
for col, val in enumerate(values, 1):
|
||
cell = ws.cell(row=i, column=col, value=val)
|
||
cell.border = thin_border
|
||
if col in (1, 2, 3, 7, 8, 9, 10):
|
||
cell.alignment = Alignment(horizontal='center')
|
||
elif col == 4:
|
||
cell.alignment = Alignment(horizontal='left')
|
||
|
||
# 列宽
|
||
col_widths = [10, 22, 8, 22, 40, 20, 14, 14, 10, 14, 12, 16, 30, 40]
|
||
for i, w in enumerate(col_widths, 1):
|
||
ws.column_dimensions[get_column_letter(i)].width = w
|
||
|
||
# 冻结首行
|
||
ws.freeze_panes = 'A2'
|
||
|
||
# 添加统计 sheet
|
||
ws2 = wb.create_sheet("统计汇总")
|
||
ws2.merge_cells('A1:D1')
|
||
ws2.cell(row=1, column=1, value="福利品用户统计汇总").font = Font(bold=True, size=14)
|
||
ws2.cell(row=2, column=1, value=f"总用户数: {len(unique_users)}")
|
||
ws2.cell(row=3, column=1, value=f"有地址的用户数: {len(users_with_addr)}")
|
||
ws2.cell(row=4, column=1, value=f"无地址的用户数: {len(unique_users - users_with_addr)}")
|
||
ws2.cell(row=5, column=1, value=f"总订单数: {len(orders_dedup)}")
|
||
|
||
ws2.cell(row=7, column=1, value="情况分布(按用户数)").font = Font(bold=True)
|
||
for i, (k, v) in enumerate(case_user_counts.most_common(), 8):
|
||
ws2.cell(row=i, column=1, value=k)
|
||
ws2.cell(row=i, column=2, value=v)
|
||
|
||
ws2.cell(row=7, column=4, value="情况分布(按订单行数)").font = Font(bold=True)
|
||
for i, (k, v) in enumerate(case_counts.most_common(), 8):
|
||
ws2.cell(row=i, column=4, value=k)
|
||
ws2.cell(row=i, column=5, value=v)
|
||
|
||
# 添加用户地址汇总 sheet(按用户去重,仅保留地址信息)
|
||
ws3 = wb.create_sheet("用户地址汇总")
|
||
addr_headers = ['用户ID', '收件人', '电话', '区域', '地址', '来源情况']
|
||
for col, h in enumerate(addr_headers, 1):
|
||
cell = ws3.cell(row=1, column=col, value=h)
|
||
cell.fill = header_fill
|
||
cell.font = header_font
|
||
cell.alignment = Alignment(horizontal='center', vertical='center')
|
||
cell.border = thin_border
|
||
|
||
# 按用户去重,取第一个订单的地址信息
|
||
user_addrs = {}
|
||
for row in orders_dedup:
|
||
uid = row['用户ID']
|
||
if uid not in user_addrs:
|
||
user_addrs[uid] = row
|
||
# 如果有多个来源情况,合并
|
||
elif row['来源情况'] not in user_addrs[uid]['来源情况']:
|
||
user_addrs[uid]['来源情况'] = user_addrs[uid]['来源情况'] + '+' + row['来源情况']
|
||
|
||
for i, (uid, row) in enumerate(sorted(user_addrs.items()), 2):
|
||
values = [
|
||
int(uid),
|
||
row['收件人'],
|
||
row['电话'],
|
||
row['区域'],
|
||
row['地址'],
|
||
row['来源情况'],
|
||
]
|
||
for col, val in enumerate(values, 1):
|
||
cell = ws3.cell(row=i, column=col, value=val)
|
||
cell.border = thin_border
|
||
cell.alignment = Alignment(horizontal='center')
|
||
if col in (2, 4, 5):
|
||
cell.alignment = Alignment(horizontal='left')
|
||
|
||
addr_col_widths = [10, 12, 16, 30, 40, 14]
|
||
for i, w in enumerate(addr_col_widths, 1):
|
||
ws3.column_dimensions[get_column_letter(i)].width = w
|
||
ws3.freeze_panes = 'A2'
|
||
|
||
wb.save(FINAL_OUTPUT)
|
||
print(f"\n✅ Excel 已保存: {FINAL_OUTPUT}")
|
||
print(f"用户地址汇总 Sheet: {len(user_addrs)} 行")
|