refund-user-learning-analys.../scripts/generate_excel.py

93 lines
3.8 KiB
Python

#!/usr/bin/env python3
"""
从 JSON 结果生成 Excel 报表
用法: python3 generate_excel.py --input /tmp/report.json --output /tmp/report.xlsx
"""
import argparse
import json
import unicodedata

import openpyxl
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from openpyxl.utils import get_column_letter
def _display_width(value):
    """Return the approximate rendered width of a cell value, in column-width units.

    East Asian wide/fullwidth characters (e.g. CJK ideographs) are counted as
    two units because spreadsheet applications render them roughly twice as
    wide as a Latin character. ``None`` (an empty cell) has width 0.
    """
    if value is None:
        return 0
    return sum(
        2 if unicodedata.east_asian_width(ch) in ("W", "F") else 1
        for ch in str(value)
    )


def style_sheet(ws):
    """Apply the report's table styling to a worksheet, in place.

    Row 1 is treated as the header row: bold 11pt font, light-blue solid
    fill, centered with text wrapping. Every other row is centered without
    wrapping. All cells in the used range get a thin border on all four
    sides. Finally each column is sized to its widest value plus 4 units of
    padding, with a minimum width of 10.

    Args:
        ws: The openpyxl worksheet to style. Its used range is taken from
            ``ws.max_row`` and ``ws.max_column``.
    """
    header_font = Font(bold=True, size=11)
    header_fill = PatternFill(start_color="D9E1F2", end_color="D9E1F2", fill_type="solid")
    header_align = Alignment(horizontal="center", vertical="center", wrap_text=True)
    body_align = Alignment(horizontal="center", vertical="center")
    thin = Side(style="thin")
    border = Border(left=thin, right=thin, top=thin, bottom=thin)

    row_count = ws.max_row
    col_count = ws.max_column
    # Widest display width seen so far for each column (index 0 == column 1).
    widths = [0] * col_count

    # Single pass over the used range: style each cell and track column widths.
    for row in range(1, row_count + 1):
        for col in range(1, col_count + 1):
            cell = ws.cell(row=row, column=col)
            cell.border = border
            if row == 1:
                cell.font = header_font
                cell.fill = header_fill
                cell.alignment = header_align
            else:
                cell.alignment = body_align
            widths[col - 1] = max(widths[col - 1], _display_width(cell.value))

    for col, widest in enumerate(widths, start=1):
        ws.column_dimensions[get_column_letter(col)].width = max(widest + 4, 10)
def _percent(part, whole):
    """Format ``part / whole`` as a one-decimal percentage string.

    Returns "N/A" when ``whole`` is zero or missing, since the ratio is
    undefined and dividing would raise ``ZeroDivisionError``.
    """
    if not whole:
        return "N/A"
    return f"{round(part / whole * 100, 1)}%"


def main():
    """Generate the Excel report from a JSON results file.

    Command-line arguments:
        --input:  path of the JSON file to read (required).
        --output: path of the .xlsx file to write (required).

    The JSON object must contain the keys ``funnel``, ``review``,
    ``summary`` and ``challenge``; each becomes one worksheet. An optional
    non-empty ``outliers`` list adds a fifth worksheet. Every worksheet is
    formatted with ``style_sheet`` before the workbook is saved.

    Raises:
        KeyError: if a required key is missing from the input JSON.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", required=True)
    parser.add_argument("--output", required=True)
    args = parser.parse_args()

    # Read as UTF-8 explicitly: the platform default encoding is not
    # guaranteed to be UTF-8 and the data may contain non-ASCII text.
    with open(args.input, encoding="utf-8") as f:
        data = json.load(f)

    wb = openpyxl.Workbook()

    # Sheet 1: Overview (reuses the workbook's default sheet)
    ws = wb.active
    ws.title = "总览"
    ws.append(["指标", "数值"])
    fun = data["funnel"]
    ws.append(["购课退费用户总数", fun["total_refund"]])
    ws.append(["剔除仍有有效订单后", fun["pure_refund"]])
    ws.append(["其中完成U0全部5节课", fun["completed_u0"]])
    ws.append([" - 仅完成L1-U0", fun["l1_only"]])
    ws.append([" - 仅完成L2-U0", fun["l2_only"]])
    ws.append([" - L1+L2都完成", fun["both"]])
    # Guarded against pure_refund == 0 (no users left after filtering).
    ws.append(["完成U0占比", _percent(fun["completed_u0"], fun["pure_refund"])])
    style_sheet(ws)

    # Sheet 2: Review
    ws2 = wb.create_sheet("课程巩固(Review)")
    ws2.append(["等级", "课时", "做了巩固的人数", "平均用时(分钟)", "平均正确率"])
    for r in data["review"]:
        ws2.append([
            r["course"],
            r["lesson"],
            r["review_count"],
            r["avg_duration_min"],
            f"{r['avg_right_rate_pct']}%",
        ])
    style_sheet(ws2)

    # Sheet 3: Summary
    ws3 = wb.create_sheet("单元强化(Summary)")
    ws3.append(["等级", "知识模块总数", "进入人数", "全部完成", "做1个", "做2个", "做3个", "做4个"])
    for r in data["summary"]:
        ws3.append([
            r["course"],
            r["total_km"],
            r["enter_count"],
            r["all_done"],
            r["done_1"],
            r["done_2"],
            r["done_3"],
            r["done_4"],
        ])
    style_sheet(ws3)

    # Sheet 4: Challenge
    ws4 = wb.create_sheet("单元挑战(Challenge)")
    ws4.append(["等级", "维度", "参与人数", "Perfect", "Perfect%", "Good", "Good%", "Oops", "Oops%"])
    for r in data["challenge"]:
        ws4.append([
            r["course"],
            r["category"],
            r["enter_count"],
            r["perfect"],
            f"{r['perfect_pct']}%",
            r["good"],
            f"{r['good_pct']}%",
            r["oops"],
            f"{r['oops_pct']}%",
        ])
    style_sheet(ws4)

    # Sheet 5: Outliers (only when the input has a non-empty "outliers" list)
    if data.get("outliers"):
        ws5 = wb.create_sheet("剔除的异常数据")
        ws5.append(["等级", "课时", "user_id", "巩固用时(分钟)", "play_time(ms)", "记录时间"])
        for r in data["outliers"]:
            ws5.append([
                r["course"],
                r["lesson"],
                r["user_id"],
                r["duration_min"],
                r["play_time_ms"],
                r["created_at"],
            ])
        style_sheet(ws5)

    wb.save(args.output)
    print(f"Excel saved: {args.output}")
# Script entry point: generate the report only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()