commit 421cfe7aae0ba2c2b8e4016df113053cee12e6bc
Author: OpenClaw Bot
Date:   Mon Apr 13 15:04:10 2026 +0800

    auto-sync: vala-component-oops-stat 2026-04-13_15:04

diff --git a/SKILL.md b/SKILL.md
new file mode 100644
index 0000000..164320d
--- /dev/null
+++ b/SKILL.md
@@ -0,0 +1,49 @@
+---
+name: vala-component-oops-stat
+description: 瓦拉英语组件练习Oops率统计工具,支持按日期统计、自动剔除测试账号、分L1/L2等级、自定义样本量规则、导出Excel报表。使用场景:(1) 统计组件练习错误率/Oops率 (2) 按难度等级拆分L1(A1)/L2(A2)统计结果 (3) 练习次数不足10次自动补充历史记录 (4) 导出多sheet Excel报表 (5) 按Oops率降序排序展示高错误率组件
+---
+
+# 瓦拉英语组件Oops率统计技能
+
+## 功能说明
+用于统计飞书多维表格中组件练习的Oops(错误)率,支持灵活配置统计规则,自动处理数据口径问题,输出标准化统计结果。
+
+## 核心特性
+✅ 自动剔除测试账号:仅保留`bi_vala_app_account.status = 1`的正常用户练习记录
+✅ 等级拆分:自动按`level`字段拆分L1(A1)/L2(A2)两个难度等级分别统计
+✅ Oops判定规则:
+  - 练习结果为Oops/Opps → 记为Oops
+  - 练习结果为pass且c_type包含`core_`或`scence_` → 记为Oops
+  - 其余pass结果 → 记为Good,不计入Oops
+✅ 样本量规则:昨日练习≥10次用昨日全量数据,<10次自动补充历史记录至10次,历史不足10次取全部记录;昨日无练习记录的组件不纳入统计
+✅ 正确排序:按Oops率数值从高到低排序,避免字符串排序错误
+✅ 自动导出:生成包含两个sheet的Excel报表,直接发送给用户
+
+## 使用参数
+| 参数 | 说明 | 默认值 |
+|------|------|--------|
+| 统计日期 | 要统计的日期(格式YYYY-MM-DD) | 昨日(当前日期-1天) |
+| 最小样本量 | 组件最少统计样本量 | 10 |
+| 是否剔测试账号 | 是否排除测试账号练习记录 | 是 |
+| 是否分等级 | 是否拆分L1/L2分别统计 | 是 |
+| 导出格式 | 输出格式(CSV/Excel) | Excel |
+
+## 操作步骤
+1. 确认用户统计需求:统计日期、样本量规则、是否分等级等
+2. 执行对应等级的统计SQL脚本(`scripts/stat_l1.sql`、`scripts/stat_l2.sql`)
+3. 运行`scripts/generate_excel.py`生成Excel报表
+4. 将报表通过飞书发送给用户
+
+## 脚本说明
+### scripts/stat_l1.sql
+统计L1(A1)等级组件Oops率的SQL脚本,可修改脚本开头`params`中的`stat_date`、`min_samples`调整统计日期和最小样本量
+### scripts/stat_l2.sql
+统计L2(A2)等级组件Oops率的SQL脚本,可修改脚本开头`params`中的`stat_date`、`min_samples`调整统计日期和最小样本量
+### scripts/stat_l1_exit_rate.sql
+统计L1(A1)等级组件练习退出率的SQL脚本(play_result为failed或close视为退出),样本量规则与Oops率统计一致
+### scripts/stat_l2_exit_rate.sql
+统计L2(A2)等级组件练习退出率的SQL脚本(play_result为failed或close视为退出),样本量规则与Oops率统计一致
+### scripts/generate_excel.py
+将CSV统计结果合并生成带多sheet的Excel报表
+### references/table_schema.md
+相关数据表结构说明和字段含义参考
diff --git a/references/table_schema.md b/references/table_schema.md
new file mode 100644
index 0000000..c99ff7a
--- /dev/null
+++ b/references/table_schema.md
@@ -0,0 +1,26 @@
+# 相关数据表结构说明
+
+## bi_user_component_play_record_* 分表(组件练习记录表)
+| 字段名 | 类型 | 说明 |
+|-------|------|------|
+| user_id | bigint | 角色ID,关联bi_vala_app_character.id |
+| component_unique_code | varchar | 组件唯一编码,业务系统中组件的唯一标识 |
+| play_result | varchar | 练习结果:Perfect/Good/Oops/Opps/pass/failed/close(failed、close视为退出) |
+| c_type | varchar | 组件类型,包含core_前缀为核心题型,scence_前缀为场景题型 |
+| created_at | timestamp | 练习时间 |
+| level | varchar | 难度等级:A1(L1)/A2(L2) |
+| deleted_at | timestamp | 删除时间,为空表示记录有效 |
+
+## bi_vala_app_character(角色表)
+| 字段名 | 类型 | 说明 |
+|-------|------|------|
+| id | bigint | 角色ID |
+| account_id | bigint | 账号ID,关联bi_vala_app_account.id |
+| deleted_at | timestamp | 删除时间,为空表示记录有效 |
+
+## bi_vala_app_account(账号表)
+| 字段名 | 类型 | 说明 |
+|-------|------|------|
+| id | bigint | 账号ID |
+| status | int | 账号状态:1=正常用户,其他=测试账号/禁用账号 |
+| deleted_at | timestamp | 删除时间,为空表示记录有效 |
diff --git a/scripts/generate_excel.py b/scripts/generate_excel.py
new file mode 100644
index 0000000..a261f1b
--- /dev/null
+++ b/scripts/generate_excel.py
@@ -0,0 +1,39 @@
+import pandas as pd
+import numpy as np
+import sys
+
+# NumPy compatibility shim: define the private np._get_promotion_state hook only
+# when this NumPy build lacks it, so an existing implementation is never replaced.
+# NOTE(review): presumably some dependency looks this hook up on import -- confirm.
+if not hasattr(np, "_get_promotion_state"):
+    np._get_promotion_state = lambda *args, **kwargs: 0
+
+
+def generate_excel(l1_csv_path, l2_csv_path, output_path):
+    """Write an Excel workbook with one sheet for L1 and one for L2.
+
+    :param l1_csv_path: path of the CSV with the L1 statistics
+    :param l2_csv_path: path of the CSV with the L2 statistics
+    :param output_path: path of the Excel file to create
+    """
+    # Read both inputs before opening the writer so a missing or malformed CSV
+    # fails before any output file is created.
+    sheets = [
+        ('L1等级组件', pd.read_csv(l1_csv_path)),
+        ('L2等级组件', pd.read_csv(l2_csv_path)),
+    ]
+
+    with pd.ExcelWriter(output_path) as writer:
+        for sheet_name, frame in sheets:
+            frame.to_excel(writer, sheet_name=sheet_name, index=False)
+
+    print(f"Excel报表生成成功:{output_path}")
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 4:
+        # The script takes three arguments; the usage text must list all of them.
+        print("用法:python generate_excel.py <L1 CSV路径> <L2 CSV路径> <输出Excel路径>")
+        sys.exit(1)
+
+    generate_excel(sys.argv[1], sys.argv[2], sys.argv[3])
diff --git a/scripts/stat_l1.sql b/scripts/stat_l1.sql
new file mode 100644
index 0000000..2ce62f7
--- /dev/null
+++ b/scripts/stat_l1.sql
@@ -0,0 +1,110 @@
+-- Oops rate per component for difficulty level L1 (A1).
+-- Dialect: PostgreSQL-style syntax (::DECIMAL, ||, INTERVAL '1 day').
+-- To report on another day or change the minimum sample size, edit the params CTE
+-- (e.g. stat_date = DATE '2026-04-12'); by default the day before the run date is used.
+WITH params AS (
+    SELECT
+        CAST(CURRENT_DATE - INTERVAL '1 day' AS DATE) AS stat_date,  -- day being reported
+        10 AS min_samples                                            -- minimum sample size per component
+),
+-- All non-deleted play records, gathered from the 8 shard tables.
+all_component_records AS (
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_0 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_1 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_2 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_3 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_4 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_5 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_6 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_7 WHERE deleted_at IS NULL
+),
+-- Level-A1 plays by normal accounts (status = 1) up to and including stat_date.
+-- Plays made after stat_date are dropped so that they can never be picked up as
+-- "history" when a component's sample is topped up.
+valid_records AS (
+    SELECT
+        cr.component_unique_code,
+        cr.created_at,
+        CASE WHEN DATE(cr.created_at) = p.stat_date THEN 1 ELSE 0 END AS is_stat_day,
+        -- Oops rule: Oops/Opps always count; pass counts only for c_type containing
+        -- the literal text core_ or scence_ ("_" is escaped because it is a LIKE wildcard).
+        CASE
+            WHEN cr.play_result IN ('Oops', 'Opps') THEN 1
+            WHEN cr.play_result = 'pass'
+                 AND (cr.c_type LIKE '%core!_%' ESCAPE '!' OR cr.c_type LIKE '%scence!_%' ESCAPE '!') THEN 1
+            ELSE 0
+        END AS is_oops
+    FROM all_component_records AS cr
+    CROSS JOIN params AS p
+    INNER JOIN bi_vala_app_character AS c
+        ON cr.user_id = c.id AND c.deleted_at IS NULL
+    INNER JOIN bi_vala_app_account AS a
+        ON c.account_id = a.id AND a.status = 1 AND a.deleted_at IS NULL
+    WHERE cr.play_result IS NOT NULL
+      AND cr.component_unique_code IS NOT NULL
+      AND cr.level = 'A1'
+      AND DATE(cr.created_at) <= p.stat_date
+),
+-- Number of plays each component received on stat_date.
+stat_day_counts AS (
+    SELECT
+        component_unique_code,
+        COUNT(*) AS stat_day_cnt
+    FROM valid_records
+    WHERE is_stat_day = 1
+    GROUP BY component_unique_code
+),
+-- Rank each component's plays: stat_date plays first, then history newest-first.
+-- The inner join keeps only components that were played on stat_date.
+ranked_records AS (
+    SELECT
+        vr.component_unique_code,
+        vr.is_oops,
+        vr.is_stat_day,
+        sc.stat_day_cnt,
+        ROW_NUMBER() OVER (
+            PARTITION BY vr.component_unique_code
+            ORDER BY vr.is_stat_day DESC, vr.created_at DESC
+        ) AS rn
+    FROM valid_records AS vr
+    INNER JOIN stat_day_counts AS sc
+        ON vr.component_unique_code = sc.component_unique_code
+),
+-- Sample selection:
+--   stat_date plays >= min_samples: use every stat_date play
+--   stat_date plays <  min_samples: stat_date plays plus the most recent history,
+--                                   up to min_samples rows (or all rows if fewer exist)
+filtered_samples AS (
+    SELECT
+        r.component_unique_code,
+        r.is_oops
+    FROM ranked_records AS r
+    CROSS JOIN params AS p
+    WHERE (r.stat_day_cnt >= p.min_samples AND r.is_stat_day = 1)
+       OR (r.stat_day_cnt < p.min_samples AND r.rn <= p.min_samples)
+),
+-- Per-component totals; the rate stays numeric here so it can be sorted numerically.
+component_stats AS (
+    SELECT
+        component_unique_code AS 组件唯一编码,
+        COUNT(*) AS 总练习次数,
+        SUM(is_oops) AS Oops次数,
+        ROUND(SUM(is_oops)::DECIMAL / COUNT(*) * 100, 2) AS Oops率数值
+    FROM filtered_samples
+    GROUP BY component_unique_code
+)
+-- ORDER BY belongs on the outermost query: ordering inside a CTE is not guaranteed to survive.
+SELECT
+    组件唯一编码,
+    总练习次数,
+    Oops次数,
+    Oops率数值 || '%' AS Oops率
+FROM component_stats
+ORDER BY Oops率数值 DESC, 组件唯一编码;
diff --git a/scripts/stat_l1_exit_rate.sql b/scripts/stat_l1_exit_rate.sql
new file mode 100644
index 0000000..0062a7b
--- /dev/null
+++ b/scripts/stat_l1_exit_rate.sql
@@ -0,0 +1,108 @@
+-- Exit rate per component for difficulty level L1 (A1).
+-- A play counts as an exit when play_result is 'failed' or 'close'.
+-- Rules:
+--   1. By default the day before the run date is reported (stat_date in the params CTE).
+--   2. Components with >= min_samples plays on stat_date use all of that day's plays;
+--      otherwise the most recent history tops the sample up to min_samples rows
+--      (or all rows if fewer exist).
+--   3. Test accounts are excluded (only bi_vala_app_account.status = 1 is kept).
+-- Dialect: PostgreSQL-style syntax (::DECIMAL, ||, INTERVAL '1 day').
+WITH params AS (
+    SELECT
+        CAST(CURRENT_DATE - INTERVAL '1 day' AS DATE) AS stat_date,  -- day being reported
+        10 AS min_samples                                            -- minimum sample size per component
+),
+-- All non-deleted play records, gathered from the 8 shard tables.
+all_component_records AS (
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_0 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_1 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_2 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_3 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_4 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_5 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_6 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_7 WHERE deleted_at IS NULL
+),
+-- Level-A1 plays by normal accounts (status = 1) up to and including stat_date.
+-- Plays made after stat_date are dropped so that they can never be picked up as
+-- "history" when a component's sample is topped up.
+valid_records AS (
+    SELECT
+        cr.component_unique_code,
+        cr.created_at,
+        CASE WHEN DATE(cr.created_at) = p.stat_date THEN 1 ELSE 0 END AS is_stat_day,
+        CASE WHEN cr.play_result IN ('failed', 'close') THEN 1 ELSE 0 END AS is_exit
+    FROM all_component_records AS cr
+    CROSS JOIN params AS p
+    INNER JOIN bi_vala_app_character AS c
+        ON cr.user_id = c.id AND c.deleted_at IS NULL
+    INNER JOIN bi_vala_app_account AS a
+        ON c.account_id = a.id AND a.status = 1 AND a.deleted_at IS NULL
+    WHERE cr.play_result IS NOT NULL
+      AND cr.component_unique_code IS NOT NULL
+      AND cr.level = 'A1'
+      AND DATE(cr.created_at) <= p.stat_date
+),
+-- Number of plays each component received on stat_date.
+stat_day_counts AS (
+    SELECT
+        component_unique_code,
+        COUNT(*) AS stat_day_cnt
+    FROM valid_records
+    WHERE is_stat_day = 1
+    GROUP BY component_unique_code
+),
+-- Rank each component's plays: stat_date plays first, then history newest-first.
+-- The inner join keeps only components that were played on stat_date.
+ranked_records AS (
+    SELECT
+        vr.component_unique_code,
+        vr.is_exit,
+        vr.is_stat_day,
+        sc.stat_day_cnt,
+        ROW_NUMBER() OVER (
+            PARTITION BY vr.component_unique_code
+            ORDER BY vr.is_stat_day DESC, vr.created_at DESC
+        ) AS rn
+    FROM valid_records AS vr
+    INNER JOIN stat_day_counts AS sc
+        ON vr.component_unique_code = sc.component_unique_code
+),
+-- Sample selection:
+--   stat_date plays >= min_samples: use every stat_date play
+--   stat_date plays <  min_samples: stat_date plays plus the most recent history,
+--                                   up to min_samples rows (or all rows if fewer exist)
+filtered_samples AS (
+    SELECT
+        r.component_unique_code,
+        r.is_exit
+    FROM ranked_records AS r
+    CROSS JOIN params AS p
+    WHERE (r.stat_day_cnt >= p.min_samples AND r.is_stat_day = 1)
+       OR (r.stat_day_cnt < p.min_samples AND r.rn <= p.min_samples)
+),
+-- Per-component totals; the rate stays numeric here so it can be sorted numerically.
+component_stats AS (
+    SELECT
+        component_unique_code AS 组件唯一编码,
+        COUNT(*) AS 总练习次数,
+        SUM(is_exit) AS 退出次数,
+        ROUND(SUM(is_exit)::DECIMAL / COUNT(*) * 100, 2) AS 退出率数值
+    FROM filtered_samples
+    GROUP BY component_unique_code
+)
+-- ORDER BY belongs on the outermost query: ordering inside a CTE is not guaranteed to survive.
+SELECT
+    组件唯一编码,
+    总练习次数,
+    退出次数,
+    退出率数值 || '%' AS 退出率
+FROM component_stats
+ORDER BY 退出率数值 DESC, 组件唯一编码;
diff --git a/scripts/stat_l2.sql b/scripts/stat_l2.sql
new file mode 100644
index 0000000..201d344
--- /dev/null
+++ b/scripts/stat_l2.sql
@@ -0,0 +1,110 @@
+-- Oops rate per component for difficulty level L2 (A2).
+-- Dialect: PostgreSQL-style syntax (::DECIMAL, ||, INTERVAL '1 day').
+-- To report on another day or change the minimum sample size, edit the params CTE
+-- (e.g. stat_date = DATE '2026-04-12'); by default the day before the run date is used.
+WITH params AS (
+    SELECT
+        CAST(CURRENT_DATE - INTERVAL '1 day' AS DATE) AS stat_date,  -- day being reported
+        10 AS min_samples                                            -- minimum sample size per component
+),
+-- All non-deleted play records, gathered from the 8 shard tables.
+all_component_records AS (
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_0 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_1 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_2 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_3 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_4 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_5 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_6 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_7 WHERE deleted_at IS NULL
+),
+-- Level-A2 plays by normal accounts (status = 1) up to and including stat_date.
+-- Plays made after stat_date are dropped so that they can never be picked up as
+-- "history" when a component's sample is topped up.
+valid_records AS (
+    SELECT
+        cr.component_unique_code,
+        cr.created_at,
+        CASE WHEN DATE(cr.created_at) = p.stat_date THEN 1 ELSE 0 END AS is_stat_day,
+        -- Oops rule: Oops/Opps always count; pass counts only for c_type containing
+        -- the literal text core_ or scence_ ("_" is escaped because it is a LIKE wildcard).
+        CASE
+            WHEN cr.play_result IN ('Oops', 'Opps') THEN 1
+            WHEN cr.play_result = 'pass'
+                 AND (cr.c_type LIKE '%core!_%' ESCAPE '!' OR cr.c_type LIKE '%scence!_%' ESCAPE '!') THEN 1
+            ELSE 0
+        END AS is_oops
+    FROM all_component_records AS cr
+    CROSS JOIN params AS p
+    INNER JOIN bi_vala_app_character AS c
+        ON cr.user_id = c.id AND c.deleted_at IS NULL
+    INNER JOIN bi_vala_app_account AS a
+        ON c.account_id = a.id AND a.status = 1 AND a.deleted_at IS NULL
+    WHERE cr.play_result IS NOT NULL
+      AND cr.component_unique_code IS NOT NULL
+      AND cr.level = 'A2'
+      AND DATE(cr.created_at) <= p.stat_date
+),
+-- Number of plays each component received on stat_date.
+stat_day_counts AS (
+    SELECT
+        component_unique_code,
+        COUNT(*) AS stat_day_cnt
+    FROM valid_records
+    WHERE is_stat_day = 1
+    GROUP BY component_unique_code
+),
+-- Rank each component's plays: stat_date plays first, then history newest-first.
+-- The inner join keeps only components that were played on stat_date.
+ranked_records AS (
+    SELECT
+        vr.component_unique_code,
+        vr.is_oops,
+        vr.is_stat_day,
+        sc.stat_day_cnt,
+        ROW_NUMBER() OVER (
+            PARTITION BY vr.component_unique_code
+            ORDER BY vr.is_stat_day DESC, vr.created_at DESC
+        ) AS rn
+    FROM valid_records AS vr
+    INNER JOIN stat_day_counts AS sc
+        ON vr.component_unique_code = sc.component_unique_code
+),
+-- Sample selection:
+--   stat_date plays >= min_samples: use every stat_date play
+--   stat_date plays <  min_samples: stat_date plays plus the most recent history,
+--                                   up to min_samples rows (or all rows if fewer exist)
+filtered_samples AS (
+    SELECT
+        r.component_unique_code,
+        r.is_oops
+    FROM ranked_records AS r
+    CROSS JOIN params AS p
+    WHERE (r.stat_day_cnt >= p.min_samples AND r.is_stat_day = 1)
+       OR (r.stat_day_cnt < p.min_samples AND r.rn <= p.min_samples)
+),
+-- Per-component totals; the rate stays numeric here so it can be sorted numerically.
+component_stats AS (
+    SELECT
+        component_unique_code AS 组件唯一编码,
+        COUNT(*) AS 总练习次数,
+        SUM(is_oops) AS Oops次数,
+        ROUND(SUM(is_oops)::DECIMAL / COUNT(*) * 100, 2) AS Oops率数值
+    FROM filtered_samples
+    GROUP BY component_unique_code
+)
+-- ORDER BY belongs on the outermost query: ordering inside a CTE is not guaranteed to survive.
+SELECT
+    组件唯一编码,
+    总练习次数,
+    Oops次数,
+    Oops率数值 || '%' AS Oops率
+FROM component_stats
+ORDER BY Oops率数值 DESC, 组件唯一编码;
diff --git a/scripts/stat_l2_exit_rate.sql b/scripts/stat_l2_exit_rate.sql
new file mode 100644
index 0000000..e600919
--- /dev/null
+++ b/scripts/stat_l2_exit_rate.sql
@@ -0,0 +1,108 @@
+-- Exit rate per component for difficulty level L2 (A2).
+-- A play counts as an exit when play_result is 'failed' or 'close'.
+-- Rules:
+--   1. By default the day before the run date is reported (stat_date in the params CTE).
+--   2. Components with >= min_samples plays on stat_date use all of that day's plays;
+--      otherwise the most recent history tops the sample up to min_samples rows
+--      (or all rows if fewer exist).
+--   3. Test accounts are excluded (only bi_vala_app_account.status = 1 is kept).
+-- Dialect: PostgreSQL-style syntax (::DECIMAL, ||, INTERVAL '1 day').
+WITH params AS (
+    SELECT
+        CAST(CURRENT_DATE - INTERVAL '1 day' AS DATE) AS stat_date,  -- day being reported
+        10 AS min_samples                                            -- minimum sample size per component
+),
+-- All non-deleted play records, gathered from the 8 shard tables.
+all_component_records AS (
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_0 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_1 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_2 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_3 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_4 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_5 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_6 WHERE deleted_at IS NULL
+    UNION ALL
+    SELECT user_id, component_unique_code, play_result, created_at, level, c_type FROM bi_user_component_play_record_7 WHERE deleted_at IS NULL
+),
+-- Level-A2 plays by normal accounts (status = 1) up to and including stat_date.
+-- Plays made after stat_date are dropped so that they can never be picked up as
+-- "history" when a component's sample is topped up.
+valid_records AS (
+    SELECT
+        cr.component_unique_code,
+        cr.created_at,
+        CASE WHEN DATE(cr.created_at) = p.stat_date THEN 1 ELSE 0 END AS is_stat_day,
+        CASE WHEN cr.play_result IN ('failed', 'close') THEN 1 ELSE 0 END AS is_exit
+    FROM all_component_records AS cr
+    CROSS JOIN params AS p
+    INNER JOIN bi_vala_app_character AS c
+        ON cr.user_id = c.id AND c.deleted_at IS NULL
+    INNER JOIN bi_vala_app_account AS a
+        ON c.account_id = a.id AND a.status = 1 AND a.deleted_at IS NULL
+    WHERE cr.play_result IS NOT NULL
+      AND cr.component_unique_code IS NOT NULL
+      AND cr.level = 'A2'
+      AND DATE(cr.created_at) <= p.stat_date
+),
+-- Number of plays each component received on stat_date.
+stat_day_counts AS (
+    SELECT
+        component_unique_code,
+        COUNT(*) AS stat_day_cnt
+    FROM valid_records
+    WHERE is_stat_day = 1
+    GROUP BY component_unique_code
+),
+-- Rank each component's plays: stat_date plays first, then history newest-first.
+-- The inner join keeps only components that were played on stat_date.
+ranked_records AS (
+    SELECT
+        vr.component_unique_code,
+        vr.is_exit,
+        vr.is_stat_day,
+        sc.stat_day_cnt,
+        ROW_NUMBER() OVER (
+            PARTITION BY vr.component_unique_code
+            ORDER BY vr.is_stat_day DESC, vr.created_at DESC
+        ) AS rn
+    FROM valid_records AS vr
+    INNER JOIN stat_day_counts AS sc
+        ON vr.component_unique_code = sc.component_unique_code
+),
+-- Sample selection:
+--   stat_date plays >= min_samples: use every stat_date play
+--   stat_date plays <  min_samples: stat_date plays plus the most recent history,
+--                                   up to min_samples rows (or all rows if fewer exist)
+filtered_samples AS (
+    SELECT
+        r.component_unique_code,
+        r.is_exit
+    FROM ranked_records AS r
+    CROSS JOIN params AS p
+    WHERE (r.stat_day_cnt >= p.min_samples AND r.is_stat_day = 1)
+       OR (r.stat_day_cnt < p.min_samples AND r.rn <= p.min_samples)
+),
+-- Per-component totals; the rate stays numeric here so it can be sorted numerically.
+component_stats AS (
+    SELECT
+        component_unique_code AS 组件唯一编码,
+        COUNT(*) AS 总练习次数,
+        SUM(is_exit) AS 退出次数,
+        ROUND(SUM(is_exit)::DECIMAL / COUNT(*) * 100, 2) AS 退出率数值
+    FROM filtered_samples
+    GROUP BY component_unique_code
+)
+-- ORDER BY belongs on the outermost query: ordering inside a CTE is not guaranteed to survive.
+SELECT
+    组件唯一编码,
+    总练习次数,
+    退出次数,
+    退出率数值 || '%' AS 退出率
+FROM component_stats
+ORDER BY 退出率数值 DESC, 组件唯一编码;