auto-sync: vala-component-oops-stat 2026-04-13_12:29
This commit is contained in:
commit
f1057bb361
41
SKILL.md
Normal file
41
SKILL.md
Normal file
@ -0,0 +1,41 @@
|
||||
---
|
||||
name: vala-component-oops-stat
|
||||
description: 瓦拉英语组件练习Oops率统计工具,支持按日期统计、自动剔除测试账号、分L1/L2等级、自定义样本量规则、导出Excel报表。使用场景:(1) 统计组件练习错误率/Oops率 (2) 按难度等级拆分L1(A1)/L2(A2)统计结果 (3) 练习次数不足10次自动补充历史记录 (4) 导出多sheet Excel报表 (5) 按Oops率降序排序展示高错误率组件
|
||||
---
|
||||
|
||||
# 瓦拉英语组件Oops率统计技能
|
||||
|
||||
## 功能说明
|
||||
用于统计飞书多维表格中组件练习的Oops(错误)率,支持灵活配置统计规则,自动处理数据口径问题,输出标准化统计结果。
|
||||
|
||||
## 核心特性
|
||||
✅ 自动剔除测试账号:仅保留`bi_vala_app_account.status = 1`的正常用户练习记录
|
||||
✅ 等级拆分:自动按`level`字段拆分L1(A1)/L2(A2)两个难度等级分别统计
|
||||
✅ 样本量规则:昨日练习≥10次用昨日全量数据,<10次自动补充历史记录至10次,历史不足10次取全部记录;昨日没有任何练习记录的组件不纳入统计
|
||||
✅ 正确排序:按Oops率数值从高到低排序,避免字符串排序错误
|
||||
✅ 自动导出:生成包含两个sheet的Excel报表,直接发送给用户
|
||||
|
||||
## 使用参数
|
||||
| 参数 | 说明 | 默认值 |
|
||||
|------|------|--------|
|
||||
| 统计日期 | 要统计的日期(格式YYYY-MM-DD) | 昨日(当前日期-1天) |
|
||||
| 最小样本量 | 组件最少统计样本量 | 10 |
|
||||
| 是否剔测试账号 | 是否排除测试账号练习记录 | 是 |
|
||||
| 是否分等级 | 是否拆分L1/L2分别统计 | 是 |
|
||||
| 导出格式 | 输出格式(CSV/Excel) | Excel |
|
||||
|
||||
## 操作步骤
|
||||
1. 确认用户统计需求:统计日期、样本量规则、是否分等级等
|
||||
2. 执行对应等级的统计SQL脚本(`scripts/stat_l1.sql`、`scripts/stat_l2.sql`),并将两份查询结果分别导出为CSV文件,供下一步生成报表使用
|
||||
3. 运行`scripts/generate_excel.py`生成Excel报表,用法:`python generate_excel.py <l1_csv路径> <l2_csv路径> <输出Excel路径>`
|
||||
4. 将报表通过飞书发送给用户
|
||||
|
||||
## 脚本说明
|
||||
### scripts/stat_l1.sql
|
||||
统计L1(A1)等级组件Oops率的SQL脚本,可修改日期参数调整统计时间
|
||||
### scripts/stat_l2.sql
|
||||
统计L2(A2)等级组件Oops率的SQL脚本,可修改日期参数调整统计时间
|
||||
### scripts/generate_excel.py
|
||||
将CSV统计结果合并生成带多sheet的Excel报表
|
||||
### references/table_schema.md
|
||||
相关数据表结构说明和字段含义参考
|
||||
25
references/table_schema.md
Normal file
25
references/table_schema.md
Normal file
@ -0,0 +1,25 @@
|
||||
# 相关数据表结构说明
|
||||
|
||||
## bi_user_component_play_record_* 分表(组件练习记录表)
|
||||
| 字段名 | 类型 | 说明 |
|
||||
|-------|------|------|
|
||||
| user_id | bigint | 角色ID,关联bi_vala_app_character.id |
|
||||
| component_unique_code | varchar | 组件唯一编码,业务系统中组件的唯一标识 |
|
||||
| play_result | varchar | 练习结果:Perfect/Good/Oops/Opps(Oops和Opps都是错误结果) |
|
||||
| created_at | timestamp | 练习时间 |
|
||||
| level | varchar | 难度等级:A1(L1)/A2(L2) |
|
||||
| deleted_at | timestamp | 删除时间,为空表示记录有效 |
|
||||
|
||||
## bi_vala_app_character(角色表)
|
||||
| 字段名 | 类型 | 说明 |
|
||||
|-------|------|------|
|
||||
| id | bigint | 角色ID |
|
||||
| account_id | bigint | 账号ID,关联bi_vala_app_account.id |
|
||||
| deleted_at | timestamp | 删除时间,为空表示记录有效 |
|
||||
|
||||
## bi_vala_app_account(账号表)
|
||||
| 字段名 | 类型 | 说明 |
|
||||
|-------|------|------|
|
||||
| id | bigint | 账号ID |
|
||||
| status | int | 账号状态:1=正常用户,其他=测试账号/禁用账号 |
|
||||
| deleted_at | timestamp | 删除时间,为空表示记录有效 |
|
||||
34
scripts/generate_excel.py
Normal file
34
scripts/generate_excel.py
Normal file
@ -0,0 +1,34 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import sys
|
||||
|
||||
# NumPy version compatibility shim: expose a `_get_promotion_state` attribute
# on the numpy module that accepts any arguments and always returns 0.
# NOTE(review): presumably needed because some installed library calls this
# private hook on NumPy builds that lack it -- confirm which versions need it.
try:
    np._get_promotion_state = lambda *args, **kwargs: 0
except Exception:
    # Best effort only: if the attribute cannot be set, continue without it.
    # `Exception` (not a bare except) so Ctrl-C / SystemExit still propagate.
    pass
|
||||
|
||||
def generate_excel(l1_csv_path, l2_csv_path, output_path):
    """
    Build an Excel workbook with one sheet per level from two CSV files.

    :param l1_csv_path: path of the CSV file holding the L1 statistics
    :param l2_csv_path: path of the CSV file holding the L2 statistics
    :param output_path: path of the Excel file to write
    """
    # Load both inputs up front, before the workbook writer is opened.
    sheets = (
        ('L1等级组件', pd.read_csv(l1_csv_path)),
        ('L2等级组件', pd.read_csv(l2_csv_path)),
    )

    # One sheet per frame; the DataFrame index is not written as a column.
    with pd.ExcelWriter(output_path) as workbook:
        for sheet_title, frame in sheets:
            frame.to_excel(workbook, sheet_name=sheet_title, index=False)

    print(f"Excel报表生成成功:{output_path}")
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: expects exactly three positional arguments
    # (L1 CSV path, L2 CSV path, output Excel path).
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        print("用法:python generate_excel.py <l1_csv路径> <l2_csv路径> <输出Excel路径>")
        sys.exit(1)

    l1_csv, l2_csv, excel_out = cli_args
    generate_excel(l1_csv, l2_csv, excel_out)
|
||||
78
scripts/stat_l1.sql
Normal file
78
scripts/stat_l1.sql
Normal file
@ -0,0 +1,78 @@
|
||||
-- Oops-rate report for L1 (level = 'A1') components.
-- To report on a different day, change the single date literal in the
-- `params` CTE below; every other step reads the date from there.
WITH params AS (
    SELECT DATE '2026-04-12' AS stat_date
),
-- All non-deleted play records, gathered from the eight shard tables (_0 .. _7).
all_component_records AS (
    SELECT user_id, component_unique_code, play_result, created_at, level
    FROM bi_user_component_play_record_0
    WHERE deleted_at IS NULL
    UNION ALL
    SELECT user_id, component_unique_code, play_result, created_at, level
    FROM bi_user_component_play_record_1
    WHERE deleted_at IS NULL
    UNION ALL
    SELECT user_id, component_unique_code, play_result, created_at, level
    FROM bi_user_component_play_record_2
    WHERE deleted_at IS NULL
    UNION ALL
    SELECT user_id, component_unique_code, play_result, created_at, level
    FROM bi_user_component_play_record_3
    WHERE deleted_at IS NULL
    UNION ALL
    SELECT user_id, component_unique_code, play_result, created_at, level
    FROM bi_user_component_play_record_4
    WHERE deleted_at IS NULL
    UNION ALL
    SELECT user_id, component_unique_code, play_result, created_at, level
    FROM bi_user_component_play_record_5
    WHERE deleted_at IS NULL
    UNION ALL
    SELECT user_id, component_unique_code, play_result, created_at, level
    FROM bi_user_component_play_record_6
    WHERE deleted_at IS NULL
    UNION ALL
    SELECT user_id, component_unique_code, play_result, created_at, level
    FROM bi_user_component_play_record_7
    WHERE deleted_at IS NULL
),
-- A1 records that belong to a non-deleted character whose non-deleted account
-- has status = 1, flagged by whether they fall on the statistic date.
valid_records AS (
    SELECT
        cr.component_unique_code,
        cr.play_result,
        cr.created_at,
        CASE WHEN DATE(cr.created_at) = p.stat_date THEN 1 ELSE 0 END AS is_yesterday
    FROM all_component_records AS cr
    CROSS JOIN params AS p
    INNER JOIN bi_vala_app_character AS c
        ON cr.user_id = c.id
        AND c.deleted_at IS NULL
    INNER JOIN bi_vala_app_account AS a
        ON c.account_id = a.id
        AND a.status = 1
        AND a.deleted_at IS NULL
    WHERE cr.play_result IS NOT NULL
      AND cr.component_unique_code IS NOT NULL
      AND cr.level = 'A1'
      -- Rows created after the statistic date are not "history"; without this
      -- bound a re-run for an older date would pad samples with newer rows.
      AND DATE(cr.created_at) <= p.stat_date
),
-- Number of plays per component on the statistic date.
yesterday_counts AS (
    SELECT
        component_unique_code,
        COUNT(*) AS yesterday_cnt
    FROM valid_records
    WHERE is_yesterday = 1
    GROUP BY component_unique_code
),
-- Rank each component's records: statistic-date rows first, then history from
-- newest to oldest. The inner join keeps only components that were played on
-- the statistic date.
-- NOTE(review): rows sharing the same created_at are ranked in arbitrary
-- order; no unique record id is selected here to break the tie.
ranked_records AS (
    SELECT
        vr.component_unique_code,
        vr.play_result,
        vr.is_yesterday,
        yc.yesterday_cnt,
        ROW_NUMBER() OVER (
            PARTITION BY vr.component_unique_code
            ORDER BY vr.is_yesterday DESC, vr.created_at DESC
        ) AS rn
    FROM valid_records AS vr
    INNER JOIN yesterday_counts AS yc
        ON vr.component_unique_code = yc.component_unique_code
),
-- Sample selection:
--   >= 10 plays on the statistic date: every statistic-date row;
--   <  10 plays on the statistic date: the top 10 ranked rows, i.e. all
--   statistic-date rows plus the most recent history (fewer than 10 rows in
--   total if the component has less history than that).
filtered_samples AS (
    SELECT
        component_unique_code,
        play_result
    FROM ranked_records
    WHERE (yesterday_cnt >= 10 AND is_yesterday = 1)
       OR (yesterday_cnt < 10 AND rn <= 10)
),
-- Per-component totals and the Oops rate as a number (percent, 2 decimals).
-- Both 'Oops' and 'Opps' count as an Oops result. Every group has at least
-- one row, so the division needs no zero guard.
component_stats AS (
    SELECT
        component_unique_code AS 组件唯一编码,
        COUNT(*) AS 总练习次数,
        SUM(CASE WHEN play_result IN ('Oops', 'Opps') THEN 1 ELSE 0 END) AS Oops次数,
        ROUND(
            CAST(SUM(CASE WHEN play_result IN ('Oops', 'Opps') THEN 1 ELSE 0 END) AS DECIMAL)
                / COUNT(*) * 100,
            2
        ) AS Oops率数值
    FROM filtered_samples
    GROUP BY component_unique_code
)
-- The ordering is applied on the outermost SELECT because row order inside a
-- CTE is not guaranteed to reach the final result. Sorting uses the numeric
-- rate, not the '%'-suffixed text; the component code breaks ties.
SELECT
    组件唯一编码,
    总练习次数,
    Oops次数,
    Oops率数值 || '%' AS Oops率
FROM component_stats
ORDER BY Oops率数值 DESC, 组件唯一编码;
|
||||
78
scripts/stat_l2.sql
Normal file
78
scripts/stat_l2.sql
Normal file
@ -0,0 +1,78 @@
|
||||
-- Oops-rate report for L2 (level = 'A2') components.
-- To report on a different day, change the single date literal in the
-- `params` CTE below; every other step reads the date from there.
WITH params AS (
    SELECT DATE '2026-04-12' AS stat_date
),
-- All non-deleted play records, gathered from the eight shard tables (_0 .. _7).
all_component_records AS (
    SELECT user_id, component_unique_code, play_result, created_at, level
    FROM bi_user_component_play_record_0
    WHERE deleted_at IS NULL
    UNION ALL
    SELECT user_id, component_unique_code, play_result, created_at, level
    FROM bi_user_component_play_record_1
    WHERE deleted_at IS NULL
    UNION ALL
    SELECT user_id, component_unique_code, play_result, created_at, level
    FROM bi_user_component_play_record_2
    WHERE deleted_at IS NULL
    UNION ALL
    SELECT user_id, component_unique_code, play_result, created_at, level
    FROM bi_user_component_play_record_3
    WHERE deleted_at IS NULL
    UNION ALL
    SELECT user_id, component_unique_code, play_result, created_at, level
    FROM bi_user_component_play_record_4
    WHERE deleted_at IS NULL
    UNION ALL
    SELECT user_id, component_unique_code, play_result, created_at, level
    FROM bi_user_component_play_record_5
    WHERE deleted_at IS NULL
    UNION ALL
    SELECT user_id, component_unique_code, play_result, created_at, level
    FROM bi_user_component_play_record_6
    WHERE deleted_at IS NULL
    UNION ALL
    SELECT user_id, component_unique_code, play_result, created_at, level
    FROM bi_user_component_play_record_7
    WHERE deleted_at IS NULL
),
-- A2 records that belong to a non-deleted character whose non-deleted account
-- has status = 1, flagged by whether they fall on the statistic date.
valid_records AS (
    SELECT
        cr.component_unique_code,
        cr.play_result,
        cr.created_at,
        CASE WHEN DATE(cr.created_at) = p.stat_date THEN 1 ELSE 0 END AS is_yesterday
    FROM all_component_records AS cr
    CROSS JOIN params AS p
    INNER JOIN bi_vala_app_character AS c
        ON cr.user_id = c.id
        AND c.deleted_at IS NULL
    INNER JOIN bi_vala_app_account AS a
        ON c.account_id = a.id
        AND a.status = 1
        AND a.deleted_at IS NULL
    WHERE cr.play_result IS NOT NULL
      AND cr.component_unique_code IS NOT NULL
      AND cr.level = 'A2'
      -- Rows created after the statistic date are not "history"; without this
      -- bound a re-run for an older date would pad samples with newer rows.
      AND DATE(cr.created_at) <= p.stat_date
),
-- Number of plays per component on the statistic date.
yesterday_counts AS (
    SELECT
        component_unique_code,
        COUNT(*) AS yesterday_cnt
    FROM valid_records
    WHERE is_yesterday = 1
    GROUP BY component_unique_code
),
-- Rank each component's records: statistic-date rows first, then history from
-- newest to oldest. The inner join keeps only components that were played on
-- the statistic date.
-- NOTE(review): rows sharing the same created_at are ranked in arbitrary
-- order; no unique record id is selected here to break the tie.
ranked_records AS (
    SELECT
        vr.component_unique_code,
        vr.play_result,
        vr.is_yesterday,
        yc.yesterday_cnt,
        ROW_NUMBER() OVER (
            PARTITION BY vr.component_unique_code
            ORDER BY vr.is_yesterday DESC, vr.created_at DESC
        ) AS rn
    FROM valid_records AS vr
    INNER JOIN yesterday_counts AS yc
        ON vr.component_unique_code = yc.component_unique_code
),
-- Sample selection:
--   >= 10 plays on the statistic date: every statistic-date row;
--   <  10 plays on the statistic date: the top 10 ranked rows, i.e. all
--   statistic-date rows plus the most recent history (fewer than 10 rows in
--   total if the component has less history than that).
filtered_samples AS (
    SELECT
        component_unique_code,
        play_result
    FROM ranked_records
    WHERE (yesterday_cnt >= 10 AND is_yesterday = 1)
       OR (yesterday_cnt < 10 AND rn <= 10)
),
-- Per-component totals and the Oops rate as a number (percent, 2 decimals).
-- Both 'Oops' and 'Opps' count as an Oops result. Every group has at least
-- one row, so the division needs no zero guard.
component_stats AS (
    SELECT
        component_unique_code AS 组件唯一编码,
        COUNT(*) AS 总练习次数,
        SUM(CASE WHEN play_result IN ('Oops', 'Opps') THEN 1 ELSE 0 END) AS Oops次数,
        ROUND(
            CAST(SUM(CASE WHEN play_result IN ('Oops', 'Opps') THEN 1 ELSE 0 END) AS DECIMAL)
                / COUNT(*) * 100,
            2
        ) AS Oops率数值
    FROM filtered_samples
    GROUP BY component_unique_code
)
-- The ordering is applied on the outermost SELECT because row order inside a
-- CTE is not guaranteed to reach the final result. Sorting uses the numeric
-- rate, not the '%'-suffixed text; the component code breaks ties.
SELECT
    组件唯一编码,
    总练习次数,
    Oops次数,
    Oops率数值 || '%' AS Oops率
FROM component_stats
ORDER BY Oops率数值 DESC, 组件唯一编码;
|
||||
Loading…
Reference in New Issue
Block a user