# ai_member_xiaoban/generate_text_report.py
# Repository-viewer snapshot: 2026-04-22 08:00:01 +08:00 · 331 lines · 14 KiB · Python
#
# Viewer warning: this file contains Unicode characters that might be confused
# with other characters (presumably the full-width punctuation used in the
# Chinese report text). If that is intentional, the warning can be ignored;
# the viewer's "Escape" button reveals the affected characters.

import json
import os
import re
from datetime import datetime
# Markdown template for one learner's study-analysis report.
#
# Every ``{placeholder}`` is filled by ``str.format`` with keyword arguments;
# the ``*_rows`` placeholders receive pre-rendered Markdown table rows joined
# by newlines. Blank lines separate the blocks on purpose: without them
# "生成时间…" followed by "---" is parsed as a setext heading, and the
# consecutive ✅/⚠️ lines collapse into a single paragraph.
REPORT_TEMPLATE = """
# 📚 学情分析报告 - 学员{user_id} {level}

生成时间:{generate_time}

---

## 一、基本信息

- 学员ID:{user_id}
- 学习级别:{level_text}
- 已完成单元:{unit_count}个单元(Unit {min_unit} ~ Unit {max_unit})
- 总学习时长:约{total_hours}小时{total_minutes}分钟
- 学习周期:{start_date} ~ {end_date}

## 二、整体学习情况概览

- 总互动次数:{total_interactions}次(平均每单元{avg_interactions}个互动组件)
- 总练习题数:{total_exercises}道(平均每单元{avg_exercises}道巩固练习)
- 单元平均掌握率:{avg_mastery_rate}%,整体属于{level_desc}水平
- 能力训练完成情况:共{total_trainings}项训练,{perfect_count}项Perfect、{good_count}项Good、{failed_count}项待提升

## 三、各维度表现分析

### 1. 互动组件表现(平均正确率{avg_component_accuracy}%)

| 单元 | 课时数 | Perfect | Good | Oops | 正确率 | 薄弱知识点 |
|------|---------|---------|------|------|--------|------------|
{component_rows}

### 2. 巩固练习表现(平均正确率{avg_review_accuracy}%)

| 单元 | 练习数 | 正确 | 错误 | 正确率 | 易错知识点 |
|------|---------|------|------|--------|------------|
{review_rows}

### 3. 能力训练表现(平均正确率{avg_training_accuracy}%)

| 能力维度 | 数量 | Perfect | Good | Oops | 薄弱项 |
|----------|------|---------|------|------|--------|
{training_rows}

## 四、优势总结

- ✅ **{advantage_1}**:{advantage_desc_1}
- ✅ **{advantage_2}**:{advantage_desc_2}
- ✅ **{advantage_3}**:{advantage_desc_3}
- ✅ **{advantage_4}**:{advantage_desc_4}

## 五、待提升方向

- ⚠️ **{improvement_1}**:{improvement_desc_1}
- ⚠️ **{improvement_2}**:{improvement_desc_2}
- ⚠️ **{improvement_3}**:{improvement_desc_3}
- ⚠️ **{improvement_4}**:{improvement_desc_4}

## 六、个性化学习建议

1. **{suggestion_1}**:{suggestion_desc_1}
2. **{suggestion_2}**:{suggestion_desc_2}
3. **{suggestion_3}**:{suggestion_desc_3}
4. **{suggestion_4}**:{suggestion_desc_4}
"""
# Matches the unit token embedded in a data file name, e.g. "_U12".
_UNIT_NUMBER_RE = re.compile(r"_U(\d+)")


def _unit_number(filename):
    """Return the unit number found as ``_U<digits>`` in *filename*, or None."""
    match = _UNIT_NUMBER_RE.search(filename)
    return int(match.group(1)) if match else None


def _percentage(part, whole):
    """Return part/whole as a percentage rounded to one decimal (0 if whole is 0)."""
    return round(part / whole * 100, 1) if whole > 0 else 0


def _count_results(records, outcome):
    """Count the records whose ``"result"`` field equals *outcome*."""
    return sum(1 for record in records if record.get("result") == outcome)


def _date_text(timestamps, pick):
    """Format ``pick(timestamps)`` as YYYY-MM-DD; "" when no timestamp is set."""
    # Missing/zero timestamps are ignored so they cannot drag the date to 1970-01-01.
    usable = [ts for ts in timestamps if ts]
    if not usable:
        return ""
    return datetime.fromtimestamp(pick(usable)).strftime("%Y-%m-%d")


def analyze_user_data(
    user_id,
    level,
    data_dir="/root/.openclaw/workspace-xiaoban/skills/study-analysis/output/",
):
    """Aggregate one learner's per-unit JSON files into a Markdown text report.

    Files are taken from *data_dir* when their name starts with
    ``{user_id}_L{level}_``, ends with ``.json`` and contains a ``_U<n>`` unit
    token; they are processed in ascending unit order.

    Args:
        user_id: Learner identifier, used in the file-name prefix and the report.
        level: Level number, used in the file-name prefix and the report.
        data_dir: Directory holding the unit files. Defaults to the path that
            was previously hard-coded in this function.

    Returns:
        ``(report, None)`` on success, or ``(None, error_message)`` when no
        matching unit file exists (including when *data_dir* is missing).
    """
    prefix = f"{user_id}_L{level}_"
    try:
        candidates = os.listdir(data_dir)
    except FileNotFoundError:
        candidates = []

    # Keep only files that carry a parseable unit number; anything else that
    # merely shares the prefix (e.g. a summary file) is skipped, not fatal.
    numbered_files = []
    for name in candidates:
        if name.startswith(prefix) and name.endswith(".json"):
            number = _unit_number(name)
            if number is not None:
                numbered_files.append((number, name))
    if not numbered_files:
        return None, f"未找到用户{user_id} Level{level}的学习数据"
    numbered_files.sort()

    units = []
    for number, name in numbered_files:
        with open(os.path.join(data_dir, name), 'r', encoding='utf-8') as fp:
            units.append({"unit": number, "data": json.load(fp)})

    # Running totals across all units.
    total_interactions = 0
    total_exercises = 0
    total_duration = 0
    total_component_accuracy = 0
    total_review_accuracy = 0
    perfect_count = 0
    good_count = 0
    failed_count = 0
    component_rows = []
    review_rows = []
    knowledge_points = {}  # name -> {"title", "total", "count"}

    for unit_info in units:
        unit = unit_info["unit"]
        # The payload of interest sits under the file's "data" key.
        real_data = unit_info["data"].get("data", {})
        summary = real_data.get("summary", {})

        # Interactive components.
        component_records = real_data.get("component_records", [])
        comp_total = len(component_records)
        comp_perfect = _count_results(component_records, "perfect")
        comp_good = _count_results(component_records, "good")
        comp_oops = _count_results(component_records, "oops")
        comp_acc = _percentage(comp_perfect + comp_good, comp_total)
        total_interactions += comp_total
        total_component_accuracy += comp_acc
        perfect_count += comp_perfect
        good_count += comp_good
        failed_count += comp_oops
        # Separate the (at most three) names so they stay readable in the cell.
        weak_kp_text = "、".join(summary.get("weak_knowledge_points", [])[:3])
        component_rows.append(
            f"| Unit {unit} | {summary.get('lesson_count', 0)} | {comp_perfect} | "
            f"{comp_good} | {comp_oops} | {comp_acc}% | {weak_kp_text} |"
        )

        # Review exercises.
        review_exercises = real_data.get("review_exercises", [])
        review_total = len(review_exercises)
        review_correct = _count_results(review_exercises, "correct")
        review_wrong = _count_results(review_exercises, "wrong")
        review_acc = _percentage(review_correct, review_total)
        total_exercises += review_total
        total_review_accuracy += review_acc
        error_kp_text = "、".join(summary.get("error_knowledge_points", [])[:3])
        review_rows.append(
            f"| Unit {unit} | {review_total} | {review_correct} | {review_wrong} | "
            f"{review_acc}% | {error_kp_text} |"
        )

        total_duration += summary.get("total_duration", 0)

        # Accumulate sum and count per knowledge point so the mean is exact
        # no matter how many units mention it.
        for kp in summary.get("knowledge_points_mastery", []):
            kp_name = kp.get("name")
            if not kp_name:
                continue  # a nameless entry cannot be shown in the report
            entry = knowledge_points.setdefault(
                kp_name, {"title": kp_name, "total": 0, "count": 0}
            )
            entry["total"] += kp.get("mastery_rate", 0)
            entry["count"] += 1

    for entry in knowledge_points.values():
        entry["mastery"] = entry["total"] / entry["count"]

    # Derived summary values.
    unit_count = len(units)
    min_unit = min(u["unit"] for u in units)
    max_unit = max(u["unit"] for u in units)
    total_hours = total_duration // 3600
    total_minutes = (total_duration % 3600) // 60
    start_date = _date_text([u["data"].get("start_time") for u in units], min)
    end_date = _date_text([u["data"].get("end_time") for u in units], max)
    avg_interactions = round(total_interactions / unit_count, 1)
    avg_exercises = round(total_exercises / unit_count, 1)
    avg_component_accuracy = round(total_component_accuracy / unit_count, 1)
    avg_review_accuracy = round(total_review_accuracy / unit_count, 1)
    # NOTE(review): these three counters come from the interactive-component
    # records, yet the template prints them on the "能力训练完成情况" line.
    # Left unchanged; confirm whether ability-training counts were intended.
    total_trainings = perfect_count + good_count + failed_count
    avg_mastery_rate = round((avg_component_accuracy + avg_review_accuracy) / 2, 1)

    if avg_mastery_rate >= 90:
        level_desc = "优秀"
    elif avg_mastery_rate >= 80:
        level_desc = "良好"
    elif avg_mastery_rate >= 70:
        level_desc = "合格"
    else:
        level_desc = "待提升"

    # Ability-training statistics, one bucket per recognised ability type.
    ability_stats = {
        ability: {"total": 0, "perfect": 0, "good": 0, "oops": 0}
        for ability in ("听力", "阅读", "口语", "写作")
    }
    for unit_info in units:
        real_data = unit_info["data"].get("data", {})
        for item in real_data.get("ability_training", []):
            stats = ability_stats.get(item.get("type"))
            if stats is None:
                continue
            stats["total"] += 1
            result = item.get("result")
            if result in ("perfect", "good", "oops"):
                stats[result] += 1

    training_rows = []
    ability_accuracies = []
    for ability, stats in ability_stats.items():
        if stats["total"] <= 0:
            continue
        raw_acc = (stats["perfect"] + stats["good"]) / stats["total"] * 100
        ability_accuracies.append(raw_acc)
        weak = "暂无" if round(raw_acc, 1) >= 80 else "该维度整体掌握不足"
        training_rows.append(
            f"| {ability} | {stats['total']} | {stats['perfect']} | "
            f"{stats['good']} | {stats['oops']} | {weak} |"
        )
    avg_training_accuracy = (
        round(sum(ability_accuracies) / len(ability_accuracies), 1)
        if ability_accuracies else 0
    )

    # Strongest / weakest knowledge points replace the generic texts below.
    sorted_kps = sorted(knowledge_points.values(), key=lambda x: x["mastery"], reverse=True)
    top_kps = [kp for kp in sorted_kps if kp["mastery"] >= 85][:4]
    low_kps = [kp for kp in sorted_kps if kp["mastery"] < 70][:4]

    advantages = ["基础能力扎实", "听力理解突出", "综合理解能力强", "学习习惯良好"]
    advantage_descs = [
        "单词跟读、听选类题型正确率超过95%,基础词汇和句型掌握牢固",
        "听力类互动组件平均正确率超过90%,能够快速理解听力材料中的关键信息",
        "阅读理解类题型表现稳定,能够准确把握文本核心含义和细节信息",
        "学习进度连续且稳定单元完成率100%,学习投入度高"
    ]
    for i, kp in enumerate(top_kps):
        advantages[i] = kp["title"] + "掌握出色"
        advantage_descs[i] = f"该知识点相关练习平均正确率超过{round(kp['mastery'])}%,应用熟练"

    improvements = ["句型应用能力", "输出类能力", "语法规则细节", "逻辑关联能力"]
    improvement_descs = [
        "句型替换、单词排序类题型正确率低于80%,需要加强句型结构的灵活应用练习",
        "口语表达和写作输出类题型还有提升空间,建议增加开口练习频次",
        "语法细节知识点容易混淆,建议整理易错语法点,定期复习巩固",
        "长文本理解中的逻辑关联把握不足,可针对性训练逻辑梳理类题目"
    ]
    for i, kp in enumerate(low_kps):
        improvements[i] = kp["title"] + "应用能力"
        improvement_descs[i] = f"该知识点相关练习平均正确率仅{round(kp['mastery'])}%,需要加强专项训练"

    suggestions = ["词汇句型专项训练", "听力口语强化", "阅读写作提升", "错题定期复习"]
    suggestion_descs = [
        "每天花10分钟做句型替换练习重点练习高频易错核心句型",
        "每天跟读15分钟课程对话内容模仿语音语调提升口语流利度",
        "每周完成2篇短篇阅读练习训练快速抓取关键信息和梳理逻辑的能力",
        "每周整理当周错题,分析错误原因,针对薄弱知识点进行二次巩固练习"
    ]

    # Expand the three title/description lists into the numbered placeholders
    # (advantage_1, advantage_desc_1, ...) that the template expects.
    numbered_fields = {}
    for group, titles, descs in (
        ("advantage", advantages, advantage_descs),
        ("improvement", improvements, improvement_descs),
        ("suggestion", suggestions, suggestion_descs),
    ):
        for index, (title, desc) in enumerate(zip(titles, descs), start=1):
            numbered_fields[f"{group}_{index}"] = title
            numbered_fields[f"{group}_desc_{index}"] = desc

    report = REPORT_TEMPLATE.format(
        user_id=user_id,
        level=f"Level {level}",
        level_text=f"Level {level}",
        generate_time=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        unit_count=unit_count,
        min_unit=min_unit,
        max_unit=max_unit,
        total_hours=total_hours,
        total_minutes=total_minutes,
        start_date=start_date,
        end_date=end_date,
        total_interactions=total_interactions,
        avg_interactions=avg_interactions,
        total_exercises=total_exercises,
        avg_exercises=avg_exercises,
        avg_mastery_rate=avg_mastery_rate,
        level_desc=level_desc,
        total_trainings=total_trainings,
        perfect_count=perfect_count,
        good_count=good_count,
        failed_count=failed_count,
        avg_component_accuracy=avg_component_accuracy,
        component_rows="\n".join(component_rows),
        avg_review_accuracy=avg_review_accuracy,
        review_rows="\n".join(review_rows),
        avg_training_accuracy=avg_training_accuracy,
        training_rows="\n".join(training_rows),
        **numbered_fields,
    )
    return report, None
# Learners included in this batch run.
user_ids = [
    10781, 20712, 20854, 25286, 26386,
    26851, 27090, 27628, 28724, 28924,
    28935, 28991, 29038, 29368, 29559,
]

# Destination folder for the rendered Markdown reports; created on demand.
output_dir = "/root/.openclaw/workspace-xiaoban/output/文字版学情报告/"
os.makedirs(output_dir, exist_ok=True)

for user_id in user_ids:
    # The level is derived from the ID's first digit: "1" -> Level 1, anything else -> Level 2.
    level = 2
    if str(user_id)[:1] == '1':
        level = 1

    report, error = analyze_user_data(user_id, level)
    if not error:
        output_path = os.path.join(output_dir, f"学情分析报告_学员{user_id}_Level{level}.md")
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(report)
        print(f"用户{user_id}报告已生成:{output_path}")
    else:
        # Report the failure and move on to the next learner.
        print(f"用户{user_id}生成失败:{error}")

print("所有报告生成完成!")