更新:新增用户学习行为数据导出技能

- 新增 user_export_skill.md 完整导出技能说明
- 支持导出指定账户ID或角色ID的完整学习行为数据
- 包含6个sheet:音频数据、互动组件、课程巩固、单元挑战、单元总结、汇总统计
- 已成功验证导出两个用户数据,功能正常可用
This commit is contained in:
小溪 2026-03-02 23:21:58 +08:00
parent 037a620798
commit 2ee12bae8e
7 changed files with 7773 additions and 0 deletions

View File

@ -0,0 +1,70 @@
# 用户学习行为数据导出技能
## 功能说明
可以导出指定账户ID或角色ID的完整学习行为数据,输出为Excel文件,包含多个sheet。
## 导出内容说明
Excel包含以下sheet:
1. **全部音频数据**:用户的所有语音交互数据,包含音频地址、ASR结果等
2. **互动组件学习记录**:所有组件互动记录,包含组件类型、名称、知识点、互动结果等
3. **课程巩固记录**:课程课后巩固的做题记录
4. **单元挑战记录**:单元挑战的答题记录
5. **单元总结记录**:单元总结的学习记录
6. **汇总统计**:自动统计的组件通过率、知识点掌握情况、单元学习时长等
## 使用方法
### 1. 导出单个角色ID
修改脚本变量:
```python
USER_ID = "角色ID"
USER_ID_LIST = None
ACCOUNT_ID_LIST = None
```
### 2. 导出单个/多个账户ID
修改脚本变量:
```python
USER_ID = None
USER_ID_LIST = None
ACCOUNT_ID_LIST = [账户ID1, 账户ID2, ...]
```
脚本会自动查询账户对应的所有角色ID并分别导出。
## 依赖环境
需要配置以下环境变量(建议写入运行目录下的 .env 文件并加入 .gitignore,切勿将真实密码提交到仓库):
```
# ES 配置
ES_HOST=es-7vd7jcu9.public.tencentelasticsearch.com
ES_PORT=9200
ES_SCHEME=https
ES_USER=elastic
ES_PASSWORD=<ES密码>
# PG 配置
PG_DB_HOST=bj-postgres-16pob4sg.sql.tencentcdb.com
PG_DB_PORT=28591
PG_DB_USER=ai_member
PG_DB_PASSWORD=<PG密码>
PG_DB_DATABASE=vala
# MySQL 配置
MYSQL_HOST=bj-cdb-8frbdwju.sql.tencentcdb.com
MYSQL_USERNAME=read_only
MYSQL_PASSWORD=<MySQL测试环境密码>
MYSQL_PORT=25413
# MySQL Online 配置
MYSQL_HOST_online=bj-cdb-dh2fkqa0.sql.tencentcdb.com
MYSQL_USERNAME_online=read_only
MYSQL_PASSWORD_online=<MySQL线上环境密码>
MYSQL_PORT_online=27751
```
## 常见问题排查
1. **事务异常错误**:一般是前面某个查询失败导致,检查是否有权限、表是否存在
2. **权限不足**:检查数据库账号的表权限,需要有各分表的SELECT权限
3. **0条记录**:对应角色没有学习数据,属于正常情况
## 导出示例
- 账户ID 9343(角色12699):导出199条学习记录
- 角色ID 14607:导出855条完整学习记录,所有sheet都有数据

1846
export_14607.py Normal file

File diff suppressed because it is too large Load Diff

144
export_only_12698.py Normal file
View File

@ -0,0 +1,144 @@
#!/usr/bin/env python3
"""单独测试角色12698的导出查看具体报错"""
import os
import json
import sys
import datetime
from typing import Any, Dict, List
# Load environment variables from a dotenv-style file.
def load_env(env_path=None):
    """Copy ``KEY=VALUE`` pairs from a dotenv-style file into ``os.environ``.

    Args:
        env_path: Path of the file to read. When omitted, ``.env`` in the
            current working directory is used.

    Blank lines, lines starting with ``#``, lines without ``=`` and lines
    whose key is empty are skipped. Each remaining line is split on the first
    ``=`` only, so values may themselves contain ``=``. Keys and values are
    stripped of surrounding whitespace, and values additionally of
    surrounding double and single quotes. Variables that already exist are
    overwritten. A missing file is silently ignored.
    """
    if env_path is None:
        env_path = os.path.join(os.getcwd(), ".env")
    if not os.path.exists(env_path):
        return
    with open(env_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            key, value = line.split("=", 1)
            key = key.strip()
            if not key:
                # os.environ rejects an empty variable name; a malformed
                # "=value" line must not abort loading the rest of the file.
                continue
            os.environ[key] = value.strip().strip('"').strip("'")
# Populate os.environ from .env before the database/HTTP clients are imported.
load_env()
import psycopg2
from psycopg2.extras import RealDictCursor
import pymysql
import requests
from requests.auth import HTTPBasicAuth
import warnings
# Silence every warning category for the remainder of the process.
warnings.filterwarnings('ignore')
def _run_role_probe(conn, table, sql, user_id):
    """Run one diagnostic SELECT and print either the row count or the error.

    Args:
        conn: Open psycopg2 connection shared by all probes.
        table: Table name, used only in the printed messages.
        sql: Query text containing a single ``%s`` placeholder for the user id.
        user_id: Value bound to the placeholder.

    After a failed statement the transaction is rolled back, so that the next
    probe on the same connection is not rejected because of the earlier error.
    """
    import traceback

    print(f"\n测试查询表 {table}user_id={user_id}")
    try:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(sql, (user_id,))
            rows = cur.fetchall()
        print(f"✅ 查询成功,返回{len(rows)}条记录")
    except Exception as e:
        print(f"❌ 查询失败: {e}")
        print(f"错误类型: {type(e).__name__}")
        traceback.print_exc()
        # Roll back so the aborted transaction does not poison later probes.
        print("\n尝试回滚事务...")
        try:
            conn.rollback()
            print("✅ 事务回滚成功")
        except Exception as e2:
            print(f"❌ 回滚失败: {e2}")


def test_role_12698():
    """Probe each per-user PostgreSQL table for role id 12698 and report errors.

    Connection settings are read from the PG_DB_HOST, PG_DB_PORT, PG_DB_USER,
    PG_DB_PASSWORD and PG_DB_DATABASE environment variables. Every probe is
    independent: a failure is printed with its traceback, the transaction is
    rolled back, and the remaining probes still run. The connection is always
    closed before returning. Returns None.
    """
    print("="*60)
    print("单独测试角色ID=12698的查询")
    print("="*60)
    # Connect to PostgreSQL.
    try:
        conn = psycopg2.connect(
            host=os.getenv("PG_DB_HOST"),
            port=int(os.getenv("PG_DB_PORT")),
            user=os.getenv("PG_DB_USER"),
            password=os.getenv("PG_DB_PASSWORD"),
            dbname=os.getenv("PG_DB_DATABASE"),
            connect_timeout=10
        )
        print("✅ PG连接成功")
    except Exception as e:
        print(f"❌ PG连接失败: {e}")
        return

    user_id = "12698"
    # (table name, query) pairs, in the order they are probed.
    probes = [
        (
            "user_component_play_record_0",
            """
            SELECT user_id, component_unique_code, session_id, c_type, c_id,
                   play_result, user_behavior_info, updated_at
            FROM user_component_play_record_0
            WHERE user_id = %s
            ORDER BY updated_at DESC
            """,
        ),
        (
            "user_unit_review_question_result",
            """
            SELECT user_id, story_id, chapter_id, question_list, updated_at
            FROM user_unit_review_question_result
            WHERE user_id = %s
            ORDER BY updated_at DESC
            """,
        ),
        (
            "user_unit_challenge_question_result",
            """
            SELECT user_id, story_id, category, score_text, question_list, updated_at
            FROM user_unit_challenge_question_result
            WHERE user_id = %s
            ORDER BY updated_at DESC
            """,
        ),
        (
            "user_unit_summary_record",
            """
            SELECT id, user_id, unit_id, updated_at, km_id, km_type, play_time_seconds
            FROM user_unit_summary_record
            WHERE user_id = %s
            ORDER BY updated_at DESC
            """,
        ),
    ]
    try:
        for table, sql in probes:
            _run_role_probe(conn, table, sql, user_id)
    finally:
        # Release the connection even if a probe raises something unexpected.
        conn.close()


if __name__ == "__main__":
    test_role_12698()

1846
export_user_id_data.py Normal file

File diff suppressed because it is too large Load Diff

1845
export_user_id_data_debug.py Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

176
test_db_connections.py Normal file
View File

@ -0,0 +1,176 @@
#!/usr/bin/env python3
"""测试各个数据库连接和查询"""
import os
import json
import psycopg2
import pymysql
import requests
from requests.auth import HTTPBasicAuth
import warnings
# Silence every warning category for the remainder of the process.
# NOTE(review): presumably meant to hide the insecure-request warning caused
# by the verify=False calls in test_es_online — confirm before narrowing.
warnings.filterwarnings('ignore')
def test_postgresql():
    """Check that PostgreSQL accepts a connection and answers simple reads.

    Connection settings are taken from the PG_DB_HOST, PG_DB_PORT, PG_DB_USER,
    PG_DB_PASSWORD and PG_DB_DATABASE environment variables so that no
    credentials live in the source file.

    Returns:
        True when the connection and the probe queries succeed; False when a
        required variable is missing or any step raises (the error text is
        printed, truncated to 200 characters).
    """
    print("\n" + "="*60)
    print("测试 PostgreSQLOnline连接")
    print("="*60)
    required = ("PG_DB_HOST", "PG_DB_PORT", "PG_DB_USER", "PG_DB_PASSWORD", "PG_DB_DATABASE")
    missing = [name for name in required if not os.getenv(name)]
    if missing:
        print(f"❌ PostgreSQL 缺少环境变量:{', '.join(missing)}")
        return False
    conn = None
    try:
        conn = psycopg2.connect(
            host=os.environ["PG_DB_HOST"],
            port=int(os.environ["PG_DB_PORT"]),
            user=os.environ["PG_DB_USER"],
            password=os.environ["PG_DB_PASSWORD"],
            dbname=os.environ["PG_DB_DATABASE"],
            connect_timeout=10
        )
        print("✅ PostgreSQL 连接成功!")
        # Probe queries.
        with conn.cursor() as cur:
            # List a few tables of the public schema first.
            cur.execute("SELECT tablename FROM pg_tables WHERE schemaname = 'public' LIMIT 5")
            tables = cur.fetchall()
            print(f"✅ 查询成功找到前5个表{[t[0] for t in tables]}")
            # Then read a single row from the first of them.
            if tables:
                table = tables[0][0]
                # Quote the identifier: names with upper-case or special
                # characters are not valid when interpolated bare.
                quoted = '"' + table.replace('"', '""') + '"'
                cur.execute(f"SELECT * FROM public.{quoted} LIMIT 1")
                row = cur.fetchone()
                print(f"✅ 从表 {table} 读取到1条数据{row if row else '空表'}")
        return True
    except Exception as e:
        print(f"❌ PostgreSQL 连接/查询失败:{str(e)[:200]}")
        return False
    finally:
        # Close the connection on the failure path as well.
        if conn is not None:
            conn.close()
def test_mysql_test():
    """Check that the test-environment MySQL accepts a connection and reads.

    Connection settings are taken from the MYSQL_HOST, MYSQL_PORT,
    MYSQL_USERNAME and MYSQL_PASSWORD environment variables so that no
    credentials live in the source file.

    Returns:
        True when the connection and the probe queries succeed; False when a
        required variable is missing or any step raises (the error text is
        printed, truncated to 200 characters).
    """
    print("\n" + "="*60)
    print("测试 MySQLTest环境连接")
    print("="*60)
    required = ("MYSQL_HOST", "MYSQL_PORT", "MYSQL_USERNAME", "MYSQL_PASSWORD")
    missing = [name for name in required if not os.getenv(name)]
    if missing:
        print(f"❌ MySQLTest缺少环境变量:{', '.join(missing)}")
        return False
    conn = None
    try:
        conn = pymysql.connect(
            host=os.environ["MYSQL_HOST"],
            port=int(os.environ["MYSQL_PORT"]),
            user=os.environ["MYSQL_USERNAME"],
            password=os.environ["MYSQL_PASSWORD"],
            connect_timeout=10
        )
        print("✅ MySQLTest连接成功")
        # Probe queries.
        with conn.cursor() as cur:
            # SHOW statements accept no LIMIT clause; trim the result in Python.
            cur.execute("SHOW DATABASES")
            dbs = cur.fetchall()[:5]
            print(f"✅ 查询成功找到前5个数据库{[db[0] for db in dbs]}")
            if dbs:
                db = dbs[0][0]
                # Backtick-quote identifiers so unusual names remain valid SQL.
                cur.execute("USE `" + db.replace("`", "``") + "`")
                cur.execute("SHOW TABLES")
                table = cur.fetchone()
                if table:
                    quoted = "`" + table[0].replace("`", "``") + "`"
                    cur.execute(f"SELECT * FROM {quoted} LIMIT 1")
                    row = cur.fetchone()
                    print(f"✅ 从表 {table[0]} 读取到1条数据{row if row else '空表'}")
        return True
    except Exception as e:
        print(f"❌ MySQLTest连接/查询失败:{str(e)[:200]}")
        return False
    finally:
        # Close the connection on the failure path as well.
        if conn is not None:
            conn.close()
def test_mysql_online():
    """Check that the online MySQL accepts a connection and lists databases.

    Connection settings are taken from the MYSQL_HOST_online,
    MYSQL_PORT_online, MYSQL_USERNAME_online and MYSQL_PASSWORD_online
    environment variables so that no credentials live in the source file.

    Returns:
        True when the connection and the probe query succeed; False when a
        required variable is missing or any step raises (the error text is
        printed, truncated to 200 characters).
    """
    print("\n" + "="*60)
    print("测试 MySQLOnline连接")
    print("="*60)
    required = (
        "MYSQL_HOST_online",
        "MYSQL_PORT_online",
        "MYSQL_USERNAME_online",
        "MYSQL_PASSWORD_online",
    )
    missing = [name for name in required if not os.getenv(name)]
    if missing:
        print(f"❌ MySQLOnline缺少环境变量:{', '.join(missing)}")
        return False
    conn = None
    try:
        conn = pymysql.connect(
            host=os.environ["MYSQL_HOST_online"],
            port=int(os.environ["MYSQL_PORT_online"]),
            user=os.environ["MYSQL_USERNAME_online"],
            password=os.environ["MYSQL_PASSWORD_online"],
            connect_timeout=10
        )
        print("✅ MySQLOnline连接成功")
        # Probe query.
        with conn.cursor() as cur:
            # SHOW statements accept no LIMIT clause; trim the result in Python.
            cur.execute("SHOW DATABASES")
            dbs = cur.fetchall()[:5]
            print(f"✅ 查询成功找到前5个数据库{[db[0] for db in dbs]}")
        return True
    except Exception as e:
        print(f"❌ MySQLOnline连接/查询失败:{str(e)[:200]}")
        return False
    finally:
        # Close the connection on the failure path as well.
        if conn is not None:
            conn.close()
def test_es_online():
    """Check that the online Elasticsearch cluster answers authenticated requests.

    The endpoint is built from ES_SCHEME (default ``https``), ES_HOST and
    ES_PORT (default ``9200``); basic-auth credentials come from ES_USER and
    ES_PASSWORD, so that no credentials live in the source file.

    Returns:
        True when the root endpoint answers HTTP 200 (the index listing that
        follows is informational only); False when a required variable is
        missing, the root endpoint answers another status, or a request
        raises (the error text is printed, truncated to 200 characters).
    """
    print("\n" + "="*60)
    print("测试 ElasticsearchOnline连接")
    print("="*60)
    required = ("ES_HOST", "ES_USER", "ES_PASSWORD")
    missing = [name for name in required if not os.getenv(name)]
    if missing:
        print(f"❌ ES 缺少环境变量:{', '.join(missing)}")
        return False
    try:
        scheme = os.getenv("ES_SCHEME", "https")
        port = os.getenv("ES_PORT", "9200")
        url = f"{scheme}://{os.environ['ES_HOST']}:{port}"
        auth = HTTPBasicAuth(os.environ["ES_USER"], os.environ["ES_PASSWORD"])
        # NOTE(review): verify=False disables TLS certificate validation and
        # is kept only to preserve existing behaviour — confirm whether the
        # cluster certificate can be verified instead.
        response = requests.get(
            url,
            auth=auth,
            verify=False,
            timeout=10
        )
        if response.status_code != 200:
            print(f"❌ ES 连接失败HTTP {response.status_code}")
            return False
        info = response.json()
        print(f"✅ ES 连接成功!集群名称:{info.get('cluster_name')}")
        # List indices as a second, informational probe.
        indices_resp = requests.get(
            f"{url}/_cat/indices?format=json",
            auth=auth,
            verify=False,
            timeout=10
        )
        if indices_resp.status_code == 200:
            indices = indices_resp.json()
            print(f"✅ 查询成功!索引数量:{len(indices)}")
            if indices:
                print(f" 前3个索引{[idx['index'] for idx in indices[:3]]}")
        return True
    except Exception as e:
        print(f"❌ ES 连接/查询失败:{str(e)[:200]}")
        return False
if __name__ == "__main__":
    print("开始测试所有数据库连接...")
    # Probes are executed in this order; each one returns a success flag.
    checks = (
        ("PostgreSQL(Online)", test_postgresql),
        ("MySQL(Test)", test_mysql_test),
        ("MySQL(Online)", test_mysql_online),
        ("ES(Online)", test_es_online),
    )
    outcomes = {}
    for label, probe in checks:
        outcomes[label] = probe()
    # Print one summary line per probe.
    divider = "=" * 60
    print("\n" + divider)
    print("测试总结")
    print(divider)
    for label in outcomes:
        verdict = "✅ 正常" if outcomes[label] else "❌ 异常"
        print(f"{label}: {verdict}")