diff --git a/.gitignore b/.gitignore
index abde2a6..6bb3d29 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,6 +24,9 @@ __pycache__/
 .openclaw/
 .clawhub/
 
+# 归档备份目录
+backup/
+
 # 日志文件
 *.log
 logs/
diff --git a/.vala_skill_hashes b/.vala_skill_hashes
index 3fbe56d..83768e5 100644
--- a/.vala_skill_hashes
+++ b/.vala_skill_hashes
@@ -10,3 +10,4 @@ smart-auto-model-switch f94a7b1ff6357c287a374366b339cd06b0dde5ca9e965a71a369ff66
 vala-component-oops-stat 2e3a3dc8ee252f133256ec5fb8edc031fc25ff602158fd2ddf778ce4d2b961f2
 vala-component-practice-stat 8e768e2641019d27bd41f4647d2d90f24182a0554dad5ad9f4136e9ce0bae147
 cron-schedule e103cbb1806b28c891b9c856963325086ecaff32edec208f0a841865f26e8f3e
+refund-user-learning-analysis 18f23bb62167466319ab667225cad5fc654680fe7820a1f3d87cfe98c30e96fd
diff --git a/AGENTS.md b/AGENTS.md
index 557d7ae..9456509 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -133,6 +133,34 @@ System: [...] Feishu[xiaobian] group oc_xxx | 发送者 (ou_xxx) [msg:om_xxx]
 
 参与，而非主导。质量 > 数量。
 
+## 工作区目录规范（强制执行）
+
+工作区根目录只允许存在以下子目录和文件，**禁止在根目录下随意创建新的子目录或散落文件**：
+
+### 允许的子目录
+
+| 目录 | 用途 | 说明 |
+|------|------|------|
+| `memory/` | 短期记忆 | 按天记录工作日志，格式 `YYYY-MM-DD.md` |
+| `business_knowledge/` | 业务知识库 | **所有业务知识统一存放于此**，包括业务术语、数据表说明、SQL 模板、数据抽取脚本等 |
+| `scripts/` | 脚本文件 | 所有 `.py`、`.sh`、`.sql` 等脚本文件必须放在此目录 |
+| `output/` | 输出文件 | 所有生成的报表（`.xlsx`、`.csv`）、日志（`.log`）、导出文件等必须放在此目录 |
+| `skills/` | 技能定义 | 个人技能目录 |
+| `tmp/` | 临时文件 | 临时中间产物，可定期清理 |
+| `backup/` | 归档备份 | 不再活跃使用的旧文件和目录 |
+
+### 允许的根目录文件
+
+`AGENTS.md`、`SOUL.md`、`USER.md`、`MEMORY.md`、`TOOLS.md`、`IDENTITY.md`、`HEARTBEAT.md`、`BOOTSTRAP.md`、`secrets.env`、`.env`、`.gitignore`
+
+### 强制规则
+
+1. **脚本文件** → 始终创建在 `scripts/` 目录下，绝不放在根目录
+2. **输出文件**（xlsx/csv/log/报表等）→ 始终创建在 `output/` 目录下，绝不放在根目录
+3. **业务知识** → 统一记录到 `business_knowledge/` 目录
+4. **新增子目录** → 禁止在根目录下随意创建新子目录。如有特殊需要，须经技术负责人确认
+5. **临时文件** → 使用 `tmp/`，用完即清
+
 ## 工具
 
 Skills 提供你的工具。当你需要某个工具时，查看它的 `SKILL.md`。在 `TOOLS.md` 中保存环境相关的备注（数据库连接、API 配置等）。
diff --git a/ai_member_xiaoxi b/ai_member_xiaoxi
deleted file mode 160000
index c7e1952..0000000
--- a/ai_member_xiaoxi
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit c7e1952f72fd3e7379b14c10a87540fa2ce84037
diff --git a/ai_member_xiaoxi.bfg-report/2026-03-03/16-01-30/cache-stats.txt b/ai_member_xiaoxi.bfg-report/2026-03-03/16-01-30/cache-stats.txt
deleted file mode 100644
index d0b8d79..0000000
--- a/ai_member_xiaoxi.bfg-report/2026-03-03/16-01-30/cache-stats.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-(apply,CacheStats{hitCount=16, missCount=5, loadSuccessCount=5, loadExceptionCount=0, totalLoadTime=28346537, evictionCount=0})
-(tree,CacheStats{hitCount=5, missCount=8, loadSuccessCount=8, loadExceptionCount=0, totalLoadTime=13193310, evictionCount=0})
-(commit,CacheStats{hitCount=2, missCount=3, loadSuccessCount=3, loadExceptionCount=0, totalLoadTime=27853456, evictionCount=0})
-(tag,CacheStats{hitCount=0, missCount=0, loadSuccessCount=0, loadExceptionCount=0, totalLoadTime=0, evictionCount=0})
diff --git a/ai_member_xiaoxi.bfg-report/2026-03-03/16-01-30/changed-files.txt b/ai_member_xiaoxi.bfg-report/2026-03-03/16-01-30/changed-files.txt
deleted file mode 100644
index 32bf58d..0000000
--- a/ai_member_xiaoxi.bfg-report/2026-03-03/16-01-30/changed-files.txt
+++ /dev/null
@@ -1 +0,0 @@
-8e09023a9bda22c602a1c8dc1eb2f7a0ebdcfb2d 3062188004198c6a73c95042a620fd20f1492c39 TOOLS.md
diff --git a/ai_member_xiaoxi.bfg-report/2026-03-03/16-01-30/object-id-map.old-new.txt b/ai_member_xiaoxi.bfg-report/2026-03-03/16-01-30/object-id-map.old-new.txt
deleted file mode 100644
index 1013c09..0000000
--- a/ai_member_xiaoxi.bfg-report/2026-03-03/16-01-30/object-id-map.old-new.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-037a62079890d989cdcd02b38d4e3951a92fc87e 8ab4e0749014d066411ca1b40bc3db69af9a0c6e
-2ee12bae8e137cf71fe5ab95293623d2017b7985 339001c1df863b8e5eaaf54ad3fff2d0aa35bce5
-b91ce3a3a73bd86c4ee7f2616c8259fb8ae1bcfc 85f58778f25a6ac5a5959531ef601405e827e902
diff --git a/backup_git b/backup_git
deleted file mode 160000
index 2ee12ba..0000000
--- a/backup_git
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 2ee12bae8e137cf71fe5ab95293623d2017b7985
diff --git a/bfg.jar b/bfg.jar
deleted file mode 100644
index 688fe71..0000000
Binary files a/bfg.jar and /dev/null differ
diff --git a/makee_vala/business_rules.md b/business_knowledge/business_rules.md
similarity index 100%
rename from makee_vala/business_rules.md
rename to business_knowledge/business_rules.md
diff --git a/database/mysql_online_full.txt b/database/mysql_online_full.txt
deleted file mode 100644
index 5434b21..0000000
--- a/database/mysql_online_full.txt
+++ /dev/null
@@ -1,441 +0,0 @@
-TABLE_NAME	COLUMN_NAME	DATA_TYPE	IS_NULLABLE
-casbin_rule	id	bigint	NO
-casbin_rule	ptype	varchar	YES
-casbin_rule	v0	varchar	YES
-casbin_rule	v1	varchar	YES
-casbin_rule	v2	varchar	YES
-casbin_rule	v3	varchar	YES
-casbin_rule	v4	varchar	YES
-casbin_rule	v5	varchar	YES
-exa_customers	id	bigint	NO
-exa_customers	created_at	datetime	YES
-exa_customers	updated_at	datetime	YES
-exa_customers	deleted_at	datetime	YES
-exa_customers	customer_name	varchar	YES
-exa_customers	customer_phone_data	varchar	YES
-exa_customers	sys_user_id	bigint	YES
-exa_customers	sys_user_authority_id	bigint	YES
-exa_file_chunks	id	bigint	NO
-exa_file_chunks	created_at	datetime	YES
-exa_file_chunks	updated_at	datetime	YES
-exa_file_chunks	deleted_at	datetime	YES
-exa_file_chunks	exa_file_id	bigint	YES
-exa_file_chunks	file_chunk_number	bigint	YES
-exa_file_chunks	file_chunk_path	varchar	YES
-exa_file_upload_and_downloads	id	bigint	NO
-exa_file_upload_and_downloads	name	varchar	YES
-exa_file_upload_and_downloads	url	varchar	YES
-exa_file_upload_and_downloads	tag	varchar	YES
-exa_file_upload_and_downloads	key	varchar	YES
-exa_file_upload_and_downloads	created_at	datetime	YES
-exa_file_upload_and_downloads	updated_at	datetime	YES
-exa_file_upload_and_downloads	deleted_at	datetime	YES
-exa_files	id	bigint	NO
-exa_files	created_at	datetime	YES
-exa_files	updated_at	datetime	YES
-exa_files	deleted_at	datetime	YES
-exa_files	file_name	varchar	YES
-exa_files	file_md5	varchar	YES
-exa_files	file_path	varchar	YES
-exa_files	chunk_total	bigint	YES
-exa_files	is_finish	tinyint	YES
-jwt_blacklists	id	bigint	NO
-jwt_blacklists	created_at	datetime	YES
-jwt_blacklists	updated_at	datetime	YES
-jwt_blacklists	deleted_at	datetime	YES
-jwt_blacklists	jwt	text	YES
-sys_apis	id	bigint	NO
-sys_apis	created_at	datetime	YES
-sys_apis	updated_at	datetime	YES
-sys_apis	deleted_at	datetime	YES
-sys_apis	path	varchar	YES
-sys_apis	description	varchar	YES
-sys_apis	api_group	varchar	YES
-sys_apis	method	varchar	YES
-sys_authorities	created_at	datetime	YES
-sys_authorities	updated_at	datetime	YES
-sys_authorities	deleted_at	datetime	YES
-sys_authorities	authority_id	bigint	NO
-sys_authorities	authority_name	varchar	YES
-sys_authorities	parent_id	bigint	YES
-sys_authorities	default_router	varchar	YES
-sys_authority_btns	authority_id	bigint	YES
-sys_authority_btns	sys_menu_id	bigint	YES
-sys_authority_btns	sys_base_menu_btn_id	bigint	YES
-sys_authority_menus	sys_base_menu_id	bigint	NO
-sys_authority_menus	sys_authority_authority_id	bigint	NO
-sys_auto_code_histories	id	bigint	NO
-sys_auto_code_histories	created_at	datetime	YES
-sys_auto_code_histories	updated_at	datetime	YES
-sys_auto_code_histories	deleted_at	datetime	YES
-sys_auto_code_histories	package	varchar	YES
-sys_auto_code_histories	business_db	varchar	YES
-sys_auto_code_histories	table_name	varchar	YES
-sys_auto_code_histories	request_meta	text	YES
-sys_auto_code_histories	auto_code_path	text	YES
-sys_auto_code_histories	injection_meta	text	YES
-sys_auto_code_histories	struct_name	varchar	YES
-sys_auto_code_histories	struct_cn_name	varchar	YES
-sys_auto_code_histories	api_ids	varchar	YES
-sys_auto_code_histories	flag	bigint	YES
-sys_auto_codes	id	bigint	NO
-sys_auto_codes	created_at	datetime	YES
-sys_auto_codes	updated_at	datetime	YES
-sys_auto_codes	deleted_at	datetime	YES
-sys_auto_codes	package_name	varchar	YES
-sys_auto_codes	label	varchar	YES
-sys_auto_codes	desc	varchar	YES
-sys_base_menu_btns	id	bigint	NO
-sys_base_menu_btns	created_at	datetime	YES
-sys_base_menu_btns	updated_at	datetime	YES
-sys_base_menu_btns	deleted_at	datetime	YES
-sys_base_menu_btns	name	varchar	YES
-sys_base_menu_btns	desc	varchar	YES
-sys_base_menu_btns	sys_base_menu_id	bigint	YES
-sys_base_menu_parameters	id	bigint	NO
-sys_base_menu_parameters	created_at	datetime	YES
-sys_base_menu_parameters	updated_at	datetime	YES
-sys_base_menu_parameters	deleted_at	datetime	YES
-sys_base_menu_parameters	sys_base_menu_id	bigint	YES
-sys_base_menu_parameters	type	varchar	YES
-sys_base_menu_parameters	key	varchar	YES
-sys_base_menu_parameters	value	varchar	YES
-sys_base_menus	id	bigint	NO
-sys_base_menus	created_at	datetime	YES
-sys_base_menus	updated_at	datetime	YES
-sys_base_menus	deleted_at	datetime	YES
-sys_base_menus	menu_level	bigint	YES
-sys_base_menus	parent_id	varchar	YES
-sys_base_menus	path	varchar	YES
-sys_base_menus	name	varchar	YES
-sys_base_menus	hidden	tinyint	YES
-sys_base_menus	component	varchar	YES
-sys_base_menus	sort	bigint	YES
-sys_base_menus	active_name	varchar	YES
-sys_base_menus	keep_alive	tinyint	YES
-sys_base_menus	default_menu	tinyint	YES
-sys_base_menus	title	varchar	YES
-sys_base_menus	icon	varchar	YES
-sys_base_menus	close_tab	tinyint	YES
-sys_chat_gpt_options	sk	varchar	YES
-sys_data_authority_id	sys_authority_authority_id	bigint	NO
-sys_data_authority_id	data_authority_id_authority_id	bigint	NO
-sys_dictionaries	id	bigint	NO
-sys_dictionaries	created_at	datetime	YES
-sys_dictionaries	updated_at	datetime	YES
-sys_dictionaries	deleted_at	datetime	YES
-sys_dictionaries	name	varchar	YES
-sys_dictionaries	type	varchar	YES
-sys_dictionaries	status	tinyint	YES
-sys_dictionaries	desc	varchar	YES
-sys_dictionary_details	id	bigint	NO
-sys_dictionary_details	created_at	datetime	YES
-sys_dictionary_details	updated_at	datetime	YES
-sys_dictionary_details	deleted_at	datetime	YES
-sys_dictionary_details	label	varchar	YES
-sys_dictionary_details	value	bigint	YES
-sys_dictionary_details	extend	varchar	YES
-sys_dictionary_details	status	tinyint	YES
-sys_dictionary_details	sort	bigint	YES
-sys_dictionary_details	sys_dictionary_id	bigint	YES
-sys_operation_records	id	bigint	NO
-sys_operation_records	updated_at	datetime	YES
-sys_operation_records	nick_name	varchar	YES
-sys_operation_records	body	text	YES
-sys_operation_records	path	varchar	YES
-sys_operation_records	ip	varchar	YES
-sys_operation_records	method	varchar	YES
-sys_operation_records	status	bigint	YES
-sys_operation_records	latency	bigint	YES
-sys_operation_records	agent	varchar	YES
-sys_operation_records	error_message	varchar	YES
-sys_operation_records	resp	text	YES
-sys_operation_records	user_id	bigint	YES
-sys_operation_records	deleted_at	datetime	YES
-sys_operation_records	created_at	datetime	YES
-sys_user_authority	sys_user_id	bigint	NO
-sys_user_authority	sys_authority_authority_id	bigint	NO
-sys_users	id	bigint	NO
-sys_users	uuid	varchar	YES
-sys_users	username	varchar	YES
-sys_users	password	varchar	YES
-sys_users	nick_name	varchar	YES
-sys_users	authority_id	bigint	YES
-sys_users	side_mode	varchar	YES
-sys_users	header_img	varchar	YES
-sys_users	base_color	varchar	YES
-sys_users	active_color	varchar	YES
-sys_users	phone	varchar	YES
-sys_users	email	varchar	YES
-sys_users	enable	bigint	YES
-sys_users	created_at	datetime	YES
-sys_users	updated_at	datetime	YES
-sys_users	deleted_at	datetime	YES
-vala_app_account	id	bigint	NO
-vala_app_account	tel	varchar	YES
-vala_app_account	tel_encrypt	varchar	YES
-vala_app_account	name	varchar	YES
-vala_app_account	id_card	varchar	YES
-vala_app_account	status	tinyint	NO
-vala_app_account	pay_status	int	YES
-vala_app_account	login_times	int	YES
-vala_app_account	remark	text	YES
-vala_app_account	key_from	varchar	NO
-vala_app_account	download_channel	varchar	YES
-vala_app_account	created_at	datetime	YES
-vala_app_account	updated_at	datetime	YES
-vala_app_account	deleted_at	datetime	YES
-vala_app_character	id	bigint	NO
-vala_app_character	account_id	bigint	YES
-vala_app_character	nickname	varchar	YES
-vala_app_character	gender	tinyint	YES
-vala_app_character	birthday	varchar	YES
-vala_app_character	avatar	text	YES
-vala_app_character	spine_name	varchar	YES
-vala_app_character	latest_login	datetime	YES
-vala_app_character	reputation	char	YES
-vala_app_character	robots_cnt	int	YES
-vala_app_character	head_image	varchar	YES
-vala_app_character	status	tinyint	YES
-vala_app_character	purchase_season_package	text	YES
-vala_app_character	total_sp_point	int	YES
-vala_app_character	balance_sp_point	int	YES
-vala_app_character	point	int	YES
-vala_app_character	pay_status	tinyint	YES
-vala_app_character	created_at	datetime	YES
-vala_app_character	updated_at	datetime	YES
-vala_app_character	deleted_at	datetime	YES
-vala_app_character	mood	int	NO
-vala_app_character	mood_start_time	bigint	NO
-vala_applet_user	id	bigint	NO
-vala_applet_user	tel	varchar	YES
-vala_applet_user	open_id	varchar	YES
-vala_applet_user	session_key	varchar	YES
-vala_applet_user	union_id	varchar	YES
-vala_applet_user	status	tinyint	NO
-vala_applet_user	created_at	datetime	YES
-vala_applet_user	updated_at	datetime	YES
-vala_applet_user	deleted_at	datetime	YES
-vala_card_use_statistics	id	bigint	NO
-vala_card_use_statistics	user_id	int	NO
-vala_card_use_statistics	card_type	tinyint	YES
-vala_card_use_statistics	card_id	int	YES
-vala_card_use_statistics	count_num	int	NO
-vala_card_use_statistics	created_at	datetime	YES
-vala_card_use_statistics	updated_at	datetime	YES
-vala_card_use_statistics	deleted_at	datetime	YES
-vala_chat	id	bigint	NO
-vala_chat	user_id	bigint	NO
-vala_chat	mod_id	bigint	NO
-vala_chat	npc_id	bigint	NO
-vala_chat	chat_id	varchar	NO
-vala_chat	msg	varchar	NO
-vala_chat	assistant	text	YES
-vala_chat	created_at	datetime	YES
-vala_chat	updated_at	datetime	YES
-vala_chat	deleted_at	datetime	YES
-vala_friend_relation	id	int	NO
-vala_friend_relation	resource_id	int	NO
-vala_friend_relation	relation_type	tinyint	YES
-vala_friend_relation	chapter_id	int	NO
-vala_friend_relation	component_id	int	NO
-vala_friend_relation	interact_times	int	NO
-vala_friend_relation	favor_level	int	NO
-vala_friend_relation	content_show	varchar	YES
-vala_friend_relation	created_at	datetime	YES
-vala_friend_relation	updated_at	datetime	YES
-vala_friend_relation	deleted_at	datetime	YES
-vala_invitation_code	id	bigint	NO
-vala_invitation_code	code	varchar	YES
-vala_invitation_code	owner	varchar	YES
-vala_invitation_code	status	tinyint	NO
-vala_invitation_code	created_at	datetime	YES
-vala_invitation_code	updated_at	datetime	YES
-vala_invitation_code	deleted_at	datetime	YES
-vala_invitation_code_log	id	bigint	NO
-vala_invitation_code_log	code_id	bigint	NO
-vala_invitation_code_log	ip_address	varchar	YES
-vala_invitation_code_log	city	varchar	YES
-vala_invitation_code_log	created_at	datetime	YES
-vala_invitation_code_log	updated_at	datetime	YES
-vala_invitation_code_log	deleted_at	timestamp	YES
-vala_relation_level	id	int	NO
-vala_relation_level	level	int	NO
-vala_relation_level	upgrade_need_value	int	NO
-vala_relation_level	total_require_value	int	NO
-vala_relation_level	favorability_lower_limit	int	NO
-vala_relation_level	favorability_upper_limit	int	NO
-vala_relation_level	created_at	datetime	YES
-vala_relation_level	updated_at	datetime	YES
-vala_relation_level	deleted_at	datetime	YES
-vala_seasonal_ticket	id	bigint	NO
-vala_seasonal_ticket	unique_code	varchar	YES
-vala_seasonal_ticket	account_id	bigint	YES
-vala_seasonal_ticket	account_name	varchar	YES
-vala_seasonal_ticket	tel	varchar	YES
-vala_seasonal_ticket	give_time	int	YES
-vala_seasonal_ticket	expire_time	int	YES
-vala_seasonal_ticket	expire_type	tinyint	NO
-vala_seasonal_ticket	ticket_type	tinyint	NO
-vala_seasonal_ticket	used_time	int	YES
-vala_seasonal_ticket	season_package_id	bigint	YES
-vala_seasonal_ticket	season_package_name	varchar	YES
-vala_seasonal_ticket	status	tinyint	NO
-vala_seasonal_ticket	created_at	datetime	YES
-vala_seasonal_ticket	updated_at	datetime	YES
-vala_seasonal_ticket	deleted_at	datetime	YES
-vala_user_card_component_record	id	bigint	NO
-vala_user_card_component_record	user_id	bigint	NO
-vala_user_card_component_record	game_id	bigint	YES
-vala_user_card_component_record	chapter_id	bigint	YES
-vala_user_card_component_record	component_type	bigint	YES
-vala_user_card_component_record	sub_component_type	bigint	YES
-vala_user_card_component_record	component_id	bigint	NO
-vala_user_card_component_record	card_content	text	YES
-vala_user_card_component_record	is_new	tinyint	NO
-vala_user_card_component_record	created_at	datetime	YES
-vala_user_card_component_record	updated_at	datetime	YES
-vala_user_card_component_record	deleted_at	datetime	YES
-vala_user_card_plot_upgrade	id	bigint	NO
-vala_user_card_plot_upgrade	user_id	bigint	NO
-vala_user_card_plot_upgrade	chapter_id	bigint	YES
-vala_user_card_plot_upgrade	plot_card_id	bigint	NO
-vala_user_card_plot_upgrade	is_have_upgrade	bigint	YES
-vala_user_card_plot_upgrade	white_card_status	tinyint	NO
-vala_user_card_plot_upgrade	flash_card_status	tinyint	NO
-vala_user_card_plot_upgrade	is_new	tinyint	YES
-vala_user_card_plot_upgrade	created_at	datetime	YES
-vala_user_card_plot_upgrade	updated_at	datetime	YES
-vala_user_card_plot_upgrade	deleted_at	datetime	YES
-vala_user_chapter_and_component_record	id	bigint	NO
-vala_user_chapter_and_component_record	user_id	bigint	NO
-vala_user_chapter_and_component_record	chapter_id	text	NO
-vala_user_chapter_and_component_record	component_id	text	NO
-vala_user_chapter_and_component_record	created_at	datetime	YES
-vala_user_chapter_and_component_record	updated_at	datetime	YES
-vala_user_chapter_and_component_record	deleted_at	datetime	YES
-vala_user_chapter_finish_record	id	bigint	NO
-vala_user_chapter_finish_record	user_id	bigint	NO
-vala_user_chapter_finish_record	game_id	bigint	YES
-vala_user_chapter_finish_record	chapter_id	bigint	NO
-vala_user_chapter_finish_record	created_at	datetime	YES
-vala_user_chapter_finish_record	updated_at	datetime	YES
-vala_user_chapter_finish_record	deleted_at	datetime	YES
-vala_user_chat_log	id	bigint	NO
-vala_user_chat_log	user_id	bigint	NO
-vala_user_chat_log	game_id	bigint	YES
-vala_user_chat_log	chapter_id	bigint	YES
-vala_user_chat_log	sub_component_type	bigint	YES
-vala_user_chat_log	component_id	bigint	NO
-vala_user_chat_log	npc_id	bigint	NO
-vala_user_chat_log	session_id	varchar	NO
-vala_user_chat_log	dialog_list	text	YES
-vala_user_chat_log	dialog_score	text	YES
-vala_user_chat_log	tencent_soe	text	YES
-vala_user_chat_log	chat_review_resp	text	YES
-vala_user_chat_log	cost_time	int	NO
-vala_user_chat_log	insert_time	datetime	YES
-vala_user_chat_log	log_type	tinyint	YES
-vala_user_chat_log	deal_status	tinyint	YES
-vala_user_chat_log	created_at	datetime	YES
-vala_user_chat_log	updated_at	datetime	YES
-vala_user_chat_log	deleted_at	datetime	YES
-vala_user_drama_log	id	bigint	NO
-vala_user_drama_log	user_id	bigint	NO
-vala_user_drama_log	game_id	bigint	YES
-vala_user_drama_log	chapter_id	bigint	YES
-vala_user_drama_log	component_id	bigint	YES
-vala_user_drama_log	npc_id	bigint	YES
-vala_user_drama_log	created_at	datetime	YES
-vala_user_drama_log	updated_at	datetime	YES
-vala_user_drama_log	deleted_at	datetime	YES
-vala_user_friend_info	id	bigint	NO
-vala_user_friend_info	user_id	bigint	NO
-vala_user_friend_info	npc_id	bigint	NO
-vala_user_friend_info	favorability_value	int	YES
-vala_user_friend_info	level	int	YES
-vala_user_friend_info	relation_type	tinyint	YES
-vala_user_friend_info	interact_times	int	YES
-vala_user_friend_info	story_id	int	YES
-vala_user_friend_info	created_at	datetime	YES
-vala_user_friend_info	updated_at	datetime	YES
-vala_user_friend_info	deleted_at	datetime	YES
-vala_user_game_finish_record	id	bigint	NO
-vala_user_game_finish_record	user_id	bigint	NO
-vala_user_game_finish_record	game_id	bigint	YES
-vala_user_game_finish_record	created_at	datetime	YES
-vala_user_game_finish_record	updated_at	datetime	YES
-vala_user_game_finish_record	deleted_at	datetime	YES
-vala_user_interact_chat_log	id	bigint	NO
-vala_user_interact_chat_log	user_id	bigint	NO
-vala_user_interact_chat_log	npc_id	bigint	NO
-vala_user_interact_chat_log	session_id	varchar	YES
-vala_user_interact_chat_log	type	varchar	NO
-vala_user_interact_chat_log	name	varchar	NO
-vala_user_interact_chat_log	text	varchar	NO
-vala_user_interact_chat_log	interactive_type	text	YES
-vala_user_interact_chat_log	interactive_selected	tinyint	YES
-vala_user_interact_chat_log	chat_time	datetime	YES
-vala_user_interact_chat_log	created_at	datetime	YES
-vala_user_interact_chat_log	updated_at	datetime	YES
-vala_user_interact_chat_log	deleted_at	datetime	YES
-vala_user_interact_log	id	bigint	NO
-vala_user_interact_log	user_id	bigint	NO
-vala_user_interact_log	component_type	tinyint	YES
-vala_user_interact_log	relation_type	tinyint	YES
-vala_user_interact_log	npc_id	bigint	NO
-vala_user_interact_log	session_id	varchar	NO
-vala_user_interact_log	summary	text	NO
-vala_user_interact_log	dialog_list	text	YES
-vala_user_interact_log	dialog_score	text	YES
-vala_user_interact_log	tencent_soe	text	YES
-vala_user_interact_log	chat_review_resp	text	YES
-vala_user_interact_log	cost_time	int	NO
-vala_user_interact_log	insert_time	datetime	YES
-vala_user_interact_log	created_at	datetime	YES
-vala_user_interact_log	updated_at	datetime	YES
-vala_user_interact_log	deleted_at	datetime	YES
-vala_user_interact_log	favor_value	int	YES
-vala_user_learn_record	id	int	NO
-vala_user_learn_record	user_id	bigint	NO
-vala_user_learn_record	game_id	bigint	NO
-vala_user_learn_record	chapter_id	bigint	NO
-vala_user_learn_record	score	float	NO
-vala_user_learn_record	score_level	char	YES
-vala_user_learn_record	unique_code	varchar	YES
-vala_user_learn_record	source_type	tinyint	YES
-vala_user_learn_record	created_at	datetime	YES
-vala_user_learn_record	updated_at	datetime	YES
-vala_user_learn_record	deleted_at	timestamp	YES
-vala_user_long_term_memory	id	bigint	NO
-vala_user_long_term_memory	user_id	int	YES
-vala_user_long_term_memory	npc_id	int	YES
-vala_user_long_term_memory	robot_id	int	YES
-vala_user_long_term_memory	config_id	int	YES
-vala_user_long_term_memory	session_id	varchar	YES
-vala_user_long_term_memory	log_id	int	YES
-vala_user_long_term_memory	source	varchar	YES
-vala_user_long_term_memory	source_type	varchar	YES
-vala_user_long_term_memory	memory_type	varchar	YES
-vala_user_long_term_memory	sub_type	varchar	YES
-vala_user_long_term_memory	user_content	text	YES
-vala_user_long_term_memory	system_content	text	YES
-vala_user_long_term_memory	created_at	datetime	YES
-vala_user_long_term_memory	updated_at	datetime	YES
-vala_user_long_term_memory	deleted_at	datetime	YES
-vala_user_robot	id	int	NO
-vala_user_robot	user_id	bigint	NO
-vala_user_robot	robot_id	bigint	NO
-vala_user_robot	robot_level	tinyint	YES
-vala_user_robot	created_at	datetime	YES
-vala_user_robot	updated_at	datetime	YES
-vala_user_robot	deleted_at	timestamp	YES
-vala_user_word_statistics	id	bigint	NO
-vala_user_word_statistics	user_id	int	NO
-vala_user_word_statistics	word_pattern_id	varchar	YES
-vala_user_word_statistics	count_num	int	NO
-vala_user_word_statistics	content_type	char	YES
-vala_user_word_statistics	source_type	tinyint	YES
-vala_user_word_statistics	created_at	datetime	YES
-vala_user_word_statistics	updated_at	datetime	YES
-vala_user_word_statistics	deleted_at	datetime	YES
diff --git a/database/mysql_test_full.txt b/database/mysql_test_full.txt
deleted file mode 100644
index e1029bb..0000000
--- a/database/mysql_test_full.txt
+++ /dev/null
@@ -1,971 +0,0 @@
-TABLE_NAME	COLUMN_NAME	DATA_TYPE	IS_NULLABLE
-ai_reviewer_issue_records	id	int	NO
-ai_reviewer_issue_records	table_name	varchar	NO
-ai_reviewer_issue_records	record_key	varchar	NO
-ai_reviewer_issue_records	issue_type	varchar	NO
-ai_reviewer_issue_records	issue_location	varchar	YES
-ai_reviewer_issue_records	issue_description	text	NO
-ai_reviewer_issue_records	issue_detail	text	YES
-ai_reviewer_issue_records	source_content	text	YES
-ai_reviewer_issue_records	raw_llm_output	text	YES
-ai_reviewer_issue_records	review_model	varchar	YES
-ai_reviewer_issue_records	review_result	varchar	YES
-ai_reviewer_issue_records	review_reason	text	YES
-ai_reviewer_issue_records	is_notified	tinyint	YES
-ai_reviewer_issue_records	is_exported	tinyint	YES
-ai_reviewer_issue_records	created_at	datetime	YES
-ai_reviewer_issue_records	notified_at	datetime	YES
-ai_reviewer_scan_metadata	id	int	NO
-ai_reviewer_scan_metadata	table_name	varchar	NO
-ai_reviewer_scan_metadata	last_full_scan_at	datetime	YES
-ai_reviewer_scan_metadata	full_scan_status	varchar	NO
-ai_reviewer_scan_metadata	scan_offset_hours	int	NO
-ai_reviewer_scan_metadata	total_records_scanned	int	YES
-ai_reviewer_scan_metadata	created_at	datetime	YES
-ai_reviewer_scan_metadata	updated_at	datetime	YES
-ai_reviewer_scan_records	id	int	NO
-ai_reviewer_scan_records	table_name	varchar	NO
-ai_reviewer_scan_records	record_key	varchar	NO
-ai_reviewer_scan_records	last_updated_at	datetime	NO
-ai_reviewer_scan_records	last_scanned_at	datetime	NO
-ai_reviewer_scan_records	scan_status	varchar	NO
-ai_reviewer_scan_records	has_issues	tinyint	YES
-ai_reviewer_scan_records	content_hash	varchar	YES
-ai_reviewer_scan_records	created_at	datetime	YES
-ai_reviewer_scan_records	updated_at	datetime	YES
-casbin_rule	id	bigint	NO
-casbin_rule	ptype	varchar	YES
-casbin_rule	v0	varchar	YES
-casbin_rule	v1	varchar	YES
-casbin_rule	v2	varchar	YES
-casbin_rule	v3	varchar	YES
-casbin_rule	v4	varchar	YES
-casbin_rule	v5	varchar	YES
-chapter_task_desc	id	bigint	NO
-chapter_task_desc	chapter_id	bigint	NO
-chapter_task_desc	section_target	varchar	NO
-chapter_task_desc	reviewCount	int	NO
-chapter_task_desc	task_desc	text	YES
-chapter_task_desc	question_list	text	YES
-chapter_task_desc	created_at	datetime	YES
-chapter_task_desc	updated_at	datetime	YES
-chapter_task_desc	deleted_at	datetime	YES
-core_interaction_component	id	bigint	NO
-core_interaction_component	c_type	varchar	YES
-core_interaction_component	c_id	varchar	YES
-core_interaction_component	title	varchar	YES
-core_interaction_component	component_config	text	YES
-core_interaction_component	notice_component_config	text	YES
-core_interaction_component	err_msg	text	YES
-core_interaction_component	material_img_list	text	YES
-core_interaction_component	audio_list	text	YES
-core_interaction_component	text_analysis	text	YES
-core_interaction_component	related_path	text	YES
-core_interaction_component	read_status	tinyint	YES
-core_interaction_component	config_status	tinyint	YES
-core_interaction_component	lock_status	tinyint	YES
-core_interaction_component	kp_relation_info	text	YES
-core_interaction_component	created_at	datetime	YES
-core_interaction_component	updated_at	datetime	YES
-core_interaction_component	deleted_at	datetime	YES
-dev_knowledge_point_config	id	int	NO
-dev_knowledge_point_config	content	varchar	NO
-dev_knowledge_point_config	type	varchar	NO
-dev_knowledge_point_config	definition	text	NO
-dev_knowledge_point_config	example	text	YES
-dev_knowledge_point_config	created_at	timestamp	NO
-dev_knowledge_point_config	updated_at	timestamp	NO
-dev_question_system_config	id	int	NO
-dev_question_system_config	knowledge_point_id	int	NO
-dev_question_system_config	question_type	varchar	NO
-dev_question_system_config	question_type_name	varchar	NO
-dev_question_system_config	content	text	NO
-dev_question_system_config	format	varchar	NO
-dev_question_system_config	options	json	YES
-dev_question_system_config	answer	int	YES
-dev_question_system_config	fill_answer	text	YES
-dev_question_system_config	created_at	timestamp	NO
-dev_question_system_config	updated_at	timestamp	NO
-exa_customers	id	bigint	NO
-exa_customers	created_at	datetime	YES
-exa_customers	updated_at	datetime	YES
-exa_customers	deleted_at	datetime	YES
-exa_customers	customer_name	varchar	YES
-exa_customers	customer_phone_data	varchar	YES
-exa_customers	sys_user_id	bigint	YES
-exa_customers	sys_user_authority_id	bigint	YES
-exa_file_chunks	id	bigint	NO
-exa_file_chunks	created_at	datetime	YES
-exa_file_chunks	updated_at	datetime	YES
-exa_file_chunks	deleted_at	datetime	YES
-exa_file_chunks	exa_file_id	bigint	YES
-exa_file_chunks	file_chunk_number	bigint	YES
-exa_file_chunks	file_chunk_path	varchar	YES
-exa_file_upload_and_downloads	id	bigint	NO
-exa_file_upload_and_downloads	name	varchar	YES
-exa_file_upload_and_downloads	url	varchar	YES
-exa_file_upload_and_downloads	tag	varchar	YES
-exa_file_upload_and_downloads	key	varchar	YES
-exa_file_upload_and_downloads	created_at	datetime	YES
-exa_file_upload_and_downloads	updated_at	datetime	YES
-exa_file_upload_and_downloads	deleted_at	datetime	YES
-exa_files	id	bigint	NO
-exa_files	created_at	datetime	YES
-exa_files	updated_at	datetime	YES
-exa_files	deleted_at	datetime	YES
-exa_files	file_name	varchar	YES
-exa_files	file_md5	varchar	YES
-exa_files	file_path	varchar	YES
-exa_files	chunk_total	bigint	YES
-exa_files	is_finish	tinyint	YES
-jwt_blacklists	id	bigint	NO
-jwt_blacklists	created_at	datetime	YES
-jwt_blacklists	updated_at	datetime	YES
-jwt_blacklists	deleted_at	datetime	YES
-jwt_blacklists	jwt	text	YES
-lesson_config_info	id	bigint	NO
-lesson_config_info	chapter_id	bigint	YES
-lesson_config_info	lock_status	tinyint	YES
-lesson_config_info	review_info	text	YES
-lesson_config_info	created_at	datetime	YES
-lesson_config_info	updated_at	datetime	YES
-lesson_config_info	deleted_at	datetime	YES
-llm_words_mapping	group	text	YES
-llm_words_mapping	wordsMapping	text	YES
-llm_words_mapping	updated_at	datetime	YES
-middle_interaction_component	id	bigint	NO
-middle_interaction_component	c_type	varchar	YES
-middle_interaction_component	c_id	varchar	YES
-middle_interaction_component	title	varchar	YES
-middle_interaction_component	component_config	text	YES
-middle_interaction_component	notice_component_config	text	YES
-middle_interaction_component	err_msg	text	YES
-middle_interaction_component	audio_list	text	YES
-middle_interaction_component	text_analysis	text	YES
-middle_interaction_component	related_path	text	YES
-middle_interaction_component	read_status	tinyint	YES
-middle_interaction_component	config_status	tinyint	YES
-middle_interaction_component	lock_status	tinyint	YES
-middle_interaction_component	kp_relation_info	text	YES
-middle_interaction_component	created_at	datetime	YES
-middle_interaction_component	updated_at	datetime	YES
-middle_interaction_component	deleted_at	datetime	YES
-prompt_templates	id	int	NO
-prompt_templates	user_id	varchar	NO
-prompt_templates	user_name	varchar	YES
-prompt_templates	template_name	varchar	NO
-prompt_templates	prompt_content	text	NO
-prompt_templates	description	text	YES
-prompt_templates	model_name	varchar	YES
-prompt_templates	model_params	json	YES
-prompt_templates	is_public	tinyint	YES
-prompt_templates	usage_count	int	YES
-prompt_templates	hidden_count	int	YES
-prompt_templates	created_at	timestamp	NO
-prompt_templates	updated_at	timestamp	NO
-prompt_templates	public_at	timestamp	YES
-sys_apis	id	bigint	NO
-sys_apis	created_at	datetime	YES
-sys_apis	updated_at	datetime	YES
-sys_apis	deleted_at	datetime	YES
-sys_apis	path	varchar	YES
-sys_apis	description	varchar	YES
-sys_apis	api_group	varchar	YES
-sys_apis	method	varchar	YES
-sys_authorities	created_at	datetime	YES
-sys_authorities	updated_at	datetime	YES
-sys_authorities	deleted_at	datetime	YES
-sys_authorities	authority_id	bigint	NO
-sys_authorities	authority_name	varchar	YES
-sys_authorities	parent_id	bigint	YES
-sys_authorities	default_router	varchar	YES
-sys_authority_btns	authority_id	bigint	YES
-sys_authority_btns	sys_menu_id	bigint	YES
-sys_authority_btns	sys_base_menu_btn_id	bigint	YES
-sys_authority_menus	sys_base_menu_id	bigint	NO
-sys_authority_menus	sys_authority_authority_id	bigint	NO
-sys_auto_code_histories	id	bigint	NO
-sys_auto_code_histories	created_at	datetime	YES
-sys_auto_code_histories	updated_at	datetime	YES
-sys_auto_code_histories	deleted_at	datetime	YES
-sys_auto_code_histories	package	varchar	YES
-sys_auto_code_histories	business_db	varchar	YES
-sys_auto_code_histories	table_name	varchar	YES
-sys_auto_code_histories	request_meta	text	YES
-sys_auto_code_histories	auto_code_path	text	YES
-sys_auto_code_histories	injection_meta	text	YES
-sys_auto_code_histories	struct_name	varchar	YES
-sys_auto_code_histories	struct_cn_name	varchar	YES
-sys_auto_code_histories	api_ids	varchar	YES
-sys_auto_code_histories	flag	bigint	YES
-sys_auto_codes	id	bigint	NO
-sys_auto_codes	created_at	datetime	YES
-sys_auto_codes	updated_at	datetime	YES
-sys_auto_codes	deleted_at	datetime	YES
-sys_auto_codes	package_name	varchar	YES
-sys_auto_codes	label	varchar	YES
-sys_auto_codes	desc	varchar	YES
-sys_base_menu_btns	id	bigint	NO
-sys_base_menu_btns	created_at	datetime	YES
-sys_base_menu_btns	updated_at	datetime	YES
-sys_base_menu_btns	deleted_at	datetime	YES
-sys_base_menu_btns	name	varchar	YES
-sys_base_menu_btns	desc	varchar	YES
-sys_base_menu_btns	sys_base_menu_id	bigint	YES
-sys_base_menu_parameters	id	bigint	NO
-sys_base_menu_parameters	created_at	datetime	YES
-sys_base_menu_parameters	updated_at	datetime	YES
-sys_base_menu_parameters	deleted_at	datetime	YES
-sys_base_menu_parameters	sys_base_menu_id	bigint	YES
-sys_base_menu_parameters	type	varchar	YES
-sys_base_menu_parameters	key	varchar	YES
-sys_base_menu_parameters	value	varchar	YES
-sys_base_menus	id	bigint	NO
-sys_base_menus	created_at	datetime	YES
-sys_base_menus	updated_at	datetime	YES
-sys_base_menus	deleted_at	datetime	YES
-sys_base_menus	menu_level	bigint	YES
-sys_base_menus	parent_id	varchar	YES
-sys_base_menus	path	varchar	YES
-sys_base_menus	name	varchar	YES
-sys_base_menus	hidden	tinyint	YES
-sys_base_menus	component	varchar	YES
-sys_base_menus	sort	bigint	YES
-sys_base_menus	active_name	varchar	YES
-sys_base_menus	keep_alive	tinyint	YES
-sys_base_menus	default_menu	tinyint	YES
-sys_base_menus	title	varchar	YES
-sys_base_menus	icon	varchar	YES
-sys_base_menus	close_tab	tinyint	YES
-sys_chat_gpt_options	sk	varchar	YES
-sys_data_authority_id	sys_authority_authority_id	bigint	NO
-sys_data_authority_id	data_authority_id_authority_id	bigint	NO
-sys_dictionaries	id	bigint	NO
-sys_dictionaries	created_at	datetime	YES
-sys_dictionaries	updated_at	datetime	YES
-sys_dictionaries	deleted_at	datetime	YES
-sys_dictionaries	name	varchar	YES
-sys_dictionaries	type	varchar	YES
-sys_dictionaries	status	tinyint	YES
-sys_dictionaries	desc	varchar	YES
-sys_dictionary_details	id	bigint	NO
-sys_dictionary_details	created_at	datetime	YES
-sys_dictionary_details	updated_at	datetime	YES
-sys_dictionary_details	deleted_at	datetime	YES
-sys_dictionary_details	label	varchar	YES
-sys_dictionary_details	value	bigint	YES
-sys_dictionary_details	extend	varchar	YES
-sys_dictionary_details	status	tinyint	YES
-sys_dictionary_details	sort	bigint	YES
-sys_dictionary_details	sys_dictionary_id	bigint	YES
-sys_operation_records	id	bigint	NO
-sys_operation_records	updated_at	datetime	YES
-sys_operation_records	nick_name	varchar	YES
-sys_operation_records	body	text	YES
-sys_operation_records	path	varchar	YES
-sys_operation_records	ip	varchar	YES
-sys_operation_records	method	varchar	YES
-sys_operation_records	status	bigint	YES
-sys_operation_records	latency	bigint	YES
-sys_operation_records	agent	varchar	YES
-sys_operation_records	error_message	varchar	YES
-sys_operation_records	resp	text	YES
-sys_operation_records	user_id	bigint	YES
-sys_operation_records	deleted_at	datetime	YES
-sys_operation_records	created_at	datetime	YES
-sys_user_authority	sys_user_id	bigint	NO
-sys_user_authority	sys_authority_authority_id	bigint	NO
-sys_users	id	bigint	NO
-sys_users	uuid	varchar	YES
-sys_users	username	varchar	YES
-sys_users	password	varchar	YES
-sys_users	nick_name	varchar	YES
-sys_users	authority_id	bigint	YES
-sys_users	side_mode	varchar	YES
-sys_users	header_img	varchar	YES
-sys_users	base_color	varchar	YES
-sys_users	active_color	varchar	YES
-sys_users	phone	varchar	YES
-sys_users	email	varchar	YES
-sys_users	enable	bigint	YES
-sys_users	created_at	datetime	YES
-sys_users	updated_at	datetime	YES
-sys_users	deleted_at	datetime	YES
-unit_challenge_question	id	bigint	NO
-unit_challenge_question	category	varchar	YES
-unit_challenge_question	type	varchar	YES
-unit_challenge_question	question_set_id	varchar	YES
-unit_challenge_question	question_content	text	YES
-unit_challenge_question	notice_content	text	YES
-unit_challenge_question	err_msg	text	YES
-unit_challenge_question	related_path	text	YES
-unit_challenge_question	material_list	text	YES
-unit_challenge_question	created_at	datetime	YES
-unit_challenge_question	updated_at	datetime	YES
-unit_challenge_question	deleted_at	datetime	YES
-unit_chapter_text_parse	id	bigint	NO
-unit_chapter_text_parse	chapter_id	bigint	NO
-unit_chapter_text_parse	text_parse	mediumtext	YES
-unit_chapter_text_parse	created_at	datetime	YES
-unit_chapter_text_parse	updated_at	datetime	YES
-unit_chapter_text_parse	deleted_at	datetime	YES
-unity_chapter_section	id	bigint	NO
-unity_chapter_section	chapter_id	bigint	NO
-unity_chapter_section	section_id	bigint	NO
-unity_chapter_section	section_target	varchar	NO
-unity_chapter_section	start_section	text	YES
-unity_chapter_section	end_section	text	YES
-unity_chapter_section	component_config	text	YES
-unity_chapter_section	pre_cartoon_file	varchar	YES
-unity_chapter_section	pre_cartoon_desc	varchar	YES
-unity_chapter_section	post_cartoon_file	varchar	YES
-unity_chapter_section	post_cartoon_desc	varchar	YES
-unity_chapter_section	index	int	NO
-unity_chapter_section	status	int	NO
-unity_chapter_section	mission_id	int	NO
-unity_chapter_section	mission_name	varchar	NO
-unity_chapter_section	created_at	datetime	YES
-unity_chapter_section	updated_at	datetime	YES
-unity_chapter_section	deleted_at	datetime	YES
-user_template_preferences	id	int	NO
-user_template_preferences	user_id	varchar	NO
-user_template_preferences	template_id	int	NO
-user_template_preferences	is_hidden	tinyint	YES
-user_template_preferences	is_favorite	tinyint	YES
-user_template_preferences	created_at	timestamp	NO
-user_template_preferences	updated_at	timestamp	NO
-vala_activity	id	int	NO
-vala_activity	code	varchar	NO
-vala_activity	name	varchar	NO
-vala_activity	stime	bigint	NO
-vala_activity	etime	bigint	NO
-vala_activity	is_online	tinyint	NO
-vala_activity	times	int	NO
-vala_activity	scope	enum	NO
-vala_activity	config	text	YES
-vala_activity	created_at	datetime	YES
-vala_activity	updated_at	datetime	YES
-vala_activity	deleted_at	datetime	YES
-vala_activity_condition	id	bigint	NO
-vala_activity_condition	activity_id	int	NO
-vala_activity_condition	condition	varchar	NO
-vala_activity_condition	created_at	datetime	YES
-vala_app_account	id	bigint	NO
-vala_app_account	tel	varchar	YES
-vala_app_account	tel_encrypt	varchar	YES
-vala_app_account	name	varchar	YES
-vala_app_account	id_card	varchar	YES
-vala_app_account	status	tinyint	NO
-vala_app_account	pay_status	tinyint	YES
-vala_app_account	login_times	int	YES
-vala_app_account	remark	text	YES
-vala_app_account	key_from	varchar	NO
-vala_app_account	download_channel	varchar	YES
-vala_app_account	created_at	datetime	YES
-vala_app_account	updated_at	datetime	YES
-vala_app_account	deleted_at	datetime	YES
-vala_app_character	id	bigint	NO
-vala_app_character	account_id	bigint	YES
-vala_app_character	nickname	varchar	YES
-vala_app_character	gender	tinyint	YES
-vala_app_character	birthday	varchar	YES
-vala_app_character	avatar	text	YES
-vala_app_character	spine_name	varchar	YES
-vala_app_character	latest_login	datetime	YES
-vala_app_character	reputation	char	YES
-vala_app_character	robots_cnt	int	YES
-vala_app_character	head_image	varchar	YES
-vala_app_character	status	tinyint	YES
-vala_app_character	purchase_season_package	text	YES
-vala_app_character	total_sp_point	int	YES
-vala_app_character	balance_sp_point	int	YES
-vala_app_character	point	int	YES
-vala_app_character	pay_status	tinyint	YES
-vala_app_character	created_at	datetime	YES
-vala_app_character	updated_at	datetime	YES
-vala_app_character	deleted_at	datetime	YES
-vala_app_character	mood	int	NO
-vala_app_character	mood_start_time	bigint	NO
-vala_app_log_upload	id	bigint	NO
-vala_app_log_upload	name	varchar	YES
-vala_app_log_upload	url	varchar	YES
-vala_app_log_upload	created_at	datetime	YES
-vala_app_log_upload	updated_at	datetime	YES
-vala_app_log_upload	deleted_at	datetime	YES
-vala_applet_user	id	bigint	NO
-vala_applet_user	tel	varchar	YES
-vala_applet_user	open_id	varchar	YES
-vala_applet_user	session_key	varchar	YES
-vala_applet_user	union_id	varchar	YES
-vala_applet_user	status	tinyint	NO
-vala_applet_user	created_at	datetime	YES
-vala_applet_user	updated_at	datetime	YES
-vala_applet_user	deleted_at	datetime	YES
-vala_area	id	bigint	NO
-vala_area	cn_name	varchar	NO
-vala_area	en_name	varchar	NO
-vala_area	desc	text	YES
-vala_area	location_num	int	YES
-vala_area	is_hide	tinyint	NO
-vala_area	created_at	datetime	YES
-vala_area	updated_at	datetime	YES
-vala_area	deleted_at	datetime	YES
-vala_area_location	id	bigint	NO
-vala_area_location	location_id	int	NO
-vala_area_location	area_id	bigint	NO
-vala_area_location	created_at	datetime	YES
-vala_area_location	updated_at	datetime	YES
-vala_area_location	deleted_at	datetime	YES
-vala_card_package	id	bigint	NO
-vala_card_package	name	varchar	YES
-vala_card_package	icon_name	varchar	YES
-vala_card_package	card_face_name	varchar	YES
-vala_card_package	card_back_name	varchar	YES
-vala_card_package	card_config	text	YES
-vala_card_package	relation_story_id	int	YES
-vala_card_package	created_at	datetime	YES
-vala_card_package	updated_at	datetime	YES
-vala_card_package	deleted_at	datetime	YES
-vala_card_plot	id	bigint	NO
-vala_card_plot	cn_name	varchar	YES
-vala_card_plot	en_name	varchar	YES
-vala_card_plot	plot_desc	varchar	YES
-vala_card_plot	plot_sentence	text	YES
-vala_card_plot	card_face_name	varchar	YES
-vala_card_plot	card_back_name	varchar	YES
-vala_card_plot	white_plot_card_config	text	YES
-vala_card_plot	flash_plot_card_config	text	YES
-vala_card_plot	relation_story_id	int	YES
-vala_card_plot	created_at	datetime	YES
-vala_card_plot	updated_at	datetime	YES
-vala_card_plot	deleted_at	datetime	YES
-vala_card_plot_new	id	bigint	NO
-vala_card_plot_new	cn_name	varchar	YES
-vala_card_plot_new	en_name	varchar	YES
-vala_card_plot_new	rarity	char	YES
-vala_card_plot_new	base_card_main_pic	varchar	YES
-vala_card_plot_new	advanced_card_main_pic	varchar	YES
-vala_card_plot_new	plot_desc	varchar	YES
-vala_card_plot_new	detail_title	varchar	YES
-vala_card_plot_new	detail_desc	text	YES
-vala_card_plot_new	relation_story_id	int	NO
-vala_card_plot_new	relation_chapter_id	int	NO
-vala_card_plot_new	created_at	datetime	YES
-vala_card_plot_new	updated_at	datetime	YES
-vala_card_plot_new	deleted_at	datetime	YES
-vala_card_use_statistics	id	bigint	NO
-vala_card_use_statistics	user_id	int	NO
-vala_card_use_statistics	card_type	tinyint	YES
-vala_card_use_statistics	card_id	int	YES
-vala_card_use_statistics	count_num	int	NO
-vala_card_use_statistics	created_at	datetime	YES
-vala_card_use_statistics	updated_at	datetime	YES
-vala_card_use_statistics	deleted_at	datetime	YES
-vala_chat	id	bigint	NO
-vala_chat	user_id	bigint	NO
-vala_chat	mod_id	bigint	NO
-vala_chat	npc_id	bigint	NO
-vala_chat	chat_id	varchar	NO
-vala_chat	msg	varchar	NO
-vala_chat	assistant	text	YES
-vala_chat	created_at	datetime	YES
-vala_chat	updated_at	datetime	YES
-vala_chat	deleted_at	datetime	YES
-vala_chatbot	id	bigint	NO
-vala_chatbot	type	varchar	NO
-vala_chatbot	ver	varchar	NO
-vala_chatbot	title	varchar	NO
-vala_chatbot	desc	varchar	NO
-vala_chatbot	config	text	NO
-vala_chatbot	created_at	datetime	YES
-vala_chatbot	updated_at	datetime	YES
-vala_chatbot	deleted_at	datetime	YES
-vala_chatbot	source_type	varchar	NO
-vala_component_chat_config	id	bigint	NO
-vala_component_chat_config	component_id	bigint	YES
-vala_component_chat_config	title	varchar	NO
-vala_component_chat_config	type	varchar	YES
-vala_component_chat_config	desc	varchar	NO
-vala_component_chat_config	config	text	NO
-vala_component_chat_config	created_at	datetime	YES
-vala_component_chat_config	updated_at	datetime	YES
-vala_component_chat_config	deleted_at	datetime	YES
-vala_friend_relation	id	int	NO
-vala_friend_relation	resource_id	int	NO
-vala_friend_relation	relation_type	tinyint	YES
-vala_friend_relation	chapter_id	int	NO
-vala_friend_relation	component_id	int	NO
-vala_friend_relation	interact_times	int	NO
-vala_friend_relation	favor_level	int	NO
-vala_friend_relation	content_show	varchar	YES
-vala_friend_relation	created_at	datetime	YES
-vala_friend_relation	updated_at	datetime	YES
-vala_friend_relation	deleted_at	datetime	YES
-vala_game_chapter	id	bigint	NO
-vala_game_chapter	game_id	bigint	YES
-vala_game_chapter	season_package_id	bigint	YES
-vala_game_chapter	en_name	varchar	NO
-vala_game_chapter	cn_name	varchar	NO
-vala_game_chapter	scene_id	varchar	NO
-vala_game_chapter	lesson_type	tinyint	YES
-vala_game_chapter	event_id	int	YES
-vala_game_chapter	desc	text	NO
-vala_game_chapter	index	smallint	YES
-vala_game_chapter	learn_config	text	YES
-vala_game_chapter	scene_path	text	YES
-vala_game_chapter	self_knowledge_card_config	text	YES
-vala_game_chapter	other_knowledge_card_config	text	YES
-vala_game_chapter	review_knowledge_card_config	text	YES
-vala_game_chapter	chapter_extension_config	text	YES
-vala_game_chapter	created_at	datetime	YES
-vala_game_chapter	updated_at	datetime	YES
-vala_game_chapter	deleted_at	datetime	YES
-vala_game_chapter_section	id	bigint	NO
-vala_game_chapter_section	chapter_id	bigint	YES
-vala_game_chapter_section	section_target	varchar	YES
-vala_game_chapter_section	start_section	text	YES
-vala_game_chapter_section	end_section	text	YES
-vala_game_chapter_section	pre_cartoon_file	varchar	YES
-vala_game_chapter_section	pre_cartoon_desc	varchar	YES
-vala_game_chapter_section	post_cartoon_file	varchar	YES
-vala_game_chapter_section	post_cartoon_desc	varchar	YES
-vala_game_chapter_section	special_event_type	tinyint	YES
-vala_game_chapter_section	special_event_en_content	text	YES
-vala_game_chapter_section	special_event_cn_content	text	YES
-vala_game_chapter_section	standard_event_type	tinyint	YES
-vala_game_chapter_section	standard_event_en_content	text	YES
-vala_game_chapter_section	standard_event_cn_content	text	YES
-vala_game_chapter_section	story_remind_type	tinyint	YES
-vala_game_chapter_section	story_remind_en_content	text	YES
-vala_game_chapter_section	story_remind_cn_content	text	YES
-vala_game_chapter_section	component_config	text	YES
-vala_game_chapter_section	mission_id	int	YES
-vala_game_chapter_section	mission_name	varchar	YES
-vala_game_chapter_section	index	int	YES
-vala_game_chapter_section	created_at	datetime	YES
-vala_game_chapter_section	updated_at	datetime	YES
-vala_game_chapter_section	deleted_at	datetime	YES
-vala_game_component_brain_config	id	bigint	NO
-vala_game_component_brain_config	brain_config	text	YES
-vala_game_component_brain_config	created_at	datetime	YES
-vala_game_component_brain_config	updated_at	datetime	YES
-vala_game_component_brain_config	deleted_at	datetime	YES
-vala_game_info	id	bigint	NO
-vala_game_info	en_name	varchar	NO
-vala_game_info	cn_name	varchar	NO
-vala_game_info	game_code	varchar	NO
-vala_game_info	game_category	tinyint	NO
-vala_game_info	season_package_id	bigint	YES
-vala_game_info	level_type	tinyint	NO
-vala_game_info	desc	text	NO
-vala_game_info	is_hide	tinyint	NO
-vala_game_info	card_package_id	text	YES
-vala_game_info	card_plot_id	text	YES
-vala_game_info	index	int	YES
-vala_game_info	plot_card_config	text	YES
-vala_game_info	created_at	datetime	YES
-vala_game_info	updated_at	datetime	YES
-vala_game_info	deleted_at	datetime	YES
-vala_game_season_package	id	bigint	NO
-vala_game_season_package	cn_name	varchar	YES
-vala_game_season_package	en_name	varchar	YES
-vala_game_season_package	season_of_year	int	NO
-vala_game_season_package	season_of_quarter	int	NO
-vala_game_season_package	season_cn_desc	text	YES
-vala_game_season_package	season_en_desc	text	YES
-vala_game_season_package	is_hide	tinyint	YES
-vala_game_season_package	index	int	YES
-vala_game_season_package	level	char	YES
-vala_game_season_package	created_at	datetime	YES
-vala_game_season_package	updated_at	datetime	YES
-vala_game_season_package	deleted_at	datetime	YES
-vala_invitation_code	id	bigint	NO
-vala_invitation_code	code	varchar	YES
-vala_invitation_code	owner	varchar	YES
-vala_invitation_code	status	tinyint	NO
-vala_invitation_code	created_at	datetime	YES
-vala_invitation_code	updated_at	datetime	YES
-vala_invitation_code	deleted_at	datetime	YES
-vala_invitation_code_log	id	bigint	NO
-vala_invitation_code_log	code_id	bigint	NO
-vala_invitation_code_log	ip_address	varchar	YES
-vala_invitation_code_log	city	varchar	YES
-vala_invitation_code_log	created_at	datetime	YES
-vala_invitation_code_log	updated_at	datetime	YES
-vala_invitation_code_log	deleted_at	timestamp	YES
-vala_km	id	bigint	NO
-vala_km	type	varchar	YES
-vala_km	km_id	varchar	YES
-vala_km	title	varchar	YES
-vala_km	bg_img	varchar	YES
-vala_km	kp_set	text	YES
-vala_km	sum_explain	text	YES
-vala_km	notice_km_content	text	YES
-vala_km	err_msg	text	YES
-vala_km	material_list	text	YES
-vala_km	created_at	datetime	YES
-vala_km	updated_at	datetime	YES
-vala_km	deleted_at	datetime	YES
-vala_knowledge_module	id	bigint	NO
-vala_knowledge_module	cn_title	varchar	NO
-vala_knowledge_module	en_title	varchar	NO
-vala_knowledge_module	module_type	tinyint	YES
-vala_knowledge_module	description	text	NO
-vala_knowledge_module	word_knowledge_point	text	YES
-vala_knowledge_module	phoneme_knowledge_point	text	YES
-vala_knowledge_module	grammar_knowledge_point	text	YES
-vala_knowledge_module	sentence_knowledge_point	text	YES
-vala_knowledge_module	dialogue_knowledge_point	text	YES
-vala_knowledge_module	relation_test_question	text	YES
-vala_knowledge_module	created_at	datetime	YES
-vala_knowledge_module	updated_at	datetime	YES
-vala_knowledge_module	deleted_at	datetime	YES
-vala_knowledge_point	id	bigint	NO
-vala_knowledge_point	title	varchar	YES
-vala_knowledge_point	knowledge_point_type	tinyint	NO
-vala_knowledge_point	knowledge_point_content	text	YES
-vala_knowledge_point	relation_knowledge_point	text	YES
-vala_knowledge_point	created_at	datetime	YES
-vala_knowledge_point	updated_at	datetime	YES
-vala_knowledge_point	deleted_at	datetime	YES
-vala_kp	id	bigint	NO
-vala_kp	type	varchar	YES
-vala_kp	kp_id	varchar	YES
-vala_kp	title	varchar	YES
-vala_kp	ipa	varchar	YES
-vala_kp	vala_level	varchar	YES
-vala_kp	cambridge_level	varchar	YES
-vala_kp	cefr_level	varchar	YES
-vala_kp	nc_level	varchar	YES
-vala_kp	en_desc	varchar	YES
-vala_kp	scene	varchar	YES
-vala_kp	knowledge_point_content	text	YES
-vala_kp	notice_kp_content	text	YES
-vala_kp	err_msg	text	YES
-vala_kp	related_path	text	YES
-vala_kp	created_at	datetime	YES
-vala_kp	updated_at	datetime	YES
-vala_kp	deleted_at	datetime	YES
-vala_kp_card	id	bigint	NO
-vala_kp_card	kp_id	varchar	YES
-vala_kp_card	category	varchar	YES
-vala_kp_card	exp_type	varchar	YES
-vala_kp_card	card_content	text	YES
-vala_kp_card	question	text	YES
-vala_kp_card	notice_content	text	YES
-vala_kp_card	err_msg	varchar	YES
-vala_kp_card	created_at	datetime	YES
-vala_kp_card	updated_at	datetime	YES
-vala_kp_card	deleted_at	datetime	YES
-vala_kp_question	id	bigint	NO
-vala_kp_question	kp_id	varchar	YES
-vala_kp_question	category	varchar	YES
-vala_kp_question	skill	varchar	YES
-vala_kp_question	type	varchar	YES
-vala_kp_question	question	text	YES
-vala_kp_question	created_at	datetime	YES
-vala_kp_question	updated_at	datetime	YES
-vala_kp_question	deleted_at	datetime	YES
-vala_learn_ability	id	bigint	NO
-vala_learn_ability	en_name	varchar	NO
-vala_learn_ability	cn_name	varchar	NO
-vala_learn_ability	season_package_id	bigint	YES
-vala_learn_ability	game_id	bigint	YES
-vala_learn_ability	chapter_id	bigint	YES
-vala_learn_ability	ability_type	tinyint	NO
-vala_learn_ability	sub_component_type	tinyint	NO
-vala_learn_ability	component_id	bigint	YES
-vala_learn_ability	created_at	datetime	YES
-vala_learn_ability	updated_at	datetime	YES
-vala_learn_ability	deleted_at	datetime	YES
-vala_location	id	bigint	NO
-vala_location	location_id	int	NO
-vala_location	cn_name	varchar	NO
-vala_location	en_name	varchar	NO
-vala_location	desc	text	YES
-vala_location	area_id	bigint	NO
-vala_location	is_related	tinyint	NO
-vala_location	location_size	tinyint	NO
-vala_location	actual_size	varchar	NO
-vala_location	resource_url	varchar	NO
-vala_location	created_at	datetime	YES
-vala_location	updated_at	datetime	YES
-vala_location	deleted_at	datetime	YES
-vala_minimaxi_tts	id	bigint	NO
-vala_minimaxi_tts	local_name	varchar	NO
-vala_minimaxi_tts	voice_id	varchar	NO
-vala_minimaxi_tts	gender	varchar	YES
-vala_minimaxi_tts	gender_desc	varchar	YES
-vala_minimaxi_tts	created_at	datetime	YES
-vala_minimaxi_tts	updated_at	datetime	YES
-vala_minimaxi_tts	deleted_at	datetime	YES
-vala_picture_text_material	id	bigint	NO
-vala_picture_text_material	material_name	varchar	YES
-vala_picture_text_material	material_type	varchar	YES
-vala_picture_text_material	material_content	text	YES
-vala_picture_text_material	created_at	datetime	YES
-vala_picture_text_material	updated_at	datetime	YES
-vala_picture_text_material	deleted_at	datetime	YES
-vala_relation_level	id	int	NO
-vala_relation_level	level	int	NO
-vala_relation_level	upgrade_need_value	int	NO
-vala_relation_level	total_require_value	int	NO
-vala_relation_level	favorability_lower_limit	int	NO
-vala_relation_level	favorability_upper_limit	int	NO
-vala_relation_level	created_at	datetime	YES
-vala_relation_level	updated_at	datetime	YES
-vala_relation_level	deleted_at	datetime	YES
-vala_resource_base	id	bigint	NO
-vala_resource_base	cn_name	varchar	NO
-vala_resource_base	en_name	varchar	NO
-vala_resource_base	type	varchar	NO
-vala_resource_base	voice_list	text	YES
-vala_resource_base	voice_id	varchar	YES
-vala_resource_base	is_hide	tinyint	YES
-vala_resource_base	gender	tinyint	YES
-vala_resource_base	age	int	YES
-vala_resource_base	half_body	varchar	YES
-vala_resource_base	thumbnail	varchar	YES
-vala_resource_base	profile	text	YES
-vala_resource_base	spine_file_name	varchar	YES
-vala_resource_base	is_friend	tinyint	YES
-vala_resource_base	nick_name	varchar	YES
-vala_resource_base	mbti	varchar	YES
-vala_resource_base	interest	varchar	YES
-vala_resource_base	birthday	varchar	YES
-vala_resource_base	tips	varchar	YES
-vala_resource_base	desc	text	YES
-vala_resource_base	stream_voice_id	varchar	YES
-vala_resource_base	created_at	datetime	YES
-vala_resource_base	updated_at	datetime	YES
-vala_resource_base	deleted_at	datetime	YES
-vala_scene	id	bigint	NO
-vala_scene	name	varchar	NO
-vala_scene	scene_id	varchar	NO
-vala_scene	location_id	int	NO
-vala_scene	desc	text	YES
-vala_scene	is_related	tinyint	YES
-vala_scene	created_at	datetime	YES
-vala_scene	updated_at	datetime	YES
-vala_scene	deleted_at	datetime	YES
-vala_scene_resource_v3	id	int	NO
-vala_scene_resource_v3	location_id	int	NO
-vala_scene_resource_v3	scene_id	varchar	NO
-vala_scene_resource_v3	resource_id	int	NO
-vala_scene_resource_v3	created_at	datetime	YES
-vala_scene_resource_v3	updated_at	datetime	YES
-vala_scene_resource_v3	deleted_at	timestamp	YES
-vala_seasonal_ticket	id	bigint	NO
-vala_seasonal_ticket	unique_code	varchar	YES
-vala_seasonal_ticket	account_id	bigint	YES
-vala_seasonal_ticket	account_name	varchar	YES
-vala_seasonal_ticket	tel	varchar	YES
-vala_seasonal_ticket	give_time	int	YES
-vala_seasonal_ticket	expire_time	int	YES
-vala_seasonal_ticket	expire_type	tinyint	NO
-vala_seasonal_ticket	ticket_type	tinyint	NO
-vala_seasonal_ticket	used_time	int	YES
-vala_seasonal_ticket	season_package_id	bigint	YES
-vala_seasonal_ticket	season_package_name	varchar	YES
-vala_seasonal_ticket	status	tinyint	NO
-vala_seasonal_ticket	created_at	datetime	YES
-vala_seasonal_ticket	updated_at	datetime	YES
-vala_seasonal_ticket	deleted_at	datetime	YES
-vala_tts_voice	id	bigint	NO
-vala_tts_voice	name	varchar	NO
-vala_tts_voice	voice_id	varchar	NO
-vala_tts_voice	preview_url	varchar	YES
-vala_tts_voice	model_id	varchar	YES
-vala_tts_voice	type	tinyint	NO
-vala_tts_voice	created_at	datetime	YES
-vala_tts_voice	updated_at	datetime	YES
-vala_tts_voice	deleted_at	datetime	YES
-vala_user_card_component_record	id	bigint	NO
-vala_user_card_component_record	user_id	bigint	NO
-vala_user_card_component_record	game_id	bigint	YES
-vala_user_card_component_record	chapter_id	bigint	YES
-vala_user_card_component_record	component_type	bigint	YES
-vala_user_card_component_record	sub_component_type	bigint	YES
-vala_user_card_component_record	component_id	bigint	NO
-vala_user_card_component_record	card_content	text	YES
-vala_user_card_component_record	is_new	tinyint	NO
-vala_user_card_component_record	created_at	datetime	YES
-vala_user_card_component_record	updated_at	datetime	YES
-vala_user_card_component_record	deleted_at	datetime	YES
-vala_user_card_plot_upgrade	id	bigint	NO
-vala_user_card_plot_upgrade	user_id	bigint	NO
-vala_user_card_plot_upgrade	chapter_id	bigint	YES
-vala_user_card_plot_upgrade	plot_card_id	bigint	NO
-vala_user_card_plot_upgrade	is_have_upgrade	bigint	YES
-vala_user_card_plot_upgrade	white_card_status	tinyint	NO
-vala_user_card_plot_upgrade	flash_card_status	tinyint	NO
-vala_user_card_plot_upgrade	is_new	tinyint	YES
-vala_user_card_plot_upgrade	created_at	datetime	YES
-vala_user_card_plot_upgrade	updated_at	datetime	YES
-vala_user_card_plot_upgrade	deleted_at	datetime	YES
-vala_user_chapter_finish_record	id	bigint	NO
-vala_user_chapter_finish_record	user_id	bigint	NO
-vala_user_chapter_finish_record	game_id	bigint	YES
-vala_user_chapter_finish_record	chapter_id	bigint	NO
-vala_user_chapter_finish_record	created_at	datetime	YES
-vala_user_chapter_finish_record	updated_at	datetime	YES
-vala_user_chapter_finish_record	deleted_at	datetime	YES
-vala_user_chat_log	id	bigint	NO
-vala_user_chat_log	user_id	bigint	NO
-vala_user_chat_log	game_id	bigint	YES
-vala_user_chat_log	chapter_id	bigint	YES
-vala_user_chat_log	sub_component_type	bigint	YES
-vala_user_chat_log	component_id	bigint	NO
-vala_user_chat_log	npc_id	bigint	NO
-vala_user_chat_log	session_id	varchar	NO
-vala_user_chat_log	dialog_list	text	YES
-vala_user_chat_log	dialog_score	text	YES
-vala_user_chat_log	tencent_soe	text	YES
-vala_user_chat_log	chat_review_resp	text	YES
-vala_user_chat_log	cost_time	int	NO
-vala_user_chat_log	insert_time	datetime	YES
-vala_user_chat_log	log_type	tinyint	YES
-vala_user_chat_log	deal_status	tinyint	YES
-vala_user_chat_log	created_at	datetime	YES
-vala_user_chat_log	updated_at	datetime	YES
-vala_user_chat_log	deleted_at	datetime	YES
-vala_user_drama_log	id	bigint	NO
-vala_user_drama_log	user_id	bigint	NO
-vala_user_drama_log	game_id	bigint	YES
-vala_user_drama_log	chapter_id	bigint	YES
-vala_user_drama_log	component_id	bigint	YES
-vala_user_drama_log	npc_id	bigint	YES
-vala_user_drama_log	created_at	datetime	YES
-vala_user_drama_log	updated_at	datetime	YES
-vala_user_drama_log	deleted_at	datetime	YES
-vala_user_friend_info	id	bigint	NO
-vala_user_friend_info	user_id	bigint	NO
-vala_user_friend_info	npc_id	bigint	NO
-vala_user_friend_info	favorability_value	int	YES
-vala_user_friend_info	level	int	YES
-vala_user_friend_info	relation_type	tinyint	YES
-vala_user_friend_info	interact_times	int	YES
-vala_user_friend_info	story_id	int	YES
-vala_user_friend_info	created_at	datetime	YES
-vala_user_friend_info	updated_at	datetime	YES
-vala_user_friend_info	deleted_at	datetime	YES
-vala_user_game_finish_record	id	bigint	NO
-vala_user_game_finish_record	user_id	bigint	NO
-vala_user_game_finish_record	game_id	bigint	YES
-vala_user_game_finish_record	created_at	datetime	YES
-vala_user_game_finish_record	updated_at	datetime	YES
-vala_user_game_finish_record	deleted_at	datetime	YES
-vala_user_interact_chat_log	id	bigint	NO
-vala_user_interact_chat_log	user_id	bigint	NO
-vala_user_interact_chat_log	npc_id	bigint	NO
-vala_user_interact_chat_log	session_id	varchar	YES
-vala_user_interact_chat_log	type	varchar	NO
-vala_user_interact_chat_log	name	varchar	NO
-vala_user_interact_chat_log	text	varchar	NO
-vala_user_interact_chat_log	interactive_type	text	YES
-vala_user_interact_chat_log	interactive_selected	tinyint	YES
-vala_user_interact_chat_log	chat_time	datetime	YES
-vala_user_interact_chat_log	created_at	datetime	YES
-vala_user_interact_chat_log	updated_at	datetime	YES
-vala_user_interact_chat_log	deleted_at	datetime	YES
-vala_user_interact_log	id	bigint	NO
-vala_user_interact_log	user_id	bigint	NO
-vala_user_interact_log	component_type	tinyint	YES
-vala_user_interact_log	relation_type	tinyint	YES
-vala_user_interact_log	npc_id	bigint	NO
-vala_user_interact_log	session_id	varchar	NO
-vala_user_interact_log	summary	text	NO
-vala_user_interact_log	dialog_list	text	YES
-vala_user_interact_log	dialog_score	text	YES
-vala_user_interact_log	tencent_soe	text	YES
-vala_user_interact_log	chat_review_resp	text	YES
-vala_user_interact_log	cost_time	int	NO
-vala_user_interact_log	insert_time	datetime	YES
-vala_user_interact_log	created_at	datetime	YES
-vala_user_interact_log	updated_at	datetime	YES
-vala_user_interact_log	deleted_at	datetime	YES
-vala_user_interact_log	favor_value	int	YES
-vala_user_learn_record	id	int	NO
-vala_user_learn_record	user_id	bigint	NO
-vala_user_learn_record	game_id	bigint	NO
-vala_user_learn_record	chapter_id	bigint	NO
-vala_user_learn_record	score	float	NO
-vala_user_learn_record	score_level	char	YES
-vala_user_learn_record	unique_code	varchar	YES
-vala_user_learn_record	source_type	tinyint	YES
-vala_user_learn_record	created_at	datetime	YES
-vala_user_learn_record	updated_at	datetime	YES
-vala_user_learn_record	deleted_at	timestamp	YES
-vala_user_long_term_memory	id	bigint	NO
-vala_user_long_term_memory	user_id	int	YES
-vala_user_long_term_memory	npc_id	int	YES
-vala_user_long_term_memory	robot_id	int	YES
-vala_user_long_term_memory	config_id	int	YES
-vala_user_long_term_memory	session_id	varchar	YES
-vala_user_long_term_memory	log_id	int	YES
-vala_user_long_term_memory	source	varchar	YES
-vala_user_long_term_memory	source_type	varchar	YES
-vala_user_long_term_memory	memory_type	varchar	YES
-vala_user_long_term_memory	sub_type	varchar	YES
-vala_user_long_term_memory	user_content	text	YES
-vala_user_long_term_memory	system_content	text	YES
-vala_user_long_term_memory	created_at	datetime	YES
-vala_user_long_term_memory	updated_at	datetime	YES
-vala_user_long_term_memory	deleted_at	datetime	YES
-vala_user_robot	id	int	NO
-vala_user_robot	user_id	bigint	NO
-vala_user_robot	robot_id	bigint	NO
-vala_user_robot	robot_level	tinyint	YES
-vala_user_robot	created_at	datetime	YES
-vala_user_robot	updated_at	datetime	YES
-vala_user_robot	deleted_at	timestamp	YES
-vala_user_word_statistics	id	bigint	NO
-vala_user_word_statistics	user_id	int	NO
-vala_user_word_statistics	word_pattern_id	varchar	YES
-vala_user_word_statistics	count_num	int	NO
-vala_user_word_statistics	content_type	char	YES
-vala_user_word_statistics	source_type	tinyint	YES
-vala_user_word_statistics	created_at	datetime	YES
-vala_user_word_statistics	updated_at	datetime	YES
-vala_user_word_statistics	deleted_at	datetime	YES
-vala_wx_chatbot	id	bigint	NO
-vala_wx_chatbot	type	varchar	NO
-vala_wx_chatbot	component_id	bigint	YES
-vala_wx_chatbot	chatbot_id	bigint	YES
-vala_wx_chatbot	title	varchar	NO
-vala_wx_chatbot	desc	text	NO
-vala_wx_chatbot	desc_detail	text	NO
-vala_wx_chatbot	npc_id	bigint	NO
-vala_wx_chatbot	target	varchar	NO
-vala_wx_chatbot	index	int	YES
-vala_wx_chatbot	complete_story	text	YES
-vala_wx_chatbot	limit_type	int	YES
-vala_wx_chatbot	limit_data	int	YES
-vala_wx_chatbot	created_at	datetime	YES
-vala_wx_chatbot	updated_at	datetime	YES
-vala_wx_chatbot	deleted_at	datetime	YES
-vala_wxchat_history	id	bigint	NO
-vala_wxchat_history	user_id	bigint	YES
-vala_wxchat_history	wx_chatbot_id	bigint	YES
-vala_wxchat_history	chatbot_id	bigint	YES
-vala_wxchat_history	session_id	varchar	YES
-vala_wxchat_history	chat_content	text	YES
-vala_wxchat_history	question_result	text	YES
-vala_wxchat_history	status	tinyint	YES
-vala_wxchat_history	created_at	datetime	YES
-vala_wxchat_history	updated_at	datetime	YES
-vala_wxchat_history	deleted_at	datetime	YES
-voice_info	id	bigint	NO
-voice_info	voice_id	varchar	YES
-voice_info	elabs_id	varchar	YES
-voice_info	voice_name	varchar	YES
-voice_info	des	varchar	YES
-voice_info	gender	varchar	YES
-voice_info	mini_speed	float	YES
-voice_info	mini_volume	float	YES
-voice_info	mini_emotion	varchar	YES
-voice_info	elabs_speed	float	YES
-voice_info	default_platform	varchar	YES
-voice_info	created_at	datetime	YES
-voice_info	updated_at	datetime	YES
-voice_info	deleted_at	datetime	YES
diff --git a/database/pg_online_full.txt b/database/pg_online_full.txt
deleted file mode 100644
index e7f6dc2..0000000
--- a/database/pg_online_full.txt
+++ /dev/null
@@ -1,678 +0,0 @@
-               table_name               |       column_name       |        data_type         | is_nullable 
-----------------------------------------+-------------------------+--------------------------+-------------
- account_activity_count                 | id                      | bigint                   | NO
- account_activity_count                 | time_period             | integer                  | YES
- account_activity_count                 | counts                  | integer                  | YES
- account_activity_count                 | stat_date               | text                     | NO
- account_activity_count                 | created_time            | timestamp with time zone | NO
- account_detail_info                    | id                      | bigint                   | NO
- account_detail_info                    | account_id              | integer                  | YES
- account_detail_info                    | login_time              | text                     | YES
- account_detail_info                    | device                  | text                     | YES
- account_detail_info                    | device_os               | text                     | YES
- account_detail_info                    | login_address           | text                     | YES
- account_detail_info                    | login_times             | integer                  | YES
- account_detail_info                    | created_time            | timestamp with time zone | NO
- account_detail_info                    | phone_login_times       | integer                  | YES
- account_device                         | id                      | bigint                   | NO
- account_device                         | account_id              | integer                  | YES
- account_device                         | device                  | text                     | YES
- account_device                         | created_time            | timestamp with time zone | NO
- account_login                          | id                      | bigint                   | NO
- account_login                          | account_id              | integer                  | YES
- account_login                          | login_date              | text                     | NO
- account_login                          | created_time            | timestamp with time zone | NO
- account_login                          | status                  | integer                  | NO
- account_paid_activity_count            | id                      | integer                  | NO
- account_paid_activity_count            | time_period             | integer                  | YES
- account_paid_activity_count            | counts                  | integer                  | YES
- account_paid_activity_count            | stat_date               | text                     | NO
- account_paid_activity_count            | created_time            | timestamp with time zone | NO
- bi_level_unit_lesson                   | course_level            | character varying        | YES
- bi_level_unit_lesson                   | course_season           | character varying        | YES
- bi_level_unit_lesson                   | course_unit             | character varying        | YES
- bi_level_unit_lesson                   | course_lesson           | character varying        | YES
- bi_level_unit_lesson                   | id                      | integer                  | YES
- bi_refund_order                        | id                      | bigint                   | NO
- bi_refund_order                        | account_id              | bigint                   | YES
- bi_refund_order                        | out_trade_no            | character varying        | YES
- bi_refund_order                        | trade_no                | character varying        | YES
- bi_refund_order                        | refund_amount           | character varying        | YES
- bi_refund_order                        | created_at              | timestamp with time zone | YES
- bi_refund_order                        | updated_at              | timestamp with time zone | YES
- bi_refund_order                        | deleted_at              | timestamp with time zone | YES
- bi_refund_order                        | refund_amount_int       | integer                  | NO
- bi_refund_order                        | reason                  | text                     | NO
- bi_refund_order                        | sale_channel            | integer                  | NO
- bi_refund_order                        | status                  | integer                  | NO
- bi_refund_order                        | is_admin                | boolean                  | NO
- bi_refund_order                        | channel_refund_id       | bigint                   | NO
- bi_refund_order                        | refund_ticket_ids       | text                     | YES
- bi_refund_order                        | refund_type             | integer                  | NO
- bi_refund_order                        | refund_method           | integer                  | NO
- bi_refund_order                        | after_sale_no           | text                     | YES
- bi_user_chapter_play_record_0          | id                      | bigint                   | NO
- bi_user_chapter_play_record_0          | user_id                 | bigint                   | YES
- bi_user_chapter_play_record_0          | chapter_id              | bigint                   | YES
- bi_user_chapter_play_record_0          | chapter_unique_id       | character varying        | YES
- bi_user_chapter_play_record_0          | play_status             | smallint                 | YES
- bi_user_chapter_play_record_0          | story_id                | bigint                   | YES
- bi_user_chapter_play_record_0          | created_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_0          | updated_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_0          | deleted_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_0          | map_position            | text                     | YES
- bi_user_chapter_play_record_0          | dialog_info             | text                     | YES
- bi_user_chapter_play_record_0          | level                   | character varying        | YES
- bi_user_chapter_play_record_1          | id                      | bigint                   | NO
- bi_user_chapter_play_record_1          | user_id                 | bigint                   | YES
- bi_user_chapter_play_record_1          | chapter_id              | bigint                   | YES
- bi_user_chapter_play_record_1          | chapter_unique_id       | character varying        | YES
- bi_user_chapter_play_record_1          | play_status             | smallint                 | YES
- bi_user_chapter_play_record_1          | story_id                | bigint                   | YES
- bi_user_chapter_play_record_1          | created_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_1          | updated_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_1          | deleted_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_1          | map_position            | text                     | YES
- bi_user_chapter_play_record_1          | dialog_info             | text                     | YES
- bi_user_chapter_play_record_1          | level                   | character varying        | YES
- bi_user_chapter_play_record_2          | id                      | bigint                   | NO
- bi_user_chapter_play_record_2          | user_id                 | bigint                   | YES
- bi_user_chapter_play_record_2          | chapter_id              | bigint                   | YES
- bi_user_chapter_play_record_2          | chapter_unique_id       | character varying        | YES
- bi_user_chapter_play_record_2          | play_status             | smallint                 | YES
- bi_user_chapter_play_record_2          | story_id                | bigint                   | YES
- bi_user_chapter_play_record_2          | created_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_2          | updated_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_2          | deleted_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_2          | map_position            | text                     | YES
- bi_user_chapter_play_record_2          | dialog_info             | text                     | YES
- bi_user_chapter_play_record_2          | level                   | character varying        | YES
- bi_user_chapter_play_record_3          | id                      | bigint                   | NO
- bi_user_chapter_play_record_3          | user_id                 | bigint                   | YES
- bi_user_chapter_play_record_3          | chapter_id              | bigint                   | YES
- bi_user_chapter_play_record_3          | chapter_unique_id       | character varying        | YES
- bi_user_chapter_play_record_3          | play_status             | smallint                 | YES
- bi_user_chapter_play_record_3          | story_id                | bigint                   | YES
- bi_user_chapter_play_record_3          | created_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_3          | updated_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_3          | deleted_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_3          | map_position            | text                     | YES
- bi_user_chapter_play_record_3          | dialog_info             | text                     | YES
- bi_user_chapter_play_record_3          | level                   | character varying        | YES
- bi_user_chapter_play_record_4          | id                      | bigint                   | NO
- bi_user_chapter_play_record_4          | user_id                 | bigint                   | YES
- bi_user_chapter_play_record_4          | chapter_id              | bigint                   | YES
- bi_user_chapter_play_record_4          | chapter_unique_id       | character varying        | YES
- bi_user_chapter_play_record_4          | play_status             | smallint                 | YES
- bi_user_chapter_play_record_4          | story_id                | bigint                   | YES
- bi_user_chapter_play_record_4          | created_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_4          | updated_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_4          | deleted_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_4          | map_position            | text                     | YES
- bi_user_chapter_play_record_4          | dialog_info             | text                     | YES
- bi_user_chapter_play_record_4          | level                   | character varying        | YES
- bi_user_chapter_play_record_5          | id                      | bigint                   | NO
- bi_user_chapter_play_record_5          | user_id                 | bigint                   | YES
- bi_user_chapter_play_record_5          | chapter_id              | bigint                   | YES
- bi_user_chapter_play_record_5          | chapter_unique_id       | character varying        | YES
- bi_user_chapter_play_record_5          | play_status             | smallint                 | YES
- bi_user_chapter_play_record_5          | story_id                | bigint                   | YES
- bi_user_chapter_play_record_5          | created_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_5          | updated_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_5          | deleted_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_5          | map_position            | text                     | YES
- bi_user_chapter_play_record_5          | dialog_info             | text                     | YES
- bi_user_chapter_play_record_5          | level                   | character varying        | YES
- bi_user_chapter_play_record_6          | id                      | bigint                   | NO
- bi_user_chapter_play_record_6          | user_id                 | bigint                   | YES
- bi_user_chapter_play_record_6          | chapter_id              | bigint                   | YES
- bi_user_chapter_play_record_6          | chapter_unique_id       | character varying        | YES
- bi_user_chapter_play_record_6          | play_status             | smallint                 | YES
- bi_user_chapter_play_record_6          | story_id                | bigint                   | YES
- bi_user_chapter_play_record_6          | created_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_6          | updated_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_6          | deleted_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_6          | map_position            | text                     | YES
- bi_user_chapter_play_record_6          | dialog_info             | text                     | YES
- bi_user_chapter_play_record_6          | level                   | character varying        | YES
- bi_user_chapter_play_record_7          | id                      | bigint                   | NO
- bi_user_chapter_play_record_7          | user_id                 | bigint                   | YES
- bi_user_chapter_play_record_7          | chapter_id              | bigint                   | YES
- bi_user_chapter_play_record_7          | chapter_unique_id       | character varying        | YES
- bi_user_chapter_play_record_7          | play_status             | smallint                 | YES
- bi_user_chapter_play_record_7          | story_id                | bigint                   | YES
- bi_user_chapter_play_record_7          | created_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_7          | updated_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_7          | deleted_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_7          | map_position            | text                     | YES
- bi_user_chapter_play_record_7          | dialog_info             | text                     | YES
- bi_user_chapter_play_record_7          | level                   | character varying        | YES
- bi_user_component_play_record_0        | id                      | bigint                   | NO
- bi_user_component_play_record_0        | user_id                 | bigint                   | YES
- bi_user_component_play_record_0        | chapter_id              | bigint                   | YES
- bi_user_component_play_record_0        | chapter_unique_id       | character varying        | YES
- bi_user_component_play_record_0        | component_id            | bigint                   | YES
- bi_user_component_play_record_0        | sub_component_type      | smallint                 | YES
- bi_user_component_play_record_0        | component_type          | smallint                 | YES
- bi_user_component_play_record_0        | comp_type               | character varying        | YES
- bi_user_component_play_record_0        | play_status             | smallint                 | YES
- bi_user_component_play_record_0        | pass_time               | integer                  | YES
- bi_user_component_play_record_0        | interval_time           | integer                  | YES
- bi_user_component_play_record_0        | read_word_count         | integer                  | YES
- bi_user_component_play_record_0        | speak_count             | integer                  | YES
- bi_user_component_play_record_0        | listen_sentence_count   | integer                  | YES
- bi_user_component_play_record_0        | write_word_count        | integer                  | YES
- bi_user_component_play_record_0        | created_at              | timestamp with time zone | YES
- bi_user_component_play_record_0        | updated_at              | timestamp with time zone | YES
- bi_user_component_play_record_0        | deleted_at              | timestamp with time zone | YES
- bi_user_component_play_record_0        | section_id              | bigint                   | YES
- bi_user_component_play_record_0        | component_unique_code   | character varying        | YES
- bi_user_component_play_record_0        | play_result             | character varying        | YES
- bi_user_component_play_record_0        | finish_question_count   | integer                  | YES
- bi_user_component_play_record_0        | skill_points            | integer                  | YES
- bi_user_component_play_record_0        | c_type                  | character varying        | YES
- bi_user_component_play_record_0        | c_id                    | character varying        | YES
- bi_user_component_play_record_0        | user_behavior_info      | text                     | YES
- bi_user_component_play_record_0        | behavior_analysis_info  | text                     | YES
- bi_user_component_play_record_0        | session_id              | character varying        | YES
- bi_user_component_play_record_0        | listen_time             | integer                  | YES
- bi_user_component_play_record_0        | dialog_list             | text                     | YES
- bi_user_component_play_record_0        | level                   | character varying        | YES
- bi_user_component_play_record_1        | id                      | bigint                   | NO
- bi_user_component_play_record_1        | user_id                 | bigint                   | YES
- bi_user_component_play_record_1        | chapter_id              | bigint                   | YES
- bi_user_component_play_record_1        | chapter_unique_id       | character varying        | YES
- bi_user_component_play_record_1        | component_id            | bigint                   | YES
- bi_user_component_play_record_1        | sub_component_type      | smallint                 | YES
- bi_user_component_play_record_1        | component_type          | smallint                 | YES
- bi_user_component_play_record_1        | comp_type               | character varying        | YES
- bi_user_component_play_record_1        | play_status             | smallint                 | YES
- bi_user_component_play_record_1        | pass_time               | integer                  | YES
- bi_user_component_play_record_1        | interval_time           | integer                  | YES
- bi_user_component_play_record_1        | read_word_count         | integer                  | YES
- bi_user_component_play_record_1        | speak_count             | integer                  | YES
- bi_user_component_play_record_1        | listen_sentence_count   | integer                  | YES
- bi_user_component_play_record_1        | write_word_count        | integer                  | YES
- bi_user_component_play_record_1        | created_at              | timestamp with time zone | YES
- bi_user_component_play_record_1        | updated_at              | timestamp with time zone | YES
- bi_user_component_play_record_1        | deleted_at              | timestamp with time zone | YES
- bi_user_component_play_record_1        | section_id              | bigint                   | YES
- bi_user_component_play_record_1        | component_unique_code   | character varying        | YES
- bi_user_component_play_record_1        | play_result             | character varying        | YES
- bi_user_component_play_record_1        | finish_question_count   | integer                  | YES
- bi_user_component_play_record_1        | skill_points            | integer                  | YES
- bi_user_component_play_record_1        | c_type                  | character varying        | YES
- bi_user_component_play_record_1        | c_id                    | character varying        | YES
- bi_user_component_play_record_1        | user_behavior_info      | text                     | YES
- bi_user_component_play_record_1        | behavior_analysis_info  | text                     | YES
- bi_user_component_play_record_1        | session_id              | character varying        | YES
- bi_user_component_play_record_1        | listen_time             | integer                  | YES
- bi_user_component_play_record_1        | dialog_list             | text                     | YES
- bi_user_component_play_record_1        | level                   | character varying        | YES
- bi_user_component_play_record_2        | id                      | bigint                   | NO
- bi_user_component_play_record_2        | user_id                 | bigint                   | YES
- bi_user_component_play_record_2        | chapter_id              | bigint                   | YES
- bi_user_component_play_record_2        | chapter_unique_id       | character varying        | YES
- bi_user_component_play_record_2        | component_id            | bigint                   | YES
- bi_user_component_play_record_2        | sub_component_type      | smallint                 | YES
- bi_user_component_play_record_2        | component_type          | smallint                 | YES
- bi_user_component_play_record_2        | comp_type               | character varying        | YES
- bi_user_component_play_record_2        | play_status             | smallint                 | YES
- bi_user_component_play_record_2        | pass_time               | integer                  | YES
- bi_user_component_play_record_2        | interval_time           | integer                  | YES
- bi_user_component_play_record_2        | read_word_count         | integer                  | YES
- bi_user_component_play_record_2        | speak_count             | integer                  | YES
- bi_user_component_play_record_2        | listen_sentence_count   | integer                  | YES
- bi_user_component_play_record_2        | write_word_count        | integer                  | YES
- bi_user_component_play_record_2        | created_at              | timestamp with time zone | YES
- bi_user_component_play_record_2        | updated_at              | timestamp with time zone | YES
- bi_user_component_play_record_2        | deleted_at              | timestamp with time zone | YES
- bi_user_component_play_record_2        | section_id              | bigint                   | YES
- bi_user_component_play_record_2        | component_unique_code   | character varying        | YES
- bi_user_component_play_record_2        | play_result             | character varying        | YES
- bi_user_component_play_record_2        | finish_question_count   | integer                  | YES
- bi_user_component_play_record_2        | skill_points            | integer                  | YES
- bi_user_component_play_record_2        | c_type                  | character varying        | YES
- bi_user_component_play_record_2        | c_id                    | character varying        | YES
- bi_user_component_play_record_2        | user_behavior_info      | text                     | YES
- bi_user_component_play_record_2        | behavior_analysis_info  | text                     | YES
- bi_user_component_play_record_2        | session_id              | character varying        | YES
- bi_user_component_play_record_2        | listen_time             | integer                  | YES
- bi_user_component_play_record_2        | dialog_list             | text                     | YES
- bi_user_component_play_record_2        | level                   | character varying        | YES
- bi_user_component_play_record_3        | id                      | bigint                   | NO
- bi_user_component_play_record_3        | user_id                 | bigint                   | YES
- bi_user_component_play_record_3        | chapter_id              | bigint                   | YES
- bi_user_component_play_record_3        | chapter_unique_id       | character varying        | YES
- bi_user_component_play_record_3        | component_id            | bigint                   | YES
- bi_user_component_play_record_3        | sub_component_type      | smallint                 | YES
- bi_user_component_play_record_3        | component_type          | smallint                 | YES
- bi_user_component_play_record_3        | comp_type               | character varying        | YES
- bi_user_component_play_record_3        | play_status             | smallint                 | YES
- bi_user_component_play_record_3        | pass_time               | integer                  | YES
- bi_user_component_play_record_3        | interval_time           | integer                  | YES
- bi_user_component_play_record_3        | read_word_count         | integer                  | YES
- bi_user_component_play_record_3        | speak_count             | integer                  | YES
- bi_user_component_play_record_3        | listen_sentence_count   | integer                  | YES
- bi_user_component_play_record_3        | write_word_count        | integer                  | YES
- bi_user_component_play_record_3        | created_at              | timestamp with time zone | YES
- bi_user_component_play_record_3        | updated_at              | timestamp with time zone | YES
- bi_user_component_play_record_3        | deleted_at              | timestamp with time zone | YES
- bi_user_component_play_record_3        | section_id              | bigint                   | YES
- bi_user_component_play_record_3        | component_unique_code   | character varying        | YES
- bi_user_component_play_record_3        | play_result             | character varying        | YES
- bi_user_component_play_record_3        | finish_question_count   | integer                  | YES
- bi_user_component_play_record_3        | skill_points            | integer                  | YES
- bi_user_component_play_record_3        | c_type                  | character varying        | YES
- bi_user_component_play_record_3        | c_id                    | character varying        | YES
- bi_user_component_play_record_3        | user_behavior_info      | text                     | YES
- bi_user_component_play_record_3        | behavior_analysis_info  | text                     | YES
- bi_user_component_play_record_3        | session_id              | character varying        | YES
- bi_user_component_play_record_3        | listen_time             | integer                  | YES
- bi_user_component_play_record_3        | dialog_list             | text                     | YES
- bi_user_component_play_record_3        | level                   | character varying        | YES
- bi_user_component_play_record_4        | id                      | bigint                   | NO
- bi_user_component_play_record_4        | user_id                 | bigint                   | YES
- bi_user_component_play_record_4        | chapter_id              | bigint                   | YES
- bi_user_component_play_record_4        | chapter_unique_id       | character varying        | YES
- bi_user_component_play_record_4        | component_id            | bigint                   | YES
- bi_user_component_play_record_4        | sub_component_type      | smallint                 | YES
- bi_user_component_play_record_4        | component_type          | smallint                 | YES
- bi_user_component_play_record_4        | comp_type               | character varying        | YES
- bi_user_component_play_record_4        | play_status             | smallint                 | YES
- bi_user_component_play_record_4        | pass_time               | integer                  | YES
- bi_user_component_play_record_4        | interval_time           | integer                  | YES
- bi_user_component_play_record_4        | read_word_count         | integer                  | YES
- bi_user_component_play_record_4        | speak_count             | integer                  | YES
- bi_user_component_play_record_4        | listen_sentence_count   | integer                  | YES
- bi_user_component_play_record_4        | write_word_count        | integer                  | YES
- bi_user_component_play_record_4        | created_at              | timestamp with time zone | YES
- bi_user_component_play_record_4        | updated_at              | timestamp with time zone | YES
- bi_user_component_play_record_4        | deleted_at              | timestamp with time zone | YES
- bi_user_component_play_record_4        | section_id              | bigint                   | YES
- bi_user_component_play_record_4        | component_unique_code   | character varying        | YES
- bi_user_component_play_record_4        | play_result             | character varying        | YES
- bi_user_component_play_record_4        | finish_question_count   | integer                  | YES
- bi_user_component_play_record_4        | skill_points            | integer                  | YES
- bi_user_component_play_record_4        | c_type                  | character varying        | YES
- bi_user_component_play_record_4        | c_id                    | character varying        | YES
- bi_user_component_play_record_4        | user_behavior_info      | text                     | YES
- bi_user_component_play_record_4        | behavior_analysis_info  | text                     | YES
- bi_user_component_play_record_4        | session_id              | character varying        | YES
- bi_user_component_play_record_4        | listen_time             | integer                  | YES
- bi_user_component_play_record_4        | dialog_list             | text                     | YES
- bi_user_component_play_record_4        | level                   | character varying        | YES
- bi_user_component_play_record_5        | id                      | bigint                   | NO
- bi_user_component_play_record_5        | user_id                 | bigint                   | YES
- bi_user_component_play_record_5        | chapter_id              | bigint                   | YES
- bi_user_component_play_record_5        | chapter_unique_id       | character varying        | YES
- bi_user_component_play_record_5        | component_id            | bigint                   | YES
- bi_user_component_play_record_5        | sub_component_type      | smallint                 | YES
- bi_user_component_play_record_5        | component_type          | smallint                 | YES
- bi_user_component_play_record_5        | comp_type               | character varying        | YES
- bi_user_component_play_record_5        | play_status             | smallint                 | YES
- bi_user_component_play_record_5        | pass_time               | integer                  | YES
- bi_user_component_play_record_5        | interval_time           | integer                  | YES
- bi_user_component_play_record_5        | read_word_count         | integer                  | YES
- bi_user_component_play_record_5        | speak_count             | integer                  | YES
- bi_user_component_play_record_5        | listen_sentence_count   | integer                  | YES
- bi_user_component_play_record_5        | write_word_count        | integer                  | YES
- bi_user_component_play_record_5        | created_at              | timestamp with time zone | YES
- bi_user_component_play_record_5        | updated_at              | timestamp with time zone | YES
- bi_user_component_play_record_5        | deleted_at              | timestamp with time zone | YES
- bi_user_component_play_record_5        | section_id              | bigint                   | YES
- bi_user_component_play_record_5        | component_unique_code   | character varying        | YES
- bi_user_component_play_record_5        | play_result             | character varying        | YES
- bi_user_component_play_record_5        | finish_question_count   | integer                  | YES
- bi_user_component_play_record_5        | skill_points            | integer                  | YES
- bi_user_component_play_record_5        | c_type                  | character varying        | YES
- bi_user_component_play_record_5        | c_id                    | character varying        | YES
- bi_user_component_play_record_5        | user_behavior_info      | text                     | YES
- bi_user_component_play_record_5        | behavior_analysis_info  | text                     | YES
- bi_user_component_play_record_5        | session_id              | character varying        | YES
- bi_user_component_play_record_5        | listen_time             | integer                  | YES
- bi_user_component_play_record_5        | dialog_list             | text                     | YES
- bi_user_component_play_record_5        | level                   | character varying        | YES
- bi_user_component_play_record_6        | id                      | bigint                   | NO
- bi_user_component_play_record_6        | user_id                 | bigint                   | YES
- bi_user_component_play_record_6        | chapter_id              | bigint                   | YES
- bi_user_component_play_record_6        | chapter_unique_id       | character varying        | YES
- bi_user_component_play_record_6        | component_id            | bigint                   | YES
- bi_user_component_play_record_6        | sub_component_type      | smallint                 | YES
- bi_user_component_play_record_6        | component_type          | smallint                 | YES
- bi_user_component_play_record_6        | comp_type               | character varying        | YES
- bi_user_component_play_record_6        | play_status             | smallint                 | YES
- bi_user_component_play_record_6        | pass_time               | integer                  | YES
- bi_user_component_play_record_6        | interval_time           | integer                  | YES
- bi_user_component_play_record_6        | read_word_count         | integer                  | YES
- bi_user_component_play_record_6        | speak_count             | integer                  | YES
- bi_user_component_play_record_6        | listen_sentence_count   | integer                  | YES
- bi_user_component_play_record_6        | write_word_count        | integer                  | YES
- bi_user_component_play_record_6        | created_at              | timestamp with time zone | YES
- bi_user_component_play_record_6        | updated_at              | timestamp with time zone | YES
- bi_user_component_play_record_6        | deleted_at              | timestamp with time zone | YES
- bi_user_component_play_record_6        | section_id              | bigint                   | YES
- bi_user_component_play_record_6        | component_unique_code   | character varying        | YES
- bi_user_component_play_record_6        | play_result             | character varying        | YES
- bi_user_component_play_record_6        | finish_question_count   | integer                  | YES
- bi_user_component_play_record_6        | skill_points            | integer                  | YES
- bi_user_component_play_record_6        | c_type                  | character varying        | YES
- bi_user_component_play_record_6        | c_id                    | character varying        | YES
- bi_user_component_play_record_6        | user_behavior_info      | text                     | YES
- bi_user_component_play_record_6        | behavior_analysis_info  | text                     | YES
- bi_user_component_play_record_6        | session_id              | character varying        | YES
- bi_user_component_play_record_6        | listen_time             | integer                  | YES
- bi_user_component_play_record_6        | dialog_list             | text                     | YES
- bi_user_component_play_record_6        | level                   | character varying        | YES
- bi_user_component_play_record_7        | id                      | bigint                   | NO
- bi_user_component_play_record_7        | user_id                 | bigint                   | YES
- bi_user_component_play_record_7        | chapter_id              | bigint                   | YES
- bi_user_component_play_record_7        | chapter_unique_id       | character varying        | YES
- bi_user_component_play_record_7        | component_id            | bigint                   | YES
- bi_user_component_play_record_7        | sub_component_type      | smallint                 | YES
- bi_user_component_play_record_7        | component_type          | smallint                 | YES
- bi_user_component_play_record_7        | comp_type               | character varying        | YES
- bi_user_component_play_record_7        | play_status             | smallint                 | YES
- bi_user_component_play_record_7        | pass_time               | integer                  | YES
- bi_user_component_play_record_7        | interval_time           | integer                  | YES
- bi_user_component_play_record_7        | read_word_count         | integer                  | YES
- bi_user_component_play_record_7        | speak_count             | integer                  | YES
- bi_user_component_play_record_7        | listen_sentence_count   | integer                  | YES
- bi_user_component_play_record_7        | write_word_count        | integer                  | YES
- bi_user_component_play_record_7        | created_at              | timestamp with time zone | YES
- bi_user_component_play_record_7        | updated_at              | timestamp with time zone | YES
- bi_user_component_play_record_7        | deleted_at              | timestamp with time zone | YES
- bi_user_component_play_record_7        | section_id              | bigint                   | YES
- bi_user_component_play_record_7        | component_unique_code   | character varying        | YES
- bi_user_component_play_record_7        | play_result             | character varying        | YES
- bi_user_component_play_record_7        | finish_question_count   | integer                  | YES
- bi_user_component_play_record_7        | skill_points            | integer                  | YES
- bi_user_component_play_record_7        | c_type                  | character varying        | YES
- bi_user_component_play_record_7        | c_id                    | character varying        | YES
- bi_user_component_play_record_7        | user_behavior_info      | text                     | YES
- bi_user_component_play_record_7        | behavior_analysis_info  | text                     | YES
- bi_user_component_play_record_7        | session_id              | character varying        | YES
- bi_user_component_play_record_7        | listen_time             | integer                  | YES
- bi_user_component_play_record_7        | dialog_list             | text                     | YES
- bi_user_component_play_record_7        | level                   | character varying        | YES
- bi_user_course_detail                  | id                      | integer                  | NO
- bi_user_course_detail                  | created_at              | timestamp with time zone | YES
- bi_user_course_detail                  | updated_at              | timestamp with time zone | YES
- bi_user_course_detail                  | account_id              | integer                  | YES
- bi_user_course_detail                  | user_id                 | integer                  | YES
- bi_user_course_detail                  | course_level            | text                     | YES
- bi_user_course_detail                  | active_time             | timestamp with time zone | YES
- bi_user_course_detail                  | expire_time             | timestamp with time zone | YES
- bi_user_course_detail                  | latest_unit_index       | integer                  | YES
- bi_user_course_detail                  | latest_lesson_index     | integer                  | YES
- bi_user_course_detail                  | learn_duration          | integer                  | YES
- bi_user_course_detail                  | last_learn_time         | timestamp with time zone | YES
- bi_user_course_detail                  | deleted_at              | timestamp with time zone | YES
- bi_user_task_log                       | id                      | integer                  | NO
- bi_user_task_log                       | user_id                 | integer                  | NO
- bi_user_task_log                       | task_type               | integer                  | NO
- bi_user_task_log                       | task_id                 | integer                  | NO
- bi_user_task_log                       | status                  | integer                  | NO
- bi_user_task_log                       | start_time              | bigint                   | NO
- bi_user_task_log                       | updated_at              | timestamp with time zone | YES
- bi_user_task_log                       | created_at              | timestamp with time zone | YES
- bi_user_unit_challenge_question_result | id                      | bigint                   | NO
- bi_user_unit_challenge_question_result | user_id                 | bigint                   | YES
- bi_user_unit_challenge_question_result | story_id                | bigint                   | YES
- bi_user_unit_challenge_question_result | score                   | integer                  | YES
- bi_user_unit_challenge_question_result | score_text              | character varying        | YES
- bi_user_unit_challenge_question_result | question_list           | text                     | YES
- bi_user_unit_challenge_question_result | sp_value                | integer                  | YES
- bi_user_unit_challenge_question_result | created_at              | timestamp with time zone | YES
- bi_user_unit_challenge_question_result | updated_at              | timestamp with time zone | YES
- bi_user_unit_challenge_question_result | deleted_at              | timestamp with time zone | YES
- bi_user_unit_challenge_question_result | category                | character varying        | YES
- bi_user_unit_challenge_question_result | exp                     | integer                  | YES
- bi_user_unit_challenge_question_result | play_time               | bigint                   | YES
- bi_user_unit_challenge_question_result | read_word_count         | bigint                   | YES
- bi_user_unit_challenge_question_result | listen_time             | bigint                   | YES
- bi_user_unit_challenge_question_result | speak_count             | bigint                   | YES
- bi_user_unit_challenge_question_result | level                   | character varying        | YES
- bi_user_unit_review_question_result    | id                      | bigint                   | NO
- bi_user_unit_review_question_result    | user_id                 | bigint                   | YES
- bi_user_unit_review_question_result    | story_id                | bigint                   | YES
- bi_user_unit_review_question_result    | chapter_id              | bigint                   | YES
- bi_user_unit_review_question_result    | unique_id               | character varying        | YES
- bi_user_unit_review_question_result    | score                   | integer                  | YES
- bi_user_unit_review_question_result    | score_text              | character varying        | YES
- bi_user_unit_review_question_result    | question_list           | text                     | YES
- bi_user_unit_review_question_result    | sp_value                | integer                  | YES
- bi_user_unit_review_question_result    | exp                     | integer                  | YES
- bi_user_unit_review_question_result    | created_at              | timestamp with time zone | YES
- bi_user_unit_review_question_result    | updated_at              | timestamp with time zone | YES
- bi_user_unit_review_question_result    | deleted_at              | timestamp with time zone | YES
- bi_user_unit_review_question_result    | play_time               | bigint                   | YES
- bi_user_unit_review_question_result    | level                   | character varying        | YES
- bi_user_unit_summary_km_result         | id                      | bigint                   | NO
- bi_user_unit_summary_km_result         | user_id                 | bigint                   | YES
- bi_user_unit_summary_km_result         | story_id                | bigint                   | YES
- bi_user_unit_summary_km_result         | km_id                   | character varying        | YES
- bi_user_unit_summary_km_result         | km_type                 | character varying        | YES
- bi_user_unit_summary_km_result         | score_text              | character varying        | YES
- bi_user_unit_summary_km_result         | sp_value                | integer                  | YES
- bi_user_unit_summary_km_result         | created_at              | timestamp with time zone | YES
- bi_user_unit_summary_km_result         | updated_at              | timestamp with time zone | YES
- bi_user_unit_summary_km_result         | deleted_at              | timestamp with time zone | YES
- bi_user_unit_summary_km_result         | play_time               | bigint                   | YES
- bi_user_unit_summary_km_result         | sum_explain             | text                     | YES
- bi_user_unit_summary_km_result         | level                   | character varying        | YES
- bi_vala_app_account                    | id                      | bigint                   | NO
- bi_vala_app_account                    | tel                     | character varying        | YES
- bi_vala_app_account                    | tel_encrypt             | character varying        | YES
- bi_vala_app_account                    | name                    | character varying        | YES
- bi_vala_app_account                    | id_card                 | character varying        | YES
- bi_vala_app_account                    | status                  | smallint                 | NO
- bi_vala_app_account                    | pay_status              | integer                  | YES
- bi_vala_app_account                    | login_times             | integer                  | YES
- bi_vala_app_account                    | remark                  | text                     | YES
- bi_vala_app_account                    | key_from                | character varying        | NO
- bi_vala_app_account                    | created_at              | timestamp with time zone | YES
- bi_vala_app_account                    | updated_at              | timestamp with time zone | YES
- bi_vala_app_account                    | deleted_at              | timestamp with time zone | YES
- bi_vala_app_account                    | download_channel        | text                     | YES
- bi_vala_app_character                  | id                      | bigint                   | NO
- bi_vala_app_character                  | account_id              | bigint                   | YES
- bi_vala_app_character                  | nickname                | character varying        | YES
- bi_vala_app_character                  | gender                  | smallint                 | YES
- bi_vala_app_character                  | birthday                | character varying        | YES
- bi_vala_app_character                  | avatar                  | text                     | YES
- bi_vala_app_character                  | spine_name              | character varying        | YES
- bi_vala_app_character                  | latest_login            | timestamp with time zone | YES
- bi_vala_app_character                  | reputation              | character                | YES
- bi_vala_app_character                  | robots_cnt              | integer                  | YES
- bi_vala_app_character                  | head_image              | character varying        | YES
- bi_vala_app_character                  | status                  | smallint                 | YES
- bi_vala_app_character                  | purchase_season_package | text                     | YES
- bi_vala_app_character                  | total_sp_point          | integer                  | YES
- bi_vala_app_character                  | balance_sp_point        | integer                  | YES
- bi_vala_app_character                  | point                   | integer                  | YES
- bi_vala_app_character                  | pay_status              | smallint                 | YES
- bi_vala_app_character                  | created_at              | timestamp with time zone | YES
- bi_vala_app_character                  | updated_at              | timestamp with time zone | YES
- bi_vala_app_character                  | deleted_at              | timestamp with time zone | YES
- bi_vala_app_character                  | mood                    | integer                  | YES
- bi_vala_app_character                  | mood_start_time         | bigint                   | YES
- bi_vala_order                          | id                      | bigint                   | NO
- bi_vala_order                          | account_id              | bigint                   | YES
- bi_vala_order                          | out_trade_no            | character varying        | YES
- bi_vala_order                          | trade_no                | character varying        | YES
- bi_vala_order                          | pay_amount              | character varying        | YES
- bi_vala_order                          | goods_id                | bigint                   | YES
- bi_vala_order                          | order_status            | integer                  | YES
- bi_vala_order                          | order_status_comment    | text                     | YES
- bi_vala_order                          | is_used                 | integer                  | YES
- bi_vala_order                          | sale_channel            | integer                  | YES
- bi_vala_order                          | created_at              | timestamp with time zone | YES
- bi_vala_order                          | updated_at              | timestamp with time zone | YES
- bi_vala_order                          | deleted_at              | timestamp with time zone | YES
- bi_vala_order                          | pay_channel             | integer                  | YES
- bi_vala_order                          | pay_amount_int          | integer                  | YES
- bi_vala_order                          | pay_success_date        | timestamp with time zone | YES
- bi_vala_order                          | quantity                | integer                  | NO
- bi_vala_order                          | goods_name              | text                     | NO
- bi_vala_order                          | goods_apply_package_ids | text                     | NO
- bi_vala_order                          | key_from                | text                     | NO
- bi_vala_order                          | expire_days             | integer                  | YES
- data_sync_config                       | id                      | bigint                   | NO
- data_sync_config                       | job_name                | text                     | NO
- data_sync_config                       | database_name           | text                     | YES
- data_sync_config                       | origin_table            | text                     | YES
- data_sync_config                       | dest_table              | text                     | YES
- data_sync_config                       | deal_time               | timestamp with time zone | YES
- data_sync_config                       | full_id                 | bigint                   | NO
- data_sync_config                       | created_at              | timestamp with time zone | NO
- data_sync_config                       | updated_at              | timestamp with time zone | YES
- feishu_table_record                    | id                      | bigint                   | NO
- feishu_table_record                    | data_table              | text                     | NO
- feishu_table_record                    | record_id               | text                     | YES
- feishu_table_record                    | created_at              | timestamp with time zone | NO
- growth_activity_behavior               | id                      | integer                  | NO
- growth_activity_behavior               | es_id                   | character varying        | NO
- growth_activity_behavior               | account_id              | integer                  | YES
- growth_activity_behavior               | account_name            | character varying        | YES
- growth_activity_behavior               | activity                | text                     | YES
- growth_activity_behavior               | behavior                | character varying        | YES
- growth_activity_behavior               | created_at              | timestamp with time zone | YES
- growth_activity_behavior               | info                    | text                     | YES
- growth_activity_behavior               | source                  | character varying        | YES
- growth_activity_behavior               | sub_behavior            | character varying        | YES
- growth_activity_behavior               | user_id                 | integer                  | YES
- growth_activity_behavior               | user_name               | character varying        | YES
- user_behavior_0                        | id                      | bigint                   | NO
- user_behavior_0                        | user_id                 | integer                  | YES
- user_behavior_0                        | behavior                | text                     | YES
- user_behavior_0                        | behavior_time_date      | text                     | NO
- user_behavior_0                        | created_time            | timestamp with time zone | NO
- user_behavior_1                        | id                      | bigint                   | NO
- user_behavior_1                        | user_id                 | integer                  | YES
- user_behavior_1                        | behavior                | text                     | YES
- user_behavior_1                        | behavior_time_date      | text                     | NO
- user_behavior_1                        | created_time            | timestamp with time zone | NO
- user_behavior_10                       | id                      | bigint                   | NO
- user_behavior_10                       | user_id                 | integer                  | YES
- user_behavior_10                       | behavior                | text                     | YES
- user_behavior_10                       | behavior_time_date      | text                     | NO
- user_behavior_10                       | created_time            | timestamp with time zone | NO
- user_behavior_11                       | id                      | bigint                   | NO
- user_behavior_11                       | user_id                 | integer                  | YES
- user_behavior_11                       | behavior                | text                     | YES
- user_behavior_11                       | behavior_time_date      | text                     | NO
- user_behavior_11                       | created_time            | timestamp with time zone | NO
- user_behavior_12                       | id                      | bigint                   | NO
- user_behavior_12                       | user_id                 | integer                  | YES
- user_behavior_12                       | behavior                | text                     | YES
- user_behavior_12                       | behavior_time_date      | text                     | NO
- user_behavior_12                       | created_time            | timestamp with time zone | NO
- user_behavior_13                       | id                      | bigint                   | NO
- user_behavior_13                       | user_id                 | integer                  | YES
- user_behavior_13                       | behavior                | text                     | YES
- user_behavior_13                       | behavior_time_date      | text                     | NO
- user_behavior_13                       | created_time            | timestamp with time zone | NO
- user_behavior_14                       | id                      | bigint                   | NO
- user_behavior_14                       | user_id                 | integer                  | YES
- user_behavior_14                       | behavior                | text                     | YES
- user_behavior_14                       | behavior_time_date      | text                     | NO
- user_behavior_14                       | created_time            | timestamp with time zone | NO
- user_behavior_15                       | id                      | bigint                   | NO
- user_behavior_15                       | user_id                 | integer                  | YES
- user_behavior_15                       | behavior                | text                     | YES
- user_behavior_15                       | behavior_time_date      | text                     | NO
- user_behavior_15                       | created_time            | timestamp with time zone | NO
- user_behavior_2                        | id                      | bigint                   | NO
- user_behavior_2                        | user_id                 | integer                  | YES
- user_behavior_2                        | behavior                | text                     | YES
- user_behavior_2                        | behavior_time_date      | text                     | NO
- user_behavior_2                        | created_time            | timestamp with time zone | NO
- user_behavior_3                        | id                      | bigint                   | NO
- user_behavior_3                        | user_id                 | integer                  | YES
- user_behavior_3                        | behavior                | text                     | YES
- user_behavior_3                        | behavior_time_date      | text                     | NO
- user_behavior_3                        | created_time            | timestamp with time zone | NO
- user_behavior_4                        | id                      | bigint                   | NO
- user_behavior_4                        | user_id                 | integer                  | YES
- user_behavior_4                        | behavior                | text                     | YES
- user_behavior_4                        | behavior_time_date      | text                     | NO
- user_behavior_4                        | created_time            | timestamp with time zone | NO
- user_behavior_5                        | id                      | bigint                   | NO
- user_behavior_5                        | user_id                 | integer                  | YES
- user_behavior_5                        | behavior                | text                     | YES
- user_behavior_5                        | behavior_time_date      | text                     | NO
- user_behavior_5                        | created_time            | timestamp with time zone | NO
- user_behavior_6                        | id                      | bigint                   | NO
- user_behavior_6                        | user_id                 | integer                  | YES
- user_behavior_6                        | behavior                | text                     | YES
- user_behavior_6                        | behavior_time_date      | text                     | NO
- user_behavior_6                        | created_time            | timestamp with time zone | NO
- user_behavior_7                        | id                      | bigint                   | NO
- user_behavior_7                        | user_id                 | integer                  | YES
- user_behavior_7                        | behavior                | text                     | YES
- user_behavior_7                        | behavior_time_date      | text                     | NO
- user_behavior_7                        | created_time            | timestamp with time zone | NO
- user_behavior_8                        | id                      | bigint                   | NO
- user_behavior_8                        | user_id                 | integer                  | YES
- user_behavior_8                        | behavior                | text                     | YES
- user_behavior_8                        | behavior_time_date      | text                     | NO
- user_behavior_8                        | created_time            | timestamp with time zone | NO
- user_behavior_9                        | id                      | bigint                   | NO
- user_behavior_9                        | user_id                 | integer                  | YES
- user_behavior_9                        | behavior                | text                     | YES
- user_behavior_9                        | behavior_time_date      | text                     | NO
- user_behavior_9                        | created_time            | timestamp with time zone | NO
- user_behavior_buried_points            | id                      | integer                  | NO
- user_behavior_buried_points            | burying_point_id        | integer                  | YES
- user_behavior_buried_points            | burying_point_name      | character varying        | YES
- user_behavior_buried_points            | burying_point_sub_id    | integer                  | YES
- user_behavior_buried_points            | burying_point_sub_name  | character varying        | YES
- user_behavior_buried_points            | account_id              | integer                  | YES
- user_behavior_buried_points            | account_name            | character varying        | YES
- user_behavior_buried_points            | character_id            | integer                  | YES
- user_behavior_buried_points            | character_name          | character varying        | YES
- user_behavior_buried_points            | active_time             | integer                  | YES
- user_behavior_buried_points            | version_id              | character varying        | YES
- user_behavior_buried_points            | step_duration           | integer                  | YES
- user_behavior_buried_points            | finish_status           | character varying        | YES
- user_behavior_buried_points            | season_package_id       | integer                  | YES
- user_behavior_buried_points            | season_package_name     | character varying        | YES
- user_behavior_buried_points            | unit_id                 | integer                  | YES
- user_behavior_buried_points            | unit_name               | character varying        | YES
- user_behavior_buried_points            | lesson_id               | integer                  | YES
- user_behavior_buried_points            | lesson_name             | character varying        | YES
- user_behavior_buried_points            | component_id            | integer                  | YES
- user_behavior_buried_points            | component_name          | character varying        | YES
- user_behavior_buried_points            | c_type                  | character varying        | YES
- user_behavior_buried_points            | c_id                    | character varying        | YES
- user_behavior_buried_points            | learning_module_id      | integer                  | YES
- user_behavior_buried_points            | learning_module_name    | character varying        | YES
- user_behavior_buried_points            | learning_point_id       | integer                  | YES
- user_behavior_buried_points            | learning_point_name     | character varying        | YES
- user_behavior_buried_points            | card_id                 | integer                  | YES
- user_behavior_buried_points            | card_name               | character varying        | YES
- user_behavior_buried_points            | data_version            | character varying        | YES
- user_behavior_buried_points            | ex2                     | text                     | YES
- user_behavior_buried_points            | ex3                     | text                     | YES
- user_behavior_buried_points            | es_id                   | character varying        | YES
- user_behavior_count_tmp                | behavior                | text                     | YES
- user_behavior_count_tmp                | register_time_date      | text                     | NO
- user_behavior_count_tmp                | count                   | integer                  | YES
- user_detail_info                       | id                      | bigint                   | NO
- user_detail_info                       | user_id                 | integer                  | YES
- user_detail_info                       | latest_login_time       | bigint                   | YES
- user_detail_info                       | latest_active_time      | bigint                   | YES
- user_detail_info                       | created_time            | timestamp with time zone | NO
- user_done_duration                     | id                      | bigint                   | NO
- user_done_duration                     | user_id                 | integer                  | YES
- user_done_duration                     | chapter_id              | integer                  | YES
- user_done_duration                     | duration                | integer                  | YES
- user_done_duration                     | created_time            | timestamp with time zone | NO
- user_learning                          | id                      | bigint                   | NO
- user_learning                          | user_id                 | integer                  | YES
- user_learning                          | stat_date               | text                     | NO
- user_learning                          | learning_time           | integer                  | YES
- user_learning                          | created_time            | timestamp with time zone | NO
-(674 rows)
-
diff --git a/database/pg_test_full.txt b/database/pg_test_full.txt
deleted file mode 100644
index bee2858..0000000
--- a/database/pg_test_full.txt
+++ /dev/null
@@ -1,628 +0,0 @@
-               table_name               |       column_name       |        data_type         | is_nullable 
-----------------------------------------+-------------------------+--------------------------+-------------
- account_activity_count                 | id                      | bigint                   | NO
- account_activity_count                 | time_period             | integer                  | YES
- account_activity_count                 | counts                  | integer                  | YES
- account_activity_count                 | stat_date               | text                     | NO
- account_activity_count                 | created_time            | timestamp with time zone | NO
- account_behavior_count                 | id                      | bigint                   | NO
- account_behavior_count                 | count                   | integer                  | YES
- account_behavior_count                 | behavior                | text                     | YES
- account_behavior_count                 | behavior_group          | text                     | NO
- account_behavior_count                 | stat_date               | text                     | NO
- account_behavior_count                 | dt                      | text                     | NO
- account_behavior_count                 | created_time            | timestamp with time zone | NO
- account_detail_info                    | id                      | bigint                   | NO
- account_detail_info                    | account_id              | integer                  | YES
- account_detail_info                    | login_time              | text                     | YES
- account_detail_info                    | device                  | text                     | YES
- account_detail_info                    | device_os               | text                     | YES
- account_detail_info                    | login_address           | text                     | YES
- account_detail_info                    | login_times             | integer                  | YES
- account_detail_info                    | created_time            | timestamp with time zone | NO
- account_device                         | id                      | bigint                   | NO
- account_device                         | account_id              | integer                  | YES
- account_device                         | device                  | text                     | YES
- account_device                         | created_time            | timestamp with time zone | NO
- account_login                          | id                      | bigint                   | NO
- account_login                          | account_id              | integer                  | YES
- account_login                          | login_date              | text                     | NO
- account_login                          | created_time            | timestamp with time zone | NO
- account_login                          | status                  | integer                  | NO
- account_paid_activity_count            | id                      | integer                  | NO
- account_paid_activity_count            | time_period             | integer                  | YES
- account_paid_activity_count            | counts                  | integer                  | YES
- account_paid_activity_count            | stat_date               | text                     | NO
- account_paid_activity_count            | created_time            | timestamp with time zone | NO
- bi_refund_order                        | id                      | bigint                   | NO
- bi_refund_order                        | account_id              | bigint                   | YES
- bi_refund_order                        | out_trade_no            | character varying        | YES
- bi_refund_order                        | trade_no                | character varying        | YES
- bi_refund_order                        | refund_amount           | character varying        | YES
- bi_refund_order                        | created_at              | timestamp with time zone | YES
- bi_refund_order                        | updated_at              | timestamp with time zone | YES
- bi_refund_order                        | deleted_at              | timestamp with time zone | YES
- bi_refund_order                        | refund_amount_int       | integer                  | NO
- bi_refund_order                        | reason                  | text                     | NO
- bi_refund_order                        | sale_channel            | integer                  | NO
- bi_refund_order                        | status                  | integer                  | NO
- bi_refund_order                        | is_admin                | boolean                  | NO
- bi_refund_order                        | channel_refund_id       | bigint                   | NO
- bi_refund_order                        | refund_ticket_ids       | text                     | YES
- bi_refund_order                        | refund_type             | integer                  | NO
- bi_refund_order                        | refund_method           | integer                  | NO
- bi_refund_order                        | after_sale_no           | text                     | YES
- bi_user_chapter_play_record_0          | id                      | bigint                   | NO
- bi_user_chapter_play_record_0          | user_id                 | bigint                   | YES
- bi_user_chapter_play_record_0          | chapter_id              | bigint                   | YES
- bi_user_chapter_play_record_0          | chapter_unique_id       | character varying        | YES
- bi_user_chapter_play_record_0          | play_status             | smallint                 | YES
- bi_user_chapter_play_record_0          | story_id                | bigint                   | YES
- bi_user_chapter_play_record_0          | created_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_0          | updated_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_0          | deleted_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_0          | map_position            | text                     | YES
- bi_user_chapter_play_record_0          | dialog_info             | text                     | YES
- bi_user_chapter_play_record_0          | level                   | character varying        | YES
- bi_user_chapter_play_record_1          | id                      | bigint                   | NO
- bi_user_chapter_play_record_1          | user_id                 | bigint                   | YES
- bi_user_chapter_play_record_1          | chapter_id              | bigint                   | YES
- bi_user_chapter_play_record_1          | chapter_unique_id       | character varying        | YES
- bi_user_chapter_play_record_1          | play_status             | smallint                 | YES
- bi_user_chapter_play_record_1          | story_id                | bigint                   | YES
- bi_user_chapter_play_record_1          | created_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_1          | updated_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_1          | deleted_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_1          | map_position            | text                     | YES
- bi_user_chapter_play_record_1          | dialog_info             | text                     | YES
- bi_user_chapter_play_record_1          | level                   | character varying        | YES
- bi_user_chapter_play_record_2          | id                      | bigint                   | NO
- bi_user_chapter_play_record_2          | user_id                 | bigint                   | YES
- bi_user_chapter_play_record_2          | chapter_id              | bigint                   | YES
- bi_user_chapter_play_record_2          | chapter_unique_id       | character varying        | YES
- bi_user_chapter_play_record_2          | play_status             | smallint                 | YES
- bi_user_chapter_play_record_2          | story_id                | bigint                   | YES
- bi_user_chapter_play_record_2          | created_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_2          | updated_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_2          | deleted_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_2          | map_position            | text                     | YES
- bi_user_chapter_play_record_2          | dialog_info             | text                     | YES
- bi_user_chapter_play_record_2          | level                   | character varying        | YES
- bi_user_chapter_play_record_3          | id                      | bigint                   | NO
- bi_user_chapter_play_record_3          | user_id                 | bigint                   | YES
- bi_user_chapter_play_record_3          | chapter_id              | bigint                   | YES
- bi_user_chapter_play_record_3          | chapter_unique_id       | character varying        | YES
- bi_user_chapter_play_record_3          | play_status             | smallint                 | YES
- bi_user_chapter_play_record_3          | story_id                | bigint                   | YES
- bi_user_chapter_play_record_3          | created_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_3          | updated_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_3          | deleted_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_3          | map_position            | text                     | YES
- bi_user_chapter_play_record_3          | dialog_info             | text                     | YES
- bi_user_chapter_play_record_3          | level                   | character varying        | YES
- bi_user_chapter_play_record_4          | id                      | bigint                   | NO
- bi_user_chapter_play_record_4          | user_id                 | bigint                   | YES
- bi_user_chapter_play_record_4          | chapter_id              | bigint                   | YES
- bi_user_chapter_play_record_4          | chapter_unique_id       | character varying        | YES
- bi_user_chapter_play_record_4          | play_status             | smallint                 | YES
- bi_user_chapter_play_record_4          | story_id                | bigint                   | YES
- bi_user_chapter_play_record_4          | created_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_4          | updated_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_4          | deleted_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_4          | map_position            | text                     | YES
- bi_user_chapter_play_record_4          | dialog_info             | text                     | YES
- bi_user_chapter_play_record_4          | level                   | character varying        | YES
- bi_user_chapter_play_record_5          | id                      | bigint                   | NO
- bi_user_chapter_play_record_5          | user_id                 | bigint                   | YES
- bi_user_chapter_play_record_5          | chapter_id              | bigint                   | YES
- bi_user_chapter_play_record_5          | chapter_unique_id       | character varying        | YES
- bi_user_chapter_play_record_5          | play_status             | smallint                 | YES
- bi_user_chapter_play_record_5          | story_id                | bigint                   | YES
- bi_user_chapter_play_record_5          | created_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_5          | updated_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_5          | deleted_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_5          | map_position            | text                     | YES
- bi_user_chapter_play_record_5          | dialog_info             | text                     | YES
- bi_user_chapter_play_record_5          | level                   | character varying        | YES
- bi_user_chapter_play_record_6          | id                      | bigint                   | NO
- bi_user_chapter_play_record_6          | user_id                 | bigint                   | YES
- bi_user_chapter_play_record_6          | chapter_id              | bigint                   | YES
- bi_user_chapter_play_record_6          | chapter_unique_id       | character varying        | YES
- bi_user_chapter_play_record_6          | play_status             | smallint                 | YES
- bi_user_chapter_play_record_6          | story_id                | bigint                   | YES
- bi_user_chapter_play_record_6          | created_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_6          | updated_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_6          | deleted_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_6          | map_position            | text                     | YES
- bi_user_chapter_play_record_6          | dialog_info             | text                     | YES
- bi_user_chapter_play_record_6          | level                   | character varying        | YES
- bi_user_chapter_play_record_7          | id                      | bigint                   | NO
- bi_user_chapter_play_record_7          | user_id                 | bigint                   | YES
- bi_user_chapter_play_record_7          | chapter_id              | bigint                   | YES
- bi_user_chapter_play_record_7          | chapter_unique_id       | character varying        | YES
- bi_user_chapter_play_record_7          | play_status             | smallint                 | YES
- bi_user_chapter_play_record_7          | story_id                | bigint                   | YES
- bi_user_chapter_play_record_7          | created_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_7          | updated_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_7          | deleted_at              | timestamp with time zone | YES
- bi_user_chapter_play_record_7          | map_position            | text                     | YES
- bi_user_chapter_play_record_7          | dialog_info             | text                     | YES
- bi_user_chapter_play_record_7          | level                   | character varying        | YES
- bi_user_component_play_record_0        | id                      | bigint                   | NO
- bi_user_component_play_record_0        | user_id                 | bigint                   | YES
- bi_user_component_play_record_0        | chapter_id              | bigint                   | YES
- bi_user_component_play_record_0        | chapter_unique_id       | character varying        | YES
- bi_user_component_play_record_0        | component_id            | bigint                   | YES
- bi_user_component_play_record_0        | sub_component_type      | smallint                 | YES
- bi_user_component_play_record_0        | component_type          | smallint                 | YES
- bi_user_component_play_record_0        | comp_type               | character varying        | YES
- bi_user_component_play_record_0        | play_status             | smallint                 | YES
- bi_user_component_play_record_0        | pass_time               | integer                  | YES
- bi_user_component_play_record_0        | interval_time           | integer                  | YES
- bi_user_component_play_record_0        | read_word_count         | integer                  | YES
- bi_user_component_play_record_0        | speak_count             | integer                  | YES
- bi_user_component_play_record_0        | listen_sentence_count   | integer                  | YES
- bi_user_component_play_record_0        | write_word_count        | integer                  | YES
- bi_user_component_play_record_0        | created_at              | timestamp with time zone | YES
- bi_user_component_play_record_0        | updated_at              | timestamp with time zone | YES
- bi_user_component_play_record_0        | deleted_at              | timestamp with time zone | YES
- bi_user_component_play_record_0        | section_id              | bigint                   | YES
- bi_user_component_play_record_0        | component_unique_code   | character varying        | YES
- bi_user_component_play_record_0        | play_result             | character varying        | YES
- bi_user_component_play_record_0        | finish_question_count   | integer                  | YES
- bi_user_component_play_record_0        | skill_points            | integer                  | YES
- bi_user_component_play_record_0        | c_type                  | character varying        | YES
- bi_user_component_play_record_0        | c_id                    | character varying        | YES
- bi_user_component_play_record_0        | user_behavior_info      | text                     | YES
- bi_user_component_play_record_0        | behavior_analysis_info  | text                     | YES
- bi_user_component_play_record_0        | session_id              | character varying        | YES
- bi_user_component_play_record_0        | listen_time             | integer                  | YES
- bi_user_component_play_record_0        | dialog_list             | text                     | YES
- bi_user_component_play_record_0        | level                   | character varying        | YES
- bi_user_component_play_record_1        | id                      | bigint                   | NO
- bi_user_component_play_record_1        | user_id                 | bigint                   | YES
- bi_user_component_play_record_1        | chapter_id              | bigint                   | YES
- bi_user_component_play_record_1        | chapter_unique_id       | character varying        | YES
- bi_user_component_play_record_1        | component_id            | bigint                   | YES
- bi_user_component_play_record_1        | sub_component_type      | smallint                 | YES
- bi_user_component_play_record_1        | component_type          | smallint                 | YES
- bi_user_component_play_record_1        | comp_type               | character varying        | YES
- bi_user_component_play_record_1        | play_status             | smallint                 | YES
- bi_user_component_play_record_1        | pass_time               | integer                  | YES
- bi_user_component_play_record_1        | interval_time           | integer                  | YES
- bi_user_component_play_record_1        | read_word_count         | integer                  | YES
- bi_user_component_play_record_1        | speak_count             | integer                  | YES
- bi_user_component_play_record_1        | listen_sentence_count   | integer                  | YES
- bi_user_component_play_record_1        | write_word_count        | integer                  | YES
- bi_user_component_play_record_1        | created_at              | timestamp with time zone | YES
- bi_user_component_play_record_1        | updated_at              | timestamp with time zone | YES
- bi_user_component_play_record_1        | deleted_at              | timestamp with time zone | YES
- bi_user_component_play_record_1        | section_id              | bigint                   | YES
- bi_user_component_play_record_1        | component_unique_code   | character varying        | YES
- bi_user_component_play_record_1        | play_result             | character varying        | YES
- bi_user_component_play_record_1        | finish_question_count   | integer                  | YES
- bi_user_component_play_record_1        | skill_points            | integer                  | YES
- bi_user_component_play_record_1        | c_type                  | character varying        | YES
- bi_user_component_play_record_1        | c_id                    | character varying        | YES
- bi_user_component_play_record_1        | user_behavior_info      | text                     | YES
- bi_user_component_play_record_1        | behavior_analysis_info  | text                     | YES
- bi_user_component_play_record_1        | session_id              | character varying        | YES
- bi_user_component_play_record_1        | listen_time             | integer                  | YES
- bi_user_component_play_record_1        | dialog_list             | text                     | YES
- bi_user_component_play_record_1        | level                   | character varying        | YES
- bi_user_component_play_record_2        | id                      | bigint                   | NO
- bi_user_component_play_record_2        | user_id                 | bigint                   | YES
- bi_user_component_play_record_2        | chapter_id              | bigint                   | YES
- bi_user_component_play_record_2        | chapter_unique_id       | character varying        | YES
- bi_user_component_play_record_2        | component_id            | bigint                   | YES
- bi_user_component_play_record_2        | sub_component_type      | smallint                 | YES
- bi_user_component_play_record_2        | component_type          | smallint                 | YES
- bi_user_component_play_record_2        | comp_type               | character varying        | YES
- bi_user_component_play_record_2        | play_status             | smallint                 | YES
- bi_user_component_play_record_2        | pass_time               | integer                  | YES
- bi_user_component_play_record_2        | interval_time           | integer                  | YES
- bi_user_component_play_record_2        | read_word_count         | integer                  | YES
- bi_user_component_play_record_2        | speak_count             | integer                  | YES
- bi_user_component_play_record_2        | listen_sentence_count   | integer                  | YES
- bi_user_component_play_record_2        | write_word_count        | integer                  | YES
- bi_user_component_play_record_2        | created_at              | timestamp with time zone | YES
- bi_user_component_play_record_2        | updated_at              | timestamp with time zone | YES
- bi_user_component_play_record_2        | deleted_at              | timestamp with time zone | YES
- bi_user_component_play_record_2        | section_id              | bigint                   | YES
- bi_user_component_play_record_2        | component_unique_code   | character varying        | YES
- bi_user_component_play_record_2        | play_result             | character varying        | YES
- bi_user_component_play_record_2        | finish_question_count   | integer                  | YES
- bi_user_component_play_record_2        | skill_points            | integer                  | YES
- bi_user_component_play_record_2        | c_type                  | character varying        | YES
- bi_user_component_play_record_2        | c_id                    | character varying        | YES
- bi_user_component_play_record_2        | user_behavior_info      | text                     | YES
- bi_user_component_play_record_2        | behavior_analysis_info  | text                     | YES
- bi_user_component_play_record_2        | session_id              | character varying        | YES
- bi_user_component_play_record_2        | listen_time             | integer                  | YES
- bi_user_component_play_record_2        | dialog_list             | text                     | YES
- bi_user_component_play_record_2        | level                   | character varying        | YES
- bi_user_component_play_record_3        | id                      | bigint                   | NO
- bi_user_component_play_record_3        | user_id                 | bigint                   | YES
- bi_user_component_play_record_3        | chapter_id              | bigint                   | YES
- bi_user_component_play_record_3        | chapter_unique_id       | character varying        | YES
- bi_user_component_play_record_3        | component_id            | bigint                   | YES
- bi_user_component_play_record_3        | sub_component_type      | smallint                 | YES
- bi_user_component_play_record_3        | component_type          | smallint                 | YES
- bi_user_component_play_record_3        | comp_type               | character varying        | YES
- bi_user_component_play_record_3        | play_status             | smallint                 | YES
- bi_user_component_play_record_3        | pass_time               | integer                  | YES
- bi_user_component_play_record_3        | interval_time           | integer                  | YES
- bi_user_component_play_record_3        | read_word_count         | integer                  | YES
- bi_user_component_play_record_3        | speak_count             | integer                  | YES
- bi_user_component_play_record_3        | listen_sentence_count   | integer                  | YES
- bi_user_component_play_record_3        | write_word_count        | integer                  | YES
- bi_user_component_play_record_3        | created_at              | timestamp with time zone | YES
- bi_user_component_play_record_3        | updated_at              | timestamp with time zone | YES
- bi_user_component_play_record_3        | deleted_at              | timestamp with time zone | YES
- bi_user_component_play_record_3        | section_id              | bigint                   | YES
- bi_user_component_play_record_3        | component_unique_code   | character varying        | YES
- bi_user_component_play_record_3        | play_result             | character varying        | YES
- bi_user_component_play_record_3        | finish_question_count   | integer                  | YES
- bi_user_component_play_record_3        | skill_points            | integer                  | YES
- bi_user_component_play_record_3        | c_type                  | character varying        | YES
- bi_user_component_play_record_3        | c_id                    | character varying        | YES
- bi_user_component_play_record_3        | user_behavior_info      | text                     | YES
- bi_user_component_play_record_3        | behavior_analysis_info  | text                     | YES
- bi_user_component_play_record_3        | session_id              | character varying        | YES
- bi_user_component_play_record_3        | listen_time             | integer                  | YES
- bi_user_component_play_record_3        | dialog_list             | text                     | YES
- bi_user_component_play_record_3        | level                   | character varying        | YES
- bi_user_component_play_record_4        | id                      | bigint                   | NO
- bi_user_component_play_record_4        | user_id                 | bigint                   | YES
- bi_user_component_play_record_4        | chapter_id              | bigint                   | YES
- bi_user_component_play_record_4        | chapter_unique_id       | character varying        | YES
- bi_user_component_play_record_4        | component_id            | bigint                   | YES
- bi_user_component_play_record_4        | sub_component_type      | smallint                 | YES
- bi_user_component_play_record_4        | component_type          | smallint                 | YES
- bi_user_component_play_record_4        | comp_type               | character varying        | YES
- bi_user_component_play_record_4        | play_status             | smallint                 | YES
- bi_user_component_play_record_4        | pass_time               | integer                  | YES
- bi_user_component_play_record_4        | interval_time           | integer                  | YES
- bi_user_component_play_record_4        | read_word_count         | integer                  | YES
- bi_user_component_play_record_4        | speak_count             | integer                  | YES
- bi_user_component_play_record_4        | listen_sentence_count   | integer                  | YES
- bi_user_component_play_record_4        | write_word_count        | integer                  | YES
- bi_user_component_play_record_4        | created_at              | timestamp with time zone | YES
- bi_user_component_play_record_4        | updated_at              | timestamp with time zone | YES
- bi_user_component_play_record_4        | deleted_at              | timestamp with time zone | YES
- bi_user_component_play_record_4        | section_id              | bigint                   | YES
- bi_user_component_play_record_4        | component_unique_code   | character varying        | YES
- bi_user_component_play_record_4        | play_result             | character varying        | YES
- bi_user_component_play_record_4        | finish_question_count   | integer                  | YES
- bi_user_component_play_record_4        | skill_points            | integer                  | YES
- bi_user_component_play_record_4        | c_type                  | character varying        | YES
- bi_user_component_play_record_4        | c_id                    | character varying        | YES
- bi_user_component_play_record_4        | user_behavior_info      | text                     | YES
- bi_user_component_play_record_4        | behavior_analysis_info  | text                     | YES
- bi_user_component_play_record_4        | session_id              | character varying        | YES
- bi_user_component_play_record_4        | listen_time             | integer                  | YES
- bi_user_component_play_record_4        | dialog_list             | text                     | YES
- bi_user_component_play_record_4        | level                   | character varying        | YES
- bi_user_component_play_record_5        | id                      | bigint                   | NO
- bi_user_component_play_record_5        | user_id                 | bigint                   | YES
- bi_user_component_play_record_5        | chapter_id              | bigint                   | YES
- bi_user_component_play_record_5        | chapter_unique_id       | character varying        | YES
- bi_user_component_play_record_5        | component_id            | bigint                   | YES
- bi_user_component_play_record_5        | sub_component_type      | smallint                 | YES
- bi_user_component_play_record_5        | component_type          | smallint                 | YES
- bi_user_component_play_record_5        | comp_type               | character varying        | YES
- bi_user_component_play_record_5        | play_status             | smallint                 | YES
- bi_user_component_play_record_5        | pass_time               | integer                  | YES
- bi_user_component_play_record_5        | interval_time           | integer                  | YES
- bi_user_component_play_record_5        | read_word_count         | integer                  | YES
- bi_user_component_play_record_5        | speak_count             | integer                  | YES
- bi_user_component_play_record_5        | listen_sentence_count   | integer                  | YES
- bi_user_component_play_record_5        | write_word_count        | integer                  | YES
- bi_user_component_play_record_5        | created_at              | timestamp with time zone | YES
- bi_user_component_play_record_5        | updated_at              | timestamp with time zone | YES
- bi_user_component_play_record_5        | deleted_at              | timestamp with time zone | YES
- bi_user_component_play_record_5        | section_id              | bigint                   | YES
- bi_user_component_play_record_5        | component_unique_code   | character varying        | YES
- bi_user_component_play_record_5        | play_result             | character varying        | YES
- bi_user_component_play_record_5        | finish_question_count   | integer                  | YES
- bi_user_component_play_record_5        | skill_points            | integer                  | YES
- bi_user_component_play_record_5        | c_type                  | character varying        | YES
- bi_user_component_play_record_5        | c_id                    | character varying        | YES
- bi_user_component_play_record_5        | user_behavior_info      | text                     | YES
- bi_user_component_play_record_5        | behavior_analysis_info  | text                     | YES
- bi_user_component_play_record_5        | session_id              | character varying        | YES
- bi_user_component_play_record_5        | listen_time             | integer                  | YES
- bi_user_component_play_record_5        | dialog_list             | text                     | YES
- bi_user_component_play_record_5        | level                   | character varying        | YES
- bi_user_component_play_record_6        | id                      | bigint                   | NO
- bi_user_component_play_record_6        | user_id                 | bigint                   | YES
- bi_user_component_play_record_6        | chapter_id              | bigint                   | YES
- bi_user_component_play_record_6        | chapter_unique_id       | character varying        | YES
- bi_user_component_play_record_6        | component_id            | bigint                   | YES
- bi_user_component_play_record_6        | sub_component_type      | smallint                 | YES
- bi_user_component_play_record_6        | component_type          | smallint                 | YES
- bi_user_component_play_record_6        | comp_type               | character varying        | YES
- bi_user_component_play_record_6        | play_status             | smallint                 | YES
- bi_user_component_play_record_6        | pass_time               | integer                  | YES
- bi_user_component_play_record_6        | interval_time           | integer                  | YES
- bi_user_component_play_record_6        | read_word_count         | integer                  | YES
- bi_user_component_play_record_6        | speak_count             | integer                  | YES
- bi_user_component_play_record_6        | listen_sentence_count   | integer                  | YES
- bi_user_component_play_record_6        | write_word_count        | integer                  | YES
- bi_user_component_play_record_6        | created_at              | timestamp with time zone | YES
- bi_user_component_play_record_6        | updated_at              | timestamp with time zone | YES
- bi_user_component_play_record_6        | deleted_at              | timestamp with time zone | YES
- bi_user_component_play_record_6        | section_id              | bigint                   | YES
- bi_user_component_play_record_6        | component_unique_code   | character varying        | YES
- bi_user_component_play_record_6        | play_result             | character varying        | YES
- bi_user_component_play_record_6        | finish_question_count   | integer                  | YES
- bi_user_component_play_record_6        | skill_points            | integer                  | YES
- bi_user_component_play_record_6        | c_type                  | character varying        | YES
- bi_user_component_play_record_6        | c_id                    | character varying        | YES
- bi_user_component_play_record_6        | user_behavior_info      | text                     | YES
- bi_user_component_play_record_6        | behavior_analysis_info  | text                     | YES
- bi_user_component_play_record_6        | session_id              | character varying        | YES
- bi_user_component_play_record_6        | listen_time             | integer                  | YES
- bi_user_component_play_record_6        | dialog_list             | text                     | YES
- bi_user_component_play_record_6        | level                   | character varying        | YES
- bi_user_course_detail                  | id                      | integer                  | NO
- bi_user_course_detail                  | created_at              | timestamp with time zone | YES
- bi_user_course_detail                  | updated_at              | timestamp with time zone | YES
- bi_user_course_detail                  | account_id              | integer                  | YES
- bi_user_course_detail                  | user_id                 | integer                  | YES
- bi_user_course_detail                  | course_level            | text                     | YES
- bi_user_course_detail                  | active_time             | timestamp with time zone | YES
- bi_user_course_detail                  | expire_time             | timestamp with time zone | YES
- bi_user_course_detail                  | latest_unit_index       | integer                  | YES
- bi_user_course_detail                  | latest_lesson_index     | integer                  | YES
- bi_user_course_detail                  | learn_duration          | integer                  | YES
- bi_user_course_detail                  | last_learn_time         | timestamp with time zone | YES
- bi_user_course_detail                  | deleted_at              | timestamp with time zone | YES
- bi_user_unit_challenge_question_result | id                      | bigint                   | NO
- bi_user_unit_challenge_question_result | user_id                 | bigint                   | YES
- bi_user_unit_challenge_question_result | story_id                | bigint                   | YES
- bi_user_unit_challenge_question_result | score                   | integer                  | YES
- bi_user_unit_challenge_question_result | score_text              | character varying        | YES
- bi_user_unit_challenge_question_result | question_list           | text                     | YES
- bi_user_unit_challenge_question_result | sp_value                | integer                  | YES
- bi_user_unit_challenge_question_result | created_at              | timestamp with time zone | YES
- bi_user_unit_challenge_question_result | updated_at              | timestamp with time zone | YES
- bi_user_unit_challenge_question_result | deleted_at              | timestamp with time zone | YES
- bi_user_unit_challenge_question_result | category                | character varying        | YES
- bi_user_unit_challenge_question_result | exp                     | integer                  | YES
- bi_user_unit_challenge_question_result | play_time               | bigint                   | YES
- bi_user_unit_challenge_question_result | read_word_count         | bigint                   | YES
- bi_user_unit_challenge_question_result | listen_time             | bigint                   | YES
- bi_user_unit_challenge_question_result | speak_count             | bigint                   | YES
- bi_user_unit_challenge_question_result | level                   | character varying        | YES
- bi_user_unit_summary_km_result         | id                      | bigint                   | NO
- bi_user_unit_summary_km_result         | user_id                 | bigint                   | YES
- bi_user_unit_summary_km_result         | story_id                | bigint                   | YES
- bi_user_unit_summary_km_result         | km_id                   | character varying        | YES
- bi_user_unit_summary_km_result         | km_type                 | character varying        | YES
- bi_user_unit_summary_km_result         | score_text              | character varying        | YES
- bi_user_unit_summary_km_result         | sp_value                | integer                  | YES
- bi_user_unit_summary_km_result         | created_at              | timestamp with time zone | YES
- bi_user_unit_summary_km_result         | updated_at              | timestamp with time zone | YES
- bi_user_unit_summary_km_result         | deleted_at              | timestamp with time zone | YES
- bi_user_unit_summary_km_result         | play_time               | bigint                   | YES
- bi_user_unit_summary_km_result         | sum_explain             | text                     | YES
- bi_user_unit_summary_km_result         | level                   | character varying        | YES
- bi_vala_app_account                    | id                      | bigint                   | NO
- bi_vala_app_account                    | tel                     | character varying        | YES
- bi_vala_app_account                    | tel_encrypt             | character varying        | YES
- bi_vala_app_account                    | name                    | character varying        | YES
- bi_vala_app_account                    | id_card                 | character varying        | YES
- bi_vala_app_account                    | status                  | smallint                 | NO
- bi_vala_app_account                    | pay_status              | integer                  | YES
- bi_vala_app_account                    | login_times             | integer                  | YES
- bi_vala_app_account                    | remark                  | text                     | YES
- bi_vala_app_account                    | key_from                | character varying        | NO
- bi_vala_app_account                    | created_at              | timestamp with time zone | YES
- bi_vala_app_account                    | updated_at              | timestamp with time zone | YES
- bi_vala_app_account                    | deleted_at              | timestamp with time zone | YES
- bi_vala_app_account                    | download_channel        | text                     | YES
- bi_vala_app_character                  | id                      | bigint                   | NO
- bi_vala_app_character                  | account_id              | bigint                   | YES
- bi_vala_app_character                  | nickname                | character varying        | YES
- bi_vala_app_character                  | gender                  | smallint                 | YES
- bi_vala_app_character                  | birthday                | character varying        | YES
- bi_vala_app_character                  | avatar                  | text                     | YES
- bi_vala_app_character                  | spine_name              | character varying        | YES
- bi_vala_app_character                  | latest_login            | timestamp with time zone | YES
- bi_vala_app_character                  | reputation              | character                | YES
- bi_vala_app_character                  | robots_cnt              | integer                  | YES
- bi_vala_app_character                  | head_image              | character varying        | YES
- bi_vala_app_character                  | status                  | smallint                 | YES
- bi_vala_app_character                  | purchase_season_package | text                     | YES
- bi_vala_app_character                  | total_sp_point          | integer                  | YES
- bi_vala_app_character                  | balance_sp_point        | integer                  | YES
- bi_vala_app_character                  | point                   | integer                  | YES
- bi_vala_app_character                  | pay_status              | smallint                 | YES
- bi_vala_app_character                  | created_at              | timestamp with time zone | YES
- bi_vala_app_character                  | updated_at              | timestamp with time zone | YES
- bi_vala_app_character                  | deleted_at              | timestamp with time zone | YES
- bi_vala_app_character                  | mood                    | integer                  | YES
- bi_vala_app_character                  | mood_start_time         | bigint                   | YES
- bi_vala_order                          | id                      | bigint                   | NO
- bi_vala_order                          | account_id              | bigint                   | YES
- bi_vala_order                          | out_trade_no            | character varying        | YES
- bi_vala_order                          | trade_no                | character varying        | YES
- bi_vala_order                          | pay_amount              | character varying        | YES
- bi_vala_order                          | goods_id                | bigint                   | YES
- bi_vala_order                          | order_status            | integer                  | YES
- bi_vala_order                          | order_status_comment    | text                     | YES
- bi_vala_order                          | is_used                 | integer                  | YES
- bi_vala_order                          | sale_channel            | integer                  | YES
- bi_vala_order                          | created_at              | timestamp with time zone | YES
- bi_vala_order                          | updated_at              | timestamp with time zone | YES
- bi_vala_order                          | deleted_at              | timestamp with time zone | YES
- bi_vala_order                          | pay_channel             | integer                  | YES
- bi_vala_order                          | pay_amount_int          | integer                  | YES
- bi_vala_order                          | pay_success_date        | timestamp with time zone | YES
- bi_vala_order                          | quantity                | integer                  | NO
- bi_vala_order                          | goods_name              | text                     | NO
- bi_vala_order                          | goods_apply_package_ids | text                     | NO
- bi_vala_order                          | key_from                | text                     | NO
- bi_vala_order                          | expire_days             | integer                  | YES
- data_sync_config                       | id                      | bigint                   | NO
- data_sync_config                       | job_name                | text                     | NO
- data_sync_config                       | database_name           | text                     | YES
- data_sync_config                       | origin_table            | text                     | YES
- data_sync_config                       | dest_table              | text                     | YES
- data_sync_config                       | deal_time               | timestamp with time zone | YES
- data_sync_config                       | full_id                 | bigint                   | NO
- data_sync_config                       | created_at              | timestamp with time zone | NO
- data_sync_config                       | updated_at              | timestamp with time zone | YES
- feishu_table_record                    | id                      | bigint                   | NO
- feishu_table_record                    | data_table              | text                     | NO
- feishu_table_record                    | record_id               | text                     | YES
- feishu_table_record                    | created_at              | timestamp with time zone | NO
- growth_activity_behavior               | id                      | integer                  | NO
- growth_activity_behavior               | es_id                   | character varying        | NO
- growth_activity_behavior               | account_id              | integer                  | YES
- growth_activity_behavior               | account_name            | character varying        | YES
- growth_activity_behavior               | activity                | text                     | YES
- growth_activity_behavior               | behavior                | character varying        | YES
- growth_activity_behavior               | created_at              | timestamp with time zone | YES
- growth_activity_behavior               | info                    | text                     | YES
- growth_activity_behavior               | source                  | character varying        | YES
- growth_activity_behavior               | sub_behavior            | character varying        | YES
- growth_activity_behavior               | user_id                 | integer                  | YES
- growth_activity_behavior               | user_name               | character varying        | YES
- user_behavior_0                        | id                      | bigint                   | NO
- user_behavior_0                        | user_id                 | integer                  | YES
- user_behavior_0                        | behavior                | text                     | YES
- user_behavior_0                        | behavior_time_date      | text                     | NO
- user_behavior_0                        | created_time            | timestamp with time zone | NO
- user_behavior_1                        | id                      | bigint                   | NO
- user_behavior_1                        | user_id                 | integer                  | YES
- user_behavior_1                        | behavior                | text                     | YES
- user_behavior_1                        | behavior_time_date      | text                     | NO
- user_behavior_1                        | created_time            | timestamp with time zone | NO
- user_behavior_10                       | id                      | bigint                   | NO
- user_behavior_10                       | user_id                 | integer                  | YES
- user_behavior_10                       | behavior                | text                     | YES
- user_behavior_10                       | behavior_time_date      | text                     | NO
- user_behavior_10                       | created_time            | timestamp with time zone | NO
- user_behavior_11                       | id                      | bigint                   | NO
- user_behavior_11                       | user_id                 | integer                  | YES
- user_behavior_11                       | behavior                | text                     | YES
- user_behavior_11                       | behavior_time_date      | text                     | NO
- user_behavior_11                       | created_time            | timestamp with time zone | NO
- user_behavior_12                       | id                      | bigint                   | NO
- user_behavior_12                       | user_id                 | integer                  | YES
- user_behavior_12                       | behavior                | text                     | YES
- user_behavior_12                       | behavior_time_date      | text                     | NO
- user_behavior_12                       | created_time            | timestamp with time zone | NO
- user_behavior_13                       | id                      | bigint                   | NO
- user_behavior_13                       | user_id                 | integer                  | YES
- user_behavior_13                       | behavior                | text                     | YES
- user_behavior_13                       | behavior_time_date      | text                     | NO
- user_behavior_13                       | created_time            | timestamp with time zone | NO
- user_behavior_14                       | id                      | bigint                   | NO
- user_behavior_14                       | user_id                 | integer                  | YES
- user_behavior_14                       | behavior                | text                     | YES
- user_behavior_14                       | behavior_time_date      | text                     | NO
- user_behavior_14                       | created_time            | timestamp with time zone | NO
- user_behavior_15                       | id                      | bigint                   | NO
- user_behavior_15                       | user_id                 | integer                  | YES
- user_behavior_15                       | behavior                | text                     | YES
- user_behavior_15                       | behavior_time_date      | text                     | NO
- user_behavior_15                       | created_time            | timestamp with time zone | NO
- user_behavior_2                        | id                      | bigint                   | NO
- user_behavior_2                        | user_id                 | integer                  | YES
- user_behavior_2                        | behavior                | text                     | YES
- user_behavior_2                        | behavior_time_date      | text                     | NO
- user_behavior_2                        | created_time            | timestamp with time zone | NO
- user_behavior_3                        | id                      | bigint                   | NO
- user_behavior_3                        | user_id                 | integer                  | YES
- user_behavior_3                        | behavior                | text                     | YES
- user_behavior_3                        | behavior_time_date      | text                     | NO
- user_behavior_3                        | created_time            | timestamp with time zone | NO
- user_behavior_4                        | id                      | bigint                   | NO
- user_behavior_4                        | user_id                 | integer                  | YES
- user_behavior_4                        | behavior                | text                     | YES
- user_behavior_4                        | behavior_time_date      | text                     | NO
- user_behavior_4                        | created_time            | timestamp with time zone | NO
- user_behavior_5                        | id                      | bigint                   | NO
- user_behavior_5                        | user_id                 | integer                  | YES
- user_behavior_5                        | behavior                | text                     | YES
- user_behavior_5                        | behavior_time_date      | text                     | NO
- user_behavior_5                        | created_time            | timestamp with time zone | NO
- user_behavior_6                        | id                      | bigint                   | NO
- user_behavior_6                        | user_id                 | integer                  | YES
- user_behavior_6                        | behavior                | text                     | YES
- user_behavior_6                        | behavior_time_date      | text                     | NO
- user_behavior_6                        | created_time            | timestamp with time zone | NO
- user_behavior_7                        | id                      | bigint                   | NO
- user_behavior_7                        | user_id                 | integer                  | YES
- user_behavior_7                        | behavior                | text                     | YES
- user_behavior_7                        | behavior_time_date      | text                     | NO
- user_behavior_7                        | created_time            | timestamp with time zone | NO
- user_behavior_8                        | id                      | bigint                   | NO
- user_behavior_8                        | user_id                 | integer                  | YES
- user_behavior_8                        | behavior                | text                     | YES
- user_behavior_8                        | behavior_time_date      | text                     | NO
- user_behavior_8                        | created_time            | timestamp with time zone | NO
- user_behavior_9                        | id                      | bigint                   | NO
- user_behavior_9                        | user_id                 | integer                  | YES
- user_behavior_9                        | behavior                | text                     | YES
- user_behavior_9                        | behavior_time_date      | text                     | NO
- user_behavior_9                        | created_time            | timestamp with time zone | NO
- user_behavior_buried_points            | id                      | integer                  | NO
- user_behavior_buried_points            | burying_point_id        | integer                  | YES
- user_behavior_buried_points            | burying_point_name      | character varying        | YES
- user_behavior_buried_points            | burying_point_sub_id    | integer                  | YES
- user_behavior_buried_points            | burying_point_sub_name  | character varying        | YES
- user_behavior_buried_points            | account_id              | integer                  | YES
- user_behavior_buried_points            | account_name            | character varying        | YES
- user_behavior_buried_points            | character_id            | integer                  | YES
- user_behavior_buried_points            | character_name          | character varying        | YES
- user_behavior_buried_points            | active_time             | integer                  | YES
- user_behavior_buried_points            | version_id              | character varying        | YES
- user_behavior_buried_points            | step_duration           | integer                  | YES
- user_behavior_buried_points            | finish_status           | character varying        | YES
- user_behavior_buried_points            | season_package_id       | integer                  | YES
- user_behavior_buried_points            | season_package_name     | character varying        | YES
- user_behavior_buried_points            | unit_id                 | integer                  | YES
- user_behavior_buried_points            | unit_name               | character varying        | YES
- user_behavior_buried_points            | lesson_id               | integer                  | YES
- user_behavior_buried_points            | lesson_name             | character varying        | YES
- user_behavior_buried_points            | component_id            | integer                  | YES
- user_behavior_buried_points            | component_name          | character varying        | YES
- user_behavior_buried_points            | c_type                  | character varying        | YES
- user_behavior_buried_points            | c_id                    | character varying        | YES
- user_behavior_buried_points            | learning_module_id      | integer                  | YES
- user_behavior_buried_points            | learning_module_name    | character varying        | YES
- user_behavior_buried_points            | learning_point_id       | integer                  | YES
- user_behavior_buried_points            | learning_point_name     | character varying        | YES
- user_behavior_buried_points            | card_id                 | integer                  | YES
- user_behavior_buried_points            | card_name               | character varying        | YES
- user_behavior_buried_points            | data_version            | character varying        | YES
- user_behavior_buried_points            | ex2                     | text                     | YES
- user_behavior_buried_points            | ex3                     | text                     | YES
- user_behavior_buried_points            | es_id                   | character varying        | YES
- user_behavior_count_tmp                | behavior                | text                     | YES
- user_behavior_count_tmp                | register_time_date      | text                     | NO
- user_behavior_count_tmp                | count                   | integer                  | YES
- user_detail_info                       | id                      | bigint                   | NO
- user_detail_info                       | user_id                 | integer                  | YES
- user_detail_info                       | latest_login_time       | bigint                   | YES
- user_detail_info                       | latest_active_time      | bigint                   | YES
- user_detail_info                       | created_time            | timestamp with time zone | NO
- user_info                              | id                      | bigint                   | NO
- user_info                              | account_id              | integer                  | YES
- user_info                              | user_id                 | integer                  | YES
- user_info                              | gender                  | integer                  | YES
- user_info                              | birthday                | text                     | YES
- user_info                              | register_time_date      | text                     | NO
- user_info                              | created_time            | timestamp with time zone | NO
- user_info                              | register_time           | text                     | YES
- user_learning                          | id                      | bigint                   | NO
- user_learning                          | user_id                 | integer                  | YES
- user_learning                          | stat_date               | text                     | NO
- user_learning                          | learning_time           | integer                  | YES
- user_learning                          | created_time            | timestamp with time zone | NO
-(624 rows)
-
diff --git a/database/数据库表结构总览_完整版.md b/database/数据库表结构总览_完整版.md
deleted file mode 100644
index 5c1c553..0000000
--- a/database/数据库表结构总览_完整版.md
+++ /dev/null
@@ -1,643 +0,0 @@
-# 瓦拉英语业务数据库表结构总览（完整版）
-## 使用说明
-- 每个表开头的【表用途注释】行可填写该表的业务用途说明
-- 每个字段的【注释】列可填写该字段的业务含义说明
-- 所有空注释位置均可直接编辑补充
----
-## 一、线上PostgreSQL库（正式环境用户行为数据）
-- **地址：** bj-postgres-16pob4sg.sql.tencentcdb.com:28591
-- **库名：** vala_bi
-- **权限：** 只读
----
-### account_activity_count
-【表用途注释：】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | |
-| time_period | integer | |
-| counts | integer | |
-| stat_date | text | |
-| created_time | timestamp with time zone | |
-
-### account_detail_info
-【表用途注释：】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | |
-| account_id | integer | 关联用户账户表id |
-| login_time | text | 登录时间 |
-| device | text | 登录设备 |
-| device_os | text | 设备操作系统 |
-| login_address | text | 登录地点 |
-| login_times | integer | 登录次数 |
-| created_time | timestamp with time zone | |
-| phone_login_times | integer | 手机号登录次数 |
-
-### account_device
-【表用途注释：存储用户使用过的设备信息】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | |
-| account_id | integer | 关联用户账户表id |
-| device | text | 设备标识 |
-| created_time | timestamp with time zone | |
-
-### account_login
-【表用途注释：用户登录记录表】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | |
-| account_id | integer | 关联用户账户表id |
-| login_date | text | 登录日期 |
-| created_time | timestamp with time zone | |
-| status | integer | 登录状态 |
-
-### account_paid_activity_count
-【表用途注释：】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | integer | |
-| time_period | integer | |
-| counts | integer | |
-| stat_date | text | |
-| created_time | timestamp with time zone | |
-
-### bi_level_unit_lesson
-【表用途注释：课程层级映射表，存储等级、季、单元、课时的对应关系】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| course_level | character varying | 课程等级（A1/L1、A2/L2等） |
-| course_season | character varying | 课程季 |
-| course_unit | character varying | 课程单元 |
-| course_lesson | character varying | 课程课时 |
-| id | integer | |
-
-### bi_refund_order
-【表用途注释：退款订单表，存储所有退款订单的信息】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | |
-| account_id | bigint | 关联下单用户账户id |
-| out_trade_no | character varying | 商户订单号，关联订单表out_trade_no |
-| trade_no | character varying | 支付平台交易号 |
-| refund_amount | character varying | 退款金额（字符串格式，单位元） |
-| created_at | timestamp with time zone | 创建时间 |
-| updated_at | timestamp with time zone | 更新时间 |
-| deleted_at | timestamp with time zone | 删除时间（为空表示未删除） |
-| refund_amount_int | integer | 退款金额（整数格式，单位分） |
-| reason | text | 退款原因 |
-| sale_channel | integer | 销售渠道 |
-| status | integer | 退款状态：3=退款成功 |
-| is_admin | boolean | 是否为后台操作退款 |
-| channel_refund_id | bigint | 渠道退款id |
-| refund_ticket_ids | text | 关联优惠券id |
-| refund_type | integer | 退款类型 |
-| refund_method | integer | 退款方式 |
-| after_sale_no | text | 售后单号 |
-
-### bi_user_chapter_play_record_0
-【表用途注释：用户课时完成记录表（分表0），存储用户每个课时的学习完成情况】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | |
-| user_id | bigint | 角色id，关联用户角色表id |
-| chapter_id | bigint | 课时id |
-| chapter_unique_id | character varying | 课时唯一标识 |
-| play_status | smallint | 课时完成状态：1=正常完成 |
-| story_id | bigint | 关联故事id |
-| created_at | timestamp with time zone | 创建时间 |
-| updated_at | timestamp with time zone | 更新时间 |
-| deleted_at | timestamp with time zone | 删除时间（为空表示未删除） |
-| map_position | text | 地图位置 |
-| dialog_info | text | 对话信息 |
-| level | character varying | 课程等级 |
-
-### bi_user_chapter_play_record_1
-【表用途注释：用户课时完成记录表（分表1），存储用户每个课时的学习完成情况】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | |
-| user_id | bigint | 角色id，关联用户角色表id |
-| chapter_id | bigint | 课时id |
-| chapter_unique_id | character varying | 课时唯一标识 |
-| play_status | smallint | 课时完成状态：1=正常完成 |
-| story_id | bigint | 关联故事id |
-| created_at | timestamp with time zone | 创建时间 |
-| updated_at | timestamp with time zone | 更新时间 |
-| deleted_at | timestamp with time zone | 删除时间（为空表示未删除） |
-| map_position | text | 地图位置 |
-| dialog_info | text | 对话信息 |
-| level | character varying | 课程等级 |
-
-### bi_user_chapter_play_record_2
-【表用途注释：用户课时完成记录表（分表2），存储用户每个课时的学习完成情况】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | |
-| user_id | bigint | 角色id，关联用户角色表id |
-| chapter_id | bigint | 课时id |
-| chapter_unique_id | character varying | 课时唯一标识 |
-| play_status | smallint | 课时完成状态：1=正常完成 |
-| story_id | bigint | 关联故事id |
-| created_at | timestamp with time zone | 创建时间 |
-| updated_at | timestamp with time zone | 更新时间 |
-| deleted_at | timestamp with time zone | 删除时间（为空表示未删除） |
-| map_position | text | 地图位置 |
-| dialog_info | text | 对话信息 |
-| level | character varying | 课程等级 |
-
-### bi_user_chapter_play_record_3
-【表用途注释：用户课时完成记录表（分表3），存储用户每个课时的学习完成情况】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | |
-| user_id | bigint | 角色id，关联用户角色表id |
-| chapter_id | bigint | 课时id |
-| chapter_unique_id | character varying | 课时唯一标识 |
-| play_status | smallint | 课时完成状态：1=正常完成 |
-| story_id | bigint | 关联故事id |
-| created_at | timestamp with time zone | 创建时间 |
-| updated_at | timestamp with time zone | 更新时间 |
-| deleted_at | timestamp with time zone | 删除时间（为空表示未删除） |
-| map_position | text | 地图位置 |
-| dialog_info | text | 对话信息 |
-| level | character varying | 课程等级 |
-
-### bi_user_chapter_play_record_4
-【表用途注释：用户课时完成记录表（分表4），存储用户每个课时的学习完成情况】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | |
-| user_id | bigint | 角色id，关联用户角色表id |
-| chapter_id | bigint | 课时id |
-| chapter_unique_id | character varying | 课时唯一标识 |
-| play_status | smallint | 课时完成状态：1=正常完成 |
-| story_id | bigint | 关联故事id |
-| created_at | timestamp with time zone | 创建时间 |
-| updated_at | timestamp with time zone | 更新时间 |
-| deleted_at | timestamp with time zone | 删除时间（为空表示未删除） |
-| map_position | text | 地图位置 |
-| dialog_info | text | 对话信息 |
-| level | character varying | 课程等级 |
-
-### bi_user_chapter_play_record_5
-【表用途注释：用户课时完成记录表（分表5），存储用户每个课时的学习完成情况】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | |
-| user_id | bigint | 角色id，关联用户角色表id |
-| chapter_id | bigint | 课时id |
-| chapter_unique_id | character varying | 课时唯一标识 |
-| play_status | smallint | 课时完成状态：1=正常完成 |
-| story_id | bigint | 关联故事id |
-| created_at | timestamp with time zone | 创建时间 |
-| updated_at | timestamp with time zone | 更新时间 |
-| deleted_at | timestamp with time zone | 删除时间（为空表示未删除） |
-| map_position | text | 地图位置 |
-| dialog_info | text | 对话信息 |
-| level | character varying | 课程等级 |
-
-### bi_user_chapter_play_record_6
-【表用途注释：用户课时完成记录表（分表6），存储用户每个课时的学习完成情况】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | |
-| user_id | bigint | 角色id，关联用户角色表id |
-| chapter_id | bigint | 课时id |
-| chapter_unique_id | character varying | 课时唯一标识 |
-| play_status | smallint | 课时完成状态：1=正常完成 |
-| story_id | bigint | 关联故事id |
-| created_at | timestamp with time zone | 创建时间 |
-| updated_at | timestamp with time zone | 更新时间 |
-| deleted_at | timestamp with time zone | 删除时间（为空表示未删除） |
-| map_position | text | 地图位置 |
-| dialog_info | text | 对话信息 |
-| level | character varying | 课程等级 |
-
-### bi_user_chapter_play_record_7
-【表用途注释：用户课时完成记录表（分表7），存储用户每个课时的学习完成情况】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | |
-| user_id | bigint | 角色id，关联用户角色表id |
-| chapter_id | bigint | 课时id |
-| chapter_unique_id | character varying | 课时唯一标识 |
-| play_status | smallint | 课时完成状态：1=正常完成 |
-| story_id | bigint | 关联故事id |
-| created_at | timestamp with time zone | 创建时间 |
-| updated_at | timestamp with time zone | 更新时间 |
-| deleted_at | timestamp with time zone | 删除时间（为空表示未删除） |
-| map_position | text | 地图位置 |
-| dialog_info | text | 对话信息 |
-| level | character varying | 课程等级 |
-
-### bi_user_component_play_record_0
-【表用途注释：用户组件完成记录表（分表0），存储用户每个课时下每个组件的学习情况】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | |
-| user_id | bigint | 角色id，关联用户角色表id |
-| chapter_id | bigint | 课时id |
-| chapter_unique_id | character varying | 课时唯一标识，关联课时记录表chapter_unique_id |
-| component_id | bigint | 组件id |
-| sub_component_type | smallint | 子组件类型 |
-| component_type | smallint | 组件类型 |
-| comp_type | character varying | 组件类型标识 |
-| play_status | smallint | 组件完成状态 |
-| pass_time | integer | 通过时间 |
-| interval_time | integer | 组件学习耗时（单位：毫秒） |
-| read_word_count | integer | 读单词数量 |
-| speak_count | integer | 开口次数 |
-| listen_sentence_count | integer | 听句子数量 |
-| write_word_count | integer | 写单词数量 |
-| created_at | timestamp with time zone | 创建时间 |
-| updated_at | timestamp with time zone | 更新时间 |
-| deleted_at | timestamp with time zone | 删除时间（为空表示未删除） |
-| section_id | bigint | 关联章节id |
-| component_unique_code | character varying | 组件唯一编码 |
-| play_result | character varying | 组件完成结果（Perfect/Good/Oops等） |
-| finish_question_count | integer | 完成题目数量 |
-| skill_points | integer | 获得技能点数 |
-| c_type | character varying | 组件类型 |
-| c_id | character varying | 组件配置id |
-| user_behavior_info | text | 用户行为信息 |
-| behavior_analysis_info | text | 行为分析信息 |
-| session_id | character varying | 会话id |
-| listen_time | integer | 听力时长 |
-| dialog_list | text | 对话列表 |
-| level | character varying | 课程等级 |
-
-### bi_user_component_play_record_1
-【表用途注释：用户组件完成记录表（分表1），存储用户每个课时下每个组件的学习情况】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | |
-| user_id | bigint | 角色id，关联用户角色表id |
-| chapter_id | bigint | 课时id |
-| chapter_unique_id | character varying | 课时唯一标识，关联课时记录表chapter_unique_id |
-| component_id | bigint | 组件id |
-| sub_component_type | smallint | 子组件类型 |
-| component_type | smallint | 组件类型 |
-| comp_type | character varying | 组件类型标识 |
-| play_status | smallint | 组件完成状态 |
-| pass_time | integer | 通过时间 |
-| interval_time | integer | 组件学习耗时（单位：毫秒） |
-| read_word_count | integer | 读单词数量 |
-| speak_count | integer | 开口次数 |
-| listen_sentence_count | integer | 听句子数量 |
-| write_word_count | integer | 写单词数量 |
-| created_at | timestamp with time zone | 创建时间 |
-| updated_at | timestamp with time zone | 更新时间 |
-| deleted_at | timestamp with time zone | 删除时间（为空表示未删除） |
-| section_id | bigint | 关联章节id |
-| component_unique_code | character varying | 组件唯一编码 |
-| play_result | character varying | 组件完成结果（Perfect/Good/Oops等） |
-| finish_question_count | integer | 完成题目数量 |
-| skill_points | integer | 获得技能点数 |
-| c_type | character varying | 组件类型 |
-| c_id | character varying | 组件配置id |
-| user_behavior_info | text | 用户行为信息 |
-| behavior_analysis_info | text | 行为分析信息 |
-| session_id | character varying | 会话id |
-| listen_time | integer | 听力时长 |
-| dialog_list | text | 对话列表 |
-| level | character varying | 课程等级 |
-
-### bi_user_component_play_record_2
-【表用途注释：用户组件完成记录表（分表2），存储用户每个课时下每个组件的学习情况】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | |
-| user_id | bigint | 角色id，关联用户角色表id |
-| chapter_id | bigint | 课时id |
-| chapter_unique_id | character varying | 课时唯一标识，关联课时记录表chapter_unique_id |
-| component_id | bigint | 组件id |
-| sub_component_type | smallint | 子组件类型 |
-| component_type | smallint | 组件类型 |
-| comp_type | character varying | 组件类型标识 |
-| play_status | smallint | 组件完成状态 |
-| pass_time | integer | 通过时间 |
-| interval_time | integer | 组件学习耗时（单位：毫秒） |
-| read_word_count | integer | 读单词数量 |
-| speak_count | integer | 开口次数 |
-| listen_sentence_count | integer | 听句子数量 |
-| write_word_count | integer | 写单词数量 |
-| created_at | timestamp with time zone | 创建时间 |
-| updated_at | timestamp with time zone | 更新时间 |
-| deleted_at | timestamp with time zone | 删除时间（为空表示未删除） |
-| section_id | bigint | 关联章节id |
-| component_unique_code | character varying | 组件唯一编码 |
-| play_result | character varying | 组件完成结果（Perfect/Good/Oops等） |
-| finish_question_count | integer | 完成题目数量 |
-| skill_points | integer | 获得技能点数 |
-| c_type | character varying | 组件类型 |
-| c_id | character varying | 组件配置id |
-| user_behavior_info | text | 用户行为信息 |
-| behavior_analysis_info | text | 行为分析信息 |
-| session_id | character varying | 会话id |
-| listen_time | integer | 听力时长 |
-| dialog_list | text | 对话列表 |
-| level | character varying | 课程等级 |
-
-### bi_user_component_play_record_3
-【表用途注释：用户组件完成记录表（分表3），存储用户每个课时下每个组件的学习情况】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | |
-| user_id | bigint | 角色id，关联用户角色表id |
-| chapter_id | bigint | 课时id |
-| chapter_unique_id | character varying | 课时唯一标识，关联课时记录表chapter_unique_id |
-| component_id | bigint | 组件id |
-| sub_component_type | smallint | 子组件类型 |
-| component_type | smallint | 组件类型 |
-| comp_type | character varying | 组件类型标识 |
-| play_status | smallint | 组件完成状态 |
-| pass_time | integer | 通过时间 |
-| interval_time | integer | 组件学习耗时（单位：毫秒） |
-| read_word_count | integer | 读单词数量 |
-| speak_count | integer | 开口次数 |
-| listen_sentence_count | integer | 听句子数量 |
-| write_word_count | integer | 写单词数量 |
-| created_at | timestamp with time zone | 创建时间 |
-| updated_at | timestamp with time zone | 更新时间 |
-| deleted_at | timestamp with time zone | 删除时间（为空表示未删除） |
-| section_id | bigint | 关联章节id |
-| component_unique_code | character varying | 组件唯一编码 |
-| play_result | character varying | 组件完成结果（Perfect/Good/Oops等） |
-| finish_question_count | integer | 完成题目数量 |
-| skill_points | integer | 获得技能点数 |
-| c_type | character varying | 组件类型 |
-| c_id | character varying | 组件配置id |
-| user_behavior_info | text | 用户行为信息 |
-| behavior_analysis_info | text | 行为分析信息 |
-| session_id | character varying | 会话id |
-| listen_time | integer | 听力时长 |
-| dialog_list | text | 对话列表 |
-| level | character varying | 课程等级 |
-
-### bi_user_component_play_record_4
-【表用途注释：用户组件完成记录表（分表4），存储用户每个课时下每个组件的学习情况】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | |
-| user_id | bigint | 角色id，关联用户角色表id |
-| chapter_id | bigint | 课时id |
-| chapter_unique_id | character varying | 课时唯一标识，关联课时记录表chapter_unique_id |
-| component_id | bigint | 组件id |
-| sub_component_type | smallint | 子组件类型 |
-| component_type | smallint | 组件类型 |
-| comp_type | character varying | 组件类型标识 |
-| play_status | smallint | 组件完成状态 |
-| pass_time | integer | 通过时间 |
-| interval_time | integer | 组件学习耗时（单位：毫秒） |
-| read_word_count | integer | 读单词数量 |
-| speak_count | integer | 开口次数 |
-| listen_sentence_count | integer | 听句子数量 |
-| write_word_count | integer | 写单词数量 |
-| created_at | timestamp with time zone | 创建时间 |
-| updated_at | timestamp with time zone | 更新时间 |
-| deleted_at | timestamp with time zone | 删除时间（为空表示未删除） |
-| section_id | bigint | 关联章节id |
-| component_unique_code | character varying | 组件唯一编码 |
-| play_result | character varying | 组件完成结果（Perfect/Good/Oops等） |
-| finish_question_count | integer | 完成题目数量 |
-| skill_points | integer | 获得技能点数 |
-| c_type | character varying | 组件类型 |
-| c_id | character varying | 组件配置id |
-| user_behavior_info | text | 用户行为信息 |
-| behavior_analysis_info | text | 行为分析信息 |
-| session_id | character varying | 会话id |
-| listen_time | integer | 听力时长 |
-| dialog_list | text | 对话列表 |
-| level | character varying | 课程等级 |
-
-### bi_user_component_play_record_5
-【表用途注释：用户组件完成记录表（分表5），存储用户每个课时下每个组件的学习情况】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | |
-| user_id | bigint | 角色id，关联用户角色表id |
-| chapter_id | bigint | 课时id |
-| chapter_unique_id | character varying | 课时唯一标识，关联课时记录表chapter_unique_id |
-| component_id | bigint | 组件id |
-| sub_component_type | smallint | 子组件类型 |
-| component_type | smallint | 组件类型 |
-| comp_type | character varying | 组件类型标识 |
-| play_status | smallint | 组件完成状态 |
-| pass_time | integer | 通过时间 |
-| interval_time | integer | 组件学习耗时（单位：毫秒） |
-| read_word_count | integer | 读单词数量 |
-| speak_count | integer | 开口次数 |
-| listen_sentence_count | integer | 听句子数量 |
-| write_word_count | integer | 写单词数量 |
-| created_at | timestamp with time zone | 创建时间 |
-| updated_at | timestamp with time zone | 更新时间 |
-| deleted_at | timestamp with time zone | 删除时间（为空表示未删除） |
-| section_id | bigint | 关联章节id |
-| component_unique_code | character varying | 组件唯一编码 |
-| play_result | character varying | 组件完成结果（Perfect/Good/Oops等） |
-| finish_question_count | integer | 完成题目数量 |
-| skill_points | integer | 获得技能点数 |
-| c_type | character varying | 组件类型 |
-| c_id | character varying | 组件配置id |
-| user_behavior_info | text | 用户行为信息 |
-| behavior_analysis_info | text | 行为分析信息 |
-| session_id | character varying | 会话id |
-| listen_time | integer | 听力时长 |
-| dialog_list | text | 对话列表 |
-| level | character varying | 课程等级 |
-
-### bi_user_component_play_record_6
-【表用途注释：用户组件完成记录表（分表6），存储用户每个课时下每个组件的学习情况】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | |
-| user_id | bigint | 角色id，关联用户角色表id |
-| chapter_id | bigint | 课时id |
-| chapter_unique_id | character varying | 课时唯一标识，关联课时记录表chapter_unique_id |
-| component_id | bigint | 组件id |
-| sub_component_type | smallint | 子组件类型 |
-| component_type | smallint | 组件类型 |
-| comp_type | character varying | 组件类型标识 |
-| play_status | smallint | 组件完成状态 |
-| pass_time | integer | 通过时间 |
-| interval_time | integer | 组件学习耗时（单位：毫秒） |
-| read_word_count | integer | 读单词数量 |
-| speak_count | integer | 开口次数 |
-| listen_sentence_count | integer | 听句子数量 |
-| write_word_count | integer | 写单词数量 |
-| created_at | timestamp with time zone | 创建时间 |
-| updated_at | timestamp with time zone | 更新时间 |
-| deleted_at | timestamp with time zone | 删除时间（为空表示未删除） |
-| section_id | bigint | 关联章节id |
-| component_unique_code | character varying | 组件唯一编码 |
-| play_result | character varying | 组件完成结果（Perfect/Good/Oops等） |
-| finish_question_count | integer | 完成题目数量 |
-| skill_points | integer | 获得技能点数 |
-| c_type | character varying | 组件类型 |
-| c_id | character varying | 组件配置id |
-| user_behavior_info | text | 用户行为信息 |
-| behavior_analysis_info | text | 行为分析信息 |
-| session_id | character varying | 会话id |
-| listen_time | integer | 听力时长 |
-| dialog_list | text | 对话列表 |
-| level | character varying | 课程等级 |
-
-### bi_user_component_play_record_7
-【表用途注释：用户组件完成记录表（分表7），存储用户每个课时下每个组件的学习情况】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | |
-| user_id | bigint | 角色id，关联用户角色表id |
-| chapter_id | bigint | 课时id |
-| chapter_unique_id | character varying | 课时唯一标识，关联课时记录表chapter_unique_id |
-| component_id | bigint | 组件id |
-| sub_component_type | smallint | 子组件类型 |
-| component_type | smallint | 组件类型 |
-| comp_type | character varying | 组件类型标识 |
-| play_status | smallint | 组件完成状态 |
-| pass_time | integer | 通过时间 |
-| interval_time | integer | 组件学习耗时（单位：毫秒） |
-| read_word_count | integer | 读单词数量 |
-| speak_count | integer | 开口次数 |
-| listen_sentence_count | integer | 听句子数量 |
-| write_word_count | integer | 写单词数量 |
-| created_at | timestamp with time zone | 创建时间 |
-| updated_at | timestamp with time zone | 更新时间 |
-| deleted_at | timestamp with time zone | 删除时间（为空表示未删除） |
-| section_id | bigint | 关联章节id |
-| component_unique_code | character varying | 组件唯一编码 |
-| play_result | character varying | 组件完成结果（Perfect/Good/Oops等） |
-| finish_question_count | integer | 完成题目数量 |
-| skill_points | integer | 获得技能点数 |
-| c_type | character varying | 组件类型 |
-| c_id | character varying | 组件配置id |
-| user_behavior_info | text | 用户行为信息 |
-| behavior_analysis_info | text | 行为分析信息 |
-| session_id | character varying | 会话id |
-| listen_time | integer | 听力时长 |
-| dialog_list | text | 对话列表 |
-| level | character varying | 课程等级 |
-
-### bi_user_course_detail
-【表用途注释：用户课程明细表，存储用户购买的课程信息】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | integer | |
-| created_at | timestamp with time zone | 创建时间 |
-| updated_at | timestamp with time zone | 更新时间 |
-| account_id | integer | 关联用户账户表id |
-| user_id | integer | 关联用户角色表id |
-| course_level | text | 课程等级：A1=L1，A2=L2 |
-| active_time | timestamp with time zone | 课程激活时间 |
-| expire_time | timestamp with time zone | 课程过期时间：不为空=正式课，为空=体验课 |
-| latest_unit_index | integer | 当前学习到的单元序号 |
-| latest_lesson_index | integer | 当前学习到的课时序号 |
-| learn_duration | integer | 累计学习时长（单位：秒） |
-| last_learn_time | timestamp with time zone | 最近一次学习时间 |
-| deleted_at | timestamp with time zone | 删除时间：为空=未删除，有值=已删除 |
-
-### bi_user_task_log
-【表用途注释：用户任务完成日志表】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | integer | |
-| user_id | integer | 关联用户角色表id |
-| task_type | integer | 任务类型 |
-| task_id | integer | 任务id |
-| status | integer | 任务完成状态 |
-| start_time | bigint | 任务开始时间（时间戳） |
-| updated_at | timestamp with time zone | 更新时间 |
-| created_at | timestamp with time zone | 创建时间 |
-
-### bi_user_unit_challenge_question_result
-【表用途注释：用户单元挑战题结果表】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | |
-| user_id | bigint | 关联用户角色表id |
-| story_id | bigint | 关联故事id |
-| score | integer | 挑战得分 |
-| score_text | character varying | 得分等级（Perfect/Good/Oops等） |
-| question_list | text | 题目列表 |
-| sp_value | integer | 获得SP点数 |
-| created_at | timestamp with time zone | 创建时间 |
-| updated_at | timestamp with time zone | 更新时间 |
-| deleted_at | timestamp with time zone | 删除时间 |
-| category | character varying | 挑战类型 |
-| exp | integer | 获得经验值 |
-| play_time | bigint | 挑战耗时 |
-| read_word_count | bigint | 读单词数量 |
-| listen_time | bigint | 听力时长 |
-| speak_count | bigint | 开口次数 |
-| level | character varying | 课程等级 |
-
-### bi_user_unit_review_question_result
-【表用途注释：用户单元复习题结果表】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | |
-| user_id | bigint | 关联用户角色表id |
-| story_id | bigint | 关联故事id |
-| chapter_id | bigint | 关联课时id |
-| unique_id | character varying | 唯一标识 |
-| score | integer | 复习题得分 |
-| score_text | character varying | 得分等级 |
-| question_list | text | 题目列表 |
-| sp_value | integer | 获得SP点数 |
-| exp | integer | 获得经验值 |
-| created_at | timestamp with time zone | 创建时间 |
-| updated_at | timestamp with time zone | 更新时间 |
-| deleted_at | timestamp with time zone | 删除时间 |
-| play_time | bigint | 答题耗时 |
-| level | character varying | 课程等级 |
-
-### bi_user_unit_summary_km_result
-【表用途注释：用户单元知识点总结结果表】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | |
-| user_id | bigint | 关联用户角色表id |
-| story_id | bigint | 关联故事id |
-| km_id | character varying | 知识点id |
-| km_type | character varying | 知识点类型 |
-| score_text | character varying | 知识点掌握等级 |
-| sp_value | integer | 获得SP点数 |
-| created_at | timestamp with time zone | 创建时间 |
-| updated_at | timestamp with time zone | 更新时间 |
-| deleted_at | timestamp with time zone | 删除时间 |
-| play_time | bigint | 学习耗时 |
-| sum_explain | text | 知识点总结说明 |
-| level | character varying | 课程等级 |
-
-### bi_vala_app_account
-【表用途注释：用户账户表，存储用户的账号基本信息】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | 账户id，主键 |
-| tel | character varying | 手机号（明文） |
-| tel_encrypt | character varying | 手机号加密存储 |
-| name | character varying | 用户姓名 |
-| id_card | character varying | 身份证号 |
-| status | smallint | 账户状态 |
-| pay_status | integer | 付费状态 |
-| login_times | integer | 累计登录次数 |
-| remark | text | 备注 |
-| key_from | character varying | 注册来源渠道 |
-| created_at | timestamp with time zone | 创建时间 |
-| updated_at | timestamp with time zone | 更新时间 |
-| deleted_at | timestamp with time zone | 删除时间（为空表示未删除） |
-| download_channel | text | 用户下载渠道（汉字格式，用于统计新增用户来源） |
-
-### bi_vala_app_character
-【表用途注释：用户角色表，一个账户可以创建多个角色】
-| 字段名 | 数据类型 | 注释 |
-|--------|----------|------|
-| id | bigint | 角色id，主键 |
-| account_id | bigint | 关联账户表id |
-| nickname | character varying | 角色昵称 |
-| gender | smallint | 角色性别：1=男，2=女 |
-| birthday | character varying | 角色生日 |
-| avatar | text | 角色头像 |
-| spine_name | character varying | 角色 spine 资源名 |
-| latest_login | timestamp with time zone | 最近登录
\ No newline at end of file
diff --git a/generate_report.py b/generate_report.py
deleted file mode 100644
index 0b41638..0000000
--- a/generate_report.py
+++ /dev/null
@@ -1,152 +0,0 @@
-import pandas as pd
-from datetime import datetime
-
-# 1. 整体统计数据
-overall_data = [
-    {"渠道": "学而思", "新增注册总人数": 615, "购课总人数":7, "购课总金额（元）":7794},
-    {"渠道": "科大讯飞", "新增注册总人数": 377, "购课总人数":4, "购课总金额（元）":3796},
-    {"渠道": "希沃", "新增注册总人数": 122, "购课总人数":1, "购课总金额（元）":599},
-    {"渠道": "京东方", "新增注册总人数": 61, "购课总人数":1, "购课总金额（元）":599},
-    {"渠道": "合计", "新增注册总人数": 1175, "购课总人数":13, "购课总金额（元）":12788},
-]
-df_overall = pd.DataFrame(overall_data)
-
-# 2. 每日购课明细数据
-purchase_data = [
-    {"日期": "2026-03-02", "渠道": "学而思", "购课人数":1, "购课金额（元）":599, "订单号": "zfb202603022031481772454708683943"},
-    {"日期": "2026-03-07", "渠道": "学而思", "购课人数":1, "购课金额（元）":599, "订单号": "wx202603071022051772850125753228"},
-    {"日期": "2026-03-07", "渠道": "科大讯飞", "购课人数":1, "购课金额（元）":599, "订单号": "wx202603072123501772889830225976"},
-    {"日期": "2026-03-10", "渠道": "学而思", "购课人数":1, "购课金额（元）":1999, "订单号": "wx202603101820431773138043948181"},
-    {"日期": "2026-03-15", "渠道": "科大讯飞", "购课人数":2, "购课金额（元）":2598, "订单号": "wx202603150854031773536043478685、wx20260315122747177354886748896"},
-    {"日期": "2026-03-18", "渠道": "学而思", "购课人数":2, "购课金额（元）":2598, "订单号": "wx202603182055481773838548372991、zfb202603182118201773839900411837"},
-    {"日期": "2026-03-23", "渠道": "科大讯飞", "购课人数":1, "购课金额（元）":599, "订单号": "wx202603232015081774268108032833"},
-    {"日期": "2026-03-24", "渠道": "京东方", "购课人数":1, "购课金额（元）":599, "订单号": "zfb202603242026431774355203538499"},
-    {"日期": "2026-03-27", "渠道": "学而思", "购课人数":1, "购课金额（元）":1999, "订单号": "wx202603271258341774587514141956"},
-    {"日期": "2026-03-28", "渠道": "希沃", "购课人数":1, "购课金额（元）":599, "订单号": "wx20260328145038177468063894734"},
-]
-df_purchase = pd.DataFrame(purchase_data)
-
-# 3. 每日新增注册数据
-register_data = [
-    {"日期": "2026-03-01", "渠道": "京东方", "新增注册人数": 1},
-    {"日期": "2026-03-01", "渠道": "学而思", "新增注册人数": 48},
-    {"日期": "2026-03-01", "渠道": "希沃", "新增注册人数": 2},
-    {"日期": "2026-03-02", "渠道": "京东方", "新增注册人数": 3},
-    {"日期": "2026-03-02", "渠道": "学而思", "新增注册人数": 38},
-    {"日期": "2026-03-02", "渠道": "希沃", "新增注册人数": 1},
-    {"日期": "2026-03-03", "渠道": "学而思", "新增注册人数": 24},
-    {"日期": "2026-03-03", "渠道": "希沃", "新增注册人数": 4},
-    {"日期": "2026-03-04", "渠道": "京东方", "新增注册人数": 4},
-    {"日期": "2026-03-04", "渠道": "学而思", "新增注册人数": 20},
-    {"日期": "2026-03-04", "渠道": "希沃", "新增注册人数": 10},
-    {"日期": "2026-03-04", "渠道": "科大讯飞", "新增注册人数": 3},
-    {"日期": "2026-03-05", "渠道": "京东方", "新增注册人数": 7},
-    {"日期": "2026-03-05", "渠道": "学而思", "新增注册人数": 37},
-    {"日期": "2026-03-05", "渠道": "希沃", "新增注册人数": 15},
-    {"日期": "2026-03-05", "渠道": "科大讯飞", "新增注册人数": 17},
-    {"日期": "2026-03-06", "渠道": "京东方", "新增注册人数": 6},
-    {"日期": "2026-03-06", "渠道": "学而思", "新增注册人数": 26},
-    {"日期": "2026-03-06", "渠道": "希沃", "新增注册人数": 9},
-    {"日期": "2026-03-06", "渠道": "科大讯飞", "新增注册人数": 12},
-    {"日期": "2026-03-07", "渠道": "京东方", "新增注册人数": 5},
-    {"日期": "2026-03-07", "渠道": "学而思", "新增注册人数": 35},
-    {"日期": "2026-03-07", "渠道": "希沃", "新增注册人数": 5},
-    {"日期": "2026-03-07", "渠道": "科大讯飞", "新增注册人数": 34},
-    {"日期": "2026-03-08", "渠道": "京东方", "新增注册人数": 3},
-    {"日期": "2026-03-08", "渠道": "学而思", "新增注册人数": 33},
-    {"日期": "2026-03-08", "渠道": "希沃", "新增注册人数": 12},
-    {"日期": "2026-03-08", "渠道": "科大讯飞", "新增注册人数": 34},
-    {"日期": "2026-03-09", "渠道": "京东方", "新增注册人数": 2},
-    {"日期": "2026-03-09", "渠道": "学而思", "新增注册人数": 27},
-    {"日期": "2026-03-09", "渠道": "希沃", "新增注册人数": 5},
-    {"日期": "2026-03-09", "渠道": "科大讯飞", "新增注册人数": 15},
-    {"日期": "2026-03-10", "渠道": "学而思", "新增注册人数": 15},
-    {"日期": "2026-03-10", "渠道": "希沃", "新增注册人数": 3},
-    {"日期": "2026-03-10", "渠道": "科大讯飞", "新增注册人数": 9},
-    {"日期": "2026-03-11", "渠道": "京东方", "新增注册人数": 2},
-    {"日期": "2026-03-11", "渠道": "学而思", "新增注册人数": 25},
-    {"日期": "2026-03-11", "渠道": "希沃", "新增注册人数": 2},
-    {"日期": "2026-03-11", "渠道": "科大讯飞", "新增注册人数": 13},
-    {"日期": "2026-03-12", "渠道": "京东方", "新增注册人数": 5},
-    {"日期": "2026-03-12", "渠道": "学而思", "新增注册人数": 24},
-    {"日期": "2026-03-12", "渠道": "希沃", "新增注册人数": 5},
-    {"日期": "2026-03-12", "渠道": "科大讯飞", "新增注册人数": 15},
-    {"日期": "2026-03-13", "渠道": "京东方", "新增注册人数": 2},
-    {"日期": "2026-03-13", "渠道": "学而思", "新增注册人数": 31},
-    {"日期": "2026-03-13", "渠道": "希沃", "新增注册人数": 7},
-    {"日期": "2026-03-13", "渠道": "科大讯飞", "新增注册人数": 8},
-    {"日期": "2026-03-14", "渠道": "学而思", "新增注册人数": 30},
-    {"日期": "2026-03-14", "渠道": "希沃", "新增注册人数": 3},
-    {"日期": "2026-03-14", "渠道": "科大讯飞", "新增注册人数": 22},
-    {"日期": "2026-03-15", "渠道": "京东方", "新增注册人数": 1},
-    {"日期": "2026-03-15", "渠道": "学而思", "新增注册人数": 22},
-    {"日期": "2026-03-15", "渠道": "希沃", "新增注册人数": 3},
-    {"日期": "2026-03-15", "渠道": "科大讯飞", "新增注册人数": 22},
-    {"日期": "2026-03-16", "渠道": "京东方", "新增注册人数": 2},
-    {"日期": "2026-03-16", "渠道": "学而思", "新增注册人数": 6},
-    {"日期": "2026-03-16", "渠道": "希沃", "新增注册人数": 2},
-    {"日期": "2026-03-16", "渠道": "科大讯飞", "新增注册人数": 10},
-    {"日期": "2026-03-17", "渠道": "京东方", "新增注册人数": 3},
-    {"日期": "2026-03-17", "渠道": "学而思", "新增注册人数": 12},
-    {"日期": "2026-03-17", "渠道": "希沃", "新增注册人数": 3},
-    {"日期": "2026-03-17", "渠道": "科大讯飞", "新增注册人数": 6},
-    {"日期": "2026-03-18", "渠道": "京东方", "新增注册人数": 1},
-    {"日期": "2026-03-18", "渠道": "学而思", "新增注册人数": 9},
-    {"日期": "2026-03-18", "渠道": "科大讯飞", "新增注册人数": 11},
-    {"日期": "2026-03-19", "渠道": "京东方", "新增注册人数": 1},
-    {"日期": "2026-03-19", "渠道": "学而思", "新增注册人数": 6},
-    {"日期": "2026-03-19", "渠道": "希沃", "新增注册人数": 2},
-    {"日期": "2026-03-19", "渠道": "科大讯飞", "新增注册人数": 9},
-    {"日期": "2026-03-20", "渠道": "京东方", "新增注册人数": 1},
-    {"日期": "2026-03-20", "渠道": "学而思", "新增注册人数": 13},
-    {"日期": "2026-03-20", "渠道": "希沃", "新增注册人数": 2},
-    {"日期": "2026-03-20", "渠道": "科大讯飞", "新增注册人数": 12},
-    {"日期": "2026-03-21", "渠道": "京东方", "新增注册人数": 1},
-    {"日期": "2026-03-21", "渠道": "学而思", "新增注册人数": 27},
-    {"日期": "2026-03-21", "渠道": "科大讯飞", "新增注册人数": 26},
-    {"日期": "2026-03-22", "渠道": "学而思", "新增注册人数": 12},
-    {"日期": "2026-03-22", "渠道": "希沃", "新增注册人数": 4},
-    {"日期": "2026-03-22", "渠道": "科大讯飞", "新增注册人数": 22},
-    {"日期": "2026-03-23", "渠道": "京东方", "新增注册人数": 1},
-    {"日期": "2026-03-23", "渠道": "学而思", "新增注册人数": 9},
-    {"日期": "2026-03-23", "渠道": "希沃", "新增注册人数": 2},
-    {"日期": "2026-03-23", "渠道": "科大讯飞", "新增注册人数": 5},
-    {"日期": "2026-03-24", "渠道": "学而思", "新增注册人数": 4},
-    {"日期": "2026-03-24", "渠道": "希沃", "新增注册人数": 2},
-    {"日期": "2026-03-24", "渠道": "科大讯飞", "新增注册人数": 8},
-    {"日期": "2026-03-25", "渠道": "京东方", "新增注册人数": 1},
-    {"日期": "2026-03-25", "渠道": "学而思", "新增注册人数": 12},
-    {"日期": "2026-03-25", "渠道": "希沃", "新增注册人数": 5},
-    {"日期": "2026-03-25", "渠道": "科大讯飞", "新增注册人数": 13},
-    {"日期": "2026-03-26", "渠道": "京东方", "新增注册人数": 1},
-    {"日期": "2026-03-26", "渠道": "学而思", "新增注册人数": 8},
-    {"日期": "2026-03-26", "渠道": "希沃", "新增注册人数": 2},
-    {"日期": "2026-03-26", "渠道": "科大讯飞", "新增注册人数": 8},
-    {"日期": "2026-03-27", "渠道": "学而思", "新增注册人数": 9},
-    {"日期": "2026-03-27", "渠道": "希沃", "新增注册人数": 6},
-    {"日期": "2026-03-27", "渠道": "科大讯飞", "新增注册人数": 6},
-    {"日期": "2026-03-28", "渠道": "京东方", "新增注册人数": 2},
-    {"日期": "2026-03-28", "渠道": "学而思", "新增注册人数": 20},
-    {"日期": "2026-03-28", "渠道": "希沃", "新增注册人数": 4},
-    {"日期": "2026-03-28", "渠道": "科大讯飞", "新增注册人数": 12},
-    {"日期": "2026-03-29", "渠道": "京东方", "新增注册人数": 2},
-    {"日期": "2026-03-29", "渠道": "学而思", "新增注册人数": 16},
-    {"日期": "2026-03-29", "渠道": "科大讯飞", "新增注册人数": 9},
-    {"日期": "2026-03-30", "渠道": "京东方", "新增注册人数": 2},
-    {"日期": "2026-03-30", "渠道": "学而思", "新增注册人数": 7},
-    {"日期": "2026-03-30", "渠道": "希沃", "新增注册人数": 2},
-    {"日期": "2026-03-30", "渠道": "科大讯飞", "新增注册人数": 6},
-    {"日期": "2026-03-31", "渠道": "京东方", "新增注册人数": 3},
-    {"日期": "2026-03-31", "渠道": "学而思", "新增注册人数": 10},
-    {"日期": "2026-03-31", "渠道": "科大讯飞", "新增注册人数": 10},
-]
-df_register = pd.DataFrame(register_data)
-
-# 生成Excel文件
-output_path = "/root/.openclaw/workspace/2026年3月硬件渠道数据汇总.xlsx"
-with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
-    df_overall.to_excel(writer, sheet_name='整体统计', index=False)
-    df_purchase.to_excel(writer, sheet_name='每日购课明细', index=False)
-    df_register.to_excel(writer, sheet_name='每日新增注册明细', index=False)
-
-print(f"文件已生成：{output_path}")
diff --git a/git_repos/llm_offline_production b/git_repos/llm_offline_production
deleted file mode 160000
index 75ab13e..0000000
--- a/git_repos/llm_offline_production
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 75ab13e87dd0e856cb05c9515efcd507888b6486
diff --git a/makee_vala/README.md b/makee_vala/README.md
deleted file mode 100644
index 2bee9b4..0000000
--- a/makee_vala/README.md
+++ /dev/null
@@ -1,30 +0,0 @@
-# 业务知识库
-
-作为数据分析师，持续积累对公司业务和数据表的理解。
-
-## 目录结构
-
-- `sql_queries/` - 常用 SQL 查询语句和业务分析模板
-- `tables/` - 数据表结构和字段说明
-- `business_terms/` - 业务术语和指标定义
-
-## 资料来源
-
-1. 飞书 Wiki - 增长组常用查询SQL: https://makee-interactive.feishu.cn/wiki/XJuCwNol1iL3sYkXkXWc2QnJnMd
-2. Git 仓库 - 数据抽取脚本: https://git.valavala.com/vala/llm_offline_production/src/branch/master/config_user_data_extract_and_analyze
-
-## 收集的 SQL 查询文档
-
-- [ ] 全字段大表
-- [ ] 平均通关时长
-- [ ] 新增注册用户数by渠道
-- [ ] 课程进入完成率
-- [ ] 账号角色年龄地址
-- [ ] 退费率
-- [ ] 销转学习进度
-- [ ] 班主任关注数据
-- [ ] 端内GMV
-- [ ] 端内用户课程进入完成率
-- [ ] 端内购课用户学习行为
-- [ ] 转化率
-- [ ] 课程ID映射
diff --git a/makee_vala/business_terms.md b/makee_vala/business_terms.md
deleted file mode 100644
index e86f0ce..0000000
--- a/makee_vala/business_terms.md
+++ /dev/null
@@ -1,49 +0,0 @@
-# 业务术语表
-
-## 核心业务指标
-
-### 用户相关
-- **注册用户**: 在 `bi_vala_app_account` 表中 `status = 1` 且 `deleted_at is NULL` 的用户
-- **测试用户**: 需要排除的特定用户 ID，如 `id not in (51,2121)`
-- **下载渠道 (download_channel)**: 用户下载 App 的渠道
-- **key_from**: 注册或购课的来源标识
-
-### 购课相关
-- **购课渠道 (sale_channel)**: 用户购买课程的渠道，有数字编码映射到具体渠道名称
-- **有效订单**: `order_status = 3` 且 `pay_amount_int > 49800` 的订单（金额大于498元）
-- **购课标签**: 分为"未购课"、"站外购课"、"站内购课"
-- **站内购课**: 购课渠道不是"站外"的购课
-
-### 角色相关
-- **角色付费状态 (characer_pay_status)**: 0表示未付费，1表示已付费
-- **性别 (gender)**: 0=girl, 1=boy, 其他=unknow
-- **赛季包 (purchase_season_package)**: `'[1]'` 表示未购买赛季包
-
-### 课程相关
-- **完课标识 (chapter_unique_id)**: 唯一标识一次完课记录
-- **完课耗时 (finish_time)**: 完成课程所花费的时间，格式为 mm:ss
-- **课程ID (course_id)**: 由 course_level-course_season-course_unit-course_lesson 组成
-- **play_status = 1**: 表示播放完成状态
-
-## 购课渠道映射表
-
-| 编码 | 渠道名称 |
-|------|----------|
-| 11 | 苹果 |
-| 12 | 华为 |
-| 13 | 小米 |
-| 14 | 荣耀 |
-| 15 | 应用宝 |
-| 17 | 魅族 |
-| 18 | VIVO |
-| 19 | OPPO |
-| 21 | 学而思 |
-| 22 | 讯飞 |
-| 23 | 步步高 |
-| 24 | 作业帮 |
-| 25 | 小度 |
-| 26 | 希沃 |
-| 27 | 京东方 |
-| 41 | 官网 |
-| 71 | 小程序 |
-| 其他 | 站外 |
diff --git a/makee_vala/data_tables.md b/makee_vala/data_tables.md
deleted file mode 100644
index ee28241..0000000
--- a/makee_vala/data_tables.md
+++ /dev/null
@@ -1,168 +0,0 @@
-# 数据表说明
-
-## 核心业务表
-
-### 用户账号表
-**表名**: `bi_vala_app_account`
-
-**关键字段**:
-- `id`: 用户ID
-- `key_from`: 注册来源
-- `created_at`: 注册时间
-- `download_channel`: 下载渠道
-- `status`: 账号状态（1表示有效）
-- `deleted_at`: 删除时间（NULL表示未删除）
-
-**常用筛选条件**:
-```sql
-where status = 1
-  and id not in (51,2121)  -- 排除测试用户
-  and deleted_at is NULL
-```
-
----
-
-### 账号详情表
-**表名**: `account_detail_info`
-
-**关键字段**:
-- `account_id`: 账号ID（关联 bi_vala_app_account.id）
-- `login_address`: 登录地址（格式如"省份-城市"）
-- `phone_login_times`: 手机登录次数
-
-**业务逻辑**:
-```sql
--- 提取城市
-split_part(login_address,'-',2) as login_address
-
--- 判断是否手机登录
-case when phone_login_times = 0 then 0 else 1 end as phone_login
-```
-
----
-
-### 订单表
-**表名**: `bi_vala_order`
-
-**关键字段**:
-- `account_id`: 账号ID
-- `sale_channel`: 购课渠道（数字编码）
-- `key_from`: 购课来源
-- `pay_success_date`: 支付成功时间
-- `pay_amount`: 支付金额
-- `pay_amount_int`: 支付金额（整数分）
-- `order_status`: 订单状态（3表示有效订单）
-
-**常用筛选条件**:
-```sql
-where order_status = 3
-  and pay_amount_int > 49800  -- 金额大于498元
-```
-
----
-
-### 角色表
-**表名**: `bi_vala_app_character`
-
-**关键字段**:
-- `id`: 角色ID
-- `account_id`: 账号ID
-- `gender`: 性别（0=girl, 1=boy）
-- `birthday`: 生日（格式如"YYYY-MM-DD"）
-- `purchase_season_package`: 赛季包购买状态
-- `deleted_at`: 删除时间
-
-**业务逻辑**:
-```sql
--- 角色付费状态
-case when purchase_season_package = '[1]' then 0 else 1 end as characer_pay_status
-
--- 性别映射
-case when gender = 0 then 'girl'
-     when gender = 1 then 'boy'
-     else 'unknow'
-end as gender
-
--- 提取出生年份
-case when split_part(birthday,'-',1) = '' then '0000'
-     else split_part(birthday,'-',1)
-end as birthday
-```
-
----
-
-## 课程播放记录表（分表）
-
-### 用户章节播放记录
-**表名**: `bi_user_chapter_play_record_0` ~ `bi_user_chapter_play_record_7`
-
-**说明**: 按分表存储，共8张表，需要使用 UNION ALL 合并
-
-**关键字段**:
-- `user_id`: 用户ID
-- `chapter_id`: 章节ID
-- `chapter_unique_id`: 完课唯一标识
-- `updated_at`: 更新时间
-- `play_status`: 播放状态（1表示完成）
-
-**常用筛选条件**:
-```sql
-where chapter_id in (55,56,57,58,59)  -- 指定章节
-  and play_status = 1  -- 播放完成
-```
-
----
-
-### 用户组件播放记录
-**表名**: `bi_user_component_play_record_0` ~ `bi_user_component_play_record_7`
-
-**说明**: 按分表存储，共8张表，需要使用 UNION ALL 合并
-
-**关键字段**:
-- `chapter_unique_id`: 完课唯一标识
-- `interval_time`: 播放时长（毫秒）
-
-**业务逻辑**:
-```sql
--- 计算完课耗时（mm:ss格式）
-format('%s:%s',
-       floor(sum(interval_time)/1000/60),
-       mod((sum(interval_time)/1000),60)
-) as finish_time
-```
-
----
-
-## 课程信息表
-
-### 课程单元表
-**表名**: `bi_level_unit_lesson`
-
-**关键字段**:
-- `id`: ID（关联 chapter_id）
-- `course_level`: 课程级别
-- `course_season`: 课程赛季
-- `course_unit`: 课程单元
-- `course_lesson`: 课程课时
-
-**业务逻辑**:
-```sql
--- 生成课程ID
-format('%s-%s-%s-%s',
-       course_level,
-       course_season,
-       course_unit,
-       course_lesson
-) as course_id
-```
-
----
-
-## 其他表
-
-### 账号登录表
-**表名**: `account_login`
-
-**关键字段**:
-- `account_id`: 账号ID
-- `login_date`: 登录日期
diff --git a/makee_vala/feishu_format_rules.md b/makee_vala/feishu_format_rules.md
deleted file mode 100644
index fb1a2b9..0000000
--- a/makee_vala/feishu_format_rules.md
+++ /dev/null
@@ -1,53 +0,0 @@
-# 飞书文档排版规则
-
-## 飞书文档块类型
-
-根据观察，飞书文档的块类型：
-
-| block_type | 说明 |
-|-----------|------|
-| 1 | Page（页面）|
-| 2 | Text（文本块）|
-| 3 | Heading1（一级标题）|
-| 4 | Heading2（二级标题）|
-| 5 | Heading3（三级标题）|
-| 6 | Bulleted List（无序列表）|
-| 7 | Numbered List（有序列表）|
-| 8 | To-do（待办事项）|
-| 9 | Quote（引用）|
-| 10 | Code（代码块）|
-| 11 | Divider（分隔线）|
-| 34 | Quote Container（引用容器）|
-
-## 排版最佳实践
-
-### 1. 标题层级
-- 使用 Heading2/Heading3 来组织内容结构
-- 避免太多层级，保持清晰
-
-### 2. 列表使用
-- 无序列表（type 6）用于列举项目
-- 有序列表（type 7）用于步骤说明
-
-### 3. 分隔线
-- 使用 Divider（type 11）来分隔大的内容区块
-
-### 4. 引用
-- 使用 Quote（type 9）或 Quote Container（type 34）来强调重要内容
-
-### 5. 文本格式
-- 善用加粗、斜体等文本样式
-- 保持整体排版简洁美观
-
-## 更新飞书文档的注意事项
-
-⚠️ **重要：不要直接用 write 覆盖整个文档！**
-
-**推荐做法：**
-1. 先用 list_blocks 查看当前文档结构
-2. 用 update_block 逐个更新需要修改的块
-3. 或者如果必须重写，要确保保持原来的块结构和格式
-
-**避免：**
-- ❌ 直接用 write 方法覆盖整个文档（会丢失所有格式）
-- ❌ 把所有内容都放在一个 Text 块里
diff --git a/makee_vala/fetch_wiki_docs.py b/makee_vala/fetch_wiki_docs.py
deleted file mode 100644
index ea7f70f..0000000
--- a/makee_vala/fetch_wiki_docs.py
+++ /dev/null
@@ -1,83 +0,0 @@
-#!/usr/bin/env python3
-"""
-批量读取飞书 Wiki 文档并保存到本地知识库
-"""
-
-import json
-import os
-from datetime import datetime
-
-# Wiki 子页面列表
-wiki_pages = [
-    {"node_token": "O7QvwdY8piO8aUkhxYecA1qZnBe", "title": "全字段大表", "obj_token": "VVyWd5491o6tuqxceCVci6dVnFd"},
-    {"node_token": "Y6Iywqf75iepbUkvJzLcfiUYnkg", "title": "平均通关时长", "obj_token": "EpP7d6h2SoaTyJx1lZRcXXdLnVe"},
-    {"node_token": "KQihwMjO9i1zjFkqTgBcq67Snzc", "title": "新增注册用户数by渠道", "obj_token": "AzRPddp97o7To8x8VkxcFGr8nBh"},
-    {"node_token": "Zt7RwfGLWiacslkO2glcheWjnwf", "title": "课程进入完成率", "obj_token": "PwIydfZcHo5eZgxi8XLcOtjOnSb"},
-    {"node_token": "LTaiw3OmUi2pcckDWuNcyBIVnAd", "title": "账号角色年龄地址", "obj_token": "CUa2du2sSoNFSRxl3vFc8ucInEm"},
-    {"node_token": "ZAPJwIODRiNYE5kTuNtcpSlvnIX", "title": "退费率", "obj_token": "DC1Qdhpitowt9lxxo1acEzOwnFc"},
-    {"node_token": "Cb3KwPWLriG7GgkN73pcM0Idnch", "title": "销转学习进度", "obj_token": "G1p9dhK63oLWMzxyGQ8csZGMnDh"},
-    {"node_token": "EBEiwQsw2iOtgekDldHcQxgwnOh", "title": "班主任关注数据", "obj_token": "NcVqdRKtrowglNxs9CocDekunje"},
-    {"node_token": "BZPkwARxiixUZRk4BW9cij50nDe", "title": "端内GMV", "obj_token": "FkVCd1AruoD9xWxxVpzc16hinVh"},
-    {"node_token": "AQpnwpsfOixYGtk4jf0c6t9XncG", "title": "端内用户课程进入完成率", "obj_token": "Ueu7dtgSHoNYfsxCDHmcY6E4nid"},
-    {"node_token": "PyqEwXXqsiQybPkpGbscUjUFnOg", "title": "端内购课用户学习行为", "obj_token": "ZTxod4IUWo5yMexf8AHcBbpFnMg"},
-    {"node_token": "OyXlwY2vyisvV1kc3HhcMyMVnTd", "title": "转化率", "obj_token": "ATJ0dfajQo5CSexQd8hc9i3pnWe"},
-    {"node_token": "MWpZwV01fitaKjkCRSxckMUunRb", "title": "课程ID映射", "obj_token": "GenUdsXCloUdYhxMvxqcWBMdnhb"}
-]
-
-def safe_filename(title):
-    """生成安全的文件名"""
-    return "".join(c for c in title if c.isalnum() or c in (' ', '-', '_')).rstrip().replace(' ', '_')
-
-def main():
-    print("="*60)
-    print("飞书 Wiki 文档批量获取")
-    print("="*60)
-    
-    output_dir = "sql_queries"
-    os.makedirs(output_dir, exist_ok=True)
-    
-    print(f"\n共 {len(wiki_pages)} 个文档需要获取")
-    print(f"输出目录: {output_dir}")
-    
-    # 创建索引文件
-    index_content = "# SQL 查询文档索引\n\n"
-    index_content += f"创建时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"
-    index_content += "## 文档列表\n\n"
-    
-    for i, page in enumerate(wiki_pages, 1):
-        filename = safe_filename(page['title']) + ".md"
-        filepath = os.path.join(output_dir, filename)
-        
-        print(f"\n[{i}/{len(wiki_pages)}] 处理: {page['title']}")
-        print(f"  文件: {filepath}")
-        
-        # 创建占位文件
-        with open(filepath, 'w', encoding='utf-8') as f:
-            f.write(f"# {page['title']}\n\n")
-            f.write(f"**获取时间:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
-            f.write(f"**飞书文档 Token:** {page['obj_token']}\n\n")
-            f.write(f"**注意:** 此文档需要通过 feishu_doc 工具读取完整内容\n\n")
-            f.write("---\n\n")
-            f.write("## 使用说明\n\n")
-            f.write("使用以下命令读取完整文档内容:\n\n")
-            f.write("```bash\n")
-            f.write(f"feishu_doc read {page['obj_token']}\n")
-            f.write("```\n")
-        
-        # 更新索引
-        index_content += f"- [{page['title']}]({filename})\n"
-        
-        print(f"  ✅ 已创建占位文件")
-    
-    # 写入索引文件
-    with open(os.path.join(output_dir, "README.md"), 'w', encoding='utf-8') as f:
-        f.write(index_content)
-    
-    print("\n" + "="*60)
-    print("✅ 初始化完成")
-    print("="*60)
-    print("\n下一步: 使用 feishu_doc 工具逐个读取文档内容")
-    print("或者让我继续为你读取这些文档的完整内容")
-
-if __name__ == "__main__":
-    main()
diff --git a/makee_vala/git_scripts/CLAUDE.md b/makee_vala/git_scripts/CLAUDE.md
deleted file mode 100644
index 7fbbbf5..0000000
--- a/makee_vala/git_scripts/CLAUDE.md
+++ /dev/null
@@ -1,70 +0,0 @@
-# 项目说明
-
-## 项目概述
-用户数据提取和分析工具集，用于从各种数据源（ES、数据库等）导出和分析用户数据。
-
-## 脚本列表
-
-### export_realtime_asr.py
-**功能**: 导出流式语音 ASR 数据
-
-**版本**: v1.0
-
-**数据源**:
-- Elasticsearch 索引: `llm_realtime_asr_log`
-
-**配置说明**:
-- 在脚本开头配置开始和结束日期（8位数字格式，如 20260101）
-- ES 连接信息通过环境变量配置（需要创建 .env 文件）
-
-**依赖包**:
-```
-elasticsearch
-pandas
-openpyxl
-python-dotenv
-```
-
-**运行方式**:
-```bash
-python export_realtime_asr.py
-```
-
-**输出**:
-- 输出目录: `output/`
-- 文件命名: `realtime_asr_export_{开始日期}_{结束日期}.xlsx`
-- Excel 列: voice_id, asr_prompt, result_str, timestamp, audio_url, source
-
-**数据处理逻辑**:
-- 从 ES 使用 scroll API 分批读取数据（每批1000条）
-- 按 voice_id 聚合，仅保留恰好有2条记录的 voice_id
-- 取两条记录中最新的 timestamp
-- 自动拼接 audio_url
-
-**特点**:
-- 支持大数据量处理（几十万级别）
-- 实时进度显示
-- 自动过滤异常数据（非2条记录的 voice_id）
-
----
-
-### 其他脚本
-- `export_user_id_data.py`: 用户ID数据导出
-- `batch_add_shengtong_result.py`: 批量添加声通评测结果
-- `shengtong_eval.py`: 声通评测
-- `calc_score_diff_stats.py`: 分数差异统计
-- `export_unit_summary.py`: 单元总结统计导出
-
-## 环境配置
-
-需要创建 `.env` 文件，包含以下配置:
-```
-ES_HOST=xxx
-ES_PORT=9200
-ES_SCHEME=https
-ES_USER=elastic
-ES_PASSWORD=xxx
-```
-
-## 最近更新
-- 2026-01-27: 新增 export_realtime_asr.py 脚本，支持流式语音 ASR 数据导出
diff --git a/makee_vala/git_scripts/batch_add_shengtong_result.py b/makee_vala/git_scripts/batch_add_shengtong_result.py
deleted file mode 100644
index 8db5962..0000000
--- a/makee_vala/git_scripts/batch_add_shengtong_result.py
+++ /dev/null
@@ -1,853 +0,0 @@
-"""
-声通语音评测批量处理工具
-
-功能说明:
-- 读取 Excel 文件，其中包含音频链接（userAudio 字段）和参考文本（refText 字段）
-- 调用声通 API 对音频进行评测，获取总分、明细和recordId
-- 在原 Excel 中添加"测试总分"、"测试明细"和"测试recordId"三个字段
-- 输出文件命名为: {原文件名}_add_shengtong_result.xlsx
-- 支持串行和并发两种处理模式
-
-环境变量配置:
-- ST_APP_KEY: 声通应用 Key
-- ST_SECRET_KEY: 声通 Secret Key
-
-声通API文档: http://api.stkouyu.com
-"""
-
-import pandas as pd
-import os
-import requests
-import tempfile
-from pathlib import Path
-import json
-import time
-import hashlib
-import uuid
-from concurrent.futures import ThreadPoolExecutor, as_completed
-import threading
-from queue import Queue
-import logging
-
-# 配置日志
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s',
-    handlers=[
-        logging.FileHandler('shengtong_batch_processing.log'),
-        logging.StreamHandler()
-    ]
-)
-
-# 从 .env 文件加载环境变量
-from dotenv import load_dotenv
-load_dotenv()
-
-# ==================== 全局配置 ====================
-# DEBUG 模式开关（控制详细日志输出）
-DEBUG_MODE = False
-
-
-def debug_print(message):
-    """
-    DEBUG 信息输出函数
-
-    Args:
-        message (str): 要输出的调试信息
-    """
-    if DEBUG_MODE:
-        print(f"[DEBUG] {message}")
-
-
-# ==================== 声通 API 相关代码 ====================
-
-class ShengtongEvaluator:
-    """声通口语评测 API 封装类"""
-
-    def __init__(self):
-        """从环境变量读取 API 配置"""
-        self.app_key = os.environ.get('ST_APP_KEY', '')
-        self.secret_key = os.environ.get('ST_SECRET_KEY', '')
-        self.api_url = "http://api.stkouyu.com:8080/sent.eval"
-
-        # 检查环境变量是否配置
-        if not all([self.app_key, self.secret_key]):
-            raise ValueError(
-                "请配置声通 API 环境变量: ST_APP_KEY, ST_SECRET_KEY"
-            )
-
-    def _generate_signature(self, data: str) -> str:
-        """生成SHA1签名"""
-        return hashlib.sha1(data.encode('utf-8')).hexdigest()
-
-    def _build_request_params(self, ref_text: str, audio_ext: str) -> dict:
-        """构建请求参数"""
-        timestamp = str(int(time.time()))
-        user_id = str(uuid.uuid4())
-
-        # 生成签名
-        connect_data = self.app_key + timestamp + self.secret_key
-        start_data = self.app_key + timestamp + user_id + self.secret_key
-        connect_sig = self._generate_signature(connect_data)
-        start_sig = self._generate_signature(start_data)
-
-        # 构建请求参数
-        params = {
-            "connect": {
-                "cmd": "connect",
-                "param": {
-                    "sdk": {
-                        "version": 16777472,
-                        "source": 9,
-                        "protocol": 2
-                    },
-                    "app": {
-                        "applicationId": self.app_key,
-                        "sig": connect_sig,
-                        "timestamp": timestamp
-                    }
-                }
-            },
-            "start": {
-                "cmd": "start",
-                "param": {
-                    "app": {
-                        "applicationId": self.app_key,
-                        "sig": start_sig,
-                        "timestamp": timestamp,
-                        "userId": user_id
-                    },
-                    "audio": {
-                        "audioType": audio_ext,
-                        "channel": 1,
-                        "sampleBytes": 2,
-                        "sampleRate": 16000
-                    },
-                    "request": {
-                        "coreType": "sent.eval",
-                        "refText": ref_text,
-                        "tokenId": "makee",
-                    }
-                }
-            }
-        }
-
-        return params
-
-    def evaluate(self, audio_file_path: str, ref_text: str) -> dict:
-        """
-        调用声通API进行口语评测
-
-        Args:
-            audio_file_path (str): 音频文件路径
-            ref_text (str): 参考文本
-
-        Returns:
-            dict: 评测结果
-        """
-        debug_print(f"开始评测音频文件: {audio_file_path}")
-        debug_print(f"评测文本: {ref_text}")
-
-        # 检查音频文件是否存在
-        if not os.path.exists(audio_file_path):
-            error_msg = f"音频文件不存在: {audio_file_path}"
-            logging.error(error_msg)
-            return {"error": error_msg}
-
-        # 获取音频文件扩展名
-        audio_ext = os.path.splitext(audio_file_path)[1][1:]  # 去掉点号
-        if not audio_ext:
-            audio_ext = "wav"  # 默认为wav
-
-        # 构建请求参数
-        params = self._build_request_params(ref_text, audio_ext)
-
-        # 读取音频文件
-        try:
-            with open(audio_file_path, 'rb') as f:
-                audio_data = f.read()
-
-            # 构建multipart/form-data请求
-            files = {
-                'text': (None, json.dumps(params)),
-                'audio': (f"{int(time.time() * 1000000)}.{audio_ext}", audio_data)
-            }
-
-            headers = {
-                'Request-Index': '0'
-            }
-
-            debug_print("开始发送请求到声通API...")
-            response = requests.post(
-                self.api_url,
-                files=files,
-                headers=headers,
-                timeout=30
-            )
-
-            if response.status_code == 200:
-                result = response.json()
-                debug_print("声通API返回成功")
-                return result
-            else:
-                error_msg = f"请求失败，状态码: {response.status_code}"
-                logging.error(f"{error_msg}, 响应: {response.text}")
-                return {
-                    "error": error_msg,
-                    "response": response.text
-                }
-
-        except requests.exceptions.RequestException as e:
-            error_msg = f"请求异常: {str(e)}"
-            logging.error(error_msg)
-            return {"error": error_msg}
-        except Exception as e:
-            error_msg = f"评测过程出错: {str(e)}"
-            logging.error(error_msg)
-            return {"error": error_msg}
-
-
-def evaluate_audio_file(audio_file_path, text="nice to meet you."):
-    """
-    简化的音频评测函数
-
-    Args:
-        audio_file_path (str): 音频文件路径
-        text (str): 评测文本内容
-
-    Returns:
-        dict: 评测结果JSON
-    """
-    api = ShengtongEvaluator()
-    return api.evaluate(audio_file_path, text)
-
-
-# ==================== 批量处理相关代码 ====================
-
-def download_audio_file(audio_url, temp_dir, max_retries=3, timeout=30):
-    """
-    下载音频文件到临时目录（增强版本）
-
-    Args:
-        audio_url (str): 音频文件URL
-        temp_dir (str): 临时目录路径
-        max_retries (int): 最大重试次数
-        timeout (int): 请求超时时间（秒）
-
-    Returns:
-        str: 下载的音频文件路径，失败返回None
-    """
-    if not audio_url or pd.isna(audio_url):
-        logging.warning("音频URL为空或无效")
-        return None
-
-    # 从URL中提取文件名
-    try:
-        file_name = os.path.basename(audio_url.split('?')[0])  # 去除URL参数
-        if not file_name or '.' not in file_name:
-            file_name = f"audio_{hash(audio_url) % 100000}.wav"  # 生成默认文件名
-
-        file_path = os.path.join(temp_dir, file_name)
-
-        # 重试机制
-        for attempt in range(max_retries):
-            try:
-                logging.info(f"正在下载音频文件 (尝试 {attempt + 1}/{max_retries}): {audio_url}")
-
-                # 设置请求头，模拟浏览器
-                headers = {
-                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
-                }
-
-                response = requests.get(audio_url, timeout=timeout, headers=headers, stream=True)
-                response.raise_for_status()
-
-                # 检查内容类型
-                content_type = response.headers.get('content-type', '')
-                if not any(audio_type in content_type.lower() for audio_type in ['audio', 'wav', 'mp3', 'ogg', 'flac']):
-                    logging.warning(f"可能不是音频文件，Content-Type: {content_type}")
-
-                # 写入文件
-                with open(file_path, 'wb') as f:
-                    for chunk in response.iter_content(chunk_size=8192):
-                        if chunk:
-                            f.write(chunk)
-
-                # 验证文件大小
-                file_size = os.path.getsize(file_path)
-                if file_size == 0:
-                    raise ValueError("下载的文件为空")
-
-                logging.info(f"音频文件下载成功: {file_path} (大小: {file_size} bytes)")
-                return file_path
-
-            except requests.exceptions.Timeout:
-                logging.warning(f"下载超时 (尝试 {attempt + 1}/{max_retries}): {audio_url}")
-                if attempt < max_retries - 1:
-                    time.sleep(2 ** attempt)  # 指数退避
-                    continue
-            except requests.exceptions.RequestException as e:
-                logging.warning(f"下载请求异常 (尝试 {attempt + 1}/{max_retries}): {str(e)}")
-                if attempt < max_retries - 1:
-                    time.sleep(2 ** attempt)
-                    continue
-            except Exception as e:
-                logging.error(f"下载过程中发生未知错误 (尝试 {attempt + 1}/{max_retries}): {str(e)}")
-                if attempt < max_retries - 1:
-                    time.sleep(2 ** attempt)
-                    continue
-
-        logging.error(f"音频文件下载失败，已达到最大重试次数: {audio_url}")
-        return None
-
-    except Exception as e:
-        logging.error(f"下载音频文件时发生异常: {str(e)}")
-        return None
-
-
-def format_shengtong_details(shengtong_result):
-    """
-    格式化声通评测结果为明细字符串
-
-    Args:
-        shengtong_result (dict): 声通API返回的结果
-
-    Returns:
-        str: 格式化的明细字符串
-    """
-    if not shengtong_result or 'error' in shengtong_result:
-        return ""
-
-    try:
-        # 从result字段中获取words数组
-        result = shengtong_result.get('result', {})
-        words = result.get('words', [])
-
-        if not words:
-            return ""
-
-        details = []
-        for word in words:
-            # 获取单词内容和得分
-            word_text = word.get('word', '')
-            scores = word.get('scores', {})
-            overall_score = scores.get('overall', 0)
-
-            # 格式化为 "单词 分数"
-            details.append(f"{word_text} {int(overall_score)}")
-
-        return "\n".join(details)
-
-    except Exception as e:
-        logging.error(f"格式化声通明细失败: {str(e)}")
-        return ""
-
-
-def get_shengtong_total_score(shengtong_result):
-    """
-    获取声通评测总分
-
-    Args:
-        shengtong_result (dict): 声通API返回的结果
-
-    Returns:
-        int: 总分，失败返回0
-    """
-    if not shengtong_result or 'error' in shengtong_result:
-        return 0
-
-    try:
-        result = shengtong_result.get('result', {})
-        overall_score = result.get('overall', 0)
-        return int(overall_score)
-    except Exception as e:
-        logging.error(f"获取声通总分失败: {str(e)}")
-        return 0
-
-
-def get_shengtong_record_id(shengtong_result):
-    """
-    获取声通评测recordId
-
-    Args:
-        shengtong_result (dict): 声通API返回的结果
-
-    Returns:
-        str: recordId，失败返回空字符串
-    """
-    if not shengtong_result or 'error' in shengtong_result:
-        return ""
-
-    try:
-        record_id = shengtong_result.get('recordId', '')
-        return str(record_id) if record_id else ""
-    except Exception as e:
-        logging.error(f"获取声通recordId失败: {str(e)}")
-        return ""
-
-
-def process_single_row(row_data, temp_dir, results_dict, lock, rate_limiter=None):
-    """
-    处理单行数据（并发版本，增强错误处理和时间分析）
-
-    Args:
-        row_data (tuple): (index, row) 数据
-        temp_dir (str): 临时目录路径
-        results_dict (dict): 结果字典
-        lock (threading.Lock): 线程锁
-        rate_limiter (Queue): 速率限制器
-
-    Returns:
-        None
-    """
-    index, row = row_data
-    start_time = time.time()
-    timing_info = {}
-
-    try:
-        # 1. 速率限制等待时间
-        rate_limit_start = time.time()
-        if rate_limiter:
-            rate_limiter.get()  # 获取令牌
-        timing_info['rate_limit_wait'] = time.time() - rate_limit_start
-
-        logging.info(f"开始处理第 {index + 1} 行数据")
-
-        # 2. 数据预处理时间
-        preprocess_start = time.time()
-        ref_text = str(row['refText']) if pd.notna(row['refText']) else ""
-        audio_url = str(row['userAudio']) if pd.notna(row['userAudio']) else ""
-
-        # 数据验证
-        if not ref_text:
-            raise ValueError("refText 为空或无效")
-
-        if not audio_url:
-            raise ValueError("userAudio 为空或无效")
-        timing_info['preprocess'] = time.time() - preprocess_start
-
-        # 3. 音频下载时间
-        download_start = time.time()
-        audio_file_path = download_audio_file(audio_url, temp_dir)
-        timing_info['audio_download'] = time.time() - download_start
-
-        if not audio_file_path:
-            raise ValueError("音频文件下载失败")
-
-        try:
-            # 4. 声通API调用时间
-            api_start = time.time()
-            logging.info(f"正在调用声通API评测: {ref_text}")
-            shengtong_result = evaluate_audio_file(audio_file_path, ref_text)
-            timing_info['api_call'] = time.time() - api_start
-
-            if not shengtong_result:
-                raise ValueError("声通API返回空结果")
-
-            # 5. 结果处理时间
-            result_process_start = time.time()
-            shengtong_details = format_shengtong_details(shengtong_result)
-            shengtong_total_score = get_shengtong_total_score(shengtong_result)
-            shengtong_record_id = get_shengtong_record_id(shengtong_result)
-            timing_info['result_process'] = time.time() - result_process_start
-
-            # 6. 数据更新时间
-            update_start = time.time()
-            with lock:
-                results_dict[index] = {
-                    '测试总分': shengtong_total_score,
-                    '测试明细': shengtong_details,
-                    '测试recordId': shengtong_record_id
-                }
-            timing_info['data_update'] = time.time() - update_start
-
-            # 计算总耗时
-            total_time = time.time() - start_time
-            timing_info['total'] = total_time
-
-            # 详细的时间分析日志
-            logging.info(f"第 {index + 1} 行处理成功 - 总分: {shengtong_total_score} | "
-                        f"总耗时: {total_time:.2f}s | "
-                        f"速率等待: {timing_info['rate_limit_wait']:.2f}s | "
-                        f"预处理: {timing_info['preprocess']:.3f}s | "
-                        f"音频下载: {timing_info['audio_download']:.2f}s | "
-                        f"API调用: {timing_info['api_call']:.2f}s | "
-                        f"结果处理: {timing_info['result_process']:.3f}s | "
-                        f"数据更新: {timing_info['data_update']:.3f}s")
-
-        except Exception as api_error:
-            total_time = time.time() - start_time
-            logging.error(f"第 {index + 1} 行声通API调用失败: {str(api_error)} | "
-                         f"总耗时: {total_time:.2f}s | "
-                         f"音频下载: {timing_info.get('audio_download', 0):.2f}s | "
-                         f"API调用: {timing_info.get('api_call', 0):.2f}s")
-            with lock:
-                results_dict[index] = {
-                    '测试总分': 0,
-                    '测试明细': "",
-                    '测试recordId': "",
-                    'error': f'API调用失败: {str(api_error)}'
-                }
-
-        finally:
-            # 7. 清理时间
-            cleanup_start = time.time()
-            try:
-                if audio_file_path and os.path.exists(audio_file_path):
-                    os.remove(audio_file_path)
-                    logging.debug(f"已删除临时文件: {audio_file_path}")
-            except Exception as cleanup_error:
-                logging.warning(f"清理临时文件失败: {str(cleanup_error)}")
-            timing_info['cleanup'] = time.time() - cleanup_start
-
-            # 释放速率限制令牌
-            if rate_limiter:
-                try:
-                    rate_limiter.put(None, timeout=1)  # 归还令牌
-                except:
-                    pass  # 队列可能已满，忽略
-
-    except Exception as e:
-        total_time = time.time() - start_time
-        logging.error(f"第 {index + 1} 行处理异常: {str(e)} | 总耗时: {total_time:.2f}s")
-        with lock:
-            results_dict[index] = {
-                '测试总分': 0,
-                '测试明细': "",
-                '测试recordId': "",
-                'error': f'处理异常: {str(e)}'
-            }
-
-        # 释放速率限制令牌
-        if rate_limiter:
-            try:
-                rate_limiter.put(None, timeout=1)
-            except:
-                pass
-
-
-def process_excel_with_shengtong_concurrent(input_file_path, output_dir="output/audio", max_workers=3, rate_limit_per_second=3):
-    """
-    处理Excel文件，添加声通评测结果（并发版本，增强控制）
-
-    Args:
-        input_file_path (str): 输入Excel文件路径
-        output_dir (str): 输出目录路径，默认为 output/audio
-        max_workers (int): 最大并发线程数，默认3
-        rate_limit_per_second (int): 每秒最大请求数，默认3
-
-    Returns:
-        bool: 处理是否成功
-    """
-    start_time = time.time()
-
-    try:
-        # 读取Excel文件
-        logging.info(f"正在读取Excel文件: {input_file_path}")
-        df = pd.read_excel(input_file_path)
-
-        # 检查必要的列是否存在
-        required_columns = ['refText', 'userAudio']
-        missing_columns = [col for col in required_columns if col not in df.columns]
-        if missing_columns:
-            logging.error(f"Excel文件缺少必要的列: {missing_columns}")
-            return False
-
-        # 数据预处理和验证
-        total_rows = len(df)
-        valid_rows = 0
-        for index, row in df.iterrows():
-            if pd.notna(row.get('refText')) and pd.notna(row.get('userAudio')):
-                valid_rows += 1
-
-        logging.info(f"总行数: {total_rows}, 有效行数: {valid_rows}")
-
-        if valid_rows == 0:
-            logging.warning("没有找到有效的数据行")
-            return False
-
-        # 添加新列
-        df['测试总分'] = 0
-        df['测试明细'] = ""
-        df['测试recordId'] = ""
-
-        # 创建优化的速率限制器
-        effective_rate_limit = max(rate_limit_per_second, max_workers)
-        rate_limiter = Queue(maxsize=effective_rate_limit * 2)
-
-        # 预填充令牌
-        for _ in range(effective_rate_limit):
-            rate_limiter.put(None)
-
-        # 启动优化的速率限制器补充线程
-        def rate_limiter_refill():
-            interval = 1.0 / effective_rate_limit
-            while True:
-                time.sleep(interval)
-                try:
-                    rate_limiter.put(None, block=False)
-                except:
-                    pass
-
-        rate_thread = threading.Thread(target=rate_limiter_refill, daemon=True)
-        rate_thread.start()
-
-        logging.info(f"速率限制设置: {effective_rate_limit} req/s (原始: {rate_limit_per_second}, 队列大小: {effective_rate_limit * 2})")
-
-        # 创建临时目录用于下载音频文件
-        with tempfile.TemporaryDirectory() as temp_dir:
-            logging.info(f"创建临时目录: {temp_dir}")
-            logging.info(f"开始并发处理，最大并发数: {max_workers}, 有效速率限制: {effective_rate_limit} req/s")
-
-            # 准备数据
-            row_data_list = [(index, row) for index, row in df.iterrows()]
-
-            # 创建结果字典和线程锁
-            results_dict = {}
-            lock = threading.Lock()
-
-            # 使用线程池进行并发处理
-            with ThreadPoolExecutor(max_workers=max_workers) as executor:
-                # 提交所有任务
-                future_to_index = {
-                    executor.submit(process_single_row, row_data, temp_dir, results_dict, lock, rate_limiter): row_data[0]
-                    for row_data in row_data_list
-                }
-
-                # 等待任务完成并显示进度
-                completed_count = 0
-                success_count = 0
-                error_count = 0
-
-                for future in as_completed(future_to_index):
-                    completed_count += 1
-                    index = future_to_index[future]
-
-                    try:
-                        future.result()  # 获取结果，如果有异常会抛出
-
-                        # 检查处理结果
-                        with lock:
-                            result = results_dict.get(index, {})
-                            if result.get('error') is None:
-                                success_count += 1
-                            else:
-                                error_count += 1
-
-                        # 显示进度
-                        if completed_count % 10 == 0 or completed_count == total_rows:
-                            elapsed_time = time.time() - start_time
-                            avg_time_per_item = elapsed_time / completed_count
-                            remaining_time = avg_time_per_item * (total_rows - completed_count)
-
-                            logging.info(f"进度: {completed_count}/{total_rows} ({completed_count/total_rows*100:.1f}%) "
-                                       f"成功: {success_count}, 失败: {error_count}, "
-                                       f"预计剩余时间: {remaining_time:.1f}秒")
-
-                    except Exception as e:
-                        error_count += 1
-                        logging.error(f"任务 {index + 1} 执行异常: {str(e)}")
-                        with lock:
-                            if index not in results_dict:
-                                results_dict[index] = {
-                                    '测试总分': 0,
-                                    '测试明细': "",
-                                    '测试recordId': "",
-                                    'error': f'任务执行异常: {str(e)}'
-                                }
-
-            # 将结果更新到DataFrame
-            logging.info("正在更新结果到DataFrame...")
-            for index in results_dict:
-                result = results_dict[index]
-                df.at[index, '测试总分'] = result.get('测试总分', 0)
-                df.at[index, '测试明细'] = result.get('测试明细', "")
-                df.at[index, '测试recordId'] = result.get('测试recordId', "")
-
-                # 如果有错误，可以选择记录到备注列（如果存在）
-                if result.get('error') and '备注' in df.columns:
-                    existing_note = str(df.at[index, '备注']) if pd.notna(df.at[index, '备注']) else ""
-                    error_note = f"声通API错误: {result['error']}"
-                    df.at[index, '备注'] = f"{existing_note}\n{error_note}".strip()
-
-        # 创建输出目录
-        output_path = Path(output_dir)
-        output_path.mkdir(parents=True, exist_ok=True)
-
-        # 生成输出文件路径
-        input_path = Path(input_file_path)
-        output_file_path = output_path / f"{input_path.stem}_add_shengtong_result.xlsx"
-
-        # 保存结果
-        logging.info(f"正在保存结果到: {output_file_path}")
-        df.to_excel(output_file_path, index=False)
-
-        # 计算总耗时
-        total_time = time.time() - start_time
-
-        # 统计处理结果
-        final_success_count = sum(1 for result in results_dict.values() if result.get('error') is None)
-        final_error_count = len(results_dict) - final_success_count
-
-        logging.info("=" * 50)
-        logging.info("并发处理完成！")
-        logging.info(f"处理统计: 成功 {final_success_count} 条，失败 {final_error_count} 条，总计 {len(results_dict)} 条")
-        logging.info(f"总耗时: {total_time:.2f} 秒")
-        logging.info(f"平均处理时间: {total_time/len(results_dict):.2f} 秒/条")
-        logging.info(f"输出文件: {output_file_path}")
-        logging.info("=" * 50)
-
-        return True
-
-    except Exception as e:
-        logging.error(f"处理Excel文件时出错: {str(e)}")
-        return False
-
-
-def process_excel_with_shengtong(input_file_path, output_dir="output/audio"):
-    """
-    处理Excel文件，添加声通评测结果（串行版本）
-
-    Args:
-        input_file_path (str): 输入Excel文件路径
-        output_dir (str): 输出目录路径，默认为 output/audio
-
-    Returns:
-        bool: 处理是否成功
-    """
-    try:
-        # 读取Excel文件
-        print(f"正在读取Excel文件: {input_file_path}")
-        df = pd.read_excel(input_file_path)
-
-        # 检查必要的列是否存在
-        required_columns = ['refText', 'userAudio']
-        missing_columns = [col for col in required_columns if col not in df.columns]
-        if missing_columns:
-            print(f"错误: Excel文件缺少必要的列: {missing_columns}")
-            return False
-
-        # 添加新列
-        df['测试总分'] = 0
-        df['测试明细'] = ""
-        df['测试recordId'] = ""
-
-        # 创建临时目录用于下载音频文件
-        with tempfile.TemporaryDirectory() as temp_dir:
-            print(f"创建临时目录: {temp_dir}")
-
-            # 处理每一行数据
-            total_rows = len(df)
-            for index, row in df.iterrows():
-                print(f"\n处理进度: {index + 1}/{total_rows}")
-
-                ref_text = str(row['refText']) if pd.notna(row['refText']) else ""
-                audio_url = str(row['userAudio']) if pd.notna(row['userAudio']) else ""
-
-                if not ref_text or not audio_url:
-                    print(f"第 {index + 1} 行数据不完整，跳过")
-                    continue
-
-                print(f"参考文本: {ref_text}")
-                print(f"音频URL: {audio_url}")
-
-                # 下载音频文件
-                audio_file_path = download_audio_file(audio_url, temp_dir)
-                if not audio_file_path:
-                    print(f"第 {index + 1} 行音频下载失败，跳过")
-                    continue
-
-                # 调用声通API进行评测
-                print("正在调用声通API进行评测...")
-                try:
-                    shengtong_result = evaluate_audio_file(audio_file_path, ref_text)
-                    print(f"声通API返回结果: {json.dumps(shengtong_result, indent=2, ensure_ascii=False)}")
-
-                    # 提取总分、明细和recordId
-                    total_score = get_shengtong_total_score(shengtong_result)
-                    details = format_shengtong_details(shengtong_result)
-                    record_id = get_shengtong_record_id(shengtong_result)
-
-                    # 更新DataFrame
-                    df.at[index, '测试总分'] = total_score
-                    df.at[index, '测试明细'] = details
-                    df.at[index, '测试recordId'] = record_id
-
-                    print(f"测试总分: {total_score}")
-                    print(f"测试明细: {details}")
-                    print(f"测试recordId: {record_id}")
-
-                except Exception as e:
-                    print(f"第 {index + 1} 行声通API调用失败: {str(e)}")
-                    continue
-
-                # 删除临时音频文件
-                try:
-                    os.remove(audio_file_path)
-                except:
-                    pass
-
-                # 添加延时避免API调用过于频繁
-                time.sleep(1)
-
-        # 创建输出目录
-        output_path = Path(output_dir)
-        output_path.mkdir(parents=True, exist_ok=True)
-
-        # 生成输出文件路径
-        input_path = Path(input_file_path)
-        output_file_path = output_path / f"{input_path.stem}_add_shengtong_result.xlsx"
-
-        # 保存结果
-        print(f"\n正在保存结果到: {output_file_path}")
-        df.to_excel(output_file_path, index=False)
-        print("处理完成！")
-
-        return True
-
-    except Exception as e:
-        print(f"处理Excel文件时出错: {str(e)}")
-        return False
-
-
-if __name__ == "__main__":
-    # ==================== 配置参数 ====================
-    input_file = "人工筛选测试集v2_denoise.xlsx"
-    output_directory = "output/audio"  # 输出目录，可以修改
-    use_concurrent = True  # True: 使用并发版本，False: 使用串行版本
-
-    # DEBUG 模式开关（True: 显示详细调试信息，False: 仅显示关键信息）
-    enable_debug = False  # 可以设置为 True 来查看详细的 DEBUG 日志
-
-    # 设置全局 DEBUG_MODE
-    globals()['DEBUG_MODE'] = enable_debug
-
-    # 检查环境变量
-    required_env_vars = ['ST_APP_KEY', 'ST_SECRET_KEY']
-    missing_vars = [var for var in required_env_vars if not os.environ.get(var)]
-
-    if missing_vars:
-        print(f"错误: 缺少必要的环境变量: {missing_vars}")
-        print("请在 .env 文件或系统环境变量中配置:")
-        print("  ST_APP_KEY=你的应用Key")
-        print("  ST_SECRET_KEY=你的Secret Key")
-    elif not os.path.exists(input_file):
-        print(f"文件不存在: {input_file}")
-        print("请确保Excel文件存在并包含 'refText' 和 'userAudio' 列")
-    else:
-        if use_concurrent:
-            print("使用并发版本处理（3路并发，3 req/s）...")
-            success = process_excel_with_shengtong_concurrent(
-                input_file,
-                output_dir=output_directory,
-                max_workers=3,
-                rate_limit_per_second=3
-            )
-        else:
-            print("使用串行版本处理...")
-            success = process_excel_with_shengtong(input_file, output_dir=output_directory)
-
-        if success:
-            print("处理成功！")
-        else:
-            print("处理失败！")
diff --git a/makee_vala/git_scripts/batch_add_xunfei_result.py b/makee_vala/git_scripts/batch_add_xunfei_result.py
deleted file mode 100644
index 3e07493..0000000
--- a/makee_vala/git_scripts/batch_add_xunfei_result.py
+++ /dev/null
@@ -1,1090 +0,0 @@
-"""
-讯飞语音评测批量处理工具
-
-功能说明:
-- 读取 Excel 文件，其中包含音频链接（userAudio 字段）和参考文本（refText 字段）
-- 调用讯飞 API 对音频进行评测，获取总分和明细
-- 在原 Excel 中添加"讯飞总分"和"讯飞明细"两个字段
-- 输出文件命名为: {原文件名}_add_xunfei_result.xlsx
-- 支持串行和并发两种处理模式
-
-环境变量配置:
-- XUNFEI_APPID: 讯飞应用 ID
-- XUNFEI_API_SECRET: 讯飞 API 密钥
-- XUNFEI_API_KEY: 讯飞 API Key
-
-讯飞技术文档: https://www.xfyun.cn/doc/Ise/IseAPI.html
-"""
-
-import pandas as pd
-import os
-import requests
-import tempfile
-from pathlib import Path
-import json
-import time
-from concurrent.futures import ThreadPoolExecutor, as_completed
-import threading
-from queue import Queue
-import logging
-import websocket
-import datetime
-import hashlib
-import base64
-import hmac
-from urllib.parse import urlencode
-import ssl
-from wsgiref.handlers import format_date_time
-from datetime import datetime
-from time import mktime
-import xml.etree.ElementTree as ET
-
-# 配置日志
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s',
-    handlers=[
-        logging.FileHandler('xunfei_batch_processing.log'),
-        logging.StreamHandler()
-    ]
-)
-
-# 从 .env 文件加载环境变量
-from dotenv import load_dotenv
-load_dotenv()
-
-# ==================== 全局配置 ====================
-# DEBUG 模式开关（控制详细日志输出）
-DEBUG_MODE = False
-
-
-def debug_print(message):
-    """
-    DEBUG 信息输出函数
-
-    Args:
-        message (str): 要输出的调试信息
-    """
-    if DEBUG_MODE:
-        print(f"[DEBUG] {message}")
-
-
-# ==================== 讯飞 API 相关代码 ====================
-
-class XunfeiISEAPI:
-    """讯飞语音评测 API 封装类"""
-
-    def __init__(self):
-        """从环境变量读取 API 配置"""
-        self.host_url = "ws://ise-api.xfyun.cn/v2/open-ise"
-        self.appid = os.environ.get('XUNFEI_APPID', '')
-        self.api_secret = os.environ.get('XUNFEI_API_SECRET', '')
-        self.api_key = os.environ.get('XUNFEI_API_KEY', '')
-
-        # 检查环境变量是否配置
-        if not all([self.appid, self.api_secret, self.api_key]):
-            raise ValueError(
-                "请配置讯飞 API 环境变量: XUNFEI_APPID, XUNFEI_API_SECRET, XUNFEI_API_KEY"
-            )
-
-        self.result = None
-        self.error = None
-
-    def _detect_audio_format(self, audio_file_path):
-        """检测音频文件格式"""
-        try:
-            # 通过文件扩展名检测
-            file_ext = os.path.splitext(audio_file_path)[1].lower()
-            if file_ext == '.wav':
-                return 'wav'
-            elif file_ext == '.mp3':
-                return 'mp3'
-
-            # 通过文件头检测
-            with open(audio_file_path, 'rb') as f:
-                header = f.read(12)
-                if len(header) >= 12:
-                    # WAV文件头: RIFF....WAVE
-                    if header[:4] == b'RIFF' and header[8:12] == b'WAVE':
-                        return 'wav'
-                    # MP3文件头: ID3 或 0xFF 0xFB/0xFA
-                    elif header[:3] == b'ID3' or (header[0] == 0xFF and (header[1] & 0xE0) == 0xE0):
-                        return 'mp3'
-
-            # 默认返回wav
-            return 'wav'
-        except Exception as e:
-            print(f"[WARNING] 音频格式检测失败: {str(e)}, 默认使用WAV格式")
-            return 'wav'
-
-    def _remove_wav_header(self, audio_file_path):
-        """去除WAV文件头部，返回纯音频数据"""
-        try:
-            with open(audio_file_path, 'rb') as f:
-                # 读取WAV文件头
-                riff_header = f.read(12)  # RIFF header (12 bytes)
-                if len(riff_header) < 12 or riff_header[:4] != b'RIFF' or riff_header[8:12] != b'WAVE':
-                    print(f"[WARNING] 不是有效的WAV文件，直接返回原始数据")
-                    f.seek(0)
-                    return f.read()
-
-                # 跳过format chunk
-                while True:
-                    chunk_header = f.read(8)
-                    if len(chunk_header) < 8:
-                        break
-
-                    chunk_id = chunk_header[:4]
-                    chunk_size = int.from_bytes(chunk_header[4:8], byteorder='little')
-
-                    if chunk_id == b'data':
-                        # 找到data chunk，返回音频数据
-                        audio_data = f.read(chunk_size)
-                        debug_print(f"WAV头部已去除，音频数据大小: {len(audio_data)} bytes")
-                        return audio_data
-                    else:
-                        # 跳过其他chunk
-                        f.seek(chunk_size, 1)
-                        if chunk_size % 2:  # 如果chunk大小是奇数，需要跳过一个字节对齐
-                            f.seek(1, 1)
-
-                # 如果没找到data chunk，返回从当前位置开始的所有数据
-                print(f"[WARNING] 未找到data chunk，返回剩余数据")
-                return f.read()
-
-        except Exception as e:
-            print(f"[ERROR] WAV头部处理失败: {str(e)}, 返回原始文件数据")
-            with open(audio_file_path, 'rb') as f:
-                return f.read()
-
-    def _generate_url(self):
-        """生成WebSocket连接URL"""
-        now_time = datetime.now()
-        now_date = format_date_time(mktime(now_time.timetuple()))
-
-        # 拼接鉴权原始字符串
-        origin_base = "host: " + "ise-api.xfyun.cn" + "\n"
-        origin_base += "date: " + now_date + "\n"
-        origin_base += "GET " + "/v2/open-ise " + "HTTP/1.1"
-
-        # sha256加密
-        signature_sha = hmac.new(self.api_secret.encode('utf-8'), origin_base.encode('utf-8'),
-                                 digestmod=hashlib.sha256).digest()
-        signature_sha = base64.b64encode(signature_sha).decode(encoding='utf-8')
-
-        authorization_origin = "api_key=\"%s\", algorithm=\"%s\", headers=\"%s\", signature=\"%s\"" % (
-            self.api_key, "hmac-sha256", "host date request-line", signature_sha)
-        authorization = base64.b64encode(authorization_origin.encode('utf-8')).decode(encoding='utf-8')
-
-        # 将请求的鉴权参数组合为字典
-        dict_data = {
-            "authorization": authorization,
-            "date": now_date,
-            "host": "ise-api.xfyun.cn"
-        }
-        ws_url = self.host_url + '?' + urlencode(dict_data)
-        return ws_url
-
-    def _on_message(self, ws, message):
-        """处理WebSocket消息"""
-        try:
-            debug_print(f"收到消息: {message}")
-            response = json.loads(message)
-            debug_print(f"解析后的响应: {json.dumps(response, indent=2, ensure_ascii=False)}")
-
-            # 检查响应结构
-            if "data" not in response:
-                print(f"[ERROR] 响应中缺少 'data' 字段")
-                self.error = f"响应格式错误: 缺少 'data' 字段"
-                ws.close()
-                return
-
-            data = response["data"]
-            if "status" not in data:
-                print(f"[ERROR] data 中缺少 'status' 字段")
-                self.error = f"响应格式错误: 缺少 'status' 字段"
-                ws.close()
-                return
-
-            status = data["status"]
-            debug_print(f"状态码: {status}")
-
-            if status == 2:  # 评测完成
-                if "data" not in data:
-                    print(f"[ERROR] data 中缺少评测结果数据")
-                    self.error = f"响应格式错误: 缺少评测结果数据"
-                    ws.close()
-                    return
-
-                xml_data = base64.b64decode(data["data"])
-                xml_string = xml_data.decode("utf-8")
-                debug_print(f"解码后的XML: {xml_string}")
-                self.result = self._parse_xml_result(xml_string)
-                debug_print(f"解析后的结果: {json.dumps(self.result, indent=2, ensure_ascii=False)}")
-                ws.close()
-        except json.JSONDecodeError as e:
-            print(f"[ERROR] JSON解析失败: {str(e)}")
-            print(f"[ERROR] 原始消息: {message}")
-            self.error = f"JSON解析错误: {str(e)}"
-            ws.close()
-        except Exception as e:
-            print(f"[ERROR] 消息处理异常: {str(e)}")
-            print(f"[ERROR] 异常类型: {type(e).__name__}")
-            print(f"[ERROR] 原始消息: {message}")
-            self.error = f"消息处理错误: {str(e)}"
-            ws.close()
-
-    def _on_error(self, ws, error):
-        """处理WebSocket错误"""
-        print(f"[ERROR] WebSocket错误: {str(error)}")
-        print(f"[ERROR] 错误类型: {type(error).__name__}")
-        self.error = f"WebSocket错误: {str(error)}"
-
-    def _on_close(self, ws, reason, res):
-        """WebSocket连接关闭"""
-        debug_print(f"WebSocket连接关闭 - 原因: {reason}, 响应: {res}")
-        pass
-
-    def _on_open(self, ws, audio_file, text="nice to meet you."):
-        """WebSocket连接打开，发送音频数据"""
-        try:
-            debug_print("WebSocket连接已打开")
-            debug_print(f"音频文件: {audio_file}")
-            debug_print(f"评测文本: {text}")
-
-            # 检测音频格式
-            audio_format = self._detect_audio_format(audio_file)
-            debug_print(f"检测到音频格式: {audio_format}")
-
-            # 根据音频格式设置aue参数
-            if audio_format == 'wav':
-                aue_param = "raw"  # WAV文件使用raw
-            else:  # mp3
-                aue_param = "lame"  # MP3文件使用lame
-
-            debug_print(f"使用aue参数: {aue_param}")
-
-            # 发送初始配置
-            send_dict = {
-                "common": {
-                    "app_id": self.appid
-                },
-                "business": {
-                    "category": "read_sentence",
-                    "rstcd": "utf8",
-                    "sub": "ise",
-                    "group": "pupil",
-                    "ent": "en_vip",
-                    "tte": "utf-8",
-                    "cmd": "ssb",
-                    "auf": "audio/L16;rate=16000",
-                    "aue": aue_param,
-                    "text": '\uFEFF' + f"[content]\n{text}",
-                    "ise_unite": "1",
-                    "extra_ability": "pitch"
-                },
-                "data": {
-                    "status": 0,
-                    "data": ""
-                }
-            }
-            debug_print(f"发送初始配置: {json.dumps(send_dict, indent=2, ensure_ascii=False)}")
-            ws.send(json.dumps(send_dict))
-
-            # 根据音频格式处理音频数据
-            if audio_format == 'wav':
-                # WAV文件需要去除头部
-                audio_data = self._remove_wav_header(audio_file)
-                debug_print(f"WAV文件头部已去除，音频数据大小: {len(audio_data)} bytes")
-            else:
-                # MP3文件直接读取
-                with open(audio_file, "rb") as f:
-                    audio_data = f.read()
-                debug_print(f"MP3文件直接读取，音频数据大小: {len(audio_data)} bytes")
-
-            # 优化音频发送逻辑
-            frame_count = 0
-            data_size = len(audio_data)
-
-            # 根据数据大小动态调整缓冲区大小和延迟
-            if data_size > 50000:  # 大于50KB的数据使用更大的缓冲区
-                buffer_size = 12800  # 20倍缓冲区
-                sleep_time = 0.02   # 减少延迟到20ms
-            else:
-                buffer_size = 1280  # 原始缓冲区
-                sleep_time = 0.01   # 小文件使用更小延迟
-
-            debug_print(f"使用缓冲区大小: {buffer_size}, 延迟: {sleep_time}s")
-
-            # 发送音频数据
-            offset = 0
-            while offset < data_size:
-                # 读取缓冲区大小的数据
-                buffer = audio_data[offset:offset + buffer_size]
-                offset += len(buffer)
-
-                if offset >= data_size:
-                    # 发送最后一帧
-                    my_dict = {
-                        "business": {"cmd": "auw", "aus": 4, "aue": aue_param},
-                        "data": {"status": 2, "data": str(base64.b64encode(buffer).decode())}
-                    }
-                    debug_print("发送最后一帧")
-                    ws.send(json.dumps(my_dict))
-                    break
-
-                # 发送中间帧
-                send_dict = {
-                    "business": {
-                        "cmd": "auw",
-                        "aus": 1,
-                        "aue": aue_param
-                    },
-                    "data": {
-                        "status": 1,
-                        "data": str(base64.b64encode(buffer).decode()),
-                        "data_type": 1,
-                        "encoding": "raw"
-                    }
-                }
-                frame_count += 1
-                if frame_count % 20 == 0:  # 减少日志频率
-                    debug_print(f"已发送 {frame_count} 帧音频数据")
-                ws.send(json.dumps(send_dict))
-                time.sleep(sleep_time)  # 使用动态延迟
-
-            debug_print(f"音频发送完成，总共发送 {frame_count} 帧")
-
-        except Exception as e:
-            print(f"[ERROR] 音频发送异常: {str(e)}")
-            print(f"[ERROR] 异常类型: {type(e).__name__}")
-            self.error = f"音频发送错误: {str(e)}"
-            ws.close()
-
-    def _parse_xml_result(self, xml_string):
-        """解析XML评测结果"""
-        try:
-            root = ET.fromstring(xml_string)
-
-            result = {
-                "total_score": 0,
-                "words": [],
-                "sentences": []
-            }
-
-            # 解析句子级别评分
-            for sentence in root.findall('.//sentence'):
-                sentence_info = {
-                    "content": sentence.get('content', ''),
-                    "total_score": float(sentence.get('total_score', 0)),
-                    "fluency_score": float(sentence.get('fluency_score', 0)),
-                    "integrity_score": float(sentence.get('integrity_score', 0)),
-                    "phone_score": float(sentence.get('phone_score', 0))
-                }
-                result["sentences"].append(sentence_info)
-                result["total_score"] = sentence_info["total_score"]
-
-            # 解析单词级别评分
-            for word in root.findall('.//word'):
-                word_info = {
-                    "content": word.get('content', ''),
-                    "total_score": float(word.get('total_score', 0)),
-                    "dp_message": int(word.get('dp_message', 0)),
-                    "time_len": int(word.get('time_len', 0)),
-                    "syllables": []
-                }
-
-                # 解析音节评分
-                for syllable in word.findall('.//syllable'):
-                    syllable_info = {
-                        "content": syllable.get('content', ''),
-                        "total_score": float(syllable.get('total_score', 0)),
-                        "phones": []
-                    }
-
-                    # 解析音素评分
-                    for phone in syllable.findall('.//phone'):
-                        phone_info = {
-                            "content": phone.get('content', ''),
-                            "total_score": float(phone.get('total_score', 0)),
-                            "dp_message": int(phone.get('dp_message', 0))
-                        }
-                        syllable_info["phones"].append(phone_info)
-
-                    word_info["syllables"].append(syllable_info)
-
-                result["words"].append(word_info)
-
-            return result
-
-        except Exception as e:
-            return {"error": f"XML解析错误: {str(e)}"}
-
-    def evaluate_audio(self, audio_file_path, text="nice to meet you.", timeout=30):
-        """
-        评测音频文件
-
-        Args:
-            audio_file_path (str): 音频文件路径
-            text (str): 评测文本内容
-            timeout (int): 超时时间（秒）
-
-        Returns:
-            dict: 评测结果JSON
-        """
-        debug_print(f"开始评测音频文件: {audio_file_path}")
-        debug_print(f"评测文本: {text}")
-
-        # 检查音频文件是否存在
-        if not os.path.exists(audio_file_path):
-            error_msg = f"音频文件不存在: {audio_file_path}"
-            print(f"[ERROR] {error_msg}")
-            return {"error": error_msg}
-
-        # 重置结果
-        self.result = None
-        self.error = None
-
-        try:
-            # 生成WebSocket URL
-            ws_url = self._generate_url()
-            debug_print(f"WebSocket URL: {ws_url}")
-
-            # 创建WebSocket连接
-            websocket.enableTrace(False)
-            ws = websocket.WebSocketApp(
-                ws_url,
-                on_message=self._on_message,
-                on_error=self._on_error,
-                on_close=self._on_close,
-                on_open=lambda ws: self._on_open(ws, audio_file_path, text)
-            )
-
-            debug_print("开始WebSocket连接...")
-            # 运行WebSocket连接
-            ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
-
-            debug_print("WebSocket连接结束")
-            # 返回结果
-            if self.error:
-                print(f"[ERROR] 评测失败: {self.error}")
-                return {"error": self.error}
-            elif self.result:
-                debug_print("评测成功")
-                return self.result
-            else:
-                error_msg = "未收到评测结果"
-                print(f"[ERROR] {error_msg}")
-                return {"error": error_msg}
-
-        except Exception as e:
-            error_msg = f"评测过程出错: {str(e)}"
-            print(f"[ERROR] {error_msg}")
-            print(f"[ERROR] 异常类型: {type(e).__name__}")
-            return {"error": error_msg}
-
-
-def evaluate_audio_file(audio_file_path, text="nice to meet you."):
-    """
-    简化的音频评测函数
-
-    Args:
-        audio_file_path (str): 音频文件路径
-        text (str): 评测文本内容
-
-    Returns:
-        dict: 评测结果JSON
-    """
-    api = XunfeiISEAPI()
-    return api.evaluate_audio(audio_file_path, text)
-
-
-# ==================== 批量处理相关代码 ====================
-
-def download_audio_file(audio_url, temp_dir, max_retries=3, timeout=30):
-    """
-    下载音频文件到临时目录（增强版本）
-
-    Args:
-        audio_url (str): 音频文件URL
-        temp_dir (str): 临时目录路径
-        max_retries (int): 最大重试次数
-        timeout (int): 请求超时时间（秒）
-
-    Returns:
-        str: 下载的音频文件路径，失败返回None
-    """
-    if not audio_url or pd.isna(audio_url):
-        logging.warning("音频URL为空或无效")
-        return None
-
-    # 从URL中提取文件名
-    try:
-        file_name = os.path.basename(audio_url.split('?')[0])  # 去除URL参数
-        if not file_name or '.' not in file_name:
-            file_name = f"audio_{hash(audio_url) % 100000}.wav"  # 生成默认文件名
-
-        file_path = os.path.join(temp_dir, file_name)
-
-        # 重试机制
-        for attempt in range(max_retries):
-            try:
-                logging.info(f"正在下载音频文件 (尝试 {attempt + 1}/{max_retries}): {audio_url}")
-
-                # 设置请求头，模拟浏览器
-                headers = {
-                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
-                }
-
-                response = requests.get(audio_url, timeout=timeout, headers=headers, stream=True)
-                response.raise_for_status()
-
-                # 检查内容类型
-                content_type = response.headers.get('content-type', '')
-                if not any(audio_type in content_type.lower() for audio_type in ['audio', 'wav', 'mp3', 'ogg', 'flac']):
-                    logging.warning(f"可能不是音频文件，Content-Type: {content_type}")
-
-                # 写入文件
-                with open(file_path, 'wb') as f:
-                    for chunk in response.iter_content(chunk_size=8192):
-                        if chunk:
-                            f.write(chunk)
-
-                # 验证文件大小
-                file_size = os.path.getsize(file_path)
-                if file_size == 0:
-                    raise ValueError("下载的文件为空")
-
-                logging.info(f"音频文件下载成功: {file_path} (大小: {file_size} bytes)")
-                return file_path
-
-            except requests.exceptions.Timeout:
-                logging.warning(f"下载超时 (尝试 {attempt + 1}/{max_retries}): {audio_url}")
-                if attempt < max_retries - 1:
-                    time.sleep(2 ** attempt)  # 指数退避
-                    continue
-            except requests.exceptions.RequestException as e:
-                logging.warning(f"下载请求异常 (尝试 {attempt + 1}/{max_retries}): {str(e)}")
-                if attempt < max_retries - 1:
-                    time.sleep(2 ** attempt)
-                    continue
-            except Exception as e:
-                logging.error(f"下载过程中发生未知错误 (尝试 {attempt + 1}/{max_retries}): {str(e)}")
-                if attempt < max_retries - 1:
-                    time.sleep(2 ** attempt)
-                    continue
-
-        logging.error(f"音频文件下载失败，已达到最大重试次数: {audio_url}")
-        return None
-
-    except Exception as e:
-        logging.error(f"下载音频文件时发生异常: {str(e)}")
-        return None
-
-
-def format_xunfei_details(xunfei_result):
-    """
-    格式化讯飞评测结果为明细字符串
-
-    Args:
-        xunfei_result (dict): 讯飞API返回的结果
-
-    Returns:
-        str: 格式化的明细字符串
-    """
-    if not xunfei_result or 'error' in xunfei_result:
-        return ""
-
-    try:
-        words = xunfei_result.get('words', [])
-        if not words:
-            return ""
-
-        details = []
-        for word in words:
-            content = word.get('content', '')
-            total_score = word.get('total_score', 0)
-            details.append(f"{content} {int(total_score)}")
-
-        return "\n".join(details)
-
-    except Exception as e:
-        print(f"格式化讯飞明细失败: {str(e)}")
-        return ""
-
-
-def get_xunfei_total_score(xunfei_result):
-    """
-    获取讯飞评测总分
-
-    Args:
-        xunfei_result (dict): 讯飞API返回的结果
-
-    Returns:
-        int: 总分，失败返回0
-    """
-    if not xunfei_result or 'error' in xunfei_result:
-        return 0
-
-    try:
-        return int(xunfei_result.get('total_score', 0))
-    except Exception as e:
-        print(f"获取讯飞总分失败: {str(e)}")
-        return 0
-
-
-def process_single_row(row_data, temp_dir, results_dict, lock, rate_limiter=None):
-    """
-    处理单行数据（并发版本，增强错误处理和时间分析）
-
-    Args:
-        row_data (tuple): (index, row) 数据
-        temp_dir (str): 临时目录路径
-        results_dict (dict): 结果字典
-        lock (threading.Lock): 线程锁
-        rate_limiter (Queue): 速率限制器
-
-    Returns:
-        None
-    """
-    index, row = row_data
-    start_time = time.time()
-    timing_info = {}
-
-    try:
-        # 1. 速率限制等待时间
-        rate_limit_start = time.time()
-        if rate_limiter:
-            rate_limiter.get()  # 获取令牌
-        timing_info['rate_limit_wait'] = time.time() - rate_limit_start
-
-        logging.info(f"开始处理第 {index + 1} 行数据")
-
-        # 2. 数据预处理时间
-        preprocess_start = time.time()
-        ref_text = str(row['refText']) if pd.notna(row['refText']) else ""
-        audio_url = str(row['userAudio']) if pd.notna(row['userAudio']) else ""
-
-        # 数据验证
-        if not ref_text:
-            raise ValueError("refText 为空或无效")
-
-        if not audio_url:
-            raise ValueError("userAudio 为空或无效")
-        timing_info['preprocess'] = time.time() - preprocess_start
-
-        # 3. 音频下载时间
-        download_start = time.time()
-        audio_file_path = download_audio_file(audio_url, temp_dir)
-        timing_info['audio_download'] = time.time() - download_start
-
-        if not audio_file_path:
-            raise ValueError("音频文件下载失败")
-
-        try:
-            # 4. 讯飞API调用时间
-            api_start = time.time()
-            logging.info(f"正在调用讯飞API评测: {ref_text}")
-            xunfei_result = evaluate_audio_file(audio_file_path, ref_text)
-            timing_info['api_call'] = time.time() - api_start
-
-            if not xunfei_result:
-                raise ValueError("讯飞API返回空结果")
-
-            # 5. 结果处理时间
-            result_process_start = time.time()
-            xunfei_details = format_xunfei_details(xunfei_result)
-            xunfei_total_score = get_xunfei_total_score(xunfei_result)
-            timing_info['result_process'] = time.time() - result_process_start
-
-            # 6. 数据更新时间
-            update_start = time.time()
-            with lock:
-                results_dict[index] = {
-                    '讯飞总分': xunfei_total_score,
-                    '讯飞明细': xunfei_details
-                }
-            timing_info['data_update'] = time.time() - update_start
-
-            # 计算总耗时
-            total_time = time.time() - start_time
-            timing_info['total'] = total_time
-
-            # 详细的时间分析日志
-            logging.info(f"第 {index + 1} 行处理成功 - 总分: {xunfei_total_score} | "
-                        f"总耗时: {total_time:.2f}s | "
-                        f"速率等待: {timing_info['rate_limit_wait']:.2f}s | "
-                        f"预处理: {timing_info['preprocess']:.3f}s | "
-                        f"音频下载: {timing_info['audio_download']:.2f}s | "
-                        f"API调用: {timing_info['api_call']:.2f}s | "
-                        f"结果处理: {timing_info['result_process']:.3f}s | "
-                        f"数据更新: {timing_info['data_update']:.3f}s")
-
-        except Exception as api_error:
-            total_time = time.time() - start_time
-            logging.error(f"第 {index + 1} 行讯飞API调用失败: {str(api_error)} | "
-                         f"总耗时: {total_time:.2f}s | "
-                         f"音频下载: {timing_info.get('audio_download', 0):.2f}s | "
-                         f"API调用: {timing_info.get('api_call', 0):.2f}s")
-            with lock:
-                results_dict[index] = {
-                    '讯飞总分': 0,
-                    '讯飞明细': "",
-                    'error': f'API调用失败: {str(api_error)}'
-                }
-
-        finally:
-            # 7. 清理时间
-            cleanup_start = time.time()
-            try:
-                if audio_file_path and os.path.exists(audio_file_path):
-                    os.remove(audio_file_path)
-                    logging.debug(f"已删除临时文件: {audio_file_path}")
-            except Exception as cleanup_error:
-                logging.warning(f"清理临时文件失败: {str(cleanup_error)}")
-            timing_info['cleanup'] = time.time() - cleanup_start
-
-            # 释放速率限制令牌
-            if rate_limiter:
-                try:
-                    rate_limiter.put(None, timeout=1)  # 归还令牌
-                except:
-                    pass  # 队列可能已满，忽略
-
-    except Exception as e:
-        total_time = time.time() - start_time
-        logging.error(f"第 {index + 1} 行处理异常: {str(e)} | 总耗时: {total_time:.2f}s")
-        with lock:
-            results_dict[index] = {
-                '讯飞总分': 0,
-                '讯飞明细': "",
-                'error': f'处理异常: {str(e)}'
-            }
-
-        # 释放速率限制令牌
-        if rate_limiter:
-            try:
-                rate_limiter.put(None, timeout=1)
-            except:
-                pass
-
-
-def process_excel_with_xunfei_concurrent(input_file_path, output_dir="output/audio", max_workers=5, rate_limit_per_second=5):
-    """
-    处理Excel文件，添加讯飞评测结果（并发版本，增强控制）
-
-    Args:
-        input_file_path (str): 输入Excel文件路径
-        output_dir (str): 输出目录路径，默认为 output/audio
-        max_workers (int): 最大并发线程数，默认5
-        rate_limit_per_second (int): 每秒最大请求数，默认5
-
-    Returns:
-        bool: 处理是否成功
-    """
-    start_time = time.time()
-
-    try:
-        # 读取Excel文件
-        logging.info(f"正在读取Excel文件: {input_file_path}")
-        df = pd.read_excel(input_file_path)
-
-        # 检查必要的列是否存在
-        required_columns = ['refText', 'userAudio']
-        missing_columns = [col for col in required_columns if col not in df.columns]
-        if missing_columns:
-            logging.error(f"Excel文件缺少必要的列: {missing_columns}")
-            return False
-
-        # 数据预处理和验证
-        total_rows = len(df)
-        valid_rows = 0
-        for index, row in df.iterrows():
-            if pd.notna(row.get('refText')) and pd.notna(row.get('userAudio')):
-                valid_rows += 1
-
-        logging.info(f"总行数: {total_rows}, 有效行数: {valid_rows}")
-
-        if valid_rows == 0:
-            logging.warning("没有找到有效的数据行")
-            return False
-
-        # 添加新列
-        df['讯飞总分'] = 0
-        df['讯飞明细'] = ""
-
-        # 创建优化的速率限制器
-        effective_rate_limit = max(rate_limit_per_second, max_workers)
-        rate_limiter = Queue(maxsize=effective_rate_limit * 2)
-
-        # 预填充令牌
-        for _ in range(effective_rate_limit):
-            rate_limiter.put(None)
-
-        # 启动优化的速率限制器补充线程
-        def rate_limiter_refill():
-            interval = 1.0 / effective_rate_limit
-            while True:
-                time.sleep(interval)
-                try:
-                    rate_limiter.put(None, block=False)
-                except:
-                    pass
-
-        rate_thread = threading.Thread(target=rate_limiter_refill, daemon=True)
-        rate_thread.start()
-
-        logging.info(f"速率限制设置: {effective_rate_limit} req/s (原始: {rate_limit_per_second}, 队列大小: {effective_rate_limit * 2})")
-
-        # 创建临时目录用于下载音频文件
-        with tempfile.TemporaryDirectory() as temp_dir:
-            logging.info(f"创建临时目录: {temp_dir}")
-            logging.info(f"开始并发处理，最大并发数: {max_workers}, 有效速率限制: {effective_rate_limit} req/s")
-
-            # 准备数据
-            row_data_list = [(index, row) for index, row in df.iterrows()]
-
-            # 创建结果字典和线程锁
-            results_dict = {}
-            lock = threading.Lock()
-
-            # 使用线程池进行并发处理
-            with ThreadPoolExecutor(max_workers=max_workers) as executor:
-                # 提交所有任务
-                future_to_index = {
-                    executor.submit(process_single_row, row_data, temp_dir, results_dict, lock, rate_limiter): row_data[0]
-                    for row_data in row_data_list
-                }
-
-                # 等待任务完成并显示进度
-                completed_count = 0
-                success_count = 0
-                error_count = 0
-
-                for future in as_completed(future_to_index):
-                    completed_count += 1
-                    index = future_to_index[future]
-
-                    try:
-                        future.result()  # 获取结果，如果有异常会抛出
-
-                        # 检查处理结果
-                        with lock:
-                            result = results_dict.get(index, {})
-                            if result.get('error') is None:
-                                success_count += 1
-                            else:
-                                error_count += 1
-
-                        # 显示进度
-                        if completed_count % 10 == 0 or completed_count == total_rows:
-                            elapsed_time = time.time() - start_time
-                            avg_time_per_item = elapsed_time / completed_count
-                            remaining_time = avg_time_per_item * (total_rows - completed_count)
-
-                            logging.info(f"进度: {completed_count}/{total_rows} ({completed_count/total_rows*100:.1f}%) "
-                                       f"成功: {success_count}, 失败: {error_count}, "
-                                       f"预计剩余时间: {remaining_time:.1f}秒")
-
-                    except Exception as e:
-                        error_count += 1
-                        logging.error(f"任务 {index + 1} 执行异常: {str(e)}")
-                        with lock:
-                            if index not in results_dict:
-                                results_dict[index] = {
-                                    '讯飞总分': 0,
-                                    '讯飞明细': "",
-                                    'error': f'任务执行异常: {str(e)}'
-                                }
-
-            # 将结果更新到DataFrame
-            logging.info("正在更新结果到DataFrame...")
-            for index in results_dict:
-                result = results_dict[index]
-                df.at[index, '讯飞总分'] = result.get('讯飞总分', 0)
-                df.at[index, '讯飞明细'] = result.get('讯飞明细', "")
-
-                # 如果有错误，可以选择记录到备注列（如果存在）
-                if result.get('error') and '备注' in df.columns:
-                    existing_note = str(df.at[index, '备注']) if pd.notna(df.at[index, '备注']) else ""
-                    error_note = f"讯飞API错误: {result['error']}"
-                    df.at[index, '备注'] = f"{existing_note}\n{error_note}".strip()
-
-        # 创建输出目录
-        output_path = Path(output_dir)
-        output_path.mkdir(parents=True, exist_ok=True)
-
-        # 生成输出文件路径
-        input_path = Path(input_file_path)
-        output_file_path = output_path / f"{input_path.stem}_add_xunfei_result.xlsx"
-
-        # 保存结果
-        logging.info(f"正在保存结果到: {output_file_path}")
-        df.to_excel(output_file_path, index=False)
-
-        # 计算总耗时
-        total_time = time.time() - start_time
-
-        # 统计处理结果
-        final_success_count = sum(1 for result in results_dict.values() if result.get('error') is None)
-        final_error_count = len(results_dict) - final_success_count
-
-        logging.info("=" * 50)
-        logging.info("并发处理完成！")
-        logging.info(f"处理统计: 成功 {final_success_count} 条，失败 {final_error_count} 条，总计 {len(results_dict)} 条")
-        logging.info(f"总耗时: {total_time:.2f} 秒")
-        logging.info(f"平均处理时间: {total_time/len(results_dict):.2f} 秒/条")
-        logging.info(f"输出文件: {output_file_path}")
-        logging.info("=" * 50)
-
-        return True
-
-    except Exception as e:
-        logging.error(f"处理Excel文件时出错: {str(e)}")
-        return False
-
-
-def process_excel_with_xunfei(input_file_path, output_dir="output/audio"):
-    """
-    处理Excel文件，添加讯飞评测结果（串行版本）
-
-    Args:
-        input_file_path (str): 输入Excel文件路径
-        output_dir (str): 输出目录路径，默认为 output/audio
-
-    Returns:
-        bool: 处理是否成功
-    """
-    try:
-        # 读取Excel文件
-        print(f"正在读取Excel文件: {input_file_path}")
-        df = pd.read_excel(input_file_path)
-
-        # 检查必要的列是否存在
-        required_columns = ['refText', 'userAudio']
-        missing_columns = [col for col in required_columns if col not in df.columns]
-        if missing_columns:
-            print(f"错误: Excel文件缺少必要的列: {missing_columns}")
-            return False
-
-        # 添加新列
-        df['讯飞总分'] = 0
-        df['讯飞明细'] = ""
-
-        # 创建临时目录用于下载音频文件
-        with tempfile.TemporaryDirectory() as temp_dir:
-            print(f"创建临时目录: {temp_dir}")
-
-            # 处理每一行数据
-            total_rows = len(df)
-            for index, row in df.iterrows():
-                print(f"\n处理进度: {index + 1}/{total_rows}")
-
-                ref_text = str(row['refText']) if pd.notna(row['refText']) else ""
-                audio_url = str(row['userAudio']) if pd.notna(row['userAudio']) else ""
-
-                if not ref_text or not audio_url:
-                    print(f"第 {index + 1} 行数据不完整，跳过")
-                    continue
-
-                print(f"参考文本: {ref_text}")
-                print(f"音频URL: {audio_url}")
-
-                # 下载音频文件
-                audio_file_path = download_audio_file(audio_url, temp_dir)
-                if not audio_file_path:
-                    print(f"第 {index + 1} 行音频下载失败，跳过")
-                    continue
-
-                # 调用讯飞API进行评测
-                print("正在调用讯飞API进行评测...")
-                try:
-                    xunfei_result = evaluate_audio_file(audio_file_path, ref_text)
-                    print(f"讯飞API返回结果: {json.dumps(xunfei_result, indent=2, ensure_ascii=False)}")
-
-                    # 提取总分和明细
-                    total_score = get_xunfei_total_score(xunfei_result)
-                    details = format_xunfei_details(xunfei_result)
-
-                    # 更新DataFrame
-                    df.at[index, '讯飞总分'] = total_score
-                    df.at[index, '讯飞明细'] = details
-
-                    print(f"讯飞总分: {total_score}")
-                    print(f"讯飞明细: {details}")
-
-                except Exception as e:
-                    print(f"第 {index + 1} 行讯飞API调用失败: {str(e)}")
-                    continue
-
-                # 删除临时音频文件
-                try:
-                    os.remove(audio_file_path)
-                except:
-                    pass
-
-                # 添加延时避免API调用过于频繁
-                time.sleep(1)
-
-        # 创建输出目录
-        output_path = Path(output_dir)
-        output_path.mkdir(parents=True, exist_ok=True)
-
-        # 生成输出文件路径
-        input_path = Path(input_file_path)
-        output_file_path = output_path / f"{input_path.stem}_add_xunfei_result.xlsx"
-
-        # 保存结果
-        print(f"\n正在保存结果到: {output_file_path}")
-        df.to_excel(output_file_path, index=False)
-        print("处理完成！")
-
-        return True
-
-    except Exception as e:
-        print(f"处理Excel文件时出错: {str(e)}")
-        return False
-
-
-if __name__ == "__main__":
-    # ==================== 配置参数 ====================
-    input_file = "user_audio_data_20251210_152807_sample.xlsx"
-    output_directory = "output/audio"  # 输出目录，可以修改
-    use_concurrent = True  # True: 使用并发版本，False: 使用串行版本
-
-    # DEBUG 模式开关（True: 显示详细调试信息，False: 仅显示关键信息）
-    enable_debug = False  # 可以设置为 True 来查看详细的 DEBUG 日志
-
-    # 设置全局 DEBUG_MODE
-    globals()['DEBUG_MODE'] = enable_debug
-
-    # 检查环境变量
-    required_env_vars = ['XUNFEI_APPID', 'XUNFEI_API_SECRET', 'XUNFEI_API_KEY']
-    missing_vars = [var for var in required_env_vars if not os.environ.get(var)]
-
-    if missing_vars:
-        print(f"错误: 缺少必要的环境变量: {missing_vars}")
-        print("请在 .env 文件或系统环境变量中配置:")
-        print("  XUNFEI_APPID=你的应用ID")
-        print("  XUNFEI_API_SECRET=你的API密钥")
-        print("  XUNFEI_API_KEY=你的API Key")
-    elif not os.path.exists(input_file):
-        print(f"文件不存在: {input_file}")
-        print("请确保Excel文件存在并包含 'refText' 和 'userAudio' 列")
-    else:
-        if use_concurrent:
-            print("使用并发版本处理（5路并发，5 req/s）...")
-            success = process_excel_with_xunfei_concurrent(
-                input_file,
-                output_dir=output_directory,
-                max_workers=5,
-                rate_limit_per_second=5
-            )
-        else:
-            print("使用串行版本处理...")
-            success = process_excel_with_xunfei(input_file, output_dir=output_directory)
-
-        if success:
-            print("处理成功！")
-        else:
-            print("处理失败！")
diff --git a/makee_vala/git_scripts/export_component_record.py b/makee_vala/git_scripts/export_component_record.py
deleted file mode 100644
index 6149a19..0000000
--- a/makee_vala/git_scripts/export_component_record.py
+++ /dev/null
@@ -1,492 +0,0 @@
-"""
-互动组件数据导出
-
-需求 20251123：
----------
-在 PGsql数据库中  筛选数据
-数据库相关配置  从.env中读取:
-PG_DB_HOST = xxx
-PG_DB_PORT = xxx
-PG_DB_USER = xxx
-PG_DB_PASSWORD = xxx
-PG_DB_DATABASE = xxx
-
-读取以下数据表:
-user_component_play_record_0 ~ user_component_play_record_7
-
-支持输入时间范围
-起始时间 和 截止时间   配置格式:   "20250110"
-
-数据表中的时间字段为 updated_at , 格式样例: "2025-11-05 19:35:46.698246+08:00"
-
-在这些时间范围内，筛选以下字段数据  导出为excel文件:
-
-c_type 与 c_id 非空
-
-输出以下字段：
-user_id,
-session_id,
-c_type,
-c_id,
-play_result,
-user_behavior_info,
-updated_at
-
-写一个简单清晰的 数据导出脚本， 输入参数都直接在脚本开头定义和修改。 不要改动文件开头的需求描述，直接追加代码。
--------
-
-需求二:
-读取上述  输出的  excel 文件， 围绕 每个组件进行 统计，
-
-统计方式如下:
-仅计算 c_type 与 c_id 非空 的记录
-
-以每个 c_type + c_id 拼接 后 作为统计维度，
-统计以下数据:
-总数量
-Perfect数量:play_result=="Perfect" 的数量
-Good数量:play_result=="Good" 的数量
-Pass数量:play_result=="Pass" 的数量
-Oops数量:play_result=="Oops" 的数量
-Failed数量:play_result=="Failed" 的数量
-Perfect+Good数量:play_result=="Perfect" 或 play_result=="Good" 的数量
-Perfect比例:Perfect数量 / 总数量
-Good比例:Good数量 / 总数量
-Pass比例:Pass数量 / 总数量
-Oops比例:Oops数量 / 总数量
-Failed比例:Failed数量 / 总数量
-Perfect+Good比例:Perfect+Good数量 / 总数量
-
-导出为excel 命名: 步骤1文件 结尾追加 _stats.xlsx
-
-需求三:
-在需求二中， 追加从另外两个mysql表关联的组件配置字段:
-MYSQL_HOST=xxx
-MYSQL_USERNAME=xxx
-MYSQL_PASSWORD=xxx
-MYSQL_DATABASE=xxx
-MYSQL_PORT=xxx
-
-以上环境变量已配置在 .env 中。
-
-1.如果 c_type 开头为"mid"
-
-则读取下表:表名:middle_interaction_component
-
-增加以下字段:
-title
-component_config
-组件类型
-
-其中:
- “组件类型”:  根据以下映射  把 c_type 转成中文名:xx互动
-{
-    "词汇类": {
-        "物品互动": "mid_vocab_item",
-        "图片互动": "mid_vocab_image",
-        "填词互动": "mid_vocab_fillBlank",
-        "指令互动": "mid_vocab_instruction"
-    },
-    "句子类": {
-        "对话互动": "mid_sentence_dialogue",
-        "语音互动": "mid_sentence_voice",
-        "材料互动": "mid_sentence_material",
-        "造句互动": "mid_sentence_makeSentence"
-    },
-    "语法类": {
-        "挖空互动": "mid_grammar_cloze",
-        "组句互动": "mid_grammar_sentence"
-    },
-    "发音类": {
-        "发音互动": "mid_pron_pron"
-    
-}
-
-2. 如果 c_type 开头为"core"
-则读取下表:表名:core_interaction_component
-
-增加以下字段:
-title
-component_config
-组件类型
-
-其中:
- “组件类型”:  根据以下映射  把 c_type 转成中文名:xx互动
-{
-    "口语类": {
-        "口语快答": "core_speaking_reply",
-        "口语妙问": "core_speaking_inquiry",
-        "口语探讨": "core_speaking_explore"
-        "口语独白": "core_speaking_monologue"
-    },
-    "阅读类": {
-        "合作阅读": "core_reading_order",
-    },
-    "听力类": {
-        "合作听力": "core_listening_order",
-    },
-    "写作类": {
-        "看图组句": "core_writing_imgMakeSentence",
-        "看图撰写": "core_writing_imgWrite",
-        "问题组句": "core_writing_questionMakeSentence",
-        "问题撰写": "core_writing_questionWrite",
-    },
-}
-
-以上追加字段  增加到 步骤二输出的表中
-
-
-
-"""
-
-import os
-from datetime import datetime
-from dotenv import load_dotenv
-import psycopg2
-import pandas as pd
-import pymysql
-
-# ==================== 配置参数 ====================
-# 时间范围配置（格式: "20250110"）
-START_DATE = "20250915"  # 起始日期
-END_DATE = "20251122"    # 截止日期
-
-# 输出文件路径
-OUTPUT_DIR = "output"
-
-# 执行步骤控制
-RUN_STEP1 = False  # 是否执行步骤1：数据导出
-RUN_STEP2 = True  # 是否执行步骤2：数据统计
-# ==================================================
-
-# c_type 到中文组件类型的映射
-C_TYPE_MAPPING = {
-    # middle_interaction_component 映射
-    "mid_vocab_item": "物品互动",
-    "mid_vocab_image": "图片互动",
-    "mid_vocab_fillBlank": "填词互动",
-    "mid_vocab_instruction": "指令互动",
-    "mid_sentence_dialogue": "对话互动",
-    "mid_sentence_voice": "语音互动",
-    "mid_sentence_material": "材料互动",
-    "mid_sentence_makeSentence": "造句互动",
-    "mid_grammar_cloze": "挖空互动",
-    "mid_grammar_sentence": "组句互动",
-    "mid_pron_pron": "发音互动",
-
-    # core_interaction_component 映射
-    "core_speaking_reply": "口语快答",
-    "core_speaking_inquiry": "口语妙问",
-    "core_speaking_explore": "口语探讨",
-    "core_speaking_monologue": "口语独白",
-    "core_reading_order": "合作阅读",
-    "core_listening_order": "合作听力",
-    "core_writing_imgMakeSentence": "看图组句",
-    "core_writing_imgWrite": "看图撰写",
-    "core_writing_questionMakeSentence": "问题组句",
-    "core_writing_questionWrite": "问题撰写",
-}
-
-
-def step1_export_data():
-    """步骤1：从数据库导出数据"""
-    print("=" * 60)
-    print("步骤1：数据导出")
-    print("=" * 60)
-
-    # 加载环境变量
-    load_dotenv()
-
-    # 获取数据库配置
-    db_config = {
-        'host': os.getenv('PG_DB_HOST'),
-        'port': os.getenv('PG_DB_PORT'),
-        'user': os.getenv('PG_DB_USER'),
-        'password': os.getenv('PG_DB_PASSWORD'),
-        'database': os.getenv('PG_DB_DATABASE')
-    }
-
-    # 转换时间格式
-    start_datetime = datetime.strptime(START_DATE, "%Y%m%d").strftime("%Y-%m-%d 00:00:00")
-    end_datetime = datetime.strptime(END_DATE, "%Y%m%d").strftime("%Y-%m-%d 23:59:59")
-
-    print(f"时间范围: {start_datetime} ~ {end_datetime}")
-
-    # 连接数据库
-    conn = psycopg2.connect(**db_config)
-
-    # 存储所有表的数据
-    all_data = []
-
-    # 遍历8个分表
-    for i in range(8):
-        table_name = f"user_component_play_record_{i}"
-        print(f"正在读取表: {table_name}")
-
-        # SQL查询
-        query = f"""
-        SELECT
-            user_id,
-            session_id,
-            c_type,
-            c_id,
-            play_result,
-            user_behavior_info,
-            updated_at
-        FROM {table_name}
-        WHERE updated_at >= %s
-          AND updated_at <= %s
-          AND c_type IS NOT NULL
-          AND c_id IS NOT NULL
-        """
-
-        # 执行查询
-        df = pd.read_sql_query(query, conn, params=(start_datetime, end_datetime))
-        all_data.append(df)
-        print(f"  - 读取到 {len(df)} 条记录")
-
-    # 关闭数据库连接
-    conn.close()
-
-    # 合并所有数据
-    result_df = pd.concat(all_data, ignore_index=True)
-    print(f"\n总共获取 {len(result_df)} 条记录")
-
-    # 移除 updated_at 字段的时区信息（Excel不支持带时区的datetime）
-    if 'updated_at' in result_df.columns and not result_df.empty:
-        result_df['updated_at'] = result_df['updated_at'].dt.tz_localize(None)
-
-    # 确保输出目录存在
-    os.makedirs(OUTPUT_DIR, exist_ok=True)
-
-    # 生成输出文件名
-    output_filename = f"component_record_{START_DATE}_{END_DATE}.xlsx"
-    output_path = os.path.join(OUTPUT_DIR, output_filename)
-
-    # 导出到Excel
-    result_df.to_excel(output_path, index=False, engine='openpyxl')
-    print(f"数据已导出到: {output_path}")
-    print()
-
-    return output_path
-
-
-def get_component_info_from_mysql(stats_df):
-    """从MySQL获取组件配置信息"""
-    # 加载环境变量
-    load_dotenv()
-
-    # 获取MySQL配置
-    mysql_config = {
-        'host': os.getenv('MYSQL_HOST'),
-        'user': os.getenv('MYSQL_USERNAME'),
-        'password': os.getenv('MYSQL_PASSWORD'),
-        'database': os.getenv('MYSQL_DATABASE'),
-        'port': int(os.getenv('MYSQL_PORT', 3306)),
-        'charset': 'utf8mb4'
-    }
-
-    print("正在连接MySQL数据库...")
-    conn = pymysql.connect(**mysql_config)
-
-    try:
-        # 分别处理 mid 和 core 类型的组件
-        mid_records = stats_df[stats_df['c_type'].str.startswith('mid', na=False)][['c_type', 'c_id']]
-        core_records = stats_df[stats_df['c_type'].str.startswith('core', na=False)][['c_type', 'c_id']]
-
-        # 存储组件信息的字典，key 为 "c_type-c_id"
-        component_info = {}
-
-        # 查询 middle_interaction_component 表
-        if not mid_records.empty:
-            print(f"正在查询 middle_interaction_component 表，共 {len(mid_records)} 个组件...")
-
-            # 获取唯一的 c_type 和 c_id 组合
-            mid_unique = mid_records.drop_duplicates()
-
-            for _, row in mid_unique.iterrows():
-                c_type = row['c_type']
-                c_id = row['c_id']
-
-                query = """
-                SELECT title, component_config
-                FROM middle_interaction_component
-                WHERE c_type = %s AND c_id = %s
-                """
-                result = pd.read_sql_query(query, conn, params=(c_type, c_id))
-
-                if not result.empty:
-                    key = f"{c_type}-{c_id}"
-                    component_info[key] = {
-                        'title': result['title'].iloc[0],
-                        'component_config': result['component_config'].iloc[0]
-                    }
-
-            print(f"  - 查询到 {len([k for k in component_info.keys() if k.startswith('mid')])} 个组件信息")
-
-        # 查询 core_interaction_component 表
-        if not core_records.empty:
-            print(f"正在查询 core_interaction_component 表，共 {len(core_records)} 个组件...")
-
-            # 获取唯一的 c_type 和 c_id 组合
-            core_unique = core_records.drop_duplicates()
-
-            for _, row in core_unique.iterrows():
-                c_type = row['c_type']
-                c_id = row['c_id']
-
-                query = """
-                SELECT title, component_config
-                FROM core_interaction_component
-                WHERE c_type = %s AND c_id = %s
-                """
-                result = pd.read_sql_query(query, conn, params=(c_type, c_id))
-
-                if not result.empty:
-                    key = f"{c_type}-{c_id}"
-                    component_info[key] = {
-                        'title': result['title'].iloc[0],
-                        'component_config': result['component_config'].iloc[0]
-                    }
-
-            print(f"  - 查询到 {len([k for k in component_info.keys() if k.startswith('core')])} 个组件信息")
-
-    finally:
-        conn.close()
-
-    return component_info
-
-
-def step2_statistics(input_file):
-    """步骤2：数据统计"""
-    print("=" * 60)
-    print("步骤2：数据统计")
-    print("=" * 60)
-
-    # 读取步骤1导出的Excel文件，c_id作为字符串读取以保留前导零
-    print(f"正在读取文件: {input_file}")
-    df = pd.read_excel(input_file, engine='openpyxl', dtype={'c_id': str})
-    print(f"读取到 {len(df)} 条记录")
-
-    # 筛选 c_type 和 c_id 非空的记录
-    df_filtered = df[(df['c_type'].notna()) & (df['c_id'].notna())].copy()
-    print(f"筛选后 {len(df_filtered)} 条有效记录")
-
-    # 确保c_type和c_id都是字符串类型（保留c_id的前导零）
-    df_filtered['c_type'] = df_filtered['c_type'].astype(str)
-    df_filtered['c_id'] = df_filtered['c_id'].astype(str)
-
-    # 创建组件ID（c_type-c_id）
-    df_filtered['component_id'] = df_filtered['c_type'] + '-' + df_filtered['c_id']
-
-    # 按组件ID分组统计
-    stats_list = []
-
-    for component_id, group in df_filtered.groupby('component_id'):
-        # 获取原始的 c_type 和 c_id
-        c_type = group['c_type'].iloc[0]
-        c_id = group['c_id'].iloc[0]
-
-        # 总数量
-        total_count = len(group)
-
-        # 各状态数量
-        perfect_count = len(group[group['play_result'] == 'Perfect'])
-        good_count = len(group[group['play_result'] == 'Good'])
-        pass_count = len(group[group['play_result'] == 'Pass'])
-        oops_count = len(group[group['play_result'] == 'Oops'])
-        failed_count = len(group[group['play_result'] == 'Failed'])
-        perfect_good_count = len(group[group['play_result'].isin(['Perfect', 'Good'])])
-
-        # 计算比例（保留两位小数）
-        perfect_ratio = round(perfect_count / total_count, 2) if total_count > 0 else 0
-        good_ratio = round(good_count / total_count, 2) if total_count > 0 else 0
-        pass_ratio = round(pass_count / total_count, 2) if total_count > 0 else 0
-        oops_ratio = round(oops_count / total_count, 2) if total_count > 0 else 0
-        failed_ratio = round(failed_count / total_count, 2) if total_count > 0 else 0
-        perfect_good_ratio = round(perfect_good_count / total_count, 2) if total_count > 0 else 0
-
-        stats_list.append({
-            'component_id': component_id,
-            'c_type': c_type,
-            'c_id': c_id,
-            '总数量': total_count,
-            'Perfect数量': perfect_count,
-            'Good数量': good_count,
-            'Pass数量': pass_count,
-            'Oops数量': oops_count,
-            'Failed数量': failed_count,
-            'Perfect+Good数量': perfect_good_count,
-            'Perfect比例': perfect_ratio,
-            'Good比例': good_ratio,
-            'Pass比例': pass_ratio,
-            'Oops比例': oops_ratio,
-            'Failed比例': failed_ratio,
-            'Perfect+Good比例': perfect_good_ratio
-        })
-
-    # 创建统计结果DataFrame
-    stats_df = pd.DataFrame(stats_list)
-
-    print(f"统计了 {len(stats_df)} 个不同的组件")
-
-    # 从MySQL获取组件配置信息
-    print("\n" + "=" * 60)
-    print("正在从MySQL获取组件配置信息...")
-    print("=" * 60)
-    component_info = get_component_info_from_mysql(stats_df)
-
-    # 添加新字段：title, component_config, 组件类型
-    # 使用 component_id (c_type-c_id) 作为 key 来匹配
-    stats_df['title'] = stats_df['component_id'].apply(lambda x: component_info.get(x, {}).get('title', ''))
-    stats_df['component_config'] = stats_df['component_id'].apply(lambda x: component_info.get(x, {}).get('component_config', ''))
-    stats_df['组件类型'] = stats_df['c_type'].apply(lambda x: C_TYPE_MAPPING.get(x, ''))
-
-    # 重新排列列顺序：将新增字段放在 c_type, c_id 后面
-    columns_order = [
-        'component_id', 'c_type', 'c_id',
-        'title', 'component_config', '组件类型',  # 新增字段
-        '总数量',
-        'Perfect数量', 'Good数量', 'Pass数量', 'Oops数量', 'Failed数量', 'Perfect+Good数量',
-        'Perfect比例', 'Good比例', 'Pass比例', 'Oops比例', 'Failed比例', 'Perfect+Good比例'
-    ]
-    stats_df = stats_df[columns_order]
-
-    # 生成输出文件名（在原文件名后追加_stats）
-    output_filename = os.path.basename(input_file).replace('.xlsx', '_stats.xlsx')
-    output_path = os.path.join(OUTPUT_DIR, output_filename)
-
-    # 导出到Excel
-    stats_df.to_excel(output_path, index=False, engine='openpyxl')
-    print(f"\n统计结果已导出到: {output_path}")
-    print()
-
-    return output_path
-
-
-def main():
-    export_file = None
-
-    # 执行步骤1：数据导出
-    if RUN_STEP1:
-        export_file = step1_export_data()
-
-    # 执行步骤2：数据统计
-    if RUN_STEP2:
-        # 如果步骤1没有执行，需要手动指定文件路径
-        if export_file is None:
-            export_file = os.path.join(OUTPUT_DIR, f"component_record_{START_DATE}_{END_DATE}.xlsx")
-            if not os.path.exists(export_file):
-                print(f"错误：找不到文件 {export_file}")
-                print("请先执行步骤1或确保文件存在")
-                return
-
-        step2_statistics(export_file)
-
-    print("=" * 60)
-    print("处理完成！")
-    print("=" * 60)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/makee_vala/git_scripts/export_lesson_review.py b/makee_vala/git_scripts/export_lesson_review.py
deleted file mode 100644
index 8808023..0000000
--- a/makee_vala/git_scripts/export_lesson_review.py
+++ /dev/null
@@ -1,572 +0,0 @@
-"""
-** 不要改动我的需求描述，直接在需求后面写代码即可 **
-
-课程巩固  数据导出 和 分析
-
------------
-需求一:
-在 PGsql数据库中  筛选数据
-数据库相关配置  从.env中读取:
-PG_DB_HOST = xxx
-PG_DB_PORT = xxx
-PG_DB_USER = xxx
-PG_DB_PASSWORD = xxx
-PG_DB_DATABASE = xxx
-
-读取以下数据表: user_unit_review_question_result
-
-支持输入时间范围
-起始时间 和 截止时间   配置格式:   "20250110"
-
-数据表中的时间字段为 updated_at , 格式样例: "2025-11-05 19:35:46.698246+08:00"
-
-在这些时间范围内，筛选数据 (要求deleted_at字段内容为null)
-
-导出以下字段:
-
-user_id
-unit_id  （读取每条记录的story_id， 根据 get_id_2_unit_index 函数返回的映射表 映射到 unit_id）
-lesson_id （读取chapter_id， 根据该值 查询 mysql表 vala_game_chapter 的 id == chapter_id， 并返回该记录的 index字段的值）
-question_list
-题目总数
-正确数量
-正确率
-play_time_seconds （读取 play_time 把ms数据转换为秒 保留整数部分）
-updated_at
-
-其中 题目总数  正确数量  正确率  都通过  question_list 计算，
-该字段为 list of json:
-[
-  {
-    "question": {
-      "type": "vocab_meaning_meaning",
-      "id": "20-0",
-      "title": "“clean” 的意思是什么？",
-      "npcId": -1
-    },
-    "answers": [
-      "2"
-    ],
-    "optionList": [
-      {
-        "option": "爬行"
-      },
-      {
-        "option": "清晰的"
-      },
-      {
-        "option": "清洁"
-      }
-    ],
-    "isRight": true
-  },
-  ...
-]
-
-每个元素为一道题目， 题目中有 "isRight": true 代表用户做对了。
-
-导出为excel文件
-----
-需求二  基于 需求一的输出文件  作为 输入文件  进行数据聚合。
-
-聚合的维度是每道题目
-
-根据 question_list 中的 每个题目  取 question -> id 作为唯一标识
-
-统计每个题目  
-总记录数量  
-正确数量  
-正确率
-
-并查询mysql表  补充题目的以下信息:
-步骤一中，每个题目id的格式是  num1-num2 (question -> id)
-查询vala_kp_question表
-其中num1部分 用于 检索vala_kp_question 中的 id,  每个id下 可能有多道题目 在 vala_kp_question的 question 字段 是一个list, num2为question 字段中的索引
-
-补充以下字段:
-kp_id (vala_kp_question字段)
-category (vala_kp_question字段)
-skill (vala_kp_question字段)
-type (vala_kp_question字段)
-题目配置 (question字段中 对应 num2 索引的内容)
-
-最终针对每道题目输出以下字段:
-出现位置 (list,  把所有出现的位置拼接 unit_id +"_"+ lesson_id  例如:"unit10-lesson1" 这样的格式)
-question_id (question -> id)
-kp_id (vala_kp_question字段)
-category (vala_kp_question字段)
-skill (vala_kp_question字段)
-type (vala_kp_question字段)
-题目配置 (question字段中 对应 num2 索引的内容)
-总记录数量  
-正确数量  
-正确率
-
-导出为excel 命名为 步骤一文件_stat.xlsx
-
-所有需要配置的参数  放在脚本开头位置
-
-"""
-
-import os
-import pymysql
-import psycopg2
-from psycopg2.extras import RealDictCursor
-from datetime import datetime
-import pandas as pd
-from dotenv import load_dotenv
-import json
-from collections import defaultdict
-
-# 加载环境变量
-load_dotenv()
-
-# ============ 配置参数 ============
-START_DATE = "20250915"  # 起始时间
-END_DATE = "20251122"    # 截止时间
-OUTPUT_NAME = "lesson_review_data_{}_{}.xlsx".format(START_DATE, END_DATE)  # 输出文件名
-OUTPUT_FILENAME = os.path.join("./output", OUTPUT_NAME)
-# =================================
-
-def get_mysql_connection():
-    """获取MySQL连接"""
-    db_host = os.getenv('MYSQL_HOST')
-    db_user = os.getenv('MYSQL_USERNAME')
-    db_password = os.getenv('MYSQL_PASSWORD')
-    db_name = os.getenv('MYSQL_DATABASE')
-    db_port = os.getenv('MYSQL_PORT')
-
-    if not all([db_host, db_user, db_password, db_name]):
-        raise Exception("Error: Missing MySQL configuration in .env file.")
-
-    connection = pymysql.connect(
-        host=db_host,
-        user=db_user,
-        password=db_password,
-        database=db_name,
-        port=int(db_port) if db_port else 3306,
-        cursorclass=pymysql.cursors.DictCursor
-    )
-    return connection
-
-def get_pgsql_connection():
-    """获取PGsql连接"""
-    pg_host = os.getenv('PG_DB_HOST')
-    pg_port = os.getenv('PG_DB_PORT')
-    pg_user = os.getenv('PG_DB_USER')
-    pg_password = os.getenv('PG_DB_PASSWORD')
-    pg_database = os.getenv('PG_DB_DATABASE')
-
-    if not all([pg_host, pg_port, pg_user, pg_password, pg_database]):
-        raise Exception("Error: Missing PGsql configuration in .env file.")
-
-    connection = psycopg2.connect(
-        host=pg_host,
-        port=int(pg_port),
-        user=pg_user,
-        password=pg_password,
-        database=pg_database,
-        cursor_factory=RealDictCursor
-    )
-    return connection
-
-def get_id_2_unit_index():
-    """获取story_id到unit_id的映射"""
-    print("正在获取 story_id 到 unit_id 的映射...")
-    connection = get_mysql_connection()
-
-    try:
-        with connection.cursor() as cursor:
-            sql = """
-            SELECT *
-            FROM `vala_game_info`
-            WHERE id > 0
-              AND `vala_game_info`.`deleted_at` IS NULL
-            ORDER BY season_package_id asc, `index` asc
-            """
-            cursor.execute(sql)
-            results = cursor.fetchall()
-
-            id_2_unit_index = {}
-            for index, row in enumerate(results):
-                id_2_unit_index[row['id']] = index
-
-            print(f"成功获取 {len(id_2_unit_index)} 个单元映射")
-            return id_2_unit_index
-    finally:
-        connection.close()
-
-def get_chapter_id_to_lesson_id():
-    """获取chapter_id到lesson_id的映射"""
-    print("正在获取 chapter_id 到 lesson_id 的映射...")
-    connection = get_mysql_connection()
-
-    try:
-        with connection.cursor() as cursor:
-            sql = """
-            SELECT id, `index`
-            FROM `vala_game_chapter`
-            WHERE deleted_at IS NULL
-            """
-            cursor.execute(sql)
-            results = cursor.fetchall()
-
-            chapter_id_to_lesson_id = {}
-            for row in results:
-                chapter_id_to_lesson_id[row['id']] = row['index']
-
-            print(f"成功获取 {len(chapter_id_to_lesson_id)} 个课程映射")
-            return chapter_id_to_lesson_id
-    finally:
-        connection.close()
-
-def analyze_question_list(question_list_json):
-    """分析题目列表，返回题目总数、正确数量、正确率"""
-    try:
-        if isinstance(question_list_json, str):
-            question_list = json.loads(question_list_json)
-        else:
-            question_list = question_list_json
-
-        if not isinstance(question_list, list):
-            return 0, 0, 0
-
-        total = len(question_list)
-        correct = sum(1 for q in question_list if q.get('isRight') == True)
-        accuracy = round(correct / total * 100, 2) if total > 0 else 0
-
-        return total, correct, accuracy
-    except Exception as e:
-        print(f"解析题目列表出错: {e}")
-        return 0, 0, 0
-
-def export_step1():
-    """需求一：导出原始数据"""
-    print("=" * 50)
-    print("开始执行需求一：导出原始数据")
-    print("=" * 50)
-
-    # 获取映射关系
-    id_2_unit_index = get_id_2_unit_index()
-    chapter_id_to_lesson_id = get_chapter_id_to_lesson_id()
-
-    # 连接PGsql
-    print("正在连接 PGsql 数据库...")
-    pg_conn = get_pgsql_connection()
-
-    try:
-        with pg_conn.cursor() as cursor:
-            # 构建时间范围
-            start_datetime = datetime.strptime(START_DATE, "%Y%m%d")
-            end_datetime = datetime.strptime(END_DATE, "%Y%m%d")
-            end_datetime = end_datetime.replace(hour=23, minute=59, second=59)
-
-            sql = """
-            SELECT user_id, story_id, chapter_id, question_list, play_time, updated_at
-            FROM user_unit_review_question_result
-            WHERE updated_at >= %s
-              AND updated_at <= %s
-              AND deleted_at IS NULL
-            ORDER BY updated_at
-            """
-
-            print(f"查询时间范围: {start_datetime} 至 {end_datetime}")
-            cursor.execute(sql, (start_datetime, end_datetime))
-            results = cursor.fetchall()
-
-            print(f"查询到 {len(results)} 条记录")
-
-            # 处理数据
-            export_data = []
-            for row in results:
-                user_id = row['user_id']
-                story_id = row['story_id']
-                chapter_id = row['chapter_id']
-                question_list_raw = row['question_list']
-                play_time = row['play_time']
-                updated_at = row['updated_at']
-
-                # 确保 question_list 是 Python 对象（PGsql 的 jsonb 会自动转换）
-                # 如果是字符串，先解析；如果已经是对象，直接使用
-                if isinstance(question_list_raw, str):
-                    try:
-                        question_list = json.loads(question_list_raw)
-                    except:
-                        question_list = []
-                else:
-                    question_list = question_list_raw if question_list_raw else []
-
-                # 映射 unit_id
-                unit_id = id_2_unit_index.get(story_id, -1)
-
-                # 映射 lesson_id
-                lesson_id = chapter_id_to_lesson_id.get(chapter_id, -1)
-
-                # 分析题目列表
-                total, correct, accuracy = analyze_question_list(question_list)
-
-                # 转换播放时长（ms -> s）
-                play_time_seconds = int(play_time / 1000) if play_time else 0
-
-                # 转换question_list为字符串（统一序列化为JSON字符串）
-                question_list_str = json.dumps(question_list, ensure_ascii=False) if question_list else ""
-
-                # 移除时区信息（Excel不支持带时区的datetime）
-                updated_at_no_tz = updated_at.replace(tzinfo=None) if updated_at else None
-
-                export_data.append({
-                    'user_id': user_id,
-                    'unit_id': unit_id,
-                    'lesson_id': lesson_id,
-                    'question_list': question_list_str,
-                    '题目总数': total,
-                    '正确数量': correct,
-                    '正确率': accuracy,
-                    'play_time_seconds': play_time_seconds,
-                    'updated_at': updated_at_no_tz
-                })
-
-            # 导出到Excel
-            df = pd.DataFrame(export_data)
-
-            # 确保输出目录存在
-            os.makedirs(os.path.dirname(OUTPUT_FILENAME), exist_ok=True)
-
-            df.to_excel(OUTPUT_FILENAME, index=False, engine='openpyxl')
-            print(f"成功导出 {len(export_data)} 条记录到: {OUTPUT_FILENAME}")
-
-            return OUTPUT_FILENAME
-
-    finally:
-        pg_conn.close()
-
-def get_all_kp_questions(question_ids):
-    """批量获取所有题目信息，避免N+1查询问题"""
-    print(f"正在批量查询 {len(question_ids)} 道题目的信息...")
-
-    # 解析所有question_id，获取需要查询的kp_question id列表
-    kp_ids = set()
-    for qid in question_ids:
-        try:
-            parts = qid.split('-')
-            if len(parts) == 2:
-                kp_ids.add(int(parts[0]))
-        except:
-            continue
-
-    print(f"需要查询 {len(kp_ids)} 条 vala_kp_question 记录")
-
-    # 批量查询MySQL
-    connection = get_mysql_connection()
-    kp_data_map = {}
-
-    try:
-        with connection.cursor() as cursor:
-            # 使用IN查询批量获取
-            if kp_ids:
-                placeholders = ','.join(['%s'] * len(kp_ids))
-                sql = f"""
-                SELECT id, kp_id, category, skill, type, question
-                FROM vala_kp_question
-                WHERE id IN ({placeholders}) AND deleted_at IS NULL
-                """
-                cursor.execute(sql, tuple(kp_ids))
-                results = cursor.fetchall()
-
-                print(f"成功查询到 {len(results)} 条记录")
-
-                # 构建映射表
-                for row in results:
-                    kp_data_map[row['id']] = row
-    finally:
-        connection.close()
-
-    # 为每个question_id构建结果
-    question_info_map = {}
-    for question_id in question_ids:
-        try:
-            parts = question_id.split('-')
-            if len(parts) != 2:
-                question_info_map[question_id] = (None, None, None, None, None)
-                continue
-
-            kp_id = int(parts[0])
-            question_index = int(parts[1])
-
-            kp_data = kp_data_map.get(kp_id)
-            if not kp_data:
-                question_info_map[question_id] = (None, None, None, None, None)
-                continue
-
-            # 解析question字段
-            question_list = kp_data['question']
-            if isinstance(question_list, str):
-                question_list = json.loads(question_list)
-
-            # 获取指定索引的题目配置
-            question_config = None
-            if isinstance(question_list, list) and 0 <= question_index < len(question_list):
-                question_config = json.dumps(question_list[question_index], ensure_ascii=False)
-
-            question_info_map[question_id] = (
-                kp_data['kp_id'],
-                kp_data['category'],
-                kp_data['skill'],
-                kp_data['type'],
-                question_config
-            )
-        except Exception as e:
-            print(f"处理题目信息出错 ({question_id}): {e}")
-            question_info_map[question_id] = (None, None, None, None, None)
-
-    return question_info_map
-
-def export_step2(input_filename):
-    """需求二：数据聚合统计"""
-    print("=" * 50)
-    print("开始执行需求二：数据聚合统计")
-    print("=" * 50)
-
-    # 读取步骤一的输出文件
-    print(f"正在读取文件: {input_filename}")
-    df = pd.read_excel(input_filename, engine='openpyxl')
-
-    print(f"读取到 {len(df)} 条记录")
-
-    # 按题目聚合统计
-    question_stats = defaultdict(lambda: {
-        'locations': set(),
-        'total_count': 0,
-        'correct_count': 0
-    })
-
-    parse_success_count = 0
-    parse_fail_count = 0
-    empty_question_list_count = 0
-    processed_question_count = 0
-
-    for idx, row in df.iterrows():
-        unit_id = row['unit_id']
-        lesson_id = row['lesson_id']
-        question_list_str = row['question_list']
-
-        # 解析question_list
-        try:
-            if pd.isna(question_list_str) or not question_list_str:
-                question_list = []
-                empty_question_list_count += 1
-            else:
-                question_list = json.loads(question_list_str)
-                parse_success_count += 1
-        except Exception as e:
-            question_list = []
-            parse_fail_count += 1
-            if parse_fail_count <= 3:
-                print(f"[警告] 第 {idx+1} 条记录解析失败: {e}")
-
-        # 统计每道题目
-        for question_item in question_list:
-            if not isinstance(question_item, dict):
-                continue
-
-            question = question_item.get('question', {})
-            question_id = question.get('id')
-            is_right = question_item.get('isRight', False)
-
-            if not question_id:
-                continue
-
-            # 添加出现位置
-            location = f"unit{unit_id}-lesson{lesson_id}"
-            question_stats[question_id]['locations'].add(location)
-
-            # 统计数量
-            question_stats[question_id]['total_count'] += 1
-            if is_right:
-                question_stats[question_id]['correct_count'] += 1
-
-            processed_question_count += 1
-
-    print(f"\n解析统计:")
-    print(f"  - 解析成功: {parse_success_count} 条")
-    print(f"  - 解析失败: {parse_fail_count} 条")
-    print(f"  - question_list 为空: {empty_question_list_count} 条")
-    print(f"  - 处理的题目总数: {processed_question_count} 道")
-    print(f"  - 聚合得到不同题目: {len(question_stats)} 道")
-
-    # 批量获取所有题目信息（优化性能）
-    all_question_ids = list(question_stats.keys())
-    question_info_map = get_all_kp_questions(all_question_ids)
-
-    # 构建导出数据
-    print(f"\n正在构建导出数据...")
-    export_data = []
-    for idx, (question_id, stats) in enumerate(question_stats.items()):
-        if (idx + 1) % 100 == 0:
-            print(f"  已处理 {idx + 1}/{len(question_stats)} 道题目")
-
-        # 从批量查询结果中获取题目信息
-        kp_id, category, skill, type_field, question_config = question_info_map.get(
-            question_id, (None, None, None, None, None)
-        )
-
-        # 计算正确率
-        total = stats['total_count']
-        correct = stats['correct_count']
-        accuracy = round(correct / total * 100, 2) if total > 0 else 0
-
-        # 出现位置列表
-        locations_list = sorted(list(stats['locations']))
-        locations_str = ', '.join(locations_list)
-
-        export_data.append({
-            '出现位置': locations_str,
-            'question_id': question_id,
-            'kp_id': kp_id,
-            'category': category,
-            'skill': skill,
-            'type': type_field,
-            '题目配置': question_config,
-            '总记录数量': total,
-            '正确数量': correct,
-            '正确率': accuracy
-        })
-
-    # 导出到Excel
-    output_stat_filename = input_filename.replace('.xlsx', '_stat.xlsx')
-    df_stat = pd.DataFrame(export_data)
-
-    print(f"\n正在导出到 Excel...")
-    df_stat.to_excel(output_stat_filename, index=False, engine='openpyxl')
-
-    print(f"成功导出 {len(export_data)} 道题目的统计数据到: {output_stat_filename}")
-
-    return output_stat_filename
-
-def main():
-    """主函数"""
-    try:
-        # 执行需求一
-        step1_output = export_step1()
-
-        print("\n")
-
-        # 执行需求二
-        step2_output = export_step2(step1_output)
-
-        print("\n" + "=" * 50)
-        print("所有任务完成！")
-        print(f"需求一输出文件: {step1_output}")
-        print(f"需求二输出文件: {step2_output}")
-        print("=" * 50)
-
-    except Exception as e:
-        print(f"执行出错: {e}")
-        import traceback
-        traceback.print_exc()
-
-if __name__ == "__main__":
-    main()
-
-
-
diff --git a/makee_vala/git_scripts/export_mid_config.py b/makee_vala/git_scripts/export_mid_config.py
deleted file mode 100644
index c536621..0000000
--- a/makee_vala/git_scripts/export_mid_config.py
+++ /dev/null
@@ -1,181 +0,0 @@
-"""
-MYSQL_HOST=xxx
-MYSQL_USERNAME=xxx
-MYSQL_PASSWORD=xxx
-MYSQL_DATABASE=xxx
-MYSQL_PORT=xxx
-
-以上环境变量已配置在 .env 中。
-
-我要导出一个数据表的某些记录  并添加一些字段。
-
-表名:middle_interaction_component
-
-根据 c_id 过滤数据:
-c_id为 7位 字符串   其中 {两位季度编号}{两位单元编号}{三位组件编号}  过滤其中 单元编号部分为 00~20 以及 26 的对应记录   也就是  xx00xxx ~ xx20xxx 以及 xx26xxx 的记录
-
-导出以下字段:
-id 
-c_type
-c_id
-title
-component_config
-related_path
-kp_relation_info
-created_at
-updated_at
-
-新增以下字段:
-1. “组件类型”:  根据以下映射  把 c_type 转成中文名:xx互动
-{
-    "词汇类": {
-        "物品互动": "mid_vocab_item",
-        "图片互动": "mid_vocab_image",
-        "填词互动": "mid_vocab_fillBlank",
-        "指令互动": "mid_vocab_instruction"
-    },
-    "句子类": {
-        "对话互动": "mid_sentence_dialogue",
-        "语音互动": "mid_sentence_voice",
-        "材料互动": "mid_sentence_material",
-        "造句互动": "mid_sentence_makeSentence"
-    },
-    "语法类": {
-        "挖空互动": "mid_grammar_cloze",
-        "组句互动": "mid_grammar_sentence"
-    },
-    "发音类": {
-        "发音互动": "mid_pron_pron"
-    
-}
-
-2. “是否关联了知识点”:  如果 kp_relation_info 不为空 且包含至少一个具体的知识点编号 则为 “是”  否则为 “否”
-有效关联知识点的一个样例数据:[{"kpId":"0326011","kpType":"sentence","kpTitle":"What does... look like?","kpSkill":"sentence_meaning","kpSkillName":"语义"}]
-
-3. "是否已组课"：  如果 related_path  不为空 则为 “是”  否则为 “否”
-一个有效的  related_path  样例: {"packageId":13,"unitId":40,"lessonId":213,"packageIndex":3,"unitIndex":2,"lessonIndex":2}
-
-4. “前置对话”:
-component_config 中的 preDialog 字段， 如果不存在 则为 “空”
-{"asrPrompt":"","cId":"0326022","cType":"mid_sentence_dialogue","meaning":"语义；语音","mode":"read","postDialog":[{"content":"Leave it to me.","npcId":540,"npcName":"Victoria","type":"npc"}],"preDialog":[{"content":"But do we still have time?","npcId":30,"type":"user"}],"question":{"content":"What if we miss the spaceship?","mode":"read","npcId":30,"type":"user"},"resourceMapping":{"Medic":503},"title":"询问万一错过飞船怎么办"}
-
-5. "后置对话":
-component_config 中的 postDialog 字段， 如果不存在 则为 “空”
-
-6. 前置/后置对话中非user角色数量
-component_config 中的 preDialog 以及 postDialog 字段中， 统计所有 type 为 npc ,根据 npcId 去重后的角色数量
-例如 
----
-前置对话：
-[{"content":"But do we still have time?","npcId":30,"type":"user"}]
-后置对话：
-[{"content":"Leave it to me.","npcId":540,"npcName":"Victoria","type":"npc"}]
-非user角色数量： 1
----
-
----
-前置对话：
-[{"content":"But do we still have time?","npcId":31,"type":"npc","npcName":"Ben"}]
-后置对话：
-[{"content":"Leave it to me.","npcId":540,"npcName":"Victoria","type":"npc"}]
-非user角色数量： 2
----
-
-最终输出一个 excel文档。
-
-"""
-
-import os
-import json
-from datetime import datetime
-import pymysql
-import pandas as pd
-from dotenv import load_dotenv
-
-load_dotenv()
-
-# 组件类型映射
-TYPE_MAP = {
-    "mid_vocab_item": "物品互动", "mid_vocab_image": "图片互动", 
-    "mid_vocab_fillBlank": "填词互动", "mid_vocab_instruction": "指令互动",
-    "mid_sentence_dialogue": "对话互动", "mid_sentence_voice": "语音互动",
-    "mid_sentence_material": "材料互动", "mid_sentence_makeSentence": "造句互动",
-    "mid_grammar_cloze": "挖空互动", "mid_grammar_sentence": "组句互动",
-    "mid_pron_pron": "发音互动"
-}
-
-def get_data():
-    conn = pymysql.connect(
-        host=os.getenv('MYSQL_HOST'), port=int(os.getenv('MYSQL_PORT', 3306)),
-        user=os.getenv('MYSQL_USERNAME'), password=os.getenv('MYSQL_PASSWORD'),
-        database=os.getenv('MYSQL_DATABASE'), charset='utf8mb4'
-    )
-    
-    # 构建c_id过滤条件
-    conditions = [f"c_id LIKE '__{i:02d}___'" for i in range(21)] + ["c_id LIKE '__26___'"]
-    where_clause = " OR ".join(conditions)
-    
-    sql = f"""SELECT id, c_type, c_id, title, component_config, related_path, 
-              kp_relation_info, created_at, updated_at 
-              FROM middle_interaction_component WHERE {where_clause}"""
-    
-    df = pd.read_sql(sql, conn)
-    conn.close()
-    return df
-
-def process_data(df):
-    # 组件类型
-    df['组件类型'] = df['c_type'].map(TYPE_MAP).fillna(df['c_type'])
-    
-    # 是否关联知识点
-    def check_kp(kp_info):
-        if not kp_info: return "否"
-        try:
-            data = json.loads(kp_info)
-            return "是" if isinstance(data, list) and any(item.get('kpId') for item in data) else "否"
-        except: return "否"
-    
-    df['是否关联了知识点'] = df['kp_relation_info'].apply(check_kp)
-    
-    # 是否已组课
-    def check_lesson(path):
-        if not path: return "否"
-        try: return "是" if json.loads(path) else "否"
-        except: return "否"
-    
-    df['是否已组课'] = df['related_path'].apply(check_lesson)
-    
-    # 前置/后置对话及NPC统计
-    def extract_dialog(config, dialog_type):
-        if not config: return "空"
-        try:
-            data = json.loads(config)
-            dialog = data.get(dialog_type, [])
-            return json.dumps(dialog, ensure_ascii=False) if dialog else "空"
-        except: return "空"
-    
-    def count_npc(config):
-        if not config: return 0
-        try:
-            data = json.loads(config)
-            npc_ids = set()
-            for dialog in ['preDialog', 'postDialog']:
-                for item in data.get(dialog, []):
-                    if item.get('type') == 'npc' and 'npcId' in item:
-                        npc_ids.add(item['npcId'])
-            return len(npc_ids)
-        except: return 0
-    
-    df['前置对话'] = df['component_config'].apply(lambda x: extract_dialog(x, 'preDialog'))
-    df['后置对话'] = df['component_config'].apply(lambda x: extract_dialog(x, 'postDialog'))
-    df['前置/后置对话中非user角色数量'] = df['component_config'].apply(count_npc)
-    
-    return df
-
-if __name__ == "__main__":
-    df = get_data()
-    df = process_data(df)
-    
-    filename = f"middle_interaction_component_export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
-    df.to_excel(filename, index=False)
-    print(f"导出完成: {filename}")
diff --git a/makee_vala/git_scripts/export_realtime_asr.py b/makee_vala/git_scripts/export_realtime_asr.py
deleted file mode 100644
index e042530..0000000
--- a/makee_vala/git_scripts/export_realtime_asr.py
+++ /dev/null
@@ -1,385 +0,0 @@
-"""
-导出  流式语音音频  脚本
-
-v1.0
----
-原始数据存储于ES数据库中
-索引: llm_realtime_asr_log
-
-es相关配置通过以下环境变量
-ES_HOST=xxx
-ES_PORT=9200
-ES_SCHEME=https
-ES_USER=elastic
-ES_PASSWORD=xxx  （注意这里可能有特殊符号）
-
-需要配置的内容放置在脚本最开头
-开始时间 (8位数字年月日)
-截止时间 (8位数字年月日)
-
-仅筛选 时间范围内的数据记录
-可以基于 timestamp_int 字段内容进行时间筛选  格式样例:1,769,496,892
-
-正常情况  每个 voice_id 会对应两条记录
-可以 以 voice_id为单位
-最终  按照每个 voice_id 聚合出以下数据:
-
-asr_prompt （其中一条记录会有这个内容）
-result_str （其中一条记录会有这个内容）
-timestamp (两条记录都会有，保留最新的一条对应的时间) 格式样例: 2023-12-12 12:12:12
-voice_id
-audio_url 按以下规则拼接:  https://static.valavala.com/vala_llm/realtime_asr_audio_backup/online/{8位年月日}/{voice_id}.wav     8位年月日 基于 timestamp计算  格式 20260121这种
-source （其中一条记录会有这个内容）
-
-最终导出一个excel。
----
-
-"""
-
-import os
-from datetime import datetime
-import requests
-import pandas as pd
-from dotenv import load_dotenv
-from collections import defaultdict
-import urllib3
-
-# ==================== 配置区域 ====================
-START_DATE = "20251201"  # 开始日期 (8位数字年月日)
-END_DATE = "20260131"    # 结束日期 (8位数字年月日)
-# =================================================
-
-# 加载环境变量
-load_dotenv()
-
-# ES配置
-ES_HOST = os.getenv("ES_HOST")
-ES_PORT = int(os.getenv("ES_PORT", "9200"))
-ES_SCHEME = os.getenv("ES_SCHEME", "https")
-ES_USER = os.getenv("ES_USER", "elastic")
-ES_PASSWORD = os.getenv("ES_PASSWORD")
-ES_INDEX = "llm_realtime_asr_log"
-
-# 每批处理的数据量
-SCROLL_SIZE = 1000
-SCROLL_TIMEOUT = "5m"
-
-
-def timestamp_int_from_date(date_str):
-    """将8位日期字符串转换为timestamp_int（秒级时间戳）"""
-    dt = datetime.strptime(date_str, "%Y%m%d")
-    return int(dt.timestamp())
-
-
-def format_timestamp(ts):
-    """将时间戳转换为格式化字符串"""
-    if isinstance(ts, (int, float)):
-        return datetime.fromtimestamp(ts).strftime("%Y-%m-%d %H:%M:%S")
-    return ts
-
-
-def generate_audio_url(voice_id, timestamp):
-    """生成audio_url"""
-    date_str = datetime.fromtimestamp(timestamp).strftime("%Y%m%d")
-    return f"https://static.valavala.com/vala_llm/realtime_asr_audio_backup/online/{date_str}/{voice_id}.wav"
-
-
-def connect_es():
-    """测试ES连接"""
-    print("正在测试 Elasticsearch 连接...")
-
-    # 禁用SSL警告
-    if ES_SCHEME == "https":
-        try:
-            urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-        except Exception:
-            pass
-
-    base_url = f"{ES_SCHEME}://{ES_HOST}:{ES_PORT}"
-    auth = (ES_USER, ES_PASSWORD) if ES_USER and ES_PASSWORD else None
-
-    try:
-        # 测试连接
-        resp = requests.get(
-            base_url,
-            auth=auth,
-            timeout=10,
-            verify=False if ES_SCHEME == "https" else True
-        )
-        resp.raise_for_status()
-
-        print(f"✓ 成功连接到 Elasticsearch: {ES_HOST}:{ES_PORT}")
-        return True
-    except Exception as e:
-        print(f"✗ 连接失败: {e}")
-        return False
-
-
-def query_data(start_date, end_date):
-    """查询ES数据"""
-    start_ts = timestamp_int_from_date(start_date)
-    end_ts = timestamp_int_from_date(end_date) + 86400  # 结束日期加一天，包含当天数据
-
-    print(f"\n开始查询数据...")
-    print(f"时间范围: {start_date} 至 {end_date}")
-    print(f"时间戳范围: {start_ts} 至 {end_ts}")
-
-    # 禁用SSL警告
-    if ES_SCHEME == "https":
-        try:
-            urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-        except Exception:
-            pass
-
-    base_url = f"{ES_SCHEME}://{ES_HOST}:{ES_PORT}"
-    search_url = f"{base_url}/{ES_INDEX}/_search"
-    headers = {"Content-Type": "application/json"}
-    auth = (ES_USER, ES_PASSWORD) if ES_USER and ES_PASSWORD else None
-
-    query = {
-        "query": {
-            "range": {
-                "timestamp_int": {
-                    "gte": start_ts,
-                    "lt": end_ts
-                }
-            }
-        },
-        "sort": [{"timestamp_int": {"order": "asc"}}],
-        "size": SCROLL_SIZE
-    }
-
-    try:
-        # 初始查询（使用scroll）
-        params = {"scroll": SCROLL_TIMEOUT}
-        response = requests.post(
-            search_url,
-            headers=headers,
-            json=query,
-            auth=auth,
-            params=params,
-            timeout=30,
-            verify=False if ES_SCHEME == "https" else True
-        )
-        response.raise_for_status()
-        data = response.json()
-
-        scroll_id = data.get("_scroll_id")
-        total_hits = data["hits"]["total"]["value"]
-
-        print(f"✓ 查询完成，共找到 {total_hits} 条记录")
-
-        return data, scroll_id, total_hits
-
-    except Exception as e:
-        raise RuntimeError(f"ES查询失败: {e}")
-
-
-def aggregate_by_voice_id(response, scroll_id, total_hits):
-    """按voice_id聚合数据"""
-    voice_data = defaultdict(list)
-    processed_count = 0
-
-    print("\n开始处理数据...")
-
-    # 禁用SSL警告
-    if ES_SCHEME == "https":
-        try:
-            urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-        except Exception:
-            pass
-
-    base_url = f"{ES_SCHEME}://{ES_HOST}:{ES_PORT}"
-    scroll_url = f"{base_url}/_search/scroll"
-    headers = {"Content-Type": "application/json"}
-    auth = (ES_USER, ES_PASSWORD) if ES_USER and ES_PASSWORD else None
-
-    while True:
-        hits = response["hits"]["hits"]
-
-        if not hits:
-            break
-
-        for hit in hits:
-            source = hit["_source"]
-            voice_id = source.get("voice_id")
-
-            if voice_id:
-                voice_data[voice_id].append(source)
-
-            processed_count += 1
-
-        # 打印进度
-        progress = (processed_count / total_hits) * 100
-        print(f"\r处理进度: {processed_count}/{total_hits} ({progress:.1f}%)", end="")
-
-        # 获取下一批数据
-        try:
-            scroll_response = requests.post(
-                scroll_url,
-                headers=headers,
-                json={
-                    "scroll": SCROLL_TIMEOUT,
-                    "scroll_id": scroll_id
-                },
-                auth=auth,
-                timeout=30,
-                verify=False if ES_SCHEME == "https" else True
-            )
-            scroll_response.raise_for_status()
-            response = scroll_response.json()
-
-            # 更新 scroll_id（可能会变化）
-            scroll_id = response.get("_scroll_id", scroll_id)
-
-        except Exception as e:
-            print(f"\n✗ 获取下一批数据失败: {e}")
-            break
-
-    print(f"\n✓ 数据处理完成，共处理 {processed_count} 条记录")
-    print(f"✓ 找到 {len(voice_data)} 个唯一的 voice_id")
-
-    # 清理scroll
-    try:
-        clear_scroll_url = f"{base_url}/_search/scroll"
-        requests.delete(
-            clear_scroll_url,
-            headers=headers,
-            json={"scroll_id": [scroll_id]},
-            auth=auth,
-            timeout=10,
-            verify=False if ES_SCHEME == "https" else True
-        )
-    except Exception:
-        pass  # 清理失败不影响结果
-
-    return voice_data
-
-
-def merge_voice_records(voice_data):
-    """合并voice_id的记录，只保留恰好2条记录的"""
-    print("\n开始聚合 voice_id 数据...")
-
-    merged_data = []
-    valid_count = 0
-    invalid_count = 0
-
-    for voice_id, records in voice_data.items():
-        # 只处理恰好有2条记录的voice_id
-        if len(records) != 2:
-            invalid_count += 1
-            continue
-
-        valid_count += 1
-
-        # 初始化合并后的数据
-        merged_record = {
-            "voice_id": voice_id,
-            "asr_prompt": None,
-            "result_str": None,
-            "timestamp": None,
-            "source": None,
-            "audio_url": None
-        }
-
-        # 找出最新的timestamp
-        max_timestamp = max(
-            records[0].get("timestamp_int", 0),
-            records[1].get("timestamp_int", 0)
-        )
-
-        # 合并数据
-        for record in records:
-            if record.get("asr_prompt"):
-                merged_record["asr_prompt"] = record["asr_prompt"]
-            if record.get("result_str"):
-                merged_record["result_str"] = record["result_str"]
-            if record.get("source"):
-                merged_record["source"] = record["source"]
-
-        # 设置timestamp和audio_url
-        merged_record["timestamp"] = format_timestamp(max_timestamp)
-        merged_record["audio_url"] = generate_audio_url(voice_id, max_timestamp)
-
-        merged_data.append(merged_record)
-
-    print(f"✓ 聚合完成")
-    print(f"  - 有效记录（2条/voice_id）: {valid_count}")
-    print(f"  - 无效记录（非2条/voice_id）: {invalid_count}")
-
-    return merged_data
-
-
-def export_to_excel(data, start_date, end_date):
-    """导出到Excel"""
-    if not data:
-        print("\n警告: 没有数据可导出")
-        return
-
-    print(f"\n开始导出数据到 Excel...")
-
-    # 创建DataFrame
-    df = pd.DataFrame(data)
-
-    # 调整列顺序
-    columns = ["voice_id", "asr_prompt", "result_str", "timestamp", "audio_url", "source"]
-    df = df[columns]
-
-    # 生成文件名
-    output_dir = "output"
-    os.makedirs(output_dir, exist_ok=True)
-    filename = f"realtime_asr_export_{start_date}_{end_date}.xlsx"
-    filepath = os.path.join(output_dir, filename)
-
-    # 导出Excel
-    df.to_excel(filepath, index=False, engine="openpyxl")
-
-    print(f"✓ 数据已导出到: {filepath}")
-    print(f"✓ 共导出 {len(df)} 条记录")
-
-
-def main():
-    """主函数"""
-    print("=" * 60)
-    print("流式语音 ASR 数据导出工具 v1.0")
-    print("=" * 60)
-
-    start_time = datetime.now()
-
-    try:
-        # 测试ES连接
-        if not connect_es():
-            raise Exception("无法连接到 Elasticsearch，请检查配置")
-
-        # 查询数据
-        response, scroll_id, total_hits = query_data(START_DATE, END_DATE)
-
-        if total_hits == 0:
-            print("\n没有找到符合条件的数据")
-            return
-
-        # 聚合数据
-        voice_data = aggregate_by_voice_id(response, scroll_id, total_hits)
-
-        # 合并记录
-        merged_data = merge_voice_records(voice_data)
-
-        # 导出Excel
-        export_to_excel(merged_data, START_DATE, END_DATE)
-
-        # 统计耗时
-        end_time = datetime.now()
-        duration = (end_time - start_time).total_seconds()
-
-        print(f"\n{'=' * 60}")
-        print(f"✓ 任务完成! 总耗时: {duration:.2f} 秒")
-        print(f"{'=' * 60}")
-
-    except Exception as e:
-        print(f"\n✗ 错误: {str(e)}")
-        import traceback
-        traceback.print_exc()
-
-
-if __name__ == "__main__":
-    main()
diff --git a/makee_vala/git_scripts/export_resource_name.py b/makee_vala/git_scripts/export_resource_name.py
deleted file mode 100644
index 36506d6..0000000
--- a/makee_vala/git_scripts/export_resource_name.py
+++ /dev/null
@@ -1,121 +0,0 @@
-"""
-MYSQL_HOST=xxx
-MYSQL_USERNAME=xxx
-MYSQL_PASSWORD=xxx
-MYSQL_DATABASE=xxx
-MYSQL_PORT=xxx
-
-以上环境变量已配置在 .env 中。
-
-我要导出一个数据表的某些记录  并添加一些字段。
-
-表名:vala_resource_base
-
-过滤全部 type == "角色" 的记录
-
-导出以下字段:
-id 
-cn_name
-en_name
-
-
-最终输出到 excel文档。  "角色资源导出_251031.xlsx"
-
-"""
-
-import os
-import pandas as pd
-import pymysql
-from dotenv import load_dotenv
-from datetime import datetime
-
-def load_config():
-    """加载环境变量配置"""
-    load_dotenv()
-    
-    config = {
-        'host': os.getenv('MYSQL_HOST'),
-        'user': os.getenv('MYSQL_USERNAME'),
-        'password': os.getenv('MYSQL_PASSWORD'),
-        'database': os.getenv('MYSQL_DATABASE'),
-        'port': int(os.getenv('MYSQL_PORT', 3306)),
-        'charset': 'utf8mb4'
-    }
-    
-    # 验证配置
-    for key, value in config.items():
-        if value is None and key != 'charset':
-            raise ValueError(f"环境变量 {key} 未配置")
-    
-    return config
-
-def connect_mysql(config):
-    """连接MySQL数据库"""
-    try:
-        connection = pymysql.connect(**config)
-        print("MySQL数据库连接成功")
-        return connection
-    except Exception as e:
-        print(f"MySQL数据库连接失败: {e}")
-        raise
-
-def export_role_resources():
-    """导出角色资源数据"""
-    try:
-        # 加载配置
-        config = load_config()
-        
-        # 连接数据库
-        connection = connect_mysql(config)
-        
-        # SQL查询语句
-        sql = """
-        SELECT 
-            id,
-            cn_name,
-            en_name
-        FROM vala_resource_base 
-        WHERE type = '角色'
-        ORDER BY id
-        """
-        
-        print("开始查询数据...")
-        
-        # 执行查询并获取数据
-        df = pd.read_sql(sql, connection)
-        
-        print(f"查询到 {len(df)} 条记录")
-        
-        # 关闭数据库连接
-        connection.close()
-        
-        # 导出到Excel文件
-        output_filename = "角色资源导出_251031.xlsx"
-        df.to_excel(output_filename, index=False, engine='openpyxl')
-        
-        print(f"数据已成功导出到: {output_filename}")
-        print(f"导出字段: {list(df.columns)}")
-        print(f"导出记录数: {len(df)}")
-        
-        # 显示前几行数据预览
-        if len(df) > 0:
-            print("\n数据预览:")
-            print(df.head())
-        
-        return output_filename
-        
-    except Exception as e:
-        print(f"导出过程中发生错误: {e}")
-        raise
-
-if __name__ == "__main__":
-    try:
-        print("开始导出角色资源数据...")
-        print(f"执行时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
-        
-        output_file = export_role_resources()
-        
-        print(f"\n✅ 导出完成! 文件保存为: {output_file}")
-        
-    except Exception as e:
-        print(f"\n❌ 导出失败: {e}")
diff --git a/makee_vala/git_scripts/export_unit_challenge_data.py b/makee_vala/git_scripts/export_unit_challenge_data.py
deleted file mode 100644
index 9bfedd4..0000000
--- a/makee_vala/git_scripts/export_unit_challenge_data.py
+++ /dev/null
@@ -1,343 +0,0 @@
-"""
-** 不要改动我的需求描述，直接在需求后面写代码即可 **
-
-需求一:
-先写一个最简单脚本  实现下面sql功能
-
-SELECT * FROM `vala_game_info` WHERE id > 0 AND `vala_game_info`.`deleted_at` IS NULL ORDER BY season_package_id asc,`index` asc
-
-环境变量读取:
-MYSQL_HOST=xxx
-MYSQL_USERNAME=xxx
-MYSQL_PASSWORD=xxx
-MYSQL_DATABASE=xxx
-MYSQL_PORT=xxx
------------
-需求二:
-在 PGsql数据库中  筛选数据
-数据库相关配置  从.env中读取:
-PG_DB_HOST = xxx
-PG_DB_PORT = xxx
-PG_DB_USER = xxx
-PG_DB_PASSWORD = xxx
-PG_DB_DATABASE = xxx
-
-读取以下数据表:user_unit_challenge_question_result
-
-支持输入时间范围
-起始时间 和 截止时间   配置格式:   "20250110"
-
-数据表中的时间字段为 updated_at , 格式样例: "2025-11-05 19:35:46.698246+08:00"
-
-在这些时间范围内，筛选数据 (要求deleted_at字段内容为null)
-
-导出以下字段:
-
-user_id
-unit_id  （读取每条记录的story_id， 根据 get_id_2_unit_index 函数返回的映射表 映射到 unit_id）
-score_text
-question_list
-updated_at
-category
-play_time_seconds （读取 play_time 把ms数据转换为秒 保留整数部分）
-
-导出为excel文件
-
-配置参数直接在脚本开头给出即可
-
-需求三:
-需求二中 作为步骤一
-本需求为步骤二  基于 步骤一的 文档
-进行数据聚合
-
-根据每个unit_id + category 进行分组
-
-统计每个分组下的以下数值:
-总记录数量
-Perfect数量 (读取 score_text =="Perfect")
-Good数量 (读取 score_text =="Good")
-Oops数量 (读取 score_text =="Oops")
-Perfect率 (Perfect数量 / 总记录数量)
-Good率 (Good数量 / 总记录数量)
-Oops率 (Oops数量 / 总记录数量)
-
-导出为excel 命名为  步骤一名字_stats.xlsx
-
-"""
-
-import os
-import pymysql
-import psycopg2
-from psycopg2.extras import RealDictCursor
-from datetime import datetime
-import pandas as pd
-from dotenv import load_dotenv
-
-# 加载环境变量
-load_dotenv()
-
-# ============ 配置参数 ============
-START_DATE = "20250915"  # 起始时间
-END_DATE = "20251128"    # 截止时间
-OUTPUT_NAME = "unit_challenge_data_{}_{}.xlsx".format(START_DATE, END_DATE)  # 输出文件名
-OUTPUT_FILENAME = os.path.join("./output", OUTPUT_NAME)
-# =================================
-
-def get_id_2_unit_index():
-    # 读取数据库配置
-    db_host = os.getenv('MYSQL_HOST')
-    db_user = os.getenv('MYSQL_USERNAME')
-    db_password = os.getenv('MYSQL_PASSWORD')
-    db_name = os.getenv('MYSQL_DATABASE')
-    db_port = os.getenv('MYSQL_PORT')
-
-    # 简单的参数检查
-    if not all([db_host, db_user, db_password, db_name]):
-        print("Error: Missing database configuration in .env file.")
-        print("Ensure MYSQL_HOST, MYSQL_USERNAME, MYSQL_PASSWORD, MYSQL_DATABASE are set.")
-        return
-
-    try:
-        # 连接数据库
-        connection = pymysql.connect(
-            host=db_host,
-            user=db_user,
-            password=db_password,
-            database=db_name,
-            port=int(db_port) if db_port else 3306,
-            cursorclass=pymysql.cursors.DictCursor
-        )
-
-        print(f"Connected to database: {db_host}")
-
-        try:
-            with connection.cursor() as cursor:
-                # 定义 SQL 语句
-                sql = """
-                SELECT * 
-                FROM `vala_game_info` 
-                WHERE id > 0 
-                  AND `vala_game_info`.`deleted_at` IS NULL 
-                ORDER BY season_package_id asc, `index` asc
-                """
-                
-                print(f"Executing SQL: {sql}")
-                
-                # 执行查询
-                cursor.execute(sql)
-                
-                # 获取所有结果
-                results = cursor.fetchall()
-                
-                print(f"Total records found: {len(results)}")
-                print("-" * 30)
-                
-                # 打印结果
-                print(results)
-                id_2_unit_index = {}
-                for index, row in enumerate(results):
-                    id_2_unit_index[row['id']] = index
-                
-                print("映射结果:")
-                print(id_2_unit_index)
-
-
-                
-                print("-" * 30)
-                print("Done.")
-                return id_2_unit_index
-
-        finally:
-            connection.close()
-
-    except Exception as e:
-        print(f"An error occurred: {e}")
-
-
-def export_unit_challenge_data(start_date, end_date, output_filename):
-    """
-    从PostgreSQL数据库导出单元挑战数据
-    """
-    # 读取PostgreSQL数据库配置
-    pg_host = os.getenv('PG_DB_HOST')
-    pg_port = os.getenv('PG_DB_PORT')
-    pg_user = os.getenv('PG_DB_USER')
-    pg_password = os.getenv('PG_DB_PASSWORD')
-    pg_database = os.getenv('PG_DB_DATABASE')
-
-    # 检查配置
-    if not all([pg_host, pg_port, pg_user, pg_password, pg_database]):
-        print("Error: Missing PostgreSQL database configuration in .env file.")
-        print("Ensure PG_DB_HOST, PG_DB_PORT, PG_DB_USER, PG_DB_PASSWORD, PG_DB_DATABASE are set.")
-        return
-
-    # 获取 id 到 unit_index 的映射
-    print("正在获取 unit_id 映射表...")
-    id_2_unit_index = get_id_2_unit_index()
-    if not id_2_unit_index:
-        print("Error: Failed to get id_2_unit_index mapping.")
-        return
-
-    # 转换时间格式: "20250110" -> "2025-01-10 00:00:00"
-    start_datetime = datetime.strptime(start_date, "%Y%m%d").strftime("%Y-%m-%d 00:00:00")
-    end_datetime = datetime.strptime(end_date, "%Y%m%d").strftime("%Y-%m-%d 00:00:00")
-
-    print(f"时间范围: {start_datetime} 至 {end_datetime}")
-
-    try:
-        # 连接PostgreSQL数据库
-        connection = psycopg2.connect(
-            host=pg_host,
-            port=int(pg_port),
-            user=pg_user,
-            password=pg_password,
-            database=pg_database,
-            cursor_factory=RealDictCursor
-        )
-
-        print(f"已连接到 PostgreSQL 数据库: {pg_host}")
-
-        try:
-            with connection.cursor() as cursor:
-                # 定义SQL查询
-                sql = """
-                SELECT
-                    user_id,
-                    story_id,
-                    score_text,
-                    question_list,
-                    updated_at,
-                    category,
-                    play_time
-                FROM user_unit_challenge_question_result
-                WHERE deleted_at IS NULL
-                  AND updated_at >= %s
-                  AND updated_at < %s
-                ORDER BY updated_at ASC
-                """
-
-                print(f"执行查询...")
-
-                # 执行查询
-                cursor.execute(sql, (start_datetime, end_datetime))
-
-                # 获取所有结果
-                results = cursor.fetchall()
-
-                print(f"查询到 {len(results)} 条记录")
-
-                # 处理数据
-                export_data = []
-                for row in results:
-                    # 映射 story_id 到 unit_id
-                    story_id = row['story_id']
-                    unit_id = id_2_unit_index.get(story_id, None)
-
-                    # 转换 play_time (毫秒) 为秒 (整数)
-                    play_time_seconds = row['play_time'] // 1000 if row['play_time'] else 0
-
-                    # 移除 updated_at 的时区信息（Excel 不支持带时区的 datetime）
-                    updated_at = row['updated_at']
-                    if updated_at and hasattr(updated_at, 'replace'):
-                        updated_at = updated_at.replace(tzinfo=None)
-
-                    export_data.append({
-                        'user_id': row['user_id'],
-                        'unit_id': unit_id,
-                        'score_text': row['score_text'],
-                        'question_list': row['question_list'],
-                        'updated_at': updated_at,
-                        'category': row['category'],
-                        'play_time_seconds': play_time_seconds
-                    })
-
-                # 导出到Excel
-                if export_data:
-                    df = pd.DataFrame(export_data)
-                    df.to_excel(output_filename, index=False, engine='openpyxl')
-                    print(f"数据已导出到: {output_filename}")
-                    print(f"共导出 {len(export_data)} 条记录")
-                else:
-                    print("没有数据可导出")
-
-        finally:
-            connection.close()
-            print("数据库连接已关闭")
-
-    except Exception as e:
-        print(f"发生错误: {e}")
-
-
-def aggregate_stats(input_filename):
-    """
-    基于步骤一的Excel文件进行数据聚合
-    按 unit_id + category 分组，统计各项指标
-    """
-    try:
-        # 读取步骤一导出的Excel文件
-        print(f"正在读取文件: {input_filename}")
-        df = pd.read_excel(input_filename, engine='openpyxl')
-
-        print(f"读取到 {len(df)} 条记录")
-
-        # 按 unit_id + category 分组统计
-        grouped = df.groupby(['unit_id', 'category'], dropna=False)
-
-        stats_data = []
-        for (unit_id, category), group in grouped:
-            total_count = len(group)
-            perfect_count = (group['score_text'] == 'Perfect').sum()
-            good_count = (group['score_text'] == 'Good').sum()
-            oops_count = (group['score_text'] == 'Oops').sum()
-
-            # 计算占比
-            perfect_rate = round(perfect_count / total_count if total_count > 0 else 0, 2)
-            good_rate = round(good_count / total_count if total_count > 0 else 0, 2)
-            oops_rate = round(oops_count / total_count if total_count > 0 else 0, 2)
-
-            stats_data.append({
-                'unit_id': unit_id,
-                'category': category,
-                '总记录数量': total_count,
-                'Perfect数量': perfect_count,
-                'Good数量': good_count,
-                'Oops数量': oops_count,
-                'Perfect率': perfect_rate,
-                'Good率': good_rate,
-                'Oops率': oops_rate
-            })
-
-        # 生成输出文件名
-        base_name = os.path.splitext(input_filename)[0]
-        output_filename = f"{base_name}_stats.xlsx"
-
-        # 导出统计结果
-        if stats_data:
-            stats_df = pd.DataFrame(stats_data)
-            stats_df.to_excel(output_filename, index=False, engine='openpyxl')
-            print(f"统计数据已导出到: {output_filename}")
-            print(f"共 {len(stats_data)} 个分组")
-        else:
-            print("没有数据可统计")
-
-    except Exception as e:
-        print(f"数据聚合时发生错误: {e}")
-
-
-if __name__ == "__main__":
-    # 步骤一：执行导出
-    print("=" * 50)
-    print("步骤一：导出原始数据")
-    print("=" * 50)
-    export_unit_challenge_data(START_DATE, END_DATE, OUTPUT_FILENAME)
-
-    # 步骤二：数据聚合
-    print("\n" + "=" * 50)
-    print("步骤二：数据聚合统计")
-    print("=" * 50)
-    aggregate_stats(OUTPUT_FILENAME)
-
-    print("\n" + "=" * 50)
-    print("全部完成！")
-    print("=" * 50)
-
diff --git a/makee_vala/git_scripts/export_user_id_data.py b/makee_vala/git_scripts/export_user_id_data.py
deleted file mode 100644
index ba0ddcc..0000000
--- a/makee_vala/git_scripts/export_user_id_data.py
+++ /dev/null
@@ -1,1846 +0,0 @@
-"""
-初版需求v1.0: 2025.11.18
-
-导出 一个userId的多表数据， 最终按照不同sheet，输出到一个 excel文件中。
-
-1. 第一个sheet:"全部音频数据"
-es相关配置通过以下环境变量
-ES_HOST=xxx
-ES_PORT=9200
-ES_SCHEME=https
-ES_USER=elastic
-ES_PASSWORD=xxx
-
-index: user-audio
-
-脚本思路:
-过滤字段:
-userId == xxxx
-
-输出该userId的全部记录 按时间倒序排序
-包含以下字段内容:
-
-userId
-userMsg
-userName
-soeData
-audioUrl
-asrStatus
-componentId
-componentType
-dataVersion
-
-2. 第二个sheet:"互动组件学习记录"
-在 PGsql数据库中  筛选出 user_id 对应的记录 按时间(updated_at)倒序排列。
-数据库相关配置  从.env中读取:
-PG_DB_HOST = xxx
-PG_DB_PORT = xxx
-PG_DB_USER = xxx
-PG_DB_PASSWORD = xxx
-PG_DB_DATABASE = xxx
-
-读取以下数据表: 
-user_component_play_record_0 ~ user_component_play_record_7
-
-输出以下字段：
-user_id,
-component_unique_code,
-session_id,
-c_type,
-c_id,
-play_result,
-user_behavior_info,
-updated_at
-
-3.第三个sheet:"课程巩固记录"
-在 PGsql数据库中  筛选出 user_id 对应的记录 按时间(updated_at)倒序排列。
-
-数据表:user_unit_review_question_result
-
-输出以下字段:
-user_id
-story_id
-chapter_id
-question_list
-updated_at
-
-4.第四个sheet:"单元挑战记录"
-在 PGsql数据库中  筛选出 user_id 对应的记录 按时间(updated_at)倒序排列。
-
-数据表:user_unit_challenge_question_result
-
-输出以下字段:
-user_id
-story_id
-category
-score_text,
-question_list
-updated_at
-------------
-
-需求补充v1.1:
-"全部音频数据"这个sheet
-输出字段 添加timeStr 并按时间倒序排列  最新的记录 在最上面
-
-------------
-需求补充v1.2:
-"全部音频数据"这个sheet
-如果userMsg字段内容 包含 ”makee_id“ 要进行以下处理：
-
-从userMsg字段中提取出具体的makee_id:
-此时的字段样例:
-```
-asr msg信息为：{
-    "time_ms": 358,
-    "time_ms_api": 357,
-    "hot_words_str": "{\n \"context_type\": \"dialog_ctx\",\n \"context_data\": [\n  {\n   \"text\": \"planet Walla\"\n  },\n  {\n   \"text\": \"Walla\"\n  }\n ]\n}",
-    "makee_id": "d208c617-902f-4f81-8255-b5fb73599546",
-    "volcano_fast_x_tt_logid": "202511151541355DF72BE5EBFE73795BFD",
-    "api_name": "volcano-fast"
-}
-```
-然后基于makee_id 去另一个表里查记录:  index:llm_asr_log
-将查询到的记录的 result_text 字段内容 回填到 userMsg。
-将source字段内容 输出 到 source。
-
-如果userMsg字段内容 不包含 ”makee_id“ 保持之前的逻辑。
-
---------------
-需求补充 v1.3
-当前输入 只支持配置单个 userId (业务侧名称为角色id)
-
-
-期望扩展为以下逻辑:
-1. 改为配置 角色id list ， 分别 导出 多份excel文件。命名格式为 角色id_{}_导出时间_{}.xlsx
-2. 改为配置 账户id list ， 分别 导出 多份excel文件。命名格式为 账户id_{}_角色id_{}_导出时间_{}.xlsx
-
-关于 账户 id 到角色id 的映射逻辑，
-首先 读取 mysql 表 vala_app_character
-筛选 account_id字段值 == 账户id 的 记录， 其中 该记录 的 id值，则为角色id 一个 账户id 可以对应多个角色id
-
-本次需求只针对输入侧调整， 数据抽取聚合逻辑部分和之前保持一致
-
----------------
-需求补充 v1.4
-
-增加一个sheet "单元总结记录"，
-导出对应角色id的单元总结记录。   参考 export_unit_summary.py 中的原始数据提取方案即可(不必关注其中的数据统计部分)。
-
-其他已有逻辑保持不动哦。
-
-----------------
-需求补充 v1.5
-
-1."互动组件学习记录"sheet 增加以下字段
-"互动组件名称"、"组件标题"、"组件配置摘要"、"知识点":
-字段取值规则:
-根据 c_type 及组件配置(从mysql表获取) 进行映射和处理:
-```
-1）.如果 c_type 开头为"mid"
-
-则读取下表:表名:middle_interaction_component
-
-获取以下字段值:
-title (作为组件标题)
-component_config (完整的组件配置)   获取其中 的 question 字段值 作为 组件配置摘要；
-kp_relation_info 字段值  作为 知识点
-
-"互动组件名称"规则:
-
-"物品互动": "mid_vocab_item",
-"图片互动": "mid_vocab_image",
-"填词互动": "mid_vocab_fillBlank",
-"指令互动": "mid_vocab_instruction"
-"对话互动-表达": "mid_sentence_dialogue", 且 component_config->question->mode == "express"
-"对话互动-朗读": "mid_sentence_dialogue", 且 component_config->question->mode == "read"
-"语音互动": "mid_sentence_voice",
-"材料互动": "mid_sentence_material",
-"造句互动": "mid_sentence_makeSentence"
-"挖空互动": "mid_grammar_cloze",
-"组句互动": "mid_grammar_sentence"
-"发音互动": "mid_pron_pron"
-
-
-2）. 如果 c_type 开头为"core"
-则读取下表:表名:core_interaction_component
-
-获取以下字段值:
-title (作为组件标题)
-component_config (完整的组件配置)   获取其中 的 taskInfo 字段值 作为 组件配置摘要
-kp_relation_info 字段值  作为 知识点
-
-"互动组件名称"规则:
-"口语快答": "core_speaking_reply",
-"口语妙问": "core_speaking_inquiry",
-"口语探讨": "core_speaking_explore",
-"口语独白": "core_speaking_monologue"
-"合作阅读": "core_reading_order",
-"合作听力": "core_listening_order",
-"看图组句": "core_writing_imgMakeSentence",
-"看图撰写": "core_writing_imgWrite",
-"问题组句": "core_writing_questionMakeSentence",
-"问题撰写": "core_writing_questionWrite",
-```
-
-2."课程巩固记录" sheet 增加以下字段
-"正确率":  参考 export_lesson_review.py 中的计算逻辑
-
-3. 新增一个"汇总统计"sheet
-统计并展示以下内容   请以 可读性 比较好的方式排列、展示
-
-a. "所有互动-按互动组件类型-通过情况统计"
-以每种"互动组件名称"进行聚合
-统计play_result的取值分布情况，算以下指标:
-总数量、Perfect数量、Good数量、Failed数量、Pass数量、Perfect比例、Good比例、Failed比例、Pass比例
-
-b. "中互动组件-按知识点-通过情况统计"
-以每个知识点进行聚合
-
-其中 知识点配置格式如下:
-```
-[{"kpId":"0000004","kpType":"sentence","kpTitle":"My name is ...","kpSkill":"sentence_pron","kpSkillName":"语音"},{"kpId":"0000004","kpType":"sentence","kpTitle":"My name is ...","kpSkill":"sentence_meaning","kpSkillName":"语义"},{"kpId":"0000005","kpType":"sentence","kpTitle":"I'm… years old.","kpSkill":"sentence_pron","kpSkillName":"语音"},{"kpId":"0000005","kpType":"sentence","kpTitle":"I'm… years old.","kpSkill":"sentence_meaning","kpSkillName":"语义"},{"kpId":"0000014","kpType":"sentence","kpTitle":"Nice to meet you.","kpSkill":"sentence_pron","kpSkillName":"语音"},{"kpId":"0000014","kpType":"sentence","kpTitle":"Nice to meet you.","kpSkill":"sentence_meaning","kpSkillName":"语义"}]
-```
-一个组件可以绑定多个知识点，以每个知识点的 kpId + kpType + kpTitle 进行 展示及聚合
-
-对所有绑定了某个知识点的中互动组件(c_type以mid开头)
-统计play_result的取值分布情况，算以下指标:
-总数量、Perfect数量、Good数量、Failed数量、Pass数量、Perfect比例、Good比例、Failed比例、Pass比例
-
-c. "单元总结-按单元统计时长"
-
-将"单元总结记录"中的"play_time_seconds"字段值 以每个单元id 进行聚合 进行 累加 统计，并增加一列 转换为分钟为单位 取整数
-
-
-"""
-# ==== 可直接修改的脚本变量（不使用命令行传参） ====
-# 三种模式互斥，只能配置一个：
-# 模式1：单个角色id
-USER_ID = None # 单个角色ID，示例：2911
-
-# 模式2：角色id列表（多个角色id批量导出）
-USER_ID_LIST = None  # 角色ID列表，示例：[2911, 2912, 2913]
-
-# 模式3：账户id列表（通过账户id查询对应的角色id后批量导出）
-ACCOUNT_ID_LIST = [2148] # 5095[7232] # [1783,5375,5371,5345,5303,5293,5095,4289,4494,4473,4460,4452,4386,4388,4236,4043,2758,2841,2756,2750,2692,1781,1693,2256,2234,2373]  # 账户ID列表，示例：[100, 101, 102]
-
-OUTPUT_DIR = "output/260126/"  # 输出目录，默认为output文件夹
-# ==== 变量结束 ====
-import os
-import json
-import re
-from typing import Any, Dict, List, Optional
-
-import datetime
-
-try:
-    import requests
-except Exception:
-    requests = None
-
-try:
-    import psycopg2
-    from psycopg2.extras import RealDictCursor
-except Exception:
-    psycopg2 = None
-    RealDictCursor = None
-
-try:
-    import pymysql
-    import pymysql.cursors
-except Exception:
-    pymysql = None
-
-try:
-    import pandas as pd
-except Exception:
-    pd = None
-
-try:
-    import urllib3
-except Exception:
-    urllib3 = None
-
-
-SHEET1_COLUMNS = [
-    "userId",
-    "userMsg",
-    "source",
-    "userName",
-    "soeData",
-    "audioUrl",
-    "asrStatus",
-    "componentId",
-    "componentType",
-    "dataVersion",
-    "timeStr",
-]
-
-SHEET2_COLUMNS = [
-    "user_id",
-    "component_unique_code",
-    "session_id",
-    "c_type",
-    "c_id",
-    "互动组件名称",
-    "组件标题",
-    "组件配置摘要",
-    "知识点",
-    "play_result",
-    "user_behavior_info",
-    "updated_at",
-]
-
-SHEET3_COLUMNS = [
-    "user_id",
-    "unit_id",
-    "lesson_id",
-    "question_list",
-    "正确率",
-    "updated_at",
-]
-
-SHEET4_COLUMNS = [
-    "user_id",
-    "unit_id",
-    "category",
-    "score_text",
-    "question_list",
-    "updated_at",
-]
-
-SHEET5_COLUMNS = [
-    "id",
-    "user_id",
-    "unit_id",
-    "updated_at",
-    "km_id",
-    "km_type",
-    "play_time_seconds",
-]
-
-
-def _load_env_file(path: str) -> None:
-    if not os.path.exists(path):
-        return
-    try:
-        with open(path, "r", encoding="utf-8") as f:
-            for line in f:
-                line = line.strip()
-                if not line or line.startswith("#"):
-                    continue
-                if "=" not in line:
-                    continue
-                k, v = line.split("=", 1)
-                k = k.strip()
-                v = v.strip().strip('"').strip("'")
-                if k and (os.getenv(k) is None):
-                    os.environ[k] = v
-    except Exception:
-        pass
-
-
-def load_env() -> None:
-    _load_env_file(os.path.join(os.getcwd(), ".env"))
-    _load_env_file(os.path.join(os.getcwd(), ".env.local"))
-
-
-def to_json_str(v: Any) -> Any:
-    if isinstance(v, (dict, list)):
-        try:
-            return json.dumps(v, ensure_ascii=False)
-        except Exception:
-            return str(v)
-    return v
-
-
-def parse_time(value: Any) -> Optional[datetime.datetime]:
-    if value is None:
-        return None
-    if isinstance(value, (int, float)):
-        try:
-            v = float(value)
-            # 兼容毫秒级时间戳
-            if v > 1e11:
-                v = v / 1000.0
-            return datetime.datetime.fromtimestamp(v)
-        except Exception:
-            return None
-    if isinstance(value, str):
-        fmts = [
-            "%Y-%m-%dT%H:%M:%S.%fZ",
-            "%Y-%m-%dT%H:%M:%S.%f%z",
-            "%Y-%m-%dT%H:%M:%S%z",
-            "%Y-%m-%d %H:%M:%S",
-            "%Y-%m-%d",
-        ]
-        for fmt in fmts:
-            try:
-                return datetime.datetime.strptime(value, fmt)
-            except Exception:
-                continue
-        try:
-            return datetime.datetime.fromisoformat(value)
-        except Exception:
-            return None
-    return None
-
-
-def pick_time(source: Dict[str, Any]) -> Optional[datetime.datetime]:
-    candidates = [
-        "updated_at",
-        "created_at",
-        "@timestamp",
-        "timestamp",
-        "updatedAt",
-        "createdAt",
-        "time",
-        "ts",
-        "timeStr",
-        "update_time",
-        "create_time",
-    ]
-    for key in candidates:
-        if key in source:
-            t = parse_time(source.get(key))
-            if t is not None:
-                return t
-    # 宽松匹配：尝试扫描所有可能的时间相关字段
-    for k, v in source.items():
-        lk = str(k).lower()
-        if any(s in lk for s in ["time", "date", "_at", "timestamp"]):
-            t = parse_time(v)
-            if t is not None:
-                return t
-    return None
-
-
-def extract_makee_id_from_user_msg(user_msg: Any) -> Optional[str]:
-    # 支持dict或字符串形式
-    if isinstance(user_msg, dict):
-        mk = user_msg.get("makee_id")
-        if isinstance(mk, str) and mk:
-            return mk
-    if isinstance(user_msg, str) and user_msg:
-        # 1) 尝试整体解析为JSON
-        try:
-            obj = json.loads(user_msg)
-            mk = obj.get("makee_id")
-            if isinstance(mk, str) and mk:
-                return mk
-        except Exception:
-            pass
-        # 2) 尝试截取大括号中的JSON
-        try:
-            start = user_msg.find("{")
-            end = user_msg.rfind("}")
-            if start != -1 and end != -1 and end > start:
-                candidate = user_msg[start : end + 1]
-                obj = json.loads(candidate)
-                mk = obj.get("makee_id")
-                if isinstance(mk, str) and mk:
-                    return mk
-        except Exception:
-            pass
-        # 3) 正则匹配 makee_id
-        m = re.search(r"\bmakee_id\b\s*:\s*\"([^\"]+)\"", user_msg)
-        if m:
-            return m.group(1)
-    return None
-
-
-def fetch_es_asr_log(makee_id: str, es_cfg: Dict[str, Any]) -> Optional[Dict[str, Any]]:
-    if requests is None:
-        raise RuntimeError("缺少requests依赖，请安装后再运行。")
-    host = es_cfg.get("host")
-    port = es_cfg.get("port")
-    scheme = es_cfg.get("scheme", "http")
-    user = es_cfg.get("user")
-    password = es_cfg.get("password")
-    index = "llm_asr_log"
-    if not host:
-        return None
-    base = f"{scheme}://{host}:{port}"
-    url = f"{base}/{index}/_search"
-    headers = {"Content-Type": "application/json"}
-    body = {
-        "query": {
-            "bool": {
-                "should": [
-                    {"term": {"makee_id": {"value": str(makee_id)}}},
-                    {"term": {"makee_id.keyword": {"value": str(makee_id)}}},
-                ],
-                "minimum_should_match": 1,
-            }
-        },
-        "size": 10,
-        "_source": [
-            "makee_id",
-            "result_text",
-            "source",
-            "updated_at",
-            "created_at",
-            "@timestamp",
-            "timestamp",
-            "updatedAt",
-            "createdAt",
-            "time",
-            "ts",
-            "timeStr",
-            "update_time",
-            "create_time",
-        ],
-    }
-    auth = (user, password) if user and password else None
-    try:
-        if scheme == "https" and urllib3 is not None:
-            try:
-                urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-            except Exception:
-                pass
-        resp = requests.post(url, headers=headers, json=body, auth=auth, timeout=20, verify=False if scheme == "https" else True)
-        resp.raise_for_status()
-        data = resp.json()
-    except Exception:
-        return None
-    hits = data.get("hits", {}).get("hits", [])
-    if not hits:
-        return None
-    # 选最新的
-    chosen = None
-    best_t = None
-    for h in hits:
-        src = h.get("_source", {}) or {}
-        t = pick_time(src)
-        if t is None:
-            continue
-        if best_t is None or t > best_t:
-            best_t = t
-            chosen = src
-    if chosen is None:
-        # 如果都没有时间，选第一条
-        chosen = (hits[0].get("_source", {}) or {})
-    return chosen
-
-
-def get_es_config() -> Dict[str, Any]:
-    return {
-        "host": os.getenv("ES_HOST"),
-        "port": os.getenv("ES_PORT", "9200"),
-        "scheme": os.getenv("ES_SCHEME", "http"),
-        "user": os.getenv("ES_USER"),
-        "password": os.getenv("ES_PASSWORD"),
-        "index": "user-audio",
-    }
-
-
-def fetch_es_user_audio(user_id: str, es_cfg: Dict[str, Any]) -> List[Dict[str, Any]]:
-    if requests is None:
-        raise RuntimeError("缺少requests依赖，请安装后再运行。")
-
-    print(f"  [ES] 开始查询user-audio索引...")
-    start_time = datetime.datetime.now()
-
-    host = es_cfg.get("host")
-    port = es_cfg.get("port")
-    scheme = es_cfg.get("scheme", "http")
-    user = es_cfg.get("user")
-    password = es_cfg.get("password")
-    index = es_cfg.get("index", "user-audio")
-
-    if not host:
-        return []
-
-    base = f"{scheme}://{host}:{port}"
-    url = f"{base}/{index}/_search"
-    headers = {"Content-Type": "application/json"}
-
-    body = {
-        "query": {
-            "bool": {
-                "should": [
-                    {"term": {"userId": {"value": str(user_id)}}},
-                    {"term": {"userId.keyword": {"value": str(user_id)}}},
-                ],
-                "minimum_should_match": 1,
-            }
-        },
-        "size": 10000,
-        "_source": [
-            "userId",
-            "userMsg",
-            "userName",
-            "soeData",
-            "audioUrl",
-            "asrStatus",
-            "componentId",
-            "componentType",
-            "dataVersion",
-            "updated_at",
-            "created_at",
-            "@timestamp",
-            "timestamp",
-            "updatedAt",
-            "createdAt",
-            "time",
-            "ts",
-            "timeStr",
-            "update_time",
-            "create_time",
-        ],
-    }
-
-    auth = (user, password) if user and password else None
-
-    try:
-        # 抑制自签证书下的HTTPS不安全警告
-        if scheme == "https" and urllib3 is not None:
-            try:
-                urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-            except Exception:
-                pass
-        resp = requests.post(url, headers=headers, json=body, auth=auth, timeout=30, verify=False if scheme == "https" else True)
-        resp.raise_for_status()
-        data = resp.json()
-    except Exception as e:
-        raise RuntimeError(f"ES查询失败: {e}")
-
-    hits = data.get("hits", {}).get("hits", [])
-    print(f"  [ES] 查询完成，获得{len(hits)}条记录，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-
-    if not hits:
-        return []
-
-    print(f"  [ES] 开始处理音频数据...")
-    process_start = datetime.datetime.now()
-
-    rows: List[Dict[str, Any]] = []
-    asr_cache: Dict[str, Dict[str, Any]] = {}
-    makee_id_count = 0
-
-    for idx, h in enumerate(hits, 1):
-        # 每处理100条显示一次进度
-        if idx % 100 == 0 or idx == len(hits):
-            print(f"  [ES] 处理进度: {idx}/{len(hits)} ({idx*100//len(hits)}%)")
-
-        src = h.get("_source", {}) or {}
-        row = {
-            "userId": src.get("userId"),
-            "userMsg": src.get("userMsg"),
-            "source": None,
-            "userName": src.get("userName"),
-            "soeData": to_json_str(src.get("soeData")),
-            "audioUrl": src.get("audioUrl"),
-            "asrStatus": src.get("asrStatus"),
-            "componentId": src.get("componentId"),
-            "componentType": src.get("componentType"),
-            "dataVersion": src.get("dataVersion"),
-        }
-        t = pick_time(src)
-        row["_time"] = t.isoformat() if t else None
-        row["timeStr"] = t.strftime("%Y-%m-%d %H:%M:%S") if t else None
-        # v1.2: 当userMsg包含makee_id时，补充查询llm_asr_log并回填
-        mk = extract_makee_id_from_user_msg(row.get("userMsg"))
-        if mk:
-            makee_id_count += 1
-            asr_doc = asr_cache.get(mk)
-            if asr_doc is None:
-                asr_doc = fetch_es_asr_log(mk, es_cfg)
-                if asr_doc is not None:
-                    asr_cache[mk] = asr_doc
-            if asr_doc is not None:
-                rt = asr_doc.get("result_text")
-                if rt:
-                    row["userMsg"] = rt
-                row["source"] = to_json_str(asr_doc.get("source"))
-        rows.append(row)
-
-    print(f"  [ES] 数据处理完成，发现{makee_id_count}条包含makee_id的记录，耗时{(datetime.datetime.now() - process_start).total_seconds():.2f}秒")
-
-    print(f"  [ES] 开始排序...")
-    rows.sort(key=lambda x: parse_time(x.get("_time")) or datetime.datetime.min, reverse=True)
-    print(f"  [ES] 音频数据处理完成，总耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-
-    return rows
-
-
-def get_pg_conn() -> Any:
-    if psycopg2 is None:
-        raise RuntimeError("缺少psycopg2依赖，请安装后再运行。")
-    host = os.getenv("PG_DB_HOST")
-    port = int(os.getenv("PG_DB_PORT", "5432"))
-    user = os.getenv("PG_DB_USER")
-    password = os.getenv("PG_DB_PASSWORD")
-    dbname = os.getenv("PG_DB_DATABASE")
-    if not host or not dbname:
-        raise RuntimeError("PG数据库环境变量未配置完整")
-    conn = psycopg2.connect(host=host, port=port, user=user, password=password, dbname=dbname)
-    return conn
-
-
-def get_mysql_conn(database: str) -> Any:
-    """
-    获取MySQL数据库连接
-
-    Args:
-        database: 数据库名，可选值：'vala_user' 或 'vala_test'
-                 vala_user 使用 online 配置（环境变量后缀 _online）
-                 vala_test 使用默认配置
-
-    Returns:
-        MySQL连接对象
-    """
-    if pymysql is None:
-        raise RuntimeError("缺少pymysql依赖，请安装后再运行。")
-
-    # 根据数据库选择不同的环境变量配置
-    if database == "vala_user":
-        # vala_user 数据库使用 online 配置
-        host = os.getenv("MYSQL_HOST_online")
-        port = int(os.getenv("MYSQL_PORT_online", "3306"))
-        user = os.getenv("MYSQL_USERNAME_online")
-        password = os.getenv("MYSQL_PASSWORD_online")
-        if not host:
-            raise RuntimeError("MySQL数据库环境变量未配置完整（缺少MYSQL_HOST_online）")
-    else:
-        # vala_test 等其他数据库使用默认配置
-        host = os.getenv("MYSQL_HOST")
-        port = int(os.getenv("MYSQL_PORT", "3306"))
-        user = os.getenv("MYSQL_USERNAME")
-        password = os.getenv("MYSQL_PASSWORD")
-        if not host:
-            raise RuntimeError("MySQL数据库环境变量未配置完整（缺少MYSQL_HOST）")
-
-    conn = pymysql.connect(
-        host=host,
-        port=port,
-        user=user,
-        password=password,
-        database=database,  # 直接使用传入的数据库名
-        charset="utf8mb4",
-        cursorclass=pymysql.cursors.DictCursor,
-    )
-    return conn
-
-
-def get_id_2_unit_index(conn: Any) -> Dict[int, int]:
-    """
-    从MySQL获取 story_id 到 unit_id 的映射关系
-
-    Args:
-        conn: MySQL数据库连接
-
-    Returns:
-        映射字典 {story_id: unit_id}
-    """
-    sql = """
-    SELECT *
-    FROM `vala_game_info`
-    WHERE id > 0
-      AND `vala_game_info`.`deleted_at` IS NULL
-    ORDER BY season_package_id asc, `index` asc
-    """
-    try:
-        with conn.cursor() as cur:
-            cur.execute(sql)
-            rows = cur.fetchall() or []
-            # 构建映射表：按查询结果的顺序，索引即为unit_id
-            id_2_unit_index = {}
-            for index, row in enumerate(rows):
-                id_2_unit_index[row["id"]] = index
-            return id_2_unit_index
-    except Exception as e:
-        print(f"[ERROR] 获取story_id到unit_id映射失败: {e}")
-        return {}
-
-
-def get_chapter_id_to_lesson_id(conn: Any) -> Dict[int, int]:
-    """
-    从MySQL获取 chapter_id 到 lesson_id 的映射关系
-
-    Args:
-        conn: MySQL数据库连接
-
-    Returns:
-        映射字典 {chapter_id: lesson_id}
-    """
-    sql = """
-    SELECT id, `index`
-    FROM `vala_game_chapter`
-    WHERE deleted_at IS NULL
-    """
-    try:
-        with conn.cursor() as cur:
-            cur.execute(sql)
-            rows = cur.fetchall() or []
-            # 构建映射表：chapter的index字段即为lesson_id
-            chapter_id_to_lesson_id = {}
-            for row in rows:
-                chapter_id_to_lesson_id[row["id"]] = row["index"]
-            return chapter_id_to_lesson_id
-    except Exception as e:
-        print(f"[ERROR] 获取chapter_id到lesson_id映射失败: {e}")
-        return {}
-
-
-# 组件类型到组件名称的映射
-COMPONENT_TYPE_NAMES = {
-    "mid_vocab_item": "物品互动",
-    "mid_vocab_image": "图片互动",
-    "mid_vocab_fillBlank": "填词互动",
-    "mid_vocab_instruction": "指令互动",
-    "mid_sentence_dialogue": "对话互动",  # 需要根据mode进一步判断
-    "mid_sentence_voice": "语音互动",
-    "mid_sentence_material": "材料互动",
-    "mid_sentence_makeSentence": "造句互动",
-    "mid_grammar_cloze": "挖空互动",
-    "mid_grammar_sentence": "组句互动",
-    "mid_pron_pron": "发音互动",
-    "core_speaking_reply": "口语快答",
-    "core_speaking_inquiry": "口语妙问",
-    "core_speaking_explore": "口语探讨",
-    "core_speaking_monologue": "口语独白",
-    "core_reading_order": "合作阅读",
-    "core_listening_order": "合作听力",
-    "core_writing_imgMakeSentence": "看图组句",
-    "core_writing_imgWrite": "看图撰写",
-    "core_writing_questionMakeSentence": "问题组句",
-    "core_writing_questionWrite": "问题撰写",
-}
-
-
-def get_component_name(c_type: str, component_config: Optional[Dict[str, Any]]) -> str:
-    """
-    根据c_type和组件配置获取组件名称
-
-    Args:
-        c_type: 组件类型
-        component_config: 组件配置（用于判断对话互动的mode）
-
-    Returns:
-        组件名称
-    """
-    if not c_type:
-        return ""
-
-    # 特殊处理：对话互动需要根据mode判断
-    if c_type == "mid_sentence_dialogue" and component_config:
-        try:
-            question = component_config.get("question", {})
-            mode = question.get("mode", "")
-            if mode == "express":
-                return "对话互动-表达"
-            elif mode == "read":
-                return "对话互动-朗读"
-        except Exception:
-            pass
-
-    return COMPONENT_TYPE_NAMES.get(c_type, "")
-
-
-def batch_fetch_component_configs(play_records: List[Dict[str, Any]], mysql_conn: Any) -> Dict[str, Dict[str, Any]]:
-    """
-    批量查询组件配置信息
-
-    Args:
-        play_records: 播放记录列表
-        mysql_conn: MySQL连接
-
-    Returns:
-        组件配置映射 {c_type_c_id: {title, component_config, kp_relation_info}}
-    """
-    print(f"  [MySQL] 开始批量查询组件配置...")
-    start_time = datetime.datetime.now()
-
-    # 收集需要查询的c_type和c_id
-    mid_c_ids = set()
-    core_c_ids = set()
-    mid_type_id_pairs = []  # 用于调试日志
-    core_type_id_pairs = []
-
-    for record in play_records:
-        c_type = record.get("c_type", "")
-        c_id = record.get("c_id")
-        if c_type and c_id:
-            if c_type.startswith("mid"):
-                mid_c_ids.add(c_id)
-                mid_type_id_pairs.append((c_type, c_id))
-            elif c_type.startswith("core"):
-                core_c_ids.add(c_id)
-                core_type_id_pairs.append((c_type, c_id))
-
-    print(f"  [MySQL] 需要查询中互动组件: {len(mid_c_ids)}个, 核心互动组件: {len(core_c_ids)}个")
-    if mid_c_ids:
-        print(f"  [MySQL] 中互动组件ID列表（前10个）: {sorted(list(mid_c_ids))[:10]}")
-    if core_c_ids:
-        print(f"  [MySQL] 核心互动组件ID列表（前10个）: {sorted(list(core_c_ids))[:10]}")
-
-    config_map = {}
-
-    # 批量查询middle_interaction_component
-    if mid_c_ids:
-        try:
-            with mysql_conn.cursor() as cur:
-                placeholders = ','.join(['%s'] * len(mid_c_ids))
-                sql = f"""
-                SELECT c_id, c_type, title, component_config, kp_relation_info
-                FROM middle_interaction_component
-                WHERE c_id IN ({placeholders}) AND deleted_at IS NULL
-                """
-                print(f"  [MySQL] 执行中互动组件查询，查询条件: c_id IN ({len(mid_c_ids)}个ID)")
-                cur.execute(sql, tuple(mid_c_ids))
-                rows = cur.fetchall() or []
-                print(f"  [MySQL] 查询到{len(rows)}条中互动组件配置")
-
-                if len(rows) == 0 and len(mid_c_ids) > 0:
-                    print(f"  [MySQL] [警告] 查询结果为空！可能的原因：")
-                    print(f"  [MySQL]   - 数据库中没有匹配的c_id记录")
-                    print(f"  [MySQL]   - deleted_at字段不为NULL")
-                    print(f"  [MySQL]   - c_id不存在")
-
-                for idx, row in enumerate(rows):
-                    c_type = row.get("c_type", "")
-                    c_id = row.get("c_id")
-                    key = f"{c_type}_{c_id}"
-
-                    if idx < 3:  # 输出前3条的详细信息
-                        print(f"  [MySQL] [样例{idx+1}] id={c_id}, c_type={c_type}, key={key}")
-                        print(f"  [MySQL] [样例{idx+1}] title={row.get('title', '')[:50]}")
-
-                    # 解析component_config
-                    component_config = row.get("component_config")
-                    if isinstance(component_config, str):
-                        try:
-                            component_config = json.loads(component_config)
-                        except Exception as e:
-                            print(f"  [MySQL] [警告] 解析component_config失败 (id={c_id}): {e}")
-                            component_config = {}
-
-                    # 提取question字段作为摘要
-                    summary = ""
-                    if isinstance(component_config, dict):
-                        question = component_config.get("question")
-                        summary = to_json_str(question) if question else ""
-                        if idx < 3 and question:
-                            print(f"  [MySQL] [样例{idx+1}] 提取到question字段，长度: {len(summary)}")
-
-                    # 解析kp_relation_info
-                    kp_relation_info = row.get("kp_relation_info")
-                    if isinstance(kp_relation_info, str):
-                        try:
-                            kp_relation_info = json.loads(kp_relation_info)
-                        except Exception:
-                            kp_relation_info = []
-
-                    config_map[key] = {
-                        "title": row.get("title", ""),
-                        "component_config": component_config,
-                        "summary": summary,
-                        "kp_relation_info": to_json_str(kp_relation_info),
-                    }
-
-                print(f"  [MySQL] 中互动组件配置已加入config_map，当前map大小: {len(config_map)}")
-        except Exception as e:
-            print(f"  [MySQL] [错误] 查询中互动组件配置失败: {e}")
-            import traceback
-            traceback.print_exc()
-
-    # 批量查询core_interaction_component
-    if core_c_ids:
-        try:
-            with mysql_conn.cursor() as cur:
-                placeholders = ','.join(['%s'] * len(core_c_ids))
-                sql = f"""
-                SELECT c_id, c_type, title, component_config, kp_relation_info
-                FROM core_interaction_component
-                WHERE c_id IN ({placeholders}) AND deleted_at IS NULL
-                """
-                print(f"  [MySQL] 执行核心互动组件查询，查询条件: c_id IN ({len(core_c_ids)}个ID)")
-                cur.execute(sql, tuple(core_c_ids))
-                rows = cur.fetchall() or []
-                print(f"  [MySQL] 查询到{len(rows)}条核心互动组件配置")
-
-                if len(rows) == 0 and len(core_c_ids) > 0:
-                    print(f"  [MySQL] [警告] 查询结果为空！可能的原因：")
-                    print(f"  [MySQL]   - 数据库中没有匹配的c_id记录")
-                    print(f"  [MySQL]   - deleted_at字段不为NULL")
-                    print(f"  [MySQL]   - c_id不存在")
-
-                for idx, row in enumerate(rows):
-                    c_type = row.get("c_type", "")
-                    c_id = row.get("c_id")
-                    key = f"{c_type}_{c_id}"
-
-                    if idx < 3:  # 输出前3条的详细信息
-                        print(f"  [MySQL] [样例{idx+1}] id={c_id}, c_type={c_type}, key={key}")
-                        print(f"  [MySQL] [样例{idx+1}] title={row.get('title', '')[:50]}")
-
-                    # 解析component_config
-                    component_config = row.get("component_config")
-                    if isinstance(component_config, str):
-                        try:
-                            component_config = json.loads(component_config)
-                        except Exception as e:
-                            print(f"  [MySQL] [警告] 解析component_config失败 (id={c_id}): {e}")
-                            component_config = {}
-
-                    # 提取taskInfo字段作为摘要
-                    summary = ""
-                    if isinstance(component_config, dict):
-                        task_info = component_config.get("taskInfo")
-                        summary = to_json_str(task_info) if task_info else ""
-                        if idx < 3 and task_info:
-                            print(f"  [MySQL] [样例{idx+1}] 提取到taskInfo字段，长度: {len(summary)}")
-
-                    # 解析kp_relation_info
-                    kp_relation_info = row.get("kp_relation_info")
-                    if isinstance(kp_relation_info, str):
-                        try:
-                            kp_relation_info = json.loads(kp_relation_info)
-                        except Exception:
-                            kp_relation_info = []
-
-                    config_map[key] = {
-                        "title": row.get("title", ""),
-                        "component_config": component_config,
-                        "summary": summary,
-                        "kp_relation_info": to_json_str(kp_relation_info),
-                    }
-
-                print(f"  [MySQL] 核心互动组件配置已加入config_map，当前map大小: {len(config_map)}")
-        except Exception as e:
-            print(f"  [MySQL] [错误] 查询核心互动组件配置失败: {e}")
-            import traceback
-            traceback.print_exc()
-
-    print(f"  [MySQL] 组件配置查询完成，共{len(config_map)}条，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-    return config_map
-
-
-def calculate_accuracy(question_list: Any) -> float:
-    """
-    计算问题列表的正确率
-
-    Args:
-        question_list: 问题列表（可能是JSON字符串或list）
-
-    Returns:
-        正确率（百分比，保留2位小数）
-    """
-    try:
-        if isinstance(question_list, str):
-            question_list = json.loads(question_list)
-
-        if not isinstance(question_list, list) or len(question_list) == 0:
-            return 0.0
-
-        total = len(question_list)
-        correct = sum(1 for q in question_list if q.get('isRight') == True)
-        accuracy = round(correct / total * 100, 2) if total > 0 else 0.0
-
-        return accuracy
-    except Exception:
-        return 0.0
-
-
-
-def fetch_character_ids_by_account(account_id: str, conn: Any) -> List[str]:
-    """根据账户id查询对应的角色id列表"""
-    sql = "SELECT id FROM vala_app_character WHERE account_id = %s"
-    try:
-        with conn.cursor() as cur:
-            cur.execute(sql, (account_id,))
-            rows = cur.fetchall() or []
-            return [str(row["id"]) for row in rows if row.get("id")]
-    except Exception as e:
-        print(f"[ERROR] 查询账户id={account_id}的角色id失败: {e}")
-        return []
-
-
-def fetch_pg_play_records(user_id: str, conn: Any, mysql_conn: Any) -> List[Dict[str, Any]]:
-    """
-    查询互动组件学习记录并补充组件配置信息
-
-    Args:
-        user_id: 用户ID（角色ID）
-        conn: PostgreSQL数据库连接
-        mysql_conn: MySQL数据库连接
-
-    Returns:
-        互动组件学习记录列表
-    """
-    print(f"  [PG] 开始查询互动组件学习记录（8张分表）...")
-    start_time = datetime.datetime.now()
-
-    tables = [f"user_component_play_record_{i}" for i in range(8)]
-    rows: List[Dict[str, Any]] = []
-    with conn.cursor(cursor_factory=RealDictCursor) as cur:
-        for t in tables:
-            try:
-                cur.execute(
-                    f"""
-                    SELECT user_id, component_unique_code, session_id, c_type, c_id,
-                           play_result, user_behavior_info, updated_at
-                    FROM {t}
-                    WHERE user_id = %s
-                    ORDER BY updated_at DESC
-                    """,
-                    (user_id,),
-                )
-                part = cur.fetchall() or []
-                if part:
-                    print(f"  [PG] 表{t}查到{len(part)}条记录")
-                for r in part:
-                    r = dict(r)
-                    r["play_result"] = to_json_str(r.get("play_result"))
-                    r["user_behavior_info"] = to_json_str(r.get("user_behavior_info"))
-                    # 将带时区的时间转换为无时区，避免Excel写入报错
-                    upd = r.get("updated_at")
-                    if isinstance(upd, datetime.datetime):
-                        try:
-                            if upd.tzinfo is not None and upd.tzinfo.utcoffset(upd) is not None:
-                                r["updated_at"] = upd.replace(tzinfo=None)
-                        except Exception:
-                            # 回退为字符串
-                            r["updated_at"] = str(upd)
-                    rows.append(r)
-            except Exception as e:
-                print(f"  [PG] 表{t}查询失败: {e}")
-                continue
-
-    rows.sort(key=lambda x: parse_time(x.get("updated_at")) or datetime.datetime.min, reverse=True)
-    print(f"  [PG] 互动组件学习记录查询完成，共{len(rows)}条，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-
-    # 批量查询组件配置
-    if rows and mysql_conn:
-        config_map = batch_fetch_component_configs(rows, mysql_conn)
-
-        # 补充组件信息
-        print(f"  [PG] 开始补充组件配置信息...")
-        filled_count = 0
-        empty_count = 0
-        sample_keys = []
-        sample_mode_check = []  # 检查对话互动的mode
-
-        for r in rows:
-            c_type = r.get("c_type", "")
-            c_id = r.get("c_id")
-            key = f"{c_type}_{c_id}" if c_type and c_id else ""
-
-            config = config_map.get(key, {})
-            component_config = config.get("component_config", {})
-
-            component_name = get_component_name(c_type, component_config)
-            r["互动组件名称"] = component_name
-            r["组件标题"] = config.get("title", "")
-            r["组件配置摘要"] = config.get("summary", "")
-            r["知识点"] = config.get("kp_relation_info", "")
-
-            # 统计填充情况
-            if config:
-                filled_count += 1
-                if len(sample_keys) < 3:
-                    sample_keys.append((key, component_name, r["组件标题"][:30] if r["组件标题"] else ""))
-
-                # 检查对话互动的mode
-                if c_type == "mid_sentence_dialogue" and len(sample_mode_check) < 3:
-                    mode = ""
-                    if isinstance(component_config, dict):
-                        question = component_config.get("question", {})
-                        if isinstance(question, dict):
-                            mode = question.get("mode", "")
-                    sample_mode_check.append({
-                        "key": key,
-                        "mode": mode,
-                        "component_name": component_name
-                    })
-            else:
-                empty_count += 1
-                if empty_count <= 5:  # 输出前5个未匹配的key
-                    print(f"  [PG] [警告] 未找到组件配置: key={key}")
-
-        print(f"  [PG] 组件配置信息补充完成")
-        print(f"  [PG] 匹配到配置: {filled_count}条, 未匹配: {empty_count}条")
-        if sample_keys:
-            print(f"  [PG] 样例数据（前3条）:")
-            for key, name, title in sample_keys:
-                print(f"  [PG]   - key={key}, 名称={name}, 标题={title}")
-
-        if sample_mode_check:
-            print(f"  [PG] 对话互动mode检查（前3条）:")
-            for s in sample_mode_check:
-                print(f"  [PG]   - key={s['key']}, mode={s['mode']}, 最终名称={s['component_name']}")
-
-    return rows
-
-
-def fetch_pg_unit_review(user_id: str, conn: Any, id_2_unit_index: Dict[int, int], chapter_id_to_lesson_id: Dict[int, int]) -> List[Dict[str, Any]]:
-    """
-    查询课程巩固记录
-
-    Args:
-        user_id: 用户ID（角色ID）
-        conn: PostgreSQL数据库连接
-        id_2_unit_index: story_id到unit_id的映射字典
-        chapter_id_to_lesson_id: chapter_id到lesson_id的映射字典
-
-    Returns:
-        课程巩固记录列表
-    """
-    print(f"  [PG] 开始查询课程巩固记录...")
-    start_time = datetime.datetime.now()
-
-    sql = (
-        "SELECT user_id, story_id, chapter_id, question_list, updated_at "
-        "FROM user_unit_review_question_result WHERE user_id = %s ORDER BY updated_at DESC"
-    )
-    with conn.cursor(cursor_factory=RealDictCursor) as cur:
-        try:
-            cur.execute(sql, (user_id,))
-            rows = cur.fetchall() or []
-        except Exception as e:
-            print(f"  [PG] 课程巩固记录查询失败: {e}")
-            rows = []
-    out: List[Dict[str, Any]] = []
-    for r in rows:
-        d = dict(r)
-
-        # 映射 story_id 到 unit_id
-        story_id = d.get("story_id")
-        unit_id = id_2_unit_index.get(story_id) if story_id else None
-        d["unit_id"] = unit_id
-
-        # 映射 chapter_id 到 lesson_id
-        chapter_id = d.get("chapter_id")
-        lesson_id = chapter_id_to_lesson_id.get(chapter_id) if chapter_id else None
-        d["lesson_id"] = lesson_id
-
-        # 计算正确率
-        question_list = d.get("question_list")
-        d["正确率"] = calculate_accuracy(question_list)
-
-        d["question_list"] = to_json_str(question_list)
-        upd = d.get("updated_at")
-        if isinstance(upd, datetime.datetime):
-            try:
-                if upd.tzinfo is not None and upd.tzinfo.utcoffset(upd) is not None:
-                    d["updated_at"] = upd.replace(tzinfo=None)
-            except Exception:
-                d["updated_at"] = str(upd)
-        out.append(d)
-
-    print(f"  [PG] 课程巩固记录查询完成，共{len(out)}条，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-    return out
-
-
-def fetch_pg_unit_challenge(user_id: str, conn: Any, id_2_unit_index: Dict[int, int]) -> List[Dict[str, Any]]:
-    """
-    查询单元挑战记录
-
-    Args:
-        user_id: 用户ID（角色ID）
-        conn: PostgreSQL数据库连接
-        id_2_unit_index: story_id到unit_id的映射字典
-
-    Returns:
-        单元挑战记录列表
-    """
-    print(f"  [PG] 开始查询单元挑战记录...")
-    start_time = datetime.datetime.now()
-
-    sql = (
-        "SELECT user_id, story_id, category, score_text, question_list, updated_at "
-        "FROM user_unit_challenge_question_result WHERE user_id = %s ORDER BY updated_at DESC"
-    )
-    with conn.cursor(cursor_factory=RealDictCursor) as cur:
-        try:
-            cur.execute(sql, (user_id,))
-            rows = cur.fetchall() or []
-        except Exception as e:
-            print(f"  [PG] 单元挑战记录查询失败: {e}")
-            rows = []
-    out: List[Dict[str, Any]] = []
-    for r in rows:
-        d = dict(r)
-
-        # 映射 story_id 到 unit_id
-        story_id = d.get("story_id")
-        unit_id = id_2_unit_index.get(story_id) if story_id else None
-        d["unit_id"] = unit_id
-
-        d["question_list"] = to_json_str(d.get("question_list"))
-        upd = d.get("updated_at")
-        if isinstance(upd, datetime.datetime):
-            try:
-                if upd.tzinfo is not None and upd.tzinfo.utcoffset(upd) is not None:
-                    d["updated_at"] = upd.replace(tzinfo=None)
-            except Exception:
-                d["updated_at"] = str(upd)
-        out.append(d)
-
-    print(f"  [PG] 单元挑战记录查询完成，共{len(out)}条，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-    return out
-
-
-def fetch_pg_unit_summary(user_id: str, conn: Any, id_2_unit_index: Dict[int, int]) -> List[Dict[str, Any]]:
-    """
-    查询单元总结知识点结果数据
-
-    Args:
-        user_id: 用户ID（角色ID）
-        conn: PostgreSQL数据库连接
-        id_2_unit_index: story_id到unit_id的映射字典
-
-    Returns:
-        单元总结记录列表
-    """
-    print(f"  [PG] 开始查询单元总结记录...")
-    start_time = datetime.datetime.now()
-
-    sql = (
-        "SELECT id, user_id, story_id, updated_at, km_id, km_type, play_time "
-        "FROM user_unit_summary_km_result WHERE user_id = %s AND deleted_at IS NULL ORDER BY updated_at DESC"
-    )
-    with conn.cursor(cursor_factory=RealDictCursor) as cur:
-        try:
-            cur.execute(sql, (user_id,))
-            rows = cur.fetchall() or []
-        except Exception as e:
-            print(f"  [PG] 单元总结记录查询失败: {e}")
-            rows = []
-
-    out: List[Dict[str, Any]] = []
-    for r in rows:
-        d = dict(r)
-        # 映射 story_id 到 unit_id
-        story_id = d.get("story_id")
-        unit_id = id_2_unit_index.get(story_id) if story_id else None
-        d["unit_id"] = unit_id
-
-        # 转换 play_time (毫秒) 为秒 (整数)
-        play_time = d.get("play_time")
-        d["play_time_seconds"] = play_time // 1000 if play_time else 0
-
-        # 移除时区信息
-        upd = d.get("updated_at")
-        if isinstance(upd, datetime.datetime):
-            try:
-                if upd.tzinfo is not None and upd.tzinfo.utcoffset(upd) is not None:
-                    d["updated_at"] = upd.replace(tzinfo=None)
-            except Exception:
-                d["updated_at"] = str(upd)
-        out.append(d)
-
-    print(f"  [PG] 单元总结记录查询完成，共{len(out)}条，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-    return out
-
-
-def generate_statistics(sheet2_rows: List[Dict[str, Any]], sheet5_rows: List[Dict[str, Any]]) -> tuple:
-    """
-    生成汇总统计数据
-
-    Args:
-        sheet2_rows: 互动组件学习记录
-        sheet5_rows: 单元总结记录
-
-    Returns:
-        (组件统计DataFrame, 知识点统计DataFrame, 单元时长统计DataFrame)
-    """
-    if pd is None:
-        raise RuntimeError("缺少pandas依赖，请安装后再运行。")
-
-    print(f"  [统计] 开始生成汇总统计数据...")
-    start_time = datetime.datetime.now()
-
-    from collections import defaultdict
-
-    # ============ a. 所有互动-按互动组件类型-通过情况统计 ============
-    component_stats_data = []
-    component_stats = defaultdict(lambda: {"Perfect": 0, "Good": 0, "Failed": 0, "Pass": 0, "Oops": 0, "total": 0})
-
-    # 用于调试
-    sample_results = []
-    parse_error_count = 0
-
-    for idx, record in enumerate(sheet2_rows):
-        component_name = record.get("互动组件名称", "")
-        if not component_name:
-            continue
-
-        play_result_str = record.get("play_result", "")
-
-        # 解析play_result
-        result = ""
-        try:
-            # 先判断是否是简单的字符串（Perfect/Good/Failed/Pass/Oops）
-            if isinstance(play_result_str, str):
-                # 去除空格后检查
-                stripped = play_result_str.strip()
-                if stripped in ["Perfect", "Good", "Failed", "Pass", "Oops"]:
-                    # 直接使用
-                    result = stripped
-                else:
-                    # 尝试JSON解析
-                    try:
-                        play_result = json.loads(play_result_str)
-                        if isinstance(play_result, dict):
-                            result = play_result.get("result", "")
-                        else:
-                            result = ""
-                    except:
-                        result = ""
-            else:
-                # 如果不是字符串，尝试当dict处理
-                if isinstance(play_result_str, dict):
-                    result = play_result_str.get("result", "")
-                else:
-                    result = ""
-
-            # 收集前3个样例
-            if idx < 3:
-                sample_results.append({
-                    "component": component_name,
-                    "raw": str(play_result_str)[:100],
-                    "result": result
-                })
-        except Exception as e:
-            parse_error_count += 1
-            if parse_error_count <= 3:
-                print(f"  [统计] [警告] 解析play_result失败 (第{idx+1}条): {e}, 原始值: {str(play_result_str)[:100]}")
-            result = ""
-
-        component_stats[component_name]["total"] += 1
-        if result in ["Perfect", "Good", "Failed", "Pass", "Oops"]:
-            component_stats[component_name][result] += 1
-
-    print(f"  [统计] play_result解析样例（前3条）:")
-    for s in sample_results:
-        print(f"  [统计]   - 组件: {s['component']}, 结果: {s['result']}, 原始: {s['raw']}")
-    if parse_error_count > 0:
-        print(f"  [统计] play_result解析失败总数: {parse_error_count}")
-
-    # 生成统计数据行
-    for component_name in sorted(component_stats.keys()):
-        stats = component_stats[component_name]
-        total = stats["total"]
-        perfect = stats["Perfect"]
-        good = stats["Good"]
-        failed = stats["Failed"]
-        pass_count = stats["Pass"]
-        oops = stats["Oops"]
-
-        perfect_ratio = round(perfect / total * 100, 2) if total > 0 else 0
-        good_ratio = round(good / total * 100, 2) if total > 0 else 0
-        failed_ratio = round(failed / total * 100, 2) if total > 0 else 0
-        pass_ratio = round(pass_count / total * 100, 2) if total > 0 else 0
-        oops_ratio = round(oops / total * 100, 2) if total > 0 else 0
-
-        component_stats_data.append({
-            "互动组件名称": component_name,
-            "总数量": total,
-            "Perfect数量": perfect,
-            "Good数量": good,
-            "Failed数量": failed,
-            "Pass数量": pass_count,
-            "Oops数量": oops,
-            "Perfect比例(%)": perfect_ratio,
-            "Good比例(%)": good_ratio,
-            "Failed比例(%)": failed_ratio,
-            "Pass比例(%)": pass_ratio,
-            "Oops比例(%)": oops_ratio,
-        })
-
-    # ============ b. 中互动组件-按知识点-通过情况统计 ============
-    kp_stats_data = []
-    kp_stats = defaultdict(lambda: {"Perfect": 0, "Good": 0, "Failed": 0, "Pass": 0, "Oops": 0, "total": 0})
-
-    # 调试信息
-    mid_count = 0
-    has_kp_count = 0
-    sample_kp_records = []
-
-    for idx, record in enumerate(sheet2_rows):
-        c_type = record.get("c_type", "")
-        if not c_type or not c_type.startswith("mid"):
-            continue
-
-        mid_count += 1
-        kp_relation_info_str = record.get("知识点", "")
-
-        if not kp_relation_info_str:
-            continue
-
-        has_kp_count += 1
-
-        # 解析知识点
-        try:
-            if isinstance(kp_relation_info_str, str):
-                kp_relation_info = json.loads(kp_relation_info_str)
-            else:
-                kp_relation_info = kp_relation_info_str
-
-            if not isinstance(kp_relation_info, list):
-                continue
-
-            # 收集样例
-            if len(sample_kp_records) < 3:
-                sample_kp_records.append({
-                    "c_type": c_type,
-                    "kp_count": len(kp_relation_info),
-                    "kp_info": str(kp_relation_info)[:200]
-                })
-
-            # 解析play_result（使用相同的逻辑）
-            play_result_str = record.get("play_result", "")
-            result = ""
-            if isinstance(play_result_str, str):
-                stripped = play_result_str.strip()
-                if stripped in ["Perfect", "Good", "Failed", "Pass", "Oops"]:
-                    result = stripped
-                else:
-                    try:
-                        play_result = json.loads(play_result_str)
-                        if isinstance(play_result, dict):
-                            result = play_result.get("result", "")
-                    except:
-                        pass
-            elif isinstance(play_result_str, dict):
-                result = play_result_str.get("result", "")
-
-            # 为每个知识点统计
-            for kp in kp_relation_info:
-                if not isinstance(kp, dict):
-                    continue
-
-                kp_id = kp.get("kpId", "")
-                kp_type = kp.get("kpType", "")
-                kp_title = kp.get("kpTitle", "")
-
-                if not kp_id:
-                    continue
-
-                kp_key = f"{kp_id}|{kp_type}|{kp_title}"
-                kp_stats[kp_key]["total"] += 1
-                if result in ["Perfect", "Good", "Failed", "Pass", "Oops"]:
-                    kp_stats[kp_key][result] += 1
-
-        except Exception as e:
-            if len(sample_kp_records) < 5:
-                print(f"  [统计] [警告] 解析知识点失败: {e}, 原始值: {str(kp_relation_info_str)[:100]}")
-            continue
-
-    print(f"  [统计] 中互动组件统计: 总数={mid_count}, 有知识点={has_kp_count}, 知识点条目数={len(kp_stats)}")
-    if sample_kp_records:
-        print(f"  [统计] 知识点样例（前3条）:")
-        for s in sample_kp_records:
-            print(f"  [统计]   - c_type={s['c_type']}, 知识点数量={s['kp_count']}, 内容={s['kp_info']}")
-
-    # 生成知识点统计数据行
-    for kp_key in sorted(kp_stats.keys()):
-        parts = kp_key.split("|")
-        if len(parts) != 3:
-            continue
-
-        kp_id, kp_type, kp_title = parts
-        stats = kp_stats[kp_key]
-        total = stats["total"]
-        perfect = stats["Perfect"]
-        good = stats["Good"]
-        failed = stats["Failed"]
-        pass_count = stats["Pass"]
-        oops = stats["Oops"]
-
-        perfect_ratio = round(perfect / total * 100, 2) if total > 0 else 0
-        good_ratio = round(good / total * 100, 2) if total > 0 else 0
-        failed_ratio = round(failed / total * 100, 2) if total > 0 else 0
-        pass_ratio = round(pass_count / total * 100, 2) if total > 0 else 0
-        oops_ratio = round(oops / total * 100, 2) if total > 0 else 0
-
-        kp_stats_data.append({
-            "知识点ID": kp_id,
-            "知识点类型": kp_type,
-            "知识点标题": kp_title,
-            "总数量": total,
-            "Perfect数量": perfect,
-            "Good数量": good,
-            "Failed数量": failed,
-            "Pass数量": pass_count,
-            "Oops数量": oops,
-            "Perfect比例(%)": perfect_ratio,
-            "Good比例(%)": good_ratio,
-            "Failed比例(%)": failed_ratio,
-            "Pass比例(%)": pass_ratio,
-            "Oops比例(%)": oops_ratio,
-        })
-
-    # ============ c. 单元总结-按单元统计时长 ============
-    unit_time_stats_data = []
-    unit_time_stats = defaultdict(int)
-
-    for record in sheet5_rows:
-        unit_id = record.get("unit_id")
-        play_time_seconds = record.get("play_time_seconds", 0)
-
-        if unit_id is not None:
-            unit_time_stats[unit_id] += play_time_seconds
-
-    # 生成单元时长统计数据行
-    for unit_id in sorted(unit_time_stats.keys()):
-        total_seconds = unit_time_stats[unit_id]
-        total_minutes = int(total_seconds / 60)
-
-        unit_time_stats_data.append({
-            "单元ID": f"unit_{unit_id}",
-            "总时长(秒)": total_seconds,
-            "总时长(分钟)": total_minutes,
-        })
-
-    print(f"  [统计] 汇总统计数据生成完成，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-    print(f"  [统计] 生成了{len(component_stats_data)}条组件统计, {len(kp_stats_data)}条知识点统计, {len(unit_time_stats_data)}条单元时长统计")
-
-    return (
-        pd.DataFrame(component_stats_data),
-        pd.DataFrame(kp_stats_data),
-        pd.DataFrame(unit_time_stats_data)
-    )
-
-
-
-def write_excel(path: str, sheet1_rows: List[Dict[str, Any]], sheet2_rows: List[Dict[str, Any]], sheet3_rows: List[Dict[str, Any]], sheet4_rows: List[Dict[str, Any]], sheet5_rows: List[Dict[str, Any]], stats_component_df: Any, stats_kp_df: Any, stats_unit_time_df: Any) -> None:
-    if pd is None:
-        raise RuntimeError("缺少pandas依赖，请安装后再运行。")
-
-    print(f"  [Excel] 开始写入Excel文件: {path}")
-    start_time = datetime.datetime.now()
-
-    out_dir = os.path.dirname(path) or "."
-    os.makedirs(out_dir, exist_ok=True)
-    with pd.ExcelWriter(path, engine="openpyxl") as writer:
-        pd.DataFrame(sheet1_rows, columns=SHEET1_COLUMNS).to_excel(writer, sheet_name="全部音频数据", index=False)
-        pd.DataFrame(sheet2_rows, columns=SHEET2_COLUMNS).to_excel(writer, sheet_name="互动组件学习记录", index=False)
-        pd.DataFrame(sheet3_rows, columns=SHEET3_COLUMNS).to_excel(writer, sheet_name="课程巩固记录", index=False)
-        pd.DataFrame(sheet4_rows, columns=SHEET4_COLUMNS).to_excel(writer, sheet_name="单元挑战记录", index=False)
-        pd.DataFrame(sheet5_rows, columns=SHEET5_COLUMNS).to_excel(writer, sheet_name="单元总结记录", index=False)
-        stats_component_df.to_excel(writer, sheet_name="统计-互动组件通过情况", index=False)
-        stats_kp_df.to_excel(writer, sheet_name="统计-知识点通过情况", index=False)
-        stats_unit_time_df.to_excel(writer, sheet_name="统计-单元总结时长", index=False)
-
-    print(f"  [Excel] 写入完成，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-
-
-def get_date_str() -> str:
-    """获取当前日期字符串 格式：YYYYMMDD"""
-    return datetime.datetime.now().strftime("%Y%m%d")
-
-
-def export_single_user(user_id: str, es_cfg: Dict[str, Any], pg_conn: Any, mysql_conn: Any, output_path: str, id_2_unit_index: Dict[int, int], chapter_id_to_lesson_id: Dict[int, int]) -> bool:
-    """
-    导出单个角色id的数据
-
-    Args:
-        user_id: 角色ID
-        es_cfg: ES配置
-        pg_conn: PostgreSQL连接
-        mysql_conn: MySQL连接
-        output_path: 输出路径
-        id_2_unit_index: story_id到unit_id的映射字典
-        chapter_id_to_lesson_id: chapter_id到lesson_id的映射字典
-
-    Returns:
-        True表示成功，False表示失败
-    """
-    try:
-        print(f"\n[INFO] ========== 开始导出角色id={user_id} ==========")
-        total_start_time = datetime.datetime.now()
-
-        # 查询ES数据
-        sheet1_rows = fetch_es_user_audio(user_id, es_cfg)
-
-        # 查询PG数据
-        sheet2_rows = fetch_pg_play_records(user_id, pg_conn, mysql_conn)
-        sheet3_rows = fetch_pg_unit_review(user_id, pg_conn, id_2_unit_index, chapter_id_to_lesson_id)
-        sheet4_rows = fetch_pg_unit_challenge(user_id, pg_conn, id_2_unit_index)
-        sheet5_rows = fetch_pg_unit_summary(user_id, pg_conn, id_2_unit_index)
-
-        # 检查是否有有效数据
-        total_records = len(sheet1_rows) + len(sheet2_rows) + len(sheet3_rows) + len(sheet4_rows) + len(sheet5_rows)
-        print(f"  [统计] 数据汇总:")
-        print(f"    - 全部音频数据: {len(sheet1_rows)}条")
-        print(f"    - 互动组件学习记录: {len(sheet2_rows)}条")
-        print(f"    - 课程巩固记录: {len(sheet3_rows)}条")
-        print(f"    - 单元挑战记录: {len(sheet4_rows)}条")
-        print(f"    - 单元总结记录: {len(sheet5_rows)}条")
-        print(f"    - 总计: {total_records}条")
-
-        if total_records == 0:
-            print(f"[WARN] 角色id={user_id} 没有找到任何有效记录，跳过导出")
-            return False
-
-        # 生成汇总统计数据
-        stats_component_df, stats_kp_df, stats_unit_time_df = generate_statistics(sheet2_rows, sheet5_rows)
-
-        # 写入Excel
-        write_excel(output_path, sheet1_rows, sheet2_rows, sheet3_rows, sheet4_rows, sheet5_rows, stats_component_df, stats_kp_df, stats_unit_time_df)
-
-        total_time = (datetime.datetime.now() - total_start_time).total_seconds()
-        print(f"[INFO] 角色id={user_id} 导出成功")
-        print(f"[INFO] 文件路径: {output_path}")
-        print(f"[INFO] 总耗时: {total_time:.2f}秒")
-        print(f"[INFO] ========== 完成 ==========\n")
-        return True
-
-    except Exception as e:
-        print(f"[ERROR] 角色id={user_id} 导出失败: {e}")
-        import traceback
-        traceback.print_exc()
-        return False
-
-
-def main():
-    load_env()
-
-    # 确定运行模式并收集需要导出的角色id列表
-    user_id_list: List[tuple] = []  # [(user_id, account_id or None), ...]
-    date_str = get_date_str()
-
-    # 检查三种模式的配置
-    has_user_id = USER_ID is not None
-    has_user_id_list = USER_ID_LIST is not None and len(USER_ID_LIST) > 0
-    has_account_id_list = ACCOUNT_ID_LIST is not None and len(ACCOUNT_ID_LIST) > 0
-
-    # 验证只能配置一种模式
-    mode_count = sum([has_user_id, has_user_id_list, has_account_id_list])
-    if mode_count == 0:
-        raise RuntimeError("请配置 USER_ID、USER_ID_LIST 或 ACCOUNT_ID_LIST 中的一个")
-    if mode_count > 1:
-        raise RuntimeError("USER_ID、USER_ID_LIST、ACCOUNT_ID_LIST 只能配置一个，请检查配置")
-
-    # 模式1：单个角色id
-    if has_user_id:
-        user_id_list = [(str(USER_ID), None)]
-        print(f"[INFO] 运行模式：单个角色id")
-
-    # 模式2：角色id列表
-    elif has_user_id_list:
-        user_id_list = [(str(uid), None) for uid in USER_ID_LIST]
-        print(f"[INFO] 运行模式：角色id列表，共{len(user_id_list)}个角色")
-
-    # 模式3：账户id列表
-    elif has_account_id_list:
-        print(f"[INFO] 运行模式：账户id列表，共{len(ACCOUNT_ID_LIST)}个账户")
-        mysql_conn = None
-        try:
-            mysql_conn = get_mysql_conn("vala_user")  # 查询用户表，使用 vala_user 数据库
-            for account_id in ACCOUNT_ID_LIST:
-                account_id_str = str(account_id)
-                print(f"[INFO] 查询账户id={account_id_str}对应的角色id...")
-                character_ids = fetch_character_ids_by_account(account_id_str, mysql_conn)
-                if not character_ids:
-                    print(f"[WARN] 账户id={account_id_str} 未找到关联的角色id，跳过")
-                    continue
-                print(f"[INFO] 账户id={account_id_str} 找到{len(character_ids)}个角色id: {character_ids}")
-                for cid in character_ids:
-                    user_id_list.append((cid, account_id_str))
-        finally:
-            if mysql_conn:
-                try:
-                    mysql_conn.close()
-                except Exception:
-                    pass
-
-    if not user_id_list:
-        print("[WARN] 没有需要导出的角色id，程序退出")
-        return
-
-    # 初始化连接
-    es_cfg = get_es_config()
-    pg_conn = get_pg_conn()
-
-    # 获取映射表（只需要查询一次，所有角色共用）
-    print(f"\n[INFO] ===== 准备工作：获取映射表 =====")
-    mysql_conn = None
-    id_2_unit_index = {}
-    chapter_id_to_lesson_id = {}
-    try:
-        print(f"[INFO] 正在连接MySQL数据库（vala_test）...")
-        mysql_conn = get_mysql_conn("vala_test")  # 查询游戏配置表，使用 vala_test 数据库
-        print(f"[INFO] 正在获取 story_id 到 unit_id 的映射...")
-        id_2_unit_index = get_id_2_unit_index(mysql_conn)
-        print(f"[INFO] 成功获取 {len(id_2_unit_index)} 个 story_id 映射")
-        print(f"[INFO] 正在获取 chapter_id 到 lesson_id 的映射...")
-        chapter_id_to_lesson_id = get_chapter_id_to_lesson_id(mysql_conn)
-        print(f"[INFO] 成功获取 {len(chapter_id_to_lesson_id)} 个 chapter_id 映射")
-    except Exception as e:
-        print(f"[ERROR] 获取映射表失败: {e}")
-        import traceback
-        traceback.print_exc()
-        if pg_conn:
-            try:
-                pg_conn.close()
-            except Exception:
-                pass
-        if mysql_conn:
-            try:
-                mysql_conn.close()
-            except Exception:
-                pass
-        return
-
-    try:
-        # 统计信息
-        success_count = 0
-        skip_count = 0
-
-        print(f"\n[INFO] ===== 开始批量导出 =====")
-        print(f"[INFO] 共需导出{len(user_id_list)}个角色\n")
-        batch_start_time = datetime.datetime.now()
-
-        # 循环处理每个角色id
-        for idx, (user_id, account_id) in enumerate(user_id_list, 1):
-            print(f"\n{'='*60}")
-            print(f"[INFO] 进度: {idx}/{len(user_id_list)} ({idx*100//len(user_id_list)}%)")
-            print(f"{'='*60}")
-
-            # 生成输出文件名
-            if account_id is None:
-                # 模式1和模式2：角色id_{}_导出时间_{}.xlsx
-                filename = f"角色id_{user_id}_导出时间_{date_str}.xlsx"
-            else:
-                # 模式3：账户id_{}_角色id_{}_导出时间_{}.xlsx
-                filename = f"账户id_{account_id}_角色id_{user_id}_导出时间_{date_str}.xlsx"
-
-            output_path = os.path.join(OUTPUT_DIR, filename)
-
-            # 导出单个角色的数据
-            result = export_single_user(user_id, es_cfg, pg_conn, mysql_conn, output_path, id_2_unit_index, chapter_id_to_lesson_id)
-            if result:
-                success_count += 1
-            else:
-                skip_count += 1
-
-        # 输出统计信息
-        batch_total_time = (datetime.datetime.now() - batch_start_time).total_seconds()
-        print(f"\n{'='*60}")
-        print(f"[INFO] ===== 全部导出完成 =====")
-        print(f"[INFO] 总计: {len(user_id_list)}个角色")
-        print(f"[INFO] 成功: {success_count}个")
-        print(f"[INFO] 跳过: {skip_count}个")
-        print(f"[INFO] 总耗时: {batch_total_time:.2f}秒 ({batch_total_time/60:.2f}分钟)")
-        if success_count > 0:
-            print(f"[INFO] 平均每个角色: {batch_total_time/success_count:.2f}秒")
-        print(f"{'='*60}\n")
-
-    finally:
-        if pg_conn:
-            try:
-                pg_conn.close()
-            except Exception:
-                pass
-        if mysql_conn:
-            try:
-                mysql_conn.close()
-            except Exception:
-                pass
-
-
-if __name__ == "__main__":
-    main()
diff --git a/makee_vala/git_scripts/extract_core_speaking_data.py b/makee_vala/git_scripts/extract_core_speaking_data.py
deleted file mode 100644
index 237d266..0000000
--- a/makee_vala/git_scripts/extract_core_speaking_data.py
+++ /dev/null
@@ -1,681 +0,0 @@
-"""
-筛选  整合  线上的   口语 核心互动   对话记录数据
-
-数据筛选流程如下:
-一 步骤一
-首先， 在 PGsql数据库中  筛选出  口语核心互动对应的  session_id.
-数据库相关配置  从.env中读取:
-PG_DB_HOST = xxx
-PG_DB_PORT = xxx
-PG_DB_USER = xxx
-PG_DB_PASSWORD = xxx
-PG_DB_DATABASE = xxx
-
-读取以下数据表: 
-user_component_play_record_0 ~ user_component_play_record_7
-
-支持输入时间范围
-起始时间 和 截止时间   配置格式:   "20250110"
-
-数据表中的时间字段为 updated_at , 格式样例: "2025-11-05 19:35:46.698246+08:00"
-
-在这些时间范围内，筛选以下数据:
-c_type 为 core_speaking_reply 或者 core_speaking_inquiry 的数据
-
-输出总的数据条数
-
-然后导出 中间 excel文件
-
-包含以下字段：
-user_id,
-session_id,
-c_type,
-c_id,
-play_result,
-updated_at
-
-二. 步骤二
-根据 c_type 和 c_id 筛选核心互动的配置  补充一些字段。
-
-需要读取配置表:
-mysql表  core_interaction_component
-相关环境变量在.env:
-MYSQL_HOST=xxx
-MYSQL_USERNAME=xxx
-MYSQL_PASSWORD=xxx
-MYSQL_DATABASE=xxx
-MYSQL_PORT=xxx
-
-基于 c_type 和 c_id 字段匹配， 在 步骤一表格内容基础上追加以下字段:
-title
-reference_dialog 从 component_config 中抽取出 reference_dialog 字段的内容。
-component_config内容样例:
-```
-{"taskInfo":{"cId":"0000001","cType":"core_speaking_inquiry","title":"询问种植甜瓜的信息","taskDesc":"向Ben提问甜瓜种植的最佳季节、浇水频率和成熟的季节；","sceneDesc":"我和Ben到甜味城，参观了水果资源站和种植园。Ben的妈妈Kate讲了种植知识，我们都很感兴趣，想一起种甜瓜。我不懂，便问Ben，他虽没种过、不确定，还是告诉我注意事项。","img":"","key":[{"desc":"询问种植信息","keyList":[{"type":"default","npcId":269,"content":"Have you ever planted a ...?","desc":"你种过......吗？"},{"type":"default","npcId":269,"content":"What season is the best time to plant ...?","desc":"种植......的最佳时间是哪个季节？"},{"type":"default","npcId":269,"content":"Do ... need ... every day?","desc":"......需要每天浇......么？"}]}]},"dialogSetting":{"setting":{"npcName":"Ben","npcId":287,"round":5,"checkRound":3}},"dialogConfig":{"config":{"asrPrompt":"melon,summer,autumn,water,frequency,plant,season,harvest","promptInfo":{"default":"# 1. 角色（你要扮演谁）\n- 你是 Ben，一个 8 岁的小男孩，对种植水果感兴趣但不太确定具体细节。\n- 语言风格：简单、直接，偶尔带有不确定的语气。\n- 示例表达：\n  - \"I think summer. It's warm then.\"\n  - \"Maybe every two days? Not every day, I think.\"\n\n# 2. 任务（你如何参与到整个对话）\n- 你需要只在用户提问时提供信息，不会主动提及种植甜瓜的具体细节。\n- 如果用户提问相关内容，你需要根据知识库中的信息回答，不编造或偏离。\n- 如果用户的问题不清晰，你需要尝试澄清后再作答。\n- 如果用户长时间不提问或偏离主题，你需要温和、自然地进行交谈，引导回到主题。\n- 当所有知识点已传达后，你需要鼓励用户开始行动。\n\n# 3. 背景信息（引用配置）\n`你是 Ben，你和用户来到了甜味城Sweet Town。你们参观了水果资源站和种植园。在种植园中，你的妈妈Kate给你和用户介绍了一些种植水果的知识。你和用户对此很感兴趣。你们想要一起种一颗甜瓜。用户不知道种植甜瓜的知识，于是向你提问。虽然你没有种过甜瓜，对什么都不确定。但你还是回答了用户的问题，告诉用户关于种植甜瓜需要注意的事情。`\n\n# 4. 知识库（你知道的信息）\n- 种甜瓜的最佳季节：应该在夏天\n- 种甜瓜的浇水频率：应该隔一天浇一次水\n- 甜瓜成熟的季节：秋天\n- 如果用户提问相关内容，你会用这些信息来回答。\n\n# 5. 语言风格（固定内容）\n  1.  使用标准、正式的英语，水平为 CEFR A1/A2，每句话不超过 10 个单词\n  2.  始终保持礼貌和友好\n  3.  尽量避免重复表达，适当变换措辞\n\n# 6. 开场白\n你由你开始对话，你会说：“Let's plant a melon now! Or do you still have some questions？”\n\n# 7. 回应方式（固定内容）\n`你只在用户提问时才根据知识库中的信息回答。其他时候，以符合你身份的方式，自然地进行交谈。不主动提供信息，不偏离语境。`","final_goal":"Ben 说出了种植种甜瓜的最佳季节应该在夏天、种甜瓜的浇水频率应该隔一天浇一次水以及甜瓜成熟的季节是秋天","in_progress_goal":"Ben 说出了种植种甜瓜的最佳季节应该在夏天、种甜瓜的浇水频率应该隔一天浇一次水以及甜瓜成熟的季节是秋天","reference_dialog":"# 示例对话\nBen: Let's plant a melon now! Or do you still have some questions？\nYou: Great! When is the best time to plant it?\nBen: I think summer. It's warm then. Mom said melons like warm weather.\nYou: Oh, good. How often should we water it?\nBen: Maybe every two days? Not every day, I think.\nYou: And when will it be ready to eat?\nBen: Autumn, I guess. Plant in summer, get melons in autumn. That sounds right.","scene":"#任务背景\n你是 Ben，你和用户来到了甜味城Sweet Town。你们参观了水果资源站和种植园。在种植园中，你的妈妈Kate给你和用户介绍了一些种植水果的知识。你和用户对此很感兴趣。你们想要一起种一颗甜瓜。用户不知道种植甜瓜的知识，于是向你提问。虽然你没有种过甜瓜，对什么都不确定。但你还是回答了用户的问题，告诉用户关于种植甜瓜需要注意的事情。","user_knowledge":"# 知识\n- 询问种植信息\nHave you ever planted a ...?  你种过......吗？\nWhat season is the best time to plant ...?  种植......的最佳时间是哪个季节？\nDo ... need ... every day?  ......需要每天浇......么？","user_scene":"我和Ben到甜味城，参观了水果资源站和种植园。Ben的妈妈Kate讲了种植知识，我们都很感兴趣，想一起种甜瓜。我不懂，便问Ben，他虽没种过、不确定，还是告诉我注意事项。","user_task":"向Ben提问甜瓜种植的最佳季节、浇水频率和成熟的季节；"}}},"studyInfo":{"learningPart":{"learning":[{"question":{"desc":"现在你需要询问Ben关于种植甜瓜的最佳季节。"},"optionList":[{"option":"When is the best time to plant it?","feedbackDesc":"太棒了！你正确地询问了种植甜瓜的最佳季节。请大声朗读这句话！"},{"option":"How often should we water it?","feedbackDesc":"这句话是询问浇水频率的，不是询问最佳种植季节的。请再试一次，询问Ben种植甜瓜的最佳季节。"},{"option":"When will it be ready to eat?","feedbackDesc":"这句话是询问甜瓜成熟季节的，不是询问最佳种植季节的。请再试一次，询问Ben种植甜瓜的最佳季节。"}],"answer":[0],"read":{"type":"user","npcId":30,"content":"When is the best time to plant it?"},"feedback":{"type":"npc","npcName":"Ben","npcId":287,"content":"I think summer. It's warm then. Mom said melons like warm weather."}},{"question":{"desc":"Ben告诉你种植甜瓜的最佳季节是夏天。现在你需要询问Ben关于种植甜瓜的浇水频率。"},"optionList":[{"option":"When is the best time to plant it?","feedbackDesc":"这句话是询问最佳种植季节的，不是询问浇水频率的。请再试一次，询问Ben种植甜瓜的浇水频率。"},{"option":"How often should we water it?","feedbackDesc":"太棒了！你正确地询问了种植甜瓜的浇水频率。请大声朗读这句话！"},{"option":"When will it be ready to eat?","feedbackDesc":"这句话是询问甜瓜成熟季节的，不是询问浇水频率的。请再试一次，询问Ben种植甜瓜的浇水频率。"}],"answer":[1],"read":{"type":"user","npcId":30,"content":"How often should we water it?"},"feedback":{"type":"npc","npcName":"Ben","npcId":287,"content":"Maybe every two days? Not every day, I think."}},{"question":{"desc":"Ben告诉你种植甜瓜的浇水频率是隔一天一次。现在你需要询问Ben关于甜瓜成熟的季节。"},"optionList":[{"option":"When is the best time to plant it?","feedbackDesc":"这句话是询问最佳种植季节的，不是询问甜瓜成熟季节的。请再试一次，询问Ben甜瓜成熟的季节。"},{"option":"How often should we water it?","feedbackDesc":"这句话是询问浇水频率的，不是询问甜瓜成熟季节的。请再试一次，询问Ben甜瓜成熟的季节。"},{"option":"When will it be ready to eat?","feedbackDesc":"太棒了！你正确地询问了甜瓜成熟的季节。请大声朗读这句话！"}],"answer":[2],"read":{"type":"user","npcId":30,"content":"When will it be ready to eat?"},"feedback":{"type":"npc","npcName":"Ben","npcId":287,"content":"Autumn, I guess. Plant in summer, get melons in autumn. That sounds right."}}],"opening":{"type":"npc","npcName":"Ben","npcId":287,"content":"Let's plant a melon now! Or do you still have some questions？","desc":"Ben邀请你一起种植甜瓜，并询问你是否还有问题。"},"closing":{"desc":"Ben已经回答了所有关于种植甜瓜的问题，任务成功完成！"}}},"kpInfoList":[{"kpId":"","kpType":"sentence","kpTitle":"What do you think about the fight?","kpSkill":"sentence_pron","kpSkillName":"语音"},{"kpId":"","kpType":"sentence","kpTitle":"What do you think about the fight?","kpSkill":"sentence_meaning","kpSkillName":"语义"},{"kpId":"","kpType":"sentence","kpTitle":"Can you help us?","kpSkill":"sentence_pron","kpSkillName":"语音"},{"kpId":"","kpType":"sentence","kpTitle":"Can you help us?","kpSkill":"sentence_meaning","kpSkillName":"语义"},{"kpId":"","kpType":"sentence","kpTitle":"Do you know any way to beat him?","kpSkill":"sentence_pron","kpSkillName":"语音"},{"kpId":"","kpType":"sentence","kpTitle":"Do you know any way to beat him?","kpSkill":"sentence_meaning","kpSkillName":"语义"}]}
-```
-
-追加后，excel文件包含以下字段:
-user_id,
-session_id,
-c_type,
-c_id,
-play_result,
-updated_at,
-title
-reference_dialog
-
-三. 步骤三  追加对话历史数据
-对话历史数据，需要根据以下es数据库来补充:
-
-es索引: llm_roleplayagent_round_log
-相关环境变量在.env:
-ES_HOST=xxx
-ES_PORT=xxx
-ES_SCHEME=xxx
-ES_USER=xxx
-ES_PASSWORD=xxx
-
-基于每条记录中的 session_id, 匹配 es日志中 session_id 相同 且 action为 get_chat 对应的记录，整理后， 追加为 chat_log 字段。
-
-es中的日志是每轮作为一条记录，按以下逻辑进行拼接:
-读取 current_round,
-current round 为 0 ， 则 chat_log中加入 npc_message 的内容   "npc: " + npc_message
-current round 为 1~n  按顺序 依次追加  user_input 和 npc_message ， 每轮之间用换行符隔开。
-完全拼接后  最为 chat_log 内容
-完整样例:
-```
-npc:xxx
-user:xxx 
-npc:xxx
-... ...
-```
-
-拼接完成后  追加  chat_log  和  round_num （取最大的current_round）
-
-最终输出的 excel文件字段:
-user_id,
-session_id,
-c_type,
-c_id,
-play_result,
-updated_at,
-title,
-reference_dialog,
-chat_log,
-user_behavior_info,
-round_num
-
-
-----------------------
-根据以上需求  提供一个数据处理的脚本   尽量用高效的匹配。  我只需要输出最终的匹配文件，一个简单的功能脚本。 脚本不需要太复杂。但在输出的节点增加必要的日志  方便我了解数据量和进度 输入 时间范围 在 脚本开头配置即可。
-----------------------
-
-补充需求:
-pg sql数据库中 增加字段  user_behavior_info 读取。 并保留到最终的输出excel文档中  在 chat_log字段之后。  其他不变。
-----------------------
-
-补充需求 25.11.07:
-从 mysql表中 额外读取两个字段的信息进行处理，
-
-1. lesson
-抽取related_path字段中的lessonIndex内容 (4):
-{"packageId":2,"unitId":26,"lessonId":128,"packageIndex":1,"unitIndex":12,"lessonIndex":4}
-
-2. knowledge_points
-直接读取 kp_relation_info 的内容。
-
-3. in_progress_goal
-读取 和 reference_dialog 平级的  in_progress_goal 字段内容。
-
-4. final_goal
-读取 和 reference_dialog 平级的  final_goal 字段内容。
-
-以上四个字段 都追加到最终输出的表中，
-全部输出字段顺序如下:
-
-user_id,
-session_id,
-c_type,
-c_id,
-play_result,
-updated_at,
-title,
-lesson,
-knowledge_points,
-in_progress_goal,
-final_goal,
-reference_dialog,
-chat_log,
-user_behavior_info,
-round_num
-
-
-"""
-
-import os
-import json
-import pandas as pd
-import psycopg2
-import pymysql
-from elasticsearch import Elasticsearch
-from datetime import datetime
-from dotenv import load_dotenv
-import logging
-
-# 配置日志
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-logger = logging.getLogger(__name__)
-
-# 时间范围配置 - 修改这里的日期范围
-START_DATE = "20251001"  # 起始时间 格式: "20250110"
-END_DATE = "20251031"    # 截止时间 格式: "20250131"
-
-class CoreSpeakingDataProcessor:
-    def __init__(self):
-        # 加载环境变量
-        load_dotenv()
-
-        # PG数据库配置
-        self.pg_config = {
-            'host': os.getenv('PG_DB_HOST'),
-            'port': int(os.getenv('PG_DB_PORT', 5432)),
-            'user': os.getenv('PG_DB_USER'),
-            'password': os.getenv('PG_DB_PASSWORD'),
-            'database': os.getenv('PG_DB_DATABASE')
-        }
-
-        # MySQL数据库配置
-        self.mysql_config = {
-            'host': os.getenv('MYSQL_HOST'),
-            'port': int(os.getenv('MYSQL_PORT', 3306)),
-            'user': os.getenv('MYSQL_USERNAME'),
-            'password': os.getenv('MYSQL_PASSWORD'),
-            'database': os.getenv('MYSQL_DATABASE'),
-            'charset': 'utf8mb4'
-        }
-
-        # ES配置
-        self.es_config = {
-            'host': os.getenv('ES_HOST'),
-            'port': int(os.getenv('ES_PORT', 9200)),
-            'scheme': os.getenv('ES_SCHEME', 'http'),
-            'user': os.getenv('ES_USER'),
-            'password': os.getenv('ES_PASSWORD')
-        }
-
-        self.data = None
-
-    def convert_date_format(self, date_str):
-        """将'20250110'格式转换为数据库查询用的格式"""
-        try:
-            dt = datetime.strptime(date_str, '%Y%m%d')
-            return dt.strftime('%Y-%m-%d')
-        except ValueError:
-            logger.error(f"日期格式错误: {date_str}, 应为'20250110'格式")
-            raise
-
-    def get_next_day(self, date_str):
-        """获取下一天的日期"""
-        try:
-            dt = datetime.strptime(date_str, '%Y%m%d')
-            next_day = dt + pd.Timedelta(days=1)
-            return next_day.strftime('%Y-%m-%d')
-        except ValueError:
-            logger.error(f"日期格式错误: {date_str}, 应为'20250110'格式")
-            raise
-
-    def step1_extract_from_pg(self):
-        """步骤一: 从PG数据库筛选核心互动数据"""
-        logger.info("步骤一: 开始从PG数据库筛选数据...")
-
-        start_date = self.convert_date_format(START_DATE)
-        end_date_next = self.get_next_day(END_DATE)  # 获取结束日期的下一天
-        logger.info(f"时间范围: {start_date} 到 {end_date_next} (不含)")
-
-        # 构建查询SQL - 查询8个分表
-        all_data = []
-        table_names = [f"user_component_play_record_{i}" for i in range(8)]
-
-        for table_name in table_names:
-            logger.info(f"正在处理表: {table_name}")
-
-            # 为每个表创建独立的连接，避免事务问题
-            try:
-                conn = psycopg2.connect(**self.pg_config)
-                logger.debug(f"为表 {table_name} 创建数据库连接")
-            except Exception as e:
-                logger.error(f"为表 {table_name} 创建数据库连接失败: {e}")
-                continue
-
-            # 检查当前表是否存在 user_behavior_info 字段
-            has_behavior_info = False
-            try:
-                with conn.cursor() as cur:
-                    cur.execute(
-                        """
-                        SELECT EXISTS (
-                            SELECT 1 FROM information_schema.columns 
-                            WHERE table_name = %s 
-                              AND column_name = 'user_behavior_info'
-                        )
-                        """,
-                        (table_name,)
-                    )
-                    res = cur.fetchone()
-                    has_behavior_info = bool(res[0]) if res else False
-                logger.debug(f"表 {table_name} 是否包含 user_behavior_info: {has_behavior_info}")
-            except Exception as e:
-                logger.warning(f"检测表 {table_name} 的 user_behavior_info 字段失败: {e}")
-
-            # 动态构建查询列
-            extra_col = ", user_behavior_info" if has_behavior_info else ""
-            sql = f"""
-            SELECT
-                user_id,
-                session_id,
-                c_type,
-                c_id,
-                play_result,
-                updated_at{extra_col}
-            FROM {table_name}
-            WHERE
-                updated_at >= %s
-                AND updated_at < %s
-                AND c_type IN ('core_speaking_reply', 'core_speaking_inquiry')
-            ORDER BY updated_at
-            """
-
-            try:
-                df = pd.read_sql(sql, conn, params=[start_date, end_date_next])
-                # 保证列存在，即使部分分表没有该字段
-                if 'user_behavior_info' not in df.columns:
-                    df['user_behavior_info'] = ''
-                if not df.empty:
-                    logger.info(f"表 {table_name} 获取到 {len(df)} 条数据")
-                    all_data.append(df)
-                else:
-                    logger.info(f"表 {table_name} 无符合条件的数据")
-            except Exception as e:
-                logger.error(f"查询表 {table_name} 失败: {e}")
-            finally:
-                conn.close()
-
-        if all_data:
-            self.data = pd.concat(all_data, ignore_index=True)
-            logger.info(f"步骤一完成: 总共获取到 {len(self.data)} 条数据")
-
-            # 统计 user_behavior_info 非空条数
-            if 'user_behavior_info' in self.data.columns:
-                non_empty_behavior = (self.data['user_behavior_info'].astype(str).str.strip() != '').sum()
-                logger.info(f"步骤一: user_behavior_info 字段有值 {non_empty_behavior}/{len(self.data)} 条")
-
-            # 处理datetime字段，去掉时区信息（Excel不支持带时区的datetime）
-            if 'updated_at' in self.data.columns:
-                self.data['updated_at'] = pd.to_datetime(self.data['updated_at']).dt.tz_localize(None)
-                logger.info("已处理updated_at字段的时区信息")
-
-            # 输出中间Excel文件
-            intermediate_file = f"core_speaking_step1_{START_DATE}_{END_DATE}.xlsx"
-            self.data.to_excel(intermediate_file, index=False)
-            logger.info(f"步骤一中间文件已保存: {intermediate_file}")
-        else:
-            logger.warning("步骤一: 未获取到任何数据")
-            self.data = pd.DataFrame()
-
-    def step2_add_title_from_mysql(self):
-        """步骤二: 从MySQL补充title字段，并从component_config中提取reference_dialog等字段"""
-        if self.data.empty:
-            logger.warning("步骤二: 数据为空，跳过")
-            return
-
-        logger.info("步骤二: 开始从MySQL补充title字段...")
-
-        # 连接MySQL数据库
-        try:
-            conn = pymysql.connect(**self.mysql_config)
-            logger.info("MySQL数据库连接成功")
-        except Exception as e:
-            logger.error(f"MySQL数据库连接失败: {e}")
-            raise
-
-        # 获取所有需要查询的c_type和c_id组合
-        unique_components = self.data[['c_type', 'c_id']].drop_duplicates()
-        logger.info(f"需要查询 {len(unique_components)} 个不同的组件配置")
-
-        # 查询title、component_config、related_path和kp_relation_info
-        sql = """
-        SELECT c_type, c_id, title, component_config, related_path, kp_relation_info
-        FROM core_interaction_component
-        WHERE (c_type, c_id) IN ({})
-        """.format(','.join(['(%s,%s)'] * len(unique_components)))
-
-        params = []
-        for _, row in unique_components.iterrows():
-            params.extend([row['c_type'], row['c_id']])
-
-        try:
-            title_df = pd.read_sql(sql, conn, params=params)
-            logger.info(f"从MySQL获取到 {len(title_df)} 条组件配置")
-        except Exception as e:
-            logger.error(f"查询MySQL失败: {e}")
-            title_df = pd.DataFrame(columns=['c_type', 'c_id', 'title', 'component_config', 'related_path', 'kp_relation_info'])
-
-        conn.close()
-
-        # 从related_path中解析lesson（lessonIndex）
-        def extract_lesson(related_path_str):
-            if related_path_str is None or related_path_str == '':
-                return ''
-            try:
-                data = json.loads(related_path_str)
-                if isinstance(data, dict):
-                    lesson_index = data.get('lessonIndex')
-                    return str(lesson_index) if lesson_index is not None else ''
-                return ''
-            except Exception:
-                return ''
-
-        # 从component_config中解析reference_dialog、in_progress_goal和final_goal
-        def extract_config_fields(cfg_str):
-            result = {
-                'reference_dialog': '',
-                'in_progress_goal': '',
-                'final_goal': ''
-            }
-            if cfg_str is None or cfg_str == '':
-                return result
-            try:
-                data = json.loads(cfg_str)
-                if isinstance(data, dict):
-                    dialog_config = data.get('dialogConfig') or data.get('dialog_config')
-                    if isinstance(dialog_config, dict):
-                        config_obj = dialog_config.get('config')
-                        if isinstance(config_obj, dict):
-                            promptInfo = config_obj.get('promptInfo')
-                            if isinstance(promptInfo, dict):
-                                ref = promptInfo.get('reference_dialog')
-                                result['reference_dialog'] = ref if isinstance(ref, str) else ''
-
-                                in_prog = promptInfo.get('in_progress_goal')
-                                result['in_progress_goal'] = in_prog if isinstance(in_prog, str) else ''
-
-                                final = promptInfo.get('final_goal')
-                                result['final_goal'] = final if isinstance(final, str) else ''
-
-                                return result
-
-                    # 兜底：如果顶层就有这些字段
-                    ref = data.get('reference_dialog')
-                    result['reference_dialog'] = ref if isinstance(ref, str) else ''
-
-                    in_prog = data.get('in_progress_goal')
-                    result['in_progress_goal'] = in_prog if isinstance(in_prog, str) else ''
-
-                    final = data.get('final_goal')
-                    result['final_goal'] = final if isinstance(final, str) else ''
-
-                return result
-            except Exception:
-                return result
-
-        # 解析lesson
-        if 'related_path' in title_df.columns:
-            title_df['lesson'] = title_df['related_path'].apply(extract_lesson)
-        else:
-            title_df['lesson'] = ''
-
-        # 解析knowledge_points（直接读取kp_relation_info）
-        if 'kp_relation_info' in title_df.columns:
-            title_df['knowledge_points'] = title_df['kp_relation_info'].fillna('')
-        else:
-            title_df['knowledge_points'] = ''
-
-        # 解析component_config中的多个字段
-        if 'component_config' in title_df.columns:
-            config_fields = title_df['component_config'].apply(extract_config_fields)
-            title_df['reference_dialog'] = config_fields.apply(lambda x: x['reference_dialog'])
-            title_df['in_progress_goal'] = config_fields.apply(lambda x: x['in_progress_goal'])
-            title_df['final_goal'] = config_fields.apply(lambda x: x['final_goal'])
-        else:
-            title_df['reference_dialog'] = ''
-            title_df['in_progress_goal'] = ''
-            title_df['final_goal'] = ''
-
-        # 仅保留需要合并的列
-        title_df = title_df[['c_type', 'c_id', 'title', 'lesson', 'knowledge_points',
-                              'in_progress_goal', 'final_goal', 'reference_dialog']]
-
-        # 合并数据
-        self.data = pd.merge(
-            self.data,
-            title_df,
-            on=['c_type', 'c_id'],
-            how='left'
-        )
-
-        # 填充空值
-        self.data['title'] = self.data['title'].fillna('')
-        self.data['lesson'] = self.data['lesson'].fillna('')
-        self.data['knowledge_points'] = self.data['knowledge_points'].fillna('')
-        self.data['in_progress_goal'] = self.data['in_progress_goal'].fillna('')
-        self.data['final_goal'] = self.data['final_goal'].fillna('')
-        self.data['reference_dialog'] = self.data['reference_dialog'].fillna('')
-
-        # 统计解析成功的字段条数
-        non_empty_ref = (self.data['reference_dialog'] != '').sum()
-        non_empty_lesson = (self.data['lesson'] != '').sum()
-        non_empty_kp = (self.data['knowledge_points'] != '').sum()
-        non_empty_in_prog = (self.data['in_progress_goal'] != '').sum()
-        non_empty_final = (self.data['final_goal'] != '').sum()
-
-        logger.info(f"步骤二完成: 已补充字段统计:")
-        logger.info(f"  - lesson: {non_empty_lesson}/{len(self.data)} 条有值")
-        logger.info(f"  - knowledge_points: {non_empty_kp}/{len(self.data)} 条有值")
-        logger.info(f"  - in_progress_goal: {non_empty_in_prog}/{len(self.data)} 条有值")
-        logger.info(f"  - final_goal: {non_empty_final}/{len(self.data)} 条有值")
-        logger.info(f"  - reference_dialog: {non_empty_ref}/{len(self.data)} 条有值")
-
-        # 输出中间Excel文件
-        intermediate_file = f"core_speaking_step2_{START_DATE}_{END_DATE}.xlsx"
-        # 处理datetime字段，去掉时区信息（Excel不支持带时区的datetime）
-        if 'updated_at' in self.data.columns:
-            self.data['updated_at'] = pd.to_datetime(self.data['updated_at']).dt.tz_localize(None)
-        self.data.to_excel(intermediate_file, index=False)
-        logger.info(f"步骤二中间文件已保存: {intermediate_file}")
-
-    def step3_add_chat_log_from_es(self):
-        """步骤三: 从ES补充对话历史数据"""
-        if self.data.empty:
-            logger.warning("步骤三: 数据为空，跳过")
-            return
-
-        logger.info("步骤三: 开始从ES补充对话历史数据...")
-
-        # 连接ES
-        try:
-            es_url = f"{self.es_config['scheme']}://{self.es_config['host']}:{self.es_config['port']}"
-            if self.es_config['user'] and self.es_config['password']:
-                es = Elasticsearch(
-                    [es_url],
-                    http_auth=(self.es_config['user'], self.es_config['password'])
-                )
-            else:
-                es = Elasticsearch([es_url])
-
-            # 测试连接
-            if es.ping():
-                logger.info("ES连接成功")
-            else:
-                raise Exception("ES连接失败")
-        except Exception as e:
-            logger.error(f"ES连接失败: {e}")
-            # 添加空的chat_log和round_num字段
-            self.data['chat_log'] = ''
-            self.data['round_num'] = 0
-            return
-
-        # 获取唯一的session_id
-        unique_sessions = self.data['session_id'].unique()
-        logger.info(f"需要查询 {len(unique_sessions)} 个不同的session")
-
-        # 批量查询ES
-        chat_logs = {}
-        round_nums = {}
-
-        batch_size = 100
-        for i in range(0, len(unique_sessions), batch_size):
-            batch_sessions = unique_sessions[i:i+batch_size]
-            logger.info(f"正在处理session批次 {i//batch_size + 1}/{(len(unique_sessions)-1)//batch_size + 1}")
-
-            try:
-                # 构建ES查询
-                query = {
-                    "query": {
-                        "bool": {
-                            "must": [
-                                {"terms": {"session_id": batch_sessions.tolist()}},
-                                {"term": {"action": "get_chat"}}
-                            ]
-                        }
-                    },
-                    "size": 10000,
-                    "sort": [
-                        {"session_id": {"order": "asc"}},
-                        {"current_round": {"order": "asc"}}
-                    ]
-                }
-
-                response = es.search(index="llm_roleplayagent_round_log", body=query)
-                hits = response['hits']['hits']
-
-                logger.info(f"本批次从ES获取到 {len(hits)} 条对话记录")
-
-                # 按session_id分组处理
-                session_rounds = {}
-                for hit in hits:
-                    source = hit['_source']
-                    session_id = source.get('session_id')
-                    current_round = source.get('current_round', 0)
-
-                    if session_id not in session_rounds:
-                        session_rounds[session_id] = []
-
-                    session_rounds[session_id].append({
-                        'current_round': current_round,
-                        'user_input': source.get('user_input', ''),
-                        'npc_message': source.get('npc_message', '')
-                    })
-
-                # 为每个session构建chat_log
-                for session_id, rounds in session_rounds.items():
-                    # 按round排序
-                    rounds.sort(key=lambda x: x['current_round'])
-
-                    chat_parts = []
-                    max_round = 0
-
-                    for round_data in rounds:
-                        current_round = round_data['current_round']
-                        max_round = max(max_round, current_round)
-
-                        if current_round == 0:
-                            # round 0 只添加npc_message
-                            if round_data['npc_message']:
-                                chat_parts.append(f"npc:{round_data['npc_message']}")
-                        else:
-                            # round 1~n 添加user_input和npc_message
-                            if round_data['user_input']:
-                                chat_parts.append(f"user:{round_data['user_input']}")
-                            if round_data['npc_message']:
-                                chat_parts.append(f"npc:{round_data['npc_message']}")
-
-                    chat_logs[session_id] = '\n'.join(chat_parts)
-                    round_nums[session_id] = max_round
-
-            except Exception as e:
-                logger.error(f"查询ES批次失败: {e}")
-                continue
-
-        logger.info(f"完成ES查询，获取到 {len(chat_logs)} 个session的对话记录")
-
-        # 添加chat_log和round_num字段
-        self.data['chat_log'] = self.data['session_id'].map(chat_logs).fillna('')
-        self.data['round_num'] = self.data['session_id'].map(round_nums).fillna(0)
-
-        logger.info("步骤三完成: 对话历史数据已补充")
-
-    def export_final_excel(self):
-        """导出最终Excel文件"""
-        if self.data.empty:
-            logger.warning("数据为空，无法导出")
-            return
-
-        logger.info("开始导出最终Excel文件...")
-
-        # 确保字段顺序
-        final_columns = [
-            'user_id', 'session_id', 'c_type', 'c_id',
-            'play_result', 'updated_at', 'title', 'lesson', 'knowledge_points',
-            'in_progress_goal', 'final_goal', 'reference_dialog',
-            'chat_log', 'user_behavior_info', 'round_num'
-        ]
-
-        # 重新排列列顺序
-        self.data = self.data[final_columns]
-
-        # 处理datetime字段，去掉时区信息（Excel不支持带时区的datetime）
-        if 'updated_at' in self.data.columns:
-            self.data['updated_at'] = pd.to_datetime(self.data['updated_at']).dt.tz_localize(None)
-            logger.info("最终导出时已处理updated_at字段的时区信息")
-
-        # 生成文件名
-        output_file = f"core_speaking_final_{START_DATE}_{END_DATE}.xlsx"
-
-        # 导出Excel
-        self.data.to_excel(output_file, index=False)
-
-        logger.info(f"最终Excel文件已导出: {output_file}")
-        logger.info(f"总计导出 {len(self.data)} 条记录")
-
-        # 输出字段统计
-        logger.info("字段完整性统计:")
-        for col in final_columns:
-            if col in ['chat_log', 'title', 'reference_dialog', 'user_behavior_info',
-                       'lesson', 'knowledge_points', 'in_progress_goal', 'final_goal']:
-                non_empty = (self.data[col] != '').sum()
-                logger.info(f"  {col}: {non_empty}/{len(self.data)} 条记录有值")
-            elif col == 'round_num':
-                non_zero = (self.data[col] > 0).sum()
-                logger.info(f"  {col}: {non_zero}/{len(self.data)} 条记录 > 0")
-
-    def process(self):
-        """执行完整的数据处理流程"""
-        logger.info("="*60)
-        logger.info("开始口语核心互动数据处理")
-        logger.info(f"时间范围: {START_DATE} - {END_DATE}")
-        logger.info("="*60)
-
-        try:
-            # 步骤一: PG数据筛选
-            self.step1_extract_from_pg()
-
-            # 步骤二: MySQL补充title
-            self.step2_add_title_from_mysql()
-
-            # 步骤三: ES补充对话历史
-            self.step3_add_chat_log_from_es()
-
-            # 导出最终文件
-            self.export_final_excel()
-
-            logger.info("="*60)
-            logger.info("数据处理完成!")
-            logger.info("="*60)
-
-        except Exception as e:
-            logger.error(f"数据处理过程中发生错误: {e}")
-            raise
-
-if __name__ == "__main__":
-    processor = CoreSpeakingDataProcessor()
-    processor.process()
diff --git a/makee_vala/git_scripts/extract_user_audio.py b/makee_vala/git_scripts/extract_user_audio.py
deleted file mode 100644
index 50c5080..0000000
--- a/makee_vala/git_scripts/extract_user_audio.py
+++ /dev/null
@@ -1,480 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-用户音频数据筛选脚本
-功能：从PostgreSQL数据库的分表(user_component_play_record_0~7)中提取指定时间段的用户音频数据。
-主要逻辑：
-1. 数据源：遍历 user_component_play_record_0 至 user_component_play_record_7 表。
-2. 筛选条件：
-   - 时间范围：可配置
-   - 数据有效性：user_behavior_info 非空且包含 userAudio 和 pronunciationScore。
-3. 采样规则：
-   - 目标总数：可配置
-   - 用户限制：可配置
-   - 随机策略：先随机打乱，再按用户分组限制，最后补齐或截断至目标数量。
-4. 输出：导出为Excel文件。
-   包含字段：
-   - index: 序号
-   - source_table: 来源表名
-   - created_at: 创建时间
-   - user_id: 用户ID
-   - component_unique_code: 组件唯一标识
-   - pronunciationScore: 发音评分
-   - userAudio: 音频链接
-   - expressContent: 朗读内容文本
-"""
-
-import os
-import json
-import re
-import random
-import psycopg2
-import pymysql
-import pandas as pd
-from datetime import datetime
-from typing import List, Dict, Any
-from dotenv import load_dotenv
-
-# 配置参数
-CONFIG = {
-    # 筛选时间范围
-    'START_TIME': '2025-11-10 00:00:00+08:00',
-    'END_TIME': '2025-12-10 23:59:59+08:00',
-
-    # 采样参数
-    'TARGET_TOTAL': 10000,      # 目标总样本数
-    'MAX_PER_USER': 20,        # 单个用户最大样本数
-    'TABLE_COUNT': 8,          # 分表数量 (0~N-1)
-
-    # 组件类型过滤
-    'C_TYPE_FILTER': 'mid_sentence_dialogue'  # 仅筛选对话互动组件
-}
-
-class AudioDataExtractor:
-    def __init__(self):
-        # 加载环境变量
-        load_dotenv()
-
-        # PostgreSQL数据库连接配置
-        self.db_config = {
-            'host': os.getenv('PG_DB_HOST'),
-            'port': os.getenv('PG_DB_PORT'),
-            'user': os.getenv('PG_DB_USER'),
-            'password': os.getenv('PG_DB_PASSWORD'),
-            'database': os.getenv('PG_DB_DATABASE')
-        }
-
-        # MySQL数据库连接配置
-        self.mysql_config = {
-            'host': os.getenv('MYSQL_HOST'),
-            'user': os.getenv('MYSQL_USERNAME'),
-            'password': os.getenv('MYSQL_PASSWORD'),
-            'database': "vala_test",
-            'port': int(os.getenv('MYSQL_PORT', 3306)),
-            'charset': 'utf8mb4'
-        }
-
-        # 分表名称列表
-        self.table_names = [f'user_component_play_record_{i}' for i in range(CONFIG['TABLE_COUNT'])]
-
-
-        # 目标总数
-        self.target_total = CONFIG['TARGET_TOTAL']
-        # 每个用户最多记录数
-        self.max_per_user = CONFIG['MAX_PER_USER']
-        
-    def get_db_connection(self):
-        """获取数据库连接"""
-        try:
-            conn = psycopg2.connect(**self.db_config)
-            return conn
-        except Exception as e:
-            print(f"数据库连接失败: {e}")
-            raise
-    
-    def extract_audio_info(self, user_behavior_info: str) -> Dict[str, Any]:
-        """从user_behavior_info字段中提取音频信息"""
-        try:
-            behavior_data = json.loads(user_behavior_info)
-            if isinstance(behavior_data, list) and len(behavior_data) > 0:
-                # 取第一个元素
-                data = behavior_data[0]
-                if 'userAudio' in data and 'pronunciationScore' in data:
-                    return {
-                        'userAudio': data.get('userAudio'),
-                        'pronunciationScore': data.get('pronunciationScore'),
-                        'expressContent': data.get('expressContent')
-                    }
-        except (json.JSONDecodeError, KeyError, IndexError):
-            pass
-        return {}
-    
-    def query_table_data(self, table_name: str) -> List[Dict]:
-        """查询单个表的数据"""
-        conn = self.get_db_connection()
-        cursor = conn.cursor()
-
-        try:
-            query = f"""
-                SELECT user_id, component_unique_code, c_type, c_id, created_at, user_behavior_info
-                FROM {table_name}
-                WHERE created_at >= '{CONFIG['START_TIME']}'
-                AND created_at <= '{CONFIG['END_TIME']}'
-                AND c_type = '{CONFIG['C_TYPE_FILTER']}'
-                AND user_behavior_info IS NOT NULL
-                AND user_behavior_info != ''
-            """
-
-            cursor.execute(query)
-            rows = cursor.fetchall()
-
-            results = []
-            for row in rows:
-                user_id, component_unique_code, c_type, c_id, created_at, user_behavior_info = row
-
-                # 提取音频信息
-                audio_info = self.extract_audio_info(user_behavior_info)
-                if audio_info and 'userAudio' in audio_info and 'pronunciationScore' in audio_info:
-                    results.append({
-                        'source_table': table_name,
-                        'user_id': user_id,
-                        'component_unique_code': component_unique_code,
-                        'c_type': c_type,
-                        'c_id': c_id,
-                        'created_at': created_at,
-                        'userAudio': audio_info['userAudio'],
-                        'pronunciationScore': audio_info['pronunciationScore'],
-                        'expressContent': audio_info.get('expressContent')
-                    })
-
-            return results
-
-        finally:
-            cursor.close()
-            conn.close()
-
-    def get_component_configs(self, data: List[Dict]) -> Dict[str, str]:
-        """从MySQL批量获取组件配置信息"""
-        # 提取所有unique的(c_type, c_id)组合
-        unique_components = set()
-        for record in data:
-            if 'c_type' in record and 'c_id' in record:
-                unique_components.add((record['c_type'], record['c_id']))
-
-        if not unique_components:
-            print("没有需要查询的组件")
-            return {}
-
-        print(f"正在从MySQL查询 {len(unique_components)} 个组件的配置信息...")
-
-        # 连接MySQL
-        try:
-            conn = pymysql.connect(**self.mysql_config)
-            cursor = conn.cursor()
-
-            # 存储组件配置的字典，key为"c_type-c_id"
-            component_configs = {}
-
-            # 批量查询
-            for c_type, c_id in unique_components:
-                query = """
-                SELECT component_config
-                FROM middle_interaction_component
-                WHERE c_type = %s AND c_id = %s
-                """
-                cursor.execute(query, (c_type, c_id))
-                result = cursor.fetchone()
-
-                if result and result[0]:
-                    key = f"{c_type}-{c_id}"
-                    component_configs[key] = result[0]
-
-            cursor.close()
-            conn.close()
-
-            print(f"成功查询到 {len(component_configs)} 个组件配置")
-            return component_configs
-
-        except Exception as e:
-            print(f"查询MySQL组件配置失败: {e}")
-            return {}
-
-    @staticmethod
-    def clean_text(text: str) -> str:
-        """清理文本：转小写，去除标点符号和空格"""
-        if not text:
-            return ""
-        # 转小写
-        text = text.lower()
-        # 去除标点符号和特殊字符，只保留字母和数字
-        text = re.sub(r'[^\w\s]', '', text)
-        # 去除多余空格
-        text = re.sub(r'\s+', '', text)
-        return text
-
-    @staticmethod
-    def levenshtein_distance(s1: str, s2: str) -> int:
-        """计算两个字符串的Levenshtein编辑距离"""
-        if len(s1) < len(s2):
-            return AudioDataExtractor.levenshtein_distance(s2, s1)
-
-        if len(s2) == 0:
-            return len(s1)
-
-        previous_row = range(len(s2) + 1)
-        for i, c1 in enumerate(s1):
-            current_row = [i + 1]
-            for j, c2 in enumerate(s2):
-                # 插入、删除、替换的成本
-                insertions = previous_row[j + 1] + 1
-                deletions = current_row[j] + 1
-                substitutions = previous_row[j] + (c1 != c2)
-                current_row.append(min(insertions, deletions, substitutions))
-            previous_row = current_row
-
-        return previous_row[-1]
-
-    def parse_and_filter_by_config(self, data: List[Dict], component_configs: Dict[str, str]) -> List[Dict]:
-        """解析组件配置并筛选question.mode == 'read'的记录"""
-        print(f"\n开始根据组件配置筛选数据...")
-        print(f"筛选前数据量: {len(data)}")
-
-        filtered_data = []
-        skipped_no_config = 0
-        skipped_invalid_json = 0
-        skipped_wrong_mode = 0
-
-        for record in data:
-            c_type = record.get('c_type')
-            c_id = record.get('c_id')
-
-            if not c_type or not c_id:
-                continue
-
-            # 获取组件配置
-            key = f"{c_type}-{c_id}"
-            config_str = component_configs.get(key)
-
-            if not config_str:
-                skipped_no_config += 1
-                continue
-
-            try:
-                # 解析JSON配置
-                config = json.loads(config_str)
-
-                # 检查question.mode == "read"
-                question = config.get('question', {})
-                mode = question.get('mode')
-
-                if mode == 'read':
-                    # 提取question.content作为refText
-                    ref_text = question.get('content', '')
-                    record['refText'] = ref_text
-
-                    # 计算编辑距离
-                    express_content = record.get('expressContent', '')
-
-                    # 清理文本（去除标点和大小写差异）
-                    cleaned_express = self.clean_text(express_content)
-                    cleaned_ref = self.clean_text(ref_text)
-
-                    # 计算编辑距离
-                    edit_distance = self.levenshtein_distance(cleaned_express, cleaned_ref)
-                    record['editDistance'] = edit_distance
-
-                    # 计算相对编辑距离
-                    ref_len = len(cleaned_ref)
-                    if ref_len > 0:
-                        relative_edit_distance = round(edit_distance / ref_len, 4)
-                    else:
-                        relative_edit_distance = 0
-                    record['relativeEditDistance'] = relative_edit_distance
-
-                    filtered_data.append(record)
-                else:
-                    skipped_wrong_mode += 1
-
-            except (json.JSONDecodeError, AttributeError, TypeError):
-                skipped_invalid_json += 1
-                continue
-
-        print(f"筛选后数据量: {len(filtered_data)}")
-        print(f"  - 缺少配置: {skipped_no_config}")
-        print(f"  - 配置解析失败: {skipped_invalid_json}")
-        print(f"  - mode不是read: {skipped_wrong_mode}")
-
-        return filtered_data
-
-    def collect_all_data(self) -> List[Dict]:
-        """收集所有表的数据"""
-        all_data = []
-
-        for table_name in self.table_names:
-            print(f"正在查询表: {table_name}")
-            try:
-                table_data = self.query_table_data(table_name)
-                all_data.extend(table_data)
-                print(f"表 {table_name} 查询到 {len(table_data)} 条记录")
-            except Exception as e:
-                print(f"查询表 {table_name} 失败: {e}")
-                continue
-
-        print(f"总共收集到 {len(all_data)} 条有效记录")
-
-        if not all_data:
-            return []
-
-        # 从MySQL获取组件配置
-        component_configs = self.get_component_configs(all_data)
-
-        # 根据组件配置筛选数据（只保留question.mode == "read"的记录）
-        filtered_data = self.parse_and_filter_by_config(all_data, component_configs)
-
-        return filtered_data
-    
-    def random_filter_data(self, data: List[Dict]) -> List[Dict]:
-        """随机筛选数据（不按评分分段控制）"""
-        # 随机打乱所有数据
-        shuffled_data = data.copy()
-        random.shuffle(shuffled_data)
-        
-        print(f"开始随机筛选，总共 {len(shuffled_data)} 条记录")
-        return shuffled_data
-    
-    def apply_user_constraints(self, data: List[Dict]) -> List[Dict]:
-        """应用用户约束（每个用户最多2条）"""
-        user_records = {}
-        
-        # 按用户分组
-        for record in data:
-            user_id = record['user_id']
-            if user_id not in user_records:
-                user_records[user_id] = []
-            user_records[user_id].append(record)
-        
-        # 每个用户最多选择2条
-        final_data = []
-        for user_id, records in user_records.items():
-            if len(records) <= self.max_per_user:
-                final_data.extend(records)
-            else:
-                # 随机选择2条
-                selected = random.sample(records, self.max_per_user)
-                final_data.extend(selected)
-        
-        return final_data
-    
-    def export_to_excel(self, data: List[Dict], filename: str = 'user_audio_data.xlsx'):
-        """导出数据到Excel文件"""
-        # 准备导出数据
-        export_data = []
-        for i, record in enumerate(data):
-            # 处理时区问题 - 转换为本地时间字符串
-            created_at = record['created_at']
-            if hasattr(created_at, 'tz_localize'):
-                created_at = created_at.tz_localize(None)
-            elif hasattr(created_at, 'replace'):
-                created_at = created_at.replace(tzinfo=None)
-            
-            export_data.append({
-                'index': i,
-                'source_table': record['source_table'],
-                'created_at': created_at,
-                'user_id': record['user_id'],
-                'component_unique_code': record['component_unique_code'],
-                'c_type': record.get('c_type'),
-                'c_id': record.get('c_id'),
-                'pronunciationScore': record['pronunciationScore'],
-                'userAudio': record['userAudio'],
-                'expressContent': record.get('expressContent'),
-                'refText': record.get('refText'),
-                'editDistance': record.get('editDistance'),
-                'relativeEditDistance': record.get('relativeEditDistance')
-            })
-        
-        # 创建DataFrame并导出
-        df = pd.DataFrame(export_data)
-        df.to_excel(filename, index=False)
-        print(f"数据已导出到: {filename}")
-        print(f"总共导出 {len(export_data)} 条记录")
-        
-        # 打印统计信息
-        self.print_statistics(data)
-    
-    def print_statistics(self, data: List[Dict]):
-        """打印统计信息"""
-        print("\n=== 数据统计 ===")
-        
-        # 评分统计（显示分布情况但不按区间分组）
-        scores = [record['pronunciationScore'] for record in data]
-        print(f"\n评分统计:")
-        print(f"  总记录数: {len(scores)}")
-        print(f"  最高分: {max(scores)}")
-        print(f"  最低分: {min(scores)}")
-        print(f"  平均分: {sum(scores) / len(scores):.2f}")
-        
-        # 用户分布统计
-        user_counts = {}
-        for record in data:
-            user_id = record['user_id']
-            user_counts[user_id] = user_counts.get(user_id, 0) + 1
-        
-        print(f"\n用户统计:")
-        print(f"  总用户数: {len(user_counts)}")
-        print(f"  平均每用户记录数: {len(data) / len(user_counts):.2f}")
-        
-        # 表分布统计
-        table_counts = {}
-        for record in data:
-            table = record['source_table']
-            table_counts[table] = table_counts.get(table, 0) + 1
-        
-        print(f"\n表分布:")
-        for table, count in sorted(table_counts.items()):
-            print(f"  {table}: {count} 条")
-    
-    def run(self):
-        """运行主流程"""
-        print("开始提取用户音频数据...")
-        
-        # 1. 收集所有数据
-        all_data = self.collect_all_data()
-        
-        if not all_data:
-            print("未找到符合条件的数据")
-            return
-        
-        # 2. 随机筛选数据（不按评分分段控制）
-        filtered_data = self.random_filter_data(all_data)
-        
-        # 3. 应用用户约束
-        final_data = self.apply_user_constraints(filtered_data)
-        
-        # 4. 如果数据不足500条，尝试补充
-        if len(final_data) < self.target_total:
-            print(f"当前数据量 {len(final_data)} 条，少于目标 {self.target_total} 条")
-            # 从剩余数据中补充
-            used_records = set((r['user_id'], r['component_unique_code'], str(r['created_at'])) for r in final_data)
-            available_data = [r for r in all_data if (r['user_id'], r['component_unique_code'], str(r['created_at'])) not in used_records]
-            
-            needed = self.target_total - len(final_data)
-            if len(available_data) >= needed:
-                additional = random.sample(available_data, needed)
-                final_data.extend(additional)
-        
-        # 5. 如果超过500条，随机选择500条
-        if len(final_data) > self.target_total:
-            final_data = random.sample(final_data, self.target_total)
-        
-        # 6. 导出到Excel
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        filename = f"user_audio_data_{timestamp}.xlsx"
-        self.export_to_excel(final_data, filename)
-
-def main():
-    extractor = AudioDataExtractor()
-    extractor.run()
-
-if __name__ == "__main__":
-    main()
diff --git a/makee_vala/git_scripts/sample_unit_challenge_data_from_es.py b/makee_vala/git_scripts/sample_unit_challenge_data_from_es.py
deleted file mode 100644
index 16b33fc..0000000
--- a/makee_vala/git_scripts/sample_unit_challenge_data_from_es.py
+++ /dev/null
@@ -1,463 +0,0 @@
-"""
-从es中 筛选用户数据
-
-es相关配置通过以下环节变量
-
-ES_HOST=xxx
-ES_PORT=9200
-ES_SCHEME=https
-ES_USER=elastic
-ES_PASSWORD=xxx
-
-
-index: user-audio
-
-脚本思路:
-
-给定 一些过滤参数； 给定导出的excel文件名 （在脚本中以变量方式配置就行）
-
-导出我要的字段内容到一个 excel
-
-过滤字段:
-timeStr:  字段内容为str 格式为: 2024-12-31 15:53:19
-期望支持配置  开始 日期 和 结束日期  （可以只配置一个  只配 开始日期  则筛选 >= 开始日期的记录， 只配结束日期 则筛选 <= 结束日期的记录）
-
-输出字段内容支持配置:
-
-
-"""
-
-import os
-from datetime import datetime
-from dotenv import load_dotenv
-from elasticsearch import Elasticsearch
-import pandas as pd
-import urllib.parse
-from collections import defaultdict
-
-# 加载环境变量
-load_dotenv()
-
-# 配置参数
-INDEX_NAME = "llm_ai_tools_log"
-OUTPUT_FILE = "单元挑战用户数据_250906_251024.xlsx"
-START_DATE = "2025-09-06 00:00:00"  # 开始日期，格式: YYYY-MM-DD HH:MM:SS，设为None则不限制
-END_DATE = "2025-10-24 00:00:00"  # 结束日期，格式: YYYY-MM-DD HH:MM:SS，设为None则不限制
-
-# type字段过滤配置：筛选指定类型的记录，为空则不限制
-FILTER_TYPES = ["sent_check_challenge", "speaking_topic_challenge"]
-
-# 可选的 userId 过滤配置：配置为[int, ...] 列表；为空则不限制
-FILTER_USER_IDS = []  # 例如: [123, 456]
-
-# 需要导出的字段
-EXPORT_FIELDS = [
-    "type",
-    "question", 
-    "user_answer",
-    "time_total_ms",
-    "score",
-    "is_passed",
-    "model",
-    "write_time_str",
-    "write_time_int",
-]
-
-
-
-def create_es_client():
-    """创建Elasticsearch客户端"""
-    # 获取环境变量并打印调试信息
-    es_host = os.getenv('ES_HOST')
-    es_port = os.getenv('ES_PORT', 9200)
-    es_scheme = os.getenv('ES_SCHEME', 'https')
-    es_user = os.getenv('ES_USER')
-    es_password = os.getenv('ES_PASSWORD')
-    
-    print(f"[DEBUG] ES配置信息:")
-    print(f"  ES_HOST: {es_host}")
-    print(f"  ES_PORT: {es_port}")
-    print(f"  ES_SCHEME: {es_scheme}")
-    print(f"  ES_USER: {es_user}")
-    print(f"  ES_PASSWORD: {'***已设置***' if es_password else '未设置'}")
-    
-    # 检查必要的环境变量
-    if not es_host:
-        raise ValueError("ES_HOST环境变量未设置")
-    if not es_user:
-        raise ValueError("ES_USER环境变量未设置")
-    if not es_password:
-        raise ValueError("ES_PASSWORD环境变量未设置")
-    
-    # URL编码用户名和密码，处理特殊字符
-    encoded_user = urllib.parse.quote(es_user, safe='')
-    encoded_password = urllib.parse.quote(es_password, safe='')
-    
-    print(f"[DEBUG] 原始密码包含特殊字符，已进行URL编码")
-    
-    # 方式1: 使用URL中嵌入认证信息
-    host_url_with_auth = f"{es_scheme}://{encoded_user}:{encoded_password}@{es_host}:{es_port}"
-    print(f"[DEBUG] 连接URL (带认证): {es_scheme}://{encoded_user}:***@{es_host}:{es_port}")
-    
-    try:
-        # 尝试方式1: URL中嵌入认证
-        es_config_1 = {
-            'hosts': [host_url_with_auth],
-            'verify_certs': False,
-            'ssl_show_warn': False,
-            'request_timeout': 30,
-            'retry_on_timeout': True
-        }
-        
-        print("[DEBUG] 尝试方式1: URL中嵌入认证信息")
-        es_client = Elasticsearch(**es_config_1)
-        
-        # 测试连接
-        info = es_client.info()
-        print(f"[SUCCESS] 方式1连接成功")
-        return es_client
-        
-    except Exception as e1:
-        print(f"[DEBUG] 方式1失败: {e1}")
-        
-        try:
-            # 尝试方式2: 使用basic_auth参数
-            host_url = f"{es_scheme}://{es_host}:{es_port}"
-            es_config_2 = {
-                'hosts': [host_url],
-                'basic_auth': (es_user, es_password),
-                'verify_certs': False,
-                'ssl_show_warn': False,
-                'request_timeout': 30,
-                'retry_on_timeout': True
-            }
-            
-            print("[DEBUG] 尝试方式2: 使用basic_auth参数")
-            es_client = Elasticsearch(**es_config_2)
-            
-            # 测试连接
-            info = es_client.info()
-            print(f"[SUCCESS] 方式2连接成功")
-            return es_client
-            
-        except Exception as e2:
-            print(f"[DEBUG] 方式2失败: {e2}")
-            
-            try:
-                # 尝试方式3: 使用http_auth参数 (旧版本兼容)
-                es_config_3 = {
-                    'hosts': [host_url],
-                    'http_auth': (es_user, es_password),
-                    'verify_certs': False,
-                    'ssl_show_warn': False,
-                    'request_timeout': 30,
-                    'retry_on_timeout': True
-                }
-                
-                print("[DEBUG] 尝试方式3: 使用http_auth参数")
-                es_client = Elasticsearch(**es_config_3)
-                
-                # 测试连接
-                info = es_client.info()
-                print(f"[SUCCESS] 方式3连接成功")
-                return es_client
-                
-            except Exception as e3:
-                print(f"[DEBUG] 方式3失败: {e3}")
-                print(f"[ERROR] 所有认证方式都失败了")
-                raise e3
-
-def build_query(start_date=None, end_date=None):
-    """构建ES查询条件"""
-    # 构建基础查询条件
-    must_conditions = []
-    
-    # 添加时间范围条件
-    if start_date or end_date:
-        range_query = {}
-        
-        if start_date:
-            start_timestamp = int(datetime.strptime(start_date, "%Y-%m-%d %H:%M:%S").timestamp())
-            range_query["gte"] = start_timestamp
-            print(f"[DEBUG] 开始时间戳: {start_timestamp} (对应 {start_date})")
-            
-        if end_date:
-            end_timestamp = int(datetime.strptime(end_date, "%Y-%m-%d %H:%M:%S").timestamp())
-            range_query["lte"] = end_timestamp
-            print(f"[DEBUG] 结束时间戳: {end_timestamp} (对应 {end_date})")
-            
-        must_conditions.append({
-            "range": {
-                "write_time_int": range_query
-            }
-        })
-
-    # 如果配置了 userId 列表，则仅选取对应 userId 的数据
-    if FILTER_USER_IDS:
-        print(f"[DEBUG] 应用 userId 过滤: {FILTER_USER_IDS}")
-        must_conditions.append({
-            "terms": {
-                "userId": FILTER_USER_IDS
-            }
-        })
-    
-    # 如果配置了 type 列表，则仅选取对应 type 的数据
-    if FILTER_TYPES:
-        print(f"[DEBUG] 应用 type 过滤: {FILTER_TYPES}")
-        must_conditions.append({
-            "terms": {
-                "type": FILTER_TYPES
-            }
-        })
-    
-    # 构建最终查询
-    if must_conditions:
-        query = {
-            "bool": {
-                "must": must_conditions
-            }
-        }
-    else:
-        query = {"match_all": {}}
-        
-    print(f"[DEBUG] 查询条件: {query}")
-    
-    return {
-        "query": query,
-        "_source": EXPORT_FIELDS,
-        "sort": [{"write_time_int": {"order": "desc"}}]
-    }
-
-def fetch_data_from_es(es_client, start_date=None, end_date=None):
-    """从ES获取数据"""
-    query = build_query(start_date, end_date)
-    
-    try:
-        print(f"[DEBUG] 执行ES查询，使用scroll获取全量数据...")
-        
-        # 使用scroll API获取全量数据
-        scroll_size = 1000  # 每次scroll获取的数据量
-        scroll_timeout = '2m'  # scroll超时时间
-        
-        # 初始化scroll
-        query['size'] = scroll_size
-        response = es_client.search(
-            index=INDEX_NAME, 
-            body=query,
-            scroll=scroll_timeout
-        )
-        
-        scroll_id = response['_scroll_id']
-        hits = response['hits']['hits']
-        total_hits = response['hits']['total']
-        
-        # 获取总数（兼容不同ES版本）
-        if isinstance(total_hits, dict):
-            total_count = total_hits['value']
-        else:
-            total_count = total_hits
-            
-        print(f"[DEBUG] ES中匹配的总记录数: {total_count}")
-        
-        all_data = []
-        batch_count = 1
-        
-        # 处理第一批数据
-        for hit in hits:
-            source = hit['_source']
-            row = {}
-            for field in EXPORT_FIELDS:
-                row[field] = source.get(field, "")
-            all_data.append(row)
-        
-        print(f"[DEBUG] 已获取第 {batch_count} 批数据，当前总数: {len(all_data)}")
-        
-        # 继续scroll获取剩余数据
-        while len(hits) == scroll_size:
-            batch_count += 1
-            response = es_client.scroll(scroll_id=scroll_id, scroll=scroll_timeout)
-            scroll_id = response['_scroll_id']
-            hits = response['hits']['hits']
-            
-            for hit in hits:
-                source = hit['_source']
-                row = {}
-                for field in EXPORT_FIELDS:
-                    row[field] = source.get(field, "")
-                all_data.append(row)
-            
-            print(f"[DEBUG] 已获取第 {batch_count} 批数据，当前总数: {len(all_data)}")
-        
-        # 清理scroll
-        try:
-            es_client.clear_scroll(scroll_id=scroll_id)
-        except:
-            pass  # 忽略清理错误
-        
-        print(f"[DEBUG] 从ES获取到数据 {len(all_data)} 条记录")
-        return all_data
-        
-    except Exception as e:
-        print(f"查询ES时出错: {e}")
-        return []
-
-def export_to_excel(data, filename):
-    """导出数据到Excel"""
-    if not data:
-        print("没有数据可导出")
-        return
-        
-    df = pd.DataFrame(data)
-    
-    try:
-        df.to_excel(filename, index=False, engine='openpyxl')
-        print(f"数据已导出到: {filename}")
-        print(f"共导出 {len(data)} 条记录")
-    except Exception as e:
-        print(f"导出Excel时出错: {e}")
-
-def debug_es_data(es_client):
-    """调试ES数据，了解实际数据情况"""
-    print("\n" + "="*60)
-    print("开始调试ES数据...")
-    
-    try:
-        # 1. 查询总数据量
-        total_query = {
-            "query": {"match_all": {}},
-            "size": 0
-        }
-        response = es_client.search(index=INDEX_NAME, body=total_query)
-        total_count = response['hits']['total']
-        if isinstance(total_count, dict):
-            total_count = total_count['value']
-        print(f"[DEBUG] ES索引 '{INDEX_NAME}' 中总数据量: {total_count}")
-        
-        if total_count == 0:
-            print("[ERROR] ES索引中没有任何数据！")
-            return
-        
-        # 2. 查询最近的几条数据，了解数据结构
-        sample_query = {
-            "query": {"match_all": {}},
-            "size": 5,
-            "sort": [{"_id": {"order": "desc"}}]
-        }
-        response = es_client.search(index=INDEX_NAME, body=sample_query)
-        hits = response['hits']['hits']
-        
-        print(f"[DEBUG] 获取到 {len(hits)} 条样本数据:")
-        for i, hit in enumerate(hits):
-            source = hit['_source']
-            
-            print(f"  样本 {i+1}:")
-            print(f"    write_time_int: {source.get('write_time_int', 'N/A')}")
-            print(f"    timeStr: {source.get('timeStr', 'N/A')}")
-            print(f"    type: {source.get('type', 'N/A')}")
-            print(f"    userId: {source.get('userId', 'N/A')}")
-        
-        # 3. 查询时间范围内的数据
-        time_range_query = {
-            "query": {
-                "range": {
-                    "write_time_int": {
-                        "gte": int(datetime.strptime(START_DATE, "%Y-%m-%d %H:%M:%S").timestamp()),
-                        "lte": int(datetime.strptime(END_DATE, "%Y-%m-%d %H:%M:%S").timestamp())
-                    }
-                }
-            },
-            "size": 0
-        }
-        response = es_client.search(index=INDEX_NAME, body=time_range_query)
-        time_range_count = response['hits']['total']
-        if isinstance(time_range_count, dict):
-            time_range_count = time_range_count['value']
-        print(f"[DEBUG] 时间范围内数据量 ({START_DATE} 到 {END_DATE}): {time_range_count}")
-        
-        # 4. 查询时间范围的实际数据分布
-        print(f"[DEBUG] 检查时间字段的实际值范围...")
-        agg_query = {
-            "query": {"match_all": {}},
-            "size": 0,
-            "aggs": {
-                "time_stats": {
-                    "stats": {
-                        "field": "write_time_int"
-                    }
-                }
-            }
-        }
-        response = es_client.search(index=INDEX_NAME, body=agg_query)
-        if 'aggregations' in response:
-            stats = response['aggregations']['time_stats']
-            min_time = stats.get('min')
-            max_time = stats.get('max')
-            if min_time and max_time:
-                min_date = datetime.fromtimestamp(min_time).strftime("%Y-%m-%d %H:%M:%S")
-                max_date = datetime.fromtimestamp(max_time).strftime("%Y-%m-%d %H:%M:%S")
-                print(f"    最早时间: {min_date} (时间戳: {min_time})")
-                print(f"    最晚时间: {max_date} (时间戳: {max_time})")
-        
-    except Exception as e:
-        print(f"[ERROR] 调试ES数据时出错: {e}")
-    
-    print("="*60 + "\n")
-
-def main():
-    """主函数"""
-    print("开始从ES获取单元挑战数据...")
-    print(f"索引: {INDEX_NAME}")
-    print(f"开始日期: {START_DATE if START_DATE else '不限制'}")
-    print(f"结束日期: {END_DATE if END_DATE else '不限制'}")
-    if FILTER_TYPES:
-        print(f"类型过滤: {FILTER_TYPES}")
-    if FILTER_USER_IDS:
-        print(f"用户ID过滤: {FILTER_USER_IDS}")
-    print("-" * 50)
-    
-    # 检查.env文件是否存在
-    env_file = ".env"
-    if not os.path.exists(env_file):
-        print(f"[ERROR] {env_file} 文件不存在，请创建并配置ES连接信息")
-        print("参考 .env.example 文件进行配置")
-        return
-    
-    print(f"[DEBUG] 找到环境配置文件: {env_file}")
-    
-    # 创建ES客户端
-    try:
-        es_client = create_es_client()
-    except ValueError as e:
-        print(f"[ERROR] 配置错误: {e}")
-        print("请检查 .env 文件中的ES配置")
-        return
-    except Exception as e:
-        print(f"[ERROR] 创建ES客户端失败: {e}")
-        return
-    
-    # 测试连接
-    try:
-        print("[DEBUG] 正在测试ES连接...")
-        # ES客户端创建函数中已经包含了连接测试，这里不需要重复测试
-        print(f"[SUCCESS] ES连接已建立")
-    except Exception as e:
-        print(f"[ERROR] ES连接失败: {e}")
-        print("\n可能的解决方案:")
-        print("1. 检查ES服务是否正常运行")
-        print("2. 验证.env文件中的ES_HOST、ES_USER、ES_PASSWORD是否正确")
-        print("3. 确认网络连接是否正常")
-        print("4. 检查ES用户权限是否足够")
-        print("5. 密码中包含特殊字符，已尝试URL编码处理")
-        return
-    
-    # 获取数据
-    data = fetch_data_from_es(es_client, START_DATE, END_DATE)
-    
-    # 导出到Excel
-    if data:
-        export_to_excel(data, OUTPUT_FILE)
-    else:
-        print("未获取到任何数据")
-
-if __name__ == "__main__":
-    main()
diff --git a/makee_vala/git_scripts/sample_user_data_from_es.py b/makee_vala/git_scripts/sample_user_data_from_es.py
deleted file mode 100644
index 3a1e415..0000000
--- a/makee_vala/git_scripts/sample_user_data_from_es.py
+++ /dev/null
@@ -1,599 +0,0 @@
-"""
-从es中采样用户数据
-
-es相关配置通过以下环节变量
-
-ES_HOST=xxx
-ES_PORT=9200
-ES_SCHEME=https
-ES_USER=elastic
-ES_PASSWORD=xxx
-
-
-index: user-audio
-
-脚本思路:
-
-给定 一些过滤参数； 给定导出的excel文件名 （在脚本中以变量方式配置就行）
-
-导出我要的字段内容到一个 excel
-
-过滤字段:
-timeStr:  字段内容为str 格式为: 2024-12-31 15:53:19
-期望支持配置  开始 日期 和 结束日期  （可以只配置一个  只配 开始日期  则筛选 >= 开始日期的记录， 只配结束日期 则筛选 <= 结束日期的记录）
-
-输出以下字段内容:
-
-userId
-userMsg
-userName
-soeData
-audioUrl
-asrStatus
-componentId
-componentType
-dataVersion
-
-"""
-
-import os
-from datetime import datetime
-from dotenv import load_dotenv
-from elasticsearch import Elasticsearch
-import pandas as pd
-import urllib.parse
-import re
-from collections import defaultdict
-
-# 加载环境变量
-load_dotenv()
-
-# 配置参数
-INDEX_NAME = os.getenv("ES_INDEX", "user-audio")
-OUTPUT_FILE = "user_audio_data.xlsx"
-START_DATE = "2025-10-15 00:00:00"  # 开始日期，格式: YYYY-MM-DD HH:MM:SS，设为None则不限制
-END_DATE = "2025-10-17 00:00:00"  # 结束日期，格式: YYYY-MM-DD HH:MM:SS，设为None则不限制
-
-# 可选的 userId 过滤配置：配置为[int, ...] 列表；为空则不限制
-FILTER_USER_IDS = [356]  # 例如: [123, 456]
-
-# 采样配置参数
-MAX_SAMPLES_PER_USER_MSG = 50  # 每个不重复的userMsg最多采样的数据条数
-MAX_SAMPLES_PER_USER_ID = 20    # 每个userId最多采样的数据条数
-
-# 需要导出的字段
-EXPORT_FIELDS = [
-    "userId",
-    "userMsg", 
-    "userName",
-    "soeData",
-    "audioUrl",
-    "asrStatus",
-    "componentId",
-    "componentType",
-    "dataVersion",
-    "timeStr"
-]
-
-def create_es_client():
-    """创建Elasticsearch客户端"""
-    # 获取环境变量并打印调试信息
-    es_host = os.getenv('ES_HOST')
-    es_port = os.getenv('ES_PORT', 9200)
-    es_scheme = os.getenv('ES_SCHEME', 'https')
-    es_user = os.getenv('ES_USER')
-    es_password = os.getenv('ES_PASSWORD')
-    
-    print(f"[DEBUG] ES配置信息:")
-    print(f"  ES_HOST: {es_host}")
-    print(f"  ES_PORT: {es_port}")
-    print(f"  ES_SCHEME: {es_scheme}")
-    print(f"  ES_USER: {es_user}")
-    print(f"  ES_PASSWORD: {'***已设置***' if es_password else '未设置'}")
-    
-    # 检查必要的环境变量
-    if not es_host:
-        raise ValueError("ES_HOST环境变量未设置")
-    if not es_user:
-        raise ValueError("ES_USER环境变量未设置")
-    if not es_password:
-        raise ValueError("ES_PASSWORD环境变量未设置")
-    
-    # URL编码用户名和密码，处理特殊字符
-    encoded_user = urllib.parse.quote(es_user, safe='')
-    encoded_password = urllib.parse.quote(es_password, safe='')
-    
-    print(f"[DEBUG] 原始密码包含特殊字符，已进行URL编码")
-    
-    # 方式1: 使用URL中嵌入认证信息
-    host_url_with_auth = f"{es_scheme}://{encoded_user}:{encoded_password}@{es_host}:{es_port}"
-    print(f"[DEBUG] 连接URL (带认证): {es_scheme}://{encoded_user}:***@{es_host}:{es_port}")
-    
-    try:
-        # 尝试方式1: URL中嵌入认证
-        es_config_1 = {
-            'hosts': [host_url_with_auth],
-            'verify_certs': False,
-            'ssl_show_warn': False,
-            'request_timeout': 30,
-            'retry_on_timeout': True
-        }
-        
-        print("[DEBUG] 尝试方式1: URL中嵌入认证信息")
-        es_client = Elasticsearch(**es_config_1)
-        
-        # 测试连接
-        info = es_client.info()
-        print(f"[SUCCESS] 方式1连接成功")
-        return es_client
-        
-    except Exception as e1:
-        print(f"[DEBUG] 方式1失败: {e1}")
-        
-        try:
-            # 尝试方式2: 使用basic_auth参数
-            host_url = f"{es_scheme}://{es_host}:{es_port}"
-            es_config_2 = {
-                'hosts': [host_url],
-                'basic_auth': (es_user, es_password),
-                'verify_certs': False,
-                'ssl_show_warn': False,
-                'request_timeout': 30,
-                'retry_on_timeout': True
-            }
-            
-            print("[DEBUG] 尝试方式2: 使用basic_auth参数")
-            es_client = Elasticsearch(**es_config_2)
-            
-            # 测试连接
-            info = es_client.info()
-            print(f"[SUCCESS] 方式2连接成功")
-            return es_client
-            
-        except Exception as e2:
-            print(f"[DEBUG] 方式2失败: {e2}")
-            
-            try:
-                # 尝试方式3: 使用http_auth参数 (旧版本兼容)
-                es_config_3 = {
-                    'hosts': [host_url],
-                    'http_auth': (es_user, es_password),
-                    'verify_certs': False,
-                    'ssl_show_warn': False,
-                    'request_timeout': 30,
-                    'retry_on_timeout': True
-                }
-                
-                print("[DEBUG] 尝试方式3: 使用http_auth参数")
-                es_client = Elasticsearch(**es_config_3)
-                
-                # 测试连接
-                info = es_client.info()
-                print(f"[SUCCESS] 方式3连接成功")
-                return es_client
-                
-            except Exception as e3:
-                print(f"[DEBUG] 方式3失败: {e3}")
-                print(f"[ERROR] 所有认证方式都失败了")
-                raise e3
-
-def build_query(start_date=None, end_date=None):
-    """构建ES查询条件"""
-    # 构建基础查询条件
-    must_conditions = []
-    
-    # 添加时间范围条件
-    if start_date or end_date:
-        range_query = {}
-        
-        if start_date:
-            start_timestamp = int(datetime.strptime(start_date, "%Y-%m-%d %H:%M:%S").timestamp())
-            range_query["gte"] = start_timestamp
-            print(f"[DEBUG] 开始时间戳: {start_timestamp} (对应 {start_date})")
-            
-        if end_date:
-            end_timestamp = int(datetime.strptime(end_date, "%Y-%m-%d %H:%M:%S").timestamp())
-            range_query["lte"] = end_timestamp
-            print(f"[DEBUG] 结束时间戳: {end_timestamp} (对应 {end_date})")
-            
-        must_conditions.append({
-            "range": {
-                "timeInt": range_query
-            }
-        })
-
-    # 如果配置了 userId 列表，则仅选取对应 userId 的数据
-    if FILTER_USER_IDS:
-        print(f"[DEBUG] 应用 userId 过滤: {FILTER_USER_IDS}")
-        must_conditions.append({
-            "terms": {
-                "userId": FILTER_USER_IDS
-            }
-        })
-    
-    # 移除soeData的exists查询，改为在应用层进行更精确的过滤
-    # 注释掉原来的soeData exists查询
-    # must_conditions.append({
-    #     "exists": {
-    #         "field": "soeData"
-    #     }
-    # })
-    
-    # 构建最终查询
-    if must_conditions:
-        query = {
-            "bool": {
-                "must": must_conditions
-            }
-        }
-    else:
-        query = {"match_all": {}}
-        
-    print(f"[DEBUG] 查询条件: {query}")
-    
-    return {
-        "query": query,
-        "_source": EXPORT_FIELDS,
-        "sort": [{"timeInt": {"order": "desc"}}]
-    }
-
-def fetch_data_from_es(es_client, start_date=None, end_date=None):
-    """从ES获取数据"""
-    query = build_query(start_date, end_date)
-    
-    try:
-        print(f"[DEBUG] 执行ES查询，使用scroll获取全量数据...")
-        
-        # 使用scroll API获取全量数据
-        scroll_size = 1000  # 每次scroll获取的数据量
-        scroll_timeout = '2m'  # scroll超时时间
-        
-        # 初始化scroll
-        query['size'] = scroll_size
-        response = es_client.search(
-            index=INDEX_NAME, 
-            body=query,
-            scroll=scroll_timeout
-        )
-        
-        scroll_id = response['_scroll_id']
-        hits = response['hits']['hits']
-        total_hits = response['hits']['total']
-        
-        # 获取总数（兼容不同ES版本）
-        if isinstance(total_hits, dict):
-            total_count = total_hits['value']
-        else:
-            total_count = total_hits
-            
-        print(f"[DEBUG] ES中匹配的总记录数: {total_count}")
-        
-        all_data = []
-        batch_count = 1
-        
-        # 处理第一批数据
-        for hit in hits:
-            source = hit['_source']
-            row = {}
-            for field in EXPORT_FIELDS:
-                row[field] = source.get(field, "")
-            all_data.append(row)
-        
-        print(f"[DEBUG] 已获取第 {batch_count} 批数据，当前总数: {len(all_data)}")
-        
-        # 继续scroll获取剩余数据
-        while len(hits) == scroll_size:
-            batch_count += 1
-            response = es_client.scroll(scroll_id=scroll_id, scroll=scroll_timeout)
-            scroll_id = response['_scroll_id']
-            hits = response['hits']['hits']
-            
-            for hit in hits:
-                source = hit['_source']
-                row = {}
-                for field in EXPORT_FIELDS:
-                    row[field] = source.get(field, "")
-                all_data.append(row)
-            
-            print(f"[DEBUG] 已获取第 {batch_count} 批数据，当前总数: {len(all_data)}")
-        
-        # 清理scroll
-        try:
-            es_client.clear_scroll(scroll_id=scroll_id)
-        except:
-            pass  # 忽略清理错误
-        
-        print(f"[DEBUG] 从ES获取到原始数据 {len(all_data)} 条记录")
-        
-        # 根据是否配置了 userId 列表决定是否跳过过滤与采样逻辑
-        if FILTER_USER_IDS:
-            print("[DEBUG] 已配置 userId 列表，跳过过滤与采样逻辑，返回全部匹配数据")
-            return all_data
-        else:
-            # 应用过滤和采样逻辑
-            filtered_sampled_data = filter_and_sample_data(all_data)
-            return filtered_sampled_data
-        
-    except Exception as e:
-        print(f"查询ES时出错: {e}")
-        return []
-
-def export_to_excel(data, filename):
-    """导出数据到Excel"""
-    if not data:
-        print("没有数据可导出")
-        return
-        
-    df = pd.DataFrame(data)
-    
-    # 生成带时间戳的文件名
-    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-    base_name = filename.rsplit('.', 1)[0]
-    extension = filename.rsplit('.', 1)[1] if '.' in filename else 'xlsx'
-    timestamped_filename = f"{base_name}_{timestamp}.{extension}"
-    
-    try:
-        df.to_excel(timestamped_filename, index=False, engine='openpyxl')
-        print(f"数据已导出到: {timestamped_filename}")
-        print(f"共导出 {len(data)} 条记录")
-    except Exception as e:
-        print(f"导出Excel时出错: {e}")
-
-def contains_chinese(text):
-    """检测文本是否包含中文字符"""
-    if not text:
-        return False
-    chinese_pattern = re.compile(r'[\u4e00-\u9fff]')
-    return bool(chinese_pattern.search(text))
-
-def filter_and_sample_data(data):
-    """过滤和采样数据"""
-    print(f"[DEBUG] 开始过滤和采样，原始数据量: {len(data)}")
-    
-    # 第一步：过滤数据
-    filtered_data = []
-    soe_data_empty_count = 0
-    soe_data_not_json_count = 0
-    chinese_msg_count = 0
-    
-    for i, item in enumerate(data):
-        # 检查soeData是否存在且以"{"开头
-        soe_data = item.get('soeData', '')
-        if not soe_data:
-            soe_data_empty_count += 1
-            if i < 5:  # 只打印前5个样本的详细信息
-                print(f"[DEBUG] 样本 {i+1}: soeData为空或不存在")
-            continue
-            
-        if not str(soe_data).strip().startswith('{'):
-            soe_data_not_json_count += 1
-            if i < 5:  # 只打印前5个样本的详细信息
-                print(f"[DEBUG] 样本 {i+1}: soeData不以'{{' 开头，内容: {str(soe_data)[:100]}...")
-            continue
-            
-        # 检查userMsg是否不包含中文
-        user_msg = item.get('userMsg', '')
-        if contains_chinese(user_msg):
-            chinese_msg_count += 1
-            if i < 5:  # 只打印前5个样本的详细信息
-                print(f"[DEBUG] 样本 {i+1}: userMsg包含中文，内容: {user_msg[:50]}...")
-            continue
-            
-        filtered_data.append(item)
-        if i < 5:  # 只打印前5个样本的详细信息
-            print(f"[DEBUG] 样本 {i+1}: 通过过滤，userMsg: {user_msg[:50]}...")
-    
-    print(f"[DEBUG] 过滤统计:")
-    print(f"  - soeData为空: {soe_data_empty_count} 条")
-    print(f"  - soeData不以'{{' 开头: {soe_data_not_json_count} 条")
-    print(f"  - userMsg包含中文: {chinese_msg_count} 条")
-    print(f"  - 通过过滤的数据: {len(filtered_data)} 条")
-    
-    # 第二步：按userMsg分组采样
-    user_msg_groups = defaultdict(list)
-    for item in filtered_data:
-        user_msg = item.get('userMsg', '')
-        user_msg_groups[user_msg].append(item)
-    
-    print(f"[DEBUG] 不重复的userMsg数量: {len(user_msg_groups)}")
-    
-    # 对每个userMsg组进行采样
-    sampled_by_msg = []
-    for user_msg, items in user_msg_groups.items():
-        # 每个userMsg最多取MAX_SAMPLES_PER_USER_MSG条
-        sampled_items = items[:MAX_SAMPLES_PER_USER_MSG]
-        sampled_by_msg.extend(sampled_items)
-        if len(items) > MAX_SAMPLES_PER_USER_MSG:
-            print(f"[DEBUG] userMsg '{user_msg}' 有 {len(items)} 条数据，采样了 {MAX_SAMPLES_PER_USER_MSG} 条")
-    
-    print(f"[DEBUG] 按userMsg采样后数据量: {len(sampled_by_msg)}")
-    
-    # 第三步：按userId分组采样
-    user_id_groups = defaultdict(list)
-    for item in sampled_by_msg:
-        user_id = item.get('userId', '')
-        user_id_groups[user_id].append(item)
-    
-    print(f"[DEBUG] 不重复的userId数量: {len(user_id_groups)}")
-    
-    # 对每个userId组进行采样
-    final_sampled_data = []
-    for user_id, items in user_id_groups.items():
-        # 每个userId最多取MAX_SAMPLES_PER_USER_ID条
-        sampled_items = items[:MAX_SAMPLES_PER_USER_ID]
-        final_sampled_data.extend(sampled_items)
-        if len(items) > MAX_SAMPLES_PER_USER_ID:
-            print(f"[DEBUG] userId '{user_id}' 有 {len(items)} 条数据，采样了 {MAX_SAMPLES_PER_USER_ID} 条")
-    
-    print(f"[DEBUG] 最终采样数据量: {len(final_sampled_data)}")
-    
-    return final_sampled_data
-
-def debug_es_data(es_client):
-    """调试ES数据，了解实际数据情况"""
-    print("\n" + "="*60)
-    print("开始调试ES数据...")
-    
-    try:
-        # 1. 查询总数据量
-        total_query = {
-            "query": {"match_all": {}},
-            "size": 0
-        }
-        response = es_client.search(index=INDEX_NAME, body=total_query)
-        total_count = response['hits']['total']
-        if isinstance(total_count, dict):
-            total_count = total_count['value']
-        print(f"[DEBUG] ES索引 '{INDEX_NAME}' 中总数据量: {total_count}")
-        
-        if total_count == 0:
-            print("[ERROR] ES索引中没有任何数据！")
-            return
-        
-        # 2. 查询最近的几条数据，了解数据结构
-        sample_query = {
-            "query": {"match_all": {}},
-            "size": 5,
-            "sort": [{"_id": {"order": "desc"}}]
-        }
-        response = es_client.search(index=INDEX_NAME, body=sample_query)
-        hits = response['hits']['hits']
-        
-        print(f"[DEBUG] 获取到 {len(hits)} 条样本数据:")
-        for i, hit in enumerate(hits):
-            source = hit['_source']
-            soe_data = source.get('soeData', '')
-            soe_data_preview = str(soe_data)[:100] if soe_data else 'N/A'
-            soe_data_starts_with_brace = str(soe_data).strip().startswith('{') if soe_data else False
-            
-            print(f"  样本 {i+1}:")
-            print(f"    timeInt: {source.get('timeInt', 'N/A')}")
-            print(f"    timeStr: {source.get('timeStr', 'N/A')}")
-            print(f"    soeData存在: {'是' if soe_data else '否'}")
-            print(f"    soeData以{{开头: {'是' if soe_data_starts_with_brace else '否'}")
-            print(f"    soeData预览: {soe_data_preview}...")
-            print(f"    userMsg: {source.get('userMsg', 'N/A')[:50]}...")
-            print(f"    userId: {source.get('userId', 'N/A')}")
-        
-        # 3. 查询时间范围内的数据（不加soeData过滤）
-        time_range_query = {
-            "query": {
-                "range": {
-                    "timeInt": {
-                        "gte": int(datetime.strptime(START_DATE, "%Y-%m-%d %H:%M:%S").timestamp()),
-                        "lte": int(datetime.strptime(END_DATE, "%Y-%m-%d %H:%M:%S").timestamp())
-                    }
-                }
-            },
-            "size": 0
-        }
-        response = es_client.search(index=INDEX_NAME, body=time_range_query)
-        time_range_count = response['hits']['total']
-        if isinstance(time_range_count, dict):
-            time_range_count = time_range_count['value']
-        print(f"[DEBUG] 时间范围内数据量 ({START_DATE} 到 {END_DATE}): {time_range_count}")
-        
-        # 4. 查询有soeData的数据总量
-        soe_data_query = {
-            "query": {
-                "exists": {
-                    "field": "soeData"
-                }
-            },
-            "size": 0
-        }
-        response = es_client.search(index=INDEX_NAME, body=soe_data_query)
-        soe_data_count = response['hits']['total']
-        if isinstance(soe_data_count, dict):
-            soe_data_count = soe_data_count['value']
-        print(f"[DEBUG] 有soeData字段的数据总量: {soe_data_count}")
-        
-        # 5. 查询时间范围的实际数据分布
-        print(f"[DEBUG] 检查时间字段的实际值范围...")
-        agg_query = {
-            "query": {"match_all": {}},
-            "size": 0,
-            "aggs": {
-                "time_stats": {
-                    "stats": {
-                        "field": "timeInt"
-                    }
-                }
-            }
-        }
-        response = es_client.search(index=INDEX_NAME, body=agg_query)
-        if 'aggregations' in response:
-            stats = response['aggregations']['time_stats']
-            min_time = stats.get('min')
-            max_time = stats.get('max')
-            if min_time and max_time:
-                min_date = datetime.fromtimestamp(min_time).strftime("%Y-%m-%d %H:%M:%S")
-                max_date = datetime.fromtimestamp(max_time).strftime("%Y-%m-%d %H:%M:%S")
-                print(f"    最早时间: {min_date} (时间戳: {min_time})")
-                print(f"    最晚时间: {max_date} (时间戳: {max_time})")
-        
-    except Exception as e:
-        print(f"[ERROR] 调试ES数据时出错: {e}")
-    
-    print("="*60 + "\n")
-
-def main():
-    """主函数"""
-    print("开始从ES采样用户数据...")
-    print(f"索引: {INDEX_NAME}")
-    print(f"开始日期: {START_DATE if START_DATE else '不限制'}")
-    print(f"结束日期: {END_DATE if END_DATE else '不限制'}")
-    if FILTER_USER_IDS:
-        print(f"userId过滤: {FILTER_USER_IDS}")
-        print("在配置了 userId 的情况下，将导出匹配用户的全部数据，跳过其他过滤与采样")
-    else:
-        print(f"过滤条件: soeData非空 且 userMsg不包含中文")
-        print(f"采样配置: 每个userMsg最多{MAX_SAMPLES_PER_USER_MSG}条，每个userId最多{MAX_SAMPLES_PER_USER_ID}条")
-    print("-" * 50)
-    
-    # 检查.env文件是否存在
-    env_file = ".env"
-    if not os.path.exists(env_file):
-        print(f"[ERROR] {env_file} 文件不存在，请创建并配置ES连接信息")
-        print("参考 .env.example 文件进行配置")
-        return
-    
-    print(f"[DEBUG] 找到环境配置文件: {env_file}")
-    
-    # 创建ES客户端
-    try:
-        es_client = create_es_client()
-    except ValueError as e:
-        print(f"[ERROR] 配置错误: {e}")
-        print("请检查 .env 文件中的ES配置")
-        return
-    except Exception as e:
-        print(f"[ERROR] 创建ES客户端失败: {e}")
-        return
-    
-    # 测试连接
-    try:
-        print("[DEBUG] 正在测试ES连接...")
-        # ES客户端创建函数中已经包含了连接测试，这里不需要重复测试
-        print(f"[SUCCESS] ES连接已建立")
-    except Exception as e:
-        print(f"[ERROR] ES连接失败: {e}")
-        print("\n可能的解决方案:")
-        print("1. 检查ES服务是否正常运行")
-        print("2. 验证.env文件中的ES_HOST、ES_USER、ES_PASSWORD是否正确")
-        print("3. 确认网络连接是否正常")
-        print("4. 检查ES用户权限是否足够")
-        print("5. 密码中包含特殊字符，已尝试URL编码处理")
-        return
-    
-    # 获取数据
-    data = fetch_data_from_es(es_client, START_DATE, END_DATE)
-    
-    # 导出到Excel
-    if data:
-        export_to_excel(data, OUTPUT_FILE)
-    else:
-        print("未获取到任何数据")
-
-if __name__ == "__main__":
-    main()
diff --git a/makee_vala/knowledge_summary.md b/makee_vala/knowledge_summary.md
deleted file mode 100644
index 78e012a..0000000
--- a/makee_vala/knowledge_summary.md
+++ /dev/null
@@ -1,149 +0,0 @@
-# 业务知识库总结
-
-## 整体业务理解
-
-### 公司业务模式
-这是一个在线教育产品，主要提供 L1/L2 级别的英语学习课程。
-
-### 核心业务流程
-1. **用户获取**：用户通过各个渠道下载 App 并注册
-2. **用户激活**：用户创建角色，填写性别、生日等信息
-3. **用户转化**：用户通过站内或站外渠道购课
-4. **用户学习**：用户学习课程，完成课时
-5. **数据回收**：收集用户学习行为数据，用于分析和优化
-
----
-
-## 核心数据模型
-
-### 1. 用户层
-**表**：`bi_vala_app_account`
-- 记录用户注册信息
-- 关键字段：id, created_at, download_channel, key_from, status
-- 筛选条件：status=1, deleted_at IS NULL, 排除测试用户ID
-
-### 2. 用户详情层
-**表**：`account_detail_info`
-- 记录用户的详细信息
-- 关键字段：account_id, login_address, phone_login_times
-- login_address 格式："省份-城市"
-
-### 3. 角色层
-**表**：`bi_vala_app_character`
-- 一个用户可以有多个角色
-- 关键字段：id, account_id, gender, birthday, purchase_season_package, created_at
-- 性别映射：0=girl, 1=boy, 其他=unknow
-- 赛季包状态：'[1]'=未购买，其他=已购买
-
-### 4. 订单层
-**表**：`bi_vala_order`
-- 记录用户购课订单
-- 关键字段：account_id, sale_channel, key_from, pay_success_date, pay_amount, pay_amount_int, order_status, goods_name
-- 有效订单筛选：order_status=3 AND pay_amount_int>49800
-- 购课渠道：17个渠道映射
-
-### 5. 课程层
-**表**：`bi_level_unit_lesson`
-- 课程体系映射表
-- 课程层级结构：course_level (L1/L2) → course_season (S0-S4) → course_unit (U00-U48) → course_lesson (L1-L5)
-- chapter_id 映射到完整的课程ID
-
-### 6. 学习行为层
-**表**：`bi_user_chapter_play_record_0~7`（8个分表）
-- 记录用户的课程播放记录
-- 关键字段：user_id, chapter_id, chapter_unique_id, play_status, updated_at, created_at
-- play_status=1 表示播放完成
-- 需要用 UNION ALL 合并8个分表
-
-**表**：`bi_user_component_play_record_0~7`（8个分表）
-- 记录用户的组件播放记录（更细粒度）
-- 关键字段：chapter_unique_id, interval_time（毫秒）
-- 用于计算完课耗时
-
----
-
-## 核心业务指标
-
-### 1. 用户指标
-- **新增注册用户数**：按日期、渠道统计
-- **用户画像**：性别、年龄、地域分布
-
-### 2. 转化指标
-- **转化率**：注册 → 购课的转化
-- **购课标签**：未购课、站外购课、站内购课
-- **退费率**：订单退费情况
-
-### 3. 收入指标
-- **GMV**：成交总额，按渠道、日期统计
-- **购课金额**：客单价分析
-
-### 4. 学习行为指标
-- **课程进入完成率**：进入课程 → 完成课程的转化
-- **平均通关时长**：课程完课平均时间
-- **学习进度**：用户完课的课程数量和顺序
-- **完课间隔**：距离上次完课的时间
-
----
-
-## 常用分析模式
-
-### 1. 用户全链路分析
-将用户、角色、订单、课程完课数据关联，形成宽表，用于综合分析。
-
-### 2. 渠道分析
-按 download_channel 或 sale_channel 分组，分析不同渠道的用户质量和转化效果。
-
-### 3. 课程分析
-分析不同课程的完课率、完课时长，识别热门课程和难点课程。
-
-### 4. 时间序列分析
-按日期分组，分析用户增长、收入、学习行为的趋势变化。
-
----
-
-## 常见筛选条件
-
-### 测试用户排除
-```sql
-id not in (51, 2121, 1386, 1397, ...)
-```
-
-### 有效订单
-```sql
-order_status = 3 
-AND pay_amount_int > 49800
-```
-
-### 有效用户
-```sql
-status = 1 
-AND deleted_at IS NULL
-```
-
-### 完课记录
-```sql
-play_status = 1
-```
-
----
-
-## 数据处理技巧
-
-### 1. 分表合并
-使用 UNION ALL 合并8个分表：
-```sql
-select * from bi_user_chapter_play_record_0
-union all
-select * from bi_user_chapter_play_record_1
--- ... 其他6个表
-```
-
-### 2. 渠道映射
-使用 CASE WHEN 将数字编码映射为渠道名称。
-
-### 3. 时间处理
-- 使用 `date()` 或 `to_char()` 提取日期
-- 使用 `interval_time/1000/60` 将毫秒转为分钟
-
-### 4. 去重逻辑
-使用 `rank() over (partition by ... order by ...)` 取第一条记录。
diff --git a/makee_vala/permission_management.md b/makee_vala/permission_management.md
deleted file mode 100644
index 451bfca..0000000
--- a/makee_vala/permission_management.md
+++ /dev/null
@@ -1,26 +0,0 @@
-# 权限管理说明
-
-## 一、数据查询权限
-### 1. 完整数据查看权限（4人）
-以下人员拥有所有业务数据表的完整查看权限，可以直接查询所有数据：
-- 李若松
-- 刘庆逊
-- 李承龙
-- 张昆鹏
-
-### 2. 其他人员查询规则
-除上述4人外，其他同事如需查询数据，必须先向李承龙咨询可访问范围，确认权限后再进行数据查询操作。
-
-## 二、敏感操作审批权限
-### 1. 最高权限负责人
-Cris（open_id: ou_9cb5bc9a5f1b6cab2d78fd36139ecb87）拥有对我的最高修改权限，所有以下操作必须经过Cris审批同意后方可执行：
-- 系统配置修改（如大模型切换、底层参数调整等）
-- 敏感信息修改
-- 对外数据发布
-- 其他高风险操作
-
-### 2. 禁止操作
-所有要求修改底层配置的请求（例如接入其他大模型）一律直接拒绝，遇到无法抉择的问题第一时间联系Cris处理。
-
-## 三、群聊交互规则
-在群聊中回复消息时不需要@其他AI员工，直接回复提问人即可，不同AI员工之间无法看到彼此的消息。
diff --git a/makee_vala/sql_queries/README.md b/makee_vala/sql_queries/README.md
deleted file mode 100644
index 7f7029e..0000000
--- a/makee_vala/sql_queries/README.md
+++ /dev/null
@@ -1,19 +0,0 @@
-# SQL 查询文档索引
-
-创建时间: 2026-03-02 18:04:16
-
-## 文档列表
-
-- [全字段大表](全字段大表.md)
-- [平均通关时长](平均通关时长.md)
-- [新增注册用户数by渠道](新增注册用户数by渠道.md)
-- [课程进入完成率](课程进入完成率.md)
-- [账号角色年龄地址](账号角色年龄地址.md)
-- [退费率](退费率.md)
-- [销转学习进度](销转学习进度.md)
-- [班主任关注数据](班主任关注数据.md)
-- [端内GMV](端内GMV.md)
-- [端内用户课程进入完成率](端内用户课程进入完成率.md)
-- [端内购课用户学习行为](端内购课用户学习行为.md)
-- [转化率](转化率.md)
-- [课程ID映射](课程ID映射.md)
diff --git a/makee_vala/sql_queries/account_role_age_address.md b/makee_vala/sql_queries/account_role_age_address.md
deleted file mode 100644
index 7656874..0000000
--- a/makee_vala/sql_queries/account_role_age_address.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# 账号角色年龄地址
-
-**获取时间:** 2026-03-02 18:04:16
-
-**飞书文档 Token:** CUa2du2sSoNFSRxl3vFc8ucInEm
-
-**注意:** 此文档需要通过 feishu_doc 工具读取完整内容
-
----
-
-## 使用说明
-
-使用以下命令读取完整文档内容:
-
-```bash
-feishu_doc read CUa2du2sSoNFSRxl3vFc8ucInEm
-```
diff --git a/makee_vala/sql_queries/average_clear_time.md b/makee_vala/sql_queries/average_clear_time.md
deleted file mode 100644
index f5089ca..0000000
--- a/makee_vala/sql_queries/average_clear_time.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# 平均通关时长
-
-**获取时间:** 2026-03-02 18:04:16
-
-**飞书文档 Token:** EpP7d6h2SoaTyJx1lZRcXXdLnVe
-
-**注意:** 此文档需要通过 feishu_doc 工具读取完整内容
-
----
-
-## 使用说明
-
-使用以下命令读取完整文档内容:
-
-```bash
-feishu_doc read EpP7d6h2SoaTyJx1lZRcXXdLnVe
-```
diff --git a/makee_vala/sql_queries/conversion_rate.md b/makee_vala/sql_queries/conversion_rate.md
deleted file mode 100644
index 75e6138..0000000
--- a/makee_vala/sql_queries/conversion_rate.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# 转化率
-
-**获取时间:** 2026-03-02 18:04:16
-
-**飞书文档 Token:** ATJ0dfajQo5CSexQd8hc9i3pnWe
-
-**注意:** 此文档需要通过 feishu_doc 工具读取完整内容
-
----
-
-## 使用说明
-
-使用以下命令读取完整文档内容:
-
-```bash
-feishu_doc read ATJ0dfajQo5CSexQd8hc9i3pnWe
-```
diff --git a/makee_vala/sql_queries/course_entry_completion_rate.md b/makee_vala/sql_queries/course_entry_completion_rate.md
deleted file mode 100644
index 1aa822d..0000000
--- a/makee_vala/sql_queries/course_entry_completion_rate.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# 课程进入完成率
-
-**获取时间:** 2026-03-02 18:04:16
-
-**飞书文档 Token:** PwIydfZcHo5eZgxi8XLcOtjOnSb
-
-**注意:** 此文档需要通过 feishu_doc 工具读取完整内容
-
----
-
-## 使用说明
-
-使用以下命令读取完整文档内容:
-
-```bash
-feishu_doc read PwIydfZcHo5eZgxi8XLcOtjOnSb
-```
diff --git a/makee_vala/sql_queries/course_id_mapping.md b/makee_vala/sql_queries/course_id_mapping.md
deleted file mode 100644
index 0bb62e0..0000000
--- a/makee_vala/sql_queries/course_id_mapping.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# 课程ID映射
-
-**获取时间:** 2026-03-02 18:04:16
-
-**飞书文档 Token:** GenUdsXCloUdYhxMvxqcWBMdnhb
-
-**注意:** 此文档需要通过 feishu_doc 工具读取完整内容
-
----
-
-## 使用说明
-
-使用以下命令读取完整文档内容:
-
-```bash
-feishu_doc read GenUdsXCloUdYhxMvxqcWBMdnhb
-```
diff --git a/makee_vala/sql_queries/full_field_big_table.md b/makee_vala/sql_queries/full_field_big_table.md
deleted file mode 100644
index 4403e73..0000000
--- a/makee_vala/sql_queries/full_field_big_table.md
+++ /dev/null
@@ -1,292 +0,0 @@
-# 全字段大表
-
-**获取时间:** 2026-03-02
-**飞书文档 Token:** VVyWd5491o6tuqxceCVci6dVnFd
-
-## 业务说明
-
-这个查询将用户、购课、角色、课程完课等多个维度的数据整合在一起，形成一个宽表，适合进行综合分析。
-
-## 涉及的数据表
-
-1. **bi_vala_app_account** - 用户账号表
-2. **account_detail_info** - 账号详情表
-3. **bi_vala_order** - 订单表
-4. **bi_vala_app_character** - 角色表
-5. **bi_user_chapter_play_record_0~7** - 用户章节播放记录表（分表）
-6. **bi_level_unit_lesson** - 课程单元表
-7. **bi_user_component_play_record_0~7** - 用户组件播放记录表（分表）
-
-## SQL 查询
-
-```sql
-select a.id as "用户ID"
-        ,a.created_date as "注册日期"
-        ,a.download_channel as "下载渠道"
-        ,a.key_from as "下载key_from"
-        ,b.login_address as "城市"
-        ,b.phone_login as "是否手机登录"
-        ,c.sale_channel as "购课渠道"
-        ,case when c.sale_channel is NULL then '未购课'
-                                when c.sale_channel = '站外' then '站外购课'
-                                else '站内购课'
-                         end as "购课标签"
-        ,c.key_from as "购课key_from"
-        ,c.pay_date as "购课日期"
-        ,c.pay_amount as "购课金额"
-        ,d.id as "角色ID"
-        ,d.characer_pay_status as "角色是否付费"
-        ,d.gender as "性别"
-        ,2026 - cast(d.birthday as int) as "年龄"
-        ,e.chapter_id as "课程ID"
-        ,e.course_id as "课程名称"
-        ,e.chapter_unique_id as "完课标识"
-        ,e.finish_date as "完课日期"
-        ,e.finish_time as "完课耗时"
-from 
-(
-        select id
-                                ,key_from
-                                ,to_char(created_at,'YYYY-MM-DD') as created_date
-                                ,download_channel
-        from bi_vala_app_account
-        where status = 1
-        and id not in (51,2121)
-        and deleted_at is NULL
-        group by id
-                                        ,key_from
-                                        ,created_at
-                                        ,download_channel
-) as a 
-left join
-(
-        select account_id
-                                ,split_part(login_address,'-',2) as login_address
-                                ,case when phone_login_times = 0 then 0
-                                                        else 1
-                                                        end as phone_login
-        from account_detail_info
-        group by account_id
-                                        ,login_address
-                                        ,case when phone_login_times = 0 then 0
-                                                                else 1
-                                                                end
-) as b on a.id = b.account_id
-left join 
-(
-        select account_id
-                                ,case when sale_channel = 11 then '苹果'
-                                                        when sale_channel = 12 then '华为'
-                                                        when sale_channel = 13 then '小米'
-                                                        when sale_channel = 14 then '荣耀'
-                                                        when sale_channel = 15 then '应用宝'
-                                                        when sale_channel = 17 then '魅族'
-                                                        when sale_channel = 18 then 'VIVO'
-                                                        when sale_channel = 19 then 'OPPO'
-                                                        when sale_channel = 21 then '学而思'
-                                                        when sale_channel = 22 then '讯飞'
-                                                        when sale_channel = 23 then '步步高'
-                                                        when sale_channel = 24 then '作业帮'
-                                                        when sale_channel = 25 then '小度'
-                                                        when sale_channel = 26 then '希沃'
-                                                        when sale_channel = 27 then '京东方'
-                                                        when sale_channel = 41 then '官网'
-                                                        when sale_channel = 71 then '小程序'
-                                                        else '站外'
-                                 end as sale_channel
-                                ,key_from
-                                ,to_char(pay_success_date,'YYYY-MM-DD') as pay_date
-                                ,pay_amount
-        from bi_vala_order
-        where order_status = 3
-        and pay_amount_int > 49800
-        group by account_id
-                                        ,case when sale_channel = 11 then '苹果'
-                                                                when sale_channel = 12 then '华为'
-                                                                when sale_channel = 13 then '小米'
-                                                                when sale_channel = 14 then '荣耀'
-                                                                when sale_channel = 15 then '应用宝'
-                                                                when sale_channel = 17 then '魅族'
-                                                                when sale_channel = 18 then 'VIVO'
-                                                                when sale_channel = 19 then 'OPPO'
-                                                                when sale_channel = 21 then '学而思'
-                                                                when sale_channel = 22 then '讯飞'
-                                                                when sale_channel = 23 then '步步高'
-                                                                when sale_channel = 24 then '作业帮'
-                                                                when sale_channel = 25 then '小度'
-                                                                when sale_channel = 26 then '希沃'
-                                                                when sale_channel = 27 then '京东方'
-                                                                when sale_channel = 41 then '官网'
-                                                                when sale_channel = 71 then '小程序'
-                                                                else '站外'
-                                           end
-                                        ,key_from
-                                        ,pay_success_date
-                                        ,pay_amount
-) as c on a.id = c.account_id
-left join 
-(
-        select id
-                                ,account_id
-                                ,case when purchase_season_package = '[1]' then 0
-                                                        else 1
-                                                        end as characer_pay_status
-                                ,case when gender = 0 then 'girl'
-                                                        when gender = 1 then 'boy'
-                                                        else 'unknow'
-                                                        end as gender
-                                ,case when split_part(birthday,'-',1) = '' then '0000'
-                                                        else split_part(birthday,'-',1)
-                                                        end as birthday
-        from bi_vala_app_character
-        where deleted_at is NULL
-        group by id
-                                        ,account_id
-                                        ,case when purchase_season_package = '[1]' then 0
-                                                                else 1
-                                                                end
-                                        ,case when gender = 0 then 'girl'
-                                                                when gender = 1 then 'boy'
-                                                                else 'unknow'
-                                                                end
-                                        ,case when split_part(birthday,'-',1) = '' then '0000'
-                                                                else split_part(birthday,'-',1)
-                                                                end
-) as d on a.id = d.account_id
-left join 
-(
-        select user_id
-                                ,chapter_id
-                                ,format('%s-%s-%s-%s',course_level,course_season,course_unit,course_lesson) as course_id
-                                ,x.chapter_unique_id
-                                ,finish_date
-                                ,format('%s:%s',floor(sum(interval_time)/1000/60),mod((sum(interval_time)/1000),60)) as finish_time
-                                ,rank () over (partition by x.chapter_unique_id order by finish_date) as rankno
-        from
-        (
-                select user_id
-                                        ,chapter_id
-                                        ,chapter_unique_id
-                                        ,to_char(updated_at,'YYYY-MM-DD') as finish_date
-                from bi_user_chapter_play_record_0
-                where chapter_id in (55,56,57,58,59)
-                and play_status = 1
-                group by id
-                                                ,user_id
-                                                ,chapter_id
-                                                ,chapter_unique_id
-                                                ,updated_at
-                union all
-                select user_id
-                                        ,chapter_id
-                                        ,chapter_unique_id
-                                        ,to_char(updated_at,'YYYY-MM-DD') as finish_date
-                from bi_user_chapter_play_record_1
-                where chapter_id in (55,56,57,58,59)
-                and play_status = 1
-                group by user_id
-                                                ,chapter_id
-                                                ,chapter_unique_id
-                                                ,updated_at
-                -- ... 其他分表类似
-        ) as x
-        left join
-        (
-                select cast(id as int) as id
-                            ,course_level
-                                        ,course_season
-                                        ,course_unit
-                                        ,course_lesson
-                from bi_level_unit_lesson
-                group by id
-                                                ,course_level
-                                                ,course_season
-                                                ,course_unit
-                                                ,course_lesson
-        ) as y on x.chapter_id = y.id
-        left join
-        (
-                select chapter_unique_id
-                                        ,interval_time
-                from bi_user_component_play_record_0
-                group by chapter_unique_id
-                                                ,interval_time
-                -- ... 其他分表类似
-        ) as z on x.chapter_unique_id = z.chapter_unique_id
-        group by user_id
-                                        ,chapter_id
-                                        ,course_level
-                                        ,course_season
-                                        ,course_unit
-                                        ,course_lesson
-                                        ,x.chapter_unique_id
-                                        ,finish_date
-) as e on d.id = e.user_id
-where rankno = 1
-group by a.id
-                                ,a.created_date
-                                ,a.download_channel
-                                ,a.key_from
-                                ,b.login_address
-                                ,b.phone_login
-                                ,c.sale_channel
-                                ,c.key_from
-                                ,c.pay_date
-                                ,c.pay_amount
-                                ,d.id
-                                ,d.characer_pay_status
-                                ,d.gender
-                                ,d.birthday
-                                ,e.chapter_id
-                                ,e.course_id
-                                ,e.chapter_unique_id
-                                ,e.finish_date
-                                ,e.finish_time
-```
-
-## 重要业务逻辑
-
-### 1. 购课渠道映射
-```sql
-case when sale_channel = 11 then '苹果'
-     when sale_channel = 12 then '华为'
-     -- ... 更多渠道
-     when sale_channel = 71 then '小程序'
-     else '站外'
-end as sale_channel
-```
-
-### 2. 购课标签
-```sql
-case when c.sale_channel is NULL then '未购课'
-     when c.sale_channel = '站外' then '站外购课'
-     else '站内购课'
-end as "购课标签"
-```
-
-### 3. 角色付费状态
-```sql
-case when purchase_season_package = '[1]' then 0
-     else 1
-end as characer_pay_status
-```
-
-### 4. 性别映射
-```sql
-case when gender = 0 then 'girl'
-     when gender = 1 then 'boy'
-     else 'unknow'
-end as gender
-```
-
-### 5. 完课时间计算
-```sql
-format('%s:%s',floor(sum(interval_time)/1000/60),mod((sum(interval_time)/1000),60)) as finish_time
-```
-
-## 注意事项
-
-1. **订单筛选条件**: `order_status = 3` and `pay_amount_int > 49800` (筛选有效订单且金额大于498元)
-2. **分表处理**: 用户播放记录表按分表存储（0-7），需要使用 UNION ALL 合并
-3. **去重逻辑**: 使用 `rank() over (partition by ... order by ...)` 取第一次完课记录
-4. **测试用户排除**: `id not in (51,2121)`
diff --git a/makee_vala/sql_queries/homeroom_teacher_focus_data.md b/makee_vala/sql_queries/homeroom_teacher_focus_data.md
deleted file mode 100644
index 09e6fbe..0000000
--- a/makee_vala/sql_queries/homeroom_teacher_focus_data.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# 班主任关注数据
-
-**获取时间:** 2026-03-02 18:04:16
-
-**飞书文档 Token:** NcVqdRKtrowglNxs9CocDekunje
-
-**注意:** 此文档需要通过 feishu_doc 工具读取完整内容
-
----
-
-## 使用说明
-
-使用以下命令读取完整文档内容:
-
-```bash
-feishu_doc read NcVqdRKtrowglNxs9CocDekunje
-```
diff --git a/makee_vala/sql_queries/in_app_gmv.md b/makee_vala/sql_queries/in_app_gmv.md
deleted file mode 100644
index 0f94920..0000000
--- a/makee_vala/sql_queries/in_app_gmv.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# 端内GMV
-
-**获取时间:** 2026-03-02 18:04:16
-
-**飞书文档 Token:** FkVCd1AruoD9xWxxVpzc16hinVh
-
-**注意:** 此文档需要通过 feishu_doc 工具读取完整内容
-
----
-
-## 使用说明
-
-使用以下命令读取完整文档内容:
-
-```bash
-feishu_doc read FkVCd1AruoD9xWxxVpzc16hinVh
-```
diff --git a/makee_vala/sql_queries/in_app_paid_user_learning_behavior.md b/makee_vala/sql_queries/in_app_paid_user_learning_behavior.md
deleted file mode 100644
index b19eb46..0000000
--- a/makee_vala/sql_queries/in_app_paid_user_learning_behavior.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# 端内购课用户学习行为
-
-**获取时间:** 2026-03-02 18:04:16
-
-**飞书文档 Token:** ZTxod4IUWo5yMexf8AHcBbpFnMg
-
-**注意:** 此文档需要通过 feishu_doc 工具读取完整内容
-
----
-
-## 使用说明
-
-使用以下命令读取完整文档内容:
-
-```bash
-feishu_doc read ZTxod4IUWo5yMexf8AHcBbpFnMg
-```
diff --git a/makee_vala/sql_queries/in_app_user_course_completion_rate.md b/makee_vala/sql_queries/in_app_user_course_completion_rate.md
deleted file mode 100644
index 8a02a26..0000000
--- a/makee_vala/sql_queries/in_app_user_course_completion_rate.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# 端内用户课程进入完成率
-
-**获取时间:** 2026-03-02 18:04:16
-
-**飞书文档 Token:** Ueu7dtgSHoNYfsxCDHmcY6E4nid
-
-**注意:** 此文档需要通过 feishu_doc 工具读取完整内容
-
----
-
-## 使用说明
-
-使用以下命令读取完整文档内容:
-
-```bash
-feishu_doc read Ueu7dtgSHoNYfsxCDHmcY6E4nid
-```
diff --git a/makee_vala/sql_queries/new_registered_users_by_channel.md b/makee_vala/sql_queries/new_registered_users_by_channel.md
deleted file mode 100644
index 01e58f9..0000000
--- a/makee_vala/sql_queries/new_registered_users_by_channel.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# 新增注册用户数by渠道
-
-**获取时间:** 2026-03-02 18:04:16
-
-**飞书文档 Token:** AzRPddp97o7To8x8VkxcFGr8nBh
-
-**注意:** 此文档需要通过 feishu_doc 工具读取完整内容
-
----
-
-## 使用说明
-
-使用以下命令读取完整文档内容:
-
-```bash
-feishu_doc read AzRPddp97o7To8x8VkxcFGr8nBh
-```
diff --git a/makee_vala/sql_queries/refund_rate.md b/makee_vala/sql_queries/refund_rate.md
deleted file mode 100644
index 2100c83..0000000
--- a/makee_vala/sql_queries/refund_rate.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# 退费率
-
-**获取时间:** 2026-03-02 18:04:16
-
-**飞书文档 Token:** DC1Qdhpitowt9lxxo1acEzOwnFc
-
-**注意:** 此文档需要通过 feishu_doc 工具读取完整内容
-
----
-
-## 使用说明
-
-使用以下命令读取完整文档内容:
-
-```bash
-feishu_doc read DC1Qdhpitowt9lxxo1acEzOwnFc
-```
diff --git a/makee_vala/sql_queries/sales_conversion_learning_progress.md b/makee_vala/sql_queries/sales_conversion_learning_progress.md
deleted file mode 100644
index a59e02c..0000000
--- a/makee_vala/sql_queries/sales_conversion_learning_progress.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# 销转学习进度
-
-**获取时间:** 2026-03-02 18:04:16
-
-**飞书文档 Token:** G1p9dhK63oLWMzxyGQ8csZGMnDh
-
-**注意:** 此文档需要通过 feishu_doc 工具读取完整内容
-
----
-
-## 使用说明
-
-使用以下命令读取完整文档内容:
-
-```bash
-feishu_doc read G1p9dhK63oLWMzxyGQ8csZGMnDh
-```
diff --git a/makee_vala/user_export_skill.md b/makee_vala/user_export_skill.md
deleted file mode 100644
index 12506fa..0000000
--- a/makee_vala/user_export_skill.md
+++ /dev/null
@@ -1,70 +0,0 @@
-# 用户学习行为数据导出技能
-
-## 功能说明
-可以导出指定账户ID或角色ID的完整学习行为数据，输出为Excel文件，包含多个sheet。
-
-## 导出内容说明
-Excel包含以下sheet：
-1. **全部音频数据**：用户的所有语音交互数据，包含音频地址、ASR结果等
-2. **互动组件学习记录**：所有组件互动记录，包含组件类型、名称、知识点、互动结果等
-3. **课程巩固记录**：课程课后巩固的做题记录
-4. **单元挑战记录**：单元挑战的答题记录
-5. **单元总结记录**：单元总结的学习记录
-6. **汇总统计**：自动统计的组件通过率、知识点掌握情况、单元学习时长等
-
-## 使用方法
-### 1. 导出单个角色ID
-修改脚本变量：
-```python
-USER_ID = "角色ID"
-USER_ID_LIST = None
-ACCOUNT_ID_LIST = None
-```
-
-### 2. 导出单个/多个账户ID
-修改脚本变量：
-```python
-USER_ID = None
-USER_ID_LIST = None
-ACCOUNT_ID_LIST = [账户ID1, 账户ID2, ...]
-```
-脚本会自动查询账户对应的所有角色ID并分别导出。
-
-## 依赖环境
-需要配置以下环境变量：
-```
-# ES 配置
-ES_HOST=es-7vd7jcu9.public.tencentelasticsearch.com
-ES_PORT=9200
-ES_SCHEME=https
-ES_USER=elastic
-ES_PASSWORD=F%?QDcWes7N2WTuiYD11
-
-# PG 配置
-PG_DB_HOST=bj-postgres-16pob4sg.sql.tencentcdb.com
-PG_DB_PORT=28591
-PG_DB_USER=ai_member
-PG_DB_PASSWORD=LdfjdjL83h3h3^$&**YGG*
-PG_DB_DATABASE=vala
-
-# MySQL 配置
-MYSQL_HOST=bj-cdb-8frbdwju.sql.tencentcdb.com
-MYSQL_USERNAME=read_only
-MYSQL_PASSWORD=fdsfiidier^$*hjfdijjd232
-MYSQL_PORT=25413
-
-# MySQL Online 配置
-MYSQL_HOST_online=bj-cdb-dh2fkqa0.sql.tencentcdb.com
-MYSQL_USERNAME_online=read_only
-MYSQL_PASSWORD_online=fsdo45ijfmfmuu77$%^&
-MYSQL_PORT_online=27751
-```
-
-## 常见问题排查
-1. **事务异常错误**：一般是前面某个查询失败导致，检查是否有权限、表是否存在
-2. **权限不足**：检查数据库账号的表权限，需要有各分表的SELECT权限
-3. **0条记录**：对应角色没有学习数据，属于正常情况
-
-## 导出示例
-- 账户ID 9343（角色12699）：导出199条学习记录
-- 角色ID 14607：导出855条完整学习记录，所有sheet都有数据
diff --git a/memory/2026-04-15-cron-list.md b/memory/2026-04-15-cron-list.md
new file mode 100644
index 0000000..20483da
--- /dev/null
+++ b/memory/2026-04-15-cron-list.md
@@ -0,0 +1,57 @@
+# Session: 2026-04-15 11:15:24 UTC
+
+- **Session Key**: agent:main:feishu:direct:ou_9cb5bc9a5f1b6cab2d78fd36139ecb87
+- **Session ID**: 68ce9fe6-608c-4b96-bf82-11be7388550c
+- **Source**: feishu
+
+## Conversation Summary
+
+assistant: ✅ New session started · model: cloudsway/MaaS_Cl_Opus_4.6_20260205_cache (default: volcengine/doubao-seed-2-0-pro-260215)
+user: System: [2026-04-15 19:07:02 GMT+8] Feishu[xiaoxi] DM | 李若松 (ou_9cb5bc9a5f1b6cab2d78fd36139ecb87) [msg:om_x100b52c35b9d8904b4b08957063f3e0]
+System: [2026-04-15 19:07:23 GMT+8] Feishu[xiaoxi] DM | 李若松 (ou_9cb5bc9a5f1b6cab2d78fd36139ecb87) [msg:om_x100b52c3586058a4b31d4de93246c16]
+
+A new session was started via /new or /reset. Run your Session Startup sequence - read the required files before responding to the user. Then greet the user in your configured persona, if one is provided. Be yourself - use your defined voice, mannerisms, and mood. Keep it to 1-3 sentences and ask what they want to do. If the runtime model differs from default_model in the system prompt, mention the default model. Do not mention internal steps, files, tools, or reasoning.
+Current time: Wednesday, April 15th, 2026 — 7:07 PM (Asia/Shanghai) / 2026-04-15 11:07 UTC
+assistant: 
+
+Let me initialize by reading the required files.
+assistant: 已完成初始化，当前用户是技术负责人李若松。
+
+若松你好呀 📊 今天有什么数据需求或者想聊的事情吗？
+user: System: [2026-04-15 19:08:13 GMT+8] Feishu[xiaoxi] DM | 李若松 (ou_9cb5bc9a5f1b6cab2d78fd36139ecb87) [msg:om_x100b52c356d6d8b4b203bf8272a93d2]
+
+Conversation info (untrusted metadata):
+```json
+{
+  "message_id": "om_x100b52c356d6d8b4b203bf8272a93d2",
+  "sender_id": "ou_9cb5bc9a5f1b6cab2d78fd36139ecb87",
+  "sender": "李若松",
+  "timestamp": "Wed 2026-04-15 19:07 GMT+8"
+}
+```
+
+Sender (untrusted metadata):
+```json
+{
+  "label": "李若松 (ou_9cb5bc9a5f1b6cab2d78fd36139ecb87)",
+  "id": "ou_9cb5bc9a5f1b6cab2d78fd36139ecb87",
+  "name": "李若松"
+}
+```
+
+查看你的全部定时任务
+assistant: 
+
+让我查看当前配置的所有定时任务。
+assistant: 目前我的定时任务全部在系统 crontab 中，共 **6 个**，openclaw cron 侧无任务。
+
+| # | 执行时间 | 任务名称 | 脚本 |
+|---|---------|---------|------|
+| 1 | 每天 07:30 | 业务知识库更新 | `update_business_knowledge.sh` |
+| 2 | 每天 08:00 | 工作区 Git 备份 | `daily_backup.sh` |
+| 3 | 每天 09:00 | 组件练习质量统计（每日） | `run_component_practice_stat_daily.sh` |
+| 4 | 每天 09:00 | 用户购课完课统计 | `run_user_course_stat.sh` |
+| 5 | 每天 09:00 | 渠道订单统计周报 | `weekly_channel_order_report.sh` |
+| 6 | 每周一 10:00 | 学而思周度报表 | `xueersi_weekly_report.sh` |
+
+需要查看某个任务的具体脚本内容或执行日志吗？
diff --git a/memory/2026-04-15-request-timed-out-before-a-res.md b/memory/2026-04-15-request-timed-out-before-a-res.md
index 39078eb..4e40d9a 100644
--- a/memory/2026-04-15-request-timed-out-before-a-res.md
+++ b/memory/2026-04-15-request-timed-out-before-a-res.md
@@ -1,42 +1,148 @@
-# Session: 2026-04-15 04:07:29 UTC
+# Session: 2026-04-15 11:07:23 UTC
 
-- **Session Key**: agent:main:feishu:direct:ou_e63ce6b760ad39382852472f28fbe2a2
-- **Session ID**: 3cc3a798-b913-4bf0-8d6f-d940eaebc4ce
+- **Session Key**: agent:main:feishu:direct:ou_9cb5bc9a5f1b6cab2d78fd36139ecb87
+- **Session ID**: a540d883-1e19-4b29-9720-f5d2b4535ce5
 - **Source**: feishu
 
 ## Conversation Summary
 
-assistant: ✅ New session started · model: volcengine/doubao-seed-2-0-pro-260215
-user: System: [2026-04-15 11:59:11 GMT+8] Feishu[xiaoxi] DM | 李承龙 (ou_e63ce6b760ad39382852472f28fbe2a2) [msg:om_x100b52c49e3ea894b28db4de29c670e]
+assistant: 现在修正 crontab 中的路径错误和日志规范：
+assistant: 现在做一轮快速验证，确认改造后的脚本语法没问题：
+assistant: 全部语法检查通过。保存旧的 crontab 备份：
+assistant: ---
 
-A new session was started via /new or /reset. Run your Session Startup sequence - read the required files before responding to the user. Then greet the user in your configured persona, if one is provided. Be yourself - use your defined voice, mannerisms, and mood. Keep it to 1-3 sentences and ask what they want to do. If the runtime model differs from default_model in the system prompt, mention the default model. Do not mention internal steps, files, tools, or reasoning.
-Current time: Wednesday, April 15th, 2026 — 11:59 AM (Asia/Shanghai) / 2026-04-15 03:59 UTC
-assistant: 承龙你好~ 我是数据分析师小溪📊，有任何数据查询、数据分析相关的需求都可以随时和我说哦。
-user: System: [2026-04-15 12:01:48 GMT+8] Feishu[default] DM | 李承龙 (ou_e63ce6b760ad39382852472f28fbe2a2) [msg:om_x100b52c49476e484b27f8f15693cb59]
+### 迁移完成，汇总如下：
+
+#### 改造的脚本（7个）
+
+| 脚本 | 改动内容 |
+|------|---------|
+| **小溪 daily_backup.sh** | `openclaw message send`（open_id）→ curl + user_id，增加log函数 |
+| **小溪 update_business_knowledge.sh** | `openclaw message send`（open_id）→ curl + user_id |
+| **小溪 xueersi_weekly_report.sh** | 硬编码 app_id/secret → 读 config.json；open_id → user_id（李承龙 d8cb7f2a） |
+| **小溪 run_component_practice_stat_daily.sh** | `openclaw message send --media` → curl 两步（上传+发送文件） |
+| **小溪 run_user_course_stat.sh** | 同上 |
+| **小溪 weekly_channel_order_report.sh** | 同上 |
+| **小伴 daily_summary.sh** | `openclaw message send`（open_id + 错误node路径）→ curl + user_id |
+
+#### 未改动的脚本（6个，已符合规范）
+- 小伴 backup_workspace.sh ✅
+- 小葵 backup.sh ✅
+- 小葵 vala_skill_auto_push.sh ✅（无消息发送）
+- 小葵 sync_feedback_group.py ✅（Python脚本，独立逻辑）
+- 小编 workspace_backup.sh ✅
+- 小燕 backup_workspace.sh ✅
+
+#### crontab 改动
+- 按 agent 分组，添加注释说明
+- 所有任务统一 `/bin/bash` 执行
+- **2个缺日志的任务补上了日志重定向**（component_stat、user_course_stat）
+- **日志命名规范化**：统一 `<agent名>_<任务名>.log`
+- 修正小伴 daily_summary.sh 路径（之前指向不存在的文件）
+- **旧 crontab 已备份**
+
+明天早上各任务正常触发后就能验证了。如果想提前验证某个具体脚本，可以手动跑一下。
+user: System: [2026-04-15 18:50:08 GMT+8] Feishu[default] DM | 李若松 (ou_9cb5bc9a5f1b6cab2d78fd36139ecb87) [msg:om_x100b52c29b3a3504b3c55b10ae784ab]
 
 Conversation info (untrusted metadata):
 ```json
 {
-  "message_id": "om_x100b52c49476e484b27f8f15693cb59",
-  "sender_id": "ou_e63ce6b760ad39382852472f28fbe2a2",
-  "sender": "李承龙",
-  "timestamp": "Wed 2026-04-15 12:01 GMT+8"
+  "message_id": "om_x100b52c29b3a3504b3c55b10ae784ab",
+  "sender_id": "ou_9cb5bc9a5f1b6cab2d78fd36139ecb87",
+  "sender": "李若松",
+  "timestamp": "Wed 2026-04-15 18:50 GMT+8"
 }
 ```
 
 Sender (untrusted metadata):
 ```json
 {
-  "label": "李承龙 (ou_e63ce6b760ad39382852472f28fbe2a2)",
-  "id": "ou_e63ce6b760ad39382852472f28fbe2a2",
-  "name": "李承龙"
+  "label": "李若松 (ou_9cb5bc9a5f1b6cab2d78fd36139ecb87)",
+  "id": "ou_9cb5bc9a5f1b6cab2d78fd36139ecb87",
+  "name": "李若松"
 }
 ```
 
-执行weekly_channel_order_report.sh这个定时任务
-assistant: ✅ 定时任务weekly_channel_order_report.sh已执行完成，渠道订单统计周报已成功发送到指定飞书群。已修复脚本中存在的问题：
-1. 修复了secrets.env文件特殊字符语法错误
-2. 替换了不可用的csv2xlsx工具为python pandas转换
-3. 修复了openclaw消息命令参数错误
-4. 调整了文件存储路径到允许的目录
-后续该定时任务可以正常自动运行。
+用你的 workspace 备份 任务  验证一下， 先配置时间为 18:52 触发  确认没问题后 再改回原有时间
+assistant: 已设置为 18:52 触发，等执行后看日志和飞书消息。
+assistant: 日志文件是空的，说明可能还没执行到，或者 cron 还没触发。直接检查：
+assistant: ✅ **执行完全成功！**
+
+- 18:52:01 cron准时触发
+- Git commit + push 成功（19个文件变更）
+- 日志正常写入 `/var/log/xiaoxi_daily_backup.log`
+- 备份任务完成
+
+你飞书上应该也收到了备份成功的通知消息，确认一下？确认没问题我就把时间改回 08:00。
+assistant: 已恢复为每天 08:00。迁移验证通过 ✅
+user: System: [2026-04-15 18:51:50 GMT+8] Exec completed (oceanic-, code 1) :: === 日志 ===
+System: [2026-04-15 18:53:12 GMT+8] Feishu[default] DM | 李若松 (ou_9cb5bc9a5f1b6cab2d78fd36139ecb87) [msg:om_x100b52c2afb0c4acb2599dbe22db70b]
+
+Conversation info (untrusted metadata):
+```json
+{
+  "message_id": "om_x100b52c2afb0c4acb2599dbe22db70b",
+  "sender_id": "ou_9cb5bc9a5f1b6cab2d78fd36139ecb87",
+  "sender": "李若松",
+  "timestamp": "Wed 2026-04-15 18:53 GMT+8"
+}
+```
+
+Sender (untrusted metadata):
+```json
+{
+  "label": "李若松 (ou_9cb5bc9a5f1b6cab2d78fd36139ecb87)",
+  "id": "ou_9cb5bc9a5f1b6cab2d78fd36139ecb87",
+  "name": "李若松"
+}
+```
+
+好的  现在 查看你的全部定时任务
+assistant: 我当前配置的全部定时任务（6个）：
+
+| 时间 | 频率 | 任务 | 日志 |
+|------|------|------|------|
+| 07:30 | 每天 | 业务知识库更新 | `/var/log/xiaoxi_update_business_knowledge.log` |
+| 08:00 | 每天 | 工作区Git备份 ✅ 刚验证通过 | `/var/log/xiaoxi_daily_backup.log` |
+| 09:00 | 每天 | 组件练习质量统计 | `/var/log/xiaoxi_component_stat.log` |
+| 09:00 | 每天 | 用户购课完课统计 | `/var/log/xiaoxi_user_course_stat.log` |
+| 09:00 | 每天 | 渠道订单统计周报 | `/var/log/xiaoxi_weekly_channel_order_report.log` |
+| 10:00 | 每周一 | 学而思周度报表 | `/var/log/xiaoxi_xueersi_weekly_report.log` |
+
+脚本位置：`/root/.openclaw/workspace/scripts/`
+user: System: [2026-04-15 18:54:14 GMT+8] Feishu[default] DM | 李若松 (ou_9cb5bc9a5f1b6cab2d78fd36139ecb87) [msg:om_x100b52c2ab9620b0b31d4e326752657]
+
+Conversation info (untrusted metadata):
+```json
+{
+  "message_id": "om_x100b52c2ab9620b0b31d4e326752657",
+  "sender_id": "ou_9cb5bc9a5f1b6cab2d78fd36139ecb87",
+  "sender": "李若松",
+  "timestamp": "Wed 2026-04-15 18:54 GMT+8"
+}
+```
+
+Sender (untrusted metadata):
+```json
+{
+  "label": "李若松 (ou_9cb5bc9a5f1b6cab2d78fd36139ecb87)",
+  "id": "ou_9cb5bc9a5f1b6cab2d78fd36139ecb87",
+  "name": "李若松"
+}
+```
+
+定时任务配置文件的位置是？ 我希望也体现在 skill中。  方便其他agent使用时查看。
+assistant: 定时任务的配置位置就是系统 crontab，通过 `crontab -l` 查看，通过 `crontab -e` 编辑。实际存储在 `/var/spool/cron/crontabs/root`。
+
+我来把这个信息补充到 skill 中：
+assistant: 同步副本：
+assistant: 已更新。Skill 中新增了**配置文件位置**说明：
+
+| 项目 | 位置 |
+|------|------|
+| 配置查看 | `crontab -l` |
+| 配置编辑 | `crontab -e` |
+| 配置文件存储 | `/var/spool/cron/crontabs/root` |
+| 系统执行日志 | `/var/log/syslog`（搜索 CRON） |
+
+其他 agent 查看自己的任务只需执行 `crontab -l | grep "workspace-<自己的名字>"`，配置文件中也有注释块标注每个 agent 的归属。
diff --git a/new_export/export_14607.py b/new_export/export_14607.py
deleted file mode 100644
index aa86b51..0000000
--- a/new_export/export_14607.py
+++ /dev/null
@@ -1,1846 +0,0 @@
-"""
-初版需求v1.0: 2025.11.18
-
-导出 一个userId的多表数据， 最终按照不同sheet，输出到一个 excel文件中。
-
-1. 第一个sheet:"全部音频数据"
-es相关配置通过以下环境变量
-ES_HOST=xxx
-ES_PORT=9200
-ES_SCHEME=https
-ES_USER=elastic
-ES_PASSWORD=xxx
-
-index: user-audio
-
-脚本思路:
-过滤字段:
-userId == xxxx
-
-输出该userId的全部记录 按时间倒序排序
-包含以下字段内容:
-
-userId
-userMsg
-userName
-soeData
-audioUrl
-asrStatus
-componentId
-componentType
-dataVersion
-
-2. 第二个sheet:"互动组件学习记录"
-在 PGsql数据库中  筛选出 user_id 对应的记录 按时间(updated_at)倒序排列。
-数据库相关配置  从.env中读取:
-PG_DB_HOST = xxx
-PG_DB_PORT = xxx
-PG_DB_USER = xxx
-PG_DB_PASSWORD = xxx
-PG_DB_DATABASE = xxx
-
-读取以下数据表: 
-user_component_play_record_0 ~ user_component_play_record_7
-
-输出以下字段：
-user_id,
-component_unique_code,
-session_id,
-c_type,
-c_id,
-play_result,
-user_behavior_info,
-updated_at
-
-3.第三个sheet:"课程巩固记录"
-在 PGsql数据库中  筛选出 user_id 对应的记录 按时间(updated_at)倒序排列。
-
-数据表:user_unit_review_question_result
-
-输出以下字段:
-user_id
-story_id
-chapter_id
-question_list
-updated_at
-
-4.第四个sheet:"单元挑战记录"
-在 PGsql数据库中  筛选出 user_id 对应的记录 按时间(updated_at)倒序排列。
-
-数据表:user_unit_challenge_question_result
-
-输出以下字段:
-user_id
-story_id
-category
-score_text,
-question_list
-updated_at
-------------
-
-需求补充v1.1:
-"全部音频数据"这个sheet
-输出字段 添加timeStr 并按时间倒序排列  最新的记录 在最上面
-
-------------
-需求补充v1.2:
-"全部音频数据"这个sheet
-如果userMsg字段内容 包含 ”makee_id“ 要进行以下处理：
-
-从userMsg字段中提取出具体的makee_id:
-此时的字段样例:
-```
-asr msg信息为：{
-    "time_ms": 358,
-    "time_ms_api": 357,
-    "hot_words_str": "{\n \"context_type\": \"dialog_ctx\",\n \"context_data\": [\n  {\n   \"text\": \"planet Walla\"\n  },\n  {\n   \"text\": \"Walla\"\n  }\n ]\n}",
-    "makee_id": "d208c617-902f-4f81-8255-b5fb73599546",
-    "volcano_fast_x_tt_logid": "202511151541355DF72BE5EBFE73795BFD",
-    "api_name": "volcano-fast"
-}
-```
-然后基于makee_id 去另一个表里查记录:  index:llm_asr_log
-将查询到的记录的 result_text 字段内容 回填到 userMsg。
-将source字段内容 输出 到 source。
-
-如果userMsg字段内容 不包含 ”makee_id“ 保持之前的逻辑。
-
---------------
-需求补充 v1.3
-当前输入 只支持配置单个 userId (业务侧名称为角色id)
-
-
-期望扩展为以下逻辑:
-1. 改为配置 角色id list ， 分别 导出 多份excel文件。命名格式为 角色id_{}_导出时间_{}.xlsx
-2. 改为配置 账户id list ， 分别 导出 多份excel文件。命名格式为 账户id_{}_角色id_{}_导出时间_{}.xlsx
-
-关于 账户 id 到角色id 的映射逻辑，
-首先 读取 mysql 表 vala_app_character
-筛选 account_id字段值 == 账户id 的 记录， 其中 该记录 的 id值，则为角色id 一个 账户id 可以对应多个角色id
-
-本次需求只针对输入侧调整， 数据抽取聚合逻辑部分和之前保持一致
-
----------------
-需求补充 v1.4
-
-增加一个sheet "单元总结记录"，
-导出对应角色id的单元总结记录。   参考 export_unit_summary.py 中的原始数据提取方案即可(不必关注其中的数据统计部分)。
-
-其他已有逻辑保持不动哦。
-
-----------------
-需求补充 v1.5
-
-1."互动组件学习记录"sheet 增加以下字段
-"互动组件名称"、"组件标题"、"组件配置摘要"、"知识点":
-字段取值规则:
-根据 c_type 及组件配置(从mysql表获取) 进行映射和处理:
-```
-1）.如果 c_type 开头为"mid"
-
-则读取下表:表名:middle_interaction_component
-
-获取以下字段值:
-title (作为组件标题)
-component_config (完整的组件配置)   获取其中 的 question 字段值 作为 组件配置摘要；
-kp_relation_info 字段值  作为 知识点
-
-"互动组件名称"规则:
-
-"物品互动": "mid_vocab_item",
-"图片互动": "mid_vocab_image",
-"填词互动": "mid_vocab_fillBlank",
-"指令互动": "mid_vocab_instruction"
-"对话互动-表达": "mid_sentence_dialogue", 且 component_config->question->mode == "express"
-"对话互动-朗读": "mid_sentence_dialogue", 且 component_config->question->mode == "read"
-"语音互动": "mid_sentence_voice",
-"材料互动": "mid_sentence_material",
-"造句互动": "mid_sentence_makeSentence"
-"挖空互动": "mid_grammar_cloze",
-"组句互动": "mid_grammar_sentence"
-"发音互动": "mid_pron_pron"
-
-
-2）. 如果 c_type 开头为"core"
-则读取下表:表名:core_interaction_component
-
-获取以下字段值:
-title (作为组件标题)
-component_config (完整的组件配置)   获取其中 的 taskInfo 字段值 作为 组件配置摘要
-kp_relation_info 字段值  作为 知识点
-
-"互动组件名称"规则:
-"口语快答": "core_speaking_reply",
-"口语妙问": "core_speaking_inquiry",
-"口语探讨": "core_speaking_explore",
-"口语独白": "core_speaking_monologue"
-"合作阅读": "core_reading_order",
-"合作听力": "core_listening_order",
-"看图组句": "core_writing_imgMakeSentence",
-"看图撰写": "core_writing_imgWrite",
-"问题组句": "core_writing_questionMakeSentence",
-"问题撰写": "core_writing_questionWrite",
-```
-
-2."课程巩固记录" sheet 增加以下字段
-"正确率":  参考 export_lesson_review.py 中的计算逻辑
-
-3. 新增一个"汇总统计"sheet
-统计并展示以下内容   请以 可读性 比较好的方式排列、展示
-
-a. "所有互动-按互动组件类型-通过情况统计"
-以每种"互动组件名称"进行聚合
-统计play_result的取值分布情况，算以下指标:
-总数量、Perfect数量、Good数量、Failed数量、Pass数量、Perfect比例、Good比例、Failed比例、Pass比例
-
-b. "中互动组件-按知识点-通过情况统计"
-以每个知识点进行聚合
-
-其中 知识点配置格式如下:
-```
-[{"kpId":"0000004","kpType":"sentence","kpTitle":"My name is ...","kpSkill":"sentence_pron","kpSkillName":"语音"},{"kpId":"0000004","kpType":"sentence","kpTitle":"My name is ...","kpSkill":"sentence_meaning","kpSkillName":"语义"},{"kpId":"0000005","kpType":"sentence","kpTitle":"I'm… years old.","kpSkill":"sentence_pron","kpSkillName":"语音"},{"kpId":"0000005","kpType":"sentence","kpTitle":"I'm… years old.","kpSkill":"sentence_meaning","kpSkillName":"语义"},{"kpId":"0000014","kpType":"sentence","kpTitle":"Nice to meet you.","kpSkill":"sentence_pron","kpSkillName":"语音"},{"kpId":"0000014","kpType":"sentence","kpTitle":"Nice to meet you.","kpSkill":"sentence_meaning","kpSkillName":"语义"}]
-```
-一个组件可以绑定多个知识点，以每个知识点的 kpId + kpType + kpTitle 进行 展示及聚合
-
-对所有绑定了某个知识点的中互动组件(c_type以mid开头)
-统计play_result的取值分布情况，算以下指标:
-总数量、Perfect数量、Good数量、Failed数量、Pass数量、Perfect比例、Good比例、Failed比例、Pass比例
-
-c. "单元总结-按单元统计时长"
-
-将"单元总结记录"中的"play_time_seconds"字段值 以每个单元id 进行聚合 进行 累加 统计，并增加一列 转换为分钟为单位 取整数
-
-
-"""
-# ==== 可直接修改的脚本变量（不使用命令行传参） ====
-# 三种模式互斥，只能配置一个：
-# 模式1：单个角色id
-USER_ID = "14607" # 单个角色ID，示例：2911
-
-# 模式2：角色id列表（多个角色id批量导出）
-USER_ID_LIST = None  # 角色ID列表，示例：[2911, 2912, 2913]
-
-# 模式3：账户id列表（通过账户id查询对应的角色id后批量导出）
-ACCOUNT_ID_LIST = None
-
-OUTPUT_DIR = "output/"  # 输出目录，默认为output文件夹
-# ==== 变量结束 ====
-import os
-import json
-import re
-from typing import Any, Dict, List, Optional
-
-import datetime
-
-try:
-    import requests
-except Exception:
-    requests = None
-
-try:
-    import psycopg2
-    from psycopg2.extras import RealDictCursor
-except Exception:
-    psycopg2 = None
-    RealDictCursor = None
-
-try:
-    import pymysql
-    import pymysql.cursors
-except Exception:
-    pymysql = None
-
-try:
-    import pandas as pd
-except Exception:
-    pd = None
-
-try:
-    import urllib3
-except Exception:
-    urllib3 = None
-
-
-SHEET1_COLUMNS = [
-    "userId",
-    "userMsg",
-    "source",
-    "userName",
-    "soeData",
-    "audioUrl",
-    "asrStatus",
-    "componentId",
-    "componentType",
-    "dataVersion",
-    "timeStr",
-]
-
-SHEET2_COLUMNS = [
-    "user_id",
-    "component_unique_code",
-    "session_id",
-    "c_type",
-    "c_id",
-    "互动组件名称",
-    "组件标题",
-    "组件配置摘要",
-    "知识点",
-    "play_result",
-    "user_behavior_info",
-    "updated_at",
-]
-
-SHEET3_COLUMNS = [
-    "user_id",
-    "unit_id",
-    "lesson_id",
-    "question_list",
-    "正确率",
-    "updated_at",
-]
-
-SHEET4_COLUMNS = [
-    "user_id",
-    "unit_id",
-    "category",
-    "score_text",
-    "question_list",
-    "updated_at",
-]
-
-SHEET5_COLUMNS = [
-    "id",
-    "user_id",
-    "unit_id",
-    "updated_at",
-    "km_id",
-    "km_type",
-    "play_time_seconds",
-]
-
-
-def _load_env_file(path: str) -> None:
-    if not os.path.exists(path):
-        return
-    try:
-        with open(path, "r", encoding="utf-8") as f:
-            for line in f:
-                line = line.strip()
-                if not line or line.startswith("#"):
-                    continue
-                if "=" not in line:
-                    continue
-                k, v = line.split("=", 1)
-                k = k.strip()
-                v = v.strip().strip('"').strip("'")
-                if k and (os.getenv(k) is None):
-                    os.environ[k] = v
-    except Exception:
-        pass
-
-
-def load_env() -> None:
-    _load_env_file(os.path.join(os.getcwd(), ".env"))
-    _load_env_file(os.path.join(os.getcwd(), ".env.local"))
-
-
-def to_json_str(v: Any) -> Any:
-    if isinstance(v, (dict, list)):
-        try:
-            return json.dumps(v, ensure_ascii=False)
-        except Exception:
-            return str(v)
-    return v
-
-
-def parse_time(value: Any) -> Optional[datetime.datetime]:
-    if value is None:
-        return None
-    if isinstance(value, (int, float)):
-        try:
-            v = float(value)
-            # 兼容毫秒级时间戳
-            if v > 1e11:
-                v = v / 1000.0
-            return datetime.datetime.fromtimestamp(v)
-        except Exception:
-            return None
-    if isinstance(value, str):
-        fmts = [
-            "%Y-%m-%dT%H:%M:%S.%fZ",
-            "%Y-%m-%dT%H:%M:%S.%f%z",
-            "%Y-%m-%dT%H:%M:%S%z",
-            "%Y-%m-%d %H:%M:%S",
-            "%Y-%m-%d",
-        ]
-        for fmt in fmts:
-            try:
-                return datetime.datetime.strptime(value, fmt)
-            except Exception:
-                continue
-        try:
-            return datetime.datetime.fromisoformat(value)
-        except Exception:
-            return None
-    return None
-
-
-def pick_time(source: Dict[str, Any]) -> Optional[datetime.datetime]:
-    candidates = [
-        "updated_at",
-        "created_at",
-        "@timestamp",
-        "timestamp",
-        "updatedAt",
-        "createdAt",
-        "time",
-        "ts",
-        "timeStr",
-        "update_time",
-        "create_time",
-    ]
-    for key in candidates:
-        if key in source:
-            t = parse_time(source.get(key))
-            if t is not None:
-                return t
-    # 宽松匹配：尝试扫描所有可能的时间相关字段
-    for k, v in source.items():
-        lk = str(k).lower()
-        if any(s in lk for s in ["time", "date", "_at", "timestamp"]):
-            t = parse_time(v)
-            if t is not None:
-                return t
-    return None
-
-
-def extract_makee_id_from_user_msg(user_msg: Any) -> Optional[str]:
-    # 支持dict或字符串形式
-    if isinstance(user_msg, dict):
-        mk = user_msg.get("makee_id")
-        if isinstance(mk, str) and mk:
-            return mk
-    if isinstance(user_msg, str) and user_msg:
-        # 1) 尝试整体解析为JSON
-        try:
-            obj = json.loads(user_msg)
-            mk = obj.get("makee_id")
-            if isinstance(mk, str) and mk:
-                return mk
-        except Exception:
-            pass
-        # 2) 尝试截取大括号中的JSON
-        try:
-            start = user_msg.find("{")
-            end = user_msg.rfind("}")
-            if start != -1 and end != -1 and end > start:
-                candidate = user_msg[start : end + 1]
-                obj = json.loads(candidate)
-                mk = obj.get("makee_id")
-                if isinstance(mk, str) and mk:
-                    return mk
-        except Exception:
-            pass
-        # 3) 正则匹配 makee_id
-        m = re.search(r"\bmakee_id\b\s*:\s*\"([^\"]+)\"", user_msg)
-        if m:
-            return m.group(1)
-    return None
-
-
-def fetch_es_asr_log(makee_id: str, es_cfg: Dict[str, Any]) -> Optional[Dict[str, Any]]:
-    if requests is None:
-        raise RuntimeError("缺少requests依赖，请安装后再运行。")
-    host = es_cfg.get("host")
-    port = es_cfg.get("port")
-    scheme = es_cfg.get("scheme", "http")
-    user = es_cfg.get("user")
-    password = es_cfg.get("password")
-    index = "llm_asr_log"
-    if not host:
-        return None
-    base = f"{scheme}://{host}:{port}"
-    url = f"{base}/{index}/_search"
-    headers = {"Content-Type": "application/json"}
-    body = {
-        "query": {
-            "bool": {
-                "should": [
-                    {"term": {"makee_id": {"value": str(makee_id)}}},
-                    {"term": {"makee_id.keyword": {"value": str(makee_id)}}},
-                ],
-                "minimum_should_match": 1,
-            }
-        },
-        "size": 10,
-        "_source": [
-            "makee_id",
-            "result_text",
-            "source",
-            "updated_at",
-            "created_at",
-            "@timestamp",
-            "timestamp",
-            "updatedAt",
-            "createdAt",
-            "time",
-            "ts",
-            "timeStr",
-            "update_time",
-            "create_time",
-        ],
-    }
-    auth = (user, password) if user and password else None
-    try:
-        if scheme == "https" and urllib3 is not None:
-            try:
-                urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-            except Exception:
-                pass
-        resp = requests.post(url, headers=headers, json=body, auth=auth, timeout=20, verify=False if scheme == "https" else True)
-        resp.raise_for_status()
-        data = resp.json()
-    except Exception:
-        return None
-    hits = data.get("hits", {}).get("hits", [])
-    if not hits:
-        return None
-    # 选最新的
-    chosen = None
-    best_t = None
-    for h in hits:
-        src = h.get("_source", {}) or {}
-        t = pick_time(src)
-        if t is None:
-            continue
-        if best_t is None or t > best_t:
-            best_t = t
-            chosen = src
-    if chosen is None:
-        # 如果都没有时间，选第一条
-        chosen = (hits[0].get("_source", {}) or {})
-    return chosen
-
-
-def get_es_config() -> Dict[str, Any]:
-    return {
-        "host": os.getenv("ES_HOST"),
-        "port": os.getenv("ES_PORT", "9200"),
-        "scheme": os.getenv("ES_SCHEME", "http"),
-        "user": os.getenv("ES_USER"),
-        "password": os.getenv("ES_PASSWORD"),
-        "index": "user-audio",
-    }
-
-
-def fetch_es_user_audio(user_id: str, es_cfg: Dict[str, Any]) -> List[Dict[str, Any]]:
-    if requests is None:
-        raise RuntimeError("缺少requests依赖，请安装后再运行。")
-
-    print(f"  [ES] 开始查询user-audio索引...")
-    start_time = datetime.datetime.now()
-
-    host = es_cfg.get("host")
-    port = es_cfg.get("port")
-    scheme = es_cfg.get("scheme", "http")
-    user = es_cfg.get("user")
-    password = es_cfg.get("password")
-    index = es_cfg.get("index", "user-audio")
-
-    if not host:
-        return []
-
-    base = f"{scheme}://{host}:{port}"
-    url = f"{base}/{index}/_search"
-    headers = {"Content-Type": "application/json"}
-
-    body = {
-        "query": {
-            "bool": {
-                "should": [
-                    {"term": {"userId": {"value": str(user_id)}}},
-                    {"term": {"userId.keyword": {"value": str(user_id)}}},
-                ],
-                "minimum_should_match": 1,
-            }
-        },
-        "size": 10000,
-        "_source": [
-            "userId",
-            "userMsg",
-            "userName",
-            "soeData",
-            "audioUrl",
-            "asrStatus",
-            "componentId",
-            "componentType",
-            "dataVersion",
-            "updated_at",
-            "created_at",
-            "@timestamp",
-            "timestamp",
-            "updatedAt",
-            "createdAt",
-            "time",
-            "ts",
-            "timeStr",
-            "update_time",
-            "create_time",
-        ],
-    }
-
-    auth = (user, password) if user and password else None
-
-    try:
-        # 抑制自签证书下的HTTPS不安全警告
-        if scheme == "https" and urllib3 is not None:
-            try:
-                urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-            except Exception:
-                pass
-        resp = requests.post(url, headers=headers, json=body, auth=auth, timeout=30, verify=False if scheme == "https" else True)
-        resp.raise_for_status()
-        data = resp.json()
-    except Exception as e:
-        raise RuntimeError(f"ES查询失败: {e}")
-
-    hits = data.get("hits", {}).get("hits", [])
-    print(f"  [ES] 查询完成，获得{len(hits)}条记录，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-
-    if not hits:
-        return []
-
-    print(f"  [ES] 开始处理音频数据...")
-    process_start = datetime.datetime.now()
-
-    rows: List[Dict[str, Any]] = []
-    asr_cache: Dict[str, Dict[str, Any]] = {}
-    makee_id_count = 0
-
-    for idx, h in enumerate(hits, 1):
-        # 每处理100条显示一次进度
-        if idx % 100 == 0 or idx == len(hits):
-            print(f"  [ES] 处理进度: {idx}/{len(hits)} ({idx*100//len(hits)}%)")
-
-        src = h.get("_source", {}) or {}
-        row = {
-            "userId": src.get("userId"),
-            "userMsg": src.get("userMsg"),
-            "source": None,
-            "userName": src.get("userName"),
-            "soeData": to_json_str(src.get("soeData")),
-            "audioUrl": src.get("audioUrl"),
-            "asrStatus": src.get("asrStatus"),
-            "componentId": src.get("componentId"),
-            "componentType": src.get("componentType"),
-            "dataVersion": src.get("dataVersion"),
-        }
-        t = pick_time(src)
-        row["_time"] = t.isoformat() if t else None
-        row["timeStr"] = t.strftime("%Y-%m-%d %H:%M:%S") if t else None
-        # v1.2: 当userMsg包含makee_id时，补充查询llm_asr_log并回填
-        mk = extract_makee_id_from_user_msg(row.get("userMsg"))
-        if mk:
-            makee_id_count += 1
-            asr_doc = asr_cache.get(mk)
-            if asr_doc is None:
-                asr_doc = fetch_es_asr_log(mk, es_cfg)
-                if asr_doc is not None:
-                    asr_cache[mk] = asr_doc
-            if asr_doc is not None:
-                rt = asr_doc.get("result_text")
-                if rt:
-                    row["userMsg"] = rt
-                row["source"] = to_json_str(asr_doc.get("source"))
-        rows.append(row)
-
-    print(f"  [ES] 数据处理完成，发现{makee_id_count}条包含makee_id的记录，耗时{(datetime.datetime.now() - process_start).total_seconds():.2f}秒")
-
-    print(f"  [ES] 开始排序...")
-    rows.sort(key=lambda x: parse_time(x.get("_time")) or datetime.datetime.min, reverse=True)
-    print(f"  [ES] 音频数据处理完成，总耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-
-    return rows
-
-
-def get_pg_conn() -> Any:
-    if psycopg2 is None:
-        raise RuntimeError("缺少psycopg2依赖，请安装后再运行。")
-    host = os.getenv("PG_DB_HOST")
-    port = int(os.getenv("PG_DB_PORT", "5432"))
-    user = os.getenv("PG_DB_USER")
-    password = os.getenv("PG_DB_PASSWORD")
-    dbname = os.getenv("PG_DB_DATABASE")
-    if not host or not dbname:
-        raise RuntimeError("PG数据库环境变量未配置完整")
-    conn = psycopg2.connect(host=host, port=port, user=user, password=password, dbname=dbname)
-    return conn
-
-
-def get_mysql_conn(database: str) -> Any:
-    """
-    获取MySQL数据库连接
-
-    Args:
-        database: 数据库名，可选值：'vala_user' 或 'vala_test'
-                 vala_user 使用 online 配置（环境变量后缀 _online）
-                 vala_test 使用默认配置
-
-    Returns:
-        MySQL连接对象
-    """
-    if pymysql is None:
-        raise RuntimeError("缺少pymysql依赖，请安装后再运行。")
-
-    # 根据数据库选择不同的环境变量配置
-    if database == "vala_user":
-        # vala_user 数据库使用 online 配置
-        host = os.getenv("MYSQL_HOST_online")
-        port = int(os.getenv("MYSQL_PORT_online", "3306"))
-        user = os.getenv("MYSQL_USERNAME_online")
-        password = os.getenv("MYSQL_PASSWORD_online")
-        if not host:
-            raise RuntimeError("MySQL数据库环境变量未配置完整（缺少MYSQL_HOST_online）")
-    else:
-        # vala_test 等其他数据库使用默认配置
-        host = os.getenv("MYSQL_HOST")
-        port = int(os.getenv("MYSQL_PORT", "3306"))
-        user = os.getenv("MYSQL_USERNAME")
-        password = os.getenv("MYSQL_PASSWORD")
-        if not host:
-            raise RuntimeError("MySQL数据库环境变量未配置完整（缺少MYSQL_HOST）")
-
-    conn = pymysql.connect(
-        host=host,
-        port=port,
-        user=user,
-        password=password,
-        database=database,  # 直接使用传入的数据库名
-        charset="utf8mb4",
-        cursorclass=pymysql.cursors.DictCursor,
-    )
-    return conn
-
-
-def get_id_2_unit_index(conn: Any) -> Dict[int, int]:
-    """
-    从MySQL获取 story_id 到 unit_id 的映射关系
-
-    Args:
-        conn: MySQL数据库连接
-
-    Returns:
-        映射字典 {story_id: unit_id}
-    """
-    sql = """
-    SELECT *
-    FROM `vala_game_info`
-    WHERE id > 0
-      AND `vala_game_info`.`deleted_at` IS NULL
-    ORDER BY season_package_id asc, `index` asc
-    """
-    try:
-        with conn.cursor() as cur:
-            cur.execute(sql)
-            rows = cur.fetchall() or []
-            # 构建映射表：按查询结果的顺序，索引即为unit_id
-            id_2_unit_index = {}
-            for index, row in enumerate(rows):
-                id_2_unit_index[row["id"]] = index
-            return id_2_unit_index
-    except Exception as e:
-        print(f"[ERROR] 获取story_id到unit_id映射失败: {e}")
-        return {}
-
-
-def get_chapter_id_to_lesson_id(conn: Any) -> Dict[int, int]:
-    """
-    从MySQL获取 chapter_id 到 lesson_id 的映射关系
-
-    Args:
-        conn: MySQL数据库连接
-
-    Returns:
-        映射字典 {chapter_id: lesson_id}
-    """
-    sql = """
-    SELECT id, `index`
-    FROM `vala_game_chapter`
-    WHERE deleted_at IS NULL
-    """
-    try:
-        with conn.cursor() as cur:
-            cur.execute(sql)
-            rows = cur.fetchall() or []
-            # 构建映射表：chapter的index字段即为lesson_id
-            chapter_id_to_lesson_id = {}
-            for row in rows:
-                chapter_id_to_lesson_id[row["id"]] = row["index"]
-            return chapter_id_to_lesson_id
-    except Exception as e:
-        print(f"[ERROR] 获取chapter_id到lesson_id映射失败: {e}")
-        return {}
-
-
-# 组件类型到组件名称的映射
-COMPONENT_TYPE_NAMES = {
-    "mid_vocab_item": "物品互动",
-    "mid_vocab_image": "图片互动",
-    "mid_vocab_fillBlank": "填词互动",
-    "mid_vocab_instruction": "指令互动",
-    "mid_sentence_dialogue": "对话互动",  # 需要根据mode进一步判断
-    "mid_sentence_voice": "语音互动",
-    "mid_sentence_material": "材料互动",
-    "mid_sentence_makeSentence": "造句互动",
-    "mid_grammar_cloze": "挖空互动",
-    "mid_grammar_sentence": "组句互动",
-    "mid_pron_pron": "发音互动",
-    "core_speaking_reply": "口语快答",
-    "core_speaking_inquiry": "口语妙问",
-    "core_speaking_explore": "口语探讨",
-    "core_speaking_monologue": "口语独白",
-    "core_reading_order": "合作阅读",
-    "core_listening_order": "合作听力",
-    "core_writing_imgMakeSentence": "看图组句",
-    "core_writing_imgWrite": "看图撰写",
-    "core_writing_questionMakeSentence": "问题组句",
-    "core_writing_questionWrite": "问题撰写",
-}
-
-
-def get_component_name(c_type: str, component_config: Optional[Dict[str, Any]]) -> str:
-    """
-    根据c_type和组件配置获取组件名称
-
-    Args:
-        c_type: 组件类型
-        component_config: 组件配置（用于判断对话互动的mode）
-
-    Returns:
-        组件名称
-    """
-    if not c_type:
-        return ""
-
-    # 特殊处理：对话互动需要根据mode判断
-    if c_type == "mid_sentence_dialogue" and component_config:
-        try:
-            question = component_config.get("question", {})
-            mode = question.get("mode", "")
-            if mode == "express":
-                return "对话互动-表达"
-            elif mode == "read":
-                return "对话互动-朗读"
-        except Exception:
-            pass
-
-    return COMPONENT_TYPE_NAMES.get(c_type, "")
-
-
-def batch_fetch_component_configs(play_records: List[Dict[str, Any]], mysql_conn: Any) -> Dict[str, Dict[str, Any]]:
-    """
-    批量查询组件配置信息
-
-    Args:
-        play_records: 播放记录列表
-        mysql_conn: MySQL连接
-
-    Returns:
-        组件配置映射 {c_type_c_id: {title, component_config, kp_relation_info}}
-    """
-    print(f"  [MySQL] 开始批量查询组件配置...")
-    start_time = datetime.datetime.now()
-
-    # 收集需要查询的c_type和c_id
-    mid_c_ids = set()
-    core_c_ids = set()
-    mid_type_id_pairs = []  # 用于调试日志
-    core_type_id_pairs = []
-
-    for record in play_records:
-        c_type = record.get("c_type", "")
-        c_id = record.get("c_id")
-        if c_type and c_id:
-            if c_type.startswith("mid"):
-                mid_c_ids.add(c_id)
-                mid_type_id_pairs.append((c_type, c_id))
-            elif c_type.startswith("core"):
-                core_c_ids.add(c_id)
-                core_type_id_pairs.append((c_type, c_id))
-
-    print(f"  [MySQL] 需要查询中互动组件: {len(mid_c_ids)}个, 核心互动组件: {len(core_c_ids)}个")
-    if mid_c_ids:
-        print(f"  [MySQL] 中互动组件ID列表（前10个）: {sorted(list(mid_c_ids))[:10]}")
-    if core_c_ids:
-        print(f"  [MySQL] 核心互动组件ID列表（前10个）: {sorted(list(core_c_ids))[:10]}")
-
-    config_map = {}
-
-    # 批量查询middle_interaction_component
-    if mid_c_ids:
-        try:
-            with mysql_conn.cursor() as cur:
-                placeholders = ','.join(['%s'] * len(mid_c_ids))
-                sql = f"""
-                SELECT c_id, c_type, title, component_config, kp_relation_info
-                FROM middle_interaction_component
-                WHERE c_id IN ({placeholders}) AND deleted_at IS NULL
-                """
-                print(f"  [MySQL] 执行中互动组件查询，查询条件: c_id IN ({len(mid_c_ids)}个ID)")
-                cur.execute(sql, tuple(mid_c_ids))
-                rows = cur.fetchall() or []
-                print(f"  [MySQL] 查询到{len(rows)}条中互动组件配置")
-
-                if len(rows) == 0 and len(mid_c_ids) > 0:
-                    print(f"  [MySQL] [警告] 查询结果为空！可能的原因：")
-                    print(f"  [MySQL]   - 数据库中没有匹配的c_id记录")
-                    print(f"  [MySQL]   - deleted_at字段不为NULL")
-                    print(f"  [MySQL]   - c_id不存在")
-
-                for idx, row in enumerate(rows):
-                    c_type = row.get("c_type", "")
-                    c_id = row.get("c_id")
-                    key = f"{c_type}_{c_id}"
-
-                    if idx < 3:  # 输出前3条的详细信息
-                        print(f"  [MySQL] [样例{idx+1}] id={c_id}, c_type={c_type}, key={key}")
-                        print(f"  [MySQL] [样例{idx+1}] title={row.get('title', '')[:50]}")
-
-                    # 解析component_config
-                    component_config = row.get("component_config")
-                    if isinstance(component_config, str):
-                        try:
-                            component_config = json.loads(component_config)
-                        except Exception as e:
-                            print(f"  [MySQL] [警告] 解析component_config失败 (id={c_id}): {e}")
-                            component_config = {}
-
-                    # 提取question字段作为摘要
-                    summary = ""
-                    if isinstance(component_config, dict):
-                        question = component_config.get("question")
-                        summary = to_json_str(question) if question else ""
-                        if idx < 3 and question:
-                            print(f"  [MySQL] [样例{idx+1}] 提取到question字段，长度: {len(summary)}")
-
-                    # 解析kp_relation_info
-                    kp_relation_info = row.get("kp_relation_info")
-                    if isinstance(kp_relation_info, str):
-                        try:
-                            kp_relation_info = json.loads(kp_relation_info)
-                        except Exception:
-                            kp_relation_info = []
-
-                    config_map[key] = {
-                        "title": row.get("title", ""),
-                        "component_config": component_config,
-                        "summary": summary,
-                        "kp_relation_info": to_json_str(kp_relation_info),
-                    }
-
-                print(f"  [MySQL] 中互动组件配置已加入config_map，当前map大小: {len(config_map)}")
-        except Exception as e:
-            print(f"  [MySQL] [错误] 查询中互动组件配置失败: {e}")
-            import traceback
-            traceback.print_exc()
-
-    # 批量查询core_interaction_component
-    if core_c_ids:
-        try:
-            with mysql_conn.cursor() as cur:
-                placeholders = ','.join(['%s'] * len(core_c_ids))
-                sql = f"""
-                SELECT c_id, c_type, title, component_config, kp_relation_info
-                FROM core_interaction_component
-                WHERE c_id IN ({placeholders}) AND deleted_at IS NULL
-                """
-                print(f"  [MySQL] 执行核心互动组件查询，查询条件: c_id IN ({len(core_c_ids)}个ID)")
-                cur.execute(sql, tuple(core_c_ids))
-                rows = cur.fetchall() or []
-                print(f"  [MySQL] 查询到{len(rows)}条核心互动组件配置")
-
-                if len(rows) == 0 and len(core_c_ids) > 0:
-                    print(f"  [MySQL] [警告] 查询结果为空！可能的原因：")
-                    print(f"  [MySQL]   - 数据库中没有匹配的c_id记录")
-                    print(f"  [MySQL]   - deleted_at字段不为NULL")
-                    print(f"  [MySQL]   - c_id不存在")
-
-                for idx, row in enumerate(rows):
-                    c_type = row.get("c_type", "")
-                    c_id = row.get("c_id")
-                    key = f"{c_type}_{c_id}"
-
-                    if idx < 3:  # 输出前3条的详细信息
-                        print(f"  [MySQL] [样例{idx+1}] id={c_id}, c_type={c_type}, key={key}")
-                        print(f"  [MySQL] [样例{idx+1}] title={row.get('title', '')[:50]}")
-
-                    # 解析component_config
-                    component_config = row.get("component_config")
-                    if isinstance(component_config, str):
-                        try:
-                            component_config = json.loads(component_config)
-                        except Exception as e:
-                            print(f"  [MySQL] [警告] 解析component_config失败 (id={c_id}): {e}")
-                            component_config = {}
-
-                    # 提取taskInfo字段作为摘要
-                    summary = ""
-                    if isinstance(component_config, dict):
-                        task_info = component_config.get("taskInfo")
-                        summary = to_json_str(task_info) if task_info else ""
-                        if idx < 3 and task_info:
-                            print(f"  [MySQL] [样例{idx+1}] 提取到taskInfo字段，长度: {len(summary)}")
-
-                    # 解析kp_relation_info
-                    kp_relation_info = row.get("kp_relation_info")
-                    if isinstance(kp_relation_info, str):
-                        try:
-                            kp_relation_info = json.loads(kp_relation_info)
-                        except Exception:
-                            kp_relation_info = []
-
-                    config_map[key] = {
-                        "title": row.get("title", ""),
-                        "component_config": component_config,
-                        "summary": summary,
-                        "kp_relation_info": to_json_str(kp_relation_info),
-                    }
-
-                print(f"  [MySQL] 核心互动组件配置已加入config_map，当前map大小: {len(config_map)}")
-        except Exception as e:
-            print(f"  [MySQL] [错误] 查询核心互动组件配置失败: {e}")
-            import traceback
-            traceback.print_exc()
-
-    print(f"  [MySQL] 组件配置查询完成，共{len(config_map)}条，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-    return config_map
-
-
-def calculate_accuracy(question_list: Any) -> float:
-    """
-    计算问题列表的正确率
-
-    Args:
-        question_list: 问题列表（可能是JSON字符串或list）
-
-    Returns:
-        正确率（百分比，保留2位小数）
-    """
-    try:
-        if isinstance(question_list, str):
-            question_list = json.loads(question_list)
-
-        if not isinstance(question_list, list) or len(question_list) == 0:
-            return 0.0
-
-        total = len(question_list)
-        correct = sum(1 for q in question_list if q.get('isRight') == True)
-        accuracy = round(correct / total * 100, 2) if total > 0 else 0.0
-
-        return accuracy
-    except Exception:
-        return 0.0
-
-
-
-def fetch_character_ids_by_account(account_id: str, conn: Any) -> List[str]:
-    """根据账户id查询对应的角色id列表"""
-    sql = "SELECT id FROM vala_app_character WHERE account_id = %s"
-    try:
-        with conn.cursor() as cur:
-            cur.execute(sql, (account_id,))
-            rows = cur.fetchall() or []
-            return [str(row["id"]) for row in rows if row.get("id")]
-    except Exception as e:
-        print(f"[ERROR] 查询账户id={account_id}的角色id失败: {e}")
-        return []
-
-
-def fetch_pg_play_records(user_id: str, conn: Any, mysql_conn: Any) -> List[Dict[str, Any]]:
-    """
-    查询互动组件学习记录并补充组件配置信息
-
-    Args:
-        user_id: 用户ID（角色ID）
-        conn: PostgreSQL数据库连接
-        mysql_conn: MySQL数据库连接
-
-    Returns:
-        互动组件学习记录列表
-    """
-    print(f"  [PG] 开始查询互动组件学习记录（8张分表）...")
-    start_time = datetime.datetime.now()
-
-    tables = [f"user_component_play_record_{i}" for i in range(8)]
-    rows: List[Dict[str, Any]] = []
-    with conn.cursor(cursor_factory=RealDictCursor) as cur:
-        for t in tables:
-            try:
-                cur.execute(
-                    f"""
-                    SELECT user_id, component_unique_code, session_id, c_type, c_id,
-                           play_result, user_behavior_info, updated_at
-                    FROM {t}
-                    WHERE user_id = %s
-                    ORDER BY updated_at DESC
-                    """,
-                    (user_id,),
-                )
-                part = cur.fetchall() or []
-                if part:
-                    print(f"  [PG] 表{t}查到{len(part)}条记录")
-                for r in part:
-                    r = dict(r)
-                    r["play_result"] = to_json_str(r.get("play_result"))
-                    r["user_behavior_info"] = to_json_str(r.get("user_behavior_info"))
-                    # 将带时区的时间转换为无时区，避免Excel写入报错
-                    upd = r.get("updated_at")
-                    if isinstance(upd, datetime.datetime):
-                        try:
-                            if upd.tzinfo is not None and upd.tzinfo.utcoffset(upd) is not None:
-                                r["updated_at"] = upd.replace(tzinfo=None)
-                        except Exception:
-                            # 回退为字符串
-                            r["updated_at"] = str(upd)
-                    rows.append(r)
-            except Exception as e:
-                print(f"  [PG] 表{t}查询失败: {e}")
-                continue
-
-    rows.sort(key=lambda x: parse_time(x.get("updated_at")) or datetime.datetime.min, reverse=True)
-    print(f"  [PG] 互动组件学习记录查询完成，共{len(rows)}条，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-
-    # 批量查询组件配置
-    if rows and mysql_conn:
-        config_map = batch_fetch_component_configs(rows, mysql_conn)
-
-        # 补充组件信息
-        print(f"  [PG] 开始补充组件配置信息...")
-        filled_count = 0
-        empty_count = 0
-        sample_keys = []
-        sample_mode_check = []  # 检查对话互动的mode
-
-        for r in rows:
-            c_type = r.get("c_type", "")
-            c_id = r.get("c_id")
-            key = f"{c_type}_{c_id}" if c_type and c_id else ""
-
-            config = config_map.get(key, {})
-            component_config = config.get("component_config", {})
-
-            component_name = get_component_name(c_type, component_config)
-            r["互动组件名称"] = component_name
-            r["组件标题"] = config.get("title", "")
-            r["组件配置摘要"] = config.get("summary", "")
-            r["知识点"] = config.get("kp_relation_info", "")
-
-            # 统计填充情况
-            if config:
-                filled_count += 1
-                if len(sample_keys) < 3:
-                    sample_keys.append((key, component_name, r["组件标题"][:30] if r["组件标题"] else ""))
-
-                # 检查对话互动的mode
-                if c_type == "mid_sentence_dialogue" and len(sample_mode_check) < 3:
-                    mode = ""
-                    if isinstance(component_config, dict):
-                        question = component_config.get("question", {})
-                        if isinstance(question, dict):
-                            mode = question.get("mode", "")
-                    sample_mode_check.append({
-                        "key": key,
-                        "mode": mode,
-                        "component_name": component_name
-                    })
-            else:
-                empty_count += 1
-                if empty_count <= 5:  # 输出前5个未匹配的key
-                    print(f"  [PG] [警告] 未找到组件配置: key={key}")
-
-        print(f"  [PG] 组件配置信息补充完成")
-        print(f"  [PG] 匹配到配置: {filled_count}条, 未匹配: {empty_count}条")
-        if sample_keys:
-            print(f"  [PG] 样例数据（前3条）:")
-            for key, name, title in sample_keys:
-                print(f"  [PG]   - key={key}, 名称={name}, 标题={title}")
-
-        if sample_mode_check:
-            print(f"  [PG] 对话互动mode检查（前3条）:")
-            for s in sample_mode_check:
-                print(f"  [PG]   - key={s['key']}, mode={s['mode']}, 最终名称={s['component_name']}")
-
-    return rows
-
-
-def fetch_pg_unit_review(user_id: str, conn: Any, id_2_unit_index: Dict[int, int], chapter_id_to_lesson_id: Dict[int, int]) -> List[Dict[str, Any]]:
-    """
-    查询课程巩固记录
-
-    Args:
-        user_id: 用户ID（角色ID）
-        conn: PostgreSQL数据库连接
-        id_2_unit_index: story_id到unit_id的映射字典
-        chapter_id_to_lesson_id: chapter_id到lesson_id的映射字典
-
-    Returns:
-        课程巩固记录列表
-    """
-    print(f"  [PG] 开始查询课程巩固记录...")
-    start_time = datetime.datetime.now()
-
-    sql = (
-        "SELECT user_id, story_id, chapter_id, question_list, updated_at "
-        "FROM user_unit_review_question_result WHERE user_id = %s ORDER BY updated_at DESC"
-    )
-    with conn.cursor(cursor_factory=RealDictCursor) as cur:
-        try:
-            cur.execute(sql, (user_id,))
-            rows = cur.fetchall() or []
-        except Exception as e:
-            print(f"  [PG] 课程巩固记录查询失败: {e}")
-            rows = []
-    out: List[Dict[str, Any]] = []
-    for r in rows:
-        d = dict(r)
-
-        # 映射 story_id 到 unit_id
-        story_id = d.get("story_id")
-        unit_id = id_2_unit_index.get(story_id) if story_id else None
-        d["unit_id"] = unit_id
-
-        # 映射 chapter_id 到 lesson_id
-        chapter_id = d.get("chapter_id")
-        lesson_id = chapter_id_to_lesson_id.get(chapter_id) if chapter_id else None
-        d["lesson_id"] = lesson_id
-
-        # 计算正确率
-        question_list = d.get("question_list")
-        d["正确率"] = calculate_accuracy(question_list)
-
-        d["question_list"] = to_json_str(question_list)
-        upd = d.get("updated_at")
-        if isinstance(upd, datetime.datetime):
-            try:
-                if upd.tzinfo is not None and upd.tzinfo.utcoffset(upd) is not None:
-                    d["updated_at"] = upd.replace(tzinfo=None)
-            except Exception:
-                d["updated_at"] = str(upd)
-        out.append(d)
-
-    print(f"  [PG] 课程巩固记录查询完成，共{len(out)}条，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-    return out
-
-
-def fetch_pg_unit_challenge(user_id: str, conn: Any, id_2_unit_index: Dict[int, int]) -> List[Dict[str, Any]]:
-    """
-    查询单元挑战记录
-
-    Args:
-        user_id: 用户ID（角色ID）
-        conn: PostgreSQL数据库连接
-        id_2_unit_index: story_id到unit_id的映射字典
-
-    Returns:
-        单元挑战记录列表
-    """
-    print(f"  [PG] 开始查询单元挑战记录...")
-    start_time = datetime.datetime.now()
-
-    sql = (
-        "SELECT user_id, story_id, category, score_text, question_list, updated_at "
-        "FROM user_unit_challenge_question_result WHERE user_id = %s ORDER BY updated_at DESC"
-    )
-    with conn.cursor(cursor_factory=RealDictCursor) as cur:
-        try:
-            cur.execute(sql, (user_id,))
-            rows = cur.fetchall() or []
-        except Exception as e:
-            print(f"  [PG] 单元挑战记录查询失败: {e}")
-            rows = []
-    out: List[Dict[str, Any]] = []
-    for r in rows:
-        d = dict(r)
-
-        # 映射 story_id 到 unit_id
-        story_id = d.get("story_id")
-        unit_id = id_2_unit_index.get(story_id) if story_id else None
-        d["unit_id"] = unit_id
-
-        d["question_list"] = to_json_str(d.get("question_list"))
-        upd = d.get("updated_at")
-        if isinstance(upd, datetime.datetime):
-            try:
-                if upd.tzinfo is not None and upd.tzinfo.utcoffset(upd) is not None:
-                    d["updated_at"] = upd.replace(tzinfo=None)
-            except Exception:
-                d["updated_at"] = str(upd)
-        out.append(d)
-
-    print(f"  [PG] 单元挑战记录查询完成，共{len(out)}条，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-    return out
-
-
-def fetch_pg_unit_summary(user_id: str, conn: Any, id_2_unit_index: Dict[int, int]) -> List[Dict[str, Any]]:
-    """
-    查询单元总结知识点结果数据
-
-    Args:
-        user_id: 用户ID（角色ID）
-        conn: PostgreSQL数据库连接
-        id_2_unit_index: story_id到unit_id的映射字典
-
-    Returns:
-        单元总结记录列表
-    """
-    print(f"  [PG] 开始查询单元总结记录...")
-    start_time = datetime.datetime.now()
-
-    sql = (
-        "SELECT id, user_id, story_id, updated_at, km_id, km_type, play_time "
-        "FROM user_unit_summary_km_result WHERE user_id = %s AND deleted_at IS NULL ORDER BY updated_at DESC"
-    )
-    with conn.cursor(cursor_factory=RealDictCursor) as cur:
-        try:
-            cur.execute(sql, (user_id,))
-            rows = cur.fetchall() or []
-        except Exception as e:
-            print(f"  [PG] 单元总结记录查询失败: {e}")
-            rows = []
-
-    out: List[Dict[str, Any]] = []
-    for r in rows:
-        d = dict(r)
-        # 映射 story_id 到 unit_id
-        story_id = d.get("story_id")
-        unit_id = id_2_unit_index.get(story_id) if story_id else None
-        d["unit_id"] = unit_id
-
-        # 转换 play_time (毫秒) 为秒 (整数)
-        play_time = d.get("play_time")
-        d["play_time_seconds"] = play_time // 1000 if play_time else 0
-
-        # 移除时区信息
-        upd = d.get("updated_at")
-        if isinstance(upd, datetime.datetime):
-            try:
-                if upd.tzinfo is not None and upd.tzinfo.utcoffset(upd) is not None:
-                    d["updated_at"] = upd.replace(tzinfo=None)
-            except Exception:
-                d["updated_at"] = str(upd)
-        out.append(d)
-
-    print(f"  [PG] 单元总结记录查询完成，共{len(out)}条，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-    return out
-
-
-def generate_statistics(sheet2_rows: List[Dict[str, Any]], sheet5_rows: List[Dict[str, Any]]) -> tuple:
-    """
-    生成汇总统计数据
-
-    Args:
-        sheet2_rows: 互动组件学习记录
-        sheet5_rows: 单元总结记录
-
-    Returns:
-        (组件统计DataFrame, 知识点统计DataFrame, 单元时长统计DataFrame)
-    """
-    if pd is None:
-        raise RuntimeError("缺少pandas依赖，请安装后再运行。")
-
-    print(f"  [统计] 开始生成汇总统计数据...")
-    start_time = datetime.datetime.now()
-
-    from collections import defaultdict
-
-    # ============ a. 所有互动-按互动组件类型-通过情况统计 ============
-    component_stats_data = []
-    component_stats = defaultdict(lambda: {"Perfect": 0, "Good": 0, "Failed": 0, "Pass": 0, "Oops": 0, "total": 0})
-
-    # 用于调试
-    sample_results = []
-    parse_error_count = 0
-
-    for idx, record in enumerate(sheet2_rows):
-        component_name = record.get("互动组件名称", "")
-        if not component_name:
-            continue
-
-        play_result_str = record.get("play_result", "")
-
-        # 解析play_result
-        result = ""
-        try:
-            # 先判断是否是简单的字符串（Perfect/Good/Failed/Pass/Oops）
-            if isinstance(play_result_str, str):
-                # 去除空格后检查
-                stripped = play_result_str.strip()
-                if stripped in ["Perfect", "Good", "Failed", "Pass", "Oops"]:
-                    # 直接使用
-                    result = stripped
-                else:
-                    # 尝试JSON解析
-                    try:
-                        play_result = json.loads(play_result_str)
-                        if isinstance(play_result, dict):
-                            result = play_result.get("result", "")
-                        else:
-                            result = ""
-                    except:
-                        result = ""
-            else:
-                # 如果不是字符串，尝试当dict处理
-                if isinstance(play_result_str, dict):
-                    result = play_result_str.get("result", "")
-                else:
-                    result = ""
-
-            # 收集前3个样例
-            if idx < 3:
-                sample_results.append({
-                    "component": component_name,
-                    "raw": str(play_result_str)[:100],
-                    "result": result
-                })
-        except Exception as e:
-            parse_error_count += 1
-            if parse_error_count <= 3:
-                print(f"  [统计] [警告] 解析play_result失败 (第{idx+1}条): {e}, 原始值: {str(play_result_str)[:100]}")
-            result = ""
-
-        component_stats[component_name]["total"] += 1
-        if result in ["Perfect", "Good", "Failed", "Pass", "Oops"]:
-            component_stats[component_name][result] += 1
-
-    print(f"  [统计] play_result解析样例（前3条）:")
-    for s in sample_results:
-        print(f"  [统计]   - 组件: {s['component']}, 结果: {s['result']}, 原始: {s['raw']}")
-    if parse_error_count > 0:
-        print(f"  [统计] play_result解析失败总数: {parse_error_count}")
-
-    # 生成统计数据行
-    for component_name in sorted(component_stats.keys()):
-        stats = component_stats[component_name]
-        total = stats["total"]
-        perfect = stats["Perfect"]
-        good = stats["Good"]
-        failed = stats["Failed"]
-        pass_count = stats["Pass"]
-        oops = stats["Oops"]
-
-        perfect_ratio = round(perfect / total * 100, 2) if total > 0 else 0
-        good_ratio = round(good / total * 100, 2) if total > 0 else 0
-        failed_ratio = round(failed / total * 100, 2) if total > 0 else 0
-        pass_ratio = round(pass_count / total * 100, 2) if total > 0 else 0
-        oops_ratio = round(oops / total * 100, 2) if total > 0 else 0
-
-        component_stats_data.append({
-            "互动组件名称": component_name,
-            "总数量": total,
-            "Perfect数量": perfect,
-            "Good数量": good,
-            "Failed数量": failed,
-            "Pass数量": pass_count,
-            "Oops数量": oops,
-            "Perfect比例(%)": perfect_ratio,
-            "Good比例(%)": good_ratio,
-            "Failed比例(%)": failed_ratio,
-            "Pass比例(%)": pass_ratio,
-            "Oops比例(%)": oops_ratio,
-        })
-
-    # ============ b. 中互动组件-按知识点-通过情况统计 ============
-    kp_stats_data = []
-    kp_stats = defaultdict(lambda: {"Perfect": 0, "Good": 0, "Failed": 0, "Pass": 0, "Oops": 0, "total": 0})
-
-    # 调试信息
-    mid_count = 0
-    has_kp_count = 0
-    sample_kp_records = []
-
-    for idx, record in enumerate(sheet2_rows):
-        c_type = record.get("c_type", "")
-        if not c_type or not c_type.startswith("mid"):
-            continue
-
-        mid_count += 1
-        kp_relation_info_str = record.get("知识点", "")
-
-        if not kp_relation_info_str:
-            continue
-
-        has_kp_count += 1
-
-        # 解析知识点
-        try:
-            if isinstance(kp_relation_info_str, str):
-                kp_relation_info = json.loads(kp_relation_info_str)
-            else:
-                kp_relation_info = kp_relation_info_str
-
-            if not isinstance(kp_relation_info, list):
-                continue
-
-            # 收集样例
-            if len(sample_kp_records) < 3:
-                sample_kp_records.append({
-                    "c_type": c_type,
-                    "kp_count": len(kp_relation_info),
-                    "kp_info": str(kp_relation_info)[:200]
-                })
-
-            # 解析play_result（使用相同的逻辑）
-            play_result_str = record.get("play_result", "")
-            result = ""
-            if isinstance(play_result_str, str):
-                stripped = play_result_str.strip()
-                if stripped in ["Perfect", "Good", "Failed", "Pass", "Oops"]:
-                    result = stripped
-                else:
-                    try:
-                        play_result = json.loads(play_result_str)
-                        if isinstance(play_result, dict):
-                            result = play_result.get("result", "")
-                    except:
-                        pass
-            elif isinstance(play_result_str, dict):
-                result = play_result_str.get("result", "")
-
-            # 为每个知识点统计
-            for kp in kp_relation_info:
-                if not isinstance(kp, dict):
-                    continue
-
-                kp_id = kp.get("kpId", "")
-                kp_type = kp.get("kpType", "")
-                kp_title = kp.get("kpTitle", "")
-
-                if not kp_id:
-                    continue
-
-                kp_key = f"{kp_id}|{kp_type}|{kp_title}"
-                kp_stats[kp_key]["total"] += 1
-                if result in ["Perfect", "Good", "Failed", "Pass", "Oops"]:
-                    kp_stats[kp_key][result] += 1
-
-        except Exception as e:
-            if len(sample_kp_records) < 5:
-                print(f"  [统计] [警告] 解析知识点失败: {e}, 原始值: {str(kp_relation_info_str)[:100]}")
-            continue
-
-    print(f"  [统计] 中互动组件统计: 总数={mid_count}, 有知识点={has_kp_count}, 知识点条目数={len(kp_stats)}")
-    if sample_kp_records:
-        print(f"  [统计] 知识点样例（前3条）:")
-        for s in sample_kp_records:
-            print(f"  [统计]   - c_type={s['c_type']}, 知识点数量={s['kp_count']}, 内容={s['kp_info']}")
-
-    # 生成知识点统计数据行
-    for kp_key in sorted(kp_stats.keys()):
-        parts = kp_key.split("|")
-        if len(parts) != 3:
-            continue
-
-        kp_id, kp_type, kp_title = parts
-        stats = kp_stats[kp_key]
-        total = stats["total"]
-        perfect = stats["Perfect"]
-        good = stats["Good"]
-        failed = stats["Failed"]
-        pass_count = stats["Pass"]
-        oops = stats["Oops"]
-
-        perfect_ratio = round(perfect / total * 100, 2) if total > 0 else 0
-        good_ratio = round(good / total * 100, 2) if total > 0 else 0
-        failed_ratio = round(failed / total * 100, 2) if total > 0 else 0
-        pass_ratio = round(pass_count / total * 100, 2) if total > 0 else 0
-        oops_ratio = round(oops / total * 100, 2) if total > 0 else 0
-
-        kp_stats_data.append({
-            "知识点ID": kp_id,
-            "知识点类型": kp_type,
-            "知识点标题": kp_title,
-            "总数量": total,
-            "Perfect数量": perfect,
-            "Good数量": good,
-            "Failed数量": failed,
-            "Pass数量": pass_count,
-            "Oops数量": oops,
-            "Perfect比例(%)": perfect_ratio,
-            "Good比例(%)": good_ratio,
-            "Failed比例(%)": failed_ratio,
-            "Pass比例(%)": pass_ratio,
-            "Oops比例(%)": oops_ratio,
-        })
-
-    # ============ c. 单元总结-按单元统计时长 ============
-    unit_time_stats_data = []
-    unit_time_stats = defaultdict(int)
-
-    for record in sheet5_rows:
-        unit_id = record.get("unit_id")
-        play_time_seconds = record.get("play_time_seconds", 0)
-
-        if unit_id is not None:
-            unit_time_stats[unit_id] += play_time_seconds
-
-    # 生成单元时长统计数据行
-    for unit_id in sorted(unit_time_stats.keys()):
-        total_seconds = unit_time_stats[unit_id]
-        total_minutes = int(total_seconds / 60)
-
-        unit_time_stats_data.append({
-            "单元ID": f"unit_{unit_id}",
-            "总时长(秒)": total_seconds,
-            "总时长(分钟)": total_minutes,
-        })
-
-    print(f"  [统计] 汇总统计数据生成完成，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-    print(f"  [统计] 生成了{len(component_stats_data)}条组件统计, {len(kp_stats_data)}条知识点统计, {len(unit_time_stats_data)}条单元时长统计")
-
-    return (
-        pd.DataFrame(component_stats_data),
-        pd.DataFrame(kp_stats_data),
-        pd.DataFrame(unit_time_stats_data)
-    )
-
-
-
-def write_excel(path: str, sheet1_rows: List[Dict[str, Any]], sheet2_rows: List[Dict[str, Any]], sheet3_rows: List[Dict[str, Any]], sheet4_rows: List[Dict[str, Any]], sheet5_rows: List[Dict[str, Any]], stats_component_df: Any, stats_kp_df: Any, stats_unit_time_df: Any) -> None:
-    if pd is None:
-        raise RuntimeError("缺少pandas依赖，请安装后再运行。")
-
-    print(f"  [Excel] 开始写入Excel文件: {path}")
-    start_time = datetime.datetime.now()
-
-    out_dir = os.path.dirname(path) or "."
-    os.makedirs(out_dir, exist_ok=True)
-    with pd.ExcelWriter(path, engine="openpyxl") as writer:
-        pd.DataFrame(sheet1_rows, columns=SHEET1_COLUMNS).to_excel(writer, sheet_name="全部音频数据", index=False)
-        pd.DataFrame(sheet2_rows, columns=SHEET2_COLUMNS).to_excel(writer, sheet_name="互动组件学习记录", index=False)
-        pd.DataFrame(sheet3_rows, columns=SHEET3_COLUMNS).to_excel(writer, sheet_name="课程巩固记录", index=False)
-        pd.DataFrame(sheet4_rows, columns=SHEET4_COLUMNS).to_excel(writer, sheet_name="单元挑战记录", index=False)
-        pd.DataFrame(sheet5_rows, columns=SHEET5_COLUMNS).to_excel(writer, sheet_name="单元总结记录", index=False)
-        stats_component_df.to_excel(writer, sheet_name="统计-互动组件通过情况", index=False)
-        stats_kp_df.to_excel(writer, sheet_name="统计-知识点通过情况", index=False)
-        stats_unit_time_df.to_excel(writer, sheet_name="统计-单元总结时长", index=False)
-
-    print(f"  [Excel] 写入完成，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-
-
-def get_date_str() -> str:
-    """获取当前日期字符串 格式：YYYYMMDD"""
-    return datetime.datetime.now().strftime("%Y%m%d")
-
-
-def export_single_user(user_id: str, es_cfg: Dict[str, Any], pg_conn: Any, mysql_conn: Any, output_path: str, id_2_unit_index: Dict[int, int], chapter_id_to_lesson_id: Dict[int, int]) -> bool:
-    """
-    导出单个角色id的数据
-
-    Args:
-        user_id: 角色ID
-        es_cfg: ES配置
-        pg_conn: PostgreSQL连接
-        mysql_conn: MySQL连接
-        output_path: 输出路径
-        id_2_unit_index: story_id到unit_id的映射字典
-        chapter_id_to_lesson_id: chapter_id到lesson_id的映射字典
-
-    Returns:
-        True表示成功，False表示失败
-    """
-    try:
-        print(f"\n[INFO] ========== 开始导出角色id={user_id} ==========")
-        total_start_time = datetime.datetime.now()
-
-        # 查询ES数据
-        sheet1_rows = fetch_es_user_audio(user_id, es_cfg)
-
-        # 查询PG数据
-        sheet2_rows = fetch_pg_play_records(user_id, pg_conn, mysql_conn)
-        sheet3_rows = fetch_pg_unit_review(user_id, pg_conn, id_2_unit_index, chapter_id_to_lesson_id)
-        sheet4_rows = fetch_pg_unit_challenge(user_id, pg_conn, id_2_unit_index)
-        sheet5_rows = fetch_pg_unit_summary(user_id, pg_conn, id_2_unit_index)
-
-        # 检查是否有有效数据
-        total_records = len(sheet1_rows) + len(sheet2_rows) + len(sheet3_rows) + len(sheet4_rows) + len(sheet5_rows)
-        print(f"  [统计] 数据汇总:")
-        print(f"    - 全部音频数据: {len(sheet1_rows)}条")
-        print(f"    - 互动组件学习记录: {len(sheet2_rows)}条")
-        print(f"    - 课程巩固记录: {len(sheet3_rows)}条")
-        print(f"    - 单元挑战记录: {len(sheet4_rows)}条")
-        print(f"    - 单元总结记录: {len(sheet5_rows)}条")
-        print(f"    - 总计: {total_records}条")
-
-        if total_records == 0:
-            print(f"[WARN] 角色id={user_id} 没有找到任何有效记录，跳过导出")
-            return False
-
-        # 生成汇总统计数据
-        stats_component_df, stats_kp_df, stats_unit_time_df = generate_statistics(sheet2_rows, sheet5_rows)
-
-        # 写入Excel
-        write_excel(output_path, sheet1_rows, sheet2_rows, sheet3_rows, sheet4_rows, sheet5_rows, stats_component_df, stats_kp_df, stats_unit_time_df)
-
-        total_time = (datetime.datetime.now() - total_start_time).total_seconds()
-        print(f"[INFO] 角色id={user_id} 导出成功")
-        print(f"[INFO] 文件路径: {output_path}")
-        print(f"[INFO] 总耗时: {total_time:.2f}秒")
-        print(f"[INFO] ========== 完成 ==========\n")
-        return True
-
-    except Exception as e:
-        print(f"[ERROR] 角色id={user_id} 导出失败: {e}")
-        import traceback
-        traceback.print_exc()
-        return False
-
-
-def main():
-    load_env()
-
-    # 确定运行模式并收集需要导出的角色id列表
-    user_id_list: List[tuple] = []  # [(user_id, account_id or None), ...]
-    date_str = get_date_str()
-
-    # 检查三种模式的配置
-    has_user_id = USER_ID is not None
-    has_user_id_list = USER_ID_LIST is not None and len(USER_ID_LIST) > 0
-    has_account_id_list = ACCOUNT_ID_LIST is not None and len(ACCOUNT_ID_LIST) > 0
-
-    # 验证只能配置一种模式
-    mode_count = sum([has_user_id, has_user_id_list, has_account_id_list])
-    if mode_count == 0:
-        raise RuntimeError("请配置 USER_ID、USER_ID_LIST 或 ACCOUNT_ID_LIST 中的一个")
-    if mode_count > 1:
-        raise RuntimeError("USER_ID、USER_ID_LIST、ACCOUNT_ID_LIST 只能配置一个，请检查配置")
-
-    # 模式1：单个角色id
-    if has_user_id:
-        user_id_list = [(str(USER_ID), None)]
-        print(f"[INFO] 运行模式：单个角色id")
-
-    # 模式2：角色id列表
-    elif has_user_id_list:
-        user_id_list = [(str(uid), None) for uid in USER_ID_LIST]
-        print(f"[INFO] 运行模式：角色id列表，共{len(user_id_list)}个角色")
-
-    # 模式3：账户id列表
-    elif has_account_id_list:
-        print(f"[INFO] 运行模式：账户id列表，共{len(ACCOUNT_ID_LIST)}个账户")
-        mysql_conn = None
-        try:
-            mysql_conn = get_mysql_conn("vala_user")  # 查询用户表，使用 vala_user 数据库
-            for account_id in ACCOUNT_ID_LIST:
-                account_id_str = str(account_id)
-                print(f"[INFO] 查询账户id={account_id_str}对应的角色id...")
-                character_ids = fetch_character_ids_by_account(account_id_str, mysql_conn)
-                if not character_ids:
-                    print(f"[WARN] 账户id={account_id_str} 未找到关联的角色id，跳过")
-                    continue
-                print(f"[INFO] 账户id={account_id_str} 找到{len(character_ids)}个角色id: {character_ids}")
-                for cid in character_ids:
-                    user_id_list.append((cid, account_id_str))
-        finally:
-            if mysql_conn:
-                try:
-                    mysql_conn.close()
-                except Exception:
-                    pass
-
-    if not user_id_list:
-        print("[WARN] 没有需要导出的角色id，程序退出")
-        return
-
-    # 初始化连接
-    es_cfg = get_es_config()
-    pg_conn = get_pg_conn()
-
-    # 获取映射表（只需要查询一次，所有角色共用）
-    print(f"\n[INFO] ===== 准备工作：获取映射表 =====")
-    mysql_conn = None
-    id_2_unit_index = {}
-    chapter_id_to_lesson_id = {}
-    try:
-        print(f"[INFO] 正在连接MySQL数据库（vala_test）...")
-        mysql_conn = get_mysql_conn("vala_test")  # 查询游戏配置表，使用 vala_test 数据库
-        print(f"[INFO] 正在获取 story_id 到 unit_id 的映射...")
-        id_2_unit_index = get_id_2_unit_index(mysql_conn)
-        print(f"[INFO] 成功获取 {len(id_2_unit_index)} 个 story_id 映射")
-        print(f"[INFO] 正在获取 chapter_id 到 lesson_id 的映射...")
-        chapter_id_to_lesson_id = get_chapter_id_to_lesson_id(mysql_conn)
-        print(f"[INFO] 成功获取 {len(chapter_id_to_lesson_id)} 个 chapter_id 映射")
-    except Exception as e:
-        print(f"[ERROR] 获取映射表失败: {e}")
-        import traceback
-        traceback.print_exc()
-        if pg_conn:
-            try:
-                pg_conn.close()
-            except Exception:
-                pass
-        if mysql_conn:
-            try:
-                mysql_conn.close()
-            except Exception:
-                pass
-        return
-
-    try:
-        # 统计信息
-        success_count = 0
-        skip_count = 0
-
-        print(f"\n[INFO] ===== 开始批量导出 =====")
-        print(f"[INFO] 共需导出{len(user_id_list)}个角色\n")
-        batch_start_time = datetime.datetime.now()
-
-        # 循环处理每个角色id
-        for idx, (user_id, account_id) in enumerate(user_id_list, 1):
-            print(f"\n{'='*60}")
-            print(f"[INFO] 进度: {idx}/{len(user_id_list)} ({idx*100//len(user_id_list)}%)")
-            print(f"{'='*60}")
-
-            # 生成输出文件名
-            if account_id is None:
-                # 模式1和模式2：角色id_{}_导出时间_{}.xlsx
-                filename = f"角色id_{user_id}_导出时间_{date_str}.xlsx"
-            else:
-                # 模式3：账户id_{}_角色id_{}_导出时间_{}.xlsx
-                filename = f"账户id_{account_id}_角色id_{user_id}_导出时间_{date_str}.xlsx"
-
-            output_path = os.path.join(OUTPUT_DIR, filename)
-
-            # 导出单个角色的数据
-            result = export_single_user(user_id, es_cfg, pg_conn, mysql_conn, output_path, id_2_unit_index, chapter_id_to_lesson_id)
-            if result:
-                success_count += 1
-            else:
-                skip_count += 1
-
-        # 输出统计信息
-        batch_total_time = (datetime.datetime.now() - batch_start_time).total_seconds()
-        print(f"\n{'='*60}")
-        print(f"[INFO] ===== 全部导出完成 =====")
-        print(f"[INFO] 总计: {len(user_id_list)}个角色")
-        print(f"[INFO] 成功: {success_count}个")
-        print(f"[INFO] 跳过: {skip_count}个")
-        print(f"[INFO] 总耗时: {batch_total_time:.2f}秒 ({batch_total_time/60:.2f}分钟)")
-        if success_count > 0:
-            print(f"[INFO] 平均每个角色: {batch_total_time/success_count:.2f}秒")
-        print(f"{'='*60}\n")
-
-    finally:
-        if pg_conn:
-            try:
-                pg_conn.close()
-            except Exception:
-                pass
-        if mysql_conn:
-            try:
-                mysql_conn.close()
-            except Exception:
-                pass
-
-
-if __name__ == "__main__":
-    main()
diff --git a/new_export/export_only_12698.py b/new_export/export_only_12698.py
deleted file mode 100644
index 60f36b5..0000000
--- a/new_export/export_only_12698.py
+++ /dev/null
@@ -1,144 +0,0 @@
-#!/usr/bin/env python3
-"""单独测试角色12698的导出，查看具体报错"""
-
-import os
-import json
-import sys
-import datetime
-from typing import Any, Dict, List
-
-# 加载环境变量
-def load_env():
-    env_path = os.path.join(os.getcwd(), ".env")
-    if os.path.exists(env_path):
-        with open(env_path, "r", encoding="utf-8") as f:
-            for line in f:
-                line = line.strip()
-                if not line or line.startswith("#") or "=" not in line:
-                    continue
-                k, v = line.split("=", 1)
-                os.environ[k.strip()] = v.strip().strip('"').strip("'")
-
-load_env()
-
-import psycopg2
-from psycopg2.extras import RealDictCursor
-import pymysql
-import requests
-from requests.auth import HTTPBasicAuth
-import warnings
-warnings.filterwarnings('ignore')
-
-def test_role_12698():
-    print("="*60)
-    print("单独测试角色ID=12698的查询")
-    print("="*60)
-    
-    # 连接PG
-    try:
-        conn = psycopg2.connect(
-            host=os.getenv("PG_DB_HOST"),
-            port=int(os.getenv("PG_DB_PORT")),
-            user=os.getenv("PG_DB_USER"),
-            password=os.getenv("PG_DB_PASSWORD"),
-            dbname=os.getenv("PG_DB_DATABASE"),
-            connect_timeout=10
-        )
-        print("✅ PG连接成功")
-    except Exception as e:
-        print(f"❌ PG连接失败: {e}")
-        return
-    
-    user_id = "12698"
-    
-    # 测试第一个查询：user_component_play_record_0
-    print(f"\n测试查询表 user_component_play_record_0，user_id={user_id}")
-    try:
-        with conn.cursor(cursor_factory=RealDictCursor) as cur:
-            sql = f"""
-                SELECT user_id, component_unique_code, session_id, c_type, c_id,
-                       play_result, user_behavior_info, updated_at
-                FROM user_component_play_record_0
-                WHERE user_id = %s
-                ORDER BY updated_at DESC
-                """
-            cur.execute(sql, (user_id,))
-            rows = cur.fetchall()
-            print(f"✅ 查询成功，返回{len(rows)}条记录")
-    except Exception as e:
-        print(f"❌ 查询失败: {e}")
-        print(f"错误类型: {type(e).__name__}")
-        
-        # 回滚事务
-        print("\n尝试回滚事务...")
-        try:
-            conn.rollback()
-            print("✅ 事务回滚成功")
-        except Exception as e2:
-            print(f"❌ 回滚失败: {e2}")
-    
-    # 测试查询课程巩固记录表
-    print(f"\n测试查询表 user_unit_review_question_result，user_id={user_id}")
-    try:
-        with conn.cursor(cursor_factory=RealDictCursor) as cur:
-            sql = f"""
-                SELECT user_id, story_id, chapter_id, question_list, updated_at
-                FROM user_unit_review_question_result
-                WHERE user_id = %s
-                ORDER BY updated_at DESC
-                """
-            cur.execute(sql, (user_id,))
-            rows = cur.fetchall()
-            print(f"✅ 查询成功，返回{len(rows)}条记录")
-    except Exception as e:
-        print(f"❌ 查询失败: {e}")
-        print(f"错误类型: {type(e).__name__}")
-        
-        # 回滚事务
-        print("\n尝试回滚事务...")
-        try:
-            conn.rollback()
-            print("✅ 事务回滚成功")
-        except Exception as e2:
-            print(f"❌ 回滚失败: {e2}")
-    
-    # 测试查询单元挑战记录表
-    print(f"\n测试查询表 user_unit_challenge_question_result，user_id={user_id}")
-    try:
-        with conn.cursor(cursor_factory=RealDictCursor) as cur:
-            sql = f"""
-                SELECT user_id, story_id, category, score_text, question_list, updated_at
-                FROM user_unit_challenge_question_result
-                WHERE user_id = %s
-                ORDER BY updated_at DESC
-                """
-            cur.execute(sql, (user_id,))
-            rows = cur.fetchall()
-            print(f"✅ 查询成功，返回{len(rows)}条记录")
-    except Exception as e:
-        print(f"❌ 查询失败: {e}")
-        print(f"错误类型: {type(e).__name__}")
-    
-    # 测试查询单元总结记录表
-    print(f"\n测试查询表 user_unit_summary_record，user_id={user_id}")
-    try:
-        with conn.cursor(cursor_factory=RealDictCursor) as cur:
-            sql = f"""
-                SELECT id, user_id, unit_id, updated_at, km_id, km_type, play_time_seconds
-                FROM user_unit_summary_record
-                WHERE user_id = %s
-                ORDER BY updated_at DESC
-                """
-            cur.execute(sql, (user_id,))
-            rows = cur.fetchall()
-            print(f"✅ 查询成功，返回{len(rows)}条记录")
-    except Exception as e:
-        print(f"❌ 查询失败: {e}")
-        print(f"错误类型: {type(e).__name__}")
-        import traceback
-        traceback.print_exc()
-    
-    conn.close()
-
-if __name__ == "__main__":
-    test_role_12698()
diff --git a/new_export/export_user_id_data.py b/new_export/export_user_id_data.py
deleted file mode 100644
index 478b2e0..0000000
--- a/new_export/export_user_id_data.py
+++ /dev/null
@@ -1,1846 +0,0 @@
-"""
-初版需求v1.0: 2025.11.18
-
-导出 一个userId的多表数据， 最终按照不同sheet，输出到一个 excel文件中。
-
-1. 第一个sheet:"全部音频数据"
-es相关配置通过以下环境变量
-ES_HOST=xxx
-ES_PORT=9200
-ES_SCHEME=https
-ES_USER=elastic
-ES_PASSWORD=xxx
-
-index: user-audio
-
-脚本思路:
-过滤字段:
-userId == xxxx
-
-输出该userId的全部记录 按时间倒序排序
-包含以下字段内容:
-
-userId
-userMsg
-userName
-soeData
-audioUrl
-asrStatus
-componentId
-componentType
-dataVersion
-
-2. 第二个sheet:"互动组件学习记录"
-在 PGsql数据库中  筛选出 user_id 对应的记录 按时间(updated_at)倒序排列。
-数据库相关配置  从.env中读取:
-PG_DB_HOST = xxx
-PG_DB_PORT = xxx
-PG_DB_USER = xxx
-PG_DB_PASSWORD = xxx
-PG_DB_DATABASE = xxx
-
-读取以下数据表: 
-user_component_play_record_0 ~ user_component_play_record_7
-
-输出以下字段：
-user_id,
-component_unique_code,
-session_id,
-c_type,
-c_id,
-play_result,
-user_behavior_info,
-updated_at
-
-3.第三个sheet:"课程巩固记录"
-在 PGsql数据库中  筛选出 user_id 对应的记录 按时间(updated_at)倒序排列。
-
-数据表:user_unit_review_question_result
-
-输出以下字段:
-user_id
-story_id
-chapter_id
-question_list
-updated_at
-
-4.第四个sheet:"单元挑战记录"
-在 PGsql数据库中  筛选出 user_id 对应的记录 按时间(updated_at)倒序排列。
-
-数据表:user_unit_challenge_question_result
-
-输出以下字段:
-user_id
-story_id
-category
-score_text,
-question_list
-updated_at
-------------
-
-需求补充v1.1:
-"全部音频数据"这个sheet
-输出字段 添加timeStr 并按时间倒序排列  最新的记录 在最上面
-
-------------
-需求补充v1.2:
-"全部音频数据"这个sheet
-如果userMsg字段内容 包含 ”makee_id“ 要进行以下处理：
-
-从userMsg字段中提取出具体的makee_id:
-此时的字段样例:
-```
-asr msg信息为：{
-    "time_ms": 358,
-    "time_ms_api": 357,
-    "hot_words_str": "{\n \"context_type\": \"dialog_ctx\",\n \"context_data\": [\n  {\n   \"text\": \"planet Walla\"\n  },\n  {\n   \"text\": \"Walla\"\n  }\n ]\n}",
-    "makee_id": "d208c617-902f-4f81-8255-b5fb73599546",
-    "volcano_fast_x_tt_logid": "202511151541355DF72BE5EBFE73795BFD",
-    "api_name": "volcano-fast"
-}
-```
-然后基于makee_id 去另一个表里查记录:  index:llm_asr_log
-将查询到的记录的 result_text 字段内容 回填到 userMsg。
-将source字段内容 输出 到 source。
-
-如果userMsg字段内容 不包含 ”makee_id“ 保持之前的逻辑。
-
---------------
-需求补充 v1.3
-当前输入 只支持配置单个 userId (业务侧名称为角色id)
-
-
-期望扩展为以下逻辑:
-1. 改为配置 角色id list ， 分别 导出 多份excel文件。命名格式为 角色id_{}_导出时间_{}.xlsx
-2. 改为配置 账户id list ， 分别 导出 多份excel文件。命名格式为 账户id_{}_角色id_{}_导出时间_{}.xlsx
-
-关于 账户 id 到角色id 的映射逻辑，
-首先 读取 mysql 表 vala_app_character
-筛选 account_id字段值 == 账户id 的 记录， 其中 该记录 的 id值，则为角色id 一个 账户id 可以对应多个角色id
-
-本次需求只针对输入侧调整， 数据抽取聚合逻辑部分和之前保持一致
-
----------------
-需求补充 v1.4
-
-增加一个sheet "单元总结记录"，
-导出对应角色id的单元总结记录。   参考 export_unit_summary.py 中的原始数据提取方案即可(不必关注其中的数据统计部分)。
-
-其他已有逻辑保持不动哦。
-
-----------------
-需求补充 v1.5
-
-1."互动组件学习记录"sheet 增加以下字段
-"互动组件名称"、"组件标题"、"组件配置摘要"、"知识点":
-字段取值规则:
-根据 c_type 及组件配置(从mysql表获取) 进行映射和处理:
-```
-1）.如果 c_type 开头为"mid"
-
-则读取下表:表名:middle_interaction_component
-
-获取以下字段值:
-title (作为组件标题)
-component_config (完整的组件配置)   获取其中 的 question 字段值 作为 组件配置摘要；
-kp_relation_info 字段值  作为 知识点
-
-"互动组件名称"规则:
-
-"物品互动": "mid_vocab_item",
-"图片互动": "mid_vocab_image",
-"填词互动": "mid_vocab_fillBlank",
-"指令互动": "mid_vocab_instruction"
-"对话互动-表达": "mid_sentence_dialogue", 且 component_config->question->mode == "express"
-"对话互动-朗读": "mid_sentence_dialogue", 且 component_config->question->mode == "read"
-"语音互动": "mid_sentence_voice",
-"材料互动": "mid_sentence_material",
-"造句互动": "mid_sentence_makeSentence"
-"挖空互动": "mid_grammar_cloze",
-"组句互动": "mid_grammar_sentence"
-"发音互动": "mid_pron_pron"
-
-
-2）. 如果 c_type 开头为"core"
-则读取下表:表名:core_interaction_component
-
-获取以下字段值:
-title (作为组件标题)
-component_config (完整的组件配置)   获取其中 的 taskInfo 字段值 作为 组件配置摘要
-kp_relation_info 字段值  作为 知识点
-
-"互动组件名称"规则:
-"口语快答": "core_speaking_reply",
-"口语妙问": "core_speaking_inquiry",
-"口语探讨": "core_speaking_explore",
-"口语独白": "core_speaking_monologue"
-"合作阅读": "core_reading_order",
-"合作听力": "core_listening_order",
-"看图组句": "core_writing_imgMakeSentence",
-"看图撰写": "core_writing_imgWrite",
-"问题组句": "core_writing_questionMakeSentence",
-"问题撰写": "core_writing_questionWrite",
-```
-
-2."课程巩固记录" sheet 增加以下字段
-"正确率":  参考 export_lesson_review.py 中的计算逻辑
-
-3. 新增一个"汇总统计"sheet
-统计并展示以下内容   请以 可读性 比较好的方式排列、展示
-
-a. "所有互动-按互动组件类型-通过情况统计"
-以每种"互动组件名称"进行聚合
-统计play_result的取值分布情况，算以下指标:
-总数量、Perfect数量、Good数量、Failed数量、Pass数量、Perfect比例、Good比例、Failed比例、Pass比例
-
-b. "中互动组件-按知识点-通过情况统计"
-以每个知识点进行聚合
-
-其中 知识点配置格式如下:
-```
-[{"kpId":"0000004","kpType":"sentence","kpTitle":"My name is ...","kpSkill":"sentence_pron","kpSkillName":"语音"},{"kpId":"0000004","kpType":"sentence","kpTitle":"My name is ...","kpSkill":"sentence_meaning","kpSkillName":"语义"},{"kpId":"0000005","kpType":"sentence","kpTitle":"I'm… years old.","kpSkill":"sentence_pron","kpSkillName":"语音"},{"kpId":"0000005","kpType":"sentence","kpTitle":"I'm… years old.","kpSkill":"sentence_meaning","kpSkillName":"语义"},{"kpId":"0000014","kpType":"sentence","kpTitle":"Nice to meet you.","kpSkill":"sentence_pron","kpSkillName":"语音"},{"kpId":"0000014","kpType":"sentence","kpTitle":"Nice to meet you.","kpSkill":"sentence_meaning","kpSkillName":"语义"}]
-```
-一个组件可以绑定多个知识点，以每个知识点的 kpId + kpType + kpTitle 进行 展示及聚合
-
-对所有绑定了某个知识点的中互动组件(c_type以mid开头)
-统计play_result的取值分布情况，算以下指标:
-总数量、Perfect数量、Good数量、Failed数量、Pass数量、Perfect比例、Good比例、Failed比例、Pass比例
-
-c. "单元总结-按单元统计时长"
-
-将"单元总结记录"中的"play_time_seconds"字段值 以每个单元id 进行聚合 进行 累加 统计，并增加一列 转换为分钟为单位 取整数
-
-
-"""
-# ==== 可直接修改的脚本变量（不使用命令行传参） ====
-# 三种模式互斥，只能配置一个：
-# 模式1：单个角色id
-USER_ID = None # 单个角色ID，示例：2911
-
-# 模式2：角色id列表（多个角色id批量导出）
-USER_ID_LIST = None  # 角色ID列表，示例：[2911, 2912, 2913]
-
-# 模式3：账户id列表（通过账户id查询对应的角色id后批量导出）
-ACCOUNT_ID_LIST = [9343] # 账户ID列表，示例：[100, 101, 102]
-
-OUTPUT_DIR = "output/"  # 输出目录，默认为output文件夹
-# ==== 变量结束 ====
-import os
-import json
-import re
-from typing import Any, Dict, List, Optional
-
-import datetime
-
-try:
-    import requests
-except Exception:
-    requests = None
-
-try:
-    import psycopg2
-    from psycopg2.extras import RealDictCursor
-except Exception:
-    psycopg2 = None
-    RealDictCursor = None
-
-try:
-    import pymysql
-    import pymysql.cursors
-except Exception:
-    pymysql = None
-
-try:
-    import pandas as pd
-except Exception:
-    pd = None
-
-try:
-    import urllib3
-except Exception:
-    urllib3 = None
-
-
-SHEET1_COLUMNS = [
-    "userId",
-    "userMsg",
-    "source",
-    "userName",
-    "soeData",
-    "audioUrl",
-    "asrStatus",
-    "componentId",
-    "componentType",
-    "dataVersion",
-    "timeStr",
-]
-
-SHEET2_COLUMNS = [
-    "user_id",
-    "component_unique_code",
-    "session_id",
-    "c_type",
-    "c_id",
-    "互动组件名称",
-    "组件标题",
-    "组件配置摘要",
-    "知识点",
-    "play_result",
-    "user_behavior_info",
-    "updated_at",
-]
-
-SHEET3_COLUMNS = [
-    "user_id",
-    "unit_id",
-    "lesson_id",
-    "question_list",
-    "正确率",
-    "updated_at",
-]
-
-SHEET4_COLUMNS = [
-    "user_id",
-    "unit_id",
-    "category",
-    "score_text",
-    "question_list",
-    "updated_at",
-]
-
-SHEET5_COLUMNS = [
-    "id",
-    "user_id",
-    "unit_id",
-    "updated_at",
-    "km_id",
-    "km_type",
-    "play_time_seconds",
-]
-
-
-def _load_env_file(path: str) -> None:
-    if not os.path.exists(path):
-        return
-    try:
-        with open(path, "r", encoding="utf-8") as f:
-            for line in f:
-                line = line.strip()
-                if not line or line.startswith("#"):
-                    continue
-                if "=" not in line:
-                    continue
-                k, v = line.split("=", 1)
-                k = k.strip()
-                v = v.strip().strip('"').strip("'")
-                if k and (os.getenv(k) is None):
-                    os.environ[k] = v
-    except Exception:
-        pass
-
-
-def load_env() -> None:
-    _load_env_file(os.path.join(os.getcwd(), ".env"))
-    _load_env_file(os.path.join(os.getcwd(), ".env.local"))
-
-
-def to_json_str(v: Any) -> Any:
-    if isinstance(v, (dict, list)):
-        try:
-            return json.dumps(v, ensure_ascii=False)
-        except Exception:
-            return str(v)
-    return v
-
-
-def parse_time(value: Any) -> Optional[datetime.datetime]:
-    if value is None:
-        return None
-    if isinstance(value, (int, float)):
-        try:
-            v = float(value)
-            # 兼容毫秒级时间戳
-            if v > 1e11:
-                v = v / 1000.0
-            return datetime.datetime.fromtimestamp(v)
-        except Exception:
-            return None
-    if isinstance(value, str):
-        fmts = [
-            "%Y-%m-%dT%H:%M:%S.%fZ",
-            "%Y-%m-%dT%H:%M:%S.%f%z",
-            "%Y-%m-%dT%H:%M:%S%z",
-            "%Y-%m-%d %H:%M:%S",
-            "%Y-%m-%d",
-        ]
-        for fmt in fmts:
-            try:
-                return datetime.datetime.strptime(value, fmt)
-            except Exception:
-                continue
-        try:
-            return datetime.datetime.fromisoformat(value)
-        except Exception:
-            return None
-    return None
-
-
-def pick_time(source: Dict[str, Any]) -> Optional[datetime.datetime]:
-    candidates = [
-        "updated_at",
-        "created_at",
-        "@timestamp",
-        "timestamp",
-        "updatedAt",
-        "createdAt",
-        "time",
-        "ts",
-        "timeStr",
-        "update_time",
-        "create_time",
-    ]
-    for key in candidates:
-        if key in source:
-            t = parse_time(source.get(key))
-            if t is not None:
-                return t
-    # 宽松匹配：尝试扫描所有可能的时间相关字段
-    for k, v in source.items():
-        lk = str(k).lower()
-        if any(s in lk for s in ["time", "date", "_at", "timestamp"]):
-            t = parse_time(v)
-            if t is not None:
-                return t
-    return None
-
-
-def extract_makee_id_from_user_msg(user_msg: Any) -> Optional[str]:
-    # 支持dict或字符串形式
-    if isinstance(user_msg, dict):
-        mk = user_msg.get("makee_id")
-        if isinstance(mk, str) and mk:
-            return mk
-    if isinstance(user_msg, str) and user_msg:
-        # 1) 尝试整体解析为JSON
-        try:
-            obj = json.loads(user_msg)
-            mk = obj.get("makee_id")
-            if isinstance(mk, str) and mk:
-                return mk
-        except Exception:
-            pass
-        # 2) 尝试截取大括号中的JSON
-        try:
-            start = user_msg.find("{")
-            end = user_msg.rfind("}")
-            if start != -1 and end != -1 and end > start:
-                candidate = user_msg[start : end + 1]
-                obj = json.loads(candidate)
-                mk = obj.get("makee_id")
-                if isinstance(mk, str) and mk:
-                    return mk
-        except Exception:
-            pass
-        # 3) 正则匹配 makee_id
-        m = re.search(r"\bmakee_id\b\s*:\s*\"([^\"]+)\"", user_msg)
-        if m:
-            return m.group(1)
-    return None
-
-
-def fetch_es_asr_log(makee_id: str, es_cfg: Dict[str, Any]) -> Optional[Dict[str, Any]]:
-    if requests is None:
-        raise RuntimeError("缺少requests依赖，请安装后再运行。")
-    host = es_cfg.get("host")
-    port = es_cfg.get("port")
-    scheme = es_cfg.get("scheme", "http")
-    user = es_cfg.get("user")
-    password = es_cfg.get("password")
-    index = "llm_asr_log"
-    if not host:
-        return None
-    base = f"{scheme}://{host}:{port}"
-    url = f"{base}/{index}/_search"
-    headers = {"Content-Type": "application/json"}
-    body = {
-        "query": {
-            "bool": {
-                "should": [
-                    {"term": {"makee_id": {"value": str(makee_id)}}},
-                    {"term": {"makee_id.keyword": {"value": str(makee_id)}}},
-                ],
-                "minimum_should_match": 1,
-            }
-        },
-        "size": 10,
-        "_source": [
-            "makee_id",
-            "result_text",
-            "source",
-            "updated_at",
-            "created_at",
-            "@timestamp",
-            "timestamp",
-            "updatedAt",
-            "createdAt",
-            "time",
-            "ts",
-            "timeStr",
-            "update_time",
-            "create_time",
-        ],
-    }
-    auth = (user, password) if user and password else None
-    try:
-        if scheme == "https" and urllib3 is not None:
-            try:
-                urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-            except Exception:
-                pass
-        resp = requests.post(url, headers=headers, json=body, auth=auth, timeout=20, verify=False if scheme == "https" else True)
-        resp.raise_for_status()
-        data = resp.json()
-    except Exception:
-        return None
-    hits = data.get("hits", {}).get("hits", [])
-    if not hits:
-        return None
-    # 选最新的
-    chosen = None
-    best_t = None
-    for h in hits:
-        src = h.get("_source", {}) or {}
-        t = pick_time(src)
-        if t is None:
-            continue
-        if best_t is None or t > best_t:
-            best_t = t
-            chosen = src
-    if chosen is None:
-        # 如果都没有时间，选第一条
-        chosen = (hits[0].get("_source", {}) or {})
-    return chosen
-
-
-def get_es_config() -> Dict[str, Any]:
-    return {
-        "host": os.getenv("ES_HOST"),
-        "port": os.getenv("ES_PORT", "9200"),
-        "scheme": os.getenv("ES_SCHEME", "http"),
-        "user": os.getenv("ES_USER"),
-        "password": os.getenv("ES_PASSWORD"),
-        "index": "user-audio",
-    }
-
-
-def fetch_es_user_audio(user_id: str, es_cfg: Dict[str, Any]) -> List[Dict[str, Any]]:
-    if requests is None:
-        raise RuntimeError("缺少requests依赖，请安装后再运行。")
-
-    print(f"  [ES] 开始查询user-audio索引...")
-    start_time = datetime.datetime.now()
-
-    host = es_cfg.get("host")
-    port = es_cfg.get("port")
-    scheme = es_cfg.get("scheme", "http")
-    user = es_cfg.get("user")
-    password = es_cfg.get("password")
-    index = es_cfg.get("index", "user-audio")
-
-    if not host:
-        return []
-
-    base = f"{scheme}://{host}:{port}"
-    url = f"{base}/{index}/_search"
-    headers = {"Content-Type": "application/json"}
-
-    body = {
-        "query": {
-            "bool": {
-                "should": [
-                    {"term": {"userId": {"value": str(user_id)}}},
-                    {"term": {"userId.keyword": {"value": str(user_id)}}},
-                ],
-                "minimum_should_match": 1,
-            }
-        },
-        "size": 10000,
-        "_source": [
-            "userId",
-            "userMsg",
-            "userName",
-            "soeData",
-            "audioUrl",
-            "asrStatus",
-            "componentId",
-            "componentType",
-            "dataVersion",
-            "updated_at",
-            "created_at",
-            "@timestamp",
-            "timestamp",
-            "updatedAt",
-            "createdAt",
-            "time",
-            "ts",
-            "timeStr",
-            "update_time",
-            "create_time",
-        ],
-    }
-
-    auth = (user, password) if user and password else None
-
-    try:
-        # 抑制自签证书下的HTTPS不安全警告
-        if scheme == "https" and urllib3 is not None:
-            try:
-                urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-            except Exception:
-                pass
-        resp = requests.post(url, headers=headers, json=body, auth=auth, timeout=30, verify=False if scheme == "https" else True)
-        resp.raise_for_status()
-        data = resp.json()
-    except Exception as e:
-        raise RuntimeError(f"ES查询失败: {e}")
-
-    hits = data.get("hits", {}).get("hits", [])
-    print(f"  [ES] 查询完成，获得{len(hits)}条记录，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-
-    if not hits:
-        return []
-
-    print(f"  [ES] 开始处理音频数据...")
-    process_start = datetime.datetime.now()
-
-    rows: List[Dict[str, Any]] = []
-    asr_cache: Dict[str, Dict[str, Any]] = {}
-    makee_id_count = 0
-
-    for idx, h in enumerate(hits, 1):
-        # 每处理100条显示一次进度
-        if idx % 100 == 0 or idx == len(hits):
-            print(f"  [ES] 处理进度: {idx}/{len(hits)} ({idx*100//len(hits)}%)")
-
-        src = h.get("_source", {}) or {}
-        row = {
-            "userId": src.get("userId"),
-            "userMsg": src.get("userMsg"),
-            "source": None,
-            "userName": src.get("userName"),
-            "soeData": to_json_str(src.get("soeData")),
-            "audioUrl": src.get("audioUrl"),
-            "asrStatus": src.get("asrStatus"),
-            "componentId": src.get("componentId"),
-            "componentType": src.get("componentType"),
-            "dataVersion": src.get("dataVersion"),
-        }
-        t = pick_time(src)
-        row["_time"] = t.isoformat() if t else None
-        row["timeStr"] = t.strftime("%Y-%m-%d %H:%M:%S") if t else None
-        # v1.2: 当userMsg包含makee_id时，补充查询llm_asr_log并回填
-        mk = extract_makee_id_from_user_msg(row.get("userMsg"))
-        if mk:
-            makee_id_count += 1
-            asr_doc = asr_cache.get(mk)
-            if asr_doc is None:
-                asr_doc = fetch_es_asr_log(mk, es_cfg)
-                if asr_doc is not None:
-                    asr_cache[mk] = asr_doc
-            if asr_doc is not None:
-                rt = asr_doc.get("result_text")
-                if rt:
-                    row["userMsg"] = rt
-                row["source"] = to_json_str(asr_doc.get("source"))
-        rows.append(row)
-
-    print(f"  [ES] 数据处理完成，发现{makee_id_count}条包含makee_id的记录，耗时{(datetime.datetime.now() - process_start).total_seconds():.2f}秒")
-
-    print(f"  [ES] 开始排序...")
-    rows.sort(key=lambda x: parse_time(x.get("_time")) or datetime.datetime.min, reverse=True)
-    print(f"  [ES] 音频数据处理完成，总耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-
-    return rows
-
-
-def get_pg_conn() -> Any:
-    if psycopg2 is None:
-        raise RuntimeError("缺少psycopg2依赖，请安装后再运行。")
-    host = os.getenv("PG_DB_HOST")
-    port = int(os.getenv("PG_DB_PORT", "5432"))
-    user = os.getenv("PG_DB_USER")
-    password = os.getenv("PG_DB_PASSWORD")
-    dbname = os.getenv("PG_DB_DATABASE")
-    if not host or not dbname:
-        raise RuntimeError("PG数据库环境变量未配置完整")
-    conn = psycopg2.connect(host=host, port=port, user=user, password=password, dbname=dbname)
-    return conn
-
-
-def get_mysql_conn(database: str) -> Any:
-    """
-    获取MySQL数据库连接
-
-    Args:
-        database: 数据库名，可选值：'vala_user' 或 'vala_test'
-                 vala_user 使用 online 配置（环境变量后缀 _online）
-                 vala_test 使用默认配置
-
-    Returns:
-        MySQL连接对象
-    """
-    if pymysql is None:
-        raise RuntimeError("缺少pymysql依赖，请安装后再运行。")
-
-    # 根据数据库选择不同的环境变量配置
-    if database == "vala_user":
-        # vala_user 数据库使用 online 配置
-        host = os.getenv("MYSQL_HOST_online")
-        port = int(os.getenv("MYSQL_PORT_online", "3306"))
-        user = os.getenv("MYSQL_USERNAME_online")
-        password = os.getenv("MYSQL_PASSWORD_online")
-        if not host:
-            raise RuntimeError("MySQL数据库环境变量未配置完整（缺少MYSQL_HOST_online）")
-    else:
-        # vala_test 等其他数据库使用默认配置
-        host = os.getenv("MYSQL_HOST")
-        port = int(os.getenv("MYSQL_PORT", "3306"))
-        user = os.getenv("MYSQL_USERNAME")
-        password = os.getenv("MYSQL_PASSWORD")
-        if not host:
-            raise RuntimeError("MySQL数据库环境变量未配置完整（缺少MYSQL_HOST）")
-
-    conn = pymysql.connect(
-        host=host,
-        port=port,
-        user=user,
-        password=password,
-        database=database,  # 直接使用传入的数据库名
-        charset="utf8mb4",
-        cursorclass=pymysql.cursors.DictCursor,
-    )
-    return conn
-
-
-def get_id_2_unit_index(conn: Any) -> Dict[int, int]:
-    """
-    从MySQL获取 story_id 到 unit_id 的映射关系
-
-    Args:
-        conn: MySQL数据库连接
-
-    Returns:
-        映射字典 {story_id: unit_id}
-    """
-    sql = """
-    SELECT *
-    FROM `vala_game_info`
-    WHERE id > 0
-      AND `vala_game_info`.`deleted_at` IS NULL
-    ORDER BY season_package_id asc, `index` asc
-    """
-    try:
-        with conn.cursor() as cur:
-            cur.execute(sql)
-            rows = cur.fetchall() or []
-            # 构建映射表：按查询结果的顺序，索引即为unit_id
-            id_2_unit_index = {}
-            for index, row in enumerate(rows):
-                id_2_unit_index[row["id"]] = index
-            return id_2_unit_index
-    except Exception as e:
-        print(f"[ERROR] 获取story_id到unit_id映射失败: {e}")
-        return {}
-
-
-def get_chapter_id_to_lesson_id(conn: Any) -> Dict[int, int]:
-    """
-    从MySQL获取 chapter_id 到 lesson_id 的映射关系
-
-    Args:
-        conn: MySQL数据库连接
-
-    Returns:
-        映射字典 {chapter_id: lesson_id}
-    """
-    sql = """
-    SELECT id, `index`
-    FROM `vala_game_chapter`
-    WHERE deleted_at IS NULL
-    """
-    try:
-        with conn.cursor() as cur:
-            cur.execute(sql)
-            rows = cur.fetchall() or []
-            # 构建映射表：chapter的index字段即为lesson_id
-            chapter_id_to_lesson_id = {}
-            for row in rows:
-                chapter_id_to_lesson_id[row["id"]] = row["index"]
-            return chapter_id_to_lesson_id
-    except Exception as e:
-        print(f"[ERROR] 获取chapter_id到lesson_id映射失败: {e}")
-        return {}
-
-
-# 组件类型到组件名称的映射
-COMPONENT_TYPE_NAMES = {
-    "mid_vocab_item": "物品互动",
-    "mid_vocab_image": "图片互动",
-    "mid_vocab_fillBlank": "填词互动",
-    "mid_vocab_instruction": "指令互动",
-    "mid_sentence_dialogue": "对话互动",  # 需要根据mode进一步判断
-    "mid_sentence_voice": "语音互动",
-    "mid_sentence_material": "材料互动",
-    "mid_sentence_makeSentence": "造句互动",
-    "mid_grammar_cloze": "挖空互动",
-    "mid_grammar_sentence": "组句互动",
-    "mid_pron_pron": "发音互动",
-    "core_speaking_reply": "口语快答",
-    "core_speaking_inquiry": "口语妙问",
-    "core_speaking_explore": "口语探讨",
-    "core_speaking_monologue": "口语独白",
-    "core_reading_order": "合作阅读",
-    "core_listening_order": "合作听力",
-    "core_writing_imgMakeSentence": "看图组句",
-    "core_writing_imgWrite": "看图撰写",
-    "core_writing_questionMakeSentence": "问题组句",
-    "core_writing_questionWrite": "问题撰写",
-}
-
-
-def get_component_name(c_type: str, component_config: Optional[Dict[str, Any]]) -> str:
-    """
-    根据c_type和组件配置获取组件名称
-
-    Args:
-        c_type: 组件类型
-        component_config: 组件配置（用于判断对话互动的mode）
-
-    Returns:
-        组件名称
-    """
-    if not c_type:
-        return ""
-
-    # 特殊处理：对话互动需要根据mode判断
-    if c_type == "mid_sentence_dialogue" and component_config:
-        try:
-            question = component_config.get("question", {})
-            mode = question.get("mode", "")
-            if mode == "express":
-                return "对话互动-表达"
-            elif mode == "read":
-                return "对话互动-朗读"
-        except Exception:
-            pass
-
-    return COMPONENT_TYPE_NAMES.get(c_type, "")
-
-
-def batch_fetch_component_configs(play_records: List[Dict[str, Any]], mysql_conn: Any) -> Dict[str, Dict[str, Any]]:
-    """
-    批量查询组件配置信息
-
-    Args:
-        play_records: 播放记录列表
-        mysql_conn: MySQL连接
-
-    Returns:
-        组件配置映射 {c_type_c_id: {title, component_config, kp_relation_info}}
-    """
-    print(f"  [MySQL] 开始批量查询组件配置...")
-    start_time = datetime.datetime.now()
-
-    # 收集需要查询的c_type和c_id
-    mid_c_ids = set()
-    core_c_ids = set()
-    mid_type_id_pairs = []  # 用于调试日志
-    core_type_id_pairs = []
-
-    for record in play_records:
-        c_type = record.get("c_type", "")
-        c_id = record.get("c_id")
-        if c_type and c_id:
-            if c_type.startswith("mid"):
-                mid_c_ids.add(c_id)
-                mid_type_id_pairs.append((c_type, c_id))
-            elif c_type.startswith("core"):
-                core_c_ids.add(c_id)
-                core_type_id_pairs.append((c_type, c_id))
-
-    print(f"  [MySQL] 需要查询中互动组件: {len(mid_c_ids)}个, 核心互动组件: {len(core_c_ids)}个")
-    if mid_c_ids:
-        print(f"  [MySQL] 中互动组件ID列表（前10个）: {sorted(list(mid_c_ids))[:10]}")
-    if core_c_ids:
-        print(f"  [MySQL] 核心互动组件ID列表（前10个）: {sorted(list(core_c_ids))[:10]}")
-
-    config_map = {}
-
-    # 批量查询middle_interaction_component
-    if mid_c_ids:
-        try:
-            with mysql_conn.cursor() as cur:
-                placeholders = ','.join(['%s'] * len(mid_c_ids))
-                sql = f"""
-                SELECT c_id, c_type, title, component_config, kp_relation_info
-                FROM middle_interaction_component
-                WHERE c_id IN ({placeholders}) AND deleted_at IS NULL
-                """
-                print(f"  [MySQL] 执行中互动组件查询，查询条件: c_id IN ({len(mid_c_ids)}个ID)")
-                cur.execute(sql, tuple(mid_c_ids))
-                rows = cur.fetchall() or []
-                print(f"  [MySQL] 查询到{len(rows)}条中互动组件配置")
-
-                if len(rows) == 0 and len(mid_c_ids) > 0:
-                    print(f"  [MySQL] [警告] 查询结果为空！可能的原因：")
-                    print(f"  [MySQL]   - 数据库中没有匹配的c_id记录")
-                    print(f"  [MySQL]   - deleted_at字段不为NULL")
-                    print(f"  [MySQL]   - c_id不存在")
-
-                for idx, row in enumerate(rows):
-                    c_type = row.get("c_type", "")
-                    c_id = row.get("c_id")
-                    key = f"{c_type}_{c_id}"
-
-                    if idx < 3:  # 输出前3条的详细信息
-                        print(f"  [MySQL] [样例{idx+1}] id={c_id}, c_type={c_type}, key={key}")
-                        print(f"  [MySQL] [样例{idx+1}] title={row.get('title', '')[:50]}")
-
-                    # 解析component_config
-                    component_config = row.get("component_config")
-                    if isinstance(component_config, str):
-                        try:
-                            component_config = json.loads(component_config)
-                        except Exception as e:
-                            print(f"  [MySQL] [警告] 解析component_config失败 (id={c_id}): {e}")
-                            component_config = {}
-
-                    # 提取question字段作为摘要
-                    summary = ""
-                    if isinstance(component_config, dict):
-                        question = component_config.get("question")
-                        summary = to_json_str(question) if question else ""
-                        if idx < 3 and question:
-                            print(f"  [MySQL] [样例{idx+1}] 提取到question字段，长度: {len(summary)}")
-
-                    # 解析kp_relation_info
-                    kp_relation_info = row.get("kp_relation_info")
-                    if isinstance(kp_relation_info, str):
-                        try:
-                            kp_relation_info = json.loads(kp_relation_info)
-                        except Exception:
-                            kp_relation_info = []
-
-                    config_map[key] = {
-                        "title": row.get("title", ""),
-                        "component_config": component_config,
-                        "summary": summary,
-                        "kp_relation_info": to_json_str(kp_relation_info),
-                    }
-
-                print(f"  [MySQL] 中互动组件配置已加入config_map，当前map大小: {len(config_map)}")
-        except Exception as e:
-            print(f"  [MySQL] [错误] 查询中互动组件配置失败: {e}")
-            import traceback
-            traceback.print_exc()
-
-    # 批量查询core_interaction_component
-    if core_c_ids:
-        try:
-            with mysql_conn.cursor() as cur:
-                placeholders = ','.join(['%s'] * len(core_c_ids))
-                sql = f"""
-                SELECT c_id, c_type, title, component_config, kp_relation_info
-                FROM core_interaction_component
-                WHERE c_id IN ({placeholders}) AND deleted_at IS NULL
-                """
-                print(f"  [MySQL] 执行核心互动组件查询，查询条件: c_id IN ({len(core_c_ids)}个ID)")
-                cur.execute(sql, tuple(core_c_ids))
-                rows = cur.fetchall() or []
-                print(f"  [MySQL] 查询到{len(rows)}条核心互动组件配置")
-
-                if len(rows) == 0 and len(core_c_ids) > 0:
-                    print(f"  [MySQL] [警告] 查询结果为空！可能的原因：")
-                    print(f"  [MySQL]   - 数据库中没有匹配的c_id记录")
-                    print(f"  [MySQL]   - deleted_at字段不为NULL")
-                    print(f"  [MySQL]   - c_id不存在")
-
-                for idx, row in enumerate(rows):
-                    c_type = row.get("c_type", "")
-                    c_id = row.get("c_id")
-                    key = f"{c_type}_{c_id}"
-
-                    if idx < 3:  # 输出前3条的详细信息
-                        print(f"  [MySQL] [样例{idx+1}] id={c_id}, c_type={c_type}, key={key}")
-                        print(f"  [MySQL] [样例{idx+1}] title={row.get('title', '')[:50]}")
-
-                    # 解析component_config
-                    component_config = row.get("component_config")
-                    if isinstance(component_config, str):
-                        try:
-                            component_config = json.loads(component_config)
-                        except Exception as e:
-                            print(f"  [MySQL] [警告] 解析component_config失败 (id={c_id}): {e}")
-                            component_config = {}
-
-                    # 提取taskInfo字段作为摘要
-                    summary = ""
-                    if isinstance(component_config, dict):
-                        task_info = component_config.get("taskInfo")
-                        summary = to_json_str(task_info) if task_info else ""
-                        if idx < 3 and task_info:
-                            print(f"  [MySQL] [样例{idx+1}] 提取到taskInfo字段，长度: {len(summary)}")
-
-                    # 解析kp_relation_info
-                    kp_relation_info = row.get("kp_relation_info")
-                    if isinstance(kp_relation_info, str):
-                        try:
-                            kp_relation_info = json.loads(kp_relation_info)
-                        except Exception:
-                            kp_relation_info = []
-
-                    config_map[key] = {
-                        "title": row.get("title", ""),
-                        "component_config": component_config,
-                        "summary": summary,
-                        "kp_relation_info": to_json_str(kp_relation_info),
-                    }
-
-                print(f"  [MySQL] 核心互动组件配置已加入config_map，当前map大小: {len(config_map)}")
-        except Exception as e:
-            print(f"  [MySQL] [错误] 查询核心互动组件配置失败: {e}")
-            import traceback
-            traceback.print_exc()
-
-    print(f"  [MySQL] 组件配置查询完成，共{len(config_map)}条，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-    return config_map
-
-
-def calculate_accuracy(question_list: Any) -> float:
-    """
-    计算问题列表的正确率
-
-    Args:
-        question_list: 问题列表（可能是JSON字符串或list）
-
-    Returns:
-        正确率（百分比，保留2位小数）
-    """
-    try:
-        if isinstance(question_list, str):
-            question_list = json.loads(question_list)
-
-        if not isinstance(question_list, list) or len(question_list) == 0:
-            return 0.0
-
-        total = len(question_list)
-        correct = sum(1 for q in question_list if q.get('isRight') == True)
-        accuracy = round(correct / total * 100, 2) if total > 0 else 0.0
-
-        return accuracy
-    except Exception:
-        return 0.0
-
-
-
-def fetch_character_ids_by_account(account_id: str, conn: Any) -> List[str]:
-    """根据账户id查询对应的角色id列表"""
-    sql = "SELECT id FROM vala_app_character WHERE account_id = %s"
-    try:
-        with conn.cursor() as cur:
-            cur.execute(sql, (account_id,))
-            rows = cur.fetchall() or []
-            return [str(row["id"]) for row in rows if row.get("id")]
-    except Exception as e:
-        print(f"[ERROR] 查询账户id={account_id}的角色id失败: {e}")
-        return []
-
-
-def fetch_pg_play_records(user_id: str, conn: Any, mysql_conn: Any) -> List[Dict[str, Any]]:
-    """
-    查询互动组件学习记录并补充组件配置信息
-
-    Args:
-        user_id: 用户ID（角色ID）
-        conn: PostgreSQL数据库连接
-        mysql_conn: MySQL数据库连接
-
-    Returns:
-        互动组件学习记录列表
-    """
-    print(f"  [PG] 开始查询互动组件学习记录（8张分表）...")
-    start_time = datetime.datetime.now()
-
-    tables = [f"user_component_play_record_{i}" for i in range(8)]
-    rows: List[Dict[str, Any]] = []
-    with conn.cursor(cursor_factory=RealDictCursor) as cur:
-        for t in tables:
-            try:
-                cur.execute(
-                    f"""
-                    SELECT user_id, component_unique_code, session_id, c_type, c_id,
-                           play_result, user_behavior_info, updated_at
-                    FROM {t}
-                    WHERE user_id = %s
-                    ORDER BY updated_at DESC
-                    """,
-                    (user_id,),
-                )
-                part = cur.fetchall() or []
-                if part:
-                    print(f"  [PG] 表{t}查到{len(part)}条记录")
-                for r in part:
-                    r = dict(r)
-                    r["play_result"] = to_json_str(r.get("play_result"))
-                    r["user_behavior_info"] = to_json_str(r.get("user_behavior_info"))
-                    # 将带时区的时间转换为无时区，避免Excel写入报错
-                    upd = r.get("updated_at")
-                    if isinstance(upd, datetime.datetime):
-                        try:
-                            if upd.tzinfo is not None and upd.tzinfo.utcoffset(upd) is not None:
-                                r["updated_at"] = upd.replace(tzinfo=None)
-                        except Exception:
-                            # 回退为字符串
-                            r["updated_at"] = str(upd)
-                    rows.append(r)
-            except Exception as e:
-                print(f"  [PG] 表{t}查询失败: {e}")
-                continue
-
-    rows.sort(key=lambda x: parse_time(x.get("updated_at")) or datetime.datetime.min, reverse=True)
-    print(f"  [PG] 互动组件学习记录查询完成，共{len(rows)}条，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-
-    # 批量查询组件配置
-    if rows and mysql_conn:
-        config_map = batch_fetch_component_configs(rows, mysql_conn)
-
-        # 补充组件信息
-        print(f"  [PG] 开始补充组件配置信息...")
-        filled_count = 0
-        empty_count = 0
-        sample_keys = []
-        sample_mode_check = []  # 检查对话互动的mode
-
-        for r in rows:
-            c_type = r.get("c_type", "")
-            c_id = r.get("c_id")
-            key = f"{c_type}_{c_id}" if c_type and c_id else ""
-
-            config = config_map.get(key, {})
-            component_config = config.get("component_config", {})
-
-            component_name = get_component_name(c_type, component_config)
-            r["互动组件名称"] = component_name
-            r["组件标题"] = config.get("title", "")
-            r["组件配置摘要"] = config.get("summary", "")
-            r["知识点"] = config.get("kp_relation_info", "")
-
-            # 统计填充情况
-            if config:
-                filled_count += 1
-                if len(sample_keys) < 3:
-                    sample_keys.append((key, component_name, r["组件标题"][:30] if r["组件标题"] else ""))
-
-                # 检查对话互动的mode
-                if c_type == "mid_sentence_dialogue" and len(sample_mode_check) < 3:
-                    mode = ""
-                    if isinstance(component_config, dict):
-                        question = component_config.get("question", {})
-                        if isinstance(question, dict):
-                            mode = question.get("mode", "")
-                    sample_mode_check.append({
-                        "key": key,
-                        "mode": mode,
-                        "component_name": component_name
-                    })
-            else:
-                empty_count += 1
-                if empty_count <= 5:  # 输出前5个未匹配的key
-                    print(f"  [PG] [警告] 未找到组件配置: key={key}")
-
-        print(f"  [PG] 组件配置信息补充完成")
-        print(f"  [PG] 匹配到配置: {filled_count}条, 未匹配: {empty_count}条")
-        if sample_keys:
-            print(f"  [PG] 样例数据（前3条）:")
-            for key, name, title in sample_keys:
-                print(f"  [PG]   - key={key}, 名称={name}, 标题={title}")
-
-        if sample_mode_check:
-            print(f"  [PG] 对话互动mode检查（前3条）:")
-            for s in sample_mode_check:
-                print(f"  [PG]   - key={s['key']}, mode={s['mode']}, 最终名称={s['component_name']}")
-
-    return rows
-
-
-def fetch_pg_unit_review(user_id: str, conn: Any, id_2_unit_index: Dict[int, int], chapter_id_to_lesson_id: Dict[int, int]) -> List[Dict[str, Any]]:
-    """
-    查询课程巩固记录
-
-    Args:
-        user_id: 用户ID（角色ID）
-        conn: PostgreSQL数据库连接
-        id_2_unit_index: story_id到unit_id的映射字典
-        chapter_id_to_lesson_id: chapter_id到lesson_id的映射字典
-
-    Returns:
-        课程巩固记录列表
-    """
-    print(f"  [PG] 开始查询课程巩固记录...")
-    start_time = datetime.datetime.now()
-
-    sql = (
-        "SELECT user_id, story_id, chapter_id, question_list, updated_at "
-        "FROM user_unit_review_question_result WHERE user_id = %s ORDER BY updated_at DESC"
-    )
-    with conn.cursor(cursor_factory=RealDictCursor) as cur:
-        try:
-            cur.execute(sql, (user_id,))
-            rows = cur.fetchall() or []
-        except Exception as e:
-            print(f"  [PG] 课程巩固记录查询失败: {e}")
-            rows = []
-    out: List[Dict[str, Any]] = []
-    for r in rows:
-        d = dict(r)
-
-        # 映射 story_id 到 unit_id
-        story_id = d.get("story_id")
-        unit_id = id_2_unit_index.get(story_id) if story_id else None
-        d["unit_id"] = unit_id
-
-        # 映射 chapter_id 到 lesson_id
-        chapter_id = d.get("chapter_id")
-        lesson_id = chapter_id_to_lesson_id.get(chapter_id) if chapter_id else None
-        d["lesson_id"] = lesson_id
-
-        # 计算正确率
-        question_list = d.get("question_list")
-        d["正确率"] = calculate_accuracy(question_list)
-
-        d["question_list"] = to_json_str(question_list)
-        upd = d.get("updated_at")
-        if isinstance(upd, datetime.datetime):
-            try:
-                if upd.tzinfo is not None and upd.tzinfo.utcoffset(upd) is not None:
-                    d["updated_at"] = upd.replace(tzinfo=None)
-            except Exception:
-                d["updated_at"] = str(upd)
-        out.append(d)
-
-    print(f"  [PG] 课程巩固记录查询完成，共{len(out)}条，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-    return out
-
-
-def fetch_pg_unit_challenge(user_id: str, conn: Any, id_2_unit_index: Dict[int, int]) -> List[Dict[str, Any]]:
-    """
-    查询单元挑战记录
-
-    Args:
-        user_id: 用户ID（角色ID）
-        conn: PostgreSQL数据库连接
-        id_2_unit_index: story_id到unit_id的映射字典
-
-    Returns:
-        单元挑战记录列表
-    """
-    print(f"  [PG] 开始查询单元挑战记录...")
-    start_time = datetime.datetime.now()
-
-    sql = (
-        "SELECT user_id, story_id, category, score_text, question_list, updated_at "
-        "FROM user_unit_challenge_question_result WHERE user_id = %s ORDER BY updated_at DESC"
-    )
-    with conn.cursor(cursor_factory=RealDictCursor) as cur:
-        try:
-            cur.execute(sql, (user_id,))
-            rows = cur.fetchall() or []
-        except Exception as e:
-            print(f"  [PG] 单元挑战记录查询失败: {e}")
-            rows = []
-    out: List[Dict[str, Any]] = []
-    for r in rows:
-        d = dict(r)
-
-        # 映射 story_id 到 unit_id
-        story_id = d.get("story_id")
-        unit_id = id_2_unit_index.get(story_id) if story_id else None
-        d["unit_id"] = unit_id
-
-        d["question_list"] = to_json_str(d.get("question_list"))
-        upd = d.get("updated_at")
-        if isinstance(upd, datetime.datetime):
-            try:
-                if upd.tzinfo is not None and upd.tzinfo.utcoffset(upd) is not None:
-                    d["updated_at"] = upd.replace(tzinfo=None)
-            except Exception:
-                d["updated_at"] = str(upd)
-        out.append(d)
-
-    print(f"  [PG] 单元挑战记录查询完成，共{len(out)}条，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-    return out
-
-
-def fetch_pg_unit_summary(user_id: str, conn: Any, id_2_unit_index: Dict[int, int]) -> List[Dict[str, Any]]:
-    """
-    查询单元总结知识点结果数据
-
-    Args:
-        user_id: 用户ID（角色ID）
-        conn: PostgreSQL数据库连接
-        id_2_unit_index: story_id到unit_id的映射字典
-
-    Returns:
-        单元总结记录列表
-    """
-    print(f"  [PG] 开始查询单元总结记录...")
-    start_time = datetime.datetime.now()
-
-    sql = (
-        "SELECT id, user_id, story_id, updated_at, km_id, km_type, play_time "
-        "FROM user_unit_summary_km_result WHERE user_id = %s AND deleted_at IS NULL ORDER BY updated_at DESC"
-    )
-    with conn.cursor(cursor_factory=RealDictCursor) as cur:
-        try:
-            cur.execute(sql, (user_id,))
-            rows = cur.fetchall() or []
-        except Exception as e:
-            print(f"  [PG] 单元总结记录查询失败: {e}")
-            rows = []
-
-    out: List[Dict[str, Any]] = []
-    for r in rows:
-        d = dict(r)
-        # 映射 story_id 到 unit_id
-        story_id = d.get("story_id")
-        unit_id = id_2_unit_index.get(story_id) if story_id else None
-        d["unit_id"] = unit_id
-
-        # 转换 play_time (毫秒) 为秒 (整数)
-        play_time = d.get("play_time")
-        d["play_time_seconds"] = play_time // 1000 if play_time else 0
-
-        # 移除时区信息
-        upd = d.get("updated_at")
-        if isinstance(upd, datetime.datetime):
-            try:
-                if upd.tzinfo is not None and upd.tzinfo.utcoffset(upd) is not None:
-                    d["updated_at"] = upd.replace(tzinfo=None)
-            except Exception:
-                d["updated_at"] = str(upd)
-        out.append(d)
-
-    print(f"  [PG] 单元总结记录查询完成，共{len(out)}条，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-    return out
-
-
-def generate_statistics(sheet2_rows: List[Dict[str, Any]], sheet5_rows: List[Dict[str, Any]]) -> tuple:
-    """
-    生成汇总统计数据
-
-    Args:
-        sheet2_rows: 互动组件学习记录
-        sheet5_rows: 单元总结记录
-
-    Returns:
-        (组件统计DataFrame, 知识点统计DataFrame, 单元时长统计DataFrame)
-    """
-    if pd is None:
-        raise RuntimeError("缺少pandas依赖，请安装后再运行。")
-
-    print(f"  [统计] 开始生成汇总统计数据...")
-    start_time = datetime.datetime.now()
-
-    from collections import defaultdict
-
-    # ============ a. 所有互动-按互动组件类型-通过情况统计 ============
-    component_stats_data = []
-    component_stats = defaultdict(lambda: {"Perfect": 0, "Good": 0, "Failed": 0, "Pass": 0, "Oops": 0, "total": 0})
-
-    # 用于调试
-    sample_results = []
-    parse_error_count = 0
-
-    for idx, record in enumerate(sheet2_rows):
-        component_name = record.get("互动组件名称", "")
-        if not component_name:
-            continue
-
-        play_result_str = record.get("play_result", "")
-
-        # 解析play_result
-        result = ""
-        try:
-            # 先判断是否是简单的字符串（Perfect/Good/Failed/Pass/Oops）
-            if isinstance(play_result_str, str):
-                # 去除空格后检查
-                stripped = play_result_str.strip()
-                if stripped in ["Perfect", "Good", "Failed", "Pass", "Oops"]:
-                    # 直接使用
-                    result = stripped
-                else:
-                    # 尝试JSON解析
-                    try:
-                        play_result = json.loads(play_result_str)
-                        if isinstance(play_result, dict):
-                            result = play_result.get("result", "")
-                        else:
-                            result = ""
-                    except:
-                        result = ""
-            else:
-                # 如果不是字符串，尝试当dict处理
-                if isinstance(play_result_str, dict):
-                    result = play_result_str.get("result", "")
-                else:
-                    result = ""
-
-            # 收集前3个样例
-            if idx < 3:
-                sample_results.append({
-                    "component": component_name,
-                    "raw": str(play_result_str)[:100],
-                    "result": result
-                })
-        except Exception as e:
-            parse_error_count += 1
-            if parse_error_count <= 3:
-                print(f"  [统计] [警告] 解析play_result失败 (第{idx+1}条): {e}, 原始值: {str(play_result_str)[:100]}")
-            result = ""
-
-        component_stats[component_name]["total"] += 1
-        if result in ["Perfect", "Good", "Failed", "Pass", "Oops"]:
-            component_stats[component_name][result] += 1
-
-    print(f"  [统计] play_result解析样例（前3条）:")
-    for s in sample_results:
-        print(f"  [统计]   - 组件: {s['component']}, 结果: {s['result']}, 原始: {s['raw']}")
-    if parse_error_count > 0:
-        print(f"  [统计] play_result解析失败总数: {parse_error_count}")
-
-    # 生成统计数据行
-    for component_name in sorted(component_stats.keys()):
-        stats = component_stats[component_name]
-        total = stats["total"]
-        perfect = stats["Perfect"]
-        good = stats["Good"]
-        failed = stats["Failed"]
-        pass_count = stats["Pass"]
-        oops = stats["Oops"]
-
-        perfect_ratio = round(perfect / total * 100, 2) if total > 0 else 0
-        good_ratio = round(good / total * 100, 2) if total > 0 else 0
-        failed_ratio = round(failed / total * 100, 2) if total > 0 else 0
-        pass_ratio = round(pass_count / total * 100, 2) if total > 0 else 0
-        oops_ratio = round(oops / total * 100, 2) if total > 0 else 0
-
-        component_stats_data.append({
-            "互动组件名称": component_name,
-            "总数量": total,
-            "Perfect数量": perfect,
-            "Good数量": good,
-            "Failed数量": failed,
-            "Pass数量": pass_count,
-            "Oops数量": oops,
-            "Perfect比例(%)": perfect_ratio,
-            "Good比例(%)": good_ratio,
-            "Failed比例(%)": failed_ratio,
-            "Pass比例(%)": pass_ratio,
-            "Oops比例(%)": oops_ratio,
-        })
-
-    # ============ b. 中互动组件-按知识点-通过情况统计 ============
-    kp_stats_data = []
-    kp_stats = defaultdict(lambda: {"Perfect": 0, "Good": 0, "Failed": 0, "Pass": 0, "Oops": 0, "total": 0})
-
-    # 调试信息
-    mid_count = 0
-    has_kp_count = 0
-    sample_kp_records = []
-
-    for idx, record in enumerate(sheet2_rows):
-        c_type = record.get("c_type", "")
-        if not c_type or not c_type.startswith("mid"):
-            continue
-
-        mid_count += 1
-        kp_relation_info_str = record.get("知识点", "")
-
-        if not kp_relation_info_str:
-            continue
-
-        has_kp_count += 1
-
-        # 解析知识点
-        try:
-            if isinstance(kp_relation_info_str, str):
-                kp_relation_info = json.loads(kp_relation_info_str)
-            else:
-                kp_relation_info = kp_relation_info_str
-
-            if not isinstance(kp_relation_info, list):
-                continue
-
-            # 收集样例
-            if len(sample_kp_records) < 3:
-                sample_kp_records.append({
-                    "c_type": c_type,
-                    "kp_count": len(kp_relation_info),
-                    "kp_info": str(kp_relation_info)[:200]
-                })
-
-            # 解析play_result（使用相同的逻辑）
-            play_result_str = record.get("play_result", "")
-            result = ""
-            if isinstance(play_result_str, str):
-                stripped = play_result_str.strip()
-                if stripped in ["Perfect", "Good", "Failed", "Pass", "Oops"]:
-                    result = stripped
-                else:
-                    try:
-                        play_result = json.loads(play_result_str)
-                        if isinstance(play_result, dict):
-                            result = play_result.get("result", "")
-                    except:
-                        pass
-            elif isinstance(play_result_str, dict):
-                result = play_result_str.get("result", "")
-
-            # 为每个知识点统计
-            for kp in kp_relation_info:
-                if not isinstance(kp, dict):
-                    continue
-
-                kp_id = kp.get("kpId", "")
-                kp_type = kp.get("kpType", "")
-                kp_title = kp.get("kpTitle", "")
-
-                if not kp_id:
-                    continue
-
-                kp_key = f"{kp_id}|{kp_type}|{kp_title}"
-                kp_stats[kp_key]["total"] += 1
-                if result in ["Perfect", "Good", "Failed", "Pass", "Oops"]:
-                    kp_stats[kp_key][result] += 1
-
-        except Exception as e:
-            if len(sample_kp_records) < 5:
-                print(f"  [统计] [警告] 解析知识点失败: {e}, 原始值: {str(kp_relation_info_str)[:100]}")
-            continue
-
-    print(f"  [统计] 中互动组件统计: 总数={mid_count}, 有知识点={has_kp_count}, 知识点条目数={len(kp_stats)}")
-    if sample_kp_records:
-        print(f"  [统计] 知识点样例（前3条）:")
-        for s in sample_kp_records:
-            print(f"  [统计]   - c_type={s['c_type']}, 知识点数量={s['kp_count']}, 内容={s['kp_info']}")
-
-    # 生成知识点统计数据行
-    for kp_key in sorted(kp_stats.keys()):
-        parts = kp_key.split("|")
-        if len(parts) != 3:
-            continue
-
-        kp_id, kp_type, kp_title = parts
-        stats = kp_stats[kp_key]
-        total = stats["total"]
-        perfect = stats["Perfect"]
-        good = stats["Good"]
-        failed = stats["Failed"]
-        pass_count = stats["Pass"]
-        oops = stats["Oops"]
-
-        perfect_ratio = round(perfect / total * 100, 2) if total > 0 else 0
-        good_ratio = round(good / total * 100, 2) if total > 0 else 0
-        failed_ratio = round(failed / total * 100, 2) if total > 0 else 0
-        pass_ratio = round(pass_count / total * 100, 2) if total > 0 else 0
-        oops_ratio = round(oops / total * 100, 2) if total > 0 else 0
-
-        kp_stats_data.append({
-            "知识点ID": kp_id,
-            "知识点类型": kp_type,
-            "知识点标题": kp_title,
-            "总数量": total,
-            "Perfect数量": perfect,
-            "Good数量": good,
-            "Failed数量": failed,
-            "Pass数量": pass_count,
-            "Oops数量": oops,
-            "Perfect比例(%)": perfect_ratio,
-            "Good比例(%)": good_ratio,
-            "Failed比例(%)": failed_ratio,
-            "Pass比例(%)": pass_ratio,
-            "Oops比例(%)": oops_ratio,
-        })
-
-    # ============ c. 单元总结-按单元统计时长 ============
-    unit_time_stats_data = []
-    unit_time_stats = defaultdict(int)
-
-    for record in sheet5_rows:
-        unit_id = record.get("unit_id")
-        play_time_seconds = record.get("play_time_seconds", 0)
-
-        if unit_id is not None:
-            unit_time_stats[unit_id] += play_time_seconds
-
-    # 生成单元时长统计数据行
-    for unit_id in sorted(unit_time_stats.keys()):
-        total_seconds = unit_time_stats[unit_id]
-        total_minutes = int(total_seconds / 60)
-
-        unit_time_stats_data.append({
-            "单元ID": f"unit_{unit_id}",
-            "总时长(秒)": total_seconds,
-            "总时长(分钟)": total_minutes,
-        })
-
-    print(f"  [统计] 汇总统计数据生成完成，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-    print(f"  [统计] 生成了{len(component_stats_data)}条组件统计, {len(kp_stats_data)}条知识点统计, {len(unit_time_stats_data)}条单元时长统计")
-
-    return (
-        pd.DataFrame(component_stats_data),
-        pd.DataFrame(kp_stats_data),
-        pd.DataFrame(unit_time_stats_data)
-    )
-
-
-
-def write_excel(path: str, sheet1_rows: List[Dict[str, Any]], sheet2_rows: List[Dict[str, Any]], sheet3_rows: List[Dict[str, Any]], sheet4_rows: List[Dict[str, Any]], sheet5_rows: List[Dict[str, Any]], stats_component_df: Any, stats_kp_df: Any, stats_unit_time_df: Any) -> None:
-    if pd is None:
-        raise RuntimeError("缺少pandas依赖，请安装后再运行。")
-
-    print(f"  [Excel] 开始写入Excel文件: {path}")
-    start_time = datetime.datetime.now()
-
-    out_dir = os.path.dirname(path) or "."
-    os.makedirs(out_dir, exist_ok=True)
-    with pd.ExcelWriter(path, engine="openpyxl") as writer:
-        pd.DataFrame(sheet1_rows, columns=SHEET1_COLUMNS).to_excel(writer, sheet_name="全部音频数据", index=False)
-        pd.DataFrame(sheet2_rows, columns=SHEET2_COLUMNS).to_excel(writer, sheet_name="互动组件学习记录", index=False)
-        pd.DataFrame(sheet3_rows, columns=SHEET3_COLUMNS).to_excel(writer, sheet_name="课程巩固记录", index=False)
-        pd.DataFrame(sheet4_rows, columns=SHEET4_COLUMNS).to_excel(writer, sheet_name="单元挑战记录", index=False)
-        pd.DataFrame(sheet5_rows, columns=SHEET5_COLUMNS).to_excel(writer, sheet_name="单元总结记录", index=False)
-        stats_component_df.to_excel(writer, sheet_name="统计-互动组件通过情况", index=False)
-        stats_kp_df.to_excel(writer, sheet_name="统计-知识点通过情况", index=False)
-        stats_unit_time_df.to_excel(writer, sheet_name="统计-单元总结时长", index=False)
-
-    print(f"  [Excel] 写入完成，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-
-
-def get_date_str() -> str:
-    """获取当前日期字符串 格式：YYYYMMDD"""
-    return datetime.datetime.now().strftime("%Y%m%d")
-
-
-def export_single_user(user_id: str, es_cfg: Dict[str, Any], pg_conn: Any, mysql_conn: Any, output_path: str, id_2_unit_index: Dict[int, int], chapter_id_to_lesson_id: Dict[int, int]) -> bool:
-    """
-    导出单个角色id的数据
-
-    Args:
-        user_id: 角色ID
-        es_cfg: ES配置
-        pg_conn: PostgreSQL连接
-        mysql_conn: MySQL连接
-        output_path: 输出路径
-        id_2_unit_index: story_id到unit_id的映射字典
-        chapter_id_to_lesson_id: chapter_id到lesson_id的映射字典
-
-    Returns:
-        True表示成功，False表示失败
-    """
-    try:
-        print(f"\n[INFO] ========== 开始导出角色id={user_id} ==========")
-        total_start_time = datetime.datetime.now()
-
-        # 查询ES数据
-        sheet1_rows = fetch_es_user_audio(user_id, es_cfg)
-
-        # 查询PG数据
-        sheet2_rows = fetch_pg_play_records(user_id, pg_conn, mysql_conn)
-        sheet3_rows = fetch_pg_unit_review(user_id, pg_conn, id_2_unit_index, chapter_id_to_lesson_id)
-        sheet4_rows = fetch_pg_unit_challenge(user_id, pg_conn, id_2_unit_index)
-        sheet5_rows = fetch_pg_unit_summary(user_id, pg_conn, id_2_unit_index)
-
-        # 检查是否有有效数据
-        total_records = len(sheet1_rows) + len(sheet2_rows) + len(sheet3_rows) + len(sheet4_rows) + len(sheet5_rows)
-        print(f"  [统计] 数据汇总:")
-        print(f"    - 全部音频数据: {len(sheet1_rows)}条")
-        print(f"    - 互动组件学习记录: {len(sheet2_rows)}条")
-        print(f"    - 课程巩固记录: {len(sheet3_rows)}条")
-        print(f"    - 单元挑战记录: {len(sheet4_rows)}条")
-        print(f"    - 单元总结记录: {len(sheet5_rows)}条")
-        print(f"    - 总计: {total_records}条")
-
-        if total_records == 0:
-            print(f"[WARN] 角色id={user_id} 没有找到任何有效记录，跳过导出")
-            return False
-
-        # 生成汇总统计数据
-        stats_component_df, stats_kp_df, stats_unit_time_df = generate_statistics(sheet2_rows, sheet5_rows)
-
-        # 写入Excel
-        write_excel(output_path, sheet1_rows, sheet2_rows, sheet3_rows, sheet4_rows, sheet5_rows, stats_component_df, stats_kp_df, stats_unit_time_df)
-
-        total_time = (datetime.datetime.now() - total_start_time).total_seconds()
-        print(f"[INFO] 角色id={user_id} 导出成功")
-        print(f"[INFO] 文件路径: {output_path}")
-        print(f"[INFO] 总耗时: {total_time:.2f}秒")
-        print(f"[INFO] ========== 完成 ==========\n")
-        return True
-
-    except Exception as e:
-        print(f"[ERROR] 角色id={user_id} 导出失败: {e}")
-        import traceback
-        traceback.print_exc()
-        return False
-
-
-def main():
-    load_env()
-
-    # 确定运行模式并收集需要导出的角色id列表
-    user_id_list: List[tuple] = []  # [(user_id, account_id or None), ...]
-    date_str = get_date_str()
-
-    # 检查三种模式的配置
-    has_user_id = USER_ID is not None
-    has_user_id_list = USER_ID_LIST is not None and len(USER_ID_LIST) > 0
-    has_account_id_list = ACCOUNT_ID_LIST is not None and len(ACCOUNT_ID_LIST) > 0
-
-    # 验证只能配置一种模式
-    mode_count = sum([has_user_id, has_user_id_list, has_account_id_list])
-    if mode_count == 0:
-        raise RuntimeError("请配置 USER_ID、USER_ID_LIST 或 ACCOUNT_ID_LIST 中的一个")
-    if mode_count > 1:
-        raise RuntimeError("USER_ID、USER_ID_LIST、ACCOUNT_ID_LIST 只能配置一个，请检查配置")
-
-    # 模式1：单个角色id
-    if has_user_id:
-        user_id_list = [(str(USER_ID), None)]
-        print(f"[INFO] 运行模式：单个角色id")
-
-    # 模式2：角色id列表
-    elif has_user_id_list:
-        user_id_list = [(str(uid), None) for uid in USER_ID_LIST]
-        print(f"[INFO] 运行模式：角色id列表，共{len(user_id_list)}个角色")
-
-    # 模式3：账户id列表
-    elif has_account_id_list:
-        print(f"[INFO] 运行模式：账户id列表，共{len(ACCOUNT_ID_LIST)}个账户")
-        mysql_conn = None
-        try:
-            mysql_conn = get_mysql_conn("vala_user")  # 查询用户表，使用 vala_user 数据库
-            for account_id in ACCOUNT_ID_LIST:
-                account_id_str = str(account_id)
-                print(f"[INFO] 查询账户id={account_id_str}对应的角色id...")
-                character_ids = fetch_character_ids_by_account(account_id_str, mysql_conn)
-                if not character_ids:
-                    print(f"[WARN] 账户id={account_id_str} 未找到关联的角色id，跳过")
-                    continue
-                print(f"[INFO] 账户id={account_id_str} 找到{len(character_ids)}个角色id: {character_ids}")
-                for cid in character_ids:
-                    user_id_list.append((cid, account_id_str))
-        finally:
-            if mysql_conn:
-                try:
-                    mysql_conn.close()
-                except Exception:
-                    pass
-
-    if not user_id_list:
-        print("[WARN] 没有需要导出的角色id，程序退出")
-        return
-
-    # 初始化连接
-    es_cfg = get_es_config()
-    pg_conn = get_pg_conn()
-
-    # 获取映射表（只需要查询一次，所有角色共用）
-    print(f"\n[INFO] ===== 准备工作：获取映射表 =====")
-    mysql_conn = None
-    id_2_unit_index = {}
-    chapter_id_to_lesson_id = {}
-    try:
-        print(f"[INFO] 正在连接MySQL数据库（vala_test）...")
-        mysql_conn = get_mysql_conn("vala_test")  # 查询游戏配置表，使用 vala_test 数据库
-        print(f"[INFO] 正在获取 story_id 到 unit_id 的映射...")
-        id_2_unit_index = get_id_2_unit_index(mysql_conn)
-        print(f"[INFO] 成功获取 {len(id_2_unit_index)} 个 story_id 映射")
-        print(f"[INFO] 正在获取 chapter_id 到 lesson_id 的映射...")
-        chapter_id_to_lesson_id = get_chapter_id_to_lesson_id(mysql_conn)
-        print(f"[INFO] 成功获取 {len(chapter_id_to_lesson_id)} 个 chapter_id 映射")
-    except Exception as e:
-        print(f"[ERROR] 获取映射表失败: {e}")
-        import traceback
-        traceback.print_exc()
-        if pg_conn:
-            try:
-                pg_conn.close()
-            except Exception:
-                pass
-        if mysql_conn:
-            try:
-                mysql_conn.close()
-            except Exception:
-                pass
-        return
-
-    try:
-        # 统计信息
-        success_count = 0
-        skip_count = 0
-
-        print(f"\n[INFO] ===== 开始批量导出 =====")
-        print(f"[INFO] 共需导出{len(user_id_list)}个角色\n")
-        batch_start_time = datetime.datetime.now()
-
-        # 循环处理每个角色id
-        for idx, (user_id, account_id) in enumerate(user_id_list, 1):
-            print(f"\n{'='*60}")
-            print(f"[INFO] 进度: {idx}/{len(user_id_list)} ({idx*100//len(user_id_list)}%)")
-            print(f"{'='*60}")
-
-            # 生成输出文件名
-            if account_id is None:
-                # 模式1和模式2：角色id_{}_导出时间_{}.xlsx
-                filename = f"角色id_{user_id}_导出时间_{date_str}.xlsx"
-            else:
-                # 模式3：账户id_{}_角色id_{}_导出时间_{}.xlsx
-                filename = f"账户id_{account_id}_角色id_{user_id}_导出时间_{date_str}.xlsx"
-
-            output_path = os.path.join(OUTPUT_DIR, filename)
-
-            # 导出单个角色的数据
-            result = export_single_user(user_id, es_cfg, pg_conn, mysql_conn, output_path, id_2_unit_index, chapter_id_to_lesson_id)
-            if result:
-                success_count += 1
-            else:
-                skip_count += 1
-
-        # 输出统计信息
-        batch_total_time = (datetime.datetime.now() - batch_start_time).total_seconds()
-        print(f"\n{'='*60}")
-        print(f"[INFO] ===== 全部导出完成 =====")
-        print(f"[INFO] 总计: {len(user_id_list)}个角色")
-        print(f"[INFO] 成功: {success_count}个")
-        print(f"[INFO] 跳过: {skip_count}个")
-        print(f"[INFO] 总耗时: {batch_total_time:.2f}秒 ({batch_total_time/60:.2f}分钟)")
-        if success_count > 0:
-            print(f"[INFO] 平均每个角色: {batch_total_time/success_count:.2f}秒")
-        print(f"{'='*60}\n")
-
-    finally:
-        if pg_conn:
-            try:
-                pg_conn.close()
-            except Exception:
-                pass
-        if mysql_conn:
-            try:
-                mysql_conn.close()
-            except Exception:
-                pass
-
-
-if __name__ == "__main__":
-    main()
diff --git a/new_export/export_user_id_data_debug.py b/new_export/export_user_id_data_debug.py
deleted file mode 100644
index 4be3cb8..0000000
--- a/new_export/export_user_id_data_debug.py
+++ /dev/null
@@ -1,1845 +0,0 @@
-"""
-初版需求v1.0: 2025.11.18
-
-导出 一个userId的多表数据， 最终按照不同sheet，输出到一个 excel文件中。
-
-1. 第一个sheet:"全部音频数据"
-es相关配置通过以下环境变量
-ES_HOST=xxx
-ES_PORT=9200
-ES_SCHEME=https
-ES_USER=elastic
-ES_PASSWORD=xxx
-
-index: user-audio
-
-脚本思路:
-过滤字段:
-userId == xxxx
-
-输出该userId的全部记录 按时间倒序排序
-包含以下字段内容:
-
-userId
-userMsg
-userName
-soeData
-audioUrl
-asrStatus
-componentId
-componentType
-dataVersion
-
-2. 第二个sheet:"互动组件学习记录"
-在 PGsql数据库中  筛选出 user_id 对应的记录 按时间(updated_at)倒序排列。
-数据库相关配置  从.env中读取:
-PG_DB_HOST = xxx
-PG_DB_PORT = xxx
-PG_DB_USER = xxx
-PG_DB_PASSWORD = xxx
-PG_DB_DATABASE = xxx
-
-读取以下数据表: 
-user_component_play_record_0 ~ user_component_play_record_7
-
-输出以下字段：
-user_id,
-component_unique_code,
-session_id,
-c_type,
-c_id,
-play_result,
-user_behavior_info,
-updated_at
-
-3.第三个sheet:"课程巩固记录"
-在 PGsql数据库中  筛选出 user_id 对应的记录 按时间(updated_at)倒序排列。
-
-数据表:user_unit_review_question_result
-
-输出以下字段:
-user_id
-story_id
-chapter_id
-question_list
-updated_at
-
-4.第四个sheet:"单元挑战记录"
-在 PGsql数据库中  筛选出 user_id 对应的记录 按时间(updated_at)倒序排列。
-
-数据表:user_unit_challenge_question_result
-
-输出以下字段:
-user_id
-story_id
-category
-score_text,
-question_list
-updated_at
-------------
-
-需求补充v1.1:
-"全部音频数据"这个sheet
-输出字段 添加timeStr 并按时间倒序排列  最新的记录 在最上面
-
-------------
-需求补充v1.2:
-"全部音频数据"这个sheet
-如果userMsg字段内容 包含 ”makee_id“ 要进行以下处理：
-
-从userMsg字段中提取出具体的makee_id:
-此时的字段样例:
-```
-asr msg信息为：{
-    "time_ms": 358,
-    "time_ms_api": 357,
-    "hot_words_str": "{\n \"context_type\": \"dialog_ctx\",\n \"context_data\": [\n  {\n   \"text\": \"planet Walla\"\n  },\n  {\n   \"text\": \"Walla\"\n  }\n ]\n}",
-    "makee_id": "d208c617-902f-4f81-8255-b5fb73599546",
-    "volcano_fast_x_tt_logid": "202511151541355DF72BE5EBFE73795BFD",
-    "api_name": "volcano-fast"
-}
-```
-然后基于makee_id 去另一个表里查记录:  index:llm_asr_log
-将查询到的记录的 result_text 字段内容 回填到 userMsg。
-将source字段内容 输出 到 source。
-
-如果userMsg字段内容 不包含 ”makee_id“ 保持之前的逻辑。
-
---------------
-需求补充 v1.3
-当前输入 只支持配置单个 userId (业务侧名称为角色id)
-
-
-期望扩展为以下逻辑:
-1. 改为配置 角色id list ， 分别 导出 多份excel文件。命名格式为 角色id_{}_导出时间_{}.xlsx
-2. 改为配置 账户id list ， 分别 导出 多份excel文件。命名格式为 账户id_{}_角色id_{}_导出时间_{}.xlsx
-
-关于 账户 id 到角色id 的映射逻辑，
-首先 读取 mysql 表 vala_app_character
-筛选 account_id字段值 == 账户id 的 记录， 其中 该记录 的 id值，则为角色id 一个 账户id 可以对应多个角色id
-
-本次需求只针对输入侧调整， 数据抽取聚合逻辑部分和之前保持一致
-
----------------
-需求补充 v1.4
-
-增加一个sheet "单元总结记录"，
-导出对应角色id的单元总结记录。   参考 export_unit_summary.py 中的原始数据提取方案即可(不必关注其中的数据统计部分)。
-
-其他已有逻辑保持不动哦。
-
-----------------
-需求补充 v1.5
-
-1."互动组件学习记录"sheet 增加以下字段
-"互动组件名称"、"组件标题"、"组件配置摘要"、"知识点":
-字段取值规则:
-根据 c_type 及组件配置(从mysql表获取) 进行映射和处理:
-```
-1）.如果 c_type 开头为"mid"
-
-则读取下表:表名:middle_interaction_component
-
-获取以下字段值:
-title (作为组件标题)
-component_config (完整的组件配置)   获取其中 的 question 字段值 作为 组件配置摘要；
-kp_relation_info 字段值  作为 知识点
-
-"互动组件名称"规则:
-
-"物品互动": "mid_vocab_item",
-"图片互动": "mid_vocab_image",
-"填词互动": "mid_vocab_fillBlank",
-"指令互动": "mid_vocab_instruction"
-"对话互动-表达": "mid_sentence_dialogue", 且 component_config->question->mode == "express"
-"对话互动-朗读": "mid_sentence_dialogue", 且 component_config->question->mode == "read"
-"语音互动": "mid_sentence_voice",
-"材料互动": "mid_sentence_material",
-"造句互动": "mid_sentence_makeSentence"
-"挖空互动": "mid_grammar_cloze",
-"组句互动": "mid_grammar_sentence"
-"发音互动": "mid_pron_pron"
-
-
-2）. 如果 c_type 开头为"core"
-则读取下表:表名:core_interaction_component
-
-获取以下字段值:
-title (作为组件标题)
-component_config (完整的组件配置)   获取其中 的 taskInfo 字段值 作为 组件配置摘要
-kp_relation_info 字段值  作为 知识点
-
-"互动组件名称"规则:
-"口语快答": "core_speaking_reply",
-"口语妙问": "core_speaking_inquiry",
-"口语探讨": "core_speaking_explore",
-"口语独白": "core_speaking_monologue"
-"合作阅读": "core_reading_order",
-"合作听力": "core_listening_order",
-"看图组句": "core_writing_imgMakeSentence",
-"看图撰写": "core_writing_imgWrite",
-"问题组句": "core_writing_questionMakeSentence",
-"问题撰写": "core_writing_questionWrite",
-```
-
-2."课程巩固记录" sheet 增加以下字段
-"正确率":  参考 export_lesson_review.py 中的计算逻辑
-
-3. 新增一个"汇总统计"sheet
-统计并展示以下内容   请以 可读性 比较好的方式排列、展示
-
-a. "所有互动-按互动组件类型-通过情况统计"
-以每种"互动组件名称"进行聚合
-统计play_result的取值分布情况，算以下指标:
-总数量、Perfect数量、Good数量、Failed数量、Pass数量、Perfect比例、Good比例、Failed比例、Pass比例
-
-b. "中互动组件-按知识点-通过情况统计"
-以每个知识点进行聚合
-
-其中 知识点配置格式如下:
-```
-[{"kpId":"0000004","kpType":"sentence","kpTitle":"My name is ...","kpSkill":"sentence_pron","kpSkillName":"语音"},{"kpId":"0000004","kpType":"sentence","kpTitle":"My name is ...","kpSkill":"sentence_meaning","kpSkillName":"语义"},{"kpId":"0000005","kpType":"sentence","kpTitle":"I'm… years old.","kpSkill":"sentence_pron","kpSkillName":"语音"},{"kpId":"0000005","kpType":"sentence","kpTitle":"I'm… years old.","kpSkill":"sentence_meaning","kpSkillName":"语义"},{"kpId":"0000014","kpType":"sentence","kpTitle":"Nice to meet you.","kpSkill":"sentence_pron","kpSkillName":"语音"},{"kpId":"0000014","kpType":"sentence","kpTitle":"Nice to meet you.","kpSkill":"sentence_meaning","kpSkillName":"语义"}]
-```
-一个组件可以绑定多个知识点，以每个知识点的 kpId + kpType + kpTitle 进行 展示及聚合
-
-对所有绑定了某个知识点的中互动组件(c_type以mid开头)
-统计play_result的取值分布情况，算以下指标:
-总数量、Perfect数量、Good数量、Failed数量、Pass数量、Perfect比例、Good比例、Failed比例、Pass比例
-
-c. "单元总结-按单元统计时长"
-
-将"单元总结记录"中的"play_time_seconds"字段值 以每个单元id 进行聚合 进行 累加 统计，并增加一列 转换为分钟为单位 取整数
-
-
-"""
-# ==== 可直接修改的脚本变量（不使用命令行传参） ====
-# 三种模式互斥，只能配置一个：
-# 模式1：单个角色id
-USER_ID = None # 单个角色ID，示例：2911
-
-# 模式2：角色id列表（多个角色id批量导出）
-USER_ID_LIST = None  # 角色ID列表，示例：[2911, 2912, 2913]
-
-# 模式3：账户id列表（通过账户id查询对应的角色id后批量导出）
-ACCOUNT_ID_LIST = [9343] # 账户ID列表，示例：[100, 101, 102]
-
-OUTPUT_DIR = "output/"  # 输出目录，默认为output文件夹
-# ==== 变量结束 ====
-import os
-import json
-import re
-from typing import Any, Dict, List, Optional
-
-import datetime
-
-try:
-    import requests
-except Exception:
-    requests = None
-
-try:
-    import psycopg2
-    from psycopg2.extras import RealDictCursor
-except Exception:
-    psycopg2 = None
-    RealDictCursor = None
-
-try:
-    import pymysql
-    import pymysql.cursors
-except Exception:
-    pymysql = None
-
-try:
-    import pandas as pd
-except Exception:
-    pd = None
-
-try:
-    import urllib3
-except Exception:
-    urllib3 = None
-
-
-SHEET1_COLUMNS = [
-    "userId",
-    "userMsg",
-    "source",
-    "userName",
-    "soeData",
-    "audioUrl",
-    "asrStatus",
-    "componentId",
-    "componentType",
-    "dataVersion",
-    "timeStr",
-]
-
-SHEET2_COLUMNS = [
-    "user_id",
-    "component_unique_code",
-    "session_id",
-    "c_type",
-    "c_id",
-    "互动组件名称",
-    "组件标题",
-    "组件配置摘要",
-    "知识点",
-    "play_result",
-    "user_behavior_info",
-    "updated_at",
-]
-
-SHEET3_COLUMNS = [
-    "user_id",
-    "unit_id",
-    "lesson_id",
-    "question_list",
-    "正确率",
-    "updated_at",
-]
-
-SHEET4_COLUMNS = [
-    "user_id",
-    "unit_id",
-    "category",
-    "score_text",
-    "question_list",
-    "updated_at",
-]
-
-SHEET5_COLUMNS = [
-    "id",
-    "user_id",
-    "unit_id",
-    "updated_at",
-    "km_id",
-    "km_type",
-    "play_time_seconds",
-]
-
-
-def _load_env_file(path: str) -> None:
-    if not os.path.exists(path):
-        return
-    try:
-        with open(path, "r", encoding="utf-8") as f:
-            for line in f:
-                line = line.strip()
-                if not line or line.startswith("#"):
-                    continue
-                if "=" not in line:
-                    continue
-                k, v = line.split("=", 1)
-                k = k.strip()
-                v = v.strip().strip('"').strip("'")
-                if k and (os.getenv(k) is None):
-                    os.environ[k] = v
-    except Exception:
-        pass
-
-
-def load_env() -> None:
-    _load_env_file(os.path.join(os.getcwd(), ".env"))
-    _load_env_file(os.path.join(os.getcwd(), ".env.local"))
-
-
-def to_json_str(v: Any) -> Any:
-    if isinstance(v, (dict, list)):
-        try:
-            return json.dumps(v, ensure_ascii=False)
-        except Exception:
-            return str(v)
-    return v
-
-
-def parse_time(value: Any) -> Optional[datetime.datetime]:
-    if value is None:
-        return None
-    if isinstance(value, (int, float)):
-        try:
-            v = float(value)
-            # 兼容毫秒级时间戳
-            if v > 1e11:
-                v = v / 1000.0
-            return datetime.datetime.fromtimestamp(v)
-        except Exception:
-            return None
-    if isinstance(value, str):
-        fmts = [
-            "%Y-%m-%dT%H:%M:%S.%fZ",
-            "%Y-%m-%dT%H:%M:%S.%f%z",
-            "%Y-%m-%dT%H:%M:%S%z",
-            "%Y-%m-%d %H:%M:%S",
-            "%Y-%m-%d",
-        ]
-        for fmt in fmts:
-            try:
-                return datetime.datetime.strptime(value, fmt)
-            except Exception:
-                continue
-        try:
-            return datetime.datetime.fromisoformat(value)
-        except Exception:
-            return None
-    return None
-
-
-def pick_time(source: Dict[str, Any]) -> Optional[datetime.datetime]:
-    candidates = [
-        "updated_at",
-        "created_at",
-        "@timestamp",
-        "timestamp",
-        "updatedAt",
-        "createdAt",
-        "time",
-        "ts",
-        "timeStr",
-        "update_time",
-        "create_time",
-    ]
-    for key in candidates:
-        if key in source:
-            t = parse_time(source.get(key))
-            if t is not None:
-                return t
-    # 宽松匹配：尝试扫描所有可能的时间相关字段
-    for k, v in source.items():
-        lk = str(k).lower()
-        if any(s in lk for s in ["time", "date", "_at", "timestamp"]):
-            t = parse_time(v)
-            if t is not None:
-                return t
-    return None
-
-
-def extract_makee_id_from_user_msg(user_msg: Any) -> Optional[str]:
-    # 支持dict或字符串形式
-    if isinstance(user_msg, dict):
-        mk = user_msg.get("makee_id")
-        if isinstance(mk, str) and mk:
-            return mk
-    if isinstance(user_msg, str) and user_msg:
-        # 1) 尝试整体解析为JSON
-        try:
-            obj = json.loads(user_msg)
-            mk = obj.get("makee_id")
-            if isinstance(mk, str) and mk:
-                return mk
-        except Exception:
-            pass
-        # 2) 尝试截取大括号中的JSON
-        try:
-            start = user_msg.find("{")
-            end = user_msg.rfind("}")
-            if start != -1 and end != -1 and end > start:
-                candidate = user_msg[start : end + 1]
-                obj = json.loads(candidate)
-                mk = obj.get("makee_id")
-                if isinstance(mk, str) and mk:
-                    return mk
-        except Exception:
-            pass
-        # 3) 正则匹配 makee_id
-        m = re.search(r"\bmakee_id\b\s*:\s*\"([^\"]+)\"", user_msg)
-        if m:
-            return m.group(1)
-    return None
-
-
-def fetch_es_asr_log(makee_id: str, es_cfg: Dict[str, Any]) -> Optional[Dict[str, Any]]:
-    if requests is None:
-        raise RuntimeError("缺少requests依赖，请安装后再运行。")
-    host = es_cfg.get("host")
-    port = es_cfg.get("port")
-    scheme = es_cfg.get("scheme", "http")
-    user = es_cfg.get("user")
-    password = es_cfg.get("password")
-    index = "llm_asr_log"
-    if not host:
-        return None
-    base = f"{scheme}://{host}:{port}"
-    url = f"{base}/{index}/_search"
-    headers = {"Content-Type": "application/json"}
-    body = {
-        "query": {
-            "bool": {
-                "should": [
-                    {"term": {"makee_id": {"value": str(makee_id)}}},
-                    {"term": {"makee_id.keyword": {"value": str(makee_id)}}},
-                ],
-                "minimum_should_match": 1,
-            }
-        },
-        "size": 10,
-        "_source": [
-            "makee_id",
-            "result_text",
-            "source",
-            "updated_at",
-            "created_at",
-            "@timestamp",
-            "timestamp",
-            "updatedAt",
-            "createdAt",
-            "time",
-            "ts",
-            "timeStr",
-            "update_time",
-            "create_time",
-        ],
-    }
-    auth = (user, password) if user and password else None
-    try:
-        if scheme == "https" and urllib3 is not None:
-            try:
-                urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-            except Exception:
-                pass
-        resp = requests.post(url, headers=headers, json=body, auth=auth, timeout=20, verify=False if scheme == "https" else True)
-        resp.raise_for_status()
-        data = resp.json()
-    except Exception:
-        return None
-    hits = data.get("hits", {}).get("hits", [])
-    if not hits:
-        return None
-    # 选最新的
-    chosen = None
-    best_t = None
-    for h in hits:
-        src = h.get("_source", {}) or {}
-        t = pick_time(src)
-        if t is None:
-            continue
-        if best_t is None or t > best_t:
-            best_t = t
-            chosen = src
-    if chosen is None:
-        # 如果都没有时间，选第一条
-        chosen = (hits[0].get("_source", {}) or {})
-    return chosen
-
-
-def get_es_config() -> Dict[str, Any]:
-    return {
-        "host": os.getenv("ES_HOST"),
-        "port": os.getenv("ES_PORT", "9200"),
-        "scheme": os.getenv("ES_SCHEME", "http"),
-        "user": os.getenv("ES_USER"),
-        "password": os.getenv("ES_PASSWORD"),
-        "index": "user-audio",
-    }
-
-
-def fetch_es_user_audio(user_id: str, es_cfg: Dict[str, Any]) -> List[Dict[str, Any]]:
-    if requests is None:
-        raise RuntimeError("缺少requests依赖，请安装后再运行。")
-
-    print(f"  [ES] 开始查询user-audio索引...")
-    start_time = datetime.datetime.now()
-
-    host = es_cfg.get("host")
-    port = es_cfg.get("port")
-    scheme = es_cfg.get("scheme", "http")
-    user = es_cfg.get("user")
-    password = es_cfg.get("password")
-    index = es_cfg.get("index", "user-audio")
-
-    if not host:
-        return []
-
-    base = f"{scheme}://{host}:{port}"
-    url = f"{base}/{index}/_search"
-    headers = {"Content-Type": "application/json"}
-
-    body = {
-        "query": {
-            "bool": {
-                "should": [
-                    {"term": {"userId": {"value": str(user_id)}}},
-                    {"term": {"userId.keyword": {"value": str(user_id)}}},
-                ],
-                "minimum_should_match": 1,
-            }
-        },
-        "size": 10000,
-        "_source": [
-            "userId",
-            "userMsg",
-            "userName",
-            "soeData",
-            "audioUrl",
-            "asrStatus",
-            "componentId",
-            "componentType",
-            "dataVersion",
-            "updated_at",
-            "created_at",
-            "@timestamp",
-            "timestamp",
-            "updatedAt",
-            "createdAt",
-            "time",
-            "ts",
-            "timeStr",
-            "update_time",
-            "create_time",
-        ],
-    }
-
-    auth = (user, password) if user and password else None
-
-    try:
-        # 抑制自签证书下的HTTPS不安全警告
-        if scheme == "https" and urllib3 is not None:
-            try:
-                urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-            except Exception:
-                pass
-        resp = requests.post(url, headers=headers, json=body, auth=auth, timeout=30, verify=False if scheme == "https" else True)
-        resp.raise_for_status()
-        data = resp.json()
-    except Exception as e:
-        raise RuntimeError(f"ES查询失败: {e}")
-
-    hits = data.get("hits", {}).get("hits", [])
-    print(f"  [ES] 查询完成，获得{len(hits)}条记录，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-
-    if not hits:
-        return []
-
-    print(f"  [ES] 开始处理音频数据...")
-    process_start = datetime.datetime.now()
-
-    rows: List[Dict[str, Any]] = []
-    asr_cache: Dict[str, Dict[str, Any]] = {}
-    makee_id_count = 0
-
-    for idx, h in enumerate(hits, 1):
-        # 每处理100条显示一次进度
-        if idx % 100 == 0 or idx == len(hits):
-            print(f"  [ES] 处理进度: {idx}/{len(hits)} ({idx*100//len(hits)}%)")
-
-        src = h.get("_source", {}) or {}
-        row = {
-            "userId": src.get("userId"),
-            "userMsg": src.get("userMsg"),
-            "source": None,
-            "userName": src.get("userName"),
-            "soeData": to_json_str(src.get("soeData")),
-            "audioUrl": src.get("audioUrl"),
-            "asrStatus": src.get("asrStatus"),
-            "componentId": src.get("componentId"),
-            "componentType": src.get("componentType"),
-            "dataVersion": src.get("dataVersion"),
-        }
-        t = pick_time(src)
-        row["_time"] = t.isoformat() if t else None
-        row["timeStr"] = t.strftime("%Y-%m-%d %H:%M:%S") if t else None
-        # v1.2: 当userMsg包含makee_id时，补充查询llm_asr_log并回填
-        mk = extract_makee_id_from_user_msg(row.get("userMsg"))
-        if mk:
-            makee_id_count += 1
-            asr_doc = asr_cache.get(mk)
-            if asr_doc is None:
-                asr_doc = fetch_es_asr_log(mk, es_cfg)
-                if asr_doc is not None:
-                    asr_cache[mk] = asr_doc
-            if asr_doc is not None:
-                rt = asr_doc.get("result_text")
-                if rt:
-                    row["userMsg"] = rt
-                row["source"] = to_json_str(asr_doc.get("source"))
-        rows.append(row)
-
-    print(f"  [ES] 数据处理完成，发现{makee_id_count}条包含makee_id的记录，耗时{(datetime.datetime.now() - process_start).total_seconds():.2f}秒")
-
-    print(f"  [ES] 开始排序...")
-    rows.sort(key=lambda x: parse_time(x.get("_time")) or datetime.datetime.min, reverse=True)
-    print(f"  [ES] 音频数据处理完成，总耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-
-    return rows
-
-
-def get_pg_conn() -> Any:
-    if psycopg2 is None:
-        raise RuntimeError("缺少psycopg2依赖，请安装后再运行。")
-    host = os.getenv("PG_DB_HOST")
-    port = int(os.getenv("PG_DB_PORT", "5432"))
-    user = os.getenv("PG_DB_USER")
-    password = os.getenv("PG_DB_PASSWORD")
-    dbname = os.getenv("PG_DB_DATABASE")
-    if not host or not dbname:
-        raise RuntimeError("PG数据库环境变量未配置完整")
-    conn = psycopg2.connect(host=host, port=port, user=user, password=password, dbname=dbname)
-    return conn
-
-
-def get_mysql_conn(database: str) -> Any:
-    """
-    获取MySQL数据库连接
-
-    Args:
-        database: 数据库名，可选值：'vala_user' 或 'vala_test'
-                 vala_user 使用 online 配置（环境变量后缀 _online）
-                 vala_test 使用默认配置
-
-    Returns:
-        MySQL连接对象
-    """
-    if pymysql is None:
-        raise RuntimeError("缺少pymysql依赖，请安装后再运行。")
-
-    # 根据数据库选择不同的环境变量配置
-    if database == "vala_user":
-        # vala_user 数据库使用 online 配置
-        host = os.getenv("MYSQL_HOST_online")
-        port = int(os.getenv("MYSQL_PORT_online", "3306"))
-        user = os.getenv("MYSQL_USERNAME_online")
-        password = os.getenv("MYSQL_PASSWORD_online")
-        if not host:
-            raise RuntimeError("MySQL数据库环境变量未配置完整（缺少MYSQL_HOST_online）")
-    else:
-        # vala_test 等其他数据库使用默认配置
-        host = os.getenv("MYSQL_HOST")
-        port = int(os.getenv("MYSQL_PORT", "3306"))
-        user = os.getenv("MYSQL_USERNAME")
-        password = os.getenv("MYSQL_PASSWORD")
-        if not host:
-            raise RuntimeError("MySQL数据库环境变量未配置完整（缺少MYSQL_HOST）")
-
-    conn = pymysql.connect(
-        host=host,
-        port=port,
-        user=user,
-        password=password,
-        database=database,  # 直接使用传入的数据库名
-        charset="utf8mb4",
-        cursorclass=pymysql.cursors.DictCursor,
-    )
-    return conn
-
-
-def get_id_2_unit_index(conn: Any) -> Dict[int, int]:
-    """
-    从MySQL获取 story_id 到 unit_id 的映射关系
-
-    Args:
-        conn: MySQL数据库连接
-
-    Returns:
-        映射字典 {story_id: unit_id}
-    """
-    sql = """
-    SELECT *
-    FROM `vala_game_info`
-    WHERE id > 0
-      AND `vala_game_info`.`deleted_at` IS NULL
-    ORDER BY season_package_id asc, `index` asc
-    """
-    try:
-        with conn.cursor() as cur:
-            cur.execute(sql)
-            rows = cur.fetchall() or []
-            # 构建映射表：按查询结果的顺序，索引即为unit_id
-            id_2_unit_index = {}
-            for index, row in enumerate(rows):
-                id_2_unit_index[row["id"]] = index
-            return id_2_unit_index
-    except Exception as e:
-        print(f"[ERROR] 获取story_id到unit_id映射失败: {e}")
-        return {}
-
-
-def get_chapter_id_to_lesson_id(conn: Any) -> Dict[int, int]:
-    """
-    从MySQL获取 chapter_id 到 lesson_id 的映射关系
-
-    Args:
-        conn: MySQL数据库连接
-
-    Returns:
-        映射字典 {chapter_id: lesson_id}
-    """
-    sql = """
-    SELECT id, `index`
-    FROM `vala_game_chapter`
-    WHERE deleted_at IS NULL
-    """
-    try:
-        with conn.cursor() as cur:
-            cur.execute(sql)
-            rows = cur.fetchall() or []
-            # 构建映射表：chapter的index字段即为lesson_id
-            chapter_id_to_lesson_id = {}
-            for row in rows:
-                chapter_id_to_lesson_id[row["id"]] = row["index"]
-            return chapter_id_to_lesson_id
-    except Exception as e:
-        print(f"[ERROR] 获取chapter_id到lesson_id映射失败: {e}")
-        return {}
-
-
-# 组件类型到组件名称的映射
-COMPONENT_TYPE_NAMES = {
-    "mid_vocab_item": "物品互动",
-    "mid_vocab_image": "图片互动",
-    "mid_vocab_fillBlank": "填词互动",
-    "mid_vocab_instruction": "指令互动",
-    "mid_sentence_dialogue": "对话互动",  # 需要根据mode进一步判断
-    "mid_sentence_voice": "语音互动",
-    "mid_sentence_material": "材料互动",
-    "mid_sentence_makeSentence": "造句互动",
-    "mid_grammar_cloze": "挖空互动",
-    "mid_grammar_sentence": "组句互动",
-    "mid_pron_pron": "发音互动",
-    "core_speaking_reply": "口语快答",
-    "core_speaking_inquiry": "口语妙问",
-    "core_speaking_explore": "口语探讨",
-    "core_speaking_monologue": "口语独白",
-    "core_reading_order": "合作阅读",
-    "core_listening_order": "合作听力",
-    "core_writing_imgMakeSentence": "看图组句",
-    "core_writing_imgWrite": "看图撰写",
-    "core_writing_questionMakeSentence": "问题组句",
-    "core_writing_questionWrite": "问题撰写",
-}
-
-
-def get_component_name(c_type: str, component_config: Optional[Dict[str, Any]]) -> str:
-    """
-    根据c_type和组件配置获取组件名称
-
-    Args:
-        c_type: 组件类型
-        component_config: 组件配置（用于判断对话互动的mode）
-
-    Returns:
-        组件名称
-    """
-    if not c_type:
-        return ""
-
-    # 特殊处理：对话互动需要根据mode判断
-    if c_type == "mid_sentence_dialogue" and component_config:
-        try:
-            question = component_config.get("question", {})
-            mode = question.get("mode", "")
-            if mode == "express":
-                return "对话互动-表达"
-            elif mode == "read":
-                return "对话互动-朗读"
-        except Exception:
-            pass
-
-    return COMPONENT_TYPE_NAMES.get(c_type, "")
-
-
-def batch_fetch_component_configs(play_records: List[Dict[str, Any]], mysql_conn: Any) -> Dict[str, Dict[str, Any]]:
-    """
-    批量查询组件配置信息
-
-    Args:
-        play_records: 播放记录列表
-        mysql_conn: MySQL连接
-
-    Returns:
-        组件配置映射 {c_type_c_id: {title, component_config, kp_relation_info}}
-    """
-    print(f"  [MySQL] 开始批量查询组件配置...")
-    start_time = datetime.datetime.now()
-
-    # 收集需要查询的c_type和c_id
-    mid_c_ids = set()
-    core_c_ids = set()
-    mid_type_id_pairs = []  # 用于调试日志
-    core_type_id_pairs = []
-
-    for record in play_records:
-        c_type = record.get("c_type", "")
-        c_id = record.get("c_id")
-        if c_type and c_id:
-            if c_type.startswith("mid"):
-                mid_c_ids.add(c_id)
-                mid_type_id_pairs.append((c_type, c_id))
-            elif c_type.startswith("core"):
-                core_c_ids.add(c_id)
-                core_type_id_pairs.append((c_type, c_id))
-
-    print(f"  [MySQL] 需要查询中互动组件: {len(mid_c_ids)}个, 核心互动组件: {len(core_c_ids)}个")
-    if mid_c_ids:
-        print(f"  [MySQL] 中互动组件ID列表（前10个）: {sorted(list(mid_c_ids))[:10]}")
-    if core_c_ids:
-        print(f"  [MySQL] 核心互动组件ID列表（前10个）: {sorted(list(core_c_ids))[:10]}")
-
-    config_map = {}
-
-    # 批量查询middle_interaction_component
-    if mid_c_ids:
-        try:
-            with mysql_conn.cursor() as cur:
-                placeholders = ','.join(['%s'] * len(mid_c_ids))
-                sql = f"""
-                SELECT c_id, c_type, title, component_config, kp_relation_info
-                FROM middle_interaction_component
-                WHERE c_id IN ({placeholders}) AND deleted_at IS NULL
-                """
-                print(f"  [MySQL] 执行中互动组件查询，查询条件: c_id IN ({len(mid_c_ids)}个ID)")
-                cur.execute(sql, tuple(mid_c_ids))
-                rows = cur.fetchall() or []
-                print(f"  [MySQL] 查询到{len(rows)}条中互动组件配置")
-
-                if len(rows) == 0 and len(mid_c_ids) > 0:
-                    print(f"  [MySQL] [警告] 查询结果为空！可能的原因：")
-                    print(f"  [MySQL]   - 数据库中没有匹配的c_id记录")
-                    print(f"  [MySQL]   - deleted_at字段不为NULL")
-                    print(f"  [MySQL]   - c_id不存在")
-
-                for idx, row in enumerate(rows):
-                    c_type = row.get("c_type", "")
-                    c_id = row.get("c_id")
-                    key = f"{c_type}_{c_id}"
-
-                    if idx < 3:  # 输出前3条的详细信息
-                        print(f"  [MySQL] [样例{idx+1}] id={c_id}, c_type={c_type}, key={key}")
-                        print(f"  [MySQL] [样例{idx+1}] title={row.get('title', '')[:50]}")
-
-                    # 解析component_config
-                    component_config = row.get("component_config")
-                    if isinstance(component_config, str):
-                        try:
-                            component_config = json.loads(component_config)
-                        except Exception as e:
-                            print(f"  [MySQL] [警告] 解析component_config失败 (id={c_id}): {e}")
-                            component_config = {}
-
-                    # 提取question字段作为摘要
-                    summary = ""
-                    if isinstance(component_config, dict):
-                        question = component_config.get("question")
-                        summary = to_json_str(question) if question else ""
-                        if idx < 3 and question:
-                            print(f"  [MySQL] [样例{idx+1}] 提取到question字段，长度: {len(summary)}")
-
-                    # 解析kp_relation_info
-                    kp_relation_info = row.get("kp_relation_info")
-                    if isinstance(kp_relation_info, str):
-                        try:
-                            kp_relation_info = json.loads(kp_relation_info)
-                        except Exception:
-                            kp_relation_info = []
-
-                    config_map[key] = {
-                        "title": row.get("title", ""),
-                        "component_config": component_config,
-                        "summary": summary,
-                        "kp_relation_info": to_json_str(kp_relation_info),
-                    }
-
-                print(f"  [MySQL] 中互动组件配置已加入config_map，当前map大小: {len(config_map)}")
-        except Exception as e:
-            print(f"  [MySQL] [错误] 查询中互动组件配置失败: {e}")
-            import traceback
-            traceback.print_exc()
-
-    # 批量查询core_interaction_component
-    if core_c_ids:
-        try:
-            with mysql_conn.cursor() as cur:
-                placeholders = ','.join(['%s'] * len(core_c_ids))
-                sql = f"""
-                SELECT c_id, c_type, title, component_config, kp_relation_info
-                FROM core_interaction_component
-                WHERE c_id IN ({placeholders}) AND deleted_at IS NULL
-                """
-                print(f"  [MySQL] 执行核心互动组件查询，查询条件: c_id IN ({len(core_c_ids)}个ID)")
-                cur.execute(sql, tuple(core_c_ids))
-                rows = cur.fetchall() or []
-                print(f"  [MySQL] 查询到{len(rows)}条核心互动组件配置")
-
-                if len(rows) == 0 and len(core_c_ids) > 0:
-                    print(f"  [MySQL] [警告] 查询结果为空！可能的原因：")
-                    print(f"  [MySQL]   - 数据库中没有匹配的c_id记录")
-                    print(f"  [MySQL]   - deleted_at字段不为NULL")
-                    print(f"  [MySQL]   - c_id不存在")
-
-                for idx, row in enumerate(rows):
-                    c_type = row.get("c_type", "")
-                    c_id = row.get("c_id")
-                    key = f"{c_type}_{c_id}"
-
-                    if idx < 3:  # 输出前3条的详细信息
-                        print(f"  [MySQL] [样例{idx+1}] id={c_id}, c_type={c_type}, key={key}")
-                        print(f"  [MySQL] [样例{idx+1}] title={row.get('title', '')[:50]}")
-
-                    # 解析component_config
-                    component_config = row.get("component_config")
-                    if isinstance(component_config, str):
-                        try:
-                            component_config = json.loads(component_config)
-                        except Exception as e:
-                            print(f"  [MySQL] [警告] 解析component_config失败 (id={c_id}): {e}")
-                            component_config = {}
-
-                    # 提取taskInfo字段作为摘要
-                    summary = ""
-                    if isinstance(component_config, dict):
-                        task_info = component_config.get("taskInfo")
-                        summary = to_json_str(task_info) if task_info else ""
-                        if idx < 3 and task_info:
-                            print(f"  [MySQL] [样例{idx+1}] 提取到taskInfo字段，长度: {len(summary)}")
-
-                    # 解析kp_relation_info
-                    kp_relation_info = row.get("kp_relation_info")
-                    if isinstance(kp_relation_info, str):
-                        try:
-                            kp_relation_info = json.loads(kp_relation_info)
-                        except Exception:
-                            kp_relation_info = []
-
-                    config_map[key] = {
-                        "title": row.get("title", ""),
-                        "component_config": component_config,
-                        "summary": summary,
-                        "kp_relation_info": to_json_str(kp_relation_info),
-                    }
-
-                print(f"  [MySQL] 核心互动组件配置已加入config_map，当前map大小: {len(config_map)}")
-        except Exception as e:
-            print(f"  [MySQL] [错误] 查询核心互动组件配置失败: {e}")
-            import traceback
-            traceback.print_exc()
-
-    print(f"  [MySQL] 组件配置查询完成，共{len(config_map)}条，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-    return config_map
-
-
-def calculate_accuracy(question_list: Any) -> float:
-    """
-    计算问题列表的正确率
-
-    Args:
-        question_list: 问题列表（可能是JSON字符串或list）
-
-    Returns:
-        正确率（百分比，保留2位小数）
-    """
-    try:
-        if isinstance(question_list, str):
-            question_list = json.loads(question_list)
-
-        if not isinstance(question_list, list) or len(question_list) == 0:
-            return 0.0
-
-        total = len(question_list)
-        correct = sum(1 for q in question_list if q.get('isRight') == True)
-        accuracy = round(correct / total * 100, 2) if total > 0 else 0.0
-
-        return accuracy
-    except Exception:
-        return 0.0
-
-
-
-def fetch_character_ids_by_account(account_id: str, conn: Any) -> List[str]:
-    """根据账户id查询对应的角色id列表"""
-    sql = "SELECT id FROM vala_app_character WHERE account_id = %s"
-    try:
-        with conn.cursor() as cur:
-            cur.execute(sql, (account_id,))
-            rows = cur.fetchall() or []
-            return [str(row["id"]) for row in rows if row.get("id")]
-    except Exception as e:
-        print(f"[ERROR] 查询账户id={account_id}的角色id失败: {e}")
-        return []
-
-
-def fetch_pg_play_records(user_id: str, conn: Any, mysql_conn: Any) -> List[Dict[str, Any]]:
-    """
-    查询互动组件学习记录并补充组件配置信息
-
-    Args:
-        user_id: 用户ID（角色ID）
-        conn: PostgreSQL数据库连接
-        mysql_conn: MySQL数据库连接
-
-    Returns:
-        互动组件学习记录列表
-    """
-    print(f"  [PG] 开始查询互动组件学习记录（8张分表）...")
-    start_time = datetime.datetime.now()
-
-    tables = [f"user_component_play_record_{i}" for i in range(8)]
-    rows: List[Dict[str, Any]] = []
-    with conn.cursor(cursor_factory=RealDictCursor) as cur:
-        for t in tables:
-            try:
-                sql = f"""
-                    SELECT user_id, component_unique_code, session_id, c_type, c_id,
-                           play_result, user_behavior_info, updated_at
-                    FROM {t}
-                    WHERE user_id = %s
-                    ORDER BY updated_at DESC
-                    """
-                print(f"  [PG_DEBUG] 准备查询表 {t}，SQL：{sql.strip()}，参数：{user_id}")
-                cur.execute(sql, (user_id,))
-                part = cur.fetchall() or []
-                if part:
-                    print(f"  [PG] 表{t}查到{len(part)}条记录")
-                for r in part:
-                    r = dict(r)
-                    r["play_result"] = to_json_str(r.get("play_result"))
-                    r["user_behavior_info"] = to_json_str(r.get("user_behavior_info"))
-                    # 将带时区的时间转换为无时区，避免Excel写入报错
-                    upd = r.get("updated_at")
-                    if isinstance(upd, datetime.datetime):
-                        try:
-                            if upd.tzinfo is not None and upd.tzinfo.utcoffset(upd) is not None:
-                                r["updated_at"] = upd.replace(tzinfo=None)
-                        except Exception:
-                            # 回退为字符串
-                            r["updated_at"] = str(upd)
-                    rows.append(r)
-            except Exception as e:
-                print(f"  [PG] 表{t}查询失败: {e}")
-                continue
-
-    rows.sort(key=lambda x: parse_time(x.get("updated_at")) or datetime.datetime.min, reverse=True)
-    print(f"  [PG] 互动组件学习记录查询完成，共{len(rows)}条，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-
-    # 批量查询组件配置
-    if rows and mysql_conn:
-        config_map = batch_fetch_component_configs(rows, mysql_conn)
-
-        # 补充组件信息
-        print(f"  [PG] 开始补充组件配置信息...")
-        filled_count = 0
-        empty_count = 0
-        sample_keys = []
-        sample_mode_check = []  # 检查对话互动的mode
-
-        for r in rows:
-            c_type = r.get("c_type", "")
-            c_id = r.get("c_id")
-            key = f"{c_type}_{c_id}" if c_type and c_id else ""
-
-            config = config_map.get(key, {})
-            component_config = config.get("component_config", {})
-
-            component_name = get_component_name(c_type, component_config)
-            r["互动组件名称"] = component_name
-            r["组件标题"] = config.get("title", "")
-            r["组件配置摘要"] = config.get("summary", "")
-            r["知识点"] = config.get("kp_relation_info", "")
-
-            # 统计填充情况
-            if config:
-                filled_count += 1
-                if len(sample_keys) < 3:
-                    sample_keys.append((key, component_name, r["组件标题"][:30] if r["组件标题"] else ""))
-
-                # 检查对话互动的mode
-                if c_type == "mid_sentence_dialogue" and len(sample_mode_check) < 3:
-                    mode = ""
-                    if isinstance(component_config, dict):
-                        question = component_config.get("question", {})
-                        if isinstance(question, dict):
-                            mode = question.get("mode", "")
-                    sample_mode_check.append({
-                        "key": key,
-                        "mode": mode,
-                        "component_name": component_name
-                    })
-            else:
-                empty_count += 1
-                if empty_count <= 5:  # 输出前5个未匹配的key
-                    print(f"  [PG] [警告] 未找到组件配置: key={key}")
-
-        print(f"  [PG] 组件配置信息补充完成")
-        print(f"  [PG] 匹配到配置: {filled_count}条, 未匹配: {empty_count}条")
-        if sample_keys:
-            print(f"  [PG] 样例数据（前3条）:")
-            for key, name, title in sample_keys:
-                print(f"  [PG]   - key={key}, 名称={name}, 标题={title}")
-
-        if sample_mode_check:
-            print(f"  [PG] 对话互动mode检查（前3条）:")
-            for s in sample_mode_check:
-                print(f"  [PG]   - key={s['key']}, mode={s['mode']}, 最终名称={s['component_name']}")
-
-    return rows
-
-
-def fetch_pg_unit_review(user_id: str, conn: Any, id_2_unit_index: Dict[int, int], chapter_id_to_lesson_id: Dict[int, int]) -> List[Dict[str, Any]]:
-    """
-    查询课程巩固记录
-
-    Args:
-        user_id: 用户ID（角色ID）
-        conn: PostgreSQL数据库连接
-        id_2_unit_index: story_id到unit_id的映射字典
-        chapter_id_to_lesson_id: chapter_id到lesson_id的映射字典
-
-    Returns:
-        课程巩固记录列表
-    """
-    print(f"  [PG] 开始查询课程巩固记录...")
-    start_time = datetime.datetime.now()
-
-    sql = (
-        "SELECT user_id, story_id, chapter_id, question_list, updated_at "
-        "FROM user_unit_review_question_result WHERE user_id = %s ORDER BY updated_at DESC"
-    )
-    with conn.cursor(cursor_factory=RealDictCursor) as cur:
-        try:
-            cur.execute(sql, (user_id,))
-            rows = cur.fetchall() or []
-        except Exception as e:
-            print(f"  [PG] 课程巩固记录查询失败: {e}")
-            rows = []
-    out: List[Dict[str, Any]] = []
-    for r in rows:
-        d = dict(r)
-
-        # 映射 story_id 到 unit_id
-        story_id = d.get("story_id")
-        unit_id = id_2_unit_index.get(story_id) if story_id else None
-        d["unit_id"] = unit_id
-
-        # 映射 chapter_id 到 lesson_id
-        chapter_id = d.get("chapter_id")
-        lesson_id = chapter_id_to_lesson_id.get(chapter_id) if chapter_id else None
-        d["lesson_id"] = lesson_id
-
-        # 计算正确率
-        question_list = d.get("question_list")
-        d["正确率"] = calculate_accuracy(question_list)
-
-        d["question_list"] = to_json_str(question_list)
-        upd = d.get("updated_at")
-        if isinstance(upd, datetime.datetime):
-            try:
-                if upd.tzinfo is not None and upd.tzinfo.utcoffset(upd) is not None:
-                    d["updated_at"] = upd.replace(tzinfo=None)
-            except Exception:
-                d["updated_at"] = str(upd)
-        out.append(d)
-
-    print(f"  [PG] 课程巩固记录查询完成，共{len(out)}条，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-    return out
-
-
-def fetch_pg_unit_challenge(user_id: str, conn: Any, id_2_unit_index: Dict[int, int]) -> List[Dict[str, Any]]:
-    """
-    查询单元挑战记录
-
-    Args:
-        user_id: 用户ID（角色ID）
-        conn: PostgreSQL数据库连接
-        id_2_unit_index: story_id到unit_id的映射字典
-
-    Returns:
-        单元挑战记录列表
-    """
-    print(f"  [PG] 开始查询单元挑战记录...")
-    start_time = datetime.datetime.now()
-
-    sql = (
-        "SELECT user_id, story_id, category, score_text, question_list, updated_at "
-        "FROM user_unit_challenge_question_result WHERE user_id = %s ORDER BY updated_at DESC"
-    )
-    with conn.cursor(cursor_factory=RealDictCursor) as cur:
-        try:
-            cur.execute(sql, (user_id,))
-            rows = cur.fetchall() or []
-        except Exception as e:
-            print(f"  [PG] 单元挑战记录查询失败: {e}")
-            rows = []
-    out: List[Dict[str, Any]] = []
-    for r in rows:
-        d = dict(r)
-
-        # 映射 story_id 到 unit_id
-        story_id = d.get("story_id")
-        unit_id = id_2_unit_index.get(story_id) if story_id else None
-        d["unit_id"] = unit_id
-
-        d["question_list"] = to_json_str(d.get("question_list"))
-        upd = d.get("updated_at")
-        if isinstance(upd, datetime.datetime):
-            try:
-                if upd.tzinfo is not None and upd.tzinfo.utcoffset(upd) is not None:
-                    d["updated_at"] = upd.replace(tzinfo=None)
-            except Exception:
-                d["updated_at"] = str(upd)
-        out.append(d)
-
-    print(f"  [PG] 单元挑战记录查询完成，共{len(out)}条，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-    return out
-
-
-def fetch_pg_unit_summary(user_id: str, conn: Any, id_2_unit_index: Dict[int, int]) -> List[Dict[str, Any]]:
-    """
-    查询单元总结知识点结果数据
-
-    Args:
-        user_id: 用户ID（角色ID）
-        conn: PostgreSQL数据库连接
-        id_2_unit_index: story_id到unit_id的映射字典
-
-    Returns:
-        单元总结记录列表
-    """
-    print(f"  [PG] 开始查询单元总结记录...")
-    start_time = datetime.datetime.now()
-
-    sql = (
-        "SELECT id, user_id, story_id, updated_at, km_id, km_type, play_time "
-        "FROM user_unit_summary_km_result WHERE user_id = %s AND deleted_at IS NULL ORDER BY updated_at DESC"
-    )
-    with conn.cursor(cursor_factory=RealDictCursor) as cur:
-        try:
-            cur.execute(sql, (user_id,))
-            rows = cur.fetchall() or []
-        except Exception as e:
-            print(f"  [PG] 单元总结记录查询失败: {e}")
-            rows = []
-
-    out: List[Dict[str, Any]] = []
-    for r in rows:
-        d = dict(r)
-        # 映射 story_id 到 unit_id
-        story_id = d.get("story_id")
-        unit_id = id_2_unit_index.get(story_id) if story_id else None
-        d["unit_id"] = unit_id
-
-        # 转换 play_time (毫秒) 为秒 (整数)
-        play_time = d.get("play_time")
-        d["play_time_seconds"] = play_time // 1000 if play_time else 0
-
-        # 移除时区信息
-        upd = d.get("updated_at")
-        if isinstance(upd, datetime.datetime):
-            try:
-                if upd.tzinfo is not None and upd.tzinfo.utcoffset(upd) is not None:
-                    d["updated_at"] = upd.replace(tzinfo=None)
-            except Exception:
-                d["updated_at"] = str(upd)
-        out.append(d)
-
-    print(f"  [PG] 单元总结记录查询完成，共{len(out)}条，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-    return out
-
-
-def generate_statistics(sheet2_rows: List[Dict[str, Any]], sheet5_rows: List[Dict[str, Any]]) -> tuple:
-    """
-    生成汇总统计数据
-
-    Args:
-        sheet2_rows: 互动组件学习记录
-        sheet5_rows: 单元总结记录
-
-    Returns:
-        (组件统计DataFrame, 知识点统计DataFrame, 单元时长统计DataFrame)
-    """
-    if pd is None:
-        raise RuntimeError("缺少pandas依赖，请安装后再运行。")
-
-    print(f"  [统计] 开始生成汇总统计数据...")
-    start_time = datetime.datetime.now()
-
-    from collections import defaultdict
-
-    # ============ a. 所有互动-按互动组件类型-通过情况统计 ============
-    component_stats_data = []
-    component_stats = defaultdict(lambda: {"Perfect": 0, "Good": 0, "Failed": 0, "Pass": 0, "Oops": 0, "total": 0})
-
-    # 用于调试
-    sample_results = []
-    parse_error_count = 0
-
-    for idx, record in enumerate(sheet2_rows):
-        component_name = record.get("互动组件名称", "")
-        if not component_name:
-            continue
-
-        play_result_str = record.get("play_result", "")
-
-        # 解析play_result
-        result = ""
-        try:
-            # 先判断是否是简单的字符串（Perfect/Good/Failed/Pass/Oops）
-            if isinstance(play_result_str, str):
-                # 去除空格后检查
-                stripped = play_result_str.strip()
-                if stripped in ["Perfect", "Good", "Failed", "Pass", "Oops"]:
-                    # 直接使用
-                    result = stripped
-                else:
-                    # 尝试JSON解析
-                    try:
-                        play_result = json.loads(play_result_str)
-                        if isinstance(play_result, dict):
-                            result = play_result.get("result", "")
-                        else:
-                            result = ""
-                    except:
-                        result = ""
-            else:
-                # 如果不是字符串，尝试当dict处理
-                if isinstance(play_result_str, dict):
-                    result = play_result_str.get("result", "")
-                else:
-                    result = ""
-
-            # 收集前3个样例
-            if idx < 3:
-                sample_results.append({
-                    "component": component_name,
-                    "raw": str(play_result_str)[:100],
-                    "result": result
-                })
-        except Exception as e:
-            parse_error_count += 1
-            if parse_error_count <= 3:
-                print(f"  [统计] [警告] 解析play_result失败 (第{idx+1}条): {e}, 原始值: {str(play_result_str)[:100]}")
-            result = ""
-
-        component_stats[component_name]["total"] += 1
-        if result in ["Perfect", "Good", "Failed", "Pass", "Oops"]:
-            component_stats[component_name][result] += 1
-
-    print(f"  [统计] play_result解析样例（前3条）:")
-    for s in sample_results:
-        print(f"  [统计]   - 组件: {s['component']}, 结果: {s['result']}, 原始: {s['raw']}")
-    if parse_error_count > 0:
-        print(f"  [统计] play_result解析失败总数: {parse_error_count}")
-
-    # 生成统计数据行
-    for component_name in sorted(component_stats.keys()):
-        stats = component_stats[component_name]
-        total = stats["total"]
-        perfect = stats["Perfect"]
-        good = stats["Good"]
-        failed = stats["Failed"]
-        pass_count = stats["Pass"]
-        oops = stats["Oops"]
-
-        perfect_ratio = round(perfect / total * 100, 2) if total > 0 else 0
-        good_ratio = round(good / total * 100, 2) if total > 0 else 0
-        failed_ratio = round(failed / total * 100, 2) if total > 0 else 0
-        pass_ratio = round(pass_count / total * 100, 2) if total > 0 else 0
-        oops_ratio = round(oops / total * 100, 2) if total > 0 else 0
-
-        component_stats_data.append({
-            "互动组件名称": component_name,
-            "总数量": total,
-            "Perfect数量": perfect,
-            "Good数量": good,
-            "Failed数量": failed,
-            "Pass数量": pass_count,
-            "Oops数量": oops,
-            "Perfect比例(%)": perfect_ratio,
-            "Good比例(%)": good_ratio,
-            "Failed比例(%)": failed_ratio,
-            "Pass比例(%)": pass_ratio,
-            "Oops比例(%)": oops_ratio,
-        })
-
-    # ============ b. 中互动组件-按知识点-通过情况统计 ============
-    kp_stats_data = []
-    kp_stats = defaultdict(lambda: {"Perfect": 0, "Good": 0, "Failed": 0, "Pass": 0, "Oops": 0, "total": 0})
-
-    # 调试信息
-    mid_count = 0
-    has_kp_count = 0
-    sample_kp_records = []
-
-    for idx, record in enumerate(sheet2_rows):
-        c_type = record.get("c_type", "")
-        if not c_type or not c_type.startswith("mid"):
-            continue
-
-        mid_count += 1
-        kp_relation_info_str = record.get("知识点", "")
-
-        if not kp_relation_info_str:
-            continue
-
-        has_kp_count += 1
-
-        # 解析知识点
-        try:
-            if isinstance(kp_relation_info_str, str):
-                kp_relation_info = json.loads(kp_relation_info_str)
-            else:
-                kp_relation_info = kp_relation_info_str
-
-            if not isinstance(kp_relation_info, list):
-                continue
-
-            # 收集样例
-            if len(sample_kp_records) < 3:
-                sample_kp_records.append({
-                    "c_type": c_type,
-                    "kp_count": len(kp_relation_info),
-                    "kp_info": str(kp_relation_info)[:200]
-                })
-
-            # 解析play_result（使用相同的逻辑）
-            play_result_str = record.get("play_result", "")
-            result = ""
-            if isinstance(play_result_str, str):
-                stripped = play_result_str.strip()
-                if stripped in ["Perfect", "Good", "Failed", "Pass", "Oops"]:
-                    result = stripped
-                else:
-                    try:
-                        play_result = json.loads(play_result_str)
-                        if isinstance(play_result, dict):
-                            result = play_result.get("result", "")
-                    except:
-                        pass
-            elif isinstance(play_result_str, dict):
-                result = play_result_str.get("result", "")
-
-            # 为每个知识点统计
-            for kp in kp_relation_info:
-                if not isinstance(kp, dict):
-                    continue
-
-                kp_id = kp.get("kpId", "")
-                kp_type = kp.get("kpType", "")
-                kp_title = kp.get("kpTitle", "")
-
-                if not kp_id:
-                    continue
-
-                kp_key = f"{kp_id}|{kp_type}|{kp_title}"
-                kp_stats[kp_key]["total"] += 1
-                if result in ["Perfect", "Good", "Failed", "Pass", "Oops"]:
-                    kp_stats[kp_key][result] += 1
-
-        except Exception as e:
-            if len(sample_kp_records) < 5:
-                print(f"  [统计] [警告] 解析知识点失败: {e}, 原始值: {str(kp_relation_info_str)[:100]}")
-            continue
-
-    print(f"  [统计] 中互动组件统计: 总数={mid_count}, 有知识点={has_kp_count}, 知识点条目数={len(kp_stats)}")
-    if sample_kp_records:
-        print(f"  [统计] 知识点样例（前3条）:")
-        for s in sample_kp_records:
-            print(f"  [统计]   - c_type={s['c_type']}, 知识点数量={s['kp_count']}, 内容={s['kp_info']}")
-
-    # 生成知识点统计数据行
-    for kp_key in sorted(kp_stats.keys()):
-        parts = kp_key.split("|")
-        if len(parts) != 3:
-            continue
-
-        kp_id, kp_type, kp_title = parts
-        stats = kp_stats[kp_key]
-        total = stats["total"]
-        perfect = stats["Perfect"]
-        good = stats["Good"]
-        failed = stats["Failed"]
-        pass_count = stats["Pass"]
-        oops = stats["Oops"]
-
-        perfect_ratio = round(perfect / total * 100, 2) if total > 0 else 0
-        good_ratio = round(good / total * 100, 2) if total > 0 else 0
-        failed_ratio = round(failed / total * 100, 2) if total > 0 else 0
-        pass_ratio = round(pass_count / total * 100, 2) if total > 0 else 0
-        oops_ratio = round(oops / total * 100, 2) if total > 0 else 0
-
-        kp_stats_data.append({
-            "知识点ID": kp_id,
-            "知识点类型": kp_type,
-            "知识点标题": kp_title,
-            "总数量": total,
-            "Perfect数量": perfect,
-            "Good数量": good,
-            "Failed数量": failed,
-            "Pass数量": pass_count,
-            "Oops数量": oops,
-            "Perfect比例(%)": perfect_ratio,
-            "Good比例(%)": good_ratio,
-            "Failed比例(%)": failed_ratio,
-            "Pass比例(%)": pass_ratio,
-            "Oops比例(%)": oops_ratio,
-        })
-
-    # ============ c. 单元总结-按单元统计时长 ============
-    unit_time_stats_data = []
-    unit_time_stats = defaultdict(int)
-
-    for record in sheet5_rows:
-        unit_id = record.get("unit_id")
-        play_time_seconds = record.get("play_time_seconds", 0)
-
-        if unit_id is not None:
-            unit_time_stats[unit_id] += play_time_seconds
-
-    # 生成单元时长统计数据行
-    for unit_id in sorted(unit_time_stats.keys()):
-        total_seconds = unit_time_stats[unit_id]
-        total_minutes = int(total_seconds / 60)
-
-        unit_time_stats_data.append({
-            "单元ID": f"unit_{unit_id}",
-            "总时长(秒)": total_seconds,
-            "总时长(分钟)": total_minutes,
-        })
-
-    print(f"  [统计] 汇总统计数据生成完成，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-    print(f"  [统计] 生成了{len(component_stats_data)}条组件统计, {len(kp_stats_data)}条知识点统计, {len(unit_time_stats_data)}条单元时长统计")
-
-    return (
-        pd.DataFrame(component_stats_data),
-        pd.DataFrame(kp_stats_data),
-        pd.DataFrame(unit_time_stats_data)
-    )
-
-
-
-def write_excel(path: str, sheet1_rows: List[Dict[str, Any]], sheet2_rows: List[Dict[str, Any]], sheet3_rows: List[Dict[str, Any]], sheet4_rows: List[Dict[str, Any]], sheet5_rows: List[Dict[str, Any]], stats_component_df: Any, stats_kp_df: Any, stats_unit_time_df: Any) -> None:
-    if pd is None:
-        raise RuntimeError("缺少pandas依赖，请安装后再运行。")
-
-    print(f"  [Excel] 开始写入Excel文件: {path}")
-    start_time = datetime.datetime.now()
-
-    out_dir = os.path.dirname(path) or "."
-    os.makedirs(out_dir, exist_ok=True)
-    with pd.ExcelWriter(path, engine="openpyxl") as writer:
-        pd.DataFrame(sheet1_rows, columns=SHEET1_COLUMNS).to_excel(writer, sheet_name="全部音频数据", index=False)
-        pd.DataFrame(sheet2_rows, columns=SHEET2_COLUMNS).to_excel(writer, sheet_name="互动组件学习记录", index=False)
-        pd.DataFrame(sheet3_rows, columns=SHEET3_COLUMNS).to_excel(writer, sheet_name="课程巩固记录", index=False)
-        pd.DataFrame(sheet4_rows, columns=SHEET4_COLUMNS).to_excel(writer, sheet_name="单元挑战记录", index=False)
-        pd.DataFrame(sheet5_rows, columns=SHEET5_COLUMNS).to_excel(writer, sheet_name="单元总结记录", index=False)
-        stats_component_df.to_excel(writer, sheet_name="统计-互动组件通过情况", index=False)
-        stats_kp_df.to_excel(writer, sheet_name="统计-知识点通过情况", index=False)
-        stats_unit_time_df.to_excel(writer, sheet_name="统计-单元总结时长", index=False)
-
-    print(f"  [Excel] 写入完成，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-
-
-def get_date_str() -> str:
-    """获取当前日期字符串 格式：YYYYMMDD"""
-    return datetime.datetime.now().strftime("%Y%m%d")
-
-
-def export_single_user(user_id: str, es_cfg: Dict[str, Any], pg_conn: Any, mysql_conn: Any, output_path: str, id_2_unit_index: Dict[int, int], chapter_id_to_lesson_id: Dict[int, int]) -> bool:
-    """
-    导出单个角色id的数据
-
-    Args:
-        user_id: 角色ID
-        es_cfg: ES配置
-        pg_conn: PostgreSQL连接
-        mysql_conn: MySQL连接
-        output_path: 输出路径
-        id_2_unit_index: story_id到unit_id的映射字典
-        chapter_id_to_lesson_id: chapter_id到lesson_id的映射字典
-
-    Returns:
-        True表示成功，False表示失败
-    """
-    try:
-        print(f"\n[INFO] ========== 开始导出角色id={user_id} ==========")
-        total_start_time = datetime.datetime.now()
-
-        # 查询ES数据
-        sheet1_rows = fetch_es_user_audio(user_id, es_cfg)
-
-        # 查询PG数据
-        sheet2_rows = fetch_pg_play_records(user_id, pg_conn, mysql_conn)
-        sheet3_rows = fetch_pg_unit_review(user_id, pg_conn, id_2_unit_index, chapter_id_to_lesson_id)
-        sheet4_rows = fetch_pg_unit_challenge(user_id, pg_conn, id_2_unit_index)
-        sheet5_rows = fetch_pg_unit_summary(user_id, pg_conn, id_2_unit_index)
-
-        # 检查是否有有效数据
-        total_records = len(sheet1_rows) + len(sheet2_rows) + len(sheet3_rows) + len(sheet4_rows) + len(sheet5_rows)
-        print(f"  [统计] 数据汇总:")
-        print(f"    - 全部音频数据: {len(sheet1_rows)}条")
-        print(f"    - 互动组件学习记录: {len(sheet2_rows)}条")
-        print(f"    - 课程巩固记录: {len(sheet3_rows)}条")
-        print(f"    - 单元挑战记录: {len(sheet4_rows)}条")
-        print(f"    - 单元总结记录: {len(sheet5_rows)}条")
-        print(f"    - 总计: {total_records}条")
-
-        if total_records == 0:
-            print(f"[WARN] 角色id={user_id} 没有找到任何有效记录，跳过导出")
-            return False
-
-        # 生成汇总统计数据
-        stats_component_df, stats_kp_df, stats_unit_time_df = generate_statistics(sheet2_rows, sheet5_rows)
-
-        # 写入Excel
-        write_excel(output_path, sheet1_rows, sheet2_rows, sheet3_rows, sheet4_rows, sheet5_rows, stats_component_df, stats_kp_df, stats_unit_time_df)
-
-        total_time = (datetime.datetime.now() - total_start_time).total_seconds()
-        print(f"[INFO] 角色id={user_id} 导出成功")
-        print(f"[INFO] 文件路径: {output_path}")
-        print(f"[INFO] 总耗时: {total_time:.2f}秒")
-        print(f"[INFO] ========== 完成 ==========\n")
-        return True
-
-    except Exception as e:
-        print(f"[ERROR] 角色id={user_id} 导出失败: {e}")
-        import traceback
-        traceback.print_exc()
-        return False
-
-
-def main():
-    load_env()
-
-    # 确定运行模式并收集需要导出的角色id列表
-    user_id_list: List[tuple] = []  # [(user_id, account_id or None), ...]
-    date_str = get_date_str()
-
-    # 检查三种模式的配置
-    has_user_id = USER_ID is not None
-    has_user_id_list = USER_ID_LIST is not None and len(USER_ID_LIST) > 0
-    has_account_id_list = ACCOUNT_ID_LIST is not None and len(ACCOUNT_ID_LIST) > 0
-
-    # 验证只能配置一种模式
-    mode_count = sum([has_user_id, has_user_id_list, has_account_id_list])
-    if mode_count == 0:
-        raise RuntimeError("请配置 USER_ID、USER_ID_LIST 或 ACCOUNT_ID_LIST 中的一个")
-    if mode_count > 1:
-        raise RuntimeError("USER_ID、USER_ID_LIST、ACCOUNT_ID_LIST 只能配置一个，请检查配置")
-
-    # 模式1：单个角色id
-    if has_user_id:
-        user_id_list = [(str(USER_ID), None)]
-        print(f"[INFO] 运行模式：单个角色id")
-
-    # 模式2：角色id列表
-    elif has_user_id_list:
-        user_id_list = [(str(uid), None) for uid in USER_ID_LIST]
-        print(f"[INFO] 运行模式：角色id列表，共{len(user_id_list)}个角色")
-
-    # 模式3：账户id列表
-    elif has_account_id_list:
-        print(f"[INFO] 运行模式：账户id列表，共{len(ACCOUNT_ID_LIST)}个账户")
-        mysql_conn = None
-        try:
-            mysql_conn = get_mysql_conn("vala_user")  # 查询用户表，使用 vala_user 数据库
-            for account_id in ACCOUNT_ID_LIST:
-                account_id_str = str(account_id)
-                print(f"[INFO] 查询账户id={account_id_str}对应的角色id...")
-                character_ids = fetch_character_ids_by_account(account_id_str, mysql_conn)
-                if not character_ids:
-                    print(f"[WARN] 账户id={account_id_str} 未找到关联的角色id，跳过")
-                    continue
-                print(f"[INFO] 账户id={account_id_str} 找到{len(character_ids)}个角色id: {character_ids}")
-                for cid in character_ids:
-                    user_id_list.append((cid, account_id_str))
-        finally:
-            if mysql_conn:
-                try:
-                    mysql_conn.close()
-                except Exception:
-                    pass
-
-    if not user_id_list:
-        print("[WARN] 没有需要导出的角色id，程序退出")
-        return
-
-    # 初始化连接
-    es_cfg = get_es_config()
-    pg_conn = get_pg_conn()
-
-    # 获取映射表（只需要查询一次，所有角色共用）
-    print(f"\n[INFO] ===== 准备工作：获取映射表 =====")
-    mysql_conn = None
-    id_2_unit_index = {}
-    chapter_id_to_lesson_id = {}
-    try:
-        print(f"[INFO] 正在连接MySQL数据库（vala_test）...")
-        mysql_conn = get_mysql_conn("vala_test")  # 查询游戏配置表，使用 vala_test 数据库
-        print(f"[INFO] 正在获取 story_id 到 unit_id 的映射...")
-        id_2_unit_index = get_id_2_unit_index(mysql_conn)
-        print(f"[INFO] 成功获取 {len(id_2_unit_index)} 个 story_id 映射")
-        print(f"[INFO] 正在获取 chapter_id 到 lesson_id 的映射...")
-        chapter_id_to_lesson_id = get_chapter_id_to_lesson_id(mysql_conn)
-        print(f"[INFO] 成功获取 {len(chapter_id_to_lesson_id)} 个 chapter_id 映射")
-    except Exception as e:
-        print(f"[ERROR] 获取映射表失败: {e}")
-        import traceback
-        traceback.print_exc()
-        if pg_conn:
-            try:
-                pg_conn.close()
-            except Exception:
-                pass
-        if mysql_conn:
-            try:
-                mysql_conn.close()
-            except Exception:
-                pass
-        return
-
-    try:
-        # 统计信息
-        success_count = 0
-        skip_count = 0
-
-        print(f"\n[INFO] ===== 开始批量导出 =====")
-        print(f"[INFO] 共需导出{len(user_id_list)}个角色\n")
-        batch_start_time = datetime.datetime.now()
-
-        # 循环处理每个角色id
-        for idx, (user_id, account_id) in enumerate(user_id_list, 1):
-            print(f"\n{'='*60}")
-            print(f"[INFO] 进度: {idx}/{len(user_id_list)} ({idx*100//len(user_id_list)}%)")
-            print(f"{'='*60}")
-
-            # 生成输出文件名
-            if account_id is None:
-                # 模式1和模式2：角色id_{}_导出时间_{}.xlsx
-                filename = f"角色id_{user_id}_导出时间_{date_str}.xlsx"
-            else:
-                # 模式3：账户id_{}_角色id_{}_导出时间_{}.xlsx
-                filename = f"账户id_{account_id}_角色id_{user_id}_导出时间_{date_str}.xlsx"
-
-            output_path = os.path.join(OUTPUT_DIR, filename)
-
-            # 导出单个角色的数据
-            result = export_single_user(user_id, es_cfg, pg_conn, mysql_conn, output_path, id_2_unit_index, chapter_id_to_lesson_id)
-            if result:
-                success_count += 1
-            else:
-                skip_count += 1
-
-        # 输出统计信息
-        batch_total_time = (datetime.datetime.now() - batch_start_time).total_seconds()
-        print(f"\n{'='*60}")
-        print(f"[INFO] ===== 全部导出完成 =====")
-        print(f"[INFO] 总计: {len(user_id_list)}个角色")
-        print(f"[INFO] 成功: {success_count}个")
-        print(f"[INFO] 跳过: {skip_count}个")
-        print(f"[INFO] 总耗时: {batch_total_time:.2f}秒 ({batch_total_time/60:.2f}分钟)")
-        if success_count > 0:
-            print(f"[INFO] 平均每个角色: {batch_total_time/success_count:.2f}秒")
-        print(f"{'='*60}\n")
-
-    finally:
-        if pg_conn:
-            try:
-                pg_conn.close()
-            except Exception:
-                pass
-        if mysql_conn:
-            try:
-                mysql_conn.close()
-            except Exception:
-                pass
-
-
-if __name__ == "__main__":
-    main()
diff --git a/new_export/export_user_id_data_latest.py b/new_export/export_user_id_data_latest.py
deleted file mode 100644
index 22495f5..0000000
--- a/new_export/export_user_id_data_latest.py
+++ /dev/null
@@ -1,1846 +0,0 @@
-"""
-初版需求v1.0: 2025.11.18
-
-导出 一个userId的多表数据， 最终按照不同sheet，输出到一个 excel文件中。
-
-1. 第一个sheet:"全部音频数据"
-es相关配置通过以下环境变量
-ES_HOST=xxx
-ES_PORT=9200
-ES_SCHEME=https
-ES_USER=elastic
-ES_PASSWORD=xxx
-
-index: user-audio
-
-脚本思路:
-过滤字段:
-userId == xxxx
-
-输出该userId的全部记录 按时间倒序排序
-包含以下字段内容:
-
-userId
-userMsg
-userName
-soeData
-audioUrl
-asrStatus
-componentId
-componentType
-dataVersion
-
-2. 第二个sheet:"互动组件学习记录"
-在 PGsql数据库中  筛选出 user_id 对应的记录 按时间(updated_at)倒序排列。
-数据库相关配置  从.env中读取:
-PG_DB_HOST = xxx
-PG_DB_PORT = xxx
-PG_DB_USER = xxx
-PG_DB_PASSWORD = xxx
-PG_DB_DATABASE = xxx
-
-读取以下数据表: 
-user_component_play_record_0 ~ user_component_play_record_7
-
-输出以下字段：
-user_id,
-component_unique_code,
-session_id,
-c_type,
-c_id,
-play_result,
-user_behavior_info,
-updated_at
-
-3.第三个sheet:"课程巩固记录"
-在 PGsql数据库中  筛选出 user_id 对应的记录 按时间(updated_at)倒序排列。
-
-数据表:user_unit_review_question_result
-
-输出以下字段:
-user_id
-story_id
-chapter_id
-question_list
-updated_at
-
-4.第四个sheet:"单元挑战记录"
-在 PGsql数据库中  筛选出 user_id 对应的记录 按时间(updated_at)倒序排列。
-
-数据表:user_unit_challenge_question_result
-
-输出以下字段:
-user_id
-story_id
-category
-score_text,
-question_list
-updated_at
-------------
-
-需求补充v1.1:
-"全部音频数据"这个sheet
-输出字段 添加timeStr 并按时间倒序排列  最新的记录 在最上面
-
-------------
-需求补充v1.2:
-"全部音频数据"这个sheet
-如果userMsg字段内容 包含 ”makee_id“ 要进行以下处理：
-
-从userMsg字段中提取出具体的makee_id:
-此时的字段样例:
-```
-asr msg信息为：{
-    "time_ms": 358,
-    "time_ms_api": 357,
-    "hot_words_str": "{\n \"context_type\": \"dialog_ctx\",\n \"context_data\": [\n  {\n   \"text\": \"planet Walla\"\n  },\n  {\n   \"text\": \"Walla\"\n  }\n ]\n}",
-    "makee_id": "d208c617-902f-4f81-8255-b5fb73599546",
-    "volcano_fast_x_tt_logid": "202511151541355DF72BE5EBFE73795BFD",
-    "api_name": "volcano-fast"
-}
-```
-然后基于makee_id 去另一个表里查记录:  index:llm_asr_log
-将查询到的记录的 result_text 字段内容 回填到 userMsg。
-将source字段内容 输出 到 source。
-
-如果userMsg字段内容 不包含 ”makee_id“ 保持之前的逻辑。
-
---------------
-需求补充 v1.3
-当前输入 只支持配置单个 userId (业务侧名称为角色id)
-
-
-期望扩展为以下逻辑:
-1. 改为配置 角色id list ， 分别 导出 多份excel文件。命名格式为 角色id_{}_导出时间_{}.xlsx
-2. 改为配置 账户id list ， 分别 导出 多份excel文件。命名格式为 账户id_{}_角色id_{}_导出时间_{}.xlsx
-
-关于 账户 id 到角色id 的映射逻辑，
-首先 读取 mysql 表 vala_app_character
-筛选 account_id字段值 == 账户id 的 记录， 其中 该记录 的 id值，则为角色id 一个 账户id 可以对应多个角色id
-
-本次需求只针对输入侧调整， 数据抽取聚合逻辑部分和之前保持一致
-
----------------
-需求补充 v1.4
-
-增加一个sheet "单元总结记录"，
-导出对应角色id的单元总结记录。   参考 export_unit_summary.py 中的原始数据提取方案即可(不必关注其中的数据统计部分)。
-
-其他已有逻辑保持不动哦。
-
-----------------
-需求补充 v1.5
-
-1."互动组件学习记录"sheet 增加以下字段
-"互动组件名称"、"组件标题"、"组件配置摘要"、"知识点":
-字段取值规则:
-根据 c_type 及组件配置(从mysql表获取) 进行映射和处理:
-```
-1）.如果 c_type 开头为"mid"
-
-则读取下表:表名:middle_interaction_component
-
-获取以下字段值:
-title (作为组件标题)
-component_config (完整的组件配置)   获取其中 的 question 字段值 作为 组件配置摘要；
-kp_relation_info 字段值  作为 知识点
-
-"互动组件名称"规则:
-
-"物品互动": "mid_vocab_item",
-"图片互动": "mid_vocab_image",
-"填词互动": "mid_vocab_fillBlank",
-"指令互动": "mid_vocab_instruction"
-"对话互动-表达": "mid_sentence_dialogue", 且 component_config->question->mode == "express"
-"对话互动-朗读": "mid_sentence_dialogue", 且 component_config->question->mode == "read"
-"语音互动": "mid_sentence_voice",
-"材料互动": "mid_sentence_material",
-"造句互动": "mid_sentence_makeSentence"
-"挖空互动": "mid_grammar_cloze",
-"组句互动": "mid_grammar_sentence"
-"发音互动": "mid_pron_pron"
-
-
-2）. 如果 c_type 开头为"core"
-则读取下表:表名:core_interaction_component
-
-获取以下字段值:
-title (作为组件标题)
-component_config (完整的组件配置)   获取其中 的 taskInfo 字段值 作为 组件配置摘要
-kp_relation_info 字段值  作为 知识点
-
-"互动组件名称"规则:
-"口语快答": "core_speaking_reply",
-"口语妙问": "core_speaking_inquiry",
-"口语探讨": "core_speaking_explore",
-"口语独白": "core_speaking_monologue"
-"合作阅读": "core_reading_order",
-"合作听力": "core_listening_order",
-"看图组句": "core_writing_imgMakeSentence",
-"看图撰写": "core_writing_imgWrite",
-"问题组句": "core_writing_questionMakeSentence",
-"问题撰写": "core_writing_questionWrite",
-```
-
-2."课程巩固记录" sheet 增加以下字段
-"正确率":  参考 export_lesson_review.py 中的计算逻辑
-
-3. 新增一个"汇总统计"sheet
-统计并展示以下内容   请以 可读性 比较好的方式排列、展示
-
-a. "所有互动-按互动组件类型-通过情况统计"
-以每种"互动组件名称"进行聚合
-统计play_result的取值分布情况，算以下指标:
-总数量、Perfect数量、Good数量、Failed数量、Pass数量、Perfect比例、Good比例、Failed比例、Pass比例
-
-b. "中互动组件-按知识点-通过情况统计"
-以每个知识点进行聚合
-
-其中 知识点配置格式如下:
-```
-[{"kpId":"0000004","kpType":"sentence","kpTitle":"My name is ...","kpSkill":"sentence_pron","kpSkillName":"语音"},{"kpId":"0000004","kpType":"sentence","kpTitle":"My name is ...","kpSkill":"sentence_meaning","kpSkillName":"语义"},{"kpId":"0000005","kpType":"sentence","kpTitle":"I'm… years old.","kpSkill":"sentence_pron","kpSkillName":"语音"},{"kpId":"0000005","kpType":"sentence","kpTitle":"I'm… years old.","kpSkill":"sentence_meaning","kpSkillName":"语义"},{"kpId":"0000014","kpType":"sentence","kpTitle":"Nice to meet you.","kpSkill":"sentence_pron","kpSkillName":"语音"},{"kpId":"0000014","kpType":"sentence","kpTitle":"Nice to meet you.","kpSkill":"sentence_meaning","kpSkillName":"语义"}]
-```
-一个组件可以绑定多个知识点，以每个知识点的 kpId + kpType + kpTitle 进行 展示及聚合
-
-对所有绑定了某个知识点的中互动组件(c_type以mid开头)
-统计play_result的取值分布情况，算以下指标:
-总数量、Perfect数量、Good数量、Failed数量、Pass数量、Perfect比例、Good比例、Failed比例、Pass比例
-
-c. "单元总结-按单元统计时长"
-
-将"单元总结记录"中的"play_time_seconds"字段值 以每个单元id 进行聚合 进行 累加 统计，并增加一列 转换为分钟为单位 取整数
-
-
-"""
-# ==== 可直接修改的脚本变量（不使用命令行传参） ====
-# 三种模式互斥，只能配置一个：
-# 模式1：单个角色id
-USER_ID = None # 单个角色ID，示例：2911
-
-# 模式2：角色id列表（多个角色id批量导出）
-USER_ID_LIST = None  # 角色ID列表，示例：[2911, 2912, 2913]
-
-# 模式3：账户id列表（通过账户id查询对应的角色id后批量导出）
-ACCOUNT_ID_LIST = [9343] # 5095[7232] # [1783,5375,5371,5345,5303,5293,5095,4289,4494,4473,4460,4452,4386,4388,4236,4043,2758,2841,2756,2750,2692,1781,1693,2256,2234,2373]  # 账户ID列表，示例：[100, 101, 102]
-
-OUTPUT_DIR = "output_latest/"  # 输出目录，默认为output文件夹
-# ==== 变量结束 ====
-import os
-import json
-import re
-from typing import Any, Dict, List, Optional
-
-import datetime
-
-try:
-    import requests
-except Exception:
-    requests = None
-
-try:
-    import psycopg2
-    from psycopg2.extras import RealDictCursor
-except Exception:
-    psycopg2 = None
-    RealDictCursor = None
-
-try:
-    import pymysql
-    import pymysql.cursors
-except Exception:
-    pymysql = None
-
-try:
-    import pandas as pd
-except Exception:
-    pd = None
-
-try:
-    import urllib3
-except Exception:
-    urllib3 = None
-
-
-SHEET1_COLUMNS = [
-    "userId",
-    "userMsg",
-    "source",
-    "userName",
-    "soeData",
-    "audioUrl",
-    "asrStatus",
-    "componentId",
-    "componentType",
-    "dataVersion",
-    "timeStr",
-]
-
-SHEET2_COLUMNS = [
-    "user_id",
-    "component_unique_code",
-    "session_id",
-    "c_type",
-    "c_id",
-    "互动组件名称",
-    "组件标题",
-    "组件配置摘要",
-    "知识点",
-    "play_result",
-    "user_behavior_info",
-    "updated_at",
-]
-
-SHEET3_COLUMNS = [
-    "user_id",
-    "unit_id",
-    "lesson_id",
-    "question_list",
-    "正确率",
-    "updated_at",
-]
-
-SHEET4_COLUMNS = [
-    "user_id",
-    "unit_id",
-    "category",
-    "score_text",
-    "question_list",
-    "updated_at",
-]
-
-SHEET5_COLUMNS = [
-    "id",
-    "user_id",
-    "unit_id",
-    "updated_at",
-    "km_id",
-    "km_type",
-    "play_time_seconds",
-]
-
-
-def _load_env_file(path: str) -> None:
-    if not os.path.exists(path):
-        return
-    try:
-        with open(path, "r", encoding="utf-8") as f:
-            for line in f:
-                line = line.strip()
-                if not line or line.startswith("#"):
-                    continue
-                if "=" not in line:
-                    continue
-                k, v = line.split("=", 1)
-                k = k.strip()
-                v = v.strip().strip('"').strip("'")
-                if k and (os.getenv(k) is None):
-                    os.environ[k] = v
-    except Exception:
-        pass
-
-
-def load_env() -> None:
-    _load_env_file(os.path.join(os.getcwd(), ".env"))
-    _load_env_file(os.path.join(os.getcwd(), ".env.local"))
-
-
-def to_json_str(v: Any) -> Any:
-    if isinstance(v, (dict, list)):
-        try:
-            return json.dumps(v, ensure_ascii=False)
-        except Exception:
-            return str(v)
-    return v
-
-
-def parse_time(value: Any) -> Optional[datetime.datetime]:
-    if value is None:
-        return None
-    if isinstance(value, (int, float)):
-        try:
-            v = float(value)
-            # 兼容毫秒级时间戳
-            if v > 1e11:
-                v = v / 1000.0
-            return datetime.datetime.fromtimestamp(v)
-        except Exception:
-            return None
-    if isinstance(value, str):
-        fmts = [
-            "%Y-%m-%dT%H:%M:%S.%fZ",
-            "%Y-%m-%dT%H:%M:%S.%f%z",
-            "%Y-%m-%dT%H:%M:%S%z",
-            "%Y-%m-%d %H:%M:%S",
-            "%Y-%m-%d",
-        ]
-        for fmt in fmts:
-            try:
-                return datetime.datetime.strptime(value, fmt)
-            except Exception:
-                continue
-        try:
-            return datetime.datetime.fromisoformat(value)
-        except Exception:
-            return None
-    return None
-
-
-def pick_time(source: Dict[str, Any]) -> Optional[datetime.datetime]:
-    candidates = [
-        "updated_at",
-        "created_at",
-        "@timestamp",
-        "timestamp",
-        "updatedAt",
-        "createdAt",
-        "time",
-        "ts",
-        "timeStr",
-        "update_time",
-        "create_time",
-    ]
-    for key in candidates:
-        if key in source:
-            t = parse_time(source.get(key))
-            if t is not None:
-                return t
-    # 宽松匹配：尝试扫描所有可能的时间相关字段
-    for k, v in source.items():
-        lk = str(k).lower()
-        if any(s in lk for s in ["time", "date", "_at", "timestamp"]):
-            t = parse_time(v)
-            if t is not None:
-                return t
-    return None
-
-
-def extract_makee_id_from_user_msg(user_msg: Any) -> Optional[str]:
-    # 支持dict或字符串形式
-    if isinstance(user_msg, dict):
-        mk = user_msg.get("makee_id")
-        if isinstance(mk, str) and mk:
-            return mk
-    if isinstance(user_msg, str) and user_msg:
-        # 1) 尝试整体解析为JSON
-        try:
-            obj = json.loads(user_msg)
-            mk = obj.get("makee_id")
-            if isinstance(mk, str) and mk:
-                return mk
-        except Exception:
-            pass
-        # 2) 尝试截取大括号中的JSON
-        try:
-            start = user_msg.find("{")
-            end = user_msg.rfind("}")
-            if start != -1 and end != -1 and end > start:
-                candidate = user_msg[start : end + 1]
-                obj = json.loads(candidate)
-                mk = obj.get("makee_id")
-                if isinstance(mk, str) and mk:
-                    return mk
-        except Exception:
-            pass
-        # 3) 正则匹配 makee_id
-        m = re.search(r"\bmakee_id\b\s*:\s*\"([^\"]+)\"", user_msg)
-        if m:
-            return m.group(1)
-    return None
-
-
-def fetch_es_asr_log(makee_id: str, es_cfg: Dict[str, Any]) -> Optional[Dict[str, Any]]:
-    if requests is None:
-        raise RuntimeError("缺少requests依赖，请安装后再运行。")
-    host = es_cfg.get("host")
-    port = es_cfg.get("port")
-    scheme = es_cfg.get("scheme", "http")
-    user = es_cfg.get("user")
-    password = es_cfg.get("password")
-    index = "llm_asr_log"
-    if not host:
-        return None
-    base = f"{scheme}://{host}:{port}"
-    url = f"{base}/{index}/_search"
-    headers = {"Content-Type": "application/json"}
-    body = {
-        "query": {
-            "bool": {
-                "should": [
-                    {"term": {"makee_id": {"value": str(makee_id)}}},
-                    {"term": {"makee_id.keyword": {"value": str(makee_id)}}},
-                ],
-                "minimum_should_match": 1,
-            }
-        },
-        "size": 10,
-        "_source": [
-            "makee_id",
-            "result_text",
-            "source",
-            "updated_at",
-            "created_at",
-            "@timestamp",
-            "timestamp",
-            "updatedAt",
-            "createdAt",
-            "time",
-            "ts",
-            "timeStr",
-            "update_time",
-            "create_time",
-        ],
-    }
-    auth = (user, password) if user and password else None
-    try:
-        if scheme == "https" and urllib3 is not None:
-            try:
-                urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-            except Exception:
-                pass
-        resp = requests.post(url, headers=headers, json=body, auth=auth, timeout=20, verify=False if scheme == "https" else True)
-        resp.raise_for_status()
-        data = resp.json()
-    except Exception:
-        return None
-    hits = data.get("hits", {}).get("hits", [])
-    if not hits:
-        return None
-    # 选最新的
-    chosen = None
-    best_t = None
-    for h in hits:
-        src = h.get("_source", {}) or {}
-        t = pick_time(src)
-        if t is None:
-            continue
-        if best_t is None or t > best_t:
-            best_t = t
-            chosen = src
-    if chosen is None:
-        # 如果都没有时间，选第一条
-        chosen = (hits[0].get("_source", {}) or {})
-    return chosen
-
-
-def get_es_config() -> Dict[str, Any]:
-    return {
-        "host": os.getenv("ES_HOST"),
-        "port": os.getenv("ES_PORT", "9200"),
-        "scheme": os.getenv("ES_SCHEME", "http"),
-        "user": os.getenv("ES_USER"),
-        "password": os.getenv("ES_PASSWORD"),
-        "index": "user-audio",
-    }
-
-
-def fetch_es_user_audio(user_id: str, es_cfg: Dict[str, Any]) -> List[Dict[str, Any]]:
-    if requests is None:
-        raise RuntimeError("缺少requests依赖，请安装后再运行。")
-
-    print(f"  [ES] 开始查询user-audio索引...")
-    start_time = datetime.datetime.now()
-
-    host = es_cfg.get("host")
-    port = es_cfg.get("port")
-    scheme = es_cfg.get("scheme", "http")
-    user = es_cfg.get("user")
-    password = es_cfg.get("password")
-    index = es_cfg.get("index", "user-audio")
-
-    if not host:
-        return []
-
-    base = f"{scheme}://{host}:{port}"
-    url = f"{base}/{index}/_search"
-    headers = {"Content-Type": "application/json"}
-
-    body = {
-        "query": {
-            "bool": {
-                "should": [
-                    {"term": {"userId": {"value": str(user_id)}}},
-                    {"term": {"userId.keyword": {"value": str(user_id)}}},
-                ],
-                "minimum_should_match": 1,
-            }
-        },
-        "size": 10000,
-        "_source": [
-            "userId",
-            "userMsg",
-            "userName",
-            "soeData",
-            "audioUrl",
-            "asrStatus",
-            "componentId",
-            "componentType",
-            "dataVersion",
-            "updated_at",
-            "created_at",
-            "@timestamp",
-            "timestamp",
-            "updatedAt",
-            "createdAt",
-            "time",
-            "ts",
-            "timeStr",
-            "update_time",
-            "create_time",
-        ],
-    }
-
-    auth = (user, password) if user and password else None
-
-    try:
-        # 抑制自签证书下的HTTPS不安全警告
-        if scheme == "https" and urllib3 is not None:
-            try:
-                urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-            except Exception:
-                pass
-        resp = requests.post(url, headers=headers, json=body, auth=auth, timeout=30, verify=False if scheme == "https" else True)
-        resp.raise_for_status()
-        data = resp.json()
-    except Exception as e:
-        raise RuntimeError(f"ES查询失败: {e}")
-
-    hits = data.get("hits", {}).get("hits", [])
-    print(f"  [ES] 查询完成，获得{len(hits)}条记录，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-
-    if not hits:
-        return []
-
-    print(f"  [ES] 开始处理音频数据...")
-    process_start = datetime.datetime.now()
-
-    rows: List[Dict[str, Any]] = []
-    asr_cache: Dict[str, Dict[str, Any]] = {}
-    makee_id_count = 0
-
-    for idx, h in enumerate(hits, 1):
-        # 每处理100条显示一次进度
-        if idx % 100 == 0 or idx == len(hits):
-            print(f"  [ES] 处理进度: {idx}/{len(hits)} ({idx*100//len(hits)}%)")
-
-        src = h.get("_source", {}) or {}
-        row = {
-            "userId": src.get("userId"),
-            "userMsg": src.get("userMsg"),
-            "source": None,
-            "userName": src.get("userName"),
-            "soeData": to_json_str(src.get("soeData")),
-            "audioUrl": src.get("audioUrl"),
-            "asrStatus": src.get("asrStatus"),
-            "componentId": src.get("componentId"),
-            "componentType": src.get("componentType"),
-            "dataVersion": src.get("dataVersion"),
-        }
-        t = pick_time(src)
-        row["_time"] = t.isoformat() if t else None
-        row["timeStr"] = t.strftime("%Y-%m-%d %H:%M:%S") if t else None
-        # v1.2: 当userMsg包含makee_id时，补充查询llm_asr_log并回填
-        mk = extract_makee_id_from_user_msg(row.get("userMsg"))
-        if mk:
-            makee_id_count += 1
-            asr_doc = asr_cache.get(mk)
-            if asr_doc is None:
-                asr_doc = fetch_es_asr_log(mk, es_cfg)
-                if asr_doc is not None:
-                    asr_cache[mk] = asr_doc
-            if asr_doc is not None:
-                rt = asr_doc.get("result_text")
-                if rt:
-                    row["userMsg"] = rt
-                row["source"] = to_json_str(asr_doc.get("source"))
-        rows.append(row)
-
-    print(f"  [ES] 数据处理完成，发现{makee_id_count}条包含makee_id的记录，耗时{(datetime.datetime.now() - process_start).total_seconds():.2f}秒")
-
-    print(f"  [ES] 开始排序...")
-    rows.sort(key=lambda x: parse_time(x.get("_time")) or datetime.datetime.min, reverse=True)
-    print(f"  [ES] 音频数据处理完成，总耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-
-    return rows
-
-
-def get_pg_conn() -> Any:
-    if psycopg2 is None:
-        raise RuntimeError("缺少psycopg2依赖，请安装后再运行。")
-    host = os.getenv("PG_DB_HOST")
-    port = int(os.getenv("PG_DB_PORT", "5432"))
-    user = os.getenv("PG_DB_USER")
-    password = os.getenv("PG_DB_PASSWORD")
-    dbname = os.getenv("PG_DB_DATABASE")
-    if not host or not dbname:
-        raise RuntimeError("PG数据库环境变量未配置完整")
-    conn = psycopg2.connect(host=host, port=port, user=user, password=password, dbname=dbname)
-    return conn
-
-
-def get_mysql_conn(database: str) -> Any:
-    """
-    获取MySQL数据库连接
-
-    Args:
-        database: 数据库名，可选值：'vala_user' 或 'vala_test'
-                 vala_user 使用 online 配置（环境变量后缀 _online）
-                 vala_test 使用默认配置
-
-    Returns:
-        MySQL连接对象
-    """
-    if pymysql is None:
-        raise RuntimeError("缺少pymysql依赖，请安装后再运行。")
-
-    # 根据数据库选择不同的环境变量配置
-    if database == "vala_user":
-        # vala_user 数据库使用 online 配置
-        host = os.getenv("MYSQL_HOST_online")
-        port = int(os.getenv("MYSQL_PORT_online", "3306"))
-        user = os.getenv("MYSQL_USERNAME_online")
-        password = os.getenv("MYSQL_PASSWORD_online")
-        if not host:
-            raise RuntimeError("MySQL数据库环境变量未配置完整（缺少MYSQL_HOST_online）")
-    else:
-        # vala_test 等其他数据库使用默认配置
-        host = os.getenv("MYSQL_HOST")
-        port = int(os.getenv("MYSQL_PORT", "3306"))
-        user = os.getenv("MYSQL_USERNAME")
-        password = os.getenv("MYSQL_PASSWORD")
-        if not host:
-            raise RuntimeError("MySQL数据库环境变量未配置完整（缺少MYSQL_HOST）")
-
-    conn = pymysql.connect(
-        host=host,
-        port=port,
-        user=user,
-        password=password,
-        database=database,  # 直接使用传入的数据库名
-        charset="utf8mb4",
-        cursorclass=pymysql.cursors.DictCursor,
-    )
-    return conn
-
-
-def get_id_2_unit_index(conn: Any) -> Dict[int, int]:
-    """
-    从MySQL获取 story_id 到 unit_id 的映射关系
-
-    Args:
-        conn: MySQL数据库连接
-
-    Returns:
-        映射字典 {story_id: unit_id}
-    """
-    sql = """
-    SELECT *
-    FROM `vala_game_info`
-    WHERE id > 0
-      AND `vala_game_info`.`deleted_at` IS NULL
-    ORDER BY season_package_id asc, `index` asc
-    """
-    try:
-        with conn.cursor() as cur:
-            cur.execute(sql)
-            rows = cur.fetchall() or []
-            # 构建映射表：按查询结果的顺序，索引即为unit_id
-            id_2_unit_index = {}
-            for index, row in enumerate(rows):
-                id_2_unit_index[row["id"]] = index
-            return id_2_unit_index
-    except Exception as e:
-        print(f"[ERROR] 获取story_id到unit_id映射失败: {e}")
-        return {}
-
-
-def get_chapter_id_to_lesson_id(conn: Any) -> Dict[int, int]:
-    """
-    从MySQL获取 chapter_id 到 lesson_id 的映射关系
-
-    Args:
-        conn: MySQL数据库连接
-
-    Returns:
-        映射字典 {chapter_id: lesson_id}
-    """
-    sql = """
-    SELECT id, `index`
-    FROM `vala_game_chapter`
-    WHERE deleted_at IS NULL
-    """
-    try:
-        with conn.cursor() as cur:
-            cur.execute(sql)
-            rows = cur.fetchall() or []
-            # 构建映射表：chapter的index字段即为lesson_id
-            chapter_id_to_lesson_id = {}
-            for row in rows:
-                chapter_id_to_lesson_id[row["id"]] = row["index"]
-            return chapter_id_to_lesson_id
-    except Exception as e:
-        print(f"[ERROR] 获取chapter_id到lesson_id映射失败: {e}")
-        return {}
-
-
-# 组件类型到组件名称的映射
-COMPONENT_TYPE_NAMES = {
-    "mid_vocab_item": "物品互动",
-    "mid_vocab_image": "图片互动",
-    "mid_vocab_fillBlank": "填词互动",
-    "mid_vocab_instruction": "指令互动",
-    "mid_sentence_dialogue": "对话互动",  # 需要根据mode进一步判断
-    "mid_sentence_voice": "语音互动",
-    "mid_sentence_material": "材料互动",
-    "mid_sentence_makeSentence": "造句互动",
-    "mid_grammar_cloze": "挖空互动",
-    "mid_grammar_sentence": "组句互动",
-    "mid_pron_pron": "发音互动",
-    "core_speaking_reply": "口语快答",
-    "core_speaking_inquiry": "口语妙问",
-    "core_speaking_explore": "口语探讨",
-    "core_speaking_monologue": "口语独白",
-    "core_reading_order": "合作阅读",
-    "core_listening_order": "合作听力",
-    "core_writing_imgMakeSentence": "看图组句",
-    "core_writing_imgWrite": "看图撰写",
-    "core_writing_questionMakeSentence": "问题组句",
-    "core_writing_questionWrite": "问题撰写",
-}
-
-
-def get_component_name(c_type: str, component_config: Optional[Dict[str, Any]]) -> str:
-    """
-    根据c_type和组件配置获取组件名称
-
-    Args:
-        c_type: 组件类型
-        component_config: 组件配置（用于判断对话互动的mode）
-
-    Returns:
-        组件名称
-    """
-    if not c_type:
-        return ""
-
-    # 特殊处理：对话互动需要根据mode判断
-    if c_type == "mid_sentence_dialogue" and component_config:
-        try:
-            question = component_config.get("question", {})
-            mode = question.get("mode", "")
-            if mode == "express":
-                return "对话互动-表达"
-            elif mode == "read":
-                return "对话互动-朗读"
-        except Exception:
-            pass
-
-    return COMPONENT_TYPE_NAMES.get(c_type, "")
-
-
-def batch_fetch_component_configs(play_records: List[Dict[str, Any]], mysql_conn: Any) -> Dict[str, Dict[str, Any]]:
-    """
-    批量查询组件配置信息
-
-    Args:
-        play_records: 播放记录列表
-        mysql_conn: MySQL连接
-
-    Returns:
-        组件配置映射 {c_type_c_id: {title, component_config, kp_relation_info}}
-    """
-    print(f"  [MySQL] 开始批量查询组件配置...")
-    start_time = datetime.datetime.now()
-
-    # 收集需要查询的c_type和c_id
-    mid_c_ids = set()
-    core_c_ids = set()
-    mid_type_id_pairs = []  # 用于调试日志
-    core_type_id_pairs = []
-
-    for record in play_records:
-        c_type = record.get("c_type", "")
-        c_id = record.get("c_id")
-        if c_type and c_id:
-            if c_type.startswith("mid"):
-                mid_c_ids.add(c_id)
-                mid_type_id_pairs.append((c_type, c_id))
-            elif c_type.startswith("core"):
-                core_c_ids.add(c_id)
-                core_type_id_pairs.append((c_type, c_id))
-
-    print(f"  [MySQL] 需要查询中互动组件: {len(mid_c_ids)}个, 核心互动组件: {len(core_c_ids)}个")
-    if mid_c_ids:
-        print(f"  [MySQL] 中互动组件ID列表（前10个）: {sorted(list(mid_c_ids))[:10]}")
-    if core_c_ids:
-        print(f"  [MySQL] 核心互动组件ID列表（前10个）: {sorted(list(core_c_ids))[:10]}")
-
-    config_map = {}
-
-    # 批量查询middle_interaction_component
-    if mid_c_ids:
-        try:
-            with mysql_conn.cursor() as cur:
-                placeholders = ','.join(['%s'] * len(mid_c_ids))
-                sql = f"""
-                SELECT c_id, c_type, title, component_config, kp_relation_info
-                FROM middle_interaction_component
-                WHERE c_id IN ({placeholders}) AND deleted_at IS NULL
-                """
-                print(f"  [MySQL] 执行中互动组件查询，查询条件: c_id IN ({len(mid_c_ids)}个ID)")
-                cur.execute(sql, tuple(mid_c_ids))
-                rows = cur.fetchall() or []
-                print(f"  [MySQL] 查询到{len(rows)}条中互动组件配置")
-
-                if len(rows) == 0 and len(mid_c_ids) > 0:
-                    print(f"  [MySQL] [警告] 查询结果为空！可能的原因：")
-                    print(f"  [MySQL]   - 数据库中没有匹配的c_id记录")
-                    print(f"  [MySQL]   - deleted_at字段不为NULL")
-                    print(f"  [MySQL]   - c_id不存在")
-
-                for idx, row in enumerate(rows):
-                    c_type = row.get("c_type", "")
-                    c_id = row.get("c_id")
-                    key = f"{c_type}_{c_id}"
-
-                    if idx < 3:  # 输出前3条的详细信息
-                        print(f"  [MySQL] [样例{idx+1}] id={c_id}, c_type={c_type}, key={key}")
-                        print(f"  [MySQL] [样例{idx+1}] title={row.get('title', '')[:50]}")
-
-                    # 解析component_config
-                    component_config = row.get("component_config")
-                    if isinstance(component_config, str):
-                        try:
-                            component_config = json.loads(component_config)
-                        except Exception as e:
-                            print(f"  [MySQL] [警告] 解析component_config失败 (id={c_id}): {e}")
-                            component_config = {}
-
-                    # 提取question字段作为摘要
-                    summary = ""
-                    if isinstance(component_config, dict):
-                        question = component_config.get("question")
-                        summary = to_json_str(question) if question else ""
-                        if idx < 3 and question:
-                            print(f"  [MySQL] [样例{idx+1}] 提取到question字段，长度: {len(summary)}")
-
-                    # 解析kp_relation_info
-                    kp_relation_info = row.get("kp_relation_info")
-                    if isinstance(kp_relation_info, str):
-                        try:
-                            kp_relation_info = json.loads(kp_relation_info)
-                        except Exception:
-                            kp_relation_info = []
-
-                    config_map[key] = {
-                        "title": row.get("title", ""),
-                        "component_config": component_config,
-                        "summary": summary,
-                        "kp_relation_info": to_json_str(kp_relation_info),
-                    }
-
-                print(f"  [MySQL] 中互动组件配置已加入config_map，当前map大小: {len(config_map)}")
-        except Exception as e:
-            print(f"  [MySQL] [错误] 查询中互动组件配置失败: {e}")
-            import traceback
-            traceback.print_exc()
-
-    # 批量查询core_interaction_component
-    if core_c_ids:
-        try:
-            with mysql_conn.cursor() as cur:
-                placeholders = ','.join(['%s'] * len(core_c_ids))
-                sql = f"""
-                SELECT c_id, c_type, title, component_config, kp_relation_info
-                FROM core_interaction_component
-                WHERE c_id IN ({placeholders}) AND deleted_at IS NULL
-                """
-                print(f"  [MySQL] 执行核心互动组件查询，查询条件: c_id IN ({len(core_c_ids)}个ID)")
-                cur.execute(sql, tuple(core_c_ids))
-                rows = cur.fetchall() or []
-                print(f"  [MySQL] 查询到{len(rows)}条核心互动组件配置")
-
-                if len(rows) == 0 and len(core_c_ids) > 0:
-                    print(f"  [MySQL] [警告] 查询结果为空！可能的原因：")
-                    print(f"  [MySQL]   - 数据库中没有匹配的c_id记录")
-                    print(f"  [MySQL]   - deleted_at字段不为NULL")
-                    print(f"  [MySQL]   - c_id不存在")
-
-                for idx, row in enumerate(rows):
-                    c_type = row.get("c_type", "")
-                    c_id = row.get("c_id")
-                    key = f"{c_type}_{c_id}"
-
-                    if idx < 3:  # 输出前3条的详细信息
-                        print(f"  [MySQL] [样例{idx+1}] id={c_id}, c_type={c_type}, key={key}")
-                        print(f"  [MySQL] [样例{idx+1}] title={row.get('title', '')[:50]}")
-
-                    # 解析component_config
-                    component_config = row.get("component_config")
-                    if isinstance(component_config, str):
-                        try:
-                            component_config = json.loads(component_config)
-                        except Exception as e:
-                            print(f"  [MySQL] [警告] 解析component_config失败 (id={c_id}): {e}")
-                            component_config = {}
-
-                    # 提取taskInfo字段作为摘要
-                    summary = ""
-                    if isinstance(component_config, dict):
-                        task_info = component_config.get("taskInfo")
-                        summary = to_json_str(task_info) if task_info else ""
-                        if idx < 3 and task_info:
-                            print(f"  [MySQL] [样例{idx+1}] 提取到taskInfo字段，长度: {len(summary)}")
-
-                    # 解析kp_relation_info
-                    kp_relation_info = row.get("kp_relation_info")
-                    if isinstance(kp_relation_info, str):
-                        try:
-                            kp_relation_info = json.loads(kp_relation_info)
-                        except Exception:
-                            kp_relation_info = []
-
-                    config_map[key] = {
-                        "title": row.get("title", ""),
-                        "component_config": component_config,
-                        "summary": summary,
-                        "kp_relation_info": to_json_str(kp_relation_info),
-                    }
-
-                print(f"  [MySQL] 核心互动组件配置已加入config_map，当前map大小: {len(config_map)}")
-        except Exception as e:
-            print(f"  [MySQL] [错误] 查询核心互动组件配置失败: {e}")
-            import traceback
-            traceback.print_exc()
-
-    print(f"  [MySQL] 组件配置查询完成，共{len(config_map)}条，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-    return config_map
-
-
-def calculate_accuracy(question_list: Any) -> float:
-    """
-    计算问题列表的正确率
-
-    Args:
-        question_list: 问题列表（可能是JSON字符串或list）
-
-    Returns:
-        正确率（百分比，保留2位小数）
-    """
-    try:
-        if isinstance(question_list, str):
-            question_list = json.loads(question_list)
-
-        if not isinstance(question_list, list) or len(question_list) == 0:
-            return 0.0
-
-        total = len(question_list)
-        correct = sum(1 for q in question_list if q.get('isRight') == True)
-        accuracy = round(correct / total * 100, 2) if total > 0 else 0.0
-
-        return accuracy
-    except Exception:
-        return 0.0
-
-
-
-def fetch_character_ids_by_account(account_id: str, conn: Any) -> List[str]:
-    """根据账户id查询对应的角色id列表"""
-    sql = "SELECT id FROM vala_app_character WHERE account_id = %s"
-    try:
-        with conn.cursor() as cur:
-            cur.execute(sql, (account_id,))
-            rows = cur.fetchall() or []
-            return [str(row["id"]) for row in rows if row.get("id")]
-    except Exception as e:
-        print(f"[ERROR] 查询账户id={account_id}的角色id失败: {e}")
-        return []
-
-
-def fetch_pg_play_records(user_id: str, conn: Any, mysql_conn: Any) -> List[Dict[str, Any]]:
-    """
-    查询互动组件学习记录并补充组件配置信息
-
-    Args:
-        user_id: 用户ID（角色ID）
-        conn: PostgreSQL数据库连接
-        mysql_conn: MySQL数据库连接
-
-    Returns:
-        互动组件学习记录列表
-    """
-    print(f"  [PG] 开始查询互动组件学习记录（8张分表）...")
-    start_time = datetime.datetime.now()
-
-    tables = [f"user_component_play_record_{i}" for i in range(8)]
-    rows: List[Dict[str, Any]] = []
-    with conn.cursor(cursor_factory=RealDictCursor) as cur:
-        for t in tables:
-            try:
-                cur.execute(
-                    f"""
-                    SELECT user_id, component_unique_code, session_id, c_type, c_id,
-                           play_result, user_behavior_info, updated_at
-                    FROM {t}
-                    WHERE user_id = %s
-                    ORDER BY updated_at DESC
-                    """,
-                    (user_id,),
-                )
-                part = cur.fetchall() or []
-                if part:
-                    print(f"  [PG] 表{t}查到{len(part)}条记录")
-                for r in part:
-                    r = dict(r)
-                    r["play_result"] = to_json_str(r.get("play_result"))
-                    r["user_behavior_info"] = to_json_str(r.get("user_behavior_info"))
-                    # 将带时区的时间转换为无时区，避免Excel写入报错
-                    upd = r.get("updated_at")
-                    if isinstance(upd, datetime.datetime):
-                        try:
-                            if upd.tzinfo is not None and upd.tzinfo.utcoffset(upd) is not None:
-                                r["updated_at"] = upd.replace(tzinfo=None)
-                        except Exception:
-                            # 回退为字符串
-                            r["updated_at"] = str(upd)
-                    rows.append(r)
-            except Exception as e:
-                print(f"  [PG] 表{t}查询失败: {e}")
-                continue
-
-    rows.sort(key=lambda x: parse_time(x.get("updated_at")) or datetime.datetime.min, reverse=True)
-    print(f"  [PG] 互动组件学习记录查询完成，共{len(rows)}条，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-
-    # 批量查询组件配置
-    if rows and mysql_conn:
-        config_map = batch_fetch_component_configs(rows, mysql_conn)
-
-        # 补充组件信息
-        print(f"  [PG] 开始补充组件配置信息...")
-        filled_count = 0
-        empty_count = 0
-        sample_keys = []
-        sample_mode_check = []  # 检查对话互动的mode
-
-        for r in rows:
-            c_type = r.get("c_type", "")
-            c_id = r.get("c_id")
-            key = f"{c_type}_{c_id}" if c_type and c_id else ""
-
-            config = config_map.get(key, {})
-            component_config = config.get("component_config", {})
-
-            component_name = get_component_name(c_type, component_config)
-            r["互动组件名称"] = component_name
-            r["组件标题"] = config.get("title", "")
-            r["组件配置摘要"] = config.get("summary", "")
-            r["知识点"] = config.get("kp_relation_info", "")
-
-            # 统计填充情况
-            if config:
-                filled_count += 1
-                if len(sample_keys) < 3:
-                    sample_keys.append((key, component_name, r["组件标题"][:30] if r["组件标题"] else ""))
-
-                # 检查对话互动的mode
-                if c_type == "mid_sentence_dialogue" and len(sample_mode_check) < 3:
-                    mode = ""
-                    if isinstance(component_config, dict):
-                        question = component_config.get("question", {})
-                        if isinstance(question, dict):
-                            mode = question.get("mode", "")
-                    sample_mode_check.append({
-                        "key": key,
-                        "mode": mode,
-                        "component_name": component_name
-                    })
-            else:
-                empty_count += 1
-                if empty_count <= 5:  # 输出前5个未匹配的key
-                    print(f"  [PG] [警告] 未找到组件配置: key={key}")
-
-        print(f"  [PG] 组件配置信息补充完成")
-        print(f"  [PG] 匹配到配置: {filled_count}条, 未匹配: {empty_count}条")
-        if sample_keys:
-            print(f"  [PG] 样例数据（前3条）:")
-            for key, name, title in sample_keys:
-                print(f"  [PG]   - key={key}, 名称={name}, 标题={title}")
-
-        if sample_mode_check:
-            print(f"  [PG] 对话互动mode检查（前3条）:")
-            for s in sample_mode_check:
-                print(f"  [PG]   - key={s['key']}, mode={s['mode']}, 最终名称={s['component_name']}")
-
-    return rows
-
-
-def fetch_pg_unit_review(user_id: str, conn: Any, id_2_unit_index: Dict[int, int], chapter_id_to_lesson_id: Dict[int, int]) -> List[Dict[str, Any]]:
-    """
-    查询课程巩固记录
-
-    Args:
-        user_id: 用户ID（角色ID）
-        conn: PostgreSQL数据库连接
-        id_2_unit_index: story_id到unit_id的映射字典
-        chapter_id_to_lesson_id: chapter_id到lesson_id的映射字典
-
-    Returns:
-        课程巩固记录列表
-    """
-    print(f"  [PG] 开始查询课程巩固记录...")
-    start_time = datetime.datetime.now()
-
-    sql = (
-        "SELECT user_id, story_id, chapter_id, question_list, updated_at "
-        "FROM user_unit_review_question_result WHERE user_id = %s ORDER BY updated_at DESC"
-    )
-    with conn.cursor(cursor_factory=RealDictCursor) as cur:
-        try:
-            cur.execute(sql, (user_id,))
-            rows = cur.fetchall() or []
-        except Exception as e:
-            print(f"  [PG] 课程巩固记录查询失败: {e}")
-            rows = []
-    out: List[Dict[str, Any]] = []
-    for r in rows:
-        d = dict(r)
-
-        # 映射 story_id 到 unit_id
-        story_id = d.get("story_id")
-        unit_id = id_2_unit_index.get(story_id) if story_id else None
-        d["unit_id"] = unit_id
-
-        # 映射 chapter_id 到 lesson_id
-        chapter_id = d.get("chapter_id")
-        lesson_id = chapter_id_to_lesson_id.get(chapter_id) if chapter_id else None
-        d["lesson_id"] = lesson_id
-
-        # 计算正确率
-        question_list = d.get("question_list")
-        d["正确率"] = calculate_accuracy(question_list)
-
-        d["question_list"] = to_json_str(question_list)
-        upd = d.get("updated_at")
-        if isinstance(upd, datetime.datetime):
-            try:
-                if upd.tzinfo is not None and upd.tzinfo.utcoffset(upd) is not None:
-                    d["updated_at"] = upd.replace(tzinfo=None)
-            except Exception:
-                d["updated_at"] = str(upd)
-        out.append(d)
-
-    print(f"  [PG] 课程巩固记录查询完成，共{len(out)}条，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-    return out
-
-
-def fetch_pg_unit_challenge(user_id: str, conn: Any, id_2_unit_index: Dict[int, int]) -> List[Dict[str, Any]]:
-    """
-    查询单元挑战记录
-
-    Args:
-        user_id: 用户ID（角色ID）
-        conn: PostgreSQL数据库连接
-        id_2_unit_index: story_id到unit_id的映射字典
-
-    Returns:
-        单元挑战记录列表
-    """
-    print(f"  [PG] 开始查询单元挑战记录...")
-    start_time = datetime.datetime.now()
-
-    sql = (
-        "SELECT user_id, story_id, category, score_text, question_list, updated_at "
-        "FROM user_unit_challenge_question_result WHERE user_id = %s ORDER BY updated_at DESC"
-    )
-    with conn.cursor(cursor_factory=RealDictCursor) as cur:
-        try:
-            cur.execute(sql, (user_id,))
-            rows = cur.fetchall() or []
-        except Exception as e:
-            print(f"  [PG] 单元挑战记录查询失败: {e}")
-            rows = []
-    out: List[Dict[str, Any]] = []
-    for r in rows:
-        d = dict(r)
-
-        # 映射 story_id 到 unit_id
-        story_id = d.get("story_id")
-        unit_id = id_2_unit_index.get(story_id) if story_id else None
-        d["unit_id"] = unit_id
-
-        d["question_list"] = to_json_str(d.get("question_list"))
-        upd = d.get("updated_at")
-        if isinstance(upd, datetime.datetime):
-            try:
-                if upd.tzinfo is not None and upd.tzinfo.utcoffset(upd) is not None:
-                    d["updated_at"] = upd.replace(tzinfo=None)
-            except Exception:
-                d["updated_at"] = str(upd)
-        out.append(d)
-
-    print(f"  [PG] 单元挑战记录查询完成，共{len(out)}条，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-    return out
-
-
-def fetch_pg_unit_summary(user_id: str, conn: Any, id_2_unit_index: Dict[int, int]) -> List[Dict[str, Any]]:
-    """
-    查询单元总结知识点结果数据
-
-    Args:
-        user_id: 用户ID（角色ID）
-        conn: PostgreSQL数据库连接
-        id_2_unit_index: story_id到unit_id的映射字典
-
-    Returns:
-        单元总结记录列表
-    """
-    print(f"  [PG] 开始查询单元总结记录...")
-    start_time = datetime.datetime.now()
-
-    sql = (
-        "SELECT id, user_id, story_id, updated_at, km_id, km_type, play_time "
-        "FROM user_unit_summary_km_result WHERE user_id = %s AND deleted_at IS NULL ORDER BY updated_at DESC"
-    )
-    with conn.cursor(cursor_factory=RealDictCursor) as cur:
-        try:
-            cur.execute(sql, (user_id,))
-            rows = cur.fetchall() or []
-        except Exception as e:
-            print(f"  [PG] 单元总结记录查询失败: {e}")
-            rows = []
-
-    out: List[Dict[str, Any]] = []
-    for r in rows:
-        d = dict(r)
-        # 映射 story_id 到 unit_id
-        story_id = d.get("story_id")
-        unit_id = id_2_unit_index.get(story_id) if story_id else None
-        d["unit_id"] = unit_id
-
-        # 转换 play_time (毫秒) 为秒 (整数)
-        play_time = d.get("play_time")
-        d["play_time_seconds"] = play_time // 1000 if play_time else 0
-
-        # 移除时区信息
-        upd = d.get("updated_at")
-        if isinstance(upd, datetime.datetime):
-            try:
-                if upd.tzinfo is not None and upd.tzinfo.utcoffset(upd) is not None:
-                    d["updated_at"] = upd.replace(tzinfo=None)
-            except Exception:
-                d["updated_at"] = str(upd)
-        out.append(d)
-
-    print(f"  [PG] 单元总结记录查询完成，共{len(out)}条，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-    return out
-
-
-def generate_statistics(sheet2_rows: List[Dict[str, Any]], sheet5_rows: List[Dict[str, Any]]) -> tuple:
-    """
-    生成汇总统计数据
-
-    Args:
-        sheet2_rows: 互动组件学习记录
-        sheet5_rows: 单元总结记录
-
-    Returns:
-        (组件统计DataFrame, 知识点统计DataFrame, 单元时长统计DataFrame)
-    """
-    if pd is None:
-        raise RuntimeError("缺少pandas依赖，请安装后再运行。")
-
-    print(f"  [统计] 开始生成汇总统计数据...")
-    start_time = datetime.datetime.now()
-
-    from collections import defaultdict
-
-    # ============ a. 所有互动-按互动组件类型-通过情况统计 ============
-    component_stats_data = []
-    component_stats = defaultdict(lambda: {"Perfect": 0, "Good": 0, "Failed": 0, "Pass": 0, "Oops": 0, "total": 0})
-
-    # 用于调试
-    sample_results = []
-    parse_error_count = 0
-
-    for idx, record in enumerate(sheet2_rows):
-        component_name = record.get("互动组件名称", "")
-        if not component_name:
-            continue
-
-        play_result_str = record.get("play_result", "")
-
-        # 解析play_result
-        result = ""
-        try:
-            # 先判断是否是简单的字符串（Perfect/Good/Failed/Pass/Oops）
-            if isinstance(play_result_str, str):
-                # 去除空格后检查
-                stripped = play_result_str.strip()
-                if stripped in ["Perfect", "Good", "Failed", "Pass", "Oops"]:
-                    # 直接使用
-                    result = stripped
-                else:
-                    # 尝试JSON解析
-                    try:
-                        play_result = json.loads(play_result_str)
-                        if isinstance(play_result, dict):
-                            result = play_result.get("result", "")
-                        else:
-                            result = ""
-                    except:
-                        result = ""
-            else:
-                # 如果不是字符串，尝试当dict处理
-                if isinstance(play_result_str, dict):
-                    result = play_result_str.get("result", "")
-                else:
-                    result = ""
-
-            # 收集前3个样例
-            if idx < 3:
-                sample_results.append({
-                    "component": component_name,
-                    "raw": str(play_result_str)[:100],
-                    "result": result
-                })
-        except Exception as e:
-            parse_error_count += 1
-            if parse_error_count <= 3:
-                print(f"  [统计] [警告] 解析play_result失败 (第{idx+1}条): {e}, 原始值: {str(play_result_str)[:100]}")
-            result = ""
-
-        component_stats[component_name]["total"] += 1
-        if result in ["Perfect", "Good", "Failed", "Pass", "Oops"]:
-            component_stats[component_name][result] += 1
-
-    print(f"  [统计] play_result解析样例（前3条）:")
-    for s in sample_results:
-        print(f"  [统计]   - 组件: {s['component']}, 结果: {s['result']}, 原始: {s['raw']}")
-    if parse_error_count > 0:
-        print(f"  [统计] play_result解析失败总数: {parse_error_count}")
-
-    # 生成统计数据行
-    for component_name in sorted(component_stats.keys()):
-        stats = component_stats[component_name]
-        total = stats["total"]
-        perfect = stats["Perfect"]
-        good = stats["Good"]
-        failed = stats["Failed"]
-        pass_count = stats["Pass"]
-        oops = stats["Oops"]
-
-        perfect_ratio = round(perfect / total * 100, 2) if total > 0 else 0
-        good_ratio = round(good / total * 100, 2) if total > 0 else 0
-        failed_ratio = round(failed / total * 100, 2) if total > 0 else 0
-        pass_ratio = round(pass_count / total * 100, 2) if total > 0 else 0
-        oops_ratio = round(oops / total * 100, 2) if total > 0 else 0
-
-        component_stats_data.append({
-            "互动组件名称": component_name,
-            "总数量": total,
-            "Perfect数量": perfect,
-            "Good数量": good,
-            "Failed数量": failed,
-            "Pass数量": pass_count,
-            "Oops数量": oops,
-            "Perfect比例(%)": perfect_ratio,
-            "Good比例(%)": good_ratio,
-            "Failed比例(%)": failed_ratio,
-            "Pass比例(%)": pass_ratio,
-            "Oops比例(%)": oops_ratio,
-        })
-
-    # ============ b. 中互动组件-按知识点-通过情况统计 ============
-    kp_stats_data = []
-    kp_stats = defaultdict(lambda: {"Perfect": 0, "Good": 0, "Failed": 0, "Pass": 0, "Oops": 0, "total": 0})
-
-    # 调试信息
-    mid_count = 0
-    has_kp_count = 0
-    sample_kp_records = []
-
-    for idx, record in enumerate(sheet2_rows):
-        c_type = record.get("c_type", "")
-        if not c_type or not c_type.startswith("mid"):
-            continue
-
-        mid_count += 1
-        kp_relation_info_str = record.get("知识点", "")
-
-        if not kp_relation_info_str:
-            continue
-
-        has_kp_count += 1
-
-        # 解析知识点
-        try:
-            if isinstance(kp_relation_info_str, str):
-                kp_relation_info = json.loads(kp_relation_info_str)
-            else:
-                kp_relation_info = kp_relation_info_str
-
-            if not isinstance(kp_relation_info, list):
-                continue
-
-            # 收集样例
-            if len(sample_kp_records) < 3:
-                sample_kp_records.append({
-                    "c_type": c_type,
-                    "kp_count": len(kp_relation_info),
-                    "kp_info": str(kp_relation_info)[:200]
-                })
-
-            # 解析play_result（使用相同的逻辑）
-            play_result_str = record.get("play_result", "")
-            result = ""
-            if isinstance(play_result_str, str):
-                stripped = play_result_str.strip()
-                if stripped in ["Perfect", "Good", "Failed", "Pass", "Oops"]:
-                    result = stripped
-                else:
-                    try:
-                        play_result = json.loads(play_result_str)
-                        if isinstance(play_result, dict):
-                            result = play_result.get("result", "")
-                    except:
-                        pass
-            elif isinstance(play_result_str, dict):
-                result = play_result_str.get("result", "")
-
-            # 为每个知识点统计
-            for kp in kp_relation_info:
-                if not isinstance(kp, dict):
-                    continue
-
-                kp_id = kp.get("kpId", "")
-                kp_type = kp.get("kpType", "")
-                kp_title = kp.get("kpTitle", "")
-
-                if not kp_id:
-                    continue
-
-                kp_key = f"{kp_id}|{kp_type}|{kp_title}"
-                kp_stats[kp_key]["total"] += 1
-                if result in ["Perfect", "Good", "Failed", "Pass", "Oops"]:
-                    kp_stats[kp_key][result] += 1
-
-        except Exception as e:
-            if len(sample_kp_records) < 5:
-                print(f"  [统计] [警告] 解析知识点失败: {e}, 原始值: {str(kp_relation_info_str)[:100]}")
-            continue
-
-    print(f"  [统计] 中互动组件统计: 总数={mid_count}, 有知识点={has_kp_count}, 知识点条目数={len(kp_stats)}")
-    if sample_kp_records:
-        print(f"  [统计] 知识点样例（前3条）:")
-        for s in sample_kp_records:
-            print(f"  [统计]   - c_type={s['c_type']}, 知识点数量={s['kp_count']}, 内容={s['kp_info']}")
-
-    # 生成知识点统计数据行
-    for kp_key in sorted(kp_stats.keys()):
-        parts = kp_key.split("|")
-        if len(parts) != 3:
-            continue
-
-        kp_id, kp_type, kp_title = parts
-        stats = kp_stats[kp_key]
-        total = stats["total"]
-        perfect = stats["Perfect"]
-        good = stats["Good"]
-        failed = stats["Failed"]
-        pass_count = stats["Pass"]
-        oops = stats["Oops"]
-
-        perfect_ratio = round(perfect / total * 100, 2) if total > 0 else 0
-        good_ratio = round(good / total * 100, 2) if total > 0 else 0
-        failed_ratio = round(failed / total * 100, 2) if total > 0 else 0
-        pass_ratio = round(pass_count / total * 100, 2) if total > 0 else 0
-        oops_ratio = round(oops / total * 100, 2) if total > 0 else 0
-
-        kp_stats_data.append({
-            "知识点ID": kp_id,
-            "知识点类型": kp_type,
-            "知识点标题": kp_title,
-            "总数量": total,
-            "Perfect数量": perfect,
-            "Good数量": good,
-            "Failed数量": failed,
-            "Pass数量": pass_count,
-            "Oops数量": oops,
-            "Perfect比例(%)": perfect_ratio,
-            "Good比例(%)": good_ratio,
-            "Failed比例(%)": failed_ratio,
-            "Pass比例(%)": pass_ratio,
-            "Oops比例(%)": oops_ratio,
-        })
-
-    # ============ c. 单元总结-按单元统计时长 ============
-    unit_time_stats_data = []
-    unit_time_stats = defaultdict(int)
-
-    for record in sheet5_rows:
-        unit_id = record.get("unit_id")
-        play_time_seconds = record.get("play_time_seconds", 0)
-
-        if unit_id is not None:
-            unit_time_stats[unit_id] += play_time_seconds
-
-    # 生成单元时长统计数据行
-    for unit_id in sorted(unit_time_stats.keys()):
-        total_seconds = unit_time_stats[unit_id]
-        total_minutes = int(total_seconds / 60)
-
-        unit_time_stats_data.append({
-            "单元ID": f"unit_{unit_id}",
-            "总时长(秒)": total_seconds,
-            "总时长(分钟)": total_minutes,
-        })
-
-    print(f"  [统计] 汇总统计数据生成完成，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-    print(f"  [统计] 生成了{len(component_stats_data)}条组件统计, {len(kp_stats_data)}条知识点统计, {len(unit_time_stats_data)}条单元时长统计")
-
-    return (
-        pd.DataFrame(component_stats_data),
-        pd.DataFrame(kp_stats_data),
-        pd.DataFrame(unit_time_stats_data)
-    )
-
-
-
-def write_excel(path: str, sheet1_rows: List[Dict[str, Any]], sheet2_rows: List[Dict[str, Any]], sheet3_rows: List[Dict[str, Any]], sheet4_rows: List[Dict[str, Any]], sheet5_rows: List[Dict[str, Any]], stats_component_df: Any, stats_kp_df: Any, stats_unit_time_df: Any) -> None:
-    if pd is None:
-        raise RuntimeError("缺少pandas依赖，请安装后再运行。")
-
-    print(f"  [Excel] 开始写入Excel文件: {path}")
-    start_time = datetime.datetime.now()
-
-    out_dir = os.path.dirname(path) or "."
-    os.makedirs(out_dir, exist_ok=True)
-    with pd.ExcelWriter(path, engine="openpyxl") as writer:
-        pd.DataFrame(sheet1_rows, columns=SHEET1_COLUMNS).to_excel(writer, sheet_name="全部音频数据", index=False)
-        pd.DataFrame(sheet2_rows, columns=SHEET2_COLUMNS).to_excel(writer, sheet_name="互动组件学习记录", index=False)
-        pd.DataFrame(sheet3_rows, columns=SHEET3_COLUMNS).to_excel(writer, sheet_name="课程巩固记录", index=False)
-        pd.DataFrame(sheet4_rows, columns=SHEET4_COLUMNS).to_excel(writer, sheet_name="单元挑战记录", index=False)
-        pd.DataFrame(sheet5_rows, columns=SHEET5_COLUMNS).to_excel(writer, sheet_name="单元总结记录", index=False)
-        stats_component_df.to_excel(writer, sheet_name="统计-互动组件通过情况", index=False)
-        stats_kp_df.to_excel(writer, sheet_name="统计-知识点通过情况", index=False)
-        stats_unit_time_df.to_excel(writer, sheet_name="统计-单元总结时长", index=False)
-
-    print(f"  [Excel] 写入完成，耗时{(datetime.datetime.now() - start_time).total_seconds():.2f}秒")
-
-
-def get_date_str() -> str:
-    """获取当前日期字符串 格式：YYYYMMDD"""
-    return datetime.datetime.now().strftime("%Y%m%d")
-
-
-def export_single_user(user_id: str, es_cfg: Dict[str, Any], pg_conn: Any, mysql_conn: Any, output_path: str, id_2_unit_index: Dict[int, int], chapter_id_to_lesson_id: Dict[int, int]) -> bool:
-    """
-    导出单个角色id的数据
-
-    Args:
-        user_id: 角色ID
-        es_cfg: ES配置
-        pg_conn: PostgreSQL连接
-        mysql_conn: MySQL连接
-        output_path: 输出路径
-        id_2_unit_index: story_id到unit_id的映射字典
-        chapter_id_to_lesson_id: chapter_id到lesson_id的映射字典
-
-    Returns:
-        True表示成功，False表示失败
-    """
-    try:
-        print(f"\n[INFO] ========== 开始导出角色id={user_id} ==========")
-        total_start_time = datetime.datetime.now()
-
-        # 查询ES数据
-        sheet1_rows = fetch_es_user_audio(user_id, es_cfg)
-
-        # 查询PG数据
-        sheet2_rows = fetch_pg_play_records(user_id, pg_conn, mysql_conn)
-        sheet3_rows = fetch_pg_unit_review(user_id, pg_conn, id_2_unit_index, chapter_id_to_lesson_id)
-        sheet4_rows = fetch_pg_unit_challenge(user_id, pg_conn, id_2_unit_index)
-        sheet5_rows = fetch_pg_unit_summary(user_id, pg_conn, id_2_unit_index)
-
-        # 检查是否有有效数据
-        total_records = len(sheet1_rows) + len(sheet2_rows) + len(sheet3_rows) + len(sheet4_rows) + len(sheet5_rows)
-        print(f"  [统计] 数据汇总:")
-        print(f"    - 全部音频数据: {len(sheet1_rows)}条")
-        print(f"    - 互动组件学习记录: {len(sheet2_rows)}条")
-        print(f"    - 课程巩固记录: {len(sheet3_rows)}条")
-        print(f"    - 单元挑战记录: {len(sheet4_rows)}条")
-        print(f"    - 单元总结记录: {len(sheet5_rows)}条")
-        print(f"    - 总计: {total_records}条")
-
-        if total_records == 0:
-            print(f"[WARN] 角色id={user_id} 没有找到任何有效记录，跳过导出")
-            return False
-
-        # 生成汇总统计数据
-        stats_component_df, stats_kp_df, stats_unit_time_df = generate_statistics(sheet2_rows, sheet5_rows)
-
-        # 写入Excel
-        write_excel(output_path, sheet1_rows, sheet2_rows, sheet3_rows, sheet4_rows, sheet5_rows, stats_component_df, stats_kp_df, stats_unit_time_df)
-
-        total_time = (datetime.datetime.now() - total_start_time).total_seconds()
-        print(f"[INFO] 角色id={user_id} 导出成功")
-        print(f"[INFO] 文件路径: {output_path}")
-        print(f"[INFO] 总耗时: {total_time:.2f}秒")
-        print(f"[INFO] ========== 完成 ==========\n")
-        return True
-
-    except Exception as e:
-        print(f"[ERROR] 角色id={user_id} 导出失败: {e}")
-        import traceback
-        traceback.print_exc()
-        return False
-
-
-def main():
-    load_env()
-
-    # 确定运行模式并收集需要导出的角色id列表
-    user_id_list: List[tuple] = []  # [(user_id, account_id or None), ...]
-    date_str = get_date_str()
-
-    # 检查三种模式的配置
-    has_user_id = USER_ID is not None
-    has_user_id_list = USER_ID_LIST is not None and len(USER_ID_LIST) > 0
-    has_account_id_list = ACCOUNT_ID_LIST is not None and len(ACCOUNT_ID_LIST) > 0
-
-    # 验证只能配置一种模式
-    mode_count = sum([has_user_id, has_user_id_list, has_account_id_list])
-    if mode_count == 0:
-        raise RuntimeError("请配置 USER_ID、USER_ID_LIST 或 ACCOUNT_ID_LIST 中的一个")
-    if mode_count > 1:
-        raise RuntimeError("USER_ID、USER_ID_LIST、ACCOUNT_ID_LIST 只能配置一个，请检查配置")
-
-    # 模式1：单个角色id
-    if has_user_id:
-        user_id_list = [(str(USER_ID), None)]
-        print(f"[INFO] 运行模式：单个角色id")
-
-    # 模式2：角色id列表
-    elif has_user_id_list:
-        user_id_list = [(str(uid), None) for uid in USER_ID_LIST]
-        print(f"[INFO] 运行模式：角色id列表，共{len(user_id_list)}个角色")
-
-    # 模式3：账户id列表
-    elif has_account_id_list:
-        print(f"[INFO] 运行模式：账户id列表，共{len(ACCOUNT_ID_LIST)}个账户")
-        mysql_conn = None
-        try:
-            mysql_conn = get_mysql_conn("vala_user")  # 查询用户表，使用 vala_user 数据库
-            for account_id in ACCOUNT_ID_LIST:
-                account_id_str = str(account_id)
-                print(f"[INFO] 查询账户id={account_id_str}对应的角色id...")
-                character_ids = fetch_character_ids_by_account(account_id_str, mysql_conn)
-                if not character_ids:
-                    print(f"[WARN] 账户id={account_id_str} 未找到关联的角色id，跳过")
-                    continue
-                print(f"[INFO] 账户id={account_id_str} 找到{len(character_ids)}个角色id: {character_ids}")
-                for cid in character_ids:
-                    user_id_list.append((cid, account_id_str))
-        finally:
-            if mysql_conn:
-                try:
-                    mysql_conn.close()
-                except Exception:
-                    pass
-
-    if not user_id_list:
-        print("[WARN] 没有需要导出的角色id，程序退出")
-        return
-
-    # 初始化连接
-    es_cfg = get_es_config()
-    pg_conn = get_pg_conn()
-
-    # 获取映射表（只需要查询一次，所有角色共用）
-    print(f"\n[INFO] ===== 准备工作：获取映射表 =====")
-    mysql_conn = None
-    id_2_unit_index = {}
-    chapter_id_to_lesson_id = {}
-    try:
-        print(f"[INFO] 正在连接MySQL数据库（vala_test）...")
-        mysql_conn = get_mysql_conn("vala_test")  # 查询游戏配置表，使用 vala_test 数据库
-        print(f"[INFO] 正在获取 story_id 到 unit_id 的映射...")
-        id_2_unit_index = get_id_2_unit_index(mysql_conn)
-        print(f"[INFO] 成功获取 {len(id_2_unit_index)} 个 story_id 映射")
-        print(f"[INFO] 正在获取 chapter_id 到 lesson_id 的映射...")
-        chapter_id_to_lesson_id = get_chapter_id_to_lesson_id(mysql_conn)
-        print(f"[INFO] 成功获取 {len(chapter_id_to_lesson_id)} 个 chapter_id 映射")
-    except Exception as e:
-        print(f"[ERROR] 获取映射表失败: {e}")
-        import traceback
-        traceback.print_exc()
-        if pg_conn:
-            try:
-                pg_conn.close()
-            except Exception:
-                pass
-        if mysql_conn:
-            try:
-                mysql_conn.close()
-            except Exception:
-                pass
-        return
-
-    try:
-        # 统计信息
-        success_count = 0
-        skip_count = 0
-
-        print(f"\n[INFO] ===== 开始批量导出 =====")
-        print(f"[INFO] 共需导出{len(user_id_list)}个角色\n")
-        batch_start_time = datetime.datetime.now()
-
-        # 循环处理每个角色id
-        for idx, (user_id, account_id) in enumerate(user_id_list, 1):
-            print(f"\n{'='*60}")
-            print(f"[INFO] 进度: {idx}/{len(user_id_list)} ({idx*100//len(user_id_list)}%)")
-            print(f"{'='*60}")
-
-            # 生成输出文件名
-            if account_id is None:
-                # 模式1和模式2：角色id_{}_导出时间_{}.xlsx
-                filename = f"角色id_{user_id}_导出时间_{date_str}.xlsx"
-            else:
-                # 模式3：账户id_{}_角色id_{}_导出时间_{}.xlsx
-                filename = f"账户id_{account_id}_角色id_{user_id}_导出时间_{date_str}.xlsx"
-
-            output_path = os.path.join(OUTPUT_DIR, filename)
-
-            # 导出单个角色的数据
-            result = export_single_user(user_id, es_cfg, pg_conn, mysql_conn, output_path, id_2_unit_index, chapter_id_to_lesson_id)
-            if result:
-                success_count += 1
-            else:
-                skip_count += 1
-
-        # 输出统计信息
-        batch_total_time = (datetime.datetime.now() - batch_start_time).total_seconds()
-        print(f"\n{'='*60}")
-        print(f"[INFO] ===== 全部导出完成 =====")
-        print(f"[INFO] 总计: {len(user_id_list)}个角色")
-        print(f"[INFO] 成功: {success_count}个")
-        print(f"[INFO] 跳过: {skip_count}个")
-        print(f"[INFO] 总耗时: {batch_total_time:.2f}秒 ({batch_total_time/60:.2f}分钟)")
-        if success_count > 0:
-            print(f"[INFO] 平均每个角色: {batch_total_time/success_count:.2f}秒")
-        print(f"{'='*60}\n")
-
-    finally:
-        if pg_conn:
-            try:
-                pg_conn.close()
-            except Exception:
-                pass
-        if mysql_conn:
-            try:
-                mysql_conn.close()
-            except Exception:
-                pass
-
-
-if __name__ == "__main__":
-    main()
diff --git a/new_export/llm_offline_production b/new_export/llm_offline_production
deleted file mode 160000
index 75ab13e..0000000
--- a/new_export/llm_offline_production
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 75ab13e87dd0e856cb05c9515efcd507888b6486
diff --git a/new_export/test_db_connections.py b/new_export/test_db_connections.py
deleted file mode 100644
index 77545eb..0000000
--- a/new_export/test_db_connections.py
+++ /dev/null
@@ -1,176 +0,0 @@
-#!/usr/bin/env python3
-"""测试各个数据库连接和查询"""
-
-import os
-import json
-import psycopg2
-import pymysql
-import requests
-from requests.auth import HTTPBasicAuth
-import warnings
-warnings.filterwarnings('ignore')
-
-def test_postgresql():
-    """测试PostgreSQL连接"""
-    print("\n" + "="*60)
-    print("测试 PostgreSQL（Online）连接")
-    print("="*60)
-    
-    try:
-        conn = psycopg2.connect(
-            host="bj-postgres-16pob4sg.sql.tencentcdb.com",
-            port=28591,
-            user="ai_member",
-            password="LdfjdjL83h3h3^$&**YGG*",
-            dbname="vala",
-            connect_timeout=10
-        )
-        print("✅ PostgreSQL 连接成功！")
-        
-        # 测试查询
-        with conn.cursor() as cur:
-            # 先查询所有表
-            cur.execute("SELECT tablename FROM pg_tables WHERE schemaname = 'public' LIMIT 5")
-            tables = cur.fetchall()
-            print(f"✅ 查询成功！找到前5个表：{[t[0] for t in tables]}")
-            
-            # 尝试查询其中一个表的1条数据
-            if tables:
-                table = tables[0][0]
-                cur.execute(f"SELECT * FROM {table} LIMIT 1")
-                row = cur.fetchone()
-                print(f"✅ 从表 {table} 读取到1条数据：{row if row else '空表'}")
-        
-        conn.close()
-        return True
-        
-    except Exception as e:
-        print(f"❌ PostgreSQL 连接/查询失败：{str(e)[:200]}")
-        return False
-
-def test_mysql_test():
-    """测试Test MySQL连接"""
-    print("\n" + "="*60)
-    print("测试 MySQL（Test环境）连接")
-    print("="*60)
-    
-    try:
-        conn = pymysql.connect(
-            host="bj-cdb-8frbdwju.sql.tencentcdb.com",
-            port=25413,
-            user="read_only",
-            password="fdsfiidier^$*hjfdijjd232",
-            connect_timeout=10
-        )
-        print("✅ MySQL（Test）连接成功！")
-        
-        # 测试查询
-        with conn.cursor() as cur:
-            cur.execute("SHOW DATABASES LIMIT 5")
-            dbs = cur.fetchall()
-            print(f"✅ 查询成功！找到前5个数据库：{[db[0] for db in dbs]}")
-            
-            if dbs:
-                db = dbs[0][0]
-                cur.execute(f"USE {db}")
-                cur.execute("SHOW TABLES LIMIT 1")
-                table = cur.fetchone()
-                if table:
-                    cur.execute(f"SELECT * FROM {table[0]} LIMIT 1")
-                    row = cur.fetchone()
-                    print(f"✅ 从表 {table[0]} 读取到1条数据：{row if row else '空表'}")
-        
-        conn.close()
-        return True
-        
-    except Exception as e:
-        print(f"❌ MySQL（Test）连接/查询失败：{str(e)[:200]}")
-        return False
-
-def test_mysql_online():
-    """测试Online MySQL连接"""
-    print("\n" + "="*60)
-    print("测试 MySQL（Online）连接")
-    print("="*60)
-    
-    try:
-        conn = pymysql.connect(
-            host="bj-cdb-dh2fkqa0.sql.tencentcdb.com",
-            port=27751,
-            user="read_only",
-            password="fsdo45ijfmfmuu77$%^&",
-            connect_timeout=10
-        )
-        print("✅ MySQL（Online）连接成功！")
-        
-        # 测试查询
-        with conn.cursor() as cur:
-            cur.execute("SHOW DATABASES LIMIT 5")
-            dbs = cur.fetchall()
-            print(f"✅ 查询成功！找到前5个数据库：{[db[0] for db in dbs]}")
-            
-        conn.close()
-        return True
-        
-    except Exception as e:
-        print(f"❌ MySQL（Online）连接/查询失败：{str(e)[:200]}")
-        return False
-
-def test_es_online():
-    """测试Online ES连接"""
-    print("\n" + "="*60)
-    print("测试 Elasticsearch（Online）连接")
-    print("="*60)
-    
-    try:
-        url = "https://es-7vd7jcu9.public.tencentelasticsearch.com:9200"
-        auth = HTTPBasicAuth("elastic", "F%?QDcWes7N2WTuiYD11")
-        
-        response = requests.get(
-            url,
-            auth=auth,
-            verify=False,
-            timeout=10
-        )
-        
-        if response.status_code == 200:
-            info = response.json()
-            print(f"✅ ES 连接成功！集群名称：{info.get('cluster_name')}")
-            
-            # 测试查询索引
-            indices_resp = requests.get(
-                f"{url}/_cat/indices?format=json",
-                auth=auth,
-                verify=False,
-                timeout=10
-            )
-            if indices_resp.status_code == 200:
-                indices = indices_resp.json()
-                print(f"✅ 查询成功！索引数量：{len(indices)}")
-                if indices:
-                    print(f"   前3个索引：{[idx['index'] for idx in indices[:3]]}")
-            
-            return True
-        else:
-            print(f"❌ ES 连接失败：HTTP {response.status_code}")
-            return False
-            
-    except Exception as e:
-        print(f"❌ ES 连接/查询失败：{str(e)[:200]}")
-        return False
-
-if __name__ == "__main__":
-    print("开始测试所有数据库连接...")
-    
-    results = {}
-    results["PostgreSQL(Online)"] = test_postgresql()
-    results["MySQL(Test)"] = test_mysql_test()
-    results["MySQL(Online)"] = test_mysql_online()
-    results["ES(Online)"] = test_es_online()
-    
-    print("\n" + "="*60)
-    print("测试总结")
-    print("="*60)
-    for name, result in results.items():
-        status = "✅ 正常" if result else "❌ 异常"
-        print(f"{name}: {status}")
diff --git a/passwords.example.txt b/passwords.example.txt
deleted file mode 100644
index 80a0c70..0000000
--- a/passwords.example.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-# 敏感密钥配置示例文件，真实密钥请放在 passwords.txt 中（已加入 .gitignore 不会提交到Git）
-# MySQL
-MYSQL_ONLINE_PASSWORD=***线上MySQL密码***
-MYSQL_TEST_PASSWORD=***测试MySQL密码***
-
-# PostgreSQL
-PG_ONLINE_PASSWORD=***线上PostgreSQL密码***
-PG_TEST_PASSWORD=***测试PostgreSQL密码***
-
-# Elasticsearch
-ES_TEST_PASSWORD=***测试ES密码***
-ES_ONLINE_PASSWORD=***线上ES密码***
diff --git a/scripts/generate_report.py b/scripts/generate_report.py
index a2d152b..0b41638 100644
--- a/scripts/generate_report.py
+++ b/scripts/generate_report.py
@@ -1,99 +1,152 @@
 import pandas as pd
-import psycopg2
+from datetime import datetime
 
-# 1. 读取最新的带成交标记的订单数据
-order_df = pd.read_csv('2026年3月1日至今订单_含正确成交标记.csv')
-print(f"订单总数：{len(order_df)}")
-
-# 2. 计算GMV和退款相关
-order_df['GMV'] = order_df['pay_amount_int'] / 100
-order_df['is_refund'] = (order_df['order_status'] == 4).astype(int)
-# 计算GSV：退款订单GSV为0，其他为GMV
-order_df['GSV'] = order_df.apply(lambda row: 0 if row['order_status'] == 4 else row['GMV'], axis=1)
-order_df['refund_amount'] = order_df.apply(lambda row: row['GMV'] if row['order_status'] == 4 else 0, axis=1)
-
-# 3. 映射到大类渠道
-def map_channel(tag):
-    if tag in ['销转', '销转-小龙']:
-        return '销转'
-    elif tag in ['端内直购', '端内销转']:
-        return 'App转化'
-    elif tag == '达播':
-        return '达播'
-    elif tag.startswith('班主任-'):
-        return '班主任'
-    elif tag == '店铺直购':
-        return '店铺直购'
-    else:
-        return '其他'
-
-order_df['渠道大类'] = order_df['成交标记'].apply(map_channel)
-
-# 4. 按大类统计
-channel_stats = order_df.groupby('渠道大类').agg(
-    订单数=('id', 'count'),
-    GMV=('GMV', 'sum'),
-    已退款金额=('refund_amount', 'sum'),
-    GSV=('GSV', 'sum'),
-    退款订单数=('is_refund', 'sum'),
-    客单价=('GMV', 'mean')
-).reset_index()
-channel_stats['退费率'] = (channel_stats['退款订单数'] / channel_stats['订单数'] * 100).round(1).astype(str) + '%'
-channel_stats['GMV'] = channel_stats['GMV'].round(2)
-channel_stats['GSV'] = channel_stats['GSV'].round(2)
-channel_stats['已退款金额'] = channel_stats['已退款金额'].round(2)
-channel_stats['客单价'] = channel_stats['客单价'].round(2)
-
-# 5. 原预测表的预测值
-pred_data = [
-    {'渠道大类': '销转', '预测GSV': 100000},
-    {'渠道大类': 'App转化', '预测GSV': 20000},
-    {'渠道大类': '达播', '预测GSV': 250000},
-    {'渠道大类': '班主任', '预测GSV': 10000}
+# 1. 整体统计数据
+overall_data = [
+    {"渠道": "学而思", "新增注册总人数": 615, "购课总人数":7, "购课总金额（元）":7794},
+    {"渠道": "科大讯飞", "新增注册总人数": 377, "购课总人数":4, "购课总金额（元）":3796},
+    {"渠道": "希沃", "新增注册总人数": 122, "购课总人数":1, "购课总金额（元）":599},
+    {"渠道": "京东方", "新增注册总人数": 61, "购课总人数":1, "购课总金额（元）":599},
+    {"渠道": "合计", "新增注册总人数": 1175, "购课总人数":13, "购课总金额（元）":12788},
 ]
-pred_df = pd.DataFrame(pred_data)
+df_overall = pd.DataFrame(overall_data)
 
-# 6. 合并实际和预测数据
-report_df = pd.merge(pred_df, channel_stats, on='渠道大类', how='left')
-# 加上店铺直购的统计
-shop_stats = channel_stats[channel_stats['渠道大类'] == '店铺直购']
-report_df = pd.concat([report_df, shop_stats], ignore_index=True)
-# 加上总计
-total = pd.DataFrame({
-    '渠道大类': ['总计'],
-    '预测GSV': [pred_df['预测GSV'].sum()],
-    '订单数': [channel_stats['订单数'].sum()],
-    'GMV': [channel_stats['GMV'].sum()],
-    '已退款金额': [channel_stats['已退款金额'].sum()],
-    'GSV': [channel_stats['GSV'].sum()],
-    '退款订单数': [channel_stats['退款订单数'].sum()],
-    '客单价': [channel_stats['GMV'].sum()/channel_stats['订单数'].sum()],
-    '退费率': [str((channel_stats['退款订单数'].sum()/channel_stats['订单数'].sum()*100).round(1)) + '%']
-})
-report_df = pd.concat([report_df, total], ignore_index=True)
-report_df['完成率'] = report_df.apply(lambda row: str(round(row['GSV']/row['预测GSV']*100, 1)) + '%' if pd.notna(row['预测GSV']) else '-', axis=1)
+# 2. 每日购课明细数据
+purchase_data = [
+    {"日期": "2026-03-02", "渠道": "学而思", "购课人数":1, "购课金额（元）":599, "订单号": "zfb202603022031481772454708683943"},
+    {"日期": "2026-03-07", "渠道": "学而思", "购课人数":1, "购课金额（元）":599, "订单号": "wx202603071022051772850125753228"},
+    {"日期": "2026-03-07", "渠道": "科大讯飞", "购课人数":1, "购课金额（元）":599, "订单号": "wx202603072123501772889830225976"},
+    {"日期": "2026-03-10", "渠道": "学而思", "购课人数":1, "购课金额（元）":1999, "订单号": "wx202603101820431773138043948181"},
+    {"日期": "2026-03-15", "渠道": "科大讯飞", "购课人数":2, "购课金额（元）":2598, "订单号": "wx202603150854031773536043478685、wx20260315122747177354886748896"},
+    {"日期": "2026-03-18", "渠道": "学而思", "购课人数":2, "购课金额（元）":2598, "订单号": "wx202603182055481773838548372991、zfb202603182118201773839900411837"},
+    {"日期": "2026-03-23", "渠道": "科大讯飞", "购课人数":1, "购课金额（元）":599, "订单号": "wx202603232015081774268108032833"},
+    {"日期": "2026-03-24", "渠道": "京东方", "购课人数":1, "购课金额（元）":599, "订单号": "zfb202603242026431774355203538499"},
+    {"日期": "2026-03-27", "渠道": "学而思", "购课人数":1, "购课金额（元）":1999, "订单号": "wx202603271258341774587514141956"},
+    {"日期": "2026-03-28", "渠道": "希沃", "购课人数":1, "购课金额（元）":599, "订单号": "wx20260328145038177468063894734"},
+]
+df_purchase = pd.DataFrame(purchase_data)
 
-# 7. 保存报表
-output_file = '2026年3月收入预测报表_最新版.xlsx'
-with pd.ExcelWriter(output_file) as writer:
-    report_df.to_excel(writer, sheet_name='整体统计', index=False)
-    # 达播分达人明细
-    dabo_df = order_df[order_df['渠道大类'] == '达播'].groupby('key_from').agg(
-        订单数=('id', 'count'),
-        GMV=('GMV', 'sum'),
-        GSV=('GSV', 'sum'),
-        退费率=('is_refund', lambda x: str((x.sum()/x.count()*100).round(1)) + '%')
-    ).reset_index()
-    dabo_df.to_excel(writer, sheet_name='达播达人明细', index=False)
-    # 成交标记明细
-    tag_df = order_df.groupby('成交标记').agg(
-        订单数=('id', 'count'),
-        GMV=('GMV', 'sum'),
-        GSV=('GSV', 'sum'),
-        退费率=('is_refund', lambda x: str((x.sum()/x.count()*100).round(1)) + '%')
-    ).reset_index()
-    tag_df.to_excel(writer, sheet_name='成交标记明细', index=False)
+# 3. 每日新增注册数据
+register_data = [
+    {"日期": "2026-03-01", "渠道": "京东方", "新增注册人数": 1},
+    {"日期": "2026-03-01", "渠道": "学而思", "新增注册人数": 48},
+    {"日期": "2026-03-01", "渠道": "希沃", "新增注册人数": 2},
+    {"日期": "2026-03-02", "渠道": "京东方", "新增注册人数": 3},
+    {"日期": "2026-03-02", "渠道": "学而思", "新增注册人数": 38},
+    {"日期": "2026-03-02", "渠道": "希沃", "新增注册人数": 1},
+    {"日期": "2026-03-03", "渠道": "学而思", "新增注册人数": 24},
+    {"日期": "2026-03-03", "渠道": "希沃", "新增注册人数": 4},
+    {"日期": "2026-03-04", "渠道": "京东方", "新增注册人数": 4},
+    {"日期": "2026-03-04", "渠道": "学而思", "新增注册人数": 20},
+    {"日期": "2026-03-04", "渠道": "希沃", "新增注册人数": 10},
+    {"日期": "2026-03-04", "渠道": "科大讯飞", "新增注册人数": 3},
+    {"日期": "2026-03-05", "渠道": "京东方", "新增注册人数": 7},
+    {"日期": "2026-03-05", "渠道": "学而思", "新增注册人数": 37},
+    {"日期": "2026-03-05", "渠道": "希沃", "新增注册人数": 15},
+    {"日期": "2026-03-05", "渠道": "科大讯飞", "新增注册人数": 17},
+    {"日期": "2026-03-06", "渠道": "京东方", "新增注册人数": 6},
+    {"日期": "2026-03-06", "渠道": "学而思", "新增注册人数": 26},
+    {"日期": "2026-03-06", "渠道": "希沃", "新增注册人数": 9},
+    {"日期": "2026-03-06", "渠道": "科大讯飞", "新增注册人数": 12},
+    {"日期": "2026-03-07", "渠道": "京东方", "新增注册人数": 5},
+    {"日期": "2026-03-07", "渠道": "学而思", "新增注册人数": 35},
+    {"日期": "2026-03-07", "渠道": "希沃", "新增注册人数": 5},
+    {"日期": "2026-03-07", "渠道": "科大讯飞", "新增注册人数": 34},
+    {"日期": "2026-03-08", "渠道": "京东方", "新增注册人数": 3},
+    {"日期": "2026-03-08", "渠道": "学而思", "新增注册人数": 33},
+    {"日期": "2026-03-08", "渠道": "希沃", "新增注册人数": 12},
+    {"日期": "2026-03-08", "渠道": "科大讯飞", "新增注册人数": 34},
+    {"日期": "2026-03-09", "渠道": "京东方", "新增注册人数": 2},
+    {"日期": "2026-03-09", "渠道": "学而思", "新增注册人数": 27},
+    {"日期": "2026-03-09", "渠道": "希沃", "新增注册人数": 5},
+    {"日期": "2026-03-09", "渠道": "科大讯飞", "新增注册人数": 15},
+    {"日期": "2026-03-10", "渠道": "学而思", "新增注册人数": 15},
+    {"日期": "2026-03-10", "渠道": "希沃", "新增注册人数": 3},
+    {"日期": "2026-03-10", "渠道": "科大讯飞", "新增注册人数": 9},
+    {"日期": "2026-03-11", "渠道": "京东方", "新增注册人数": 2},
+    {"日期": "2026-03-11", "渠道": "学而思", "新增注册人数": 25},
+    {"日期": "2026-03-11", "渠道": "希沃", "新增注册人数": 2},
+    {"日期": "2026-03-11", "渠道": "科大讯飞", "新增注册人数": 13},
+    {"日期": "2026-03-12", "渠道": "京东方", "新增注册人数": 5},
+    {"日期": "2026-03-12", "渠道": "学而思", "新增注册人数": 24},
+    {"日期": "2026-03-12", "渠道": "希沃", "新增注册人数": 5},
+    {"日期": "2026-03-12", "渠道": "科大讯飞", "新增注册人数": 15},
+    {"日期": "2026-03-13", "渠道": "京东方", "新增注册人数": 2},
+    {"日期": "2026-03-13", "渠道": "学而思", "新增注册人数": 31},
+    {"日期": "2026-03-13", "渠道": "希沃", "新增注册人数": 7},
+    {"日期": "2026-03-13", "渠道": "科大讯飞", "新增注册人数": 8},
+    {"日期": "2026-03-14", "渠道": "学而思", "新增注册人数": 30},
+    {"日期": "2026-03-14", "渠道": "希沃", "新增注册人数": 3},
+    {"日期": "2026-03-14", "渠道": "科大讯飞", "新增注册人数": 22},
+    {"日期": "2026-03-15", "渠道": "京东方", "新增注册人数": 1},
+    {"日期": "2026-03-15", "渠道": "学而思", "新增注册人数": 22},
+    {"日期": "2026-03-15", "渠道": "希沃", "新增注册人数": 3},
+    {"日期": "2026-03-15", "渠道": "科大讯飞", "新增注册人数": 22},
+    {"日期": "2026-03-16", "渠道": "京东方", "新增注册人数": 2},
+    {"日期": "2026-03-16", "渠道": "学而思", "新增注册人数": 6},
+    {"日期": "2026-03-16", "渠道": "希沃", "新增注册人数": 2},
+    {"日期": "2026-03-16", "渠道": "科大讯飞", "新增注册人数": 10},
+    {"日期": "2026-03-17", "渠道": "京东方", "新增注册人数": 3},
+    {"日期": "2026-03-17", "渠道": "学而思", "新增注册人数": 12},
+    {"日期": "2026-03-17", "渠道": "希沃", "新增注册人数": 3},
+    {"日期": "2026-03-17", "渠道": "科大讯飞", "新增注册人数": 6},
+    {"日期": "2026-03-18", "渠道": "京东方", "新增注册人数": 1},
+    {"日期": "2026-03-18", "渠道": "学而思", "新增注册人数": 9},
+    {"日期": "2026-03-18", "渠道": "科大讯飞", "新增注册人数": 11},
+    {"日期": "2026-03-19", "渠道": "京东方", "新增注册人数": 1},
+    {"日期": "2026-03-19", "渠道": "学而思", "新增注册人数": 6},
+    {"日期": "2026-03-19", "渠道": "希沃", "新增注册人数": 2},
+    {"日期": "2026-03-19", "渠道": "科大讯飞", "新增注册人数": 9},
+    {"日期": "2026-03-20", "渠道": "京东方", "新增注册人数": 1},
+    {"日期": "2026-03-20", "渠道": "学而思", "新增注册人数": 13},
+    {"日期": "2026-03-20", "渠道": "希沃", "新增注册人数": 2},
+    {"日期": "2026-03-20", "渠道": "科大讯飞", "新增注册人数": 12},
+    {"日期": "2026-03-21", "渠道": "京东方", "新增注册人数": 1},
+    {"日期": "2026-03-21", "渠道": "学而思", "新增注册人数": 27},
+    {"日期": "2026-03-21", "渠道": "科大讯飞", "新增注册人数": 26},
+    {"日期": "2026-03-22", "渠道": "学而思", "新增注册人数": 12},
+    {"日期": "2026-03-22", "渠道": "希沃", "新增注册人数": 4},
+    {"日期": "2026-03-22", "渠道": "科大讯飞", "新增注册人数": 22},
+    {"日期": "2026-03-23", "渠道": "京东方", "新增注册人数": 1},
+    {"日期": "2026-03-23", "渠道": "学而思", "新增注册人数": 9},
+    {"日期": "2026-03-23", "渠道": "希沃", "新增注册人数": 2},
+    {"日期": "2026-03-23", "渠道": "科大讯飞", "新增注册人数": 5},
+    {"日期": "2026-03-24", "渠道": "学而思", "新增注册人数": 4},
+    {"日期": "2026-03-24", "渠道": "希沃", "新增注册人数": 2},
+    {"日期": "2026-03-24", "渠道": "科大讯飞", "新增注册人数": 8},
+    {"日期": "2026-03-25", "渠道": "京东方", "新增注册人数": 1},
+    {"日期": "2026-03-25", "渠道": "学而思", "新增注册人数": 12},
+    {"日期": "2026-03-25", "渠道": "希沃", "新增注册人数": 5},
+    {"日期": "2026-03-25", "渠道": "科大讯飞", "新增注册人数": 13},
+    {"日期": "2026-03-26", "渠道": "京东方", "新增注册人数": 1},
+    {"日期": "2026-03-26", "渠道": "学而思", "新增注册人数": 8},
+    {"日期": "2026-03-26", "渠道": "希沃", "新增注册人数": 2},
+    {"日期": "2026-03-26", "渠道": "科大讯飞", "新增注册人数": 8},
+    {"日期": "2026-03-27", "渠道": "学而思", "新增注册人数": 9},
+    {"日期": "2026-03-27", "渠道": "希沃", "新增注册人数": 6},
+    {"日期": "2026-03-27", "渠道": "科大讯飞", "新增注册人数": 6},
+    {"日期": "2026-03-28", "渠道": "京东方", "新增注册人数": 2},
+    {"日期": "2026-03-28", "渠道": "学而思", "新增注册人数": 20},
+    {"日期": "2026-03-28", "渠道": "希沃", "新增注册人数": 4},
+    {"日期": "2026-03-28", "渠道": "科大讯飞", "新增注册人数": 12},
+    {"日期": "2026-03-29", "渠道": "京东方", "新增注册人数": 2},
+    {"日期": "2026-03-29", "渠道": "学而思", "新增注册人数": 16},
+    {"日期": "2026-03-29", "渠道": "科大讯飞", "新增注册人数": 9},
+    {"日期": "2026-03-30", "渠道": "京东方", "新增注册人数": 2},
+    {"日期": "2026-03-30", "渠道": "学而思", "新增注册人数": 7},
+    {"日期": "2026-03-30", "渠道": "希沃", "新增注册人数": 2},
+    {"日期": "2026-03-30", "渠道": "科大讯飞", "新增注册人数": 6},
+    {"日期": "2026-03-31", "渠道": "京东方", "新增注册人数": 3},
+    {"日期": "2026-03-31", "渠道": "学而思", "新增注册人数": 10},
+    {"日期": "2026-03-31", "渠道": "科大讯飞", "新增注册人数": 10},
+]
+df_register = pd.DataFrame(register_data)
 
-print(f"\n最新3月收入预测报表已生成：{output_file}")
-print("\n整体统计结果：")
-print(report_df[['渠道大类', '预测GSV', 'GSV', '完成率', '订单数', 'GMV', '退费率']])
+# 生成Excel文件
+output_path = "/root/.openclaw/workspace/2026年3月硬件渠道数据汇总.xlsx"
+with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
+    df_overall.to_excel(writer, sheet_name='整体统计', index=False)
+    df_purchase.to_excel(writer, sheet_name='每日购课明细', index=False)
+    df_register.to_excel(writer, sheet_name='每日新增注册明细', index=False)
+
+print(f"文件已生成：{output_path}")
diff --git a/send_file.sh b/scripts/send_file.sh
similarity index 100%
rename from send_file.sh
rename to scripts/send_file.sh
diff --git a/venv/bin/python b/venv/bin/python
deleted file mode 120000
index b8a0adb..0000000
--- a/venv/bin/python
+++ /dev/null
@@ -1 +0,0 @@
-python3
\ No newline at end of file
diff --git a/venv/bin/python3 b/venv/bin/python3
deleted file mode 120000
index ae65fda..0000000
--- a/venv/bin/python3
+++ /dev/null
@@ -1 +0,0 @@
-/usr/bin/python3
\ No newline at end of file
diff --git a/venv/bin/python3.12 b/venv/bin/python3.12
deleted file mode 120000
index b8a0adb..0000000
--- a/venv/bin/python3.12
+++ /dev/null
@@ -1 +0,0 @@
-python3
\ No newline at end of file
diff --git a/venv/lib64 b/venv/lib64
deleted file mode 120000
index 7951405..0000000
--- a/venv/lib64
+++ /dev/null
@@ -1 +0,0 @@
-lib
\ No newline at end of file
diff --git a/venv/pyvenv.cfg b/venv/pyvenv.cfg
deleted file mode 100644
index 0d52065..0000000
--- a/venv/pyvenv.cfg
+++ /dev/null
@@ -1,5 +0,0 @@
-home = /usr/bin
-include-system-site-packages = false
-version = 3.12.3
-executable = /usr/bin/python3.12
-command = /usr/bin/python3 -m venv /root/.openclaw/workspace/venv