Compare commits

...

2 Commits

Author SHA1 Message Date
小溪
40c914c4d2 🤖 每日自动备份 - 2026-04-16 08:00:01 2026-04-16 08:00:01 +08:00
小溪
5af3ef928f feat: add refund-user-learning-analysis skill 2026-04-15 19:03:52 +08:00
74 changed files with 841 additions and 20747 deletions

3
.gitignore vendored
View File

@ -24,6 +24,9 @@ __pycache__/
.openclaw/ .openclaw/
.clawhub/ .clawhub/
# 归档备份目录
backup/
# 日志文件 # 日志文件
*.log *.log
logs/ logs/

View File

@ -10,3 +10,4 @@ smart-auto-model-switch f94a7b1ff6357c287a374366b339cd06b0dde5ca9e965a71a369ff66
vala-component-oops-stat 2e3a3dc8ee252f133256ec5fb8edc031fc25ff602158fd2ddf778ce4d2b961f2 vala-component-oops-stat 2e3a3dc8ee252f133256ec5fb8edc031fc25ff602158fd2ddf778ce4d2b961f2
vala-component-practice-stat 8e768e2641019d27bd41f4647d2d90f24182a0554dad5ad9f4136e9ce0bae147 vala-component-practice-stat 8e768e2641019d27bd41f4647d2d90f24182a0554dad5ad9f4136e9ce0bae147
cron-schedule e103cbb1806b28c891b9c856963325086ecaff32edec208f0a841865f26e8f3e cron-schedule e103cbb1806b28c891b9c856963325086ecaff32edec208f0a841865f26e8f3e
refund-user-learning-analysis 18f23bb62167466319ab667225cad5fc654680fe7820a1f3d87cfe98c30e96fd

View File

@ -133,6 +133,34 @@ System: [...] Feishu[xiaobian] group oc_xxx | 发送者 (ou_xxx) [msg:om_xxx]
参与,而非主导。质量 > 数量。 参与,而非主导。质量 > 数量。
## 工作区目录规范(强制执行)
工作区根目录只允许存在以下子目录和文件,**禁止在根目录下随意创建新的子目录或散落文件**
### 允许的子目录
| 目录 | 用途 | 说明 |
|------|------|------|
| `memory/` | 短期记忆 | 按天记录工作日志,格式 `YYYY-MM-DD.md` |
| `business_knowledge/` | 业务知识库 | **所有业务知识统一存放于此**,包括业务术语、数据表说明、SQL 模板、数据抽取脚本等 |
| `scripts/` | 脚本文件 | 所有 `.py`、`.sh`、`.sql` 等脚本文件必须放在此目录 |
| `output/` | 输出文件 | 所有生成的报表(`.xlsx`、`.csv`)、日志(`.log`)、导出文件等必须放在此目录 |
| `skills/` | 技能定义 | 个人技能目录 |
| `tmp/` | 临时文件 | 临时中间产物,可定期清理 |
| `backup/` | 归档备份 | 不再活跃使用的旧文件和目录 |
### 允许的根目录文件
`AGENTS.md`、`SOUL.md`、`USER.md`、`MEMORY.md`、`TOOLS.md`、`IDENTITY.md`、`HEARTBEAT.md`、`BOOTSTRAP.md`、`secrets.env`、`.env`、`.gitignore`
### 强制规则
1. **脚本文件** → 始终创建在 `scripts/` 目录下,绝不放在根目录
2. **输出文件**(xlsx/csv/log/报表等)→ 始终创建在 `output/` 目录下,绝不放在根目录
3. **业务知识** → 统一记录到 `business_knowledge/` 目录
4. **新增子目录** → 禁止在根目录下随意创建新子目录。如有特殊需要,须经技术负责人确认
5. **临时文件** → 使用 `tmp/`,用完即清
## 工具 ## 工具
Skills 提供你的工具。当你需要某个工具时,查看它的 `SKILL.md`。在 `TOOLS.md` 中保存环境相关的备注(数据库连接、API 配置等)。 Skills 提供你的工具。当你需要某个工具时,查看它的 `SKILL.md`。在 `TOOLS.md` 中保存环境相关的备注(数据库连接、API 配置等)。

@ -1 +0,0 @@
Subproject commit c7e1952f72fd3e7379b14c10a87540fa2ce84037

View File

@ -1,4 +0,0 @@
(apply,CacheStats{hitCount=16, missCount=5, loadSuccessCount=5, loadExceptionCount=0, totalLoadTime=28346537, evictionCount=0})
(tree,CacheStats{hitCount=5, missCount=8, loadSuccessCount=8, loadExceptionCount=0, totalLoadTime=13193310, evictionCount=0})
(commit,CacheStats{hitCount=2, missCount=3, loadSuccessCount=3, loadExceptionCount=0, totalLoadTime=27853456, evictionCount=0})
(tag,CacheStats{hitCount=0, missCount=0, loadSuccessCount=0, loadExceptionCount=0, totalLoadTime=0, evictionCount=0})

View File

@ -1 +0,0 @@
8e09023a9bda22c602a1c8dc1eb2f7a0ebdcfb2d 3062188004198c6a73c95042a620fd20f1492c39 TOOLS.md

View File

@ -1,3 +0,0 @@
037a62079890d989cdcd02b38d4e3951a92fc87e 8ab4e0749014d066411ca1b40bc3db69af9a0c6e
2ee12bae8e137cf71fe5ab95293623d2017b7985 339001c1df863b8e5eaaf54ad3fff2d0aa35bce5
b91ce3a3a73bd86c4ee7f2616c8259fb8ae1bcfc 85f58778f25a6ac5a5959531ef601405e827e902

@ -1 +0,0 @@
Subproject commit 2ee12bae8e137cf71fe5ab95293623d2017b7985

BIN
bfg.jar

Binary file not shown.

View File

@ -1,441 +0,0 @@
TABLE_NAME COLUMN_NAME DATA_TYPE IS_NULLABLE
casbin_rule id bigint NO
casbin_rule ptype varchar YES
casbin_rule v0 varchar YES
casbin_rule v1 varchar YES
casbin_rule v2 varchar YES
casbin_rule v3 varchar YES
casbin_rule v4 varchar YES
casbin_rule v5 varchar YES
exa_customers id bigint NO
exa_customers created_at datetime YES
exa_customers updated_at datetime YES
exa_customers deleted_at datetime YES
exa_customers customer_name varchar YES
exa_customers customer_phone_data varchar YES
exa_customers sys_user_id bigint YES
exa_customers sys_user_authority_id bigint YES
exa_file_chunks id bigint NO
exa_file_chunks created_at datetime YES
exa_file_chunks updated_at datetime YES
exa_file_chunks deleted_at datetime YES
exa_file_chunks exa_file_id bigint YES
exa_file_chunks file_chunk_number bigint YES
exa_file_chunks file_chunk_path varchar YES
exa_file_upload_and_downloads id bigint NO
exa_file_upload_and_downloads name varchar YES
exa_file_upload_and_downloads url varchar YES
exa_file_upload_and_downloads tag varchar YES
exa_file_upload_and_downloads key varchar YES
exa_file_upload_and_downloads created_at datetime YES
exa_file_upload_and_downloads updated_at datetime YES
exa_file_upload_and_downloads deleted_at datetime YES
exa_files id bigint NO
exa_files created_at datetime YES
exa_files updated_at datetime YES
exa_files deleted_at datetime YES
exa_files file_name varchar YES
exa_files file_md5 varchar YES
exa_files file_path varchar YES
exa_files chunk_total bigint YES
exa_files is_finish tinyint YES
jwt_blacklists id bigint NO
jwt_blacklists created_at datetime YES
jwt_blacklists updated_at datetime YES
jwt_blacklists deleted_at datetime YES
jwt_blacklists jwt text YES
sys_apis id bigint NO
sys_apis created_at datetime YES
sys_apis updated_at datetime YES
sys_apis deleted_at datetime YES
sys_apis path varchar YES
sys_apis description varchar YES
sys_apis api_group varchar YES
sys_apis method varchar YES
sys_authorities created_at datetime YES
sys_authorities updated_at datetime YES
sys_authorities deleted_at datetime YES
sys_authorities authority_id bigint NO
sys_authorities authority_name varchar YES
sys_authorities parent_id bigint YES
sys_authorities default_router varchar YES
sys_authority_btns authority_id bigint YES
sys_authority_btns sys_menu_id bigint YES
sys_authority_btns sys_base_menu_btn_id bigint YES
sys_authority_menus sys_base_menu_id bigint NO
sys_authority_menus sys_authority_authority_id bigint NO
sys_auto_code_histories id bigint NO
sys_auto_code_histories created_at datetime YES
sys_auto_code_histories updated_at datetime YES
sys_auto_code_histories deleted_at datetime YES
sys_auto_code_histories package varchar YES
sys_auto_code_histories business_db varchar YES
sys_auto_code_histories table_name varchar YES
sys_auto_code_histories request_meta text YES
sys_auto_code_histories auto_code_path text YES
sys_auto_code_histories injection_meta text YES
sys_auto_code_histories struct_name varchar YES
sys_auto_code_histories struct_cn_name varchar YES
sys_auto_code_histories api_ids varchar YES
sys_auto_code_histories flag bigint YES
sys_auto_codes id bigint NO
sys_auto_codes created_at datetime YES
sys_auto_codes updated_at datetime YES
sys_auto_codes deleted_at datetime YES
sys_auto_codes package_name varchar YES
sys_auto_codes label varchar YES
sys_auto_codes desc varchar YES
sys_base_menu_btns id bigint NO
sys_base_menu_btns created_at datetime YES
sys_base_menu_btns updated_at datetime YES
sys_base_menu_btns deleted_at datetime YES
sys_base_menu_btns name varchar YES
sys_base_menu_btns desc varchar YES
sys_base_menu_btns sys_base_menu_id bigint YES
sys_base_menu_parameters id bigint NO
sys_base_menu_parameters created_at datetime YES
sys_base_menu_parameters updated_at datetime YES
sys_base_menu_parameters deleted_at datetime YES
sys_base_menu_parameters sys_base_menu_id bigint YES
sys_base_menu_parameters type varchar YES
sys_base_menu_parameters key varchar YES
sys_base_menu_parameters value varchar YES
sys_base_menus id bigint NO
sys_base_menus created_at datetime YES
sys_base_menus updated_at datetime YES
sys_base_menus deleted_at datetime YES
sys_base_menus menu_level bigint YES
sys_base_menus parent_id varchar YES
sys_base_menus path varchar YES
sys_base_menus name varchar YES
sys_base_menus hidden tinyint YES
sys_base_menus component varchar YES
sys_base_menus sort bigint YES
sys_base_menus active_name varchar YES
sys_base_menus keep_alive tinyint YES
sys_base_menus default_menu tinyint YES
sys_base_menus title varchar YES
sys_base_menus icon varchar YES
sys_base_menus close_tab tinyint YES
sys_chat_gpt_options sk varchar YES
sys_data_authority_id sys_authority_authority_id bigint NO
sys_data_authority_id data_authority_id_authority_id bigint NO
sys_dictionaries id bigint NO
sys_dictionaries created_at datetime YES
sys_dictionaries updated_at datetime YES
sys_dictionaries deleted_at datetime YES
sys_dictionaries name varchar YES
sys_dictionaries type varchar YES
sys_dictionaries status tinyint YES
sys_dictionaries desc varchar YES
sys_dictionary_details id bigint NO
sys_dictionary_details created_at datetime YES
sys_dictionary_details updated_at datetime YES
sys_dictionary_details deleted_at datetime YES
sys_dictionary_details label varchar YES
sys_dictionary_details value bigint YES
sys_dictionary_details extend varchar YES
sys_dictionary_details status tinyint YES
sys_dictionary_details sort bigint YES
sys_dictionary_details sys_dictionary_id bigint YES
sys_operation_records id bigint NO
sys_operation_records updated_at datetime YES
sys_operation_records nick_name varchar YES
sys_operation_records body text YES
sys_operation_records path varchar YES
sys_operation_records ip varchar YES
sys_operation_records method varchar YES
sys_operation_records status bigint YES
sys_operation_records latency bigint YES
sys_operation_records agent varchar YES
sys_operation_records error_message varchar YES
sys_operation_records resp text YES
sys_operation_records user_id bigint YES
sys_operation_records deleted_at datetime YES
sys_operation_records created_at datetime YES
sys_user_authority sys_user_id bigint NO
sys_user_authority sys_authority_authority_id bigint NO
sys_users id bigint NO
sys_users uuid varchar YES
sys_users username varchar YES
sys_users password varchar YES
sys_users nick_name varchar YES
sys_users authority_id bigint YES
sys_users side_mode varchar YES
sys_users header_img varchar YES
sys_users base_color varchar YES
sys_users active_color varchar YES
sys_users phone varchar YES
sys_users email varchar YES
sys_users enable bigint YES
sys_users created_at datetime YES
sys_users updated_at datetime YES
sys_users deleted_at datetime YES
vala_app_account id bigint NO
vala_app_account tel varchar YES
vala_app_account tel_encrypt varchar YES
vala_app_account name varchar YES
vala_app_account id_card varchar YES
vala_app_account status tinyint NO
vala_app_account pay_status int YES
vala_app_account login_times int YES
vala_app_account remark text YES
vala_app_account key_from varchar NO
vala_app_account download_channel varchar YES
vala_app_account created_at datetime YES
vala_app_account updated_at datetime YES
vala_app_account deleted_at datetime YES
vala_app_character id bigint NO
vala_app_character account_id bigint YES
vala_app_character nickname varchar YES
vala_app_character gender tinyint YES
vala_app_character birthday varchar YES
vala_app_character avatar text YES
vala_app_character spine_name varchar YES
vala_app_character latest_login datetime YES
vala_app_character reputation char YES
vala_app_character robots_cnt int YES
vala_app_character head_image varchar YES
vala_app_character status tinyint YES
vala_app_character purchase_season_package text YES
vala_app_character total_sp_point int YES
vala_app_character balance_sp_point int YES
vala_app_character point int YES
vala_app_character pay_status tinyint YES
vala_app_character created_at datetime YES
vala_app_character updated_at datetime YES
vala_app_character deleted_at datetime YES
vala_app_character mood int NO
vala_app_character mood_start_time bigint NO
vala_applet_user id bigint NO
vala_applet_user tel varchar YES
vala_applet_user open_id varchar YES
vala_applet_user session_key varchar YES
vala_applet_user union_id varchar YES
vala_applet_user status tinyint NO
vala_applet_user created_at datetime YES
vala_applet_user updated_at datetime YES
vala_applet_user deleted_at datetime YES
vala_card_use_statistics id bigint NO
vala_card_use_statistics user_id int NO
vala_card_use_statistics card_type tinyint YES
vala_card_use_statistics card_id int YES
vala_card_use_statistics count_num int NO
vala_card_use_statistics created_at datetime YES
vala_card_use_statistics updated_at datetime YES
vala_card_use_statistics deleted_at datetime YES
vala_chat id bigint NO
vala_chat user_id bigint NO
vala_chat mod_id bigint NO
vala_chat npc_id bigint NO
vala_chat chat_id varchar NO
vala_chat msg varchar NO
vala_chat assistant text YES
vala_chat created_at datetime YES
vala_chat updated_at datetime YES
vala_chat deleted_at datetime YES
vala_friend_relation id int NO
vala_friend_relation resource_id int NO
vala_friend_relation relation_type tinyint YES
vala_friend_relation chapter_id int NO
vala_friend_relation component_id int NO
vala_friend_relation interact_times int NO
vala_friend_relation favor_level int NO
vala_friend_relation content_show varchar YES
vala_friend_relation created_at datetime YES
vala_friend_relation updated_at datetime YES
vala_friend_relation deleted_at datetime YES
vala_invitation_code id bigint NO
vala_invitation_code code varchar YES
vala_invitation_code owner varchar YES
vala_invitation_code status tinyint NO
vala_invitation_code created_at datetime YES
vala_invitation_code updated_at datetime YES
vala_invitation_code deleted_at datetime YES
vala_invitation_code_log id bigint NO
vala_invitation_code_log code_id bigint NO
vala_invitation_code_log ip_address varchar YES
vala_invitation_code_log city varchar YES
vala_invitation_code_log created_at datetime YES
vala_invitation_code_log updated_at datetime YES
vala_invitation_code_log deleted_at timestamp YES
vala_relation_level id int NO
vala_relation_level level int NO
vala_relation_level upgrade_need_value int NO
vala_relation_level total_require_value int NO
vala_relation_level favorability_lower_limit int NO
vala_relation_level favorability_upper_limit int NO
vala_relation_level created_at datetime YES
vala_relation_level updated_at datetime YES
vala_relation_level deleted_at datetime YES
vala_seasonal_ticket id bigint NO
vala_seasonal_ticket unique_code varchar YES
vala_seasonal_ticket account_id bigint YES
vala_seasonal_ticket account_name varchar YES
vala_seasonal_ticket tel varchar YES
vala_seasonal_ticket give_time int YES
vala_seasonal_ticket expire_time int YES
vala_seasonal_ticket expire_type tinyint NO
vala_seasonal_ticket ticket_type tinyint NO
vala_seasonal_ticket used_time int YES
vala_seasonal_ticket season_package_id bigint YES
vala_seasonal_ticket season_package_name varchar YES
vala_seasonal_ticket status tinyint NO
vala_seasonal_ticket created_at datetime YES
vala_seasonal_ticket updated_at datetime YES
vala_seasonal_ticket deleted_at datetime YES
vala_user_card_component_record id bigint NO
vala_user_card_component_record user_id bigint NO
vala_user_card_component_record game_id bigint YES
vala_user_card_component_record chapter_id bigint YES
vala_user_card_component_record component_type bigint YES
vala_user_card_component_record sub_component_type bigint YES
vala_user_card_component_record component_id bigint NO
vala_user_card_component_record card_content text YES
vala_user_card_component_record is_new tinyint NO
vala_user_card_component_record created_at datetime YES
vala_user_card_component_record updated_at datetime YES
vala_user_card_component_record deleted_at datetime YES
vala_user_card_plot_upgrade id bigint NO
vala_user_card_plot_upgrade user_id bigint NO
vala_user_card_plot_upgrade chapter_id bigint YES
vala_user_card_plot_upgrade plot_card_id bigint NO
vala_user_card_plot_upgrade is_have_upgrade bigint YES
vala_user_card_plot_upgrade white_card_status tinyint NO
vala_user_card_plot_upgrade flash_card_status tinyint NO
vala_user_card_plot_upgrade is_new tinyint YES
vala_user_card_plot_upgrade created_at datetime YES
vala_user_card_plot_upgrade updated_at datetime YES
vala_user_card_plot_upgrade deleted_at datetime YES
vala_user_chapter_and_component_record id bigint NO
vala_user_chapter_and_component_record user_id bigint NO
vala_user_chapter_and_component_record chapter_id text NO
vala_user_chapter_and_component_record component_id text NO
vala_user_chapter_and_component_record created_at datetime YES
vala_user_chapter_and_component_record updated_at datetime YES
vala_user_chapter_and_component_record deleted_at datetime YES
vala_user_chapter_finish_record id bigint NO
vala_user_chapter_finish_record user_id bigint NO
vala_user_chapter_finish_record game_id bigint YES
vala_user_chapter_finish_record chapter_id bigint NO
vala_user_chapter_finish_record created_at datetime YES
vala_user_chapter_finish_record updated_at datetime YES
vala_user_chapter_finish_record deleted_at datetime YES
vala_user_chat_log id bigint NO
vala_user_chat_log user_id bigint NO
vala_user_chat_log game_id bigint YES
vala_user_chat_log chapter_id bigint YES
vala_user_chat_log sub_component_type bigint YES
vala_user_chat_log component_id bigint NO
vala_user_chat_log npc_id bigint NO
vala_user_chat_log session_id varchar NO
vala_user_chat_log dialog_list text YES
vala_user_chat_log dialog_score text YES
vala_user_chat_log tencent_soe text YES
vala_user_chat_log chat_review_resp text YES
vala_user_chat_log cost_time int NO
vala_user_chat_log insert_time datetime YES
vala_user_chat_log log_type tinyint YES
vala_user_chat_log deal_status tinyint YES
vala_user_chat_log created_at datetime YES
vala_user_chat_log updated_at datetime YES
vala_user_chat_log deleted_at datetime YES
vala_user_drama_log id bigint NO
vala_user_drama_log user_id bigint NO
vala_user_drama_log game_id bigint YES
vala_user_drama_log chapter_id bigint YES
vala_user_drama_log component_id bigint YES
vala_user_drama_log npc_id bigint YES
vala_user_drama_log created_at datetime YES
vala_user_drama_log updated_at datetime YES
vala_user_drama_log deleted_at datetime YES
vala_user_friend_info id bigint NO
vala_user_friend_info user_id bigint NO
vala_user_friend_info npc_id bigint NO
vala_user_friend_info favorability_value int YES
vala_user_friend_info level int YES
vala_user_friend_info relation_type tinyint YES
vala_user_friend_info interact_times int YES
vala_user_friend_info story_id int YES
vala_user_friend_info created_at datetime YES
vala_user_friend_info updated_at datetime YES
vala_user_friend_info deleted_at datetime YES
vala_user_game_finish_record id bigint NO
vala_user_game_finish_record user_id bigint NO
vala_user_game_finish_record game_id bigint YES
vala_user_game_finish_record created_at datetime YES
vala_user_game_finish_record updated_at datetime YES
vala_user_game_finish_record deleted_at datetime YES
vala_user_interact_chat_log id bigint NO
vala_user_interact_chat_log user_id bigint NO
vala_user_interact_chat_log npc_id bigint NO
vala_user_interact_chat_log session_id varchar YES
vala_user_interact_chat_log type varchar NO
vala_user_interact_chat_log name varchar NO
vala_user_interact_chat_log text varchar NO
vala_user_interact_chat_log interactive_type text YES
vala_user_interact_chat_log interactive_selected tinyint YES
vala_user_interact_chat_log chat_time datetime YES
vala_user_interact_chat_log created_at datetime YES
vala_user_interact_chat_log updated_at datetime YES
vala_user_interact_chat_log deleted_at datetime YES
vala_user_interact_log id bigint NO
vala_user_interact_log user_id bigint NO
vala_user_interact_log component_type tinyint YES
vala_user_interact_log relation_type tinyint YES
vala_user_interact_log npc_id bigint NO
vala_user_interact_log session_id varchar NO
vala_user_interact_log summary text NO
vala_user_interact_log dialog_list text YES
vala_user_interact_log dialog_score text YES
vala_user_interact_log tencent_soe text YES
vala_user_interact_log chat_review_resp text YES
vala_user_interact_log cost_time int NO
vala_user_interact_log insert_time datetime YES
vala_user_interact_log created_at datetime YES
vala_user_interact_log updated_at datetime YES
vala_user_interact_log deleted_at datetime YES
vala_user_interact_log favor_value int YES
vala_user_learn_record id int NO
vala_user_learn_record user_id bigint NO
vala_user_learn_record game_id bigint NO
vala_user_learn_record chapter_id bigint NO
vala_user_learn_record score float NO
vala_user_learn_record score_level char YES
vala_user_learn_record unique_code varchar YES
vala_user_learn_record source_type tinyint YES
vala_user_learn_record created_at datetime YES
vala_user_learn_record updated_at datetime YES
vala_user_learn_record deleted_at timestamp YES
vala_user_long_term_memory id bigint NO
vala_user_long_term_memory user_id int YES
vala_user_long_term_memory npc_id int YES
vala_user_long_term_memory robot_id int YES
vala_user_long_term_memory config_id int YES
vala_user_long_term_memory session_id varchar YES
vala_user_long_term_memory log_id int YES
vala_user_long_term_memory source varchar YES
vala_user_long_term_memory source_type varchar YES
vala_user_long_term_memory memory_type varchar YES
vala_user_long_term_memory sub_type varchar YES
vala_user_long_term_memory user_content text YES
vala_user_long_term_memory system_content text YES
vala_user_long_term_memory created_at datetime YES
vala_user_long_term_memory updated_at datetime YES
vala_user_long_term_memory deleted_at datetime YES
vala_user_robot id int NO
vala_user_robot user_id bigint NO
vala_user_robot robot_id bigint NO
vala_user_robot robot_level tinyint YES
vala_user_robot created_at datetime YES
vala_user_robot updated_at datetime YES
vala_user_robot deleted_at timestamp YES
vala_user_word_statistics id bigint NO
vala_user_word_statistics user_id int NO
vala_user_word_statistics word_pattern_id varchar YES
vala_user_word_statistics count_num int NO
vala_user_word_statistics content_type char YES
vala_user_word_statistics source_type tinyint YES
vala_user_word_statistics created_at datetime YES
vala_user_word_statistics updated_at datetime YES
vala_user_word_statistics deleted_at datetime YES

View File

@ -1,971 +0,0 @@
TABLE_NAME COLUMN_NAME DATA_TYPE IS_NULLABLE
ai_reviewer_issue_records id int NO
ai_reviewer_issue_records table_name varchar NO
ai_reviewer_issue_records record_key varchar NO
ai_reviewer_issue_records issue_type varchar NO
ai_reviewer_issue_records issue_location varchar YES
ai_reviewer_issue_records issue_description text NO
ai_reviewer_issue_records issue_detail text YES
ai_reviewer_issue_records source_content text YES
ai_reviewer_issue_records raw_llm_output text YES
ai_reviewer_issue_records review_model varchar YES
ai_reviewer_issue_records review_result varchar YES
ai_reviewer_issue_records review_reason text YES
ai_reviewer_issue_records is_notified tinyint YES
ai_reviewer_issue_records is_exported tinyint YES
ai_reviewer_issue_records created_at datetime YES
ai_reviewer_issue_records notified_at datetime YES
ai_reviewer_scan_metadata id int NO
ai_reviewer_scan_metadata table_name varchar NO
ai_reviewer_scan_metadata last_full_scan_at datetime YES
ai_reviewer_scan_metadata full_scan_status varchar NO
ai_reviewer_scan_metadata scan_offset_hours int NO
ai_reviewer_scan_metadata total_records_scanned int YES
ai_reviewer_scan_metadata created_at datetime YES
ai_reviewer_scan_metadata updated_at datetime YES
ai_reviewer_scan_records id int NO
ai_reviewer_scan_records table_name varchar NO
ai_reviewer_scan_records record_key varchar NO
ai_reviewer_scan_records last_updated_at datetime NO
ai_reviewer_scan_records last_scanned_at datetime NO
ai_reviewer_scan_records scan_status varchar NO
ai_reviewer_scan_records has_issues tinyint YES
ai_reviewer_scan_records content_hash varchar YES
ai_reviewer_scan_records created_at datetime YES
ai_reviewer_scan_records updated_at datetime YES
casbin_rule id bigint NO
casbin_rule ptype varchar YES
casbin_rule v0 varchar YES
casbin_rule v1 varchar YES
casbin_rule v2 varchar YES
casbin_rule v3 varchar YES
casbin_rule v4 varchar YES
casbin_rule v5 varchar YES
chapter_task_desc id bigint NO
chapter_task_desc chapter_id bigint NO
chapter_task_desc section_target varchar NO
chapter_task_desc reviewCount int NO
chapter_task_desc task_desc text YES
chapter_task_desc question_list text YES
chapter_task_desc created_at datetime YES
chapter_task_desc updated_at datetime YES
chapter_task_desc deleted_at datetime YES
core_interaction_component id bigint NO
core_interaction_component c_type varchar YES
core_interaction_component c_id varchar YES
core_interaction_component title varchar YES
core_interaction_component component_config text YES
core_interaction_component notice_component_config text YES
core_interaction_component err_msg text YES
core_interaction_component material_img_list text YES
core_interaction_component audio_list text YES
core_interaction_component text_analysis text YES
core_interaction_component related_path text YES
core_interaction_component read_status tinyint YES
core_interaction_component config_status tinyint YES
core_interaction_component lock_status tinyint YES
core_interaction_component kp_relation_info text YES
core_interaction_component created_at datetime YES
core_interaction_component updated_at datetime YES
core_interaction_component deleted_at datetime YES
dev_knowledge_point_config id int NO
dev_knowledge_point_config content varchar NO
dev_knowledge_point_config type varchar NO
dev_knowledge_point_config definition text NO
dev_knowledge_point_config example text YES
dev_knowledge_point_config created_at timestamp NO
dev_knowledge_point_config updated_at timestamp NO
dev_question_system_config id int NO
dev_question_system_config knowledge_point_id int NO
dev_question_system_config question_type varchar NO
dev_question_system_config question_type_name varchar NO
dev_question_system_config content text NO
dev_question_system_config format varchar NO
dev_question_system_config options json YES
dev_question_system_config answer int YES
dev_question_system_config fill_answer text YES
dev_question_system_config created_at timestamp NO
dev_question_system_config updated_at timestamp NO
exa_customers id bigint NO
exa_customers created_at datetime YES
exa_customers updated_at datetime YES
exa_customers deleted_at datetime YES
exa_customers customer_name varchar YES
exa_customers customer_phone_data varchar YES
exa_customers sys_user_id bigint YES
exa_customers sys_user_authority_id bigint YES
exa_file_chunks id bigint NO
exa_file_chunks created_at datetime YES
exa_file_chunks updated_at datetime YES
exa_file_chunks deleted_at datetime YES
exa_file_chunks exa_file_id bigint YES
exa_file_chunks file_chunk_number bigint YES
exa_file_chunks file_chunk_path varchar YES
exa_file_upload_and_downloads id bigint NO
exa_file_upload_and_downloads name varchar YES
exa_file_upload_and_downloads url varchar YES
exa_file_upload_and_downloads tag varchar YES
exa_file_upload_and_downloads key varchar YES
exa_file_upload_and_downloads created_at datetime YES
exa_file_upload_and_downloads updated_at datetime YES
exa_file_upload_and_downloads deleted_at datetime YES
exa_files id bigint NO
exa_files created_at datetime YES
exa_files updated_at datetime YES
exa_files deleted_at datetime YES
exa_files file_name varchar YES
exa_files file_md5 varchar YES
exa_files file_path varchar YES
exa_files chunk_total bigint YES
exa_files is_finish tinyint YES
jwt_blacklists id bigint NO
jwt_blacklists created_at datetime YES
jwt_blacklists updated_at datetime YES
jwt_blacklists deleted_at datetime YES
jwt_blacklists jwt text YES
lesson_config_info id bigint NO
lesson_config_info chapter_id bigint YES
lesson_config_info lock_status tinyint YES
lesson_config_info review_info text YES
lesson_config_info created_at datetime YES
lesson_config_info updated_at datetime YES
lesson_config_info deleted_at datetime YES
llm_words_mapping group text YES
llm_words_mapping wordsMapping text YES
llm_words_mapping updated_at datetime YES
middle_interaction_component id bigint NO
middle_interaction_component c_type varchar YES
middle_interaction_component c_id varchar YES
middle_interaction_component title varchar YES
middle_interaction_component component_config text YES
middle_interaction_component notice_component_config text YES
middle_interaction_component err_msg text YES
middle_interaction_component audio_list text YES
middle_interaction_component text_analysis text YES
middle_interaction_component related_path text YES
middle_interaction_component read_status tinyint YES
middle_interaction_component config_status tinyint YES
middle_interaction_component lock_status tinyint YES
middle_interaction_component kp_relation_info text YES
middle_interaction_component created_at datetime YES
middle_interaction_component updated_at datetime YES
middle_interaction_component deleted_at datetime YES
prompt_templates id int NO
prompt_templates user_id varchar NO
prompt_templates user_name varchar YES
prompt_templates template_name varchar NO
prompt_templates prompt_content text NO
prompt_templates description text YES
prompt_templates model_name varchar YES
prompt_templates model_params json YES
prompt_templates is_public tinyint YES
prompt_templates usage_count int YES
prompt_templates hidden_count int YES
prompt_templates created_at timestamp NO
prompt_templates updated_at timestamp NO
prompt_templates public_at timestamp YES
sys_apis id bigint NO
sys_apis created_at datetime YES
sys_apis updated_at datetime YES
sys_apis deleted_at datetime YES
sys_apis path varchar YES
sys_apis description varchar YES
sys_apis api_group varchar YES
sys_apis method varchar YES
sys_authorities created_at datetime YES
sys_authorities updated_at datetime YES
sys_authorities deleted_at datetime YES
sys_authorities authority_id bigint NO
sys_authorities authority_name varchar YES
sys_authorities parent_id bigint YES
sys_authorities default_router varchar YES
sys_authority_btns authority_id bigint YES
sys_authority_btns sys_menu_id bigint YES
sys_authority_btns sys_base_menu_btn_id bigint YES
sys_authority_menus sys_base_menu_id bigint NO
sys_authority_menus sys_authority_authority_id bigint NO
sys_auto_code_histories id bigint NO
sys_auto_code_histories created_at datetime YES
sys_auto_code_histories updated_at datetime YES
sys_auto_code_histories deleted_at datetime YES
sys_auto_code_histories package varchar YES
sys_auto_code_histories business_db varchar YES
sys_auto_code_histories table_name varchar YES
sys_auto_code_histories request_meta text YES
sys_auto_code_histories auto_code_path text YES
sys_auto_code_histories injection_meta text YES
sys_auto_code_histories struct_name varchar YES
sys_auto_code_histories struct_cn_name varchar YES
sys_auto_code_histories api_ids varchar YES
sys_auto_code_histories flag bigint YES
sys_auto_codes id bigint NO
sys_auto_codes created_at datetime YES
sys_auto_codes updated_at datetime YES
sys_auto_codes deleted_at datetime YES
sys_auto_codes package_name varchar YES
sys_auto_codes label varchar YES
sys_auto_codes desc varchar YES
sys_base_menu_btns id bigint NO
sys_base_menu_btns created_at datetime YES
sys_base_menu_btns updated_at datetime YES
sys_base_menu_btns deleted_at datetime YES
sys_base_menu_btns name varchar YES
sys_base_menu_btns desc varchar YES
sys_base_menu_btns sys_base_menu_id bigint YES
sys_base_menu_parameters id bigint NO
sys_base_menu_parameters created_at datetime YES
sys_base_menu_parameters updated_at datetime YES
sys_base_menu_parameters deleted_at datetime YES
sys_base_menu_parameters sys_base_menu_id bigint YES
sys_base_menu_parameters type varchar YES
sys_base_menu_parameters key varchar YES
sys_base_menu_parameters value varchar YES
sys_base_menus id bigint NO
sys_base_menus created_at datetime YES
sys_base_menus updated_at datetime YES
sys_base_menus deleted_at datetime YES
sys_base_menus menu_level bigint YES
sys_base_menus parent_id varchar YES
sys_base_menus path varchar YES
sys_base_menus name varchar YES
sys_base_menus hidden tinyint YES
sys_base_menus component varchar YES
sys_base_menus sort bigint YES
sys_base_menus active_name varchar YES
sys_base_menus keep_alive tinyint YES
sys_base_menus default_menu tinyint YES
sys_base_menus title varchar YES
sys_base_menus icon varchar YES
sys_base_menus close_tab tinyint YES
sys_chat_gpt_options sk varchar YES
sys_data_authority_id sys_authority_authority_id bigint NO
sys_data_authority_id data_authority_id_authority_id bigint NO
sys_dictionaries id bigint NO
sys_dictionaries created_at datetime YES
sys_dictionaries updated_at datetime YES
sys_dictionaries deleted_at datetime YES
sys_dictionaries name varchar YES
sys_dictionaries type varchar YES
sys_dictionaries status tinyint YES
sys_dictionaries desc varchar YES
sys_dictionary_details id bigint NO
sys_dictionary_details created_at datetime YES
sys_dictionary_details updated_at datetime YES
sys_dictionary_details deleted_at datetime YES
sys_dictionary_details label varchar YES
sys_dictionary_details value bigint YES
sys_dictionary_details extend varchar YES
sys_dictionary_details status tinyint YES
sys_dictionary_details sort bigint YES
sys_dictionary_details sys_dictionary_id bigint YES
sys_operation_records id bigint NO
sys_operation_records updated_at datetime YES
sys_operation_records nick_name varchar YES
sys_operation_records body text YES
sys_operation_records path varchar YES
sys_operation_records ip varchar YES
sys_operation_records method varchar YES
sys_operation_records status bigint YES
sys_operation_records latency bigint YES
sys_operation_records agent varchar YES
sys_operation_records error_message varchar YES
sys_operation_records resp text YES
sys_operation_records user_id bigint YES
sys_operation_records deleted_at datetime YES
sys_operation_records created_at datetime YES
sys_user_authority sys_user_id bigint NO
sys_user_authority sys_authority_authority_id bigint NO
sys_users id bigint NO
sys_users uuid varchar YES
sys_users username varchar YES
sys_users password varchar YES
sys_users nick_name varchar YES
sys_users authority_id bigint YES
sys_users side_mode varchar YES
sys_users header_img varchar YES
sys_users base_color varchar YES
sys_users active_color varchar YES
sys_users phone varchar YES
sys_users email varchar YES
sys_users enable bigint YES
sys_users created_at datetime YES
sys_users updated_at datetime YES
sys_users deleted_at datetime YES
unit_challenge_question id bigint NO
unit_challenge_question category varchar YES
unit_challenge_question type varchar YES
unit_challenge_question question_set_id varchar YES
unit_challenge_question question_content text YES
unit_challenge_question notice_content text YES
unit_challenge_question err_msg text YES
unit_challenge_question related_path text YES
unit_challenge_question material_list text YES
unit_challenge_question created_at datetime YES
unit_challenge_question updated_at datetime YES
unit_challenge_question deleted_at datetime YES
unit_chapter_text_parse id bigint NO
unit_chapter_text_parse chapter_id bigint NO
unit_chapter_text_parse text_parse mediumtext YES
unit_chapter_text_parse created_at datetime YES
unit_chapter_text_parse updated_at datetime YES
unit_chapter_text_parse deleted_at datetime YES
unity_chapter_section id bigint NO
unity_chapter_section chapter_id bigint NO
unity_chapter_section section_id bigint NO
unity_chapter_section section_target varchar NO
unity_chapter_section start_section text YES
unity_chapter_section end_section text YES
unity_chapter_section component_config text YES
unity_chapter_section pre_cartoon_file varchar YES
unity_chapter_section pre_cartoon_desc varchar YES
unity_chapter_section post_cartoon_file varchar YES
unity_chapter_section post_cartoon_desc varchar YES
unity_chapter_section index int NO
unity_chapter_section status int NO
unity_chapter_section mission_id int NO
unity_chapter_section mission_name varchar NO
unity_chapter_section created_at datetime YES
unity_chapter_section updated_at datetime YES
unity_chapter_section deleted_at datetime YES
user_template_preferences id int NO
user_template_preferences user_id varchar NO
user_template_preferences template_id int NO
user_template_preferences is_hidden tinyint YES
user_template_preferences is_favorite tinyint YES
user_template_preferences created_at timestamp NO
user_template_preferences updated_at timestamp NO
vala_activity id int NO
vala_activity code varchar NO
vala_activity name varchar NO
vala_activity stime bigint NO
vala_activity etime bigint NO
vala_activity is_online tinyint NO
vala_activity times int NO
vala_activity scope enum NO
vala_activity config text YES
vala_activity created_at datetime YES
vala_activity updated_at datetime YES
vala_activity deleted_at datetime YES
vala_activity_condition id bigint NO
vala_activity_condition activity_id int NO
vala_activity_condition condition varchar NO
vala_activity_condition created_at datetime YES
vala_app_account id bigint NO
vala_app_account tel varchar YES
vala_app_account tel_encrypt varchar YES
vala_app_account name varchar YES
vala_app_account id_card varchar YES
vala_app_account status tinyint NO
vala_app_account pay_status tinyint YES
vala_app_account login_times int YES
vala_app_account remark text YES
vala_app_account key_from varchar NO
vala_app_account download_channel varchar YES
vala_app_account created_at datetime YES
vala_app_account updated_at datetime YES
vala_app_account deleted_at datetime YES
vala_app_character id bigint NO
vala_app_character account_id bigint YES
vala_app_character nickname varchar YES
vala_app_character gender tinyint YES
vala_app_character birthday varchar YES
vala_app_character avatar text YES
vala_app_character spine_name varchar YES
vala_app_character latest_login datetime YES
vala_app_character reputation char YES
vala_app_character robots_cnt int YES
vala_app_character head_image varchar YES
vala_app_character status tinyint YES
vala_app_character purchase_season_package text YES
vala_app_character total_sp_point int YES
vala_app_character balance_sp_point int YES
vala_app_character point int YES
vala_app_character pay_status tinyint YES
vala_app_character created_at datetime YES
vala_app_character updated_at datetime YES
vala_app_character deleted_at datetime YES
vala_app_character mood int NO
vala_app_character mood_start_time bigint NO
vala_app_log_upload id bigint NO
vala_app_log_upload name varchar YES
vala_app_log_upload url varchar YES
vala_app_log_upload created_at datetime YES
vala_app_log_upload updated_at datetime YES
vala_app_log_upload deleted_at datetime YES
vala_applet_user id bigint NO
vala_applet_user tel varchar YES
vala_applet_user open_id varchar YES
vala_applet_user session_key varchar YES
vala_applet_user union_id varchar YES
vala_applet_user status tinyint NO
vala_applet_user created_at datetime YES
vala_applet_user updated_at datetime YES
vala_applet_user deleted_at datetime YES
vala_area id bigint NO
vala_area cn_name varchar NO
vala_area en_name varchar NO
vala_area desc text YES
vala_area location_num int YES
vala_area is_hide tinyint NO
vala_area created_at datetime YES
vala_area updated_at datetime YES
vala_area deleted_at datetime YES
vala_area_location id bigint NO
vala_area_location location_id int NO
vala_area_location area_id bigint NO
vala_area_location created_at datetime YES
vala_area_location updated_at datetime YES
vala_area_location deleted_at datetime YES
vala_card_package id bigint NO
vala_card_package name varchar YES
vala_card_package icon_name varchar YES
vala_card_package card_face_name varchar YES
vala_card_package card_back_name varchar YES
vala_card_package card_config text YES
vala_card_package relation_story_id int YES
vala_card_package created_at datetime YES
vala_card_package updated_at datetime YES
vala_card_package deleted_at datetime YES
vala_card_plot id bigint NO
vala_card_plot cn_name varchar YES
vala_card_plot en_name varchar YES
vala_card_plot plot_desc varchar YES
vala_card_plot plot_sentence text YES
vala_card_plot card_face_name varchar YES
vala_card_plot card_back_name varchar YES
vala_card_plot white_plot_card_config text YES
vala_card_plot flash_plot_card_config text YES
vala_card_plot relation_story_id int YES
vala_card_plot created_at datetime YES
vala_card_plot updated_at datetime YES
vala_card_plot deleted_at datetime YES
vala_card_plot_new id bigint NO
vala_card_plot_new cn_name varchar YES
vala_card_plot_new en_name varchar YES
vala_card_plot_new rarity char YES
vala_card_plot_new base_card_main_pic varchar YES
vala_card_plot_new advanced_card_main_pic varchar YES
vala_card_plot_new plot_desc varchar YES
vala_card_plot_new detail_title varchar YES
vala_card_plot_new detail_desc text YES
vala_card_plot_new relation_story_id int NO
vala_card_plot_new relation_chapter_id int NO
vala_card_plot_new created_at datetime YES
vala_card_plot_new updated_at datetime YES
vala_card_plot_new deleted_at datetime YES
vala_card_use_statistics id bigint NO
vala_card_use_statistics user_id int NO
vala_card_use_statistics card_type tinyint YES
vala_card_use_statistics card_id int YES
vala_card_use_statistics count_num int NO
vala_card_use_statistics created_at datetime YES
vala_card_use_statistics updated_at datetime YES
vala_card_use_statistics deleted_at datetime YES
vala_chat id bigint NO
vala_chat user_id bigint NO
vala_chat mod_id bigint NO
vala_chat npc_id bigint NO
vala_chat chat_id varchar NO
vala_chat msg varchar NO
vala_chat assistant text YES
vala_chat created_at datetime YES
vala_chat updated_at datetime YES
vala_chat deleted_at datetime YES
vala_chatbot id bigint NO
vala_chatbot type varchar NO
vala_chatbot ver varchar NO
vala_chatbot title varchar NO
vala_chatbot desc varchar NO
vala_chatbot config text NO
vala_chatbot created_at datetime YES
vala_chatbot updated_at datetime YES
vala_chatbot deleted_at datetime YES
vala_chatbot source_type varchar NO
vala_component_chat_config id bigint NO
vala_component_chat_config component_id bigint YES
vala_component_chat_config title varchar NO
vala_component_chat_config type varchar YES
vala_component_chat_config desc varchar NO
vala_component_chat_config config text NO
vala_component_chat_config created_at datetime YES
vala_component_chat_config updated_at datetime YES
vala_component_chat_config deleted_at datetime YES
vala_friend_relation id int NO
vala_friend_relation resource_id int NO
vala_friend_relation relation_type tinyint YES
vala_friend_relation chapter_id int NO
vala_friend_relation component_id int NO
vala_friend_relation interact_times int NO
vala_friend_relation favor_level int NO
vala_friend_relation content_show varchar YES
vala_friend_relation created_at datetime YES
vala_friend_relation updated_at datetime YES
vala_friend_relation deleted_at datetime YES
vala_game_chapter id bigint NO
vala_game_chapter game_id bigint YES
vala_game_chapter season_package_id bigint YES
vala_game_chapter en_name varchar NO
vala_game_chapter cn_name varchar NO
vala_game_chapter scene_id varchar NO
vala_game_chapter lesson_type tinyint YES
vala_game_chapter event_id int YES
vala_game_chapter desc text NO
vala_game_chapter index smallint YES
vala_game_chapter learn_config text YES
vala_game_chapter scene_path text YES
vala_game_chapter self_knowledge_card_config text YES
vala_game_chapter other_knowledge_card_config text YES
vala_game_chapter review_knowledge_card_config text YES
vala_game_chapter chapter_extension_config text YES
vala_game_chapter created_at datetime YES
vala_game_chapter updated_at datetime YES
vala_game_chapter deleted_at datetime YES
vala_game_chapter_section id bigint NO
vala_game_chapter_section chapter_id bigint YES
vala_game_chapter_section section_target varchar YES
vala_game_chapter_section start_section text YES
vala_game_chapter_section end_section text YES
vala_game_chapter_section pre_cartoon_file varchar YES
vala_game_chapter_section pre_cartoon_desc varchar YES
vala_game_chapter_section post_cartoon_file varchar YES
vala_game_chapter_section post_cartoon_desc varchar YES
vala_game_chapter_section special_event_type tinyint YES
vala_game_chapter_section special_event_en_content text YES
vala_game_chapter_section special_event_cn_content text YES
vala_game_chapter_section standard_event_type tinyint YES
vala_game_chapter_section standard_event_en_content text YES
vala_game_chapter_section standard_event_cn_content text YES
vala_game_chapter_section story_remind_type tinyint YES
vala_game_chapter_section story_remind_en_content text YES
vala_game_chapter_section story_remind_cn_content text YES
vala_game_chapter_section component_config text YES
vala_game_chapter_section mission_id int YES
vala_game_chapter_section mission_name varchar YES
vala_game_chapter_section index int YES
vala_game_chapter_section created_at datetime YES
vala_game_chapter_section updated_at datetime YES
vala_game_chapter_section deleted_at datetime YES
vala_game_component_brain_config id bigint NO
vala_game_component_brain_config brain_config text YES
vala_game_component_brain_config created_at datetime YES
vala_game_component_brain_config updated_at datetime YES
vala_game_component_brain_config deleted_at datetime YES
vala_game_info id bigint NO
vala_game_info en_name varchar NO
vala_game_info cn_name varchar NO
vala_game_info game_code varchar NO
vala_game_info game_category tinyint NO
vala_game_info season_package_id bigint YES
vala_game_info level_type tinyint NO
vala_game_info desc text NO
vala_game_info is_hide tinyint NO
vala_game_info card_package_id text YES
vala_game_info card_plot_id text YES
vala_game_info index int YES
vala_game_info plot_card_config text YES
vala_game_info created_at datetime YES
vala_game_info updated_at datetime YES
vala_game_info deleted_at datetime YES
vala_game_season_package id bigint NO
vala_game_season_package cn_name varchar YES
vala_game_season_package en_name varchar YES
vala_game_season_package season_of_year int NO
vala_game_season_package season_of_quarter int NO
vala_game_season_package season_cn_desc text YES
vala_game_season_package season_en_desc text YES
vala_game_season_package is_hide tinyint YES
vala_game_season_package index int YES
vala_game_season_package level char YES
vala_game_season_package created_at datetime YES
vala_game_season_package updated_at datetime YES
vala_game_season_package deleted_at datetime YES
vala_invitation_code id bigint NO
vala_invitation_code code varchar YES
vala_invitation_code owner varchar YES
vala_invitation_code status tinyint NO
vala_invitation_code created_at datetime YES
vala_invitation_code updated_at datetime YES
vala_invitation_code deleted_at datetime YES
vala_invitation_code_log id bigint NO
vala_invitation_code_log code_id bigint NO
vala_invitation_code_log ip_address varchar YES
vala_invitation_code_log city varchar YES
vala_invitation_code_log created_at datetime YES
vala_invitation_code_log updated_at datetime YES
vala_invitation_code_log deleted_at timestamp YES
vala_km id bigint NO
vala_km type varchar YES
vala_km km_id varchar YES
vala_km title varchar YES
vala_km bg_img varchar YES
vala_km kp_set text YES
vala_km sum_explain text YES
vala_km notice_km_content text YES
vala_km err_msg text YES
vala_km material_list text YES
vala_km created_at datetime YES
vala_km updated_at datetime YES
vala_km deleted_at datetime YES
vala_knowledge_module id bigint NO
vala_knowledge_module cn_title varchar NO
vala_knowledge_module en_title varchar NO
vala_knowledge_module module_type tinyint YES
vala_knowledge_module description text NO
vala_knowledge_module word_knowledge_point text YES
vala_knowledge_module phoneme_knowledge_point text YES
vala_knowledge_module grammar_knowledge_point text YES
vala_knowledge_module sentence_knowledge_point text YES
vala_knowledge_module dialogue_knowledge_point text YES
vala_knowledge_module relation_test_question text YES
vala_knowledge_module created_at datetime YES
vala_knowledge_module updated_at datetime YES
vala_knowledge_module deleted_at datetime YES
vala_knowledge_point id bigint NO
vala_knowledge_point title varchar YES
vala_knowledge_point knowledge_point_type tinyint NO
vala_knowledge_point knowledge_point_content text YES
vala_knowledge_point relation_knowledge_point text YES
vala_knowledge_point created_at datetime YES
vala_knowledge_point updated_at datetime YES
vala_knowledge_point deleted_at datetime YES
vala_kp id bigint NO
vala_kp type varchar YES
vala_kp kp_id varchar YES
vala_kp title varchar YES
vala_kp ipa varchar YES
vala_kp vala_level varchar YES
vala_kp cambridge_level varchar YES
vala_kp cefr_level varchar YES
vala_kp nc_level varchar YES
vala_kp en_desc varchar YES
vala_kp scene varchar YES
vala_kp knowledge_point_content text YES
vala_kp notice_kp_content text YES
vala_kp err_msg text YES
vala_kp related_path text YES
vala_kp created_at datetime YES
vala_kp updated_at datetime YES
vala_kp deleted_at datetime YES
vala_kp_card id bigint NO
vala_kp_card kp_id varchar YES
vala_kp_card category varchar YES
vala_kp_card exp_type varchar YES
vala_kp_card card_content text YES
vala_kp_card question text YES
vala_kp_card notice_content text YES
vala_kp_card err_msg varchar YES
vala_kp_card created_at datetime YES
vala_kp_card updated_at datetime YES
vala_kp_card deleted_at datetime YES
vala_kp_question id bigint NO
vala_kp_question kp_id varchar YES
vala_kp_question category varchar YES
vala_kp_question skill varchar YES
vala_kp_question type varchar YES
vala_kp_question question text YES
vala_kp_question created_at datetime YES
vala_kp_question updated_at datetime YES
vala_kp_question deleted_at datetime YES
vala_learn_ability id bigint NO
vala_learn_ability en_name varchar NO
vala_learn_ability cn_name varchar NO
vala_learn_ability season_package_id bigint YES
vala_learn_ability game_id bigint YES
vala_learn_ability chapter_id bigint YES
vala_learn_ability ability_type tinyint NO
vala_learn_ability sub_component_type tinyint NO
vala_learn_ability component_id bigint YES
vala_learn_ability created_at datetime YES
vala_learn_ability updated_at datetime YES
vala_learn_ability deleted_at datetime YES
vala_location id bigint NO
vala_location location_id int NO
vala_location cn_name varchar NO
vala_location en_name varchar NO
vala_location desc text YES
vala_location area_id bigint NO
vala_location is_related tinyint NO
vala_location location_size tinyint NO
vala_location actual_size varchar NO
vala_location resource_url varchar NO
vala_location created_at datetime YES
vala_location updated_at datetime YES
vala_location deleted_at datetime YES
vala_minimaxi_tts id bigint NO
vala_minimaxi_tts local_name varchar NO
vala_minimaxi_tts voice_id varchar NO
vala_minimaxi_tts gender varchar YES
vala_minimaxi_tts gender_desc varchar YES
vala_minimaxi_tts created_at datetime YES
vala_minimaxi_tts updated_at datetime YES
vala_minimaxi_tts deleted_at datetime YES
vala_picture_text_material id bigint NO
vala_picture_text_material material_name varchar YES
vala_picture_text_material material_type varchar YES
vala_picture_text_material material_content text YES
vala_picture_text_material created_at datetime YES
vala_picture_text_material updated_at datetime YES
vala_picture_text_material deleted_at datetime YES
vala_relation_level id int NO
vala_relation_level level int NO
vala_relation_level upgrade_need_value int NO
vala_relation_level total_require_value int NO
vala_relation_level favorability_lower_limit int NO
vala_relation_level favorability_upper_limit int NO
vala_relation_level created_at datetime YES
vala_relation_level updated_at datetime YES
vala_relation_level deleted_at datetime YES
vala_resource_base id bigint NO
vala_resource_base cn_name varchar NO
vala_resource_base en_name varchar NO
vala_resource_base type varchar NO
vala_resource_base voice_list text YES
vala_resource_base voice_id varchar YES
vala_resource_base is_hide tinyint YES
vala_resource_base gender tinyint YES
vala_resource_base age int YES
vala_resource_base half_body varchar YES
vala_resource_base thumbnail varchar YES
vala_resource_base profile text YES
vala_resource_base spine_file_name varchar YES
vala_resource_base is_friend tinyint YES
vala_resource_base nick_name varchar YES
vala_resource_base mbti varchar YES
vala_resource_base interest varchar YES
vala_resource_base birthday varchar YES
vala_resource_base tips varchar YES
vala_resource_base desc text YES
vala_resource_base stream_voice_id varchar YES
vala_resource_base created_at datetime YES
vala_resource_base updated_at datetime YES
vala_resource_base deleted_at datetime YES
vala_scene id bigint NO
vala_scene name varchar NO
vala_scene scene_id varchar NO
vala_scene location_id int NO
vala_scene desc text YES
vala_scene is_related tinyint YES
vala_scene created_at datetime YES
vala_scene updated_at datetime YES
vala_scene deleted_at datetime YES
vala_scene_resource_v3 id int NO
vala_scene_resource_v3 location_id int NO
vala_scene_resource_v3 scene_id varchar NO
vala_scene_resource_v3 resource_id int NO
vala_scene_resource_v3 created_at datetime YES
vala_scene_resource_v3 updated_at datetime YES
vala_scene_resource_v3 deleted_at timestamp YES
vala_seasonal_ticket id bigint NO
vala_seasonal_ticket unique_code varchar YES
vala_seasonal_ticket account_id bigint YES
vala_seasonal_ticket account_name varchar YES
vala_seasonal_ticket tel varchar YES
vala_seasonal_ticket give_time int YES
vala_seasonal_ticket expire_time int YES
vala_seasonal_ticket expire_type tinyint NO
vala_seasonal_ticket ticket_type tinyint NO
vala_seasonal_ticket used_time int YES
vala_seasonal_ticket season_package_id bigint YES
vala_seasonal_ticket season_package_name varchar YES
vala_seasonal_ticket status tinyint NO
vala_seasonal_ticket created_at datetime YES
vala_seasonal_ticket updated_at datetime YES
vala_seasonal_ticket deleted_at datetime YES
vala_tts_voice id bigint NO
vala_tts_voice name varchar NO
vala_tts_voice voice_id varchar NO
vala_tts_voice preview_url varchar YES
vala_tts_voice model_id varchar YES
vala_tts_voice type tinyint NO
vala_tts_voice created_at datetime YES
vala_tts_voice updated_at datetime YES
vala_tts_voice deleted_at datetime YES
vala_user_card_component_record id bigint NO
vala_user_card_component_record user_id bigint NO
vala_user_card_component_record game_id bigint YES
vala_user_card_component_record chapter_id bigint YES
vala_user_card_component_record component_type bigint YES
vala_user_card_component_record sub_component_type bigint YES
vala_user_card_component_record component_id bigint NO
vala_user_card_component_record card_content text YES
vala_user_card_component_record is_new tinyint NO
vala_user_card_component_record created_at datetime YES
vala_user_card_component_record updated_at datetime YES
vala_user_card_component_record deleted_at datetime YES
vala_user_card_plot_upgrade id bigint NO
vala_user_card_plot_upgrade user_id bigint NO
vala_user_card_plot_upgrade chapter_id bigint YES
vala_user_card_plot_upgrade plot_card_id bigint NO
vala_user_card_plot_upgrade is_have_upgrade bigint YES
vala_user_card_plot_upgrade white_card_status tinyint NO
vala_user_card_plot_upgrade flash_card_status tinyint NO
vala_user_card_plot_upgrade is_new tinyint YES
vala_user_card_plot_upgrade created_at datetime YES
vala_user_card_plot_upgrade updated_at datetime YES
vala_user_card_plot_upgrade deleted_at datetime YES
vala_user_chapter_finish_record id bigint NO
vala_user_chapter_finish_record user_id bigint NO
vala_user_chapter_finish_record game_id bigint YES
vala_user_chapter_finish_record chapter_id bigint NO
vala_user_chapter_finish_record created_at datetime YES
vala_user_chapter_finish_record updated_at datetime YES
vala_user_chapter_finish_record deleted_at datetime YES
vala_user_chat_log id bigint NO
vala_user_chat_log user_id bigint NO
vala_user_chat_log game_id bigint YES
vala_user_chat_log chapter_id bigint YES
vala_user_chat_log sub_component_type bigint YES
vala_user_chat_log component_id bigint NO
vala_user_chat_log npc_id bigint NO
vala_user_chat_log session_id varchar NO
vala_user_chat_log dialog_list text YES
vala_user_chat_log dialog_score text YES
vala_user_chat_log tencent_soe text YES
vala_user_chat_log chat_review_resp text YES
vala_user_chat_log cost_time int NO
vala_user_chat_log insert_time datetime YES
vala_user_chat_log log_type tinyint YES
vala_user_chat_log deal_status tinyint YES
vala_user_chat_log created_at datetime YES
vala_user_chat_log updated_at datetime YES
vala_user_chat_log deleted_at datetime YES
vala_user_drama_log id bigint NO
vala_user_drama_log user_id bigint NO
vala_user_drama_log game_id bigint YES
vala_user_drama_log chapter_id bigint YES
vala_user_drama_log component_id bigint YES
vala_user_drama_log npc_id bigint YES
vala_user_drama_log created_at datetime YES
vala_user_drama_log updated_at datetime YES
vala_user_drama_log deleted_at datetime YES
vala_user_friend_info id bigint NO
vala_user_friend_info user_id bigint NO
vala_user_friend_info npc_id bigint NO
vala_user_friend_info favorability_value int YES
vala_user_friend_info level int YES
vala_user_friend_info relation_type tinyint YES
vala_user_friend_info interact_times int YES
vala_user_friend_info story_id int YES
vala_user_friend_info created_at datetime YES
vala_user_friend_info updated_at datetime YES
vala_user_friend_info deleted_at datetime YES
vala_user_game_finish_record id bigint NO
vala_user_game_finish_record user_id bigint NO
vala_user_game_finish_record game_id bigint YES
vala_user_game_finish_record created_at datetime YES
vala_user_game_finish_record updated_at datetime YES
vala_user_game_finish_record deleted_at datetime YES
vala_user_interact_chat_log id bigint NO
vala_user_interact_chat_log user_id bigint NO
vala_user_interact_chat_log npc_id bigint NO
vala_user_interact_chat_log session_id varchar YES
vala_user_interact_chat_log type varchar NO
vala_user_interact_chat_log name varchar NO
vala_user_interact_chat_log text varchar NO
vala_user_interact_chat_log interactive_type text YES
vala_user_interact_chat_log interactive_selected tinyint YES
vala_user_interact_chat_log chat_time datetime YES
vala_user_interact_chat_log created_at datetime YES
vala_user_interact_chat_log updated_at datetime YES
vala_user_interact_chat_log deleted_at datetime YES
vala_user_interact_log id bigint NO
vala_user_interact_log user_id bigint NO
vala_user_interact_log component_type tinyint YES
vala_user_interact_log relation_type tinyint YES
vala_user_interact_log npc_id bigint NO
vala_user_interact_log session_id varchar NO
vala_user_interact_log summary text NO
vala_user_interact_log dialog_list text YES
vala_user_interact_log dialog_score text YES
vala_user_interact_log tencent_soe text YES
vala_user_interact_log chat_review_resp text YES
vala_user_interact_log cost_time int NO
vala_user_interact_log insert_time datetime YES
vala_user_interact_log created_at datetime YES
vala_user_interact_log updated_at datetime YES
vala_user_interact_log deleted_at datetime YES
vala_user_interact_log favor_value int YES
vala_user_learn_record id int NO
vala_user_learn_record user_id bigint NO
vala_user_learn_record game_id bigint NO
vala_user_learn_record chapter_id bigint NO
vala_user_learn_record score float NO
vala_user_learn_record score_level char YES
vala_user_learn_record unique_code varchar YES
vala_user_learn_record source_type tinyint YES
vala_user_learn_record created_at datetime YES
vala_user_learn_record updated_at datetime YES
vala_user_learn_record deleted_at timestamp YES
vala_user_long_term_memory id bigint NO
vala_user_long_term_memory user_id int YES
vala_user_long_term_memory npc_id int YES
vala_user_long_term_memory robot_id int YES
vala_user_long_term_memory config_id int YES
vala_user_long_term_memory session_id varchar YES
vala_user_long_term_memory log_id int YES
vala_user_long_term_memory source varchar YES
vala_user_long_term_memory source_type varchar YES
vala_user_long_term_memory memory_type varchar YES
vala_user_long_term_memory sub_type varchar YES
vala_user_long_term_memory user_content text YES
vala_user_long_term_memory system_content text YES
vala_user_long_term_memory created_at datetime YES
vala_user_long_term_memory updated_at datetime YES
vala_user_long_term_memory deleted_at datetime YES
vala_user_robot id int NO
vala_user_robot user_id bigint NO
vala_user_robot robot_id bigint NO
vala_user_robot robot_level tinyint YES
vala_user_robot created_at datetime YES
vala_user_robot updated_at datetime YES
vala_user_robot deleted_at timestamp YES
vala_user_word_statistics id bigint NO
vala_user_word_statistics user_id int NO
vala_user_word_statistics word_pattern_id varchar YES
vala_user_word_statistics count_num int NO
vala_user_word_statistics content_type char YES
vala_user_word_statistics source_type tinyint YES
vala_user_word_statistics created_at datetime YES
vala_user_word_statistics updated_at datetime YES
vala_user_word_statistics deleted_at datetime YES
vala_wx_chatbot id bigint NO
vala_wx_chatbot type varchar NO
vala_wx_chatbot component_id bigint YES
vala_wx_chatbot chatbot_id bigint YES
vala_wx_chatbot title varchar NO
vala_wx_chatbot desc text NO
vala_wx_chatbot desc_detail text NO
vala_wx_chatbot npc_id bigint NO
vala_wx_chatbot target varchar NO
vala_wx_chatbot index int YES
vala_wx_chatbot complete_story text YES
vala_wx_chatbot limit_type int YES
vala_wx_chatbot limit_data int YES
vala_wx_chatbot created_at datetime YES
vala_wx_chatbot updated_at datetime YES
vala_wx_chatbot deleted_at datetime YES
vala_wxchat_history id bigint NO
vala_wxchat_history user_id bigint YES
vala_wxchat_history wx_chatbot_id bigint YES
vala_wxchat_history chatbot_id bigint YES
vala_wxchat_history session_id varchar YES
vala_wxchat_history chat_content text YES
vala_wxchat_history question_result text YES
vala_wxchat_history status tinyint YES
vala_wxchat_history created_at datetime YES
vala_wxchat_history updated_at datetime YES
vala_wxchat_history deleted_at datetime YES
voice_info id bigint NO
voice_info voice_id varchar YES
voice_info elabs_id varchar YES
voice_info voice_name varchar YES
voice_info des varchar YES
voice_info gender varchar YES
voice_info mini_speed float YES
voice_info mini_volume float YES
voice_info mini_emotion varchar YES
voice_info elabs_speed float YES
voice_info default_platform varchar YES
voice_info created_at datetime YES
voice_info updated_at datetime YES
voice_info deleted_at datetime YES

View File

@ -1,678 +0,0 @@
table_name | column_name | data_type | is_nullable
----------------------------------------+-------------------------+--------------------------+-------------
account_activity_count | id | bigint | NO
account_activity_count | time_period | integer | YES
account_activity_count | counts | integer | YES
account_activity_count | stat_date | text | NO
account_activity_count | created_time | timestamp with time zone | NO
account_detail_info | id | bigint | NO
account_detail_info | account_id | integer | YES
account_detail_info | login_time | text | YES
account_detail_info | device | text | YES
account_detail_info | device_os | text | YES
account_detail_info | login_address | text | YES
account_detail_info | login_times | integer | YES
account_detail_info | created_time | timestamp with time zone | NO
account_detail_info | phone_login_times | integer | YES
account_device | id | bigint | NO
account_device | account_id | integer | YES
account_device | device | text | YES
account_device | created_time | timestamp with time zone | NO
account_login | id | bigint | NO
account_login | account_id | integer | YES
account_login | login_date | text | NO
account_login | created_time | timestamp with time zone | NO
account_login | status | integer | NO
account_paid_activity_count | id | integer | NO
account_paid_activity_count | time_period | integer | YES
account_paid_activity_count | counts | integer | YES
account_paid_activity_count | stat_date | text | NO
account_paid_activity_count | created_time | timestamp with time zone | NO
bi_level_unit_lesson | course_level | character varying | YES
bi_level_unit_lesson | course_season | character varying | YES
bi_level_unit_lesson | course_unit | character varying | YES
bi_level_unit_lesson | course_lesson | character varying | YES
bi_level_unit_lesson | id | integer | YES
bi_refund_order | id | bigint | NO
bi_refund_order | account_id | bigint | YES
bi_refund_order | out_trade_no | character varying | YES
bi_refund_order | trade_no | character varying | YES
bi_refund_order | refund_amount | character varying | YES
bi_refund_order | created_at | timestamp with time zone | YES
bi_refund_order | updated_at | timestamp with time zone | YES
bi_refund_order | deleted_at | timestamp with time zone | YES
bi_refund_order | refund_amount_int | integer | NO
bi_refund_order | reason | text | NO
bi_refund_order | sale_channel | integer | NO
bi_refund_order | status | integer | NO
bi_refund_order | is_admin | boolean | NO
bi_refund_order | channel_refund_id | bigint | NO
bi_refund_order | refund_ticket_ids | text | YES
bi_refund_order | refund_type | integer | NO
bi_refund_order | refund_method | integer | NO
bi_refund_order | after_sale_no | text | YES
bi_user_chapter_play_record_0 | id | bigint | NO
bi_user_chapter_play_record_0 | user_id | bigint | YES
bi_user_chapter_play_record_0 | chapter_id | bigint | YES
bi_user_chapter_play_record_0 | chapter_unique_id | character varying | YES
bi_user_chapter_play_record_0 | play_status | smallint | YES
bi_user_chapter_play_record_0 | story_id | bigint | YES
bi_user_chapter_play_record_0 | created_at | timestamp with time zone | YES
bi_user_chapter_play_record_0 | updated_at | timestamp with time zone | YES
bi_user_chapter_play_record_0 | deleted_at | timestamp with time zone | YES
bi_user_chapter_play_record_0 | map_position | text | YES
bi_user_chapter_play_record_0 | dialog_info | text | YES
bi_user_chapter_play_record_0 | level | character varying | YES
bi_user_chapter_play_record_1 | id | bigint | NO
bi_user_chapter_play_record_1 | user_id | bigint | YES
bi_user_chapter_play_record_1 | chapter_id | bigint | YES
bi_user_chapter_play_record_1 | chapter_unique_id | character varying | YES
bi_user_chapter_play_record_1 | play_status | smallint | YES
bi_user_chapter_play_record_1 | story_id | bigint | YES
bi_user_chapter_play_record_1 | created_at | timestamp with time zone | YES
bi_user_chapter_play_record_1 | updated_at | timestamp with time zone | YES
bi_user_chapter_play_record_1 | deleted_at | timestamp with time zone | YES
bi_user_chapter_play_record_1 | map_position | text | YES
bi_user_chapter_play_record_1 | dialog_info | text | YES
bi_user_chapter_play_record_1 | level | character varying | YES
bi_user_chapter_play_record_2 | id | bigint | NO
bi_user_chapter_play_record_2 | user_id | bigint | YES
bi_user_chapter_play_record_2 | chapter_id | bigint | YES
bi_user_chapter_play_record_2 | chapter_unique_id | character varying | YES
bi_user_chapter_play_record_2 | play_status | smallint | YES
bi_user_chapter_play_record_2 | story_id | bigint | YES
bi_user_chapter_play_record_2 | created_at | timestamp with time zone | YES
bi_user_chapter_play_record_2 | updated_at | timestamp with time zone | YES
bi_user_chapter_play_record_2 | deleted_at | timestamp with time zone | YES
bi_user_chapter_play_record_2 | map_position | text | YES
bi_user_chapter_play_record_2 | dialog_info | text | YES
bi_user_chapter_play_record_2 | level | character varying | YES
bi_user_chapter_play_record_3 | id | bigint | NO
bi_user_chapter_play_record_3 | user_id | bigint | YES
bi_user_chapter_play_record_3 | chapter_id | bigint | YES
bi_user_chapter_play_record_3 | chapter_unique_id | character varying | YES
bi_user_chapter_play_record_3 | play_status | smallint | YES
bi_user_chapter_play_record_3 | story_id | bigint | YES
bi_user_chapter_play_record_3 | created_at | timestamp with time zone | YES
bi_user_chapter_play_record_3 | updated_at | timestamp with time zone | YES
bi_user_chapter_play_record_3 | deleted_at | timestamp with time zone | YES
bi_user_chapter_play_record_3 | map_position | text | YES
bi_user_chapter_play_record_3 | dialog_info | text | YES
bi_user_chapter_play_record_3 | level | character varying | YES
bi_user_chapter_play_record_4 | id | bigint | NO
bi_user_chapter_play_record_4 | user_id | bigint | YES
bi_user_chapter_play_record_4 | chapter_id | bigint | YES
bi_user_chapter_play_record_4 | chapter_unique_id | character varying | YES
bi_user_chapter_play_record_4 | play_status | smallint | YES
bi_user_chapter_play_record_4 | story_id | bigint | YES
bi_user_chapter_play_record_4 | created_at | timestamp with time zone | YES
bi_user_chapter_play_record_4 | updated_at | timestamp with time zone | YES
bi_user_chapter_play_record_4 | deleted_at | timestamp with time zone | YES
bi_user_chapter_play_record_4 | map_position | text | YES
bi_user_chapter_play_record_4 | dialog_info | text | YES
bi_user_chapter_play_record_4 | level | character varying | YES
bi_user_chapter_play_record_5 | id | bigint | NO
bi_user_chapter_play_record_5 | user_id | bigint | YES
bi_user_chapter_play_record_5 | chapter_id | bigint | YES
bi_user_chapter_play_record_5 | chapter_unique_id | character varying | YES
bi_user_chapter_play_record_5 | play_status | smallint | YES
bi_user_chapter_play_record_5 | story_id | bigint | YES
bi_user_chapter_play_record_5 | created_at | timestamp with time zone | YES
bi_user_chapter_play_record_5 | updated_at | timestamp with time zone | YES
bi_user_chapter_play_record_5 | deleted_at | timestamp with time zone | YES
bi_user_chapter_play_record_5 | map_position | text | YES
bi_user_chapter_play_record_5 | dialog_info | text | YES
bi_user_chapter_play_record_5 | level | character varying | YES
bi_user_chapter_play_record_6 | id | bigint | NO
bi_user_chapter_play_record_6 | user_id | bigint | YES
bi_user_chapter_play_record_6 | chapter_id | bigint | YES
bi_user_chapter_play_record_6 | chapter_unique_id | character varying | YES
bi_user_chapter_play_record_6 | play_status | smallint | YES
bi_user_chapter_play_record_6 | story_id | bigint | YES
bi_user_chapter_play_record_6 | created_at | timestamp with time zone | YES
bi_user_chapter_play_record_6 | updated_at | timestamp with time zone | YES
bi_user_chapter_play_record_6 | deleted_at | timestamp with time zone | YES
bi_user_chapter_play_record_6 | map_position | text | YES
bi_user_chapter_play_record_6 | dialog_info | text | YES
bi_user_chapter_play_record_6 | level | character varying | YES
bi_user_chapter_play_record_7 | id | bigint | NO
bi_user_chapter_play_record_7 | user_id | bigint | YES
bi_user_chapter_play_record_7 | chapter_id | bigint | YES
bi_user_chapter_play_record_7 | chapter_unique_id | character varying | YES
bi_user_chapter_play_record_7 | play_status | smallint | YES
bi_user_chapter_play_record_7 | story_id | bigint | YES
bi_user_chapter_play_record_7 | created_at | timestamp with time zone | YES
bi_user_chapter_play_record_7 | updated_at | timestamp with time zone | YES
bi_user_chapter_play_record_7 | deleted_at | timestamp with time zone | YES
bi_user_chapter_play_record_7 | map_position | text | YES
bi_user_chapter_play_record_7 | dialog_info | text | YES
bi_user_chapter_play_record_7 | level | character varying | YES
bi_user_component_play_record_0 | id | bigint | NO
bi_user_component_play_record_0 | user_id | bigint | YES
bi_user_component_play_record_0 | chapter_id | bigint | YES
bi_user_component_play_record_0 | chapter_unique_id | character varying | YES
bi_user_component_play_record_0 | component_id | bigint | YES
bi_user_component_play_record_0 | sub_component_type | smallint | YES
bi_user_component_play_record_0 | component_type | smallint | YES
bi_user_component_play_record_0 | comp_type | character varying | YES
bi_user_component_play_record_0 | play_status | smallint | YES
bi_user_component_play_record_0 | pass_time | integer | YES
bi_user_component_play_record_0 | interval_time | integer | YES
bi_user_component_play_record_0 | read_word_count | integer | YES
bi_user_component_play_record_0 | speak_count | integer | YES
bi_user_component_play_record_0 | listen_sentence_count | integer | YES
bi_user_component_play_record_0 | write_word_count | integer | YES
bi_user_component_play_record_0 | created_at | timestamp with time zone | YES
bi_user_component_play_record_0 | updated_at | timestamp with time zone | YES
bi_user_component_play_record_0 | deleted_at | timestamp with time zone | YES
bi_user_component_play_record_0 | section_id | bigint | YES
bi_user_component_play_record_0 | component_unique_code | character varying | YES
bi_user_component_play_record_0 | play_result | character varying | YES
bi_user_component_play_record_0 | finish_question_count | integer | YES
bi_user_component_play_record_0 | skill_points | integer | YES
bi_user_component_play_record_0 | c_type | character varying | YES
bi_user_component_play_record_0 | c_id | character varying | YES
bi_user_component_play_record_0 | user_behavior_info | text | YES
bi_user_component_play_record_0 | behavior_analysis_info | text | YES
bi_user_component_play_record_0 | session_id | character varying | YES
bi_user_component_play_record_0 | listen_time | integer | YES
bi_user_component_play_record_0 | dialog_list | text | YES
bi_user_component_play_record_0 | level | character varying | YES
bi_user_component_play_record_1 | id | bigint | NO
bi_user_component_play_record_1 | user_id | bigint | YES
bi_user_component_play_record_1 | chapter_id | bigint | YES
bi_user_component_play_record_1 | chapter_unique_id | character varying | YES
bi_user_component_play_record_1 | component_id | bigint | YES
bi_user_component_play_record_1 | sub_component_type | smallint | YES
bi_user_component_play_record_1 | component_type | smallint | YES
bi_user_component_play_record_1 | comp_type | character varying | YES
bi_user_component_play_record_1 | play_status | smallint | YES
bi_user_component_play_record_1 | pass_time | integer | YES
bi_user_component_play_record_1 | interval_time | integer | YES
bi_user_component_play_record_1 | read_word_count | integer | YES
bi_user_component_play_record_1 | speak_count | integer | YES
bi_user_component_play_record_1 | listen_sentence_count | integer | YES
bi_user_component_play_record_1 | write_word_count | integer | YES
bi_user_component_play_record_1 | created_at | timestamp with time zone | YES
bi_user_component_play_record_1 | updated_at | timestamp with time zone | YES
bi_user_component_play_record_1 | deleted_at | timestamp with time zone | YES
bi_user_component_play_record_1 | section_id | bigint | YES
bi_user_component_play_record_1 | component_unique_code | character varying | YES
bi_user_component_play_record_1 | play_result | character varying | YES
bi_user_component_play_record_1 | finish_question_count | integer | YES
bi_user_component_play_record_1 | skill_points | integer | YES
bi_user_component_play_record_1 | c_type | character varying | YES
bi_user_component_play_record_1 | c_id | character varying | YES
bi_user_component_play_record_1 | user_behavior_info | text | YES
bi_user_component_play_record_1 | behavior_analysis_info | text | YES
bi_user_component_play_record_1 | session_id | character varying | YES
bi_user_component_play_record_1 | listen_time | integer | YES
bi_user_component_play_record_1 | dialog_list | text | YES
bi_user_component_play_record_1 | level | character varying | YES
bi_user_component_play_record_2 | id | bigint | NO
bi_user_component_play_record_2 | user_id | bigint | YES
bi_user_component_play_record_2 | chapter_id | bigint | YES
bi_user_component_play_record_2 | chapter_unique_id | character varying | YES
bi_user_component_play_record_2 | component_id | bigint | YES
bi_user_component_play_record_2 | sub_component_type | smallint | YES
bi_user_component_play_record_2 | component_type | smallint | YES
bi_user_component_play_record_2 | comp_type | character varying | YES
bi_user_component_play_record_2 | play_status | smallint | YES
bi_user_component_play_record_2 | pass_time | integer | YES
bi_user_component_play_record_2 | interval_time | integer | YES
bi_user_component_play_record_2 | read_word_count | integer | YES
bi_user_component_play_record_2 | speak_count | integer | YES
bi_user_component_play_record_2 | listen_sentence_count | integer | YES
bi_user_component_play_record_2 | write_word_count | integer | YES
bi_user_component_play_record_2 | created_at | timestamp with time zone | YES
bi_user_component_play_record_2 | updated_at | timestamp with time zone | YES
bi_user_component_play_record_2 | deleted_at | timestamp with time zone | YES
bi_user_component_play_record_2 | section_id | bigint | YES
bi_user_component_play_record_2 | component_unique_code | character varying | YES
bi_user_component_play_record_2 | play_result | character varying | YES
bi_user_component_play_record_2 | finish_question_count | integer | YES
bi_user_component_play_record_2 | skill_points | integer | YES
bi_user_component_play_record_2 | c_type | character varying | YES
bi_user_component_play_record_2 | c_id | character varying | YES
bi_user_component_play_record_2 | user_behavior_info | text | YES
bi_user_component_play_record_2 | behavior_analysis_info | text | YES
bi_user_component_play_record_2 | session_id | character varying | YES
bi_user_component_play_record_2 | listen_time | integer | YES
bi_user_component_play_record_2 | dialog_list | text | YES
bi_user_component_play_record_2 | level | character varying | YES
bi_user_component_play_record_3 | id | bigint | NO
bi_user_component_play_record_3 | user_id | bigint | YES
bi_user_component_play_record_3 | chapter_id | bigint | YES
bi_user_component_play_record_3 | chapter_unique_id | character varying | YES
bi_user_component_play_record_3 | component_id | bigint | YES
bi_user_component_play_record_3 | sub_component_type | smallint | YES
bi_user_component_play_record_3 | component_type | smallint | YES
bi_user_component_play_record_3 | comp_type | character varying | YES
bi_user_component_play_record_3 | play_status | smallint | YES
bi_user_component_play_record_3 | pass_time | integer | YES
bi_user_component_play_record_3 | interval_time | integer | YES
bi_user_component_play_record_3 | read_word_count | integer | YES
bi_user_component_play_record_3 | speak_count | integer | YES
bi_user_component_play_record_3 | listen_sentence_count | integer | YES
bi_user_component_play_record_3 | write_word_count | integer | YES
bi_user_component_play_record_3 | created_at | timestamp with time zone | YES
bi_user_component_play_record_3 | updated_at | timestamp with time zone | YES
bi_user_component_play_record_3 | deleted_at | timestamp with time zone | YES
bi_user_component_play_record_3 | section_id | bigint | YES
bi_user_component_play_record_3 | component_unique_code | character varying | YES
bi_user_component_play_record_3 | play_result | character varying | YES
bi_user_component_play_record_3 | finish_question_count | integer | YES
bi_user_component_play_record_3 | skill_points | integer | YES
bi_user_component_play_record_3 | c_type | character varying | YES
bi_user_component_play_record_3 | c_id | character varying | YES
bi_user_component_play_record_3 | user_behavior_info | text | YES
bi_user_component_play_record_3 | behavior_analysis_info | text | YES
bi_user_component_play_record_3 | session_id | character varying | YES
bi_user_component_play_record_3 | listen_time | integer | YES
bi_user_component_play_record_3 | dialog_list | text | YES
bi_user_component_play_record_3 | level | character varying | YES
bi_user_component_play_record_4 | id | bigint | NO
bi_user_component_play_record_4 | user_id | bigint | YES
bi_user_component_play_record_4 | chapter_id | bigint | YES
bi_user_component_play_record_4 | chapter_unique_id | character varying | YES
bi_user_component_play_record_4 | component_id | bigint | YES
bi_user_component_play_record_4 | sub_component_type | smallint | YES
bi_user_component_play_record_4 | component_type | smallint | YES
bi_user_component_play_record_4 | comp_type | character varying | YES
bi_user_component_play_record_4 | play_status | smallint | YES
bi_user_component_play_record_4 | pass_time | integer | YES
bi_user_component_play_record_4 | interval_time | integer | YES
bi_user_component_play_record_4 | read_word_count | integer | YES
bi_user_component_play_record_4 | speak_count | integer | YES
bi_user_component_play_record_4 | listen_sentence_count | integer | YES
bi_user_component_play_record_4 | write_word_count | integer | YES
bi_user_component_play_record_4 | created_at | timestamp with time zone | YES
bi_user_component_play_record_4 | updated_at | timestamp with time zone | YES
bi_user_component_play_record_4 | deleted_at | timestamp with time zone | YES
bi_user_component_play_record_4 | section_id | bigint | YES
bi_user_component_play_record_4 | component_unique_code | character varying | YES
bi_user_component_play_record_4 | play_result | character varying | YES
bi_user_component_play_record_4 | finish_question_count | integer | YES
bi_user_component_play_record_4 | skill_points | integer | YES
bi_user_component_play_record_4 | c_type | character varying | YES
bi_user_component_play_record_4 | c_id | character varying | YES
bi_user_component_play_record_4 | user_behavior_info | text | YES
bi_user_component_play_record_4 | behavior_analysis_info | text | YES
bi_user_component_play_record_4 | session_id | character varying | YES
bi_user_component_play_record_4 | listen_time | integer | YES
bi_user_component_play_record_4 | dialog_list | text | YES
bi_user_component_play_record_4 | level | character varying | YES
bi_user_component_play_record_5 | id | bigint | NO
bi_user_component_play_record_5 | user_id | bigint | YES
bi_user_component_play_record_5 | chapter_id | bigint | YES
bi_user_component_play_record_5 | chapter_unique_id | character varying | YES
bi_user_component_play_record_5 | component_id | bigint | YES
bi_user_component_play_record_5 | sub_component_type | smallint | YES
bi_user_component_play_record_5 | component_type | smallint | YES
bi_user_component_play_record_5 | comp_type | character varying | YES
bi_user_component_play_record_5 | play_status | smallint | YES
bi_user_component_play_record_5 | pass_time | integer | YES
bi_user_component_play_record_5 | interval_time | integer | YES
bi_user_component_play_record_5 | read_word_count | integer | YES
bi_user_component_play_record_5 | speak_count | integer | YES
bi_user_component_play_record_5 | listen_sentence_count | integer | YES
bi_user_component_play_record_5 | write_word_count | integer | YES
bi_user_component_play_record_5 | created_at | timestamp with time zone | YES
bi_user_component_play_record_5 | updated_at | timestamp with time zone | YES
bi_user_component_play_record_5 | deleted_at | timestamp with time zone | YES
bi_user_component_play_record_5 | section_id | bigint | YES
bi_user_component_play_record_5 | component_unique_code | character varying | YES
bi_user_component_play_record_5 | play_result | character varying | YES
bi_user_component_play_record_5 | finish_question_count | integer | YES
bi_user_component_play_record_5 | skill_points | integer | YES
bi_user_component_play_record_5 | c_type | character varying | YES
bi_user_component_play_record_5 | c_id | character varying | YES
bi_user_component_play_record_5 | user_behavior_info | text | YES
bi_user_component_play_record_5 | behavior_analysis_info | text | YES
bi_user_component_play_record_5 | session_id | character varying | YES
bi_user_component_play_record_5 | listen_time | integer | YES
bi_user_component_play_record_5 | dialog_list | text | YES
bi_user_component_play_record_5 | level | character varying | YES
bi_user_component_play_record_6 | id | bigint | NO
bi_user_component_play_record_6 | user_id | bigint | YES
bi_user_component_play_record_6 | chapter_id | bigint | YES
bi_user_component_play_record_6 | chapter_unique_id | character varying | YES
bi_user_component_play_record_6 | component_id | bigint | YES
bi_user_component_play_record_6 | sub_component_type | smallint | YES
bi_user_component_play_record_6 | component_type | smallint | YES
bi_user_component_play_record_6 | comp_type | character varying | YES
bi_user_component_play_record_6 | play_status | smallint | YES
bi_user_component_play_record_6 | pass_time | integer | YES
bi_user_component_play_record_6 | interval_time | integer | YES
bi_user_component_play_record_6 | read_word_count | integer | YES
bi_user_component_play_record_6 | speak_count | integer | YES
bi_user_component_play_record_6 | listen_sentence_count | integer | YES
bi_user_component_play_record_6 | write_word_count | integer | YES
bi_user_component_play_record_6 | created_at | timestamp with time zone | YES
bi_user_component_play_record_6 | updated_at | timestamp with time zone | YES
bi_user_component_play_record_6 | deleted_at | timestamp with time zone | YES
bi_user_component_play_record_6 | section_id | bigint | YES
bi_user_component_play_record_6 | component_unique_code | character varying | YES
bi_user_component_play_record_6 | play_result | character varying | YES
bi_user_component_play_record_6 | finish_question_count | integer | YES
bi_user_component_play_record_6 | skill_points | integer | YES
bi_user_component_play_record_6 | c_type | character varying | YES
bi_user_component_play_record_6 | c_id | character varying | YES
bi_user_component_play_record_6 | user_behavior_info | text | YES
bi_user_component_play_record_6 | behavior_analysis_info | text | YES
bi_user_component_play_record_6 | session_id | character varying | YES
bi_user_component_play_record_6 | listen_time | integer | YES
bi_user_component_play_record_6 | dialog_list | text | YES
bi_user_component_play_record_6 | level | character varying | YES
bi_user_component_play_record_7 | id | bigint | NO
bi_user_component_play_record_7 | user_id | bigint | YES
bi_user_component_play_record_7 | chapter_id | bigint | YES
bi_user_component_play_record_7 | chapter_unique_id | character varying | YES
bi_user_component_play_record_7 | component_id | bigint | YES
bi_user_component_play_record_7 | sub_component_type | smallint | YES
bi_user_component_play_record_7 | component_type | smallint | YES
bi_user_component_play_record_7 | comp_type | character varying | YES
bi_user_component_play_record_7 | play_status | smallint | YES
bi_user_component_play_record_7 | pass_time | integer | YES
bi_user_component_play_record_7 | interval_time | integer | YES
bi_user_component_play_record_7 | read_word_count | integer | YES
bi_user_component_play_record_7 | speak_count | integer | YES
bi_user_component_play_record_7 | listen_sentence_count | integer | YES
bi_user_component_play_record_7 | write_word_count | integer | YES
bi_user_component_play_record_7 | created_at | timestamp with time zone | YES
bi_user_component_play_record_7 | updated_at | timestamp with time zone | YES
bi_user_component_play_record_7 | deleted_at | timestamp with time zone | YES
bi_user_component_play_record_7 | section_id | bigint | YES
bi_user_component_play_record_7 | component_unique_code | character varying | YES
bi_user_component_play_record_7 | play_result | character varying | YES
bi_user_component_play_record_7 | finish_question_count | integer | YES
bi_user_component_play_record_7 | skill_points | integer | YES
bi_user_component_play_record_7 | c_type | character varying | YES
bi_user_component_play_record_7 | c_id | character varying | YES
bi_user_component_play_record_7 | user_behavior_info | text | YES
bi_user_component_play_record_7 | behavior_analysis_info | text | YES
bi_user_component_play_record_7 | session_id | character varying | YES
bi_user_component_play_record_7 | listen_time | integer | YES
bi_user_component_play_record_7 | dialog_list | text | YES
bi_user_component_play_record_7 | level | character varying | YES
bi_user_course_detail | id | integer | NO
bi_user_course_detail | created_at | timestamp with time zone | YES
bi_user_course_detail | updated_at | timestamp with time zone | YES
bi_user_course_detail | account_id | integer | YES
bi_user_course_detail | user_id | integer | YES
bi_user_course_detail | course_level | text | YES
bi_user_course_detail | active_time | timestamp with time zone | YES
bi_user_course_detail | expire_time | timestamp with time zone | YES
bi_user_course_detail | latest_unit_index | integer | YES
bi_user_course_detail | latest_lesson_index | integer | YES
bi_user_course_detail | learn_duration | integer | YES
bi_user_course_detail | last_learn_time | timestamp with time zone | YES
bi_user_course_detail | deleted_at | timestamp with time zone | YES
bi_user_task_log | id | integer | NO
bi_user_task_log | user_id | integer | NO
bi_user_task_log | task_type | integer | NO
bi_user_task_log | task_id | integer | NO
bi_user_task_log | status | integer | NO
bi_user_task_log | start_time | bigint | NO
bi_user_task_log | updated_at | timestamp with time zone | YES
bi_user_task_log | created_at | timestamp with time zone | YES
bi_user_unit_challenge_question_result | id | bigint | NO
bi_user_unit_challenge_question_result | user_id | bigint | YES
bi_user_unit_challenge_question_result | story_id | bigint | YES
bi_user_unit_challenge_question_result | score | integer | YES
bi_user_unit_challenge_question_result | score_text | character varying | YES
bi_user_unit_challenge_question_result | question_list | text | YES
bi_user_unit_challenge_question_result | sp_value | integer | YES
bi_user_unit_challenge_question_result | created_at | timestamp with time zone | YES
bi_user_unit_challenge_question_result | updated_at | timestamp with time zone | YES
bi_user_unit_challenge_question_result | deleted_at | timestamp with time zone | YES
bi_user_unit_challenge_question_result | category | character varying | YES
bi_user_unit_challenge_question_result | exp | integer | YES
bi_user_unit_challenge_question_result | play_time | bigint | YES
bi_user_unit_challenge_question_result | read_word_count | bigint | YES
bi_user_unit_challenge_question_result | listen_time | bigint | YES
bi_user_unit_challenge_question_result | speak_count | bigint | YES
bi_user_unit_challenge_question_result | level | character varying | YES
bi_user_unit_review_question_result | id | bigint | NO
bi_user_unit_review_question_result | user_id | bigint | YES
bi_user_unit_review_question_result | story_id | bigint | YES
bi_user_unit_review_question_result | chapter_id | bigint | YES
bi_user_unit_review_question_result | unique_id | character varying | YES
bi_user_unit_review_question_result | score | integer | YES
bi_user_unit_review_question_result | score_text | character varying | YES
bi_user_unit_review_question_result | question_list | text | YES
bi_user_unit_review_question_result | sp_value | integer | YES
bi_user_unit_review_question_result | exp | integer | YES
bi_user_unit_review_question_result | created_at | timestamp with time zone | YES
bi_user_unit_review_question_result | updated_at | timestamp with time zone | YES
bi_user_unit_review_question_result | deleted_at | timestamp with time zone | YES
bi_user_unit_review_question_result | play_time | bigint | YES
bi_user_unit_review_question_result | level | character varying | YES
bi_user_unit_summary_km_result | id | bigint | NO
bi_user_unit_summary_km_result | user_id | bigint | YES
bi_user_unit_summary_km_result | story_id | bigint | YES
bi_user_unit_summary_km_result | km_id | character varying | YES
bi_user_unit_summary_km_result | km_type | character varying | YES
bi_user_unit_summary_km_result | score_text | character varying | YES
bi_user_unit_summary_km_result | sp_value | integer | YES
bi_user_unit_summary_km_result | created_at | timestamp with time zone | YES
bi_user_unit_summary_km_result | updated_at | timestamp with time zone | YES
bi_user_unit_summary_km_result | deleted_at | timestamp with time zone | YES
bi_user_unit_summary_km_result | play_time | bigint | YES
bi_user_unit_summary_km_result | sum_explain | text | YES
bi_user_unit_summary_km_result | level | character varying | YES
bi_vala_app_account | id | bigint | NO
bi_vala_app_account | tel | character varying | YES
bi_vala_app_account | tel_encrypt | character varying | YES
bi_vala_app_account | name | character varying | YES
bi_vala_app_account | id_card | character varying | YES
bi_vala_app_account | status | smallint | NO
bi_vala_app_account | pay_status | integer | YES
bi_vala_app_account | login_times | integer | YES
bi_vala_app_account | remark | text | YES
bi_vala_app_account | key_from | character varying | NO
bi_vala_app_account | created_at | timestamp with time zone | YES
bi_vala_app_account | updated_at | timestamp with time zone | YES
bi_vala_app_account | deleted_at | timestamp with time zone | YES
bi_vala_app_account | download_channel | text | YES
bi_vala_app_character | id | bigint | NO
bi_vala_app_character | account_id | bigint | YES
bi_vala_app_character | nickname | character varying | YES
bi_vala_app_character | gender | smallint | YES
bi_vala_app_character | birthday | character varying | YES
bi_vala_app_character | avatar | text | YES
bi_vala_app_character | spine_name | character varying | YES
bi_vala_app_character | latest_login | timestamp with time zone | YES
bi_vala_app_character | reputation | character | YES
bi_vala_app_character | robots_cnt | integer | YES
bi_vala_app_character | head_image | character varying | YES
bi_vala_app_character | status | smallint | YES
bi_vala_app_character | purchase_season_package | text | YES
bi_vala_app_character | total_sp_point | integer | YES
bi_vala_app_character | balance_sp_point | integer | YES
bi_vala_app_character | point | integer | YES
bi_vala_app_character | pay_status | smallint | YES
bi_vala_app_character | created_at | timestamp with time zone | YES
bi_vala_app_character | updated_at | timestamp with time zone | YES
bi_vala_app_character | deleted_at | timestamp with time zone | YES
bi_vala_app_character | mood | integer | YES
bi_vala_app_character | mood_start_time | bigint | YES
bi_vala_order | id | bigint | NO
bi_vala_order | account_id | bigint | YES
bi_vala_order | out_trade_no | character varying | YES
bi_vala_order | trade_no | character varying | YES
bi_vala_order | pay_amount | character varying | YES
bi_vala_order | goods_id | bigint | YES
bi_vala_order | order_status | integer | YES
bi_vala_order | order_status_comment | text | YES
bi_vala_order | is_used | integer | YES
bi_vala_order | sale_channel | integer | YES
bi_vala_order | created_at | timestamp with time zone | YES
bi_vala_order | updated_at | timestamp with time zone | YES
bi_vala_order | deleted_at | timestamp with time zone | YES
bi_vala_order | pay_channel | integer | YES
bi_vala_order | pay_amount_int | integer | YES
bi_vala_order | pay_success_date | timestamp with time zone | YES
bi_vala_order | quantity | integer | NO
bi_vala_order | goods_name | text | NO
bi_vala_order | goods_apply_package_ids | text | NO
bi_vala_order | key_from | text | NO
bi_vala_order | expire_days | integer | YES
data_sync_config | id | bigint | NO
data_sync_config | job_name | text | NO
data_sync_config | database_name | text | YES
data_sync_config | origin_table | text | YES
data_sync_config | dest_table | text | YES
data_sync_config | deal_time | timestamp with time zone | YES
data_sync_config | full_id | bigint | NO
data_sync_config | created_at | timestamp with time zone | NO
data_sync_config | updated_at | timestamp with time zone | YES
feishu_table_record | id | bigint | NO
feishu_table_record | data_table | text | NO
feishu_table_record | record_id | text | YES
feishu_table_record | created_at | timestamp with time zone | NO
growth_activity_behavior | id | integer | NO
growth_activity_behavior | es_id | character varying | NO
growth_activity_behavior | account_id | integer | YES
growth_activity_behavior | account_name | character varying | YES
growth_activity_behavior | activity | text | YES
growth_activity_behavior | behavior | character varying | YES
growth_activity_behavior | created_at | timestamp with time zone | YES
growth_activity_behavior | info | text | YES
growth_activity_behavior | source | character varying | YES
growth_activity_behavior | sub_behavior | character varying | YES
growth_activity_behavior | user_id | integer | YES
growth_activity_behavior | user_name | character varying | YES
user_behavior_0 | id | bigint | NO
user_behavior_0 | user_id | integer | YES
user_behavior_0 | behavior | text | YES
user_behavior_0 | behavior_time_date | text | NO
user_behavior_0 | created_time | timestamp with time zone | NO
user_behavior_1 | id | bigint | NO
user_behavior_1 | user_id | integer | YES
user_behavior_1 | behavior | text | YES
user_behavior_1 | behavior_time_date | text | NO
user_behavior_1 | created_time | timestamp with time zone | NO
user_behavior_10 | id | bigint | NO
user_behavior_10 | user_id | integer | YES
user_behavior_10 | behavior | text | YES
user_behavior_10 | behavior_time_date | text | NO
user_behavior_10 | created_time | timestamp with time zone | NO
user_behavior_11 | id | bigint | NO
user_behavior_11 | user_id | integer | YES
user_behavior_11 | behavior | text | YES
user_behavior_11 | behavior_time_date | text | NO
user_behavior_11 | created_time | timestamp with time zone | NO
user_behavior_12 | id | bigint | NO
user_behavior_12 | user_id | integer | YES
user_behavior_12 | behavior | text | YES
user_behavior_12 | behavior_time_date | text | NO
user_behavior_12 | created_time | timestamp with time zone | NO
user_behavior_13 | id | bigint | NO
user_behavior_13 | user_id | integer | YES
user_behavior_13 | behavior | text | YES
user_behavior_13 | behavior_time_date | text | NO
user_behavior_13 | created_time | timestamp with time zone | NO
user_behavior_14 | id | bigint | NO
user_behavior_14 | user_id | integer | YES
user_behavior_14 | behavior | text | YES
user_behavior_14 | behavior_time_date | text | NO
user_behavior_14 | created_time | timestamp with time zone | NO
user_behavior_15 | id | bigint | NO
user_behavior_15 | user_id | integer | YES
user_behavior_15 | behavior | text | YES
user_behavior_15 | behavior_time_date | text | NO
user_behavior_15 | created_time | timestamp with time zone | NO
user_behavior_2 | id | bigint | NO
user_behavior_2 | user_id | integer | YES
user_behavior_2 | behavior | text | YES
user_behavior_2 | behavior_time_date | text | NO
user_behavior_2 | created_time | timestamp with time zone | NO
user_behavior_3 | id | bigint | NO
user_behavior_3 | user_id | integer | YES
user_behavior_3 | behavior | text | YES
user_behavior_3 | behavior_time_date | text | NO
user_behavior_3 | created_time | timestamp with time zone | NO
user_behavior_4 | id | bigint | NO
user_behavior_4 | user_id | integer | YES
user_behavior_4 | behavior | text | YES
user_behavior_4 | behavior_time_date | text | NO
user_behavior_4 | created_time | timestamp with time zone | NO
user_behavior_5 | id | bigint | NO
user_behavior_5 | user_id | integer | YES
user_behavior_5 | behavior | text | YES
user_behavior_5 | behavior_time_date | text | NO
user_behavior_5 | created_time | timestamp with time zone | NO
user_behavior_6 | id | bigint | NO
user_behavior_6 | user_id | integer | YES
user_behavior_6 | behavior | text | YES
user_behavior_6 | behavior_time_date | text | NO
user_behavior_6 | created_time | timestamp with time zone | NO
user_behavior_7 | id | bigint | NO
user_behavior_7 | user_id | integer | YES
user_behavior_7 | behavior | text | YES
user_behavior_7 | behavior_time_date | text | NO
user_behavior_7 | created_time | timestamp with time zone | NO
user_behavior_8 | id | bigint | NO
user_behavior_8 | user_id | integer | YES
user_behavior_8 | behavior | text | YES
user_behavior_8 | behavior_time_date | text | NO
user_behavior_8 | created_time | timestamp with time zone | NO
user_behavior_9 | id | bigint | NO
user_behavior_9 | user_id | integer | YES
user_behavior_9 | behavior | text | YES
user_behavior_9 | behavior_time_date | text | NO
user_behavior_9 | created_time | timestamp with time zone | NO
user_behavior_buried_points | id | integer | NO
user_behavior_buried_points | burying_point_id | integer | YES
user_behavior_buried_points | burying_point_name | character varying | YES
user_behavior_buried_points | burying_point_sub_id | integer | YES
user_behavior_buried_points | burying_point_sub_name | character varying | YES
user_behavior_buried_points | account_id | integer | YES
user_behavior_buried_points | account_name | character varying | YES
user_behavior_buried_points | character_id | integer | YES
user_behavior_buried_points | character_name | character varying | YES
user_behavior_buried_points | active_time | integer | YES
user_behavior_buried_points | version_id | character varying | YES
user_behavior_buried_points | step_duration | integer | YES
user_behavior_buried_points | finish_status | character varying | YES
user_behavior_buried_points | season_package_id | integer | YES
user_behavior_buried_points | season_package_name | character varying | YES
user_behavior_buried_points | unit_id | integer | YES
user_behavior_buried_points | unit_name | character varying | YES
user_behavior_buried_points | lesson_id | integer | YES
user_behavior_buried_points | lesson_name | character varying | YES
user_behavior_buried_points | component_id | integer | YES
user_behavior_buried_points | component_name | character varying | YES
user_behavior_buried_points | c_type | character varying | YES
user_behavior_buried_points | c_id | character varying | YES
user_behavior_buried_points | learning_module_id | integer | YES
user_behavior_buried_points | learning_module_name | character varying | YES
user_behavior_buried_points | learning_point_id | integer | YES
user_behavior_buried_points | learning_point_name | character varying | YES
user_behavior_buried_points | card_id | integer | YES
user_behavior_buried_points | card_name | character varying | YES
user_behavior_buried_points | data_version | character varying | YES
user_behavior_buried_points | ex2 | text | YES
user_behavior_buried_points | ex3 | text | YES
user_behavior_buried_points | es_id | character varying | YES
user_behavior_count_tmp | behavior | text | YES
user_behavior_count_tmp | register_time_date | text | NO
user_behavior_count_tmp | count | integer | YES
user_detail_info | id | bigint | NO
user_detail_info | user_id | integer | YES
user_detail_info | latest_login_time | bigint | YES
user_detail_info | latest_active_time | bigint | YES
user_detail_info | created_time | timestamp with time zone | NO
user_done_duration | id | bigint | NO
user_done_duration | user_id | integer | YES
user_done_duration | chapter_id | integer | YES
user_done_duration | duration | integer | YES
user_done_duration | created_time | timestamp with time zone | NO
user_learning | id | bigint | NO
user_learning | user_id | integer | YES
user_learning | stat_date | text | NO
user_learning | learning_time | integer | YES
user_learning | created_time | timestamp with time zone | NO
(674 rows)

View File

@ -1,628 +0,0 @@
table_name | column_name | data_type | is_nullable
----------------------------------------+-------------------------+--------------------------+-------------
account_activity_count | id | bigint | NO
account_activity_count | time_period | integer | YES
account_activity_count | counts | integer | YES
account_activity_count | stat_date | text | NO
account_activity_count | created_time | timestamp with time zone | NO
account_behavior_count | id | bigint | NO
account_behavior_count | count | integer | YES
account_behavior_count | behavior | text | YES
account_behavior_count | behavior_group | text | NO
account_behavior_count | stat_date | text | NO
account_behavior_count | dt | text | NO
account_behavior_count | created_time | timestamp with time zone | NO
account_detail_info | id | bigint | NO
account_detail_info | account_id | integer | YES
account_detail_info | login_time | text | YES
account_detail_info | device | text | YES
account_detail_info | device_os | text | YES
account_detail_info | login_address | text | YES
account_detail_info | login_times | integer | YES
account_detail_info | created_time | timestamp with time zone | NO
account_device | id | bigint | NO
account_device | account_id | integer | YES
account_device | device | text | YES
account_device | created_time | timestamp with time zone | NO
account_login | id | bigint | NO
account_login | account_id | integer | YES
account_login | login_date | text | NO
account_login | created_time | timestamp with time zone | NO
account_login | status | integer | NO
account_paid_activity_count | id | integer | NO
account_paid_activity_count | time_period | integer | YES
account_paid_activity_count | counts | integer | YES
account_paid_activity_count | stat_date | text | NO
account_paid_activity_count | created_time | timestamp with time zone | NO
bi_refund_order | id | bigint | NO
bi_refund_order | account_id | bigint | YES
bi_refund_order | out_trade_no | character varying | YES
bi_refund_order | trade_no | character varying | YES
bi_refund_order | refund_amount | character varying | YES
bi_refund_order | created_at | timestamp with time zone | YES
bi_refund_order | updated_at | timestamp with time zone | YES
bi_refund_order | deleted_at | timestamp with time zone | YES
bi_refund_order | refund_amount_int | integer | NO
bi_refund_order | reason | text | NO
bi_refund_order | sale_channel | integer | NO
bi_refund_order | status | integer | NO
bi_refund_order | is_admin | boolean | NO
bi_refund_order | channel_refund_id | bigint | NO
bi_refund_order | refund_ticket_ids | text | YES
bi_refund_order | refund_type | integer | NO
bi_refund_order | refund_method | integer | NO
bi_refund_order | after_sale_no | text | YES
bi_user_chapter_play_record_0 | id | bigint | NO
bi_user_chapter_play_record_0 | user_id | bigint | YES
bi_user_chapter_play_record_0 | chapter_id | bigint | YES
bi_user_chapter_play_record_0 | chapter_unique_id | character varying | YES
bi_user_chapter_play_record_0 | play_status | smallint | YES
bi_user_chapter_play_record_0 | story_id | bigint | YES
bi_user_chapter_play_record_0 | created_at | timestamp with time zone | YES
bi_user_chapter_play_record_0 | updated_at | timestamp with time zone | YES
bi_user_chapter_play_record_0 | deleted_at | timestamp with time zone | YES
bi_user_chapter_play_record_0 | map_position | text | YES
bi_user_chapter_play_record_0 | dialog_info | text | YES
bi_user_chapter_play_record_0 | level | character varying | YES
bi_user_chapter_play_record_1 | id | bigint | NO
bi_user_chapter_play_record_1 | user_id | bigint | YES
bi_user_chapter_play_record_1 | chapter_id | bigint | YES
bi_user_chapter_play_record_1 | chapter_unique_id | character varying | YES
bi_user_chapter_play_record_1 | play_status | smallint | YES
bi_user_chapter_play_record_1 | story_id | bigint | YES
bi_user_chapter_play_record_1 | created_at | timestamp with time zone | YES
bi_user_chapter_play_record_1 | updated_at | timestamp with time zone | YES
bi_user_chapter_play_record_1 | deleted_at | timestamp with time zone | YES
bi_user_chapter_play_record_1 | map_position | text | YES
bi_user_chapter_play_record_1 | dialog_info | text | YES
bi_user_chapter_play_record_1 | level | character varying | YES
bi_user_chapter_play_record_2 | id | bigint | NO
bi_user_chapter_play_record_2 | user_id | bigint | YES
bi_user_chapter_play_record_2 | chapter_id | bigint | YES
bi_user_chapter_play_record_2 | chapter_unique_id | character varying | YES
bi_user_chapter_play_record_2 | play_status | smallint | YES
bi_user_chapter_play_record_2 | story_id | bigint | YES
bi_user_chapter_play_record_2 | created_at | timestamp with time zone | YES
bi_user_chapter_play_record_2 | updated_at | timestamp with time zone | YES
bi_user_chapter_play_record_2 | deleted_at | timestamp with time zone | YES
bi_user_chapter_play_record_2 | map_position | text | YES
bi_user_chapter_play_record_2 | dialog_info | text | YES
bi_user_chapter_play_record_2 | level | character varying | YES
bi_user_chapter_play_record_3 | id | bigint | NO
bi_user_chapter_play_record_3 | user_id | bigint | YES
bi_user_chapter_play_record_3 | chapter_id | bigint | YES
bi_user_chapter_play_record_3 | chapter_unique_id | character varying | YES
bi_user_chapter_play_record_3 | play_status | smallint | YES
bi_user_chapter_play_record_3 | story_id | bigint | YES
bi_user_chapter_play_record_3 | created_at | timestamp with time zone | YES
bi_user_chapter_play_record_3 | updated_at | timestamp with time zone | YES
bi_user_chapter_play_record_3 | deleted_at | timestamp with time zone | YES
bi_user_chapter_play_record_3 | map_position | text | YES
bi_user_chapter_play_record_3 | dialog_info | text | YES
bi_user_chapter_play_record_3 | level | character varying | YES
bi_user_chapter_play_record_4 | id | bigint | NO
bi_user_chapter_play_record_4 | user_id | bigint | YES
bi_user_chapter_play_record_4 | chapter_id | bigint | YES
bi_user_chapter_play_record_4 | chapter_unique_id | character varying | YES
bi_user_chapter_play_record_4 | play_status | smallint | YES
bi_user_chapter_play_record_4 | story_id | bigint | YES
bi_user_chapter_play_record_4 | created_at | timestamp with time zone | YES
bi_user_chapter_play_record_4 | updated_at | timestamp with time zone | YES
bi_user_chapter_play_record_4 | deleted_at | timestamp with time zone | YES
bi_user_chapter_play_record_4 | map_position | text | YES
bi_user_chapter_play_record_4 | dialog_info | text | YES
bi_user_chapter_play_record_4 | level | character varying | YES
bi_user_chapter_play_record_5 | id | bigint | NO
bi_user_chapter_play_record_5 | user_id | bigint | YES
bi_user_chapter_play_record_5 | chapter_id | bigint | YES
bi_user_chapter_play_record_5 | chapter_unique_id | character varying | YES
bi_user_chapter_play_record_5 | play_status | smallint | YES
bi_user_chapter_play_record_5 | story_id | bigint | YES
bi_user_chapter_play_record_5 | created_at | timestamp with time zone | YES
bi_user_chapter_play_record_5 | updated_at | timestamp with time zone | YES
bi_user_chapter_play_record_5 | deleted_at | timestamp with time zone | YES
bi_user_chapter_play_record_5 | map_position | text | YES
bi_user_chapter_play_record_5 | dialog_info | text | YES
bi_user_chapter_play_record_5 | level | character varying | YES
bi_user_chapter_play_record_6 | id | bigint | NO
bi_user_chapter_play_record_6 | user_id | bigint | YES
bi_user_chapter_play_record_6 | chapter_id | bigint | YES
bi_user_chapter_play_record_6 | chapter_unique_id | character varying | YES
bi_user_chapter_play_record_6 | play_status | smallint | YES
bi_user_chapter_play_record_6 | story_id | bigint | YES
bi_user_chapter_play_record_6 | created_at | timestamp with time zone | YES
bi_user_chapter_play_record_6 | updated_at | timestamp with time zone | YES
bi_user_chapter_play_record_6 | deleted_at | timestamp with time zone | YES
bi_user_chapter_play_record_6 | map_position | text | YES
bi_user_chapter_play_record_6 | dialog_info | text | YES
bi_user_chapter_play_record_6 | level | character varying | YES
bi_user_chapter_play_record_7 | id | bigint | NO
bi_user_chapter_play_record_7 | user_id | bigint | YES
bi_user_chapter_play_record_7 | chapter_id | bigint | YES
bi_user_chapter_play_record_7 | chapter_unique_id | character varying | YES
bi_user_chapter_play_record_7 | play_status | smallint | YES
bi_user_chapter_play_record_7 | story_id | bigint | YES
bi_user_chapter_play_record_7 | created_at | timestamp with time zone | YES
bi_user_chapter_play_record_7 | updated_at | timestamp with time zone | YES
bi_user_chapter_play_record_7 | deleted_at | timestamp with time zone | YES
bi_user_chapter_play_record_7 | map_position | text | YES
bi_user_chapter_play_record_7 | dialog_info | text | YES
bi_user_chapter_play_record_7 | level | character varying | YES
bi_user_component_play_record_0 | id | bigint | NO
bi_user_component_play_record_0 | user_id | bigint | YES
bi_user_component_play_record_0 | chapter_id | bigint | YES
bi_user_component_play_record_0 | chapter_unique_id | character varying | YES
bi_user_component_play_record_0 | component_id | bigint | YES
bi_user_component_play_record_0 | sub_component_type | smallint | YES
bi_user_component_play_record_0 | component_type | smallint | YES
bi_user_component_play_record_0 | comp_type | character varying | YES
bi_user_component_play_record_0 | play_status | smallint | YES
bi_user_component_play_record_0 | pass_time | integer | YES
bi_user_component_play_record_0 | interval_time | integer | YES
bi_user_component_play_record_0 | read_word_count | integer | YES
bi_user_component_play_record_0 | speak_count | integer | YES
bi_user_component_play_record_0 | listen_sentence_count | integer | YES
bi_user_component_play_record_0 | write_word_count | integer | YES
bi_user_component_play_record_0 | created_at | timestamp with time zone | YES
bi_user_component_play_record_0 | updated_at | timestamp with time zone | YES
bi_user_component_play_record_0 | deleted_at | timestamp with time zone | YES
bi_user_component_play_record_0 | section_id | bigint | YES
bi_user_component_play_record_0 | component_unique_code | character varying | YES
bi_user_component_play_record_0 | play_result | character varying | YES
bi_user_component_play_record_0 | finish_question_count | integer | YES
bi_user_component_play_record_0 | skill_points | integer | YES
bi_user_component_play_record_0 | c_type | character varying | YES
bi_user_component_play_record_0 | c_id | character varying | YES
bi_user_component_play_record_0 | user_behavior_info | text | YES
bi_user_component_play_record_0 | behavior_analysis_info | text | YES
bi_user_component_play_record_0 | session_id | character varying | YES
bi_user_component_play_record_0 | listen_time | integer | YES
bi_user_component_play_record_0 | dialog_list | text | YES
bi_user_component_play_record_0 | level | character varying | YES
bi_user_component_play_record_1 | id | bigint | NO
bi_user_component_play_record_1 | user_id | bigint | YES
bi_user_component_play_record_1 | chapter_id | bigint | YES
bi_user_component_play_record_1 | chapter_unique_id | character varying | YES
bi_user_component_play_record_1 | component_id | bigint | YES
bi_user_component_play_record_1 | sub_component_type | smallint | YES
bi_user_component_play_record_1 | component_type | smallint | YES
bi_user_component_play_record_1 | comp_type | character varying | YES
bi_user_component_play_record_1 | play_status | smallint | YES
bi_user_component_play_record_1 | pass_time | integer | YES
bi_user_component_play_record_1 | interval_time | integer | YES
bi_user_component_play_record_1 | read_word_count | integer | YES
bi_user_component_play_record_1 | speak_count | integer | YES
bi_user_component_play_record_1 | listen_sentence_count | integer | YES
bi_user_component_play_record_1 | write_word_count | integer | YES
bi_user_component_play_record_1 | created_at | timestamp with time zone | YES
bi_user_component_play_record_1 | updated_at | timestamp with time zone | YES
bi_user_component_play_record_1 | deleted_at | timestamp with time zone | YES
bi_user_component_play_record_1 | section_id | bigint | YES
bi_user_component_play_record_1 | component_unique_code | character varying | YES
bi_user_component_play_record_1 | play_result | character varying | YES
bi_user_component_play_record_1 | finish_question_count | integer | YES
bi_user_component_play_record_1 | skill_points | integer | YES
bi_user_component_play_record_1 | c_type | character varying | YES
bi_user_component_play_record_1 | c_id | character varying | YES
bi_user_component_play_record_1 | user_behavior_info | text | YES
bi_user_component_play_record_1 | behavior_analysis_info | text | YES
bi_user_component_play_record_1 | session_id | character varying | YES
bi_user_component_play_record_1 | listen_time | integer | YES
bi_user_component_play_record_1 | dialog_list | text | YES
bi_user_component_play_record_1 | level | character varying | YES
bi_user_component_play_record_2 | id | bigint | NO
bi_user_component_play_record_2 | user_id | bigint | YES
bi_user_component_play_record_2 | chapter_id | bigint | YES
bi_user_component_play_record_2 | chapter_unique_id | character varying | YES
bi_user_component_play_record_2 | component_id | bigint | YES
bi_user_component_play_record_2 | sub_component_type | smallint | YES
bi_user_component_play_record_2 | component_type | smallint | YES
bi_user_component_play_record_2 | comp_type | character varying | YES
bi_user_component_play_record_2 | play_status | smallint | YES
bi_user_component_play_record_2 | pass_time | integer | YES
bi_user_component_play_record_2 | interval_time | integer | YES
bi_user_component_play_record_2 | read_word_count | integer | YES
bi_user_component_play_record_2 | speak_count | integer | YES
bi_user_component_play_record_2 | listen_sentence_count | integer | YES
bi_user_component_play_record_2 | write_word_count | integer | YES
bi_user_component_play_record_2 | created_at | timestamp with time zone | YES
bi_user_component_play_record_2 | updated_at | timestamp with time zone | YES
bi_user_component_play_record_2 | deleted_at | timestamp with time zone | YES
bi_user_component_play_record_2 | section_id | bigint | YES
bi_user_component_play_record_2 | component_unique_code | character varying | YES
bi_user_component_play_record_2 | play_result | character varying | YES
bi_user_component_play_record_2 | finish_question_count | integer | YES
bi_user_component_play_record_2 | skill_points | integer | YES
bi_user_component_play_record_2 | c_type | character varying | YES
bi_user_component_play_record_2 | c_id | character varying | YES
bi_user_component_play_record_2 | user_behavior_info | text | YES
bi_user_component_play_record_2 | behavior_analysis_info | text | YES
bi_user_component_play_record_2 | session_id | character varying | YES
bi_user_component_play_record_2 | listen_time | integer | YES
bi_user_component_play_record_2 | dialog_list | text | YES
bi_user_component_play_record_2 | level | character varying | YES
bi_user_component_play_record_3 | id | bigint | NO
bi_user_component_play_record_3 | user_id | bigint | YES
bi_user_component_play_record_3 | chapter_id | bigint | YES
bi_user_component_play_record_3 | chapter_unique_id | character varying | YES
bi_user_component_play_record_3 | component_id | bigint | YES
bi_user_component_play_record_3 | sub_component_type | smallint | YES
bi_user_component_play_record_3 | component_type | smallint | YES
bi_user_component_play_record_3 | comp_type | character varying | YES
bi_user_component_play_record_3 | play_status | smallint | YES
bi_user_component_play_record_3 | pass_time | integer | YES
bi_user_component_play_record_3 | interval_time | integer | YES
bi_user_component_play_record_3 | read_word_count | integer | YES
bi_user_component_play_record_3 | speak_count | integer | YES
bi_user_component_play_record_3 | listen_sentence_count | integer | YES
bi_user_component_play_record_3 | write_word_count | integer | YES
bi_user_component_play_record_3 | created_at | timestamp with time zone | YES
bi_user_component_play_record_3 | updated_at | timestamp with time zone | YES
bi_user_component_play_record_3 | deleted_at | timestamp with time zone | YES
bi_user_component_play_record_3 | section_id | bigint | YES
bi_user_component_play_record_3 | component_unique_code | character varying | YES
bi_user_component_play_record_3 | play_result | character varying | YES
bi_user_component_play_record_3 | finish_question_count | integer | YES
bi_user_component_play_record_3 | skill_points | integer | YES
bi_user_component_play_record_3 | c_type | character varying | YES
bi_user_component_play_record_3 | c_id | character varying | YES
bi_user_component_play_record_3 | user_behavior_info | text | YES
bi_user_component_play_record_3 | behavior_analysis_info | text | YES
bi_user_component_play_record_3 | session_id | character varying | YES
bi_user_component_play_record_3 | listen_time | integer | YES
bi_user_component_play_record_3 | dialog_list | text | YES
bi_user_component_play_record_3 | level | character varying | YES
bi_user_component_play_record_4 | id | bigint | NO
bi_user_component_play_record_4 | user_id | bigint | YES
bi_user_component_play_record_4 | chapter_id | bigint | YES
bi_user_component_play_record_4 | chapter_unique_id | character varying | YES
bi_user_component_play_record_4 | component_id | bigint | YES
bi_user_component_play_record_4 | sub_component_type | smallint | YES
bi_user_component_play_record_4 | component_type | smallint | YES
bi_user_component_play_record_4 | comp_type | character varying | YES
bi_user_component_play_record_4 | play_status | smallint | YES
bi_user_component_play_record_4 | pass_time | integer | YES
bi_user_component_play_record_4 | interval_time | integer | YES
bi_user_component_play_record_4 | read_word_count | integer | YES
bi_user_component_play_record_4 | speak_count | integer | YES
bi_user_component_play_record_4 | listen_sentence_count | integer | YES
bi_user_component_play_record_4 | write_word_count | integer | YES
bi_user_component_play_record_4 | created_at | timestamp with time zone | YES
bi_user_component_play_record_4 | updated_at | timestamp with time zone | YES
bi_user_component_play_record_4 | deleted_at | timestamp with time zone | YES
bi_user_component_play_record_4 | section_id | bigint | YES
bi_user_component_play_record_4 | component_unique_code | character varying | YES
bi_user_component_play_record_4 | play_result | character varying | YES
bi_user_component_play_record_4 | finish_question_count | integer | YES
bi_user_component_play_record_4 | skill_points | integer | YES
bi_user_component_play_record_4 | c_type | character varying | YES
bi_user_component_play_record_4 | c_id | character varying | YES
bi_user_component_play_record_4 | user_behavior_info | text | YES
bi_user_component_play_record_4 | behavior_analysis_info | text | YES
bi_user_component_play_record_4 | session_id | character varying | YES
bi_user_component_play_record_4 | listen_time | integer | YES
bi_user_component_play_record_4 | dialog_list | text | YES
bi_user_component_play_record_4 | level | character varying | YES
bi_user_component_play_record_5 | id | bigint | NO
bi_user_component_play_record_5 | user_id | bigint | YES
bi_user_component_play_record_5 | chapter_id | bigint | YES
bi_user_component_play_record_5 | chapter_unique_id | character varying | YES
bi_user_component_play_record_5 | component_id | bigint | YES
bi_user_component_play_record_5 | sub_component_type | smallint | YES
bi_user_component_play_record_5 | component_type | smallint | YES
bi_user_component_play_record_5 | comp_type | character varying | YES
bi_user_component_play_record_5 | play_status | smallint | YES
bi_user_component_play_record_5 | pass_time | integer | YES
bi_user_component_play_record_5 | interval_time | integer | YES
bi_user_component_play_record_5 | read_word_count | integer | YES
bi_user_component_play_record_5 | speak_count | integer | YES
bi_user_component_play_record_5 | listen_sentence_count | integer | YES
bi_user_component_play_record_5 | write_word_count | integer | YES
bi_user_component_play_record_5 | created_at | timestamp with time zone | YES
bi_user_component_play_record_5 | updated_at | timestamp with time zone | YES
bi_user_component_play_record_5 | deleted_at | timestamp with time zone | YES
bi_user_component_play_record_5 | section_id | bigint | YES
bi_user_component_play_record_5 | component_unique_code | character varying | YES
bi_user_component_play_record_5 | play_result | character varying | YES
bi_user_component_play_record_5 | finish_question_count | integer | YES
bi_user_component_play_record_5 | skill_points | integer | YES
bi_user_component_play_record_5 | c_type | character varying | YES
bi_user_component_play_record_5 | c_id | character varying | YES
bi_user_component_play_record_5 | user_behavior_info | text | YES
bi_user_component_play_record_5 | behavior_analysis_info | text | YES
bi_user_component_play_record_5 | session_id | character varying | YES
bi_user_component_play_record_5 | listen_time | integer | YES
bi_user_component_play_record_5 | dialog_list | text | YES
bi_user_component_play_record_5 | level | character varying | YES
bi_user_component_play_record_6 | id | bigint | NO
bi_user_component_play_record_6 | user_id | bigint | YES
bi_user_component_play_record_6 | chapter_id | bigint | YES
bi_user_component_play_record_6 | chapter_unique_id | character varying | YES
bi_user_component_play_record_6 | component_id | bigint | YES
bi_user_component_play_record_6 | sub_component_type | smallint | YES
bi_user_component_play_record_6 | component_type | smallint | YES
bi_user_component_play_record_6 | comp_type | character varying | YES
bi_user_component_play_record_6 | play_status | smallint | YES
bi_user_component_play_record_6 | pass_time | integer | YES
bi_user_component_play_record_6 | interval_time | integer | YES
bi_user_component_play_record_6 | read_word_count | integer | YES
bi_user_component_play_record_6 | speak_count | integer | YES
bi_user_component_play_record_6 | listen_sentence_count | integer | YES
bi_user_component_play_record_6 | write_word_count | integer | YES
bi_user_component_play_record_6 | created_at | timestamp with time zone | YES
bi_user_component_play_record_6 | updated_at | timestamp with time zone | YES
bi_user_component_play_record_6 | deleted_at | timestamp with time zone | YES
bi_user_component_play_record_6 | section_id | bigint | YES
bi_user_component_play_record_6 | component_unique_code | character varying | YES
bi_user_component_play_record_6 | play_result | character varying | YES
bi_user_component_play_record_6 | finish_question_count | integer | YES
bi_user_component_play_record_6 | skill_points | integer | YES
bi_user_component_play_record_6 | c_type | character varying | YES
bi_user_component_play_record_6 | c_id | character varying | YES
bi_user_component_play_record_6 | user_behavior_info | text | YES
bi_user_component_play_record_6 | behavior_analysis_info | text | YES
bi_user_component_play_record_6 | session_id | character varying | YES
bi_user_component_play_record_6 | listen_time | integer | YES
bi_user_component_play_record_6 | dialog_list | text | YES
bi_user_component_play_record_6 | level | character varying | YES
bi_user_course_detail | id | integer | NO
bi_user_course_detail | created_at | timestamp with time zone | YES
bi_user_course_detail | updated_at | timestamp with time zone | YES
bi_user_course_detail | account_id | integer | YES
bi_user_course_detail | user_id | integer | YES
bi_user_course_detail | course_level | text | YES
bi_user_course_detail | active_time | timestamp with time zone | YES
bi_user_course_detail | expire_time | timestamp with time zone | YES
bi_user_course_detail | latest_unit_index | integer | YES
bi_user_course_detail | latest_lesson_index | integer | YES
bi_user_course_detail | learn_duration | integer | YES
bi_user_course_detail | last_learn_time | timestamp with time zone | YES
bi_user_course_detail | deleted_at | timestamp with time zone | YES
bi_user_unit_challenge_question_result | id | bigint | NO
bi_user_unit_challenge_question_result | user_id | bigint | YES
bi_user_unit_challenge_question_result | story_id | bigint | YES
bi_user_unit_challenge_question_result | score | integer | YES
bi_user_unit_challenge_question_result | score_text | character varying | YES
bi_user_unit_challenge_question_result | question_list | text | YES
bi_user_unit_challenge_question_result | sp_value | integer | YES
bi_user_unit_challenge_question_result | created_at | timestamp with time zone | YES
bi_user_unit_challenge_question_result | updated_at | timestamp with time zone | YES
bi_user_unit_challenge_question_result | deleted_at | timestamp with time zone | YES
bi_user_unit_challenge_question_result | category | character varying | YES
bi_user_unit_challenge_question_result | exp | integer | YES
bi_user_unit_challenge_question_result | play_time | bigint | YES
bi_user_unit_challenge_question_result | read_word_count | bigint | YES
bi_user_unit_challenge_question_result | listen_time | bigint | YES
bi_user_unit_challenge_question_result | speak_count | bigint | YES
bi_user_unit_challenge_question_result | level | character varying | YES
bi_user_unit_summary_km_result | id | bigint | NO
bi_user_unit_summary_km_result | user_id | bigint | YES
bi_user_unit_summary_km_result | story_id | bigint | YES
bi_user_unit_summary_km_result | km_id | character varying | YES
bi_user_unit_summary_km_result | km_type | character varying | YES
bi_user_unit_summary_km_result | score_text | character varying | YES
bi_user_unit_summary_km_result | sp_value | integer | YES
bi_user_unit_summary_km_result | created_at | timestamp with time zone | YES
bi_user_unit_summary_km_result | updated_at | timestamp with time zone | YES
bi_user_unit_summary_km_result | deleted_at | timestamp with time zone | YES
bi_user_unit_summary_km_result | play_time | bigint | YES
bi_user_unit_summary_km_result | sum_explain | text | YES
bi_user_unit_summary_km_result | level | character varying | YES
bi_vala_app_account | id | bigint | NO
bi_vala_app_account | tel | character varying | YES
bi_vala_app_account | tel_encrypt | character varying | YES
bi_vala_app_account | name | character varying | YES
bi_vala_app_account | id_card | character varying | YES
bi_vala_app_account | status | smallint | NO
bi_vala_app_account | pay_status | integer | YES
bi_vala_app_account | login_times | integer | YES
bi_vala_app_account | remark | text | YES
bi_vala_app_account | key_from | character varying | NO
bi_vala_app_account | created_at | timestamp with time zone | YES
bi_vala_app_account | updated_at | timestamp with time zone | YES
bi_vala_app_account | deleted_at | timestamp with time zone | YES
bi_vala_app_account | download_channel | text | YES
bi_vala_app_character | id | bigint | NO
bi_vala_app_character | account_id | bigint | YES
bi_vala_app_character | nickname | character varying | YES
bi_vala_app_character | gender | smallint | YES
bi_vala_app_character | birthday | character varying | YES
bi_vala_app_character | avatar | text | YES
bi_vala_app_character | spine_name | character varying | YES
bi_vala_app_character | latest_login | timestamp with time zone | YES
bi_vala_app_character | reputation | character | YES
bi_vala_app_character | robots_cnt | integer | YES
bi_vala_app_character | head_image | character varying | YES
bi_vala_app_character | status | smallint | YES
bi_vala_app_character | purchase_season_package | text | YES
bi_vala_app_character | total_sp_point | integer | YES
bi_vala_app_character | balance_sp_point | integer | YES
bi_vala_app_character | point | integer | YES
bi_vala_app_character | pay_status | smallint | YES
bi_vala_app_character | created_at | timestamp with time zone | YES
bi_vala_app_character | updated_at | timestamp with time zone | YES
bi_vala_app_character | deleted_at | timestamp with time zone | YES
bi_vala_app_character | mood | integer | YES
bi_vala_app_character | mood_start_time | bigint | YES
bi_vala_order | id | bigint | NO
bi_vala_order | account_id | bigint | YES
bi_vala_order | out_trade_no | character varying | YES
bi_vala_order | trade_no | character varying | YES
bi_vala_order | pay_amount | character varying | YES
bi_vala_order | goods_id | bigint | YES
bi_vala_order | order_status | integer | YES
bi_vala_order | order_status_comment | text | YES
bi_vala_order | is_used | integer | YES
bi_vala_order | sale_channel | integer | YES
bi_vala_order | created_at | timestamp with time zone | YES
bi_vala_order | updated_at | timestamp with time zone | YES
bi_vala_order | deleted_at | timestamp with time zone | YES
bi_vala_order | pay_channel | integer | YES
bi_vala_order | pay_amount_int | integer | YES
bi_vala_order | pay_success_date | timestamp with time zone | YES
bi_vala_order | quantity | integer | NO
bi_vala_order | goods_name | text | NO
bi_vala_order | goods_apply_package_ids | text | NO
bi_vala_order | key_from | text | NO
bi_vala_order | expire_days | integer | YES
data_sync_config | id | bigint | NO
data_sync_config | job_name | text | NO
data_sync_config | database_name | text | YES
data_sync_config | origin_table | text | YES
data_sync_config | dest_table | text | YES
data_sync_config | deal_time | timestamp with time zone | YES
data_sync_config | full_id | bigint | NO
data_sync_config | created_at | timestamp with time zone | NO
data_sync_config | updated_at | timestamp with time zone | YES
feishu_table_record | id | bigint | NO
feishu_table_record | data_table | text | NO
feishu_table_record | record_id | text | YES
feishu_table_record | created_at | timestamp with time zone | NO
growth_activity_behavior | id | integer | NO
growth_activity_behavior | es_id | character varying | NO
growth_activity_behavior | account_id | integer | YES
growth_activity_behavior | account_name | character varying | YES
growth_activity_behavior | activity | text | YES
growth_activity_behavior | behavior | character varying | YES
growth_activity_behavior | created_at | timestamp with time zone | YES
growth_activity_behavior | info | text | YES
growth_activity_behavior | source | character varying | YES
growth_activity_behavior | sub_behavior | character varying | YES
growth_activity_behavior | user_id | integer | YES
growth_activity_behavior | user_name | character varying | YES
user_behavior_0 | id | bigint | NO
user_behavior_0 | user_id | integer | YES
user_behavior_0 | behavior | text | YES
user_behavior_0 | behavior_time_date | text | NO
user_behavior_0 | created_time | timestamp with time zone | NO
user_behavior_1 | id | bigint | NO
user_behavior_1 | user_id | integer | YES
user_behavior_1 | behavior | text | YES
user_behavior_1 | behavior_time_date | text | NO
user_behavior_1 | created_time | timestamp with time zone | NO
user_behavior_10 | id | bigint | NO
user_behavior_10 | user_id | integer | YES
user_behavior_10 | behavior | text | YES
user_behavior_10 | behavior_time_date | text | NO
user_behavior_10 | created_time | timestamp with time zone | NO
user_behavior_11 | id | bigint | NO
user_behavior_11 | user_id | integer | YES
user_behavior_11 | behavior | text | YES
user_behavior_11 | behavior_time_date | text | NO
user_behavior_11 | created_time | timestamp with time zone | NO
user_behavior_12 | id | bigint | NO
user_behavior_12 | user_id | integer | YES
user_behavior_12 | behavior | text | YES
user_behavior_12 | behavior_time_date | text | NO
user_behavior_12 | created_time | timestamp with time zone | NO
user_behavior_13 | id | bigint | NO
user_behavior_13 | user_id | integer | YES
user_behavior_13 | behavior | text | YES
user_behavior_13 | behavior_time_date | text | NO
user_behavior_13 | created_time | timestamp with time zone | NO
user_behavior_14 | id | bigint | NO
user_behavior_14 | user_id | integer | YES
user_behavior_14 | behavior | text | YES
user_behavior_14 | behavior_time_date | text | NO
user_behavior_14 | created_time | timestamp with time zone | NO
user_behavior_15 | id | bigint | NO
user_behavior_15 | user_id | integer | YES
user_behavior_15 | behavior | text | YES
user_behavior_15 | behavior_time_date | text | NO
user_behavior_15 | created_time | timestamp with time zone | NO
user_behavior_2 | id | bigint | NO
user_behavior_2 | user_id | integer | YES
user_behavior_2 | behavior | text | YES
user_behavior_2 | behavior_time_date | text | NO
user_behavior_2 | created_time | timestamp with time zone | NO
user_behavior_3 | id | bigint | NO
user_behavior_3 | user_id | integer | YES
user_behavior_3 | behavior | text | YES
user_behavior_3 | behavior_time_date | text | NO
user_behavior_3 | created_time | timestamp with time zone | NO
user_behavior_4 | id | bigint | NO
user_behavior_4 | user_id | integer | YES
user_behavior_4 | behavior | text | YES
user_behavior_4 | behavior_time_date | text | NO
user_behavior_4 | created_time | timestamp with time zone | NO
user_behavior_5 | id | bigint | NO
user_behavior_5 | user_id | integer | YES
user_behavior_5 | behavior | text | YES
user_behavior_5 | behavior_time_date | text | NO
user_behavior_5 | created_time | timestamp with time zone | NO
user_behavior_6 | id | bigint | NO
user_behavior_6 | user_id | integer | YES
user_behavior_6 | behavior | text | YES
user_behavior_6 | behavior_time_date | text | NO
user_behavior_6 | created_time | timestamp with time zone | NO
user_behavior_7 | id | bigint | NO
user_behavior_7 | user_id | integer | YES
user_behavior_7 | behavior | text | YES
user_behavior_7 | behavior_time_date | text | NO
user_behavior_7 | created_time | timestamp with time zone | NO
user_behavior_8 | id | bigint | NO
user_behavior_8 | user_id | integer | YES
user_behavior_8 | behavior | text | YES
user_behavior_8 | behavior_time_date | text | NO
user_behavior_8 | created_time | timestamp with time zone | NO
user_behavior_9 | id | bigint | NO
user_behavior_9 | user_id | integer | YES
user_behavior_9 | behavior | text | YES
user_behavior_9 | behavior_time_date | text | NO
user_behavior_9 | created_time | timestamp with time zone | NO
user_behavior_buried_points | id | integer | NO
user_behavior_buried_points | burying_point_id | integer | YES
user_behavior_buried_points | burying_point_name | character varying | YES
user_behavior_buried_points | burying_point_sub_id | integer | YES
user_behavior_buried_points | burying_point_sub_name | character varying | YES
user_behavior_buried_points | account_id | integer | YES
user_behavior_buried_points | account_name | character varying | YES
user_behavior_buried_points | character_id | integer | YES
user_behavior_buried_points | character_name | character varying | YES
user_behavior_buried_points | active_time | integer | YES
user_behavior_buried_points | version_id | character varying | YES
user_behavior_buried_points | step_duration | integer | YES
user_behavior_buried_points | finish_status | character varying | YES
user_behavior_buried_points | season_package_id | integer | YES
user_behavior_buried_points | season_package_name | character varying | YES
user_behavior_buried_points | unit_id | integer | YES
user_behavior_buried_points | unit_name | character varying | YES
user_behavior_buried_points | lesson_id | integer | YES
user_behavior_buried_points | lesson_name | character varying | YES
user_behavior_buried_points | component_id | integer | YES
user_behavior_buried_points | component_name | character varying | YES
user_behavior_buried_points | c_type | character varying | YES
user_behavior_buried_points | c_id | character varying | YES
user_behavior_buried_points | learning_module_id | integer | YES
user_behavior_buried_points | learning_module_name | character varying | YES
user_behavior_buried_points | learning_point_id | integer | YES
user_behavior_buried_points | learning_point_name | character varying | YES
user_behavior_buried_points | card_id | integer | YES
user_behavior_buried_points | card_name | character varying | YES
user_behavior_buried_points | data_version | character varying | YES
user_behavior_buried_points | ex2 | text | YES
user_behavior_buried_points | ex3 | text | YES
user_behavior_buried_points | es_id | character varying | YES
user_behavior_count_tmp | behavior | text | YES
user_behavior_count_tmp | register_time_date | text | NO
user_behavior_count_tmp | count | integer | YES
user_detail_info | id | bigint | NO
user_detail_info | user_id | integer | YES
user_detail_info | latest_login_time | bigint | YES
user_detail_info | latest_active_time | bigint | YES
user_detail_info | created_time | timestamp with time zone | NO
user_info | id | bigint | NO
user_info | account_id | integer | YES
user_info | user_id | integer | YES
user_info | gender | integer | YES
user_info | birthday | text | YES
user_info | register_time_date | text | NO
user_info | created_time | timestamp with time zone | NO
user_info | register_time | text | YES
user_learning | id | bigint | NO
user_learning | user_id | integer | YES
user_learning | stat_date | text | NO
user_learning | learning_time | integer | YES
user_learning | created_time | timestamp with time zone | NO
(624 rows)

View File

@ -1,643 +0,0 @@
# 瓦拉英语业务数据库表结构总览(完整版)
## 使用说明
- 每个表开头的【表用途注释】行可填写该表的业务用途说明
- 每个字段的【注释】列可填写该字段的业务含义说明
- 所有空注释位置均可直接编辑补充
---
## 一、线上PostgreSQL库(正式环境用户行为数据)
- **地址:** bj-postgres-16pob4sg.sql.tencentcdb.com:28591
- **库名:** vala_bi
- **权限:** 只读
---
### account_activity_count
【表用途注释:】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | |
| time_period | integer | |
| counts | integer | |
| stat_date | text | |
| created_time | timestamp with time zone | |
### account_detail_info
【表用途注释:】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | |
| account_id | integer | 关联用户账户表id |
| login_time | text | 登录时间 |
| device | text | 登录设备 |
| device_os | text | 设备操作系统 |
| login_address | text | 登录地点 |
| login_times | integer | 登录次数 |
| created_time | timestamp with time zone | |
| phone_login_times | integer | 手机号登录次数 |
### account_device
【表用途注释:存储用户使用过的设备信息】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | |
| account_id | integer | 关联用户账户表id |
| device | text | 设备标识 |
| created_time | timestamp with time zone | |
### account_login
【表用途注释:用户登录记录表】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | |
| account_id | integer | 关联用户账户表id |
| login_date | text | 登录日期 |
| created_time | timestamp with time zone | |
| status | integer | 登录状态 |
### account_paid_activity_count
【表用途注释:】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | integer | |
| time_period | integer | |
| counts | integer | |
| stat_date | text | |
| created_time | timestamp with time zone | |
### bi_level_unit_lesson
【表用途注释:课程层级映射表,存储等级、季、单元、课时的对应关系】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| course_level | character varying | 课程等级(A1/L1、A2/L2等) |
| course_season | character varying | 课程季 |
| course_unit | character varying | 课程单元 |
| course_lesson | character varying | 课程课时 |
| id | integer | |
### bi_refund_order
【表用途注释:退款订单表,存储所有退款订单的信息】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | |
| account_id | bigint | 关联下单用户账户id |
| out_trade_no | character varying | 商户订单号,关联订单表out_trade_no |
| trade_no | character varying | 支付平台交易号 |
| refund_amount | character varying | 退款金额(字符串格式,单位元) |
| created_at | timestamp with time zone | 创建时间 |
| updated_at | timestamp with time zone | 更新时间 |
| deleted_at | timestamp with time zone | 删除时间(为空表示未删除) |
| refund_amount_int | integer | 退款金额(整数格式,单位分) |
| reason | text | 退款原因 |
| sale_channel | integer | 销售渠道 |
| status | integer | 退款状态:3=退款成功 |
| is_admin | boolean | 是否为后台操作退款 |
| channel_refund_id | bigint | 渠道退款id |
| refund_ticket_ids | text | 关联优惠券id |
| refund_type | integer | 退款类型 |
| refund_method | integer | 退款方式 |
| after_sale_no | text | 售后单号 |
### bi_user_chapter_play_record_0
【表用途注释:用户课时完成记录表(分表0),存储用户每个课时的学习完成情况】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | |
| user_id | bigint | 角色id,关联用户角色表id |
| chapter_id | bigint | 课时id |
| chapter_unique_id | character varying | 课时唯一标识 |
| play_status | smallint | 课时完成状态:1=正常完成 |
| story_id | bigint | 关联故事id |
| created_at | timestamp with time zone | 创建时间 |
| updated_at | timestamp with time zone | 更新时间 |
| deleted_at | timestamp with time zone | 删除时间(为空表示未删除) |
| map_position | text | 地图位置 |
| dialog_info | text | 对话信息 |
| level | character varying | 课程等级 |
### bi_user_chapter_play_record_1
【表用途注释:用户课时完成记录表(分表1),存储用户每个课时的学习完成情况】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | |
| user_id | bigint | 角色id,关联用户角色表id |
| chapter_id | bigint | 课时id |
| chapter_unique_id | character varying | 课时唯一标识 |
| play_status | smallint | 课时完成状态:1=正常完成 |
| story_id | bigint | 关联故事id |
| created_at | timestamp with time zone | 创建时间 |
| updated_at | timestamp with time zone | 更新时间 |
| deleted_at | timestamp with time zone | 删除时间(为空表示未删除) |
| map_position | text | 地图位置 |
| dialog_info | text | 对话信息 |
| level | character varying | 课程等级 |
### bi_user_chapter_play_record_2
【表用途注释:用户课时完成记录表(分表2),存储用户每个课时的学习完成情况】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | |
| user_id | bigint | 角色id,关联用户角色表id |
| chapter_id | bigint | 课时id |
| chapter_unique_id | character varying | 课时唯一标识 |
| play_status | smallint | 课时完成状态:1=正常完成 |
| story_id | bigint | 关联故事id |
| created_at | timestamp with time zone | 创建时间 |
| updated_at | timestamp with time zone | 更新时间 |
| deleted_at | timestamp with time zone | 删除时间(为空表示未删除) |
| map_position | text | 地图位置 |
| dialog_info | text | 对话信息 |
| level | character varying | 课程等级 |
### bi_user_chapter_play_record_3
【表用途注释:用户课时完成记录表(分表3),存储用户每个课时的学习完成情况】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | |
| user_id | bigint | 角色id,关联用户角色表id |
| chapter_id | bigint | 课时id |
| chapter_unique_id | character varying | 课时唯一标识 |
| play_status | smallint | 课时完成状态:1=正常完成 |
| story_id | bigint | 关联故事id |
| created_at | timestamp with time zone | 创建时间 |
| updated_at | timestamp with time zone | 更新时间 |
| deleted_at | timestamp with time zone | 删除时间(为空表示未删除) |
| map_position | text | 地图位置 |
| dialog_info | text | 对话信息 |
| level | character varying | 课程等级 |
### bi_user_chapter_play_record_4
【表用途注释:用户课时完成记录表(分表4),存储用户每个课时的学习完成情况】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | |
| user_id | bigint | 角色id,关联用户角色表id |
| chapter_id | bigint | 课时id |
| chapter_unique_id | character varying | 课时唯一标识 |
| play_status | smallint | 课时完成状态:1=正常完成 |
| story_id | bigint | 关联故事id |
| created_at | timestamp with time zone | 创建时间 |
| updated_at | timestamp with time zone | 更新时间 |
| deleted_at | timestamp with time zone | 删除时间(为空表示未删除) |
| map_position | text | 地图位置 |
| dialog_info | text | 对话信息 |
| level | character varying | 课程等级 |
### bi_user_chapter_play_record_5
【表用途注释:用户课时完成记录表(分表5),存储用户每个课时的学习完成情况】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | |
| user_id | bigint | 角色id,关联用户角色表id |
| chapter_id | bigint | 课时id |
| chapter_unique_id | character varying | 课时唯一标识 |
| play_status | smallint | 课时完成状态:1=正常完成 |
| story_id | bigint | 关联故事id |
| created_at | timestamp with time zone | 创建时间 |
| updated_at | timestamp with time zone | 更新时间 |
| deleted_at | timestamp with time zone | 删除时间(为空表示未删除) |
| map_position | text | 地图位置 |
| dialog_info | text | 对话信息 |
| level | character varying | 课程等级 |
### bi_user_chapter_play_record_6
【表用途注释:用户课时完成记录表(分表6),存储用户每个课时的学习完成情况】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | |
| user_id | bigint | 角色id,关联用户角色表id |
| chapter_id | bigint | 课时id |
| chapter_unique_id | character varying | 课时唯一标识 |
| play_status | smallint | 课时完成状态:1=正常完成 |
| story_id | bigint | 关联故事id |
| created_at | timestamp with time zone | 创建时间 |
| updated_at | timestamp with time zone | 更新时间 |
| deleted_at | timestamp with time zone | 删除时间(为空表示未删除) |
| map_position | text | 地图位置 |
| dialog_info | text | 对话信息 |
| level | character varying | 课程等级 |
### bi_user_chapter_play_record_7
【表用途注释:用户课时完成记录表(分表7),存储用户每个课时的学习完成情况】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | |
| user_id | bigint | 角色id,关联用户角色表id |
| chapter_id | bigint | 课时id |
| chapter_unique_id | character varying | 课时唯一标识 |
| play_status | smallint | 课时完成状态:1=正常完成 |
| story_id | bigint | 关联故事id |
| created_at | timestamp with time zone | 创建时间 |
| updated_at | timestamp with time zone | 更新时间 |
| deleted_at | timestamp with time zone | 删除时间(为空表示未删除) |
| map_position | text | 地图位置 |
| dialog_info | text | 对话信息 |
| level | character varying | 课程等级 |
### bi_user_component_play_record_0
【表用途注释:用户组件完成记录表(分表0),存储用户每个课时下每个组件的学习情况】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | |
| user_id | bigint | 角色id,关联用户角色表id |
| chapter_id | bigint | 课时id |
| chapter_unique_id | character varying | 课时唯一标识,关联课时记录表chapter_unique_id |
| component_id | bigint | 组件id |
| sub_component_type | smallint | 子组件类型 |
| component_type | smallint | 组件类型 |
| comp_type | character varying | 组件类型标识 |
| play_status | smallint | 组件完成状态 |
| pass_time | integer | 通过时间 |
| interval_time | integer | 组件学习耗时(单位:毫秒) |
| read_word_count | integer | 读单词数量 |
| speak_count | integer | 开口次数 |
| listen_sentence_count | integer | 听句子数量 |
| write_word_count | integer | 写单词数量 |
| created_at | timestamp with time zone | 创建时间 |
| updated_at | timestamp with time zone | 更新时间 |
| deleted_at | timestamp with time zone | 删除时间(为空表示未删除) |
| section_id | bigint | 关联章节id |
| component_unique_code | character varying | 组件唯一编码 |
| play_result | character varying | 组件完成结果(Perfect/Good/Oops等) |
| finish_question_count | integer | 完成题目数量 |
| skill_points | integer | 获得技能点数 |
| c_type | character varying | 组件类型 |
| c_id | character varying | 组件配置id |
| user_behavior_info | text | 用户行为信息 |
| behavior_analysis_info | text | 行为分析信息 |
| session_id | character varying | 会话id |
| listen_time | integer | 听力时长 |
| dialog_list | text | 对话列表 |
| level | character varying | 课程等级 |
### bi_user_component_play_record_1
【表用途注释:用户组件完成记录表(分表1),存储用户每个课时下每个组件的学习情况】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | |
| user_id | bigint | 角色id,关联用户角色表id |
| chapter_id | bigint | 课时id |
| chapter_unique_id | character varying | 课时唯一标识,关联课时记录表chapter_unique_id |
| component_id | bigint | 组件id |
| sub_component_type | smallint | 子组件类型 |
| component_type | smallint | 组件类型 |
| comp_type | character varying | 组件类型标识 |
| play_status | smallint | 组件完成状态 |
| pass_time | integer | 通过时间 |
| interval_time | integer | 组件学习耗时(单位:毫秒) |
| read_word_count | integer | 读单词数量 |
| speak_count | integer | 开口次数 |
| listen_sentence_count | integer | 听句子数量 |
| write_word_count | integer | 写单词数量 |
| created_at | timestamp with time zone | 创建时间 |
| updated_at | timestamp with time zone | 更新时间 |
| deleted_at | timestamp with time zone | 删除时间(为空表示未删除) |
| section_id | bigint | 关联章节id |
| component_unique_code | character varying | 组件唯一编码 |
| play_result | character varying | 组件完成结果(Perfect/Good/Oops等) |
| finish_question_count | integer | 完成题目数量 |
| skill_points | integer | 获得技能点数 |
| c_type | character varying | 组件类型 |
| c_id | character varying | 组件配置id |
| user_behavior_info | text | 用户行为信息 |
| behavior_analysis_info | text | 行为分析信息 |
| session_id | character varying | 会话id |
| listen_time | integer | 听力时长 |
| dialog_list | text | 对话列表 |
| level | character varying | 课程等级 |
### bi_user_component_play_record_2
【表用途注释:用户组件完成记录表(分表2),存储用户每个课时下每个组件的学习情况】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | |
| user_id | bigint | 角色id,关联用户角色表id |
| chapter_id | bigint | 课时id |
| chapter_unique_id | character varying | 课时唯一标识,关联课时记录表chapter_unique_id |
| component_id | bigint | 组件id |
| sub_component_type | smallint | 子组件类型 |
| component_type | smallint | 组件类型 |
| comp_type | character varying | 组件类型标识 |
| play_status | smallint | 组件完成状态 |
| pass_time | integer | 通过时间 |
| interval_time | integer | 组件学习耗时(单位:毫秒) |
| read_word_count | integer | 读单词数量 |
| speak_count | integer | 开口次数 |
| listen_sentence_count | integer | 听句子数量 |
| write_word_count | integer | 写单词数量 |
| created_at | timestamp with time zone | 创建时间 |
| updated_at | timestamp with time zone | 更新时间 |
| deleted_at | timestamp with time zone | 删除时间(为空表示未删除) |
| section_id | bigint | 关联章节id |
| component_unique_code | character varying | 组件唯一编码 |
| play_result | character varying | 组件完成结果(Perfect/Good/Oops等) |
| finish_question_count | integer | 完成题目数量 |
| skill_points | integer | 获得技能点数 |
| c_type | character varying | 组件类型 |
| c_id | character varying | 组件配置id |
| user_behavior_info | text | 用户行为信息 |
| behavior_analysis_info | text | 行为分析信息 |
| session_id | character varying | 会话id |
| listen_time | integer | 听力时长 |
| dialog_list | text | 对话列表 |
| level | character varying | 课程等级 |
### bi_user_component_play_record_3
【表用途注释:用户组件完成记录表(分表3),存储用户每个课时下每个组件的学习情况】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | |
| user_id | bigint | 角色id,关联用户角色表id |
| chapter_id | bigint | 课时id |
| chapter_unique_id | character varying | 课时唯一标识,关联课时记录表chapter_unique_id |
| component_id | bigint | 组件id |
| sub_component_type | smallint | 子组件类型 |
| component_type | smallint | 组件类型 |
| comp_type | character varying | 组件类型标识 |
| play_status | smallint | 组件完成状态 |
| pass_time | integer | 通过时间 |
| interval_time | integer | 组件学习耗时(单位:毫秒) |
| read_word_count | integer | 读单词数量 |
| speak_count | integer | 开口次数 |
| listen_sentence_count | integer | 听句子数量 |
| write_word_count | integer | 写单词数量 |
| created_at | timestamp with time zone | 创建时间 |
| updated_at | timestamp with time zone | 更新时间 |
| deleted_at | timestamp with time zone | 删除时间(为空表示未删除) |
| section_id | bigint | 关联章节id |
| component_unique_code | character varying | 组件唯一编码 |
| play_result | character varying | 组件完成结果(Perfect/Good/Oops等) |
| finish_question_count | integer | 完成题目数量 |
| skill_points | integer | 获得技能点数 |
| c_type | character varying | 组件类型 |
| c_id | character varying | 组件配置id |
| user_behavior_info | text | 用户行为信息 |
| behavior_analysis_info | text | 行为分析信息 |
| session_id | character varying | 会话id |
| listen_time | integer | 听力时长 |
| dialog_list | text | 对话列表 |
| level | character varying | 课程等级 |
### bi_user_component_play_record_4
【表用途注释:用户组件完成记录表(分表4),存储用户每个课时下每个组件的学习情况】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | |
| user_id | bigint | 角色id,关联用户角色表id |
| chapter_id | bigint | 课时id |
| chapter_unique_id | character varying | 课时唯一标识,关联课时记录表chapter_unique_id |
| component_id | bigint | 组件id |
| sub_component_type | smallint | 子组件类型 |
| component_type | smallint | 组件类型 |
| comp_type | character varying | 组件类型标识 |
| play_status | smallint | 组件完成状态 |
| pass_time | integer | 通过时间 |
| interval_time | integer | 组件学习耗时(单位:毫秒) |
| read_word_count | integer | 读单词数量 |
| speak_count | integer | 开口次数 |
| listen_sentence_count | integer | 听句子数量 |
| write_word_count | integer | 写单词数量 |
| created_at | timestamp with time zone | 创建时间 |
| updated_at | timestamp with time zone | 更新时间 |
| deleted_at | timestamp with time zone | 删除时间(为空表示未删除) |
| section_id | bigint | 关联章节id |
| component_unique_code | character varying | 组件唯一编码 |
| play_result | character varying | 组件完成结果(Perfect/Good/Oops等) |
| finish_question_count | integer | 完成题目数量 |
| skill_points | integer | 获得技能点数 |
| c_type | character varying | 组件类型 |
| c_id | character varying | 组件配置id |
| user_behavior_info | text | 用户行为信息 |
| behavior_analysis_info | text | 行为分析信息 |
| session_id | character varying | 会话id |
| listen_time | integer | 听力时长 |
| dialog_list | text | 对话列表 |
| level | character varying | 课程等级 |
### bi_user_component_play_record_5
【表用途注释:用户组件完成记录表(分表5),存储用户每个课时下每个组件的学习情况】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | |
| user_id | bigint | 角色id,关联用户角色表id |
| chapter_id | bigint | 课时id |
| chapter_unique_id | character varying | 课时唯一标识,关联课时记录表chapter_unique_id |
| component_id | bigint | 组件id |
| sub_component_type | smallint | 子组件类型 |
| component_type | smallint | 组件类型 |
| comp_type | character varying | 组件类型标识 |
| play_status | smallint | 组件完成状态 |
| pass_time | integer | 通过时间 |
| interval_time | integer | 组件学习耗时(单位:毫秒) |
| read_word_count | integer | 读单词数量 |
| speak_count | integer | 开口次数 |
| listen_sentence_count | integer | 听句子数量 |
| write_word_count | integer | 写单词数量 |
| created_at | timestamp with time zone | 创建时间 |
| updated_at | timestamp with time zone | 更新时间 |
| deleted_at | timestamp with time zone | 删除时间(为空表示未删除) |
| section_id | bigint | 关联章节id |
| component_unique_code | character varying | 组件唯一编码 |
| play_result | character varying | 组件完成结果(Perfect/Good/Oops等) |
| finish_question_count | integer | 完成题目数量 |
| skill_points | integer | 获得技能点数 |
| c_type | character varying | 组件类型 |
| c_id | character varying | 组件配置id |
| user_behavior_info | text | 用户行为信息 |
| behavior_analysis_info | text | 行为分析信息 |
| session_id | character varying | 会话id |
| listen_time | integer | 听力时长 |
| dialog_list | text | 对话列表 |
| level | character varying | 课程等级 |
### bi_user_component_play_record_6
【表用途注释:用户组件完成记录表(分表6),存储用户每个课时下每个组件的学习情况】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | |
| user_id | bigint | 角色id,关联用户角色表id |
| chapter_id | bigint | 课时id |
| chapter_unique_id | character varying | 课时唯一标识,关联课时记录表chapter_unique_id |
| component_id | bigint | 组件id |
| sub_component_type | smallint | 子组件类型 |
| component_type | smallint | 组件类型 |
| comp_type | character varying | 组件类型标识 |
| play_status | smallint | 组件完成状态 |
| pass_time | integer | 通过时间 |
| interval_time | integer | 组件学习耗时(单位:毫秒) |
| read_word_count | integer | 读单词数量 |
| speak_count | integer | 开口次数 |
| listen_sentence_count | integer | 听句子数量 |
| write_word_count | integer | 写单词数量 |
| created_at | timestamp with time zone | 创建时间 |
| updated_at | timestamp with time zone | 更新时间 |
| deleted_at | timestamp with time zone | 删除时间(为空表示未删除) |
| section_id | bigint | 关联章节id |
| component_unique_code | character varying | 组件唯一编码 |
| play_result | character varying | 组件完成结果(Perfect/Good/Oops等) |
| finish_question_count | integer | 完成题目数量 |
| skill_points | integer | 获得技能点数 |
| c_type | character varying | 组件类型 |
| c_id | character varying | 组件配置id |
| user_behavior_info | text | 用户行为信息 |
| behavior_analysis_info | text | 行为分析信息 |
| session_id | character varying | 会话id |
| listen_time | integer | 听力时长 |
| dialog_list | text | 对话列表 |
| level | character varying | 课程等级 |
### bi_user_component_play_record_7
【表用途注释:用户组件完成记录表(分表7),存储用户每个课时下每个组件的学习情况】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | |
| user_id | bigint | 角色id,关联用户角色表id |
| chapter_id | bigint | 课时id |
| chapter_unique_id | character varying | 课时唯一标识,关联课时记录表chapter_unique_id |
| component_id | bigint | 组件id |
| sub_component_type | smallint | 子组件类型 |
| component_type | smallint | 组件类型 |
| comp_type | character varying | 组件类型标识 |
| play_status | smallint | 组件完成状态 |
| pass_time | integer | 通过时间 |
| interval_time | integer | 组件学习耗时(单位:毫秒) |
| read_word_count | integer | 读单词数量 |
| speak_count | integer | 开口次数 |
| listen_sentence_count | integer | 听句子数量 |
| write_word_count | integer | 写单词数量 |
| created_at | timestamp with time zone | 创建时间 |
| updated_at | timestamp with time zone | 更新时间 |
| deleted_at | timestamp with time zone | 删除时间(为空表示未删除) |
| section_id | bigint | 关联章节id |
| component_unique_code | character varying | 组件唯一编码 |
| play_result | character varying | 组件完成结果(Perfect/Good/Oops等) |
| finish_question_count | integer | 完成题目数量 |
| skill_points | integer | 获得技能点数 |
| c_type | character varying | 组件类型 |
| c_id | character varying | 组件配置id |
| user_behavior_info | text | 用户行为信息 |
| behavior_analysis_info | text | 行为分析信息 |
| session_id | character varying | 会话id |
| listen_time | integer | 听力时长 |
| dialog_list | text | 对话列表 |
| level | character varying | 课程等级 |
### bi_user_course_detail
【表用途注释:用户课程明细表,存储用户购买的课程信息】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | integer | |
| created_at | timestamp with time zone | 创建时间 |
| updated_at | timestamp with time zone | 更新时间 |
| account_id | integer | 关联用户账户表id |
| user_id | integer | 关联用户角色表id |
| course_level | text | 课程等级:A1=L1,A2=L2 |
| active_time | timestamp with time zone | 课程激活时间 |
| expire_time | timestamp with time zone | 课程过期时间:不为空=正式课,为空=体验课 |
| latest_unit_index | integer | 当前学习到的单元序号 |
| latest_lesson_index | integer | 当前学习到的课时序号 |
| learn_duration | integer | 累计学习时长(单位:秒) |
| last_learn_time | timestamp with time zone | 最近一次学习时间 |
| deleted_at | timestamp with time zone | 删除时间:为空=未删除,有值=已删除 |
### bi_user_task_log
【表用途注释:用户任务完成日志表】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | integer | |
| user_id | integer | 关联用户角色表id |
| task_type | integer | 任务类型 |
| task_id | integer | 任务id |
| status | integer | 任务完成状态 |
| start_time | bigint | 任务开始时间(时间戳) |
| updated_at | timestamp with time zone | 更新时间 |
| created_at | timestamp with time zone | 创建时间 |
### bi_user_unit_challenge_question_result
【表用途注释:用户单元挑战题结果表】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | |
| user_id | bigint | 关联用户角色表id |
| story_id | bigint | 关联故事id |
| score | integer | 挑战得分 |
| score_text | character varying | 得分等级Perfect/Good/Oops等 |
| question_list | text | 题目列表 |
| sp_value | integer | 获得SP点数 |
| created_at | timestamp with time zone | 创建时间 |
| updated_at | timestamp with time zone | 更新时间 |
| deleted_at | timestamp with time zone | 删除时间 |
| category | character varying | 挑战类型 |
| exp | integer | 获得经验值 |
| play_time | bigint | 挑战耗时 |
| read_word_count | bigint | 读单词数量 |
| listen_time | bigint | 听力时长 |
| speak_count | bigint | 开口次数 |
| level | character varying | 课程等级 |
### bi_user_unit_review_question_result
【表用途注释:用户单元复习题结果表】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | |
| user_id | bigint | 关联用户角色表id |
| story_id | bigint | 关联故事id |
| chapter_id | bigint | 关联课时id |
| unique_id | character varying | 唯一标识 |
| score | integer | 复习题得分 |
| score_text | character varying | 得分等级 |
| question_list | text | 题目列表 |
| sp_value | integer | 获得SP点数 |
| exp | integer | 获得经验值 |
| created_at | timestamp with time zone | 创建时间 |
| updated_at | timestamp with time zone | 更新时间 |
| deleted_at | timestamp with time zone | 删除时间 |
| play_time | bigint | 答题耗时 |
| level | character varying | 课程等级 |
### bi_user_unit_summary_km_result
【表用途注释:用户单元知识点总结结果表】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | |
| user_id | bigint | 关联用户角色表id |
| story_id | bigint | 关联故事id |
| km_id | character varying | 知识点id |
| km_type | character varying | 知识点类型 |
| score_text | character varying | 知识点掌握等级 |
| sp_value | integer | 获得SP点数 |
| created_at | timestamp with time zone | 创建时间 |
| updated_at | timestamp with time zone | 更新时间 |
| deleted_at | timestamp with time zone | 删除时间 |
| play_time | bigint | 学习耗时 |
| sum_explain | text | 知识点总结说明 |
| level | character varying | 课程等级 |
### bi_vala_app_account
【表用途注释:用户账户表,存储用户的账号基本信息】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | 账户id主键 |
| tel | character varying | 手机号(明文) |
| tel_encrypt | character varying | 手机号加密存储 |
| name | character varying | 用户姓名 |
| id_card | character varying | 身份证号 |
| status | smallint | 账户状态 |
| pay_status | integer | 付费状态 |
| login_times | integer | 累计登录次数 |
| remark | text | 备注 |
| key_from | character varying | 注册来源渠道 |
| created_at | timestamp with time zone | 创建时间 |
| updated_at | timestamp with time zone | 更新时间 |
| deleted_at | timestamp with time zone | 删除时间(为空表示未删除) |
| download_channel | text | 用户下载渠道(汉字格式,用于统计新增用户来源) |
### bi_vala_app_character
【表用途注释:用户角色表,一个账户可以创建多个角色】
| 字段名 | 数据类型 | 注释 |
|--------|----------|------|
| id | bigint | 角色id主键 |
| account_id | bigint | 关联账户表id |
| nickname | character varying | 角色昵称 |
| gender | smallint | 角色性别1=男2=女 |
| birthday | character varying | 角色生日 |
| avatar | text | 角色头像 |
| spine_name | character varying | 角色 spine 资源名 |
| latest_login | timestamp with time zone | 最近登录

View File

@ -1,152 +0,0 @@
import pandas as pd
from datetime import datetime
# 1. Overall statistics per channel (hard-coded values, including the "合计" total row).
# NOTE(review): the 学而思 row reports 7 buyers, but the per-day purchase rows further
# down add up to 6 buyers for 学而思 (the amounts do match: 7794) — confirm which is right.
overall_data = [
{"渠道": "学而思", "新增注册总人数": 615, "购课总人数":7, "购课总金额(元)":7794},
{"渠道": "科大讯飞", "新增注册总人数": 377, "购课总人数":4, "购课总金额(元)":3796},
{"渠道": "希沃", "新增注册总人数": 122, "购课总人数":1, "购课总金额(元)":599},
{"渠道": "京东方", "新增注册总人数": 61, "购课总人数":1, "购课总金额(元)":599},
{"渠道": "合计", "新增注册总人数": 1175, "购课总人数":13, "购课总金额(元)":12788},
]
df_overall = pd.DataFrame(overall_data)
# 2. 每日购课明细数据
purchase_data = [
{"日期": "2026-03-02", "渠道": "学而思", "购课人数":1, "购课金额(元)":599, "订单号": "zfb202603022031481772454708683943"},
{"日期": "2026-03-07", "渠道": "学而思", "购课人数":1, "购课金额(元)":599, "订单号": "wx202603071022051772850125753228"},
{"日期": "2026-03-07", "渠道": "科大讯飞", "购课人数":1, "购课金额(元)":599, "订单号": "wx202603072123501772889830225976"},
{"日期": "2026-03-10", "渠道": "学而思", "购课人数":1, "购课金额(元)":1999, "订单号": "wx202603101820431773138043948181"},
{"日期": "2026-03-15", "渠道": "科大讯飞", "购课人数":2, "购课金额(元)":2598, "订单号": "wx202603150854031773536043478685、wx20260315122747177354886748896"},
{"日期": "2026-03-18", "渠道": "学而思", "购课人数":2, "购课金额(元)":2598, "订单号": "wx202603182055481773838548372991、zfb202603182118201773839900411837"},
{"日期": "2026-03-23", "渠道": "科大讯飞", "购课人数":1, "购课金额(元)":599, "订单号": "wx202603232015081774268108032833"},
{"日期": "2026-03-24", "渠道": "京东方", "购课人数":1, "购课金额(元)":599, "订单号": "zfb202603242026431774355203538499"},
{"日期": "2026-03-27", "渠道": "学而思", "购课人数":1, "购课金额(元)":1999, "订单号": "wx202603271258341774587514141956"},
{"日期": "2026-03-28", "渠道": "希沃", "购课人数":1, "购课金额(元)":599, "订单号": "wx20260328145038177468063894734"},
]
df_purchase = pd.DataFrame(purchase_data)
# 3. 每日新增注册数据
register_data = [
{"日期": "2026-03-01", "渠道": "京东方", "新增注册人数": 1},
{"日期": "2026-03-01", "渠道": "学而思", "新增注册人数": 48},
{"日期": "2026-03-01", "渠道": "希沃", "新增注册人数": 2},
{"日期": "2026-03-02", "渠道": "京东方", "新增注册人数": 3},
{"日期": "2026-03-02", "渠道": "学而思", "新增注册人数": 38},
{"日期": "2026-03-02", "渠道": "希沃", "新增注册人数": 1},
{"日期": "2026-03-03", "渠道": "学而思", "新增注册人数": 24},
{"日期": "2026-03-03", "渠道": "希沃", "新增注册人数": 4},
{"日期": "2026-03-04", "渠道": "京东方", "新增注册人数": 4},
{"日期": "2026-03-04", "渠道": "学而思", "新增注册人数": 20},
{"日期": "2026-03-04", "渠道": "希沃", "新增注册人数": 10},
{"日期": "2026-03-04", "渠道": "科大讯飞", "新增注册人数": 3},
{"日期": "2026-03-05", "渠道": "京东方", "新增注册人数": 7},
{"日期": "2026-03-05", "渠道": "学而思", "新增注册人数": 37},
{"日期": "2026-03-05", "渠道": "希沃", "新增注册人数": 15},
{"日期": "2026-03-05", "渠道": "科大讯飞", "新增注册人数": 17},
{"日期": "2026-03-06", "渠道": "京东方", "新增注册人数": 6},
{"日期": "2026-03-06", "渠道": "学而思", "新增注册人数": 26},
{"日期": "2026-03-06", "渠道": "希沃", "新增注册人数": 9},
{"日期": "2026-03-06", "渠道": "科大讯飞", "新增注册人数": 12},
{"日期": "2026-03-07", "渠道": "京东方", "新增注册人数": 5},
{"日期": "2026-03-07", "渠道": "学而思", "新增注册人数": 35},
{"日期": "2026-03-07", "渠道": "希沃", "新增注册人数": 5},
{"日期": "2026-03-07", "渠道": "科大讯飞", "新增注册人数": 34},
{"日期": "2026-03-08", "渠道": "京东方", "新增注册人数": 3},
{"日期": "2026-03-08", "渠道": "学而思", "新增注册人数": 33},
{"日期": "2026-03-08", "渠道": "希沃", "新增注册人数": 12},
{"日期": "2026-03-08", "渠道": "科大讯飞", "新增注册人数": 34},
{"日期": "2026-03-09", "渠道": "京东方", "新增注册人数": 2},
{"日期": "2026-03-09", "渠道": "学而思", "新增注册人数": 27},
{"日期": "2026-03-09", "渠道": "希沃", "新增注册人数": 5},
{"日期": "2026-03-09", "渠道": "科大讯飞", "新增注册人数": 15},
{"日期": "2026-03-10", "渠道": "学而思", "新增注册人数": 15},
{"日期": "2026-03-10", "渠道": "希沃", "新增注册人数": 3},
{"日期": "2026-03-10", "渠道": "科大讯飞", "新增注册人数": 9},
{"日期": "2026-03-11", "渠道": "京东方", "新增注册人数": 2},
{"日期": "2026-03-11", "渠道": "学而思", "新增注册人数": 25},
{"日期": "2026-03-11", "渠道": "希沃", "新增注册人数": 2},
{"日期": "2026-03-11", "渠道": "科大讯飞", "新增注册人数": 13},
{"日期": "2026-03-12", "渠道": "京东方", "新增注册人数": 5},
{"日期": "2026-03-12", "渠道": "学而思", "新增注册人数": 24},
{"日期": "2026-03-12", "渠道": "希沃", "新增注册人数": 5},
{"日期": "2026-03-12", "渠道": "科大讯飞", "新增注册人数": 15},
{"日期": "2026-03-13", "渠道": "京东方", "新增注册人数": 2},
{"日期": "2026-03-13", "渠道": "学而思", "新增注册人数": 31},
{"日期": "2026-03-13", "渠道": "希沃", "新增注册人数": 7},
{"日期": "2026-03-13", "渠道": "科大讯飞", "新增注册人数": 8},
{"日期": "2026-03-14", "渠道": "学而思", "新增注册人数": 30},
{"日期": "2026-03-14", "渠道": "希沃", "新增注册人数": 3},
{"日期": "2026-03-14", "渠道": "科大讯飞", "新增注册人数": 22},
{"日期": "2026-03-15", "渠道": "京东方", "新增注册人数": 1},
{"日期": "2026-03-15", "渠道": "学而思", "新增注册人数": 22},
{"日期": "2026-03-15", "渠道": "希沃", "新增注册人数": 3},
{"日期": "2026-03-15", "渠道": "科大讯飞", "新增注册人数": 22},
{"日期": "2026-03-16", "渠道": "京东方", "新增注册人数": 2},
{"日期": "2026-03-16", "渠道": "学而思", "新增注册人数": 6},
{"日期": "2026-03-16", "渠道": "希沃", "新增注册人数": 2},
{"日期": "2026-03-16", "渠道": "科大讯飞", "新增注册人数": 10},
{"日期": "2026-03-17", "渠道": "京东方", "新增注册人数": 3},
{"日期": "2026-03-17", "渠道": "学而思", "新增注册人数": 12},
{"日期": "2026-03-17", "渠道": "希沃", "新增注册人数": 3},
{"日期": "2026-03-17", "渠道": "科大讯飞", "新增注册人数": 6},
{"日期": "2026-03-18", "渠道": "京东方", "新增注册人数": 1},
{"日期": "2026-03-18", "渠道": "学而思", "新增注册人数": 9},
{"日期": "2026-03-18", "渠道": "科大讯飞", "新增注册人数": 11},
{"日期": "2026-03-19", "渠道": "京东方", "新增注册人数": 1},
{"日期": "2026-03-19", "渠道": "学而思", "新增注册人数": 6},
{"日期": "2026-03-19", "渠道": "希沃", "新增注册人数": 2},
{"日期": "2026-03-19", "渠道": "科大讯飞", "新增注册人数": 9},
{"日期": "2026-03-20", "渠道": "京东方", "新增注册人数": 1},
{"日期": "2026-03-20", "渠道": "学而思", "新增注册人数": 13},
{"日期": "2026-03-20", "渠道": "希沃", "新增注册人数": 2},
{"日期": "2026-03-20", "渠道": "科大讯飞", "新增注册人数": 12},
{"日期": "2026-03-21", "渠道": "京东方", "新增注册人数": 1},
{"日期": "2026-03-21", "渠道": "学而思", "新增注册人数": 27},
{"日期": "2026-03-21", "渠道": "科大讯飞", "新增注册人数": 26},
{"日期": "2026-03-22", "渠道": "学而思", "新增注册人数": 12},
{"日期": "2026-03-22", "渠道": "希沃", "新增注册人数": 4},
{"日期": "2026-03-22", "渠道": "科大讯飞", "新增注册人数": 22},
{"日期": "2026-03-23", "渠道": "京东方", "新增注册人数": 1},
{"日期": "2026-03-23", "渠道": "学而思", "新增注册人数": 9},
{"日期": "2026-03-23", "渠道": "希沃", "新增注册人数": 2},
{"日期": "2026-03-23", "渠道": "科大讯飞", "新增注册人数": 5},
{"日期": "2026-03-24", "渠道": "学而思", "新增注册人数": 4},
{"日期": "2026-03-24", "渠道": "希沃", "新增注册人数": 2},
{"日期": "2026-03-24", "渠道": "科大讯飞", "新增注册人数": 8},
{"日期": "2026-03-25", "渠道": "京东方", "新增注册人数": 1},
{"日期": "2026-03-25", "渠道": "学而思", "新增注册人数": 12},
{"日期": "2026-03-25", "渠道": "希沃", "新增注册人数": 5},
{"日期": "2026-03-25", "渠道": "科大讯飞", "新增注册人数": 13},
{"日期": "2026-03-26", "渠道": "京东方", "新增注册人数": 1},
{"日期": "2026-03-26", "渠道": "学而思", "新增注册人数": 8},
{"日期": "2026-03-26", "渠道": "希沃", "新增注册人数": 2},
{"日期": "2026-03-26", "渠道": "科大讯飞", "新增注册人数": 8},
{"日期": "2026-03-27", "渠道": "学而思", "新增注册人数": 9},
{"日期": "2026-03-27", "渠道": "希沃", "新增注册人数": 6},
{"日期": "2026-03-27", "渠道": "科大讯飞", "新增注册人数": 6},
{"日期": "2026-03-28", "渠道": "京东方", "新增注册人数": 2},
{"日期": "2026-03-28", "渠道": "学而思", "新增注册人数": 20},
{"日期": "2026-03-28", "渠道": "希沃", "新增注册人数": 4},
{"日期": "2026-03-28", "渠道": "科大讯飞", "新增注册人数": 12},
{"日期": "2026-03-29", "渠道": "京东方", "新增注册人数": 2},
{"日期": "2026-03-29", "渠道": "学而思", "新增注册人数": 16},
{"日期": "2026-03-29", "渠道": "科大讯飞", "新增注册人数": 9},
{"日期": "2026-03-30", "渠道": "京东方", "新增注册人数": 2},
{"日期": "2026-03-30", "渠道": "学而思", "新增注册人数": 7},
{"日期": "2026-03-30", "渠道": "希沃", "新增注册人数": 2},
{"日期": "2026-03-30", "渠道": "科大讯飞", "新增注册人数": 6},
{"日期": "2026-03-31", "渠道": "京东方", "新增注册人数": 3},
{"日期": "2026-03-31", "渠道": "学而思", "新增注册人数": 10},
{"日期": "2026-03-31", "渠道": "科大讯飞", "新增注册人数": 10},
]
df_register = pd.DataFrame(register_data)
# Write the three tables into one Excel workbook, one sheet per table.
output_path = "/root/.openclaw/workspace/2026年3月硬件渠道数据汇总.xlsx"
sheets = (
    ('整体统计', df_overall),
    ('每日购课明细', df_purchase),
    ('每日新增注册明细', df_register),
)
with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
    for sheet_name, frame in sheets:
        frame.to_excel(writer, sheet_name=sheet_name, index=False)
print(f"文件已生成:{output_path}")

@ -1 +0,0 @@
Subproject commit 75ab13e87dd0e856cb05c9515efcd507888b6486

View File

@ -1,30 +0,0 @@
# 业务知识库
作为数据分析师,持续积累对公司业务和数据表的理解。
## 目录结构
- `sql_queries/` - 常用 SQL 查询语句和业务分析模板
- `tables/` - 数据表结构和字段说明
- `business_terms/` - 业务术语和指标定义
## 资料来源
1. 飞书 Wiki - 增长组常用查询SQL: https://makee-interactive.feishu.cn/wiki/XJuCwNol1iL3sYkXkXWc2QnJnMd
2. Git 仓库 - 数据抽取脚本: https://git.valavala.com/vala/llm_offline_production/src/branch/master/config_user_data_extract_and_analyze
## 收集的 SQL 查询文档
- [ ] 全字段大表
- [ ] 平均通关时长
- [ ] 新增注册用户数by渠道
- [ ] 课程进入完成率
- [ ] 账号角色年龄地址
- [ ] 退费率
- [ ] 销转学习进度
- [ ] 班主任关注数据
- [ ] 端内GMV
- [ ] 端内用户课程进入完成率
- [ ] 端内购课用户学习行为
- [ ] 转化率
- [ ] 课程ID映射

View File

@ -1,49 +0,0 @@
# 业务术语表
## 核心业务指标
### 用户相关
- **注册用户**: 在 `bi_vala_app_account` 表中 `status = 1` 且 `deleted_at is NULL` 的用户
- **测试用户**: 需要排除的特定用户 ID`id not in (51,2121)`
- **下载渠道 (download_channel)**: 用户下载 App 的渠道
- **key_from**: 注册或购课的来源标识
### 购课相关
- **购课渠道 (sale_channel)**: 用户购买课程的渠道,有数字编码映射到具体渠道名称
- **有效订单**: `order_status = 3` 且 `pay_amount_int > 49800` 的订单(金额大于498元)
- **购课标签**: 分为"未购课"、"站外购课"、"站内购课"
- **站内购课**: 购课渠道不是"站外"的购课
### 角色相关
- **角色付费状态 (characer_pay_status)**: 0表示未付费1表示已付费
- **性别 (gender)**: 0=girl, 1=boy, 其他=unknow
- **赛季包 (purchase_season_package)**: `'[1]'` 表示未购买赛季包
### 课程相关
- **完课标识 (chapter_unique_id)**: 唯一标识一次完课记录
- **完课耗时 (finish_time)**: 完成课程所花费的时间,格式为 mm:ss
- **课程ID (course_id)**: 由 course_level-course_season-course_unit-course_lesson 组成
- **play_status = 1**: 表示播放完成状态
## 购课渠道映射表
| 编码 | 渠道名称 |
|------|----------|
| 11 | 苹果 |
| 12 | 华为 |
| 13 | 小米 |
| 14 | 荣耀 |
| 15 | 应用宝 |
| 17 | 魅族 |
| 18 | VIVO |
| 19 | OPPO |
| 21 | 学而思 |
| 22 | 讯飞 |
| 23 | 步步高 |
| 24 | 作业帮 |
| 25 | 小度 |
| 26 | 希沃 |
| 27 | 京东方 |
| 41 | 官网 |
| 71 | 小程序 |
| 其他 | 站外 |

View File

@ -1,168 +0,0 @@
# 数据表说明
## 核心业务表
### 用户账号表
**表名**: `bi_vala_app_account`
**关键字段**:
- `id`: 用户ID
- `key_from`: 注册来源
- `created_at`: 注册时间
- `download_channel`: 下载渠道
- `status`: 账号状态1表示有效
- `deleted_at`: 删除时间NULL表示未删除
**常用筛选条件**:
```sql
where status = 1
and id not in (51,2121) -- 排除测试用户
and deleted_at is NULL
```
---
### 账号详情表
**表名**: `account_detail_info`
**关键字段**:
- `account_id`: 账号ID关联 bi_vala_app_account.id
- `login_address`: 登录地址(格式如"省份-城市"
- `phone_login_times`: 手机登录次数
**业务逻辑**:
```sql
-- 提取城市
split_part(login_address,'-',2) as login_address
-- 判断是否手机登录
case when phone_login_times = 0 then 0 else 1 end as phone_login
```
---
### 订单表
**表名**: `bi_vala_order`
**关键字段**:
- `account_id`: 账号ID
- `sale_channel`: 购课渠道(数字编码)
- `key_from`: 购课来源
- `pay_success_date`: 支付成功时间
- `pay_amount`: 支付金额
- `pay_amount_int`: 支付金额(整数分)
- `order_status`: 订单状态3表示有效订单
**常用筛选条件**:
```sql
where order_status = 3
and pay_amount_int > 49800 -- 金额大于498元
```
---
### 角色表
**表名**: `bi_vala_app_character`
**关键字段**:
- `id`: 角色ID
- `account_id`: 账号ID
- `gender`: 性别0=girl, 1=boy
- `birthday`: 生日(格式如"YYYY-MM-DD"
- `purchase_season_package`: 赛季包购买状态
- `deleted_at`: 删除时间
**业务逻辑**:
```sql
-- 角色付费状态
case when purchase_season_package = '[1]' then 0 else 1 end as characer_pay_status
-- 性别映射
case when gender = 0 then 'girl'
when gender = 1 then 'boy'
else 'unknow'
end as gender
-- 提取出生年份
case when split_part(birthday,'-',1) = '' then '0000'
else split_part(birthday,'-',1)
end as birthday
```
---
## 课程播放记录表(分表)
### 用户章节播放记录
**表名**: `bi_user_chapter_play_record_0` ~ `bi_user_chapter_play_record_7`
**说明**: 按分表存储,共8张表,需要使用 UNION ALL 合并
**关键字段**:
- `user_id`: 用户ID
- `chapter_id`: 章节ID
- `chapter_unique_id`: 完课唯一标识
- `updated_at`: 更新时间
- `play_status`: 播放状态1表示完成
**常用筛选条件**:
```sql
where chapter_id in (55,56,57,58,59) -- 指定章节
and play_status = 1 -- 播放完成
```
---
### 用户组件播放记录
**表名**: `bi_user_component_play_record_0` ~ `bi_user_component_play_record_7`
**说明**: 按分表存储,共8张表,需要使用 UNION ALL 合并
**关键字段**:
- `chapter_unique_id`: 完课唯一标识
- `interval_time`: 播放时长(毫秒)
**业务逻辑**:
```sql
-- 计算完课耗时mm:ss格式
format('%s:%s',
floor(sum(interval_time)/1000/60),
mod((sum(interval_time)/1000),60)
) as finish_time
```
---
## 课程信息表
### 课程单元表
**表名**: `bi_level_unit_lesson`
**关键字段**:
- `id`: ID关联 chapter_id
- `course_level`: 课程级别
- `course_season`: 课程赛季
- `course_unit`: 课程单元
- `course_lesson`: 课程课时
**业务逻辑**:
```sql
-- 生成课程ID
format('%s-%s-%s-%s',
course_level,
course_season,
course_unit,
course_lesson
) as course_id
```
---
## 其他表
### 账号登录表
**表名**: `account_login`
**关键字段**:
- `account_id`: 账号ID
- `login_date`: 登录日期

View File

@ -1,53 +0,0 @@
# 飞书文档排版规则
## 飞书文档块类型
根据观察,飞书文档的块类型:
| block_type | 说明 |
|-----------|------|
| 1 | Page(页面)|
| 2 | Text(文本块)|
| 3 | Heading1(一级标题)|
| 4 | Heading2(二级标题)|
| 5 | Heading3(三级标题)|
| 6 | Bulleted List(无序列表)|
| 7 | Numbered List(有序列表)|
| 8 | To-do(待办事项)|
| 9 | Quote(引用)|
| 10 | Code(代码块)|
| 11 | Divider(分隔线)|
| 34 | Quote Container(引用容器)|
## 排版最佳实践
### 1. 标题层级
- 使用 Heading2/Heading3 来组织内容结构
- 避免太多层级,保持清晰
### 2. 列表使用
- 无序列表(type 6)用于列举项目
- 有序列表(type 7)用于步骤说明
### 3. 分隔线
- 使用 Divider(type 11)来分隔大的内容区块
### 4. 引用
- 使用 Quote(type 9)或 Quote Container(type 34)来强调重要内容
### 5. 文本格式
- 善用加粗、斜体等文本样式
- 保持整体排版简洁美观
## 更新飞书文档的注意事项
⚠️ **重要:不要直接用 write 覆盖整个文档!**
**推荐做法:**
1. 先用 list_blocks 查看当前文档结构
2. 用 update_block 逐个更新需要修改的块
3. 或者如果必须重写,要确保保持原来的块结构和格式
**避免:**
- ❌ 直接用 write 方法覆盖整个文档(会丢失所有格式)
- ❌ 把所有内容都放在一个 Text 块里

View File

@ -1,83 +0,0 @@
#!/usr/bin/env python3
"""
批量读取飞书 Wiki 文档并保存到本地知识库
"""
import json
import os
from datetime import datetime
# Wiki 子页面列表
wiki_pages = [
{"node_token": "O7QvwdY8piO8aUkhxYecA1qZnBe", "title": "全字段大表", "obj_token": "VVyWd5491o6tuqxceCVci6dVnFd"},
{"node_token": "Y6Iywqf75iepbUkvJzLcfiUYnkg", "title": "平均通关时长", "obj_token": "EpP7d6h2SoaTyJx1lZRcXXdLnVe"},
{"node_token": "KQihwMjO9i1zjFkqTgBcq67Snzc", "title": "新增注册用户数by渠道", "obj_token": "AzRPddp97o7To8x8VkxcFGr8nBh"},
{"node_token": "Zt7RwfGLWiacslkO2glcheWjnwf", "title": "课程进入完成率", "obj_token": "PwIydfZcHo5eZgxi8XLcOtjOnSb"},
{"node_token": "LTaiw3OmUi2pcckDWuNcyBIVnAd", "title": "账号角色年龄地址", "obj_token": "CUa2du2sSoNFSRxl3vFc8ucInEm"},
{"node_token": "ZAPJwIODRiNYE5kTuNtcpSlvnIX", "title": "退费率", "obj_token": "DC1Qdhpitowt9lxxo1acEzOwnFc"},
{"node_token": "Cb3KwPWLriG7GgkN73pcM0Idnch", "title": "销转学习进度", "obj_token": "G1p9dhK63oLWMzxyGQ8csZGMnDh"},
{"node_token": "EBEiwQsw2iOtgekDldHcQxgwnOh", "title": "班主任关注数据", "obj_token": "NcVqdRKtrowglNxs9CocDekunje"},
{"node_token": "BZPkwARxiixUZRk4BW9cij50nDe", "title": "端内GMV", "obj_token": "FkVCd1AruoD9xWxxVpzc16hinVh"},
{"node_token": "AQpnwpsfOixYGtk4jf0c6t9XncG", "title": "端内用户课程进入完成率", "obj_token": "Ueu7dtgSHoNYfsxCDHmcY6E4nid"},
{"node_token": "PyqEwXXqsiQybPkpGbscUjUFnOg", "title": "端内购课用户学习行为", "obj_token": "ZTxod4IUWo5yMexf8AHcBbpFnMg"},
{"node_token": "OyXlwY2vyisvV1kc3HhcMyMVnTd", "title": "转化率", "obj_token": "ATJ0dfajQo5CSexQd8hc9i3pnWe"},
{"node_token": "MWpZwV01fitaKjkCRSxckMUunRb", "title": "课程ID映射", "obj_token": "GenUdsXCloUdYhxMvxqcWBMdnhb"}
]
def safe_filename(title):
    """Return a file-name stem derived from *title*.

    Characters are kept when ``str.isalnum()`` is true for them or when they
    are a space, hyphen or underscore; every other character is dropped.
    Trailing whitespace is then stripped and remaining spaces become
    underscores.

    Args:
        title (str): Human-readable document title.

    Returns:
        str: The sanitized stem, or ``"untitled"`` when nothing survives the
        filtering (so callers never end up writing a file named just ".md").
    """
    kept = "".join(c for c in title if c.isalnum() or c in (' ', '-', '_'))
    stem = kept.rstrip().replace(' ', '_')
    # An all-punctuation (or empty) title would otherwise produce an empty stem.
    return stem or "untitled"
def main():
    """Create one placeholder Markdown file per entry of ``wiki_pages`` plus a README index.

    Files are written into the ``sql_queries`` directory (relative to the
    current working directory). Each placeholder records the page title, the
    creation time, the Feishu document token and the command to fetch the
    full document. Progress is printed to stdout.
    """
    banner = "="*60
    print(banner)
    print("飞书 Wiki 文档批量获取")
    print(banner)

    output_dir = "sql_queries"
    os.makedirs(output_dir, exist_ok=True)

    total = len(wiki_pages)
    print(f"\n{total} 个文档需要获取")
    print(f"输出目录: {output_dir}")

    # Index (README.md) content is collected piece by piece and written at the end.
    index_parts = [
        "# SQL 查询文档索引\n\n",
        f"创建时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n",
        "## 文档列表\n\n",
    ]

    for position, page in enumerate(wiki_pages, 1):
        filename = safe_filename(page['title']) + ".md"
        filepath = os.path.join(output_dir, filename)
        print(f"\n[{position}/{total}] 处理: {page['title']}")
        print(f" 文件: {filepath}")

        # Placeholder file: metadata header followed by usage instructions.
        with open(filepath, 'w', encoding='utf-8') as f:
            placeholder = "".join([
                f"# {page['title']}\n\n",
                f"**获取时间:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n",
                f"**飞书文档 Token:** {page['obj_token']}\n\n",
                "**注意:** 此文档需要通过 feishu_doc 工具读取完整内容\n\n",
                "---\n\n",
                "## 使用说明\n\n",
                "使用以下命令读取完整文档内容:\n\n",
                "```bash\n",
                f"feishu_doc read {page['obj_token']}\n",
                "```\n",
            ])
            f.write(placeholder)

        index_parts.append(f"- [{page['title']}]({filename})\n")
        print(" ✅ 已创建占位文件")

    readme_path = os.path.join(output_dir, "README.md")
    with open(readme_path, 'w', encoding='utf-8') as f:
        f.write("".join(index_parts))

    print("\n" + banner)
    print("✅ 初始化完成")
    print(banner)
    print("\n下一步: 使用 feishu_doc 工具逐个读取文档内容")
    print("或者让我继续为你读取这些文档的完整内容")


if __name__ == "__main__":
    main()

View File

@ -1,70 +0,0 @@
# 项目说明
## 项目概述
用户数据提取和分析工具集,用于从各种数据源(ES、数据库等)导出和分析用户数据。
## 脚本列表
### export_realtime_asr.py
**功能**: 导出流式语音 ASR 数据
**版本**: v1.0
**数据源**:
- Elasticsearch 索引: `llm_realtime_asr_log`
**配置说明**:
- 在脚本开头配置开始和结束日期(8位数字格式,如 20260101)
- ES 连接信息通过环境变量配置(需要创建 .env 文件)
**依赖包**:
```
elasticsearch
pandas
openpyxl
python-dotenv
```
**运行方式**:
```bash
python export_realtime_asr.py
```
**输出**:
- 输出目录: `output/`
- 文件命名: `realtime_asr_export_{开始日期}_{结束日期}.xlsx`
- Excel 列: voice_id, asr_prompt, result_str, timestamp, audio_url, source
**数据处理逻辑**:
- 从 ES 使用 scroll API 分批读取数据(每批1000条)
- 按 voice_id 聚合,仅保留恰好有2条记录的 voice_id
- 取两条记录中最新的 timestamp
- 自动拼接 audio_url
**特点**:
- 支持大数据量处理(几十万级别)
- 实时进度显示
- 自动过滤异常数据非2条记录的 voice_id
---
### 其他脚本
- `export_user_id_data.py`: 用户ID数据导出
- `batch_add_shengtong_result.py`: 批量添加声通评测结果
- `shengtong_eval.py`: 声通评测
- `calc_score_diff_stats.py`: 分数差异统计
- `export_unit_summary.py`: 单元总结统计导出
## 环境配置
需要创建 `.env` 文件,包含以下配置:
```
ES_HOST=xxx
ES_PORT=9200
ES_SCHEME=https
ES_USER=elastic
ES_PASSWORD=xxx
```
## 最近更新
- 2026-01-27: 新增 export_realtime_asr.py 脚本,支持流式语音 ASR 数据导出

View File

@ -1,853 +0,0 @@
"""
声通语音评测批量处理工具
功能说明:
- 读取 Excel 文件其中包含音频链接userAudio 字段和参考文本refText 字段
- 调用声通 API 对音频进行评测获取总分明细和recordId
- 在原 Excel 中添加"测试总分""测试明细""测试recordId"三个字段
- 输出文件命名为: {原文件名}_add_shengtong_result.xlsx
- 支持串行和并发两种处理模式
环境变量配置:
- ST_APP_KEY: 声通应用 Key
- ST_SECRET_KEY: 声通 Secret Key
声通API文档: http://api.stkouyu.com
"""
import pandas as pd
import os
import requests
import tempfile
from pathlib import Path
import json
import time
import hashlib
import uuid
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
from queue import Queue
import logging
# Configure logging: INFO level, timestamped records, written both to the
# file 'shengtong_batch_processing.log' (relative to the working directory)
# and to the console stream.
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler('shengtong_batch_processing.log'),
logging.StreamHandler()
]
)
# Load environment variables from a .env file
from dotenv import load_dotenv
load_dotenv()
# ==================== Global configuration ====================
# DEBUG mode switch (controls whether debug_print emits output)
DEBUG_MODE = False
def debug_print(message):
    """Print *message* with a ``[DEBUG]`` prefix when ``DEBUG_MODE`` is truthy.

    Args:
        message (str): Debug text to print.
    """
    if not DEBUG_MODE:
        return
    print(f"[DEBUG] {message}")
# ==================== 声通 API 相关代码 ====================
class ShengtongEvaluator:
    """Client wrapper around the Shengtong (声通) spoken-language evaluation HTTP API."""

    def __init__(self):
        """Read the API credentials from the environment.

        Uses ``ST_APP_KEY`` and ``ST_SECRET_KEY``.

        Raises:
            ValueError: If either variable is missing or empty.
        """
        env = os.environ
        self.app_key = env.get('ST_APP_KEY', '')
        self.secret_key = env.get('ST_SECRET_KEY', '')
        self.api_url = "http://api.stkouyu.com:8080/sent.eval"

        # Both credentials are required to sign requests.
        if not (self.app_key and self.secret_key):
            raise ValueError(
                "请配置声通 API 环境变量: ST_APP_KEY, ST_SECRET_KEY"
            )

    def _generate_signature(self, data: str) -> str:
        """Return the hex SHA-1 digest of *data* encoded as UTF-8."""
        digest = hashlib.sha1(data.encode('utf-8'))
        return digest.hexdigest()

    def _build_request_params(self, ref_text: str, audio_ext: str) -> dict:
        """Build the ``connect`` / ``start`` parameter structure for one request.

        Args:
            ref_text: Reference text the audio is evaluated against.
            audio_ext: Audio type sent as ``audioType`` (file extension without dot).

        Returns:
            dict: Payload with a ``connect`` and a ``start`` command. Both are
            signed with the same timestamp; ``start`` additionally carries a
            freshly generated random user id that is part of its signature.
        """
        timestamp = str(int(time.time()))
        user_id = str(uuid.uuid4())

        # connect signature: key + timestamp + secret
        connect_sig = self._generate_signature(
            self.app_key + timestamp + self.secret_key
        )
        # start signature: key + timestamp + user id + secret
        start_sig = self._generate_signature(
            self.app_key + timestamp + user_id + self.secret_key
        )

        connect_cmd = {
            "cmd": "connect",
            "param": {
                "sdk": {
                    "version": 16777472,
                    "source": 9,
                    "protocol": 2
                },
                "app": {
                    "applicationId": self.app_key,
                    "sig": connect_sig,
                    "timestamp": timestamp
                }
            }
        }
        start_cmd = {
            "cmd": "start",
            "param": {
                "app": {
                    "applicationId": self.app_key,
                    "sig": start_sig,
                    "timestamp": timestamp,
                    "userId": user_id
                },
                "audio": {
                    "audioType": audio_ext,
                    "channel": 1,
                    "sampleBytes": 2,
                    "sampleRate": 16000
                },
                "request": {
                    "coreType": "sent.eval",
                    "refText": ref_text,
                    "tokenId": "makee",
                }
            }
        }
        return {"connect": connect_cmd, "start": start_cmd}

    def evaluate(self, audio_file_path: str, ref_text: str) -> dict:
        """Send one audio file to the evaluation API.

        Args:
            audio_file_path: Path of the local audio file.
            ref_text: Reference text for the evaluation.

        Returns:
            dict: The decoded JSON response on HTTP 200. Otherwise a dict with
            an ``error`` message (plus ``response`` holding the raw body for
            non-200 replies). Errors are logged, never raised.
        """
        debug_print(f"开始评测音频文件: {audio_file_path}")
        debug_print(f"评测文本: {ref_text}")

        if not os.path.exists(audio_file_path):
            error_msg = f"音频文件不存在: {audio_file_path}"
            logging.error(error_msg)
            return {"error": error_msg}

        # Extension without the leading dot; fall back to wav when there is none.
        audio_ext = os.path.splitext(audio_file_path)[1][1:] or "wav"
        params = self._build_request_params(ref_text, audio_ext)

        try:
            with open(audio_file_path, 'rb') as audio_fh:
                audio_data = audio_fh.read()

            # multipart/form-data: JSON parameters as 'text', raw bytes as 'audio'
            upload_name = f"{int(time.time() * 1000000)}.{audio_ext}"
            files = {
                'text': (None, json.dumps(params)),
                'audio': (upload_name, audio_data)
            }
            headers = {'Request-Index': '0'}

            debug_print("开始发送请求到声通API...")
            response = requests.post(
                self.api_url,
                files=files,
                headers=headers,
                timeout=30
            )

            if response.status_code != 200:
                error_msg = f"请求失败,状态码: {response.status_code}"
                logging.error(f"{error_msg}, 响应: {response.text}")
                return {
                    "error": error_msg,
                    "response": response.text
                }

            result = response.json()
            debug_print("声通API返回成功")
            return result
        except requests.exceptions.RequestException as e:
            error_msg = f"请求异常: {str(e)}"
            logging.error(error_msg)
            return {"error": error_msg}
        except Exception as e:
            error_msg = f"评测过程出错: {str(e)}"
            logging.error(error_msg)
            return {"error": error_msg}
def evaluate_audio_file(audio_file_path, text="nice to meet you."):
    """Evaluate one audio file with a freshly constructed ``ShengtongEvaluator``.

    Args:
        audio_file_path (str): Path of the audio file.
        text (str): Reference text to evaluate against.

    Returns:
        dict: Whatever ``ShengtongEvaluator.evaluate`` returns for the inputs.
    """
    evaluator = ShengtongEvaluator()
    outcome = evaluator.evaluate(audio_file_path, text)
    return outcome
# ==================== 批量处理相关代码 ====================
def download_audio_file(audio_url, temp_dir, max_retries=3, timeout=30):
    """Download an audio file into *temp_dir*, retrying with exponential backoff.

    Args:
        audio_url (str): URL of the audio file.
        temp_dir (str): Directory the file is written to.
        max_retries (int): Maximum number of download attempts.
        timeout (int): Per-request timeout in seconds.

    Returns:
        str | None: Path of the downloaded file, or ``None`` when the URL is
        empty/NaN or every attempt failed. Errors are logged, never raised.
    """
    if not audio_url or pd.isna(audio_url):
        logging.warning("音频URL为空或无效")
        return None

    try:
        # File name comes from the URL path (query string removed).
        file_name = os.path.basename(audio_url.split('?')[0])
        if not file_name or '.' not in file_name:
            file_name = f"audio_{hash(audio_url) % 100000}.wav"  # default name

        # Prefix with a random id: different URLs can share a basename, and
        # concurrent workers would otherwise overwrite/delete each other's
        # temp file. The original extension is kept for the caller.
        file_path = os.path.join(temp_dir, f"{uuid.uuid4().hex}_{file_name}")

        # Browser-like User-Agent
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        for attempt in range(max_retries):
            try:
                logging.info(f"正在下载音频文件 (尝试 {attempt + 1}/{max_retries}): {audio_url}")

                # Context manager releases the streamed connection in all cases.
                with requests.get(audio_url, timeout=timeout, headers=headers, stream=True) as response:
                    response.raise_for_status()

                    # Content type is only checked for a warning, not enforced.
                    content_type = response.headers.get('content-type', '')
                    if not any(audio_type in content_type.lower() for audio_type in ['audio', 'wav', 'mp3', 'ogg', 'flac']):
                        logging.warning(f"可能不是音频文件Content-Type: {content_type}")

                    with open(file_path, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            if chunk:
                                f.write(chunk)

                file_size = os.path.getsize(file_path)
                if file_size == 0:
                    raise ValueError("下载的文件为空")

                logging.info(f"音频文件下载成功: {file_path} (大小: {file_size} bytes)")
                return file_path

            except requests.exceptions.Timeout:
                logging.warning(f"下载超时 (尝试 {attempt + 1}/{max_retries}): {audio_url}")
            except requests.exceptions.RequestException as e:
                logging.warning(f"下载请求异常 (尝试 {attempt + 1}/{max_retries}): {str(e)}")
            except Exception as e:
                logging.error(f"下载过程中发生未知错误 (尝试 {attempt + 1}/{max_retries}): {str(e)}")

            # Failed attempt: drop any partial/empty file so it is not left behind.
            try:
                if os.path.exists(file_path):
                    os.remove(file_path)
            except OSError:
                pass  # best-effort cleanup; the temp directory is removed later anyway

            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)  # exponential backoff

        logging.error(f"音频文件下载失败,已达到最大重试次数: {audio_url}")
        return None
    except Exception as e:
        logging.error(f"下载音频文件时发生异常: {str(e)}")
        return None
def format_shengtong_details(shengtong_result):
    """Render the per-word scores of an evaluation result as text.

    Args:
        shengtong_result (dict): Result returned by the evaluation API.

    Returns:
        str: One ``"<word> <score>"`` line per entry of ``result.words``
        (score truncated to int, joined with newlines). Empty string when the
        result is empty, contains ``error``, has no words, or formatting fails.
    """
    unusable = not shengtong_result or 'error' in shengtong_result
    if unusable:
        return ""

    try:
        word_items = shengtong_result.get('result', {}).get('words', [])
        if not word_items:
            return ""

        # "<word> <overall score>" for every word entry
        lines = [
            f"{item.get('word', '')} {int(item.get('scores', {}).get('overall', 0))}"
            for item in word_items
        ]
        return "\n".join(lines)
    except Exception as e:
        logging.error(f"格式化声通明细失败: {str(e)}")
        return ""
def get_shengtong_total_score(shengtong_result):
    """Extract the overall score from an evaluation result.

    Args:
        shengtong_result (dict): Result returned by the evaluation API.

    Returns:
        int: ``result.overall`` truncated to int; 0 when the result is empty,
        contains ``error``, lacks the field, or conversion fails.
    """
    if shengtong_result and 'error' not in shengtong_result:
        try:
            payload = shengtong_result.get('result', {})
            return int(payload.get('overall', 0))
        except Exception as e:
            logging.error(f"获取声通总分失败: {str(e)}")
    return 0
def get_shengtong_record_id(shengtong_result):
    """Extract the top-level ``recordId`` from an evaluation result.

    Args:
        shengtong_result (dict): Result returned by the evaluation API.

    Returns:
        str: The record id as a string; empty string when the result is
        empty, contains ``error``, or the id is missing/falsy.
    """
    if not shengtong_result or 'error' in shengtong_result:
        return ""

    try:
        record_id = shengtong_result.get('recordId', '')
        if record_id:
            return str(record_id)
        return ""
    except Exception as e:
        logging.error(f"获取声通recordId失败: {str(e)}")
        return ""
def _failure_result(message):
    """Build the result entry stored for a row that could not be evaluated."""
    return {
        '测试总分': 0,
        '测试明细': "",
        '测试recordId': "",
        'error': message
    }


def _return_rate_token(rate_limiter):
    """Best-effort: put one token back into *rate_limiter*, ignoring failures."""
    if rate_limiter:
        try:
            rate_limiter.put(None, timeout=1)
        except Exception:
            # Queue is still full after the timeout; dropping the token is fine.
            # (Exception rather than a bare except so KeyboardInterrupt/SystemExit
            # are no longer swallowed.)
            pass


def process_single_row(row_data, temp_dir, results_dict, lock, rate_limiter=None):
    """Evaluate one Excel row and store the outcome in *results_dict*.

    Steps: wait for a rate-limit token, validate ``refText`` / ``userAudio``,
    download the audio, call the evaluation API, extract score / details /
    recordId, then delete the temporary audio file. Every failure is logged
    and recorded as a zero-score entry with an ``error`` key; nothing is raised.

    Args:
        row_data (tuple): ``(index, row)``; *row* must support ``row['refText']``
            and ``row['userAudio']``.
        temp_dir (str): Directory for the downloaded audio file.
        results_dict (dict): Shared mapping ``index -> result dict``.
        lock (threading.Lock): Guards writes to *results_dict*.
        rate_limiter (Queue): Optional token queue; one token is taken before
            processing and put back afterwards.

    Returns:
        None
    """
    # NOTE(review): tokens are put back after each row while the caller also
    # appears to refill the queue on a timer, so the effective request rate can
    # exceed the configured limit — confirm this is intended.
    index, row = row_data
    start_time = time.time()
    timing_info = {}

    try:
        # 1. Time spent waiting for a rate-limit token
        rate_limit_start = time.time()
        if rate_limiter:
            rate_limiter.get()  # acquire token
        timing_info['rate_limit_wait'] = time.time() - rate_limit_start

        logging.info(f"开始处理第 {index + 1} 行数据")

        # 2. Input preprocessing and validation
        preprocess_start = time.time()
        ref_text = str(row['refText']) if pd.notna(row['refText']) else ""
        audio_url = str(row['userAudio']) if pd.notna(row['userAudio']) else ""

        if not ref_text:
            raise ValueError("refText 为空或无效")
        if not audio_url:
            raise ValueError("userAudio 为空或无效")
        timing_info['preprocess'] = time.time() - preprocess_start

        # 3. Audio download
        download_start = time.time()
        audio_file_path = download_audio_file(audio_url, temp_dir)
        timing_info['audio_download'] = time.time() - download_start

        if not audio_file_path:
            raise ValueError("音频文件下载失败")

        try:
            # 4. API call
            api_start = time.time()
            logging.info(f"正在调用声通API评测: {ref_text}")
            shengtong_result = evaluate_audio_file(audio_file_path, ref_text)
            timing_info['api_call'] = time.time() - api_start

            if not shengtong_result:
                raise ValueError("声通API返回空结果")

            # 5. Result extraction
            result_process_start = time.time()
            shengtong_details = format_shengtong_details(shengtong_result)
            shengtong_total_score = get_shengtong_total_score(shengtong_result)
            shengtong_record_id = get_shengtong_record_id(shengtong_result)
            timing_info['result_process'] = time.time() - result_process_start

            # 6. Store the result under the lock
            update_start = time.time()
            with lock:
                results_dict[index] = {
                    '测试总分': shengtong_total_score,
                    '测试明细': shengtong_details,
                    '测试recordId': shengtong_record_id
                }
            timing_info['data_update'] = time.time() - update_start

            total_time = time.time() - start_time
            timing_info['total'] = total_time

            # Per-phase timing breakdown
            logging.info(f"{index + 1} 行处理成功 - 总分: {shengtong_total_score} | "
                         f"总耗时: {total_time:.2f}s | "
                         f"速率等待: {timing_info['rate_limit_wait']:.2f}s | "
                         f"预处理: {timing_info['preprocess']:.3f}s | "
                         f"音频下载: {timing_info['audio_download']:.2f}s | "
                         f"API调用: {timing_info['api_call']:.2f}s | "
                         f"结果处理: {timing_info['result_process']:.3f}s | "
                         f"数据更新: {timing_info['data_update']:.3f}s")

        except Exception as api_error:
            total_time = time.time() - start_time
            logging.error(f"{index + 1} 行声通API调用失败: {str(api_error)} | "
                          f"总耗时: {total_time:.2f}s | "
                          f"音频下载: {timing_info.get('audio_download', 0):.2f}s | "
                          f"API调用: {timing_info.get('api_call', 0):.2f}s")
            with lock:
                results_dict[index] = _failure_result(f'API调用失败: {str(api_error)}')

        finally:
            # 7. Remove the temporary audio file
            cleanup_start = time.time()
            try:
                if audio_file_path and os.path.exists(audio_file_path):
                    os.remove(audio_file_path)
                    logging.debug(f"已删除临时文件: {audio_file_path}")
            except Exception as cleanup_error:
                logging.warning(f"清理临时文件失败: {str(cleanup_error)}")
            timing_info['cleanup'] = time.time() - cleanup_start

            # Give the rate-limit token back
            _return_rate_token(rate_limiter)

    except Exception as e:
        total_time = time.time() - start_time
        logging.error(f"{index + 1} 行处理异常: {str(e)} | 总耗时: {total_time:.2f}s")
        with lock:
            results_dict[index] = _failure_result(f'处理异常: {str(e)}')

        # Give the rate-limit token back
        _return_rate_token(rate_limiter)
def process_excel_with_shengtong_concurrent(input_file_path, output_dir="output/audio", max_workers=3, rate_limit_per_second=3):
    """
    Add Shengtong speech-evaluation results to an Excel file (concurrent version).

    Every row of the workbook is submitted to ``process_single_row`` on a
    thread pool. A bounded queue of tokens, topped up by a background thread,
    is handed to the workers as the rate limiter. Per-row results are
    collected in a shared dict, written back into the DataFrame and saved as
    ``<input stem>_add_shengtong_result.xlsx`` inside ``output_dir``.

    Args:
        input_file_path (str): Path of the input Excel file; it must contain
            the ``refText`` and ``userAudio`` columns.
        output_dir (str): Directory the result workbook is written to
            (default ``output/audio``).
        max_workers (int): Maximum number of worker threads (default 3).
        rate_limit_per_second (int): Requested requests-per-second budget
            (default 3). The effective budget is
            ``max(rate_limit_per_second, max_workers)``.

    Returns:
        bool: True when the result workbook was written; False when required
        columns are missing, no row has both inputs, or any exception occurs.
    """
    import queue  # local import: only needed to name the queue.Full exception

    start_time = time.time()
    # Signals the token-refill thread to exit once this call is over, so
    # repeated calls do not accumulate endlessly looping background threads.
    stop_refill = threading.Event()
    try:
        # Load the workbook
        logging.info(f"正在读取Excel文件: {input_file_path}")
        df = pd.read_excel(input_file_path)

        # Both input columns are mandatory
        required_columns = ['refText', 'userAudio']
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            logging.error(f"Excel文件缺少必要的列: {missing_columns}")
            return False

        # A row is valid when both the reference text and the audio URL are present
        total_rows = len(df)
        valid_rows = int((df['refText'].notna() & df['userAudio'].notna()).sum())
        logging.info(f"总行数: {total_rows}, 有效行数: {valid_rows}")
        if valid_rows == 0:
            logging.warning("没有找到有效的数据行")
            return False

        # Result columns, pre-filled with defaults
        df['测试总分'] = 0
        df['测试明细'] = ""
        df['测试recordId'] = ""

        # Token queue used as the rate limiter; it starts with one second's worth of tokens
        effective_rate_limit = max(rate_limit_per_second, max_workers)
        rate_limiter = Queue(maxsize=effective_rate_limit * 2)
        for _ in range(effective_rate_limit):
            rate_limiter.put(None)

        def rate_limiter_refill():
            """Add one token per interval until the stop event is set."""
            interval = 1.0 / effective_rate_limit
            # Event.wait doubles as the sleep and returns True as soon as we are told to stop
            while not stop_refill.wait(interval):
                try:
                    rate_limiter.put(None, block=False)
                except queue.Full:
                    pass  # queue already full: drop this token

        rate_thread = threading.Thread(target=rate_limiter_refill, daemon=True)
        rate_thread.start()
        logging.info(f"速率限制设置: {effective_rate_limit} req/s (原始: {rate_limit_per_second}, 队列大小: {effective_rate_limit * 2})")

        # Temporary directory for the downloaded audio files
        with tempfile.TemporaryDirectory() as temp_dir:
            logging.info(f"创建临时目录: {temp_dir}")
            logging.info(f"开始并发处理,最大并发数: {max_workers}, 有效速率限制: {effective_rate_limit} req/s")

            row_data_list = [(index, row) for index, row in df.iterrows()]
            # Shared result store and the lock guarding it
            results_dict = {}
            lock = threading.Lock()

            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                # Submit every row up front
                future_to_index = {
                    executor.submit(process_single_row, row_data, temp_dir, results_dict, lock, rate_limiter): row_data[0]
                    for row_data in row_data_list
                }

                # Consume completions and report progress
                completed_count = 0
                success_count = 0
                error_count = 0
                for future in as_completed(future_to_index):
                    completed_count += 1
                    index = future_to_index[future]
                    try:
                        future.result()  # re-raises any exception from the worker
                        with lock:
                            result = results_dict.get(index, {})
                        if result.get('error') is None:
                            success_count += 1
                        else:
                            error_count += 1

                        # Progress line every 10 rows and on the last row
                        if completed_count % 10 == 0 or completed_count == total_rows:
                            elapsed_time = time.time() - start_time
                            avg_time_per_item = elapsed_time / completed_count
                            remaining_time = avg_time_per_item * (total_rows - completed_count)
                            logging.info(f"进度: {completed_count}/{total_rows} ({completed_count/total_rows*100:.1f}%) "
                                         f"成功: {success_count}, 失败: {error_count}, "
                                         f"预计剩余时间: {remaining_time:.1f}")
                    except Exception as e:
                        error_count += 1
                        logging.error(f"任务 {index + 1} 执行异常: {str(e)}")
                        with lock:
                            # Keep whatever the worker stored; only fill in a missing entry
                            if index not in results_dict:
                                results_dict[index] = {
                                    '测试总分': 0,
                                    '测试明细': "",
                                    '测试recordId': "",
                                    'error': f'任务执行异常: {str(e)}'
                                }

            # Copy the collected results into the DataFrame
            logging.info("正在更新结果到DataFrame...")
            for index, result in results_dict.items():
                df.at[index, '测试总分'] = result.get('测试总分', 0)
                df.at[index, '测试明细'] = result.get('测试明细', "")
                df.at[index, '测试recordId'] = result.get('测试recordId', "")
                # Append the error to the remark column when that column exists
                if result.get('error') and '备注' in df.columns:
                    existing_note = str(df.at[index, '备注']) if pd.notna(df.at[index, '备注']) else ""
                    error_note = f"声通API错误: {result['error']}"
                    df.at[index, '备注'] = f"{existing_note}\n{error_note}".strip()

        # Make sure the output directory exists
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        # Output file name is derived from the input file name
        input_path = Path(input_file_path)
        output_file_path = output_path / f"{input_path.stem}_add_shengtong_result.xlsx"

        logging.info(f"正在保存结果到: {output_file_path}")
        df.to_excel(output_file_path, index=False)

        # Final summary
        total_time = time.time() - start_time
        final_success_count = sum(1 for result in results_dict.values() if result.get('error') is None)
        final_error_count = len(results_dict) - final_success_count
        logging.info("=" * 50)
        logging.info("并发处理完成!")
        logging.info(f"处理统计: 成功 {final_success_count} 条,失败 {final_error_count} 条,总计 {len(results_dict)}")
        logging.info(f"总耗时: {total_time:.2f}")
        if results_dict:
            # Guarded: no worker may have stored a result, and the average would divide by zero
            logging.info(f"平均处理时间: {total_time/len(results_dict):.2f} 秒/条")
        logging.info(f"输出文件: {output_file_path}")
        logging.info("=" * 50)
        return True
    except Exception as e:
        logging.error(f"处理Excel文件时出错: {str(e)}")
        return False
    finally:
        stop_refill.set()
def process_excel_with_shengtong(input_file_path, output_dir="output/audio"):
    """
    Add Shengtong speech-evaluation results to an Excel file (serial version).

    Rows are processed one at a time: the audio is downloaded, evaluated with
    ``evaluate_audio_file`` and the score, details and record id are written
    into new columns. The result is saved as
    ``<input stem>_add_shengtong_result.xlsx`` inside ``output_dir``.

    Args:
        input_file_path (str): Path of the input Excel file; it must contain
            the ``refText`` and ``userAudio`` columns.
        output_dir (str): Directory the result workbook is written to
            (default ``output/audio``).

    Returns:
        bool: True when the result workbook was written; False when required
        columns are missing or any exception occurs.
    """
    try:
        # Load the workbook
        print(f"正在读取Excel文件: {input_file_path}")
        df = pd.read_excel(input_file_path)

        # Both input columns are mandatory
        required_columns = ['refText', 'userAudio']
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            print(f"错误: Excel文件缺少必要的列: {missing_columns}")
            return False

        # Result columns, pre-filled with defaults
        df['测试总分'] = 0
        df['测试明细'] = ""
        df['测试recordId'] = ""

        # Temporary directory for the downloaded audio files
        with tempfile.TemporaryDirectory() as temp_dir:
            print(f"创建临时目录: {temp_dir}")
            total_rows = len(df)
            for index, row in df.iterrows():
                print(f"\n处理进度: {index + 1}/{total_rows}")
                ref_text = str(row['refText']) if pd.notna(row['refText']) else ""
                audio_url = str(row['userAudio']) if pd.notna(row['userAudio']) else ""
                if not ref_text or not audio_url:
                    print(f"{index + 1} 行数据不完整,跳过")
                    continue
                print(f"参考文本: {ref_text}")
                print(f"音频URL: {audio_url}")

                # Download the audio file
                audio_file_path = download_audio_file(audio_url, temp_dir)
                if not audio_file_path:
                    print(f"{index + 1} 行音频下载失败,跳过")
                    continue

                # Evaluate the audio with the Shengtong API
                print("正在调用声通API进行评测...")
                try:
                    shengtong_result = evaluate_audio_file(audio_file_path, ref_text)
                    print(f"声通API返回结果: {json.dumps(shengtong_result, indent=2, ensure_ascii=False)}")

                    # Extract total score, details and record id
                    total_score = get_shengtong_total_score(shengtong_result)
                    details = format_shengtong_details(shengtong_result)
                    record_id = get_shengtong_record_id(shengtong_result)

                    df.at[index, '测试总分'] = total_score
                    df.at[index, '测试明细'] = details
                    df.at[index, '测试recordId'] = record_id
                    print(f"测试总分: {total_score}")
                    print(f"测试明细: {details}")
                    print(f"测试recordId: {record_id}")
                except Exception as e:
                    print(f"{index + 1} 行声通API调用失败: {str(e)}")
                finally:
                    # Runs for failed calls too: the temp file is released right
                    # away and a failing API is not hit again without a pause.
                    try:
                        os.remove(audio_file_path)
                    except OSError:
                        pass  # best effort; the temporary directory is removed afterwards anyway
                    # Throttle so the API is not called too frequently
                    time.sleep(1)

        # Make sure the output directory exists
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        # Output file name is derived from the input file name
        input_path = Path(input_file_path)
        output_file_path = output_path / f"{input_path.stem}_add_shengtong_result.xlsx"

        print(f"\n正在保存结果到: {output_file_path}")
        df.to_excel(output_file_path, index=False)
        print("处理完成!")
        return True
    except Exception as e:
        print(f"处理Excel文件时出错: {str(e)}")
        return False
if __name__ == "__main__":
    # ==================== Configuration ====================
    input_file = "人工筛选测试集v2_denoise.xlsx"
    output_directory = "output/audio"  # output directory; adjust as needed
    use_concurrent = True  # True: concurrent pipeline, False: serial pipeline
    # Debug switch (True: verbose debug output, False: key information only)
    enable_debug = False
    # Publish the switch as the module-level DEBUG_MODE flag
    DEBUG_MODE = enable_debug

    # Environment variables this script insists on before running
    required_env_vars = ['ST_APP_KEY', 'ST_SECRET_KEY']
    missing_vars = [name for name in required_env_vars if not os.environ.get(name)]

    if missing_vars:
        for message in (
            f"错误: 缺少必要的环境变量: {missing_vars}",
            "请在 .env 文件或系统环境变量中配置:",
            " ST_APP_KEY=你的应用Key",
            " ST_SECRET_KEY=你的Secret Key",
        ):
            print(message)
    elif not os.path.exists(input_file):
        for message in (
            f"文件不存在: {input_file}",
            "请确保Excel文件存在并包含 'refText''userAudio'",
        ):
            print(message)
    else:
        if use_concurrent:
            print("使用并发版本处理3路并发3 req/s...")
            success = process_excel_with_shengtong_concurrent(
                input_file,
                output_dir=output_directory,
                max_workers=3,
                rate_limit_per_second=3,
            )
        else:
            print("使用串行版本处理...")
            success = process_excel_with_shengtong(input_file, output_dir=output_directory)
        print("处理成功!" if success else "处理失败!")

File diff suppressed because it is too large Load Diff

View File

@ -1,492 +0,0 @@
"""
互动组件数据导出
需求 20251123
---------
PGsql数据库中 筛选数据
数据库相关配置 .env中读取:
PG_DB_HOST = xxx
PG_DB_PORT = xxx
PG_DB_USER = xxx
PG_DB_PASSWORD = xxx
PG_DB_DATABASE = xxx
读取以下数据表:
user_component_play_record_0 ~ user_component_play_record_7
支持输入时间范围
起始时间 截止时间 配置格式: "20250110"
数据表中的时间字段为 updated_at , 格式样例: "2025-11-05 19:35:46.698246+08:00"
在这些时间范围内筛选以下字段数据 导出为excel文件:
c_type c_id 非空
输出以下字段
user_id,
session_id,
c_type,
c_id,
play_result,
user_behavior_info,
updated_at
写一个简单清晰的 数据导出脚本 输入参数都直接在脚本开头定义和修改 不要改动文件开头的需求描述直接追加代码
-------
需求二:
读取上述 输出的 excel 文件 围绕 每个组件进行 统计
统计方式如下:
仅计算 c_type c_id 非空 的记录
以每个 c_type + c_id 拼接 作为统计维度
统计以下数据:
总数量
Perfect数量:play_result=="Perfect" 的数量
Good数量:play_result=="Good" 的数量
Pass数量:play_result=="Pass" 的数量
Oops数量:play_result=="Oops" 的数量
Failed数量:play_result=="Failed" 的数量
Perfect+Good数量:play_result=="Perfect" play_result=="Good" 的数量
Perfect比例:Perfect数量 / 总数量
Good比例:Good数量 / 总数量
Pass比例:Pass数量 / 总数量
Oops比例:Oops数量 / 总数量
Failed比例:Failed数量 / 总数量
Perfect+Good比例:Perfect+Good数量 / 总数量
导出为excel 命名: 步骤1文件 结尾追加 _stats.xlsx
需求三:
在需求二中 追加从另外两个mysql表关联的组件配置字段:
MYSQL_HOST=xxx
MYSQL_USERNAME=xxx
MYSQL_PASSWORD=xxx
MYSQL_DATABASE=xxx
MYSQL_PORT=xxx
以上环境变量已配置在 .env
1.如果 c_type 开头为"mid"
则读取下表:表名:middle_interaction_component
增加以下字段:
title
component_config
组件类型
其中:
组件类型: 根据以下映射 c_type 转成中文名:xx互动
{
"词汇类": {
"物品互动": "mid_vocab_item",
"图片互动": "mid_vocab_image",
"填词互动": "mid_vocab_fillBlank",
"指令互动": "mid_vocab_instruction"
},
"句子类": {
"对话互动": "mid_sentence_dialogue",
"语音互动": "mid_sentence_voice",
"材料互动": "mid_sentence_material",
"造句互动": "mid_sentence_makeSentence"
},
"语法类": {
"挖空互动": "mid_grammar_cloze",
"组句互动": "mid_grammar_sentence"
},
"发音类": {
"发音互动": "mid_pron_pron"
}
}
2. 如果 c_type 开头为"core"
则读取下表:表名:core_interaction_component
增加以下字段:
title
component_config
组件类型
其中:
组件类型: 根据以下映射 c_type 转成中文名:xx互动
{
"口语类": {
"口语快答": "core_speaking_reply",
"口语妙问": "core_speaking_inquiry",
"口语探讨": "core_speaking_explore",
"口语独白": "core_speaking_monologue"
},
"阅读类": {
"合作阅读": "core_reading_order",
},
"听力类": {
"合作听力": "core_listening_order",
},
"写作类": {
"看图组句": "core_writing_imgMakeSentence",
"看图撰写": "core_writing_imgWrite",
"问题组句": "core_writing_questionMakeSentence",
"问题撰写": "core_writing_questionWrite",
},
}
以上追加字段 增加到 步骤二输出的表中
"""
import os
from datetime import datetime
from dotenv import load_dotenv
import psycopg2
import pandas as pd
import pymysql
# ==================== Configuration ====================
# Date range, formatted like "20250110"
START_DATE = "20250915"  # start date
END_DATE = "20251122"  # end date
# Directory the exported workbooks are written to
OUTPUT_DIR = "output"
# Step switches
RUN_STEP1 = False  # run step 1 (data export)?
RUN_STEP2 = True  # run step 2 (statistics)?
# =======================================================
# Maps each c_type code to its Chinese component-type name
C_TYPE_MAPPING = dict(
    # components stored in middle_interaction_component
    mid_vocab_item="物品互动",
    mid_vocab_image="图片互动",
    mid_vocab_fillBlank="填词互动",
    mid_vocab_instruction="指令互动",
    mid_sentence_dialogue="对话互动",
    mid_sentence_voice="语音互动",
    mid_sentence_material="材料互动",
    mid_sentence_makeSentence="造句互动",
    mid_grammar_cloze="挖空互动",
    mid_grammar_sentence="组句互动",
    mid_pron_pron="发音互动",
    # components stored in core_interaction_component
    core_speaking_reply="口语快答",
    core_speaking_inquiry="口语妙问",
    core_speaking_explore="口语探讨",
    core_speaking_monologue="口语独白",
    core_reading_order="合作阅读",
    core_listening_order="合作听力",
    core_writing_imgMakeSentence="看图组句",
    core_writing_imgWrite="看图撰写",
    core_writing_questionMakeSentence="问题组句",
    core_writing_questionWrite="问题撰写",
)
def step1_export_data():
    """Step 1: export component play records from PostgreSQL to an Excel file.

    Reads the eight tables ``user_component_play_record_0`` ..
    ``user_component_play_record_7``, keeps rows whose ``updated_at`` lies
    between START_DATE and END_DATE (both days included) and whose ``c_type``
    and ``c_id`` are not NULL, and writes the combined rows to
    ``OUTPUT_DIR/component_record_<START_DATE>_<END_DATE>.xlsx``.

    Returns:
        str: Path of the exported workbook.
    """
    from datetime import timedelta  # local import: the module only imports ``datetime``

    print("=" * 60)
    print("步骤1数据导出")
    print("=" * 60)

    # Load environment variables
    load_dotenv()

    # Database settings
    db_config = {
        'host': os.getenv('PG_DB_HOST'),
        'port': os.getenv('PG_DB_PORT'),
        'user': os.getenv('PG_DB_USER'),
        'password': os.getenv('PG_DB_PASSWORD'),
        'database': os.getenv('PG_DB_DATABASE')
    }

    # Convert the configured days into timestamp bounds
    start_day = datetime.strptime(START_DATE, "%Y%m%d")
    end_day = datetime.strptime(END_DATE, "%Y%m%d")
    start_datetime = start_day.strftime("%Y-%m-%d 00:00:00")
    end_datetime = end_day.strftime("%Y-%m-%d 23:59:59")  # used for display only
    # The query uses a half-open range [start, next day): updated_at carries
    # fractional seconds, so "<= 23:59:59" would drop rows written during the
    # last second of END_DATE.
    end_exclusive = (end_day + timedelta(days=1)).strftime("%Y-%m-%d 00:00:00")
    print(f"时间范围: {start_datetime} ~ {end_datetime}")

    # Connect to the database
    conn = psycopg2.connect(**db_config)
    all_data = []
    try:
        # Walk the eight sharded tables
        for i in range(8):
            table_name = f"user_component_play_record_{i}"
            print(f"正在读取表: {table_name}")
            query = f"""
                SELECT
                    user_id,
                    session_id,
                    c_type,
                    c_id,
                    play_result,
                    user_behavior_info,
                    updated_at
                FROM {table_name}
                WHERE updated_at >= %s
                  AND updated_at < %s
                  AND c_type IS NOT NULL
                  AND c_id IS NOT NULL
            """
            df = pd.read_sql_query(query, conn, params=(start_datetime, end_exclusive))
            all_data.append(df)
            print(f" - 读取到 {len(df)} 条记录")
    finally:
        # Close the connection even when one of the queries fails
        conn.close()

    # Combine all shards
    result_df = pd.concat(all_data, ignore_index=True)
    print(f"\n总共获取 {len(result_df)} 条记录")

    # Strip the timezone from updated_at (Excel cannot store tz-aware datetimes)
    if 'updated_at' in result_df.columns and not result_df.empty:
        result_df['updated_at'] = result_df['updated_at'].dt.tz_localize(None)

    # Make sure the output directory exists
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Build the output file name
    output_filename = f"component_record_{START_DATE}_{END_DATE}.xlsx"
    output_path = os.path.join(OUTPUT_DIR, output_filename)

    # Export to Excel
    result_df.to_excel(output_path, index=False, engine='openpyxl')
    print(f"数据已导出到: {output_path}")
    print()
    return output_path
def get_component_info_from_mysql(stats_df):
    """Look up title and component_config for every component in ``stats_df``.

    Components whose ``c_type`` starts with ``mid`` are read from
    ``middle_interaction_component``; those starting with ``core`` are read
    from ``core_interaction_component``. One query is issued per distinct
    (c_type, c_id) pair.

    Args:
        stats_df: DataFrame providing the ``c_type`` and ``c_id`` columns.

    Returns:
        dict: ``"<c_type>-<c_id>"`` -> ``{'title': ..., 'component_config': ...}``
        for every pair that was found.
    """
    # Load environment variables
    load_dotenv()

    # MySQL settings
    mysql_config = dict(
        host=os.getenv('MYSQL_HOST'),
        user=os.getenv('MYSQL_USERNAME'),
        password=os.getenv('MYSQL_PASSWORD'),
        database=os.getenv('MYSQL_DATABASE'),
        port=int(os.getenv('MYSQL_PORT', 3306)),
        charset='utf8mb4',
    )
    print("正在连接MySQL数据库...")
    conn = pymysql.connect(**mysql_config)

    # (c_type prefix, table name, lookup query) for each component family
    sources = (
        ('mid', 'middle_interaction_component', """
            SELECT title, component_config
            FROM middle_interaction_component
            WHERE c_type = %s AND c_id = %s
        """),
        ('core', 'core_interaction_component', """
            SELECT title, component_config
            FROM core_interaction_component
            WHERE c_type = %s AND c_id = %s
        """),
    )

    try:
        # Split the components by family before running any query
        records_by_prefix = {
            prefix: stats_df[stats_df['c_type'].str.startswith(prefix, na=False)][['c_type', 'c_id']]
            for prefix, _table, _query in sources
        }

        # Component details keyed by "c_type-c_id"
        component_info = {}
        for prefix, table_name, lookup_sql in sources:
            family_records = records_by_prefix[prefix]
            if family_records.empty:
                continue
            print(f"正在查询 {table_name} 表,共 {len(family_records)} 个组件...")

            # Only distinct (c_type, c_id) pairs are queried
            for _, pair in family_records.drop_duplicates().iterrows():
                type_code = pair['c_type']
                component_code = pair['c_id']
                found = pd.read_sql_query(lookup_sql, conn, params=(type_code, component_code))
                if found.empty:
                    continue
                component_info[f"{type_code}-{component_code}"] = {
                    'title': found['title'].iloc[0],
                    'component_config': found['component_config'].iloc[0]
                }

            matched = sum(1 for key in component_info if key.startswith(prefix))
            print(f" - 查询到 {matched} 个组件信息")
    finally:
        conn.close()
    return component_info
def step2_statistics(input_file):
    """Step 2: aggregate play results per component and export the statistics.

    Reads the workbook produced by step 1, keeps rows with non-empty
    ``c_type`` and ``c_id``, groups them by ``"<c_type>-<c_id>"`` and counts
    each ``play_result`` value (Perfect / Good / Pass / Oops / Failed, plus
    Perfect+Good) together with its share of the group, rounded to two
    decimals. Title, component_config and the Chinese component type are then
    attached and the table is written to
    ``OUTPUT_DIR/<input file stem>_stats.xlsx``.

    Args:
        input_file (str): Path of the step 1 workbook.

    Returns:
        str: Path of the statistics workbook.
    """
    print("=" * 60)
    print("步骤2数据统计")
    print("=" * 60)

    # c_id is read as text so that leading zeros are preserved
    print(f"正在读取文件: {input_file}")
    df = pd.read_excel(input_file, engine='openpyxl', dtype={'c_id': str})
    print(f"读取到 {len(df)} 条记录")

    # Keep only rows where both c_type and c_id are present
    df_filtered = df[df['c_type'].notna() & df['c_id'].notna()].copy()
    print(f"筛选后 {len(df_filtered)} 条有效记录")

    # Force both key columns to str, then build the component id "c_type-c_id"
    df_filtered['c_type'] = df_filtered['c_type'].astype(str)
    df_filtered['c_id'] = df_filtered['c_id'].astype(str)
    df_filtered['component_id'] = df_filtered['c_type'] + '-' + df_filtered['c_id']

    result_names = ['Perfect', 'Good', 'Pass', 'Oops', 'Failed']
    count_names = result_names + ['Perfect+Good']
    stat_columns = (
        ['component_id', 'c_type', 'c_id', '总数量']
        + [f'{name}数量' for name in count_names]
        + [f'{name}比例' for name in count_names]
    )

    # One statistics record per component
    stats_list = []
    for component_id, group in df_filtered.groupby('component_id'):
        total_count = len(group)  # groupby never yields an empty group
        counts = {name: int((group['play_result'] == name).sum()) for name in result_names}
        counts['Perfect+Good'] = counts['Perfect'] + counts['Good']

        record = {
            'component_id': component_id,
            'c_type': group['c_type'].iloc[0],
            'c_id': group['c_id'].iloc[0],
            '总数量': total_count,
        }
        for name in count_names:
            record[f'{name}数量'] = counts[name]
        for name in count_names:
            # Share of the group, rounded to two decimals
            record[f'{name}比例'] = round(counts[name] / total_count, 2)
        stats_list.append(record)

    # Explicit columns keep the frame well-formed even when no record qualified
    stats_df = pd.DataFrame(stats_list, columns=stat_columns)
    print(f"统计了 {len(stats_df)} 个不同的组件")

    # Fetch component details from MySQL
    print("\n" + "=" * 60)
    print("正在从MySQL获取组件配置信息...")
    print("=" * 60)
    # Nothing to look up when there are no components
    component_info = get_component_info_from_mysql(stats_df) if not stats_df.empty else {}

    # New columns: title, component_config and component type, matched by component_id
    stats_df['title'] = stats_df['component_id'].map(lambda key: component_info.get(key, {}).get('title', ''))
    stats_df['component_config'] = stats_df['component_id'].map(lambda key: component_info.get(key, {}).get('component_config', ''))
    stats_df['组件类型'] = stats_df['c_type'].map(lambda code: C_TYPE_MAPPING.get(code, ''))

    # Column order: the new fields go right after c_type / c_id
    columns_order = [
        'component_id', 'c_type', 'c_id',
        'title', 'component_config', '组件类型',
        '总数量',
        'Perfect数量', 'Good数量', 'Pass数量', 'Oops数量', 'Failed数量', 'Perfect+Good数量',
        'Perfect比例', 'Good比例', 'Pass比例', 'Oops比例', 'Failed比例', 'Perfect+Good比例'
    ]
    stats_df = stats_df[columns_order]

    # Derive the output name from the input stem. splitext only touches the
    # extension, so the result always differs from the input file name.
    input_stem, _ext = os.path.splitext(os.path.basename(input_file))
    output_filename = f"{input_stem}_stats.xlsx"
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    output_path = os.path.join(OUTPUT_DIR, output_filename)

    # Export to Excel
    stats_df.to_excel(output_path, index=False, engine='openpyxl')
    print(f"\n统计结果已导出到: {output_path}")
    print()
    return output_path
def main():
    """Run the enabled pipeline steps: the export first, then the statistics."""
    # Step 1: data export (its return value is the exported file path)
    export_file = step1_export_data() if RUN_STEP1 else None

    # Step 2: statistics
    if RUN_STEP2:
        if export_file is None:
            # Step 1 was skipped, so fall back to the file it would have produced
            export_file = os.path.join(OUTPUT_DIR, f"component_record_{START_DATE}_{END_DATE}.xlsx")
            if not os.path.exists(export_file):
                print(f"错误:找不到文件 {export_file}")
                print("请先执行步骤1或确保文件存在")
                return
        step2_statistics(export_file)

    separator = "=" * 60
    print(separator)
    print("处理完成!")
    print(separator)


if __name__ == "__main__":
    main()

View File

@ -1,572 +0,0 @@
"""
** 不要改动我的需求描述直接在需求后面写代码即可 **
课程巩固 数据导出 分析
-----------
需求一:
PGsql数据库中 筛选数据
数据库相关配置 .env中读取:
PG_DB_HOST = xxx
PG_DB_PORT = xxx
PG_DB_USER = xxx
PG_DB_PASSWORD = xxx
PG_DB_DATABASE = xxx
读取以下数据表: user_unit_review_question_result
支持输入时间范围
起始时间 截止时间 配置格式: "20250110"
数据表中的时间字段为 updated_at , 格式样例: "2025-11-05 19:35:46.698246+08:00"
在这些时间范围内筛选数据 (要求deleted_at字段内容为null)
导出以下字段:
user_id
unit_id 读取每条记录的story_id 根据 get_id_2_unit_index 函数返回的映射表 映射到 unit_id
lesson_id 读取chapter_id 根据该值 查询 mysql表 vala_game_chapter id == chapter_id 并返回该记录的 index字段的值
question_list
题目总数
正确数量
正确率
play_time_seconds 读取 play_time 把ms数据转换为秒 保留整数部分
updated_at
其中 题目总数 正确数量 正确率 都通过 question_list 计算
该字段为 list of json:
[
{
"question": {
"type": "vocab_meaning_meaning",
"id": "20-0",
"title": "“clean” 的意思是什么?",
"npcId": -1
},
"answers": [
"2"
],
"optionList": [
{
"option": "爬行"
},
{
"option": "清晰的"
},
{
"option": "清洁"
}
],
"isRight": true
},
...
]
每个元素为一道题目 题目中有 "isRight": true 代表用户做对了
导出为excel文件
----
需求二 基于 需求一的输出文件 作为 输入文件 进行数据聚合
聚合的维度是每道题目
根据 question_list 中的 每个题目 question -> id 作为唯一标识
统计每个题目
总记录数量
正确数量
正确率
并查询mysql表 补充题目的以下信息:
步骤一中每个题目id的格式是 num1-num2 (question -> id)
查询vala_kp_question表
其中num1部分 用于 检索vala_kp_question 中的 id, 每个id下 可能有多道题目 vala_kp_question的 question 字段 是一个list, num2为question 字段中的索引
补充以下字段:
kp_id (vala_kp_question字段)
category (vala_kp_question字段)
skill (vala_kp_question字段)
type (vala_kp_question字段)
题目配置 (question字段中 对应 num2 索引的内容)
最终针对每道题目输出以下字段:
出现位置 (list, 把所有出现的位置拼接 unit_id +"_"+ lesson_id 例如:"unit10-lesson1" 这样的格式)
question_id (question -> id)
kp_id (vala_kp_question字段)
category (vala_kp_question字段)
skill (vala_kp_question字段)
type (vala_kp_question字段)
题目配置 (question字段中 对应 num2 索引的内容)
总记录数量
正确数量
正确率
导出为excel 命名为 步骤一文件_stat.xlsx
所有需要配置的参数 放在脚本开头位置
"""
import os
import pymysql
import psycopg2
from psycopg2.extras import RealDictCursor
from datetime import datetime
import pandas as pd
from dotenv import load_dotenv
import json
from collections import defaultdict
# Load environment variables
load_dotenv()
# ============ Configuration ============
START_DATE = "20250915"  # start date
END_DATE = "20251122"  # end date
OUTPUT_NAME = f"lesson_review_data_{START_DATE}_{END_DATE}.xlsx"  # output file name
OUTPUT_FILENAME = os.path.join("./output", OUTPUT_NAME)
# =======================================
def get_mysql_connection():
    """Open a MySQL connection configured from environment variables.

    Reads MYSQL_HOST, MYSQL_USERNAME, MYSQL_PASSWORD, MYSQL_DATABASE and
    MYSQL_PORT; the port falls back to 3306 when it is not set.

    Returns:
        A pymysql connection that uses ``DictCursor``.

    Raises:
        Exception: If host, user, password or database is missing.
    """
    settings = {
        'host': os.getenv('MYSQL_HOST'),
        'user': os.getenv('MYSQL_USERNAME'),
        'password': os.getenv('MYSQL_PASSWORD'),
        'database': os.getenv('MYSQL_DATABASE'),
    }
    raw_port = os.getenv('MYSQL_PORT')

    # The port is optional; every other setting is required
    if not all(settings.values()):
        raise Exception("Error: Missing MySQL configuration in .env file.")

    return pymysql.connect(
        port=int(raw_port) if raw_port else 3306,
        cursorclass=pymysql.cursors.DictCursor,
        **settings,
    )
def get_pgsql_connection():
    """Open a PostgreSQL connection configured from environment variables.

    Reads PG_DB_HOST, PG_DB_PORT, PG_DB_USER, PG_DB_PASSWORD and
    PG_DB_DATABASE; all five are required.

    Returns:
        A psycopg2 connection whose cursors are ``RealDictCursor``.

    Raises:
        Exception: If any of the five settings is missing.
    """
    env_names = {
        'host': 'PG_DB_HOST',
        'port': 'PG_DB_PORT',
        'user': 'PG_DB_USER',
        'password': 'PG_DB_PASSWORD',
        'database': 'PG_DB_DATABASE',
    }
    params = {key: os.getenv(env_name) for key, env_name in env_names.items()}

    if not all(params.values()):
        raise Exception("Error: Missing PGsql configuration in .env file.")

    params['port'] = int(params['port'])
    return psycopg2.connect(cursor_factory=RealDictCursor, **params)
def get_id_2_unit_index():
    """Build the story_id -> unit index mapping from ``vala_game_info``.

    Non-deleted rows with ``id > 0`` are ordered by ``season_package_id`` and
    ``index``; each row's ``id`` is mapped to its 0-based position in that
    ordering.

    Returns:
        dict: ``vala_game_info.id`` -> position in the ordered result.
    """
    print("正在获取 story_id 到 unit_id 的映射...")
    connection = get_mysql_connection()
    try:
        with connection.cursor() as cursor:
            cursor.execute("""
                SELECT *
                FROM `vala_game_info`
                WHERE id > 0
                AND `vala_game_info`.`deleted_at` IS NULL
                ORDER BY season_package_id asc, `index` asc
            """)
            rows = cursor.fetchall()
            # The position in the ordered result is the unit index
            id_2_unit_index = {row['id']: position for position, row in enumerate(rows)}
            print(f"成功获取 {len(id_2_unit_index)} 个单元映射")
            return id_2_unit_index
    finally:
        connection.close()
def get_chapter_id_to_lesson_id():
    """Build the chapter_id -> lesson_id mapping from ``vala_game_chapter``.

    Returns:
        dict: ``vala_game_chapter.id`` -> that row's ``index`` value, for all
        non-deleted rows.
    """
    print("正在获取 chapter_id 到 lesson_id 的映射...")
    connection = get_mysql_connection()
    try:
        with connection.cursor() as cursor:
            cursor.execute("""
                SELECT id, `index`
                FROM `vala_game_chapter`
                WHERE deleted_at IS NULL
            """)
            # The chapter's `index` column serves as the lesson id
            chapter_id_to_lesson_id = {row['id']: row['index'] for row in cursor.fetchall()}
            print(f"成功获取 {len(chapter_id_to_lesson_id)} 个课程映射")
            return chapter_id_to_lesson_id
    finally:
        connection.close()
def analyze_question_list(question_list_json):
    """Summarise a question list as (total, correct, accuracy).

    Args:
        question_list_json: Either a JSON string or an already decoded value.
            The expected shape is a list of dicts, one per question, where
            ``"isRight": true`` marks a correct answer.

    Returns:
        tuple: ``(total, correct, accuracy)``. ``accuracy`` is a percentage
        rounded to two decimals. ``(0, 0, 0)`` is returned when the value
        cannot be decoded or is not a list.
    """
    try:
        if isinstance(question_list_json, str):
            question_list = json.loads(question_list_json)
        else:
            question_list = question_list_json
    except ValueError as e:  # json.JSONDecodeError is a ValueError
        print(f"解析题目列表出错: {e}")
        return 0, 0, 0

    if not isinstance(question_list, list):
        return 0, 0, 0

    # Elements that are not dicts are not questions. They are skipped instead
    # of invalidating the whole record, as the per-question aggregation does.
    questions = [item for item in question_list if isinstance(item, dict)]
    total = len(questions)
    # Compared against True on purpose: truthy strings such as "false" must not count
    correct = sum(1 for item in questions if item.get('isRight') == True)  # noqa: E712
    accuracy = round(correct / total * 100, 2) if total > 0 else 0
    return total, correct, accuracy
def export_step1():
    """Requirement 1: export the raw unit-review records to Excel.

    Reads ``user_unit_review_question_result`` rows that are not deleted and
    whose ``updated_at`` lies between START_DATE and END_DATE (both days
    included), maps ``story_id`` to a unit id and ``chapter_id`` to a lesson
    id, derives the question totals from ``question_list`` and writes the
    rows to OUTPUT_FILENAME.

    Returns:
        str: Path of the exported workbook (OUTPUT_FILENAME).
    """
    from datetime import timedelta  # local import: the module only imports ``datetime``

    print("=" * 50)
    print("开始执行需求一:导出原始数据")
    print("=" * 50)

    # Lookup tables
    id_2_unit_index = get_id_2_unit_index()
    chapter_id_to_lesson_id = get_chapter_id_to_lesson_id()

    # Connect to PostgreSQL
    print("正在连接 PGsql 数据库...")
    pg_conn = get_pgsql_connection()
    try:
        with pg_conn.cursor() as cursor:
            # Build the time range
            start_datetime = datetime.strptime(START_DATE, "%Y%m%d")
            end_day = datetime.strptime(END_DATE, "%Y%m%d")
            end_datetime = end_day.replace(hour=23, minute=59, second=59)  # used for display only
            # The query uses a half-open range [start, next day): updated_at
            # carries fractional seconds, so "<= 23:59:59" would drop rows
            # written during the last second of END_DATE.
            end_exclusive = end_day + timedelta(days=1)
            sql = """
                SELECT user_id, story_id, chapter_id, question_list, play_time, updated_at
                FROM user_unit_review_question_result
                WHERE updated_at >= %s
                AND updated_at < %s
                AND deleted_at IS NULL
                ORDER BY updated_at
            """
            print(f"查询时间范围: {start_datetime}{end_datetime}")
            cursor.execute(sql, (start_datetime, end_exclusive))
            results = cursor.fetchall()
            print(f"查询到 {len(results)} 条记录")

        # Convert each database row into an export row
        export_data = []
        for row in results:
            question_list_raw = row['question_list']
            play_time = row['play_time']
            updated_at = row['updated_at']

            # question_list may arrive as a JSON string or as an already decoded object
            if isinstance(question_list_raw, str):
                try:
                    question_list = json.loads(question_list_raw)
                except ValueError:
                    question_list = []
            else:
                question_list = question_list_raw if question_list_raw else []

            # Derive totals from the question list
            total, correct, accuracy = analyze_question_list(question_list)

            export_data.append({
                'user_id': row['user_id'],
                # -1 marks an id that has no mapping
                'unit_id': id_2_unit_index.get(row['story_id'], -1),
                'lesson_id': chapter_id_to_lesson_id.get(row['chapter_id'], -1),
                # Always stored as a JSON string; empty lists become ""
                'question_list': json.dumps(question_list, ensure_ascii=False) if question_list else "",
                '题目总数': total,
                '正确数量': correct,
                '正确率': accuracy,
                # Milliseconds -> whole seconds
                'play_time_seconds': int(play_time / 1000) if play_time else 0,
                # Drop tzinfo (Excel cannot store tz-aware datetimes)
                'updated_at': updated_at.replace(tzinfo=None) if updated_at else None
            })

        # Export to Excel
        df = pd.DataFrame(export_data)
        # Make sure the output directory exists
        os.makedirs(os.path.dirname(OUTPUT_FILENAME), exist_ok=True)
        df.to_excel(OUTPUT_FILENAME, index=False, engine='openpyxl')
        print(f"成功导出 {len(export_data)} 条记录到: {OUTPUT_FILENAME}")
        return OUTPUT_FILENAME
    finally:
        pg_conn.close()
def get_all_kp_questions(question_ids):
    """Fetch ``vala_kp_question`` details for many question ids in one query.

    A question id has the form ``"<num1>-<num2>"``: ``num1`` is the
    ``vala_kp_question.id`` and ``num2`` is the index into that row's
    ``question`` list.

    Args:
        question_ids: Iterable of question ids (it is iterated more than once
            and ``len()`` is taken, so pass a list).

    Returns:
        dict: question_id -> ``(kp_id, category, skill, type, question_config)``.
        ``question_config`` is the JSON text of the indexed question, or None
        when the index is out of range. Ids that are malformed or have no
        matching row map to a tuple of five None values.
    """
    print(f"正在批量查询 {len(question_ids)} 道题目的信息...")
    no_info = (None, None, None, None, None)

    # Parse every id once: question_id -> (vala_kp_question id, question index)
    parsed_ids = {}
    for question_id in question_ids:
        try:
            row_part, index_part = question_id.split('-')
            parsed_ids[question_id] = (int(row_part), int(index_part))
        except (AttributeError, ValueError):
            # Not a "<int>-<int>" string; it is reported as "no info" below
            continue
    kp_ids = {row_id for row_id, _ in parsed_ids.values()}
    print(f"需要查询 {len(kp_ids)} 条 vala_kp_question 记录")

    # Query MySQL once with IN (...); skipped entirely when there is nothing to look up
    kp_data_map = {}
    if kp_ids:
        connection = get_mysql_connection()
        try:
            with connection.cursor() as cursor:
                placeholders = ','.join(['%s'] * len(kp_ids))
                sql = f"""
                    SELECT id, kp_id, category, skill, type, question
                    FROM vala_kp_question
                    WHERE id IN ({placeholders}) AND deleted_at IS NULL
                """
                cursor.execute(sql, tuple(kp_ids))
                results = cursor.fetchall()
                print(f"成功查询到 {len(results)} 条记录")
                for row in results:
                    kp_data_map[row['id']] = row
        finally:
            connection.close()

    # Assemble the result for every requested id
    question_info_map = {}
    for question_id in question_ids:
        question_info_map[question_id] = no_info
        if question_id not in parsed_ids:
            continue
        row_id, question_index = parsed_ids[question_id]
        kp_data = kp_data_map.get(row_id)
        if not kp_data:
            continue
        try:
            # The question column may be JSON text or an already decoded list
            question_list = kp_data['question']
            if isinstance(question_list, str):
                question_list = json.loads(question_list)

            # Pick the configuration at the requested index
            question_config = None
            if isinstance(question_list, list) and 0 <= question_index < len(question_list):
                question_config = json.dumps(question_list[question_index], ensure_ascii=False)

            question_info_map[question_id] = (
                kp_data['kp_id'],
                kp_data['category'],
                kp_data['skill'],
                kp_data['type'],
                question_config
            )
        except (KeyError, TypeError, ValueError) as e:
            print(f"处理题目信息出错 ({question_id}): {e}")
    return question_info_map
def export_step2(input_filename):
    """Requirement 2: aggregate the step 1 export per question.

    Every question in every record's ``question_list`` is identified by
    ``question -> id``. For each id the function collects the places it
    appeared (``unit<unit_id>-lesson<lesson_id>``), how often it was answered
    and how often correctly, adds the ``vala_kp_question`` details and writes
    the table next to the input file as ``<input stem>_stat.xlsx``.

    Args:
        input_filename (str): Path of the workbook produced by step 1.

    Returns:
        str: Path of the statistics workbook.
    """
    print("=" * 50)
    print("开始执行需求二:数据聚合统计")
    print("=" * 50)

    # Read the step 1 output
    print(f"正在读取文件: {input_filename}")
    df = pd.read_excel(input_filename, engine='openpyxl')
    print(f"读取到 {len(df)} 条记录")

    # Per-question accumulators
    question_stats = defaultdict(lambda: {
        'locations': set(),
        'total_count': 0,
        'correct_count': 0
    })
    parse_success_count = 0
    parse_fail_count = 0
    empty_question_list_count = 0
    processed_question_count = 0

    for idx, row in df.iterrows():
        unit_id = row['unit_id']
        lesson_id = row['lesson_id']
        question_list_str = row['question_list']

        # Decode question_list
        try:
            if pd.isna(question_list_str) or not question_list_str:
                question_list = []
                empty_question_list_count += 1
            else:
                question_list = json.loads(question_list_str)
                parse_success_count += 1
        except (TypeError, ValueError) as e:
            question_list = []
            parse_fail_count += 1
            if parse_fail_count <= 3:  # only the first few failures are printed
                print(f"[警告] 第 {idx+1} 条记录解析失败: {e}")

        # Valid JSON that is not a list holds no questions
        if not isinstance(question_list, list):
            question_list = []

        # Count every question of this record
        for question_item in question_list:
            if not isinstance(question_item, dict):
                continue
            question = question_item.get('question') or {}
            if not isinstance(question, dict):
                continue
            question_id = question.get('id')
            if not question_id:
                continue
            is_right = question_item.get('isRight', False)

            stats = question_stats[question_id]
            stats['locations'].add(f"unit{unit_id}-lesson{lesson_id}")
            stats['total_count'] += 1
            if is_right:
                stats['correct_count'] += 1
            processed_question_count += 1

    print(f"\n解析统计:")
    print(f" - 解析成功: {parse_success_count}")
    print(f" - 解析失败: {parse_fail_count}")
    print(f" - question_list 为空: {empty_question_list_count}")
    print(f" - 处理的题目总数: {processed_question_count}")
    print(f" - 聚合得到不同题目: {len(question_stats)}")

    # Fetch the details of all questions in one batch
    all_question_ids = list(question_stats.keys())
    question_info_map = get_all_kp_questions(all_question_ids)

    # Build the export rows
    print(f"\n正在构建导出数据...")
    export_data = []
    for idx, (question_id, stats) in enumerate(question_stats.items()):
        if (idx + 1) % 100 == 0:
            print(f" 已处理 {idx + 1}/{len(question_stats)} 道题目")

        kp_id, category, skill, type_field, question_config = question_info_map.get(
            question_id, (None, None, None, None, None)
        )

        # Accuracy as a percentage rounded to two decimals
        total = stats['total_count']
        correct = stats['correct_count']
        accuracy = round(correct / total * 100, 2) if total > 0 else 0

        export_data.append({
            '出现位置': ', '.join(sorted(stats['locations'])),
            'question_id': question_id,
            'kp_id': kp_id,
            'category': category,
            'skill': skill,
            'type': type_field,
            '题目配置': question_config,
            '总记录数量': total,
            '正确数量': correct,
            '正确率': accuracy
        })

    # splitext only touches the extension, so the output can never be the input file
    input_root, _ext = os.path.splitext(input_filename)
    output_stat_filename = f"{input_root}_stat.xlsx"
    df_stat = pd.DataFrame(export_data)
    print(f"\n正在导出到 Excel...")
    df_stat.to_excel(output_stat_filename, index=False, engine='openpyxl')
    print(f"成功导出 {len(export_data)} 道题目的统计数据到: {output_stat_filename}")
    return output_stat_filename
def main():
    """Run requirement 1 and requirement 2 in sequence and report the output files.

    Any exception is caught, reported and printed with its traceback.
    """
    banner = "=" * 50
    try:
        # Requirement 1: raw export
        step1_output = export_step1()
        print("\n")

        # Requirement 2: aggregation, fed with the file from requirement 1
        step2_output = export_step2(step1_output)

        print("\n" + banner)
        print("所有任务完成!")
        print(f"需求一输出文件: {step1_output}")
        print(f"需求二输出文件: {step2_output}")
        print(banner)
    except Exception as e:
        import traceback
        print(f"执行出错: {e}")
        traceback.print_exc()


if __name__ == "__main__":
    main()

View File

@ -1,181 +0,0 @@
"""
MYSQL_HOST=xxx
MYSQL_USERNAME=xxx
MYSQL_PASSWORD=xxx
MYSQL_DATABASE=xxx
MYSQL_PORT=xxx
以上环境变量已配置在 .env
我要导出一个数据表的某些记录 并添加一些字段
表名:middle_interaction_component
根据 c_id 过滤数据:
c_id为 7 字符串 其中 {两位季度编号}{两位单元编号}{三位组件编号} 过滤其中 单元编号部分为 00~20 以及 26 的对应记录 也就是 xx00xxx ~ xx20xxx 以及 xx26xxx 的记录
导出以下字段:
id
c_type
c_id
title
component_config
related_path
kp_relation_info
created_at
updated_at
新增以下字段:
1. 组件类型: 根据以下映射 c_type 转成中文名:xx互动
{
"词汇类": {
"物品互动": "mid_vocab_item",
"图片互动": "mid_vocab_image",
"填词互动": "mid_vocab_fillBlank",
"指令互动": "mid_vocab_instruction"
},
"句子类": {
"对话互动": "mid_sentence_dialogue",
"语音互动": "mid_sentence_voice",
"材料互动": "mid_sentence_material",
"造句互动": "mid_sentence_makeSentence"
},
"语法类": {
"挖空互动": "mid_grammar_cloze",
"组句互动": "mid_grammar_sentence"
},
"发音类": {
"发音互动": "mid_pron_pron"
}
2. 是否关联了知识点: 如果 kp_relation_info 不为空 且包含至少一个具体的知识点编号 则为 "是" 否则为 "否"
有效关联知识点的一个样例数据:[{"kpId":"0326011","kpType":"sentence","kpTitle":"What does... look like?","kpSkill":"sentence_meaning","kpSkillName":"语义"}]
3. "是否已组课" 如果 related_path 不为空 则为 "是" 否则为 "否"
一个有效的 related_path 样例: {"packageId":13,"unitId":40,"lessonId":213,"packageIndex":3,"unitIndex":2,"lessonIndex":2}
4. 前置对话:
component_config 中的 preDialog 字段 如果不存在 则为空
{"asrPrompt":"","cId":"0326022","cType":"mid_sentence_dialogue","meaning":"语义;语音","mode":"read","postDialog":[{"content":"Leave it to me.","npcId":540,"npcName":"Victoria","type":"npc"}],"preDialog":[{"content":"But do we still have time?","npcId":30,"type":"user"}],"question":{"content":"What if we miss the spaceship?","mode":"read","npcId":30,"type":"user"},"resourceMapping":{"Medic":503},"title":"询问万一错过飞船怎么办"}
5. "后置对话":
component_config 中的 postDialog 字段 如果不存在 则为空
6. 前置/后置对话中非user角色数量
component_config 中的 preDialog 以及 postDialog 字段中 统计所有 type npc ,根据 npcId 去重后的角色数量
例如
---
前置对话
[{"content":"But do we still have time?","npcId":30,"type":"user"}]
后置对话
[{"content":"Leave it to me.","npcId":540,"npcName":"Victoria","type":"npc"}]
非user角色数量 1
---
---
前置对话
[{"content":"But do we still have time?","npcId":31,"type":"npc","npcName":"Ben"}]
后置对话
[{"content":"Leave it to me.","npcId":540,"npcName":"Victoria","type":"npc"}]
非user角色数量 2
---
最终输出一个 excel文档
"""
import os
import json
from datetime import datetime
import pymysql
import pandas as pd
from dotenv import load_dotenv
load_dotenv()
# Maps each c_type code to the Chinese display name of the component.
TYPE_MAP = dict(
    [
        ("mid_vocab_item", "物品互动"),
        ("mid_vocab_image", "图片互动"),
        ("mid_vocab_fillBlank", "填词互动"),
        ("mid_vocab_instruction", "指令互动"),
        ("mid_sentence_dialogue", "对话互动"),
        ("mid_sentence_voice", "语音互动"),
        ("mid_sentence_material", "材料互动"),
        ("mid_sentence_makeSentence", "造句互动"),
        ("mid_grammar_cloze", "挖空互动"),
        ("mid_grammar_sentence", "组句互动"),
        ("mid_pron_pron", "发音互动"),
    ]
)
def get_data():
    """Load the selected rows of middle_interaction_component into a DataFrame.

    Rows are kept when characters 3-4 of the 7-character c_id (the unit
    number) are 00-20 or 26. Connection settings are read from the MYSQL_*
    environment variables.

    Returns:
        pandas.DataFrame with the columns named in the SELECT list.
    """
    conn = pymysql.connect(
        host=os.getenv('MYSQL_HOST'), port=int(os.getenv('MYSQL_PORT', 3306)),
        user=os.getenv('MYSQL_USERNAME'), password=os.getenv('MYSQL_PASSWORD'),
        database=os.getenv('MYSQL_DATABASE'), charset='utf8mb4'
    )
    try:
        # `_` matches exactly one character in LIKE, so every pattern pins the
        # two-digit unit number inside a 7-character c_id.
        conditions = [f"c_id LIKE '__{i:02d}___'" for i in range(21)] + ["c_id LIKE '__26___'"]
        where_clause = " OR ".join(conditions)
        sql = f"""SELECT id, c_type, c_id, title, component_config, related_path,
kp_relation_info, created_at, updated_at
FROM middle_interaction_component WHERE {where_clause}"""
        return pd.read_sql(sql, conn)
    finally:
        # Release the connection even when the query raises.
        conn.close()
def process_data(df):
    """Add the derived export columns to ``df`` and return it.

    New columns: 组件类型, 是否关联了知识点, 是否已组课, 前置对话, 后置对话 and
    前置/后置对话中非user角色数量. The frame is modified in place.

    Args:
        df: DataFrame with c_type, kp_relation_info, related_path and
            component_config columns; the last three are parsed with
            ``json.loads``.

    Returns:
        The same DataFrame with the extra columns attached.
    """
    # NOTE(review): the source as received returned "" on every branch of the
    # two yes/no columns, which makes them constant. "是"/"否" is assumed from
    # the column names — confirm against the original script.
    yes, no = "是", "否"
    # Errors that mean "this cell is not JSON of the expected shape".
    bad_payload = (ValueError, TypeError, AttributeError)

    # Unknown c_type values fall back to the raw code.
    df['组件类型'] = df['c_type'].map(TYPE_MAP).fillna(df['c_type'])

    def check_kp(kp_info):
        """Return yes when the payload is a list with at least one non-empty kpId."""
        if not kp_info:
            return no
        try:
            data = json.loads(kp_info)
            linked = isinstance(data, list) and any(item.get('kpId') for item in data)
        except bad_payload:
            return no
        return yes if linked else no

    df['是否关联了知识点'] = df['kp_relation_info'].apply(check_kp)

    def check_lesson(path):
        """Return yes when related_path decodes to a non-empty JSON value."""
        if not path:
            return no
        try:
            return yes if json.loads(path) else no
        except bad_payload:
            return no

    df['是否已组课'] = df['related_path'].apply(check_lesson)

    def extract_dialog(config, dialog_type):
        """Return the named dialog list re-serialised as JSON, or "" when absent."""
        if not config:
            return ""
        try:
            dialog = json.loads(config).get(dialog_type, [])
            return json.dumps(dialog, ensure_ascii=False) if dialog else ""
        except bad_payload:
            return ""

    def count_npc(config):
        """Count distinct npcId values among type == 'npc' entries of both dialogs."""
        if not config:
            return 0
        try:
            data = json.loads(config)
            npc_ids = set()
            for dialog in ['preDialog', 'postDialog']:
                # `or []` keeps an explicit null dialog from discarding the
                # NPCs found in the other dialog.
                for item in data.get(dialog) or []:
                    if item.get('type') == 'npc' and 'npcId' in item:
                        npc_ids.add(item['npcId'])
            return len(npc_ids)
        except bad_payload:
            return 0

    df['前置对话'] = df['component_config'].apply(lambda x: extract_dialog(x, 'preDialog'))
    df['后置对话'] = df['component_config'].apply(lambda x: extract_dialog(x, 'postDialog'))
    df['前置/后置对话中非user角色数量'] = df['component_config'].apply(count_npc)
    return df
if __name__ == "__main__":
    # Fetch, enrich, then write a timestamped workbook into the working directory.
    df = process_data(get_data())
    filename = f"middle_interaction_component_export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
    df.to_excel(filename, index=False)
    print(f"导出完成: {filename}")

View File

@ -1,385 +0,0 @@
"""
导出 流式语音音频 脚本
v1.0
---
原始数据存储于ES数据库中
索引: llm_realtime_asr_log
es相关配置通过以下环境变量
ES_HOST=xxx
ES_PORT=9200
ES_SCHEME=https
ES_USER=elastic
ES_PASSWORD=xxx 注意这里可能有特殊符号
需要配置的内容放置在脚本最开头
开始时间 (8位数字年月日)
截止时间 (8位数字年月日)
仅筛选 时间范围内的数据记录
可以基于 timestamp_int 字段内容进行时间筛选 格式样例:1,769,496,892
正常情况 每个 voice_id 会对应两条记录
可以 voice_id为单位
最终 按照每个 voice_id 聚合出以下数据:
asr_prompt 其中一条记录会有这个内容
result_str 其中一条记录会有这个内容
timestamp (两条记录都会有保留最新的一条对应的时间) 格式样例: 2023-12-12 12:12:12
voice_id
audio_url 按以下规则拼接: https://static.valavala.com/vala_llm/realtime_asr_audio_backup/online/{8位年月日}/{voice_id}.wav 8位年月日 基于 timestamp计算 格式 20260121这种
source 其中一条记录会有这个内容
最终导出一个excel
---
"""
import os
from datetime import datetime
import requests
import pandas as pd
from dotenv import load_dotenv
from collections import defaultdict
import urllib3
# ==================== Configuration ====================
START_DATE = "20251201" # Start date (8-digit YYYYMMDD)
END_DATE = "20260131" # End date (8-digit YYYYMMDD)
# =======================================================
# Load environment variables (python-dotenv).
load_dotenv()
# Elasticsearch connection settings, read from the environment.
ES_HOST = os.getenv("ES_HOST")
ES_PORT = int(os.getenv("ES_PORT", "9200"))
ES_SCHEME = os.getenv("ES_SCHEME", "https")
ES_USER = os.getenv("ES_USER", "elastic")
ES_PASSWORD = os.getenv("ES_PASSWORD")
ES_INDEX = "llm_realtime_asr_log"
# Page size and keep-alive used for the scroll requests.
SCROLL_SIZE = 1000
SCROLL_TIMEOUT = "5m"
def timestamp_int_from_date(date_str):
    """Return the whole-second Unix timestamp for midnight of a YYYYMMDD date.

    The date is parsed as a naive datetime, so ``timestamp()`` interprets it
    in the machine's local time zone.
    """
    return int(datetime.strptime(date_str, "%Y%m%d").timestamp())
def format_timestamp(ts):
    """Render a numeric Unix timestamp as ``YYYY-MM-DD HH:MM:SS`` in local time.

    Values that are not int or float are returned unchanged.
    """
    if not isinstance(ts, (int, float)):
        return ts
    moment = datetime.fromtimestamp(ts)
    return moment.strftime("%Y-%m-%d %H:%M:%S")
def generate_audio_url(voice_id, timestamp):
    """Build the backup audio URL for ``voice_id``.

    The folder segment is the local-time YYYYMMDD date of ``timestamp``.
    """
    moment = datetime.fromtimestamp(timestamp)
    date_str = moment.strftime("%Y%m%d")
    return f"https://static.valavala.com/vala_llm/realtime_asr_audio_backup/online/{date_str}/{voice_id}.wav"
def connect_es():
    """Probe the Elasticsearch root endpoint and report whether it answered.

    Returns:
        True when the GET succeeds without an HTTP error status, else False.
    """
    print("正在测试 Elasticsearch 连接...")

    uses_https = ES_SCHEME == "https"
    if uses_https:
        # Verification is switched off for https below, so mute the warning
        # urllib3 would otherwise emit.
        try:
            urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        except Exception:
            pass

    credentials = None
    if ES_USER and ES_PASSWORD:
        credentials = (ES_USER, ES_PASSWORD)

    try:
        probe = requests.get(
            f"{ES_SCHEME}://{ES_HOST}:{ES_PORT}",
            auth=credentials,
            timeout=10,
            verify=not uses_https,
        )
        probe.raise_for_status()
        print(f"✓ 成功连接到 Elasticsearch: {ES_HOST}:{ES_PORT}")
    except Exception as e:
        print(f"✗ 连接失败: {e}")
        return False
    return True
def query_data(start_date, end_date):
    """Run the initial scroll search for records inside the date range.

    Args:
        start_date: First day to include, as an 8-digit YYYYMMDD string.
        end_date: Last day to include (the whole day), same format.

    Returns:
        Tuple ``(response_json, scroll_id, total_hits)`` for the first page.

    Raises:
        RuntimeError: If the request fails or the response lacks the expected
            fields; the underlying error is chained as the cause.
    """
    start_ts = timestamp_int_from_date(start_date)
    # Add one day so the end date itself is covered by the `lt` bound.
    end_ts = timestamp_int_from_date(end_date) + 86400
    print(f"\n开始查询数据...")
    print(f"时间范围: {start_date} ~ {end_date}")
    print(f"时间戳范围: {start_ts} ~ {end_ts}")

    # Certificate verification is disabled for https below; mute the warning.
    if ES_SCHEME == "https":
        try:
            urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        except Exception:
            pass

    base_url = f"{ES_SCHEME}://{ES_HOST}:{ES_PORT}"
    search_url = f"{base_url}/{ES_INDEX}/_search"
    headers = {"Content-Type": "application/json"}
    auth = (ES_USER, ES_PASSWORD) if ES_USER and ES_PASSWORD else None
    query = {
        "query": {
            "range": {
                "timestamp_int": {
                    "gte": start_ts,
                    "lt": end_ts
                }
            }
        },
        "sort": [{"timestamp_int": {"order": "asc"}}],
        "size": SCROLL_SIZE,
        # Without this, hits.total.value stops at a lower bound of 10,000 and
        # the reported total / progress percentage would be wrong.
        "track_total_hits": True
    }
    try:
        # The `scroll` parameter opens a scroll context for paging.
        params = {"scroll": SCROLL_TIMEOUT}
        response = requests.post(
            search_url,
            headers=headers,
            json=query,
            auth=auth,
            params=params,
            timeout=30,
            verify=False if ES_SCHEME == "https" else True
        )
        response.raise_for_status()
        data = response.json()
        scroll_id = data.get("_scroll_id")
        total_hits = data["hits"]["total"]["value"]
        print(f"✓ 查询完成,共找到 {total_hits} 条记录")
        return data, scroll_id, total_hits
    except Exception as e:
        raise RuntimeError(f"ES查询失败: {e}") from e
def aggregate_by_voice_id(response, scroll_id, total_hits):
    """Walk the scroll result set and group every record by voice_id.

    Args:
        response: JSON of the first search page.
        scroll_id: Scroll id returned with that page.
        total_hits: Reported total, used only for the progress display.

    Returns:
        defaultdict mapping each voice_id to the list of its ``_source``
        dicts. If fetching a later page fails, the error is printed and the
        records gathered so far are returned.
    """
    voice_data = defaultdict(list)
    processed_count = 0
    print("\n开始处理数据...")

    # Certificate verification is disabled for https below; mute the warning.
    if ES_SCHEME == "https":
        try:
            urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        except Exception:
            pass

    base_url = f"{ES_SCHEME}://{ES_HOST}:{ES_PORT}"
    scroll_url = f"{base_url}/_search/scroll"
    headers = {"Content-Type": "application/json"}
    auth = (ES_USER, ES_PASSWORD) if ES_USER and ES_PASSWORD else None

    while True:
        hits = response["hits"]["hits"]
        if not hits:
            break
        for hit in hits:
            source = hit["_source"]
            voice_id = source.get("voice_id")
            if voice_id:
                voice_data[voice_id].append(source)
            processed_count += 1

        # Progress line; guard the division so a zero total cannot crash the run.
        progress = (processed_count / total_hits) * 100 if total_hits else 0.0
        print(f"\r处理进度: {processed_count}/{total_hits} ({progress:.1f}%)", end="")

        # Fetch the next page.
        try:
            scroll_response = requests.post(
                scroll_url,
                headers=headers,
                json={
                    "scroll": SCROLL_TIMEOUT,
                    "scroll_id": scroll_id
                },
                auth=auth,
                timeout=30,
                verify=False if ES_SCHEME == "https" else True
            )
            scroll_response.raise_for_status()
            response = scroll_response.json()
            # The scroll id may change between pages; keep the newest one.
            scroll_id = response.get("_scroll_id", scroll_id)
        except Exception as e:
            print(f"\n✗ 获取下一批数据失败: {e}")
            break

    print(f"\n✓ 数据处理完成,共处理 {processed_count} 条记录")
    print(f"✓ 找到 {len(voice_data)} 个唯一的 voice_id")

    # Best-effort release of the scroll context; skipped when there is no id.
    if scroll_id:
        try:
            clear_scroll_url = f"{base_url}/_search/scroll"
            requests.delete(
                clear_scroll_url,
                headers=headers,
                json={"scroll_id": [scroll_id]},
                auth=auth,
                timeout=10,
                verify=False if ES_SCHEME == "https" else True
            )
        except Exception:
            pass  # a failed cleanup does not affect the collected data
    return voice_data
def merge_voice_records(voice_data):
    """Collapse each voice_id's two raw records into one export row.

    voice_ids that do not have exactly two records are counted as invalid and
    left out. For each kept voice_id, asr_prompt / result_str / source take
    the last truthy value found across its records, and the timestamp and
    audio URL are derived from the larger ``timestamp_int``.

    Returns:
        List of merged row dicts.
    """
    print("\n开始聚合 voice_id 数据...")
    merged_data = []
    invalid_count = 0

    for voice_id, pair in voice_data.items():
        if len(pair) != 2:
            invalid_count += 1
            continue

        row = {
            "voice_id": voice_id,
            "asr_prompt": None,
            "result_str": None,
            "timestamp": None,
            "source": None,
            "audio_url": None
        }

        # Per field, a later record's truthy value overrides an earlier one.
        for field in ("asr_prompt", "result_str", "source"):
            for rec in pair:
                if rec.get(field):
                    row[field] = rec[field]

        latest_ts = max(rec.get("timestamp_int", 0) for rec in pair)
        row["timestamp"] = format_timestamp(latest_ts)
        row["audio_url"] = generate_audio_url(voice_id, latest_ts)
        merged_data.append(row)

    valid_count = len(merged_data)
    print(f"✓ 聚合完成")
    print(f" - 有效记录2条/voice_id: {valid_count}")
    print(f" - 无效记录非2条/voice_id: {invalid_count}")
    return merged_data
def export_to_excel(data, start_date, end_date):
    """Write the merged rows to ``output/realtime_asr_export_<start>_<end>.xlsx``.

    Prints a warning and returns without writing when ``data`` is empty.
    """
    if not data:
        print("\n警告: 没有数据可导出")
        return

    print(f"\n开始导出数据到 Excel...")

    # Fix the column order of the sheet.
    columns = ["voice_id", "asr_prompt", "result_str", "timestamp", "audio_url", "source"]
    frame = pd.DataFrame(data)[columns]

    target_dir = "output"
    os.makedirs(target_dir, exist_ok=True)
    filepath = os.path.join(target_dir, f"realtime_asr_export_{start_date}_{end_date}.xlsx")

    frame.to_excel(filepath, index=False, engine="openpyxl")
    print(f"✓ 数据已导出到: {filepath}")
    print(f"✓ 共导出 {len(frame)} 条记录")
def main():
    """Run the export: connectivity check, query, grouping, merge, Excel output.

    Any exception is printed together with its traceback.
    """
    print("=" * 60)
    print("流式语音 ASR 数据导出工具 v1.0")
    print("=" * 60)
    start_time = datetime.now()

    try:
        if not connect_es():
            raise Exception("无法连接到 Elasticsearch请检查配置")

        response, scroll_id, total_hits = query_data(START_DATE, END_DATE)
        if total_hits == 0:
            print("\n没有找到符合条件的数据")
            return

        # Group raw records, merge each pair, then write the workbook.
        grouped = aggregate_by_voice_id(response, scroll_id, total_hits)
        export_to_excel(merge_voice_records(grouped), START_DATE, END_DATE)

        duration = (datetime.now() - start_time).total_seconds()
        print(f"\n{'=' * 60}")
        print(f"✓ 任务完成! 总耗时: {duration:.2f}")
        print(f"{'=' * 60}")
    except Exception as e:
        print(f"\n✗ 错误: {str(e)}")
        import traceback

        traceback.print_exc()


if __name__ == "__main__":
    main()

View File

@ -1,121 +0,0 @@
"""
MYSQL_HOST=xxx
MYSQL_USERNAME=xxx
MYSQL_PASSWORD=xxx
MYSQL_DATABASE=xxx
MYSQL_PORT=xxx
以上环境变量已配置在 .env
我要导出一个数据表的某些记录 并添加一些字段
表名:vala_resource_base
过滤全部 type == "角色" 的记录
导出以下字段:
id
cn_name
en_name
最终输出到 excel文档 "角色资源导出_251031.xlsx"
"""
import os
import pandas as pd
import pymysql
from dotenv import load_dotenv
from datetime import datetime
def load_config():
    """Build the pymysql connection settings from the MYSQL_* environment variables.

    Returns:
        dict with host, user, password, database, port and charset entries.

    Raises:
        ValueError: If MYSQL_HOST, MYSQL_USERNAME, MYSQL_PASSWORD or
            MYSQL_DATABASE is not set; the message names the missing variable.
    """
    load_dotenv()

    # Config key -> environment variable that supplies it.
    required = {
        'host': 'MYSQL_HOST',
        'user': 'MYSQL_USERNAME',
        'password': 'MYSQL_PASSWORD',
        'database': 'MYSQL_DATABASE',
    }
    # Parsed first so a malformed port is reported as before.
    port = int(os.getenv('MYSQL_PORT', 3306))

    config = {}
    for key, env_name in required.items():
        value = os.getenv(env_name)
        if value is None:
            # Report the variable the user has to set, not the internal key.
            raise ValueError(f"环境变量 {env_name} 未配置")
        config[key] = value

    config['port'] = port
    config['charset'] = 'utf8mb4'
    return config
def connect_mysql(config):
    """Open a pymysql connection from ``config`` and log the outcome.

    A connection error is printed and then re-raised unchanged.
    """
    try:
        conn = pymysql.connect(**config)
        print("MySQL数据库连接成功")
    except Exception as e:
        print(f"MySQL数据库连接失败: {e}")
        raise
    return conn
def export_role_resources():
    """Export id / cn_name / en_name of every '角色' resource to an Excel file.

    Returns:
        Name of the workbook that was written.

    Raises:
        Exception: Any failure is printed and then re-raised.
    """
    try:
        config = load_config()
        connection = connect_mysql(config)

        sql = """
        SELECT
            id,
            cn_name,
            en_name
        FROM vala_resource_base
        WHERE type = '角色'
        ORDER BY id
        """
        print("开始查询数据...")
        try:
            df = pd.read_sql(sql, connection)
        finally:
            # Close even when the query fails so the connection is not leaked.
            connection.close()
        print(f"查询到 {len(df)} 条记录")

        output_filename = "角色资源导出_251031.xlsx"
        df.to_excel(output_filename, index=False, engine='openpyxl')
        print(f"数据已成功导出到: {output_filename}")
        print(f"导出字段: {list(df.columns)}")
        print(f"导出记录数: {len(df)}")

        # Show the first rows as a quick sanity check.
        if len(df) > 0:
            print("\n数据预览:")
            print(df.head())
        return output_filename
    except Exception as e:
        print(f"导出过程中发生错误: {e}")
        raise
if __name__ == "__main__":
    # Announce the run, export, and report either the file name or the failure.
    try:
        print("开始导出角色资源数据...")
        print(f"执行时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        output_file = export_role_resources()
    except Exception as e:
        print(f"\n❌ 导出失败: {e}")
    else:
        try:
            print(f"\n✅ 导出完成! 文件保存为: {output_file}")
        except Exception as e:
            print(f"\n❌ 导出失败: {e}")

View File

@ -1,343 +0,0 @@
"""
** 不要改动我的需求描述直接在需求后面写代码即可 **
需求一:
先写一个最简单脚本 实现下面sql功能
SELECT * FROM `vala_game_info` WHERE id > 0 AND `vala_game_info`.`deleted_at` IS NULL ORDER BY season_package_id asc,`index` asc
环境变量读取:
MYSQL_HOST=xxx
MYSQL_USERNAME=xxx
MYSQL_PASSWORD=xxx
MYSQL_DATABASE=xxx
MYSQL_PORT=xxx
-----------
需求二:
PGsql数据库中 筛选数据
数据库相关配置 .env中读取:
PG_DB_HOST = xxx
PG_DB_PORT = xxx
PG_DB_USER = xxx
PG_DB_PASSWORD = xxx
PG_DB_DATABASE = xxx
读取以下数据表:user_unit_challenge_question_result
支持输入时间范围
起始时间 截止时间 配置格式: "20250110"
数据表中的时间字段为 updated_at , 格式样例: "2025-11-05 19:35:46.698246+08:00"
在这些时间范围内筛选数据 (要求deleted_at字段内容为null)
导出以下字段:
user_id
unit_id 读取每条记录的story_id 根据 get_id_2_unit_index 函数返回的映射表 映射到 unit_id
score_text
question_list
updated_at
category
play_time_seconds 读取 play_time 把ms数据转换为秒 保留整数部分
导出为excel文件
配置参数直接在脚本开头给出即可
需求三:
需求二中 作为步骤一
本需求为步骤二 基于 步骤一的 文档
进行数据聚合
根据每个unit_id + category 进行分组
统计每个分组下的以下数值:
总记录数量
Perfect数量 (读取 score_text =="Perfect")
Good数量 (读取 score_text =="Good")
Oops数量 (读取 score_text =="Oops")
Perfect率 (Perfect数量 / 总记录数量)
Good率 (Good数量 / 总记录数量)
Oops率 (Oops数量 / 总记录数量)
导出为excel 命名为 步骤一名字_stats.xlsx
"""
import os
import pymysql
import psycopg2
from psycopg2.extras import RealDictCursor
from datetime import datetime
import pandas as pd
from dotenv import load_dotenv
# 加载环境变量
load_dotenv()
# ============ Configuration ============
START_DATE = "20250915" # Start of the export range (YYYYMMDD)
END_DATE = "20251128" # End of the export range (YYYYMMDD)
OUTPUT_NAME = "unit_challenge_data_{}_{}.xlsx".format(START_DATE, END_DATE) # Output file name
OUTPUT_FILENAME = os.path.join("./output", OUTPUT_NAME)
# =======================================
def get_id_2_unit_index():
    """Map each vala_game_info id to its position in the ordered result set.

    Rows are read with deleted_at IS NULL, ordered by season_package_id and
    then `index`; the value stored for an id is the row's zero-based position
    in that ordering.

    Returns:
        dict ``{id: position}``. None is returned when the MySQL settings are
        incomplete or when any error occurs (the error is printed).
    """
    db_host = os.getenv('MYSQL_HOST')
    db_user = os.getenv('MYSQL_USERNAME')
    db_password = os.getenv('MYSQL_PASSWORD')
    db_name = os.getenv('MYSQL_DATABASE')
    db_port = os.getenv('MYSQL_PORT')

    # The port is optional; everything else must be present.
    if any(not setting for setting in (db_host, db_user, db_password, db_name)):
        print("Error: Missing database configuration in .env file.")
        print("Ensure MYSQL_HOST, MYSQL_USERNAME, MYSQL_PASSWORD, MYSQL_DATABASE are set.")
        return

    try:
        connection = pymysql.connect(
            host=db_host,
            user=db_user,
            password=db_password,
            database=db_name,
            port=int(db_port) if db_port else 3306,
            cursorclass=pymysql.cursors.DictCursor
        )
        print(f"Connected to database: {db_host}")
        try:
            with connection.cursor() as cursor:
                sql = """
                SELECT *
                FROM `vala_game_info`
                WHERE id > 0
                AND `vala_game_info`.`deleted_at` IS NULL
                ORDER BY season_package_id asc, `index` asc
                """
                print(f"Executing SQL: {sql}")
                cursor.execute(sql)
                rows = cursor.fetchall()
                print(f"Total records found: {len(rows)}")
                print("-" * 30)
                print(rows)

                # Position in the ordered result set becomes the unit index.
                id_2_unit_index = {row['id']: position for position, row in enumerate(rows)}
                print("映射结果:")
                print(id_2_unit_index)
                print("-" * 30)
                print("Done.")
                return id_2_unit_index
        finally:
            connection.close()
    except Exception as e:
        print(f"An error occurred: {e}")
def export_unit_challenge_data(start_date, end_date, output_filename):
    """Export unit-challenge results from PostgreSQL to an Excel file.

    Rows of user_unit_challenge_question_result with deleted_at IS NULL and
    updated_at inside the date range are exported. story_id is translated to
    unit_id through ``get_id_2_unit_index`` and play_time (milliseconds) is
    converted to whole seconds.

    Args:
        start_date: First day to include, as "YYYYMMDD".
        end_date: Last day to include (the whole day), as "YYYYMMDD".
        output_filename: Path of the workbook to write; its directory is
            created when missing.

    Returns:
        None. Configuration problems and runtime errors are printed.
    """
    # Local import: the file imports only `datetime` from this module.
    from datetime import timedelta

    pg_host = os.getenv('PG_DB_HOST')
    pg_port = os.getenv('PG_DB_PORT')
    pg_user = os.getenv('PG_DB_USER')
    pg_password = os.getenv('PG_DB_PASSWORD')
    pg_database = os.getenv('PG_DB_DATABASE')

    if not all([pg_host, pg_port, pg_user, pg_password, pg_database]):
        print("Error: Missing PostgreSQL database configuration in .env file.")
        print("Ensure PG_DB_HOST, PG_DB_PORT, PG_DB_USER, PG_DB_PASSWORD, PG_DB_DATABASE are set.")
        return

    print("正在获取 unit_id 映射表...")
    id_2_unit_index = get_id_2_unit_index()
    if not id_2_unit_index:
        print("Error: Failed to get id_2_unit_index mapping.")
        return

    # "20250110" -> "2025-01-10 00:00:00". The upper bound is midnight of the
    # day AFTER end_date so that, with the `<` comparison below, records from
    # the end date itself are included.
    start_datetime = datetime.strptime(start_date, "%Y%m%d").strftime("%Y-%m-%d 00:00:00")
    end_exclusive = datetime.strptime(end_date, "%Y%m%d") + timedelta(days=1)
    end_datetime = end_exclusive.strftime("%Y-%m-%d 00:00:00")
    print(f"时间范围: {start_datetime} ~ {end_datetime}")

    try:
        connection = psycopg2.connect(
            host=pg_host,
            port=int(pg_port),
            user=pg_user,
            password=pg_password,
            database=pg_database,
            cursor_factory=RealDictCursor
        )
        print(f"已连接到 PostgreSQL 数据库: {pg_host}")
        try:
            with connection.cursor() as cursor:
                sql = """
                SELECT
                    user_id,
                    story_id,
                    score_text,
                    question_list,
                    updated_at,
                    category,
                    play_time
                FROM user_unit_challenge_question_result
                WHERE deleted_at IS NULL
                AND updated_at >= %s
                AND updated_at < %s
                ORDER BY updated_at ASC
                """
                print(f"执行查询...")
                cursor.execute(sql, (start_datetime, end_datetime))
                results = cursor.fetchall()
                print(f"查询到 {len(results)} 条记录")

                export_data = []
                for row in results:
                    # Unknown story ids map to None.
                    unit_id = id_2_unit_index.get(row['story_id'], None)
                    # Milliseconds -> whole seconds; missing values become 0.
                    play_time_seconds = row['play_time'] // 1000 if row['play_time'] else 0
                    # Excel cannot store timezone-aware datetimes.
                    updated_at = row['updated_at']
                    if isinstance(updated_at, datetime):
                        updated_at = updated_at.replace(tzinfo=None)
                    export_data.append({
                        'user_id': row['user_id'],
                        'unit_id': unit_id,
                        'score_text': row['score_text'],
                        'question_list': row['question_list'],
                        'updated_at': updated_at,
                        'category': row['category'],
                        'play_time_seconds': play_time_seconds
                    })

                if export_data:
                    # Create the target directory so a fresh checkout does not
                    # fail at write time.
                    output_dir = os.path.dirname(output_filename)
                    if output_dir:
                        os.makedirs(output_dir, exist_ok=True)
                    df = pd.DataFrame(export_data)
                    df.to_excel(output_filename, index=False, engine='openpyxl')
                    print(f"数据已导出到: {output_filename}")
                    print(f"共导出 {len(export_data)} 条记录")
                else:
                    print("没有数据可导出")
        finally:
            connection.close()
            print("数据库连接已关闭")
    except Exception as e:
        print(f"发生错误: {e}")
def aggregate_stats(input_filename):
    """Aggregate the step-one workbook into per-(unit_id, category) statistics.

    Reads ``input_filename``, counts Perfect / Good / Oops rows in each group,
    derives each share rounded to two decimals, and writes the result next to
    the input as ``<name>_stats.xlsx``. Any error is printed, not raised.
    """
    try:
        print(f"正在读取文件: {input_filename}")
        df = pd.read_excel(input_filename, engine='openpyxl')
        print(f"读取到 {len(df)} 条记录")

        def share(count, total):
            # Fraction of the group, guarded against an empty group.
            return round(count / total if total > 0 else 0, 2)

        stats_data = []
        # dropna=False keeps groups whose unit_id or category is missing.
        for (unit_id, category), group in df.groupby(['unit_id', 'category'], dropna=False):
            total_count = len(group)
            labels = group['score_text']
            perfect_count = (labels == 'Perfect').sum()
            good_count = (labels == 'Good').sum()
            oops_count = (labels == 'Oops').sum()
            stats_data.append({
                'unit_id': unit_id,
                'category': category,
                '总记录数量': total_count,
                'Perfect数量': perfect_count,
                'Good数量': good_count,
                'Oops数量': oops_count,
                'Perfect率': share(perfect_count, total_count),
                'Good率': share(good_count, total_count),
                'Oops率': share(oops_count, total_count)
            })

        # "<input without extension>_stats.xlsx"
        output_filename = f"{os.path.splitext(input_filename)[0]}_stats.xlsx"

        if not stats_data:
            print("没有数据可统计")
        else:
            pd.DataFrame(stats_data).to_excel(output_filename, index=False, engine='openpyxl')
            print(f"统计数据已导出到: {output_filename}")
            print(f"{len(stats_data)} 个分组")
    except Exception as e:
        print(f"数据聚合时发生错误: {e}")
if __name__ == "__main__":
    banner = "=" * 50

    # Step one: export the raw rows.
    print(banner)
    print("步骤一:导出原始数据")
    print(banner)
    export_unit_challenge_data(START_DATE, END_DATE, OUTPUT_FILENAME)

    # Step two: aggregate the exported workbook.
    print("\n" + banner)
    print("步骤二:数据聚合统计")
    print(banner)
    aggregate_stats(OUTPUT_FILENAME)

    print("\n" + banner)
    print("全部完成!")
    print(banner)

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@ -1,480 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
用户音频数据筛选脚本
功能从PostgreSQL数据库的分表(user_component_play_record_0~7)中提取指定时间段的用户音频数据
主要逻辑
1. 数据源遍历 user_component_play_record_0 user_component_play_record_7
2. 筛选条件
- 时间范围可配置
- 数据有效性user_behavior_info 非空且包含 userAudio pronunciationScore
3. 采样规则
- 目标总数可配置
- 用户限制可配置
- 随机策略先随机打乱再按用户分组限制最后补齐或截断至目标数量
4. 输出导出为Excel文件
包含字段
- index: 序号
- source_table: 来源表名
- created_at: 创建时间
- user_id: 用户ID
- component_unique_code: 组件唯一标识
- pronunciationScore: 发音评分
- userAudio: 音频链接
- expressContent: 朗读内容文本
"""
import os
import json
import re
import random
import psycopg2
import pymysql
import pandas as pd
from datetime import datetime
from typing import List, Dict, Any
from dotenv import load_dotenv
# Script configuration.
CONFIG = {
# Time window applied to created_at.
'START_TIME': '2025-11-10 00:00:00+08:00',
'END_TIME': '2025-12-10 23:59:59+08:00',
# Sampling parameters.
'TARGET_TOTAL': 10000, # Target number of samples in the export
'MAX_PER_USER': 20, # Maximum samples kept per user
'TABLE_COUNT': 8, # Number of sharded tables (suffixes 0..N-1)
# Component type filter.
'C_TYPE_FILTER': 'mid_sentence_dialogue' # Only this c_type is selected
}
class AudioDataExtractor:
def __init__(self):
# 加载环境变量
load_dotenv()
# PostgreSQL数据库连接配置
self.db_config = {
'host': os.getenv('PG_DB_HOST'),
'port': os.getenv('PG_DB_PORT'),
'user': os.getenv('PG_DB_USER'),
'password': os.getenv('PG_DB_PASSWORD'),
'database': os.getenv('PG_DB_DATABASE')
}
# MySQL数据库连接配置
self.mysql_config = {
'host': os.getenv('MYSQL_HOST'),
'user': os.getenv('MYSQL_USERNAME'),
'password': os.getenv('MYSQL_PASSWORD'),
'database': "vala_test",
'port': int(os.getenv('MYSQL_PORT', 3306)),
'charset': 'utf8mb4'
}
# 分表名称列表
self.table_names = [f'user_component_play_record_{i}' for i in range(CONFIG['TABLE_COUNT'])]
# 目标总数
self.target_total = CONFIG['TARGET_TOTAL']
# 每个用户最多记录数
self.max_per_user = CONFIG['MAX_PER_USER']
def get_db_connection(self):
"""获取数据库连接"""
try:
conn = psycopg2.connect(**self.db_config)
return conn
except Exception as e:
print(f"数据库连接失败: {e}")
raise
def extract_audio_info(self, user_behavior_info: str) -> Dict[str, Any]:
"""从user_behavior_info字段中提取音频信息"""
try:
behavior_data = json.loads(user_behavior_info)
if isinstance(behavior_data, list) and len(behavior_data) > 0:
# 取第一个元素
data = behavior_data[0]
if 'userAudio' in data and 'pronunciationScore' in data:
return {
'userAudio': data.get('userAudio'),
'pronunciationScore': data.get('pronunciationScore'),
'expressContent': data.get('expressContent')
}
except (json.JSONDecodeError, KeyError, IndexError):
pass
return {}
def query_table_data(self, table_name: str) -> List[Dict]:
"""查询单个表的数据"""
conn = self.get_db_connection()
cursor = conn.cursor()
try:
query = f"""
SELECT user_id, component_unique_code, c_type, c_id, created_at, user_behavior_info
FROM {table_name}
WHERE created_at >= '{CONFIG['START_TIME']}'
AND created_at <= '{CONFIG['END_TIME']}'
AND c_type = '{CONFIG['C_TYPE_FILTER']}'
AND user_behavior_info IS NOT NULL
AND user_behavior_info != ''
"""
cursor.execute(query)
rows = cursor.fetchall()
results = []
for row in rows:
user_id, component_unique_code, c_type, c_id, created_at, user_behavior_info = row
# 提取音频信息
audio_info = self.extract_audio_info(user_behavior_info)
if audio_info and 'userAudio' in audio_info and 'pronunciationScore' in audio_info:
results.append({
'source_table': table_name,
'user_id': user_id,
'component_unique_code': component_unique_code,
'c_type': c_type,
'c_id': c_id,
'created_at': created_at,
'userAudio': audio_info['userAudio'],
'pronunciationScore': audio_info['pronunciationScore'],
'expressContent': audio_info.get('expressContent')
})
return results
finally:
cursor.close()
conn.close()
def get_component_configs(self, data: List[Dict]) -> Dict[str, str]:
"""从MySQL批量获取组件配置信息"""
# 提取所有unique的(c_type, c_id)组合
unique_components = set()
for record in data:
if 'c_type' in record and 'c_id' in record:
unique_components.add((record['c_type'], record['c_id']))
if not unique_components:
print("没有需要查询的组件")
return {}
print(f"正在从MySQL查询 {len(unique_components)} 个组件的配置信息...")
# 连接MySQL
try:
conn = pymysql.connect(**self.mysql_config)
cursor = conn.cursor()
# 存储组件配置的字典key为"c_type-c_id"
component_configs = {}
# 批量查询
for c_type, c_id in unique_components:
query = """
SELECT component_config
FROM middle_interaction_component
WHERE c_type = %s AND c_id = %s
"""
cursor.execute(query, (c_type, c_id))
result = cursor.fetchone()
if result and result[0]:
key = f"{c_type}-{c_id}"
component_configs[key] = result[0]
cursor.close()
conn.close()
print(f"成功查询到 {len(component_configs)} 个组件配置")
return component_configs
except Exception as e:
print(f"查询MySQL组件配置失败: {e}")
return {}
@staticmethod
def clean_text(text: str) -> str:
"""清理文本:转小写,去除标点符号和空格"""
if not text:
return ""
# 转小写
text = text.lower()
# 去除标点符号和特殊字符,只保留字母和数字
text = re.sub(r'[^\w\s]', '', text)
# 去除多余空格
text = re.sub(r'\s+', '', text)
return text
@staticmethod
def levenshtein_distance(s1: str, s2: str) -> int:
"""计算两个字符串的Levenshtein编辑距离"""
if len(s1) < len(s2):
return AudioDataExtractor.levenshtein_distance(s2, s1)
if len(s2) == 0:
return len(s1)
previous_row = range(len(s2) + 1)
for i, c1 in enumerate(s1):
current_row = [i + 1]
for j, c2 in enumerate(s2):
# 插入、删除、替换的成本
insertions = previous_row[j + 1] + 1
deletions = current_row[j] + 1
substitutions = previous_row[j] + (c1 != c2)
current_row.append(min(insertions, deletions, substitutions))
previous_row = current_row
return previous_row[-1]
def parse_and_filter_by_config(self, data: List[Dict], component_configs: Dict[str, str]) -> List[Dict]:
"""解析组件配置并筛选question.mode == 'read'的记录"""
print(f"\n开始根据组件配置筛选数据...")
print(f"筛选前数据量: {len(data)}")
filtered_data = []
skipped_no_config = 0
skipped_invalid_json = 0
skipped_wrong_mode = 0
for record in data:
c_type = record.get('c_type')
c_id = record.get('c_id')
if not c_type or not c_id:
continue
# 获取组件配置
key = f"{c_type}-{c_id}"
config_str = component_configs.get(key)
if not config_str:
skipped_no_config += 1
continue
try:
# 解析JSON配置
config = json.loads(config_str)
# 检查question.mode == "read"
question = config.get('question', {})
mode = question.get('mode')
if mode == 'read':
# 提取question.content作为refText
ref_text = question.get('content', '')
record['refText'] = ref_text
# 计算编辑距离
express_content = record.get('expressContent', '')
# 清理文本(去除标点和大小写差异)
cleaned_express = self.clean_text(express_content)
cleaned_ref = self.clean_text(ref_text)
# 计算编辑距离
edit_distance = self.levenshtein_distance(cleaned_express, cleaned_ref)
record['editDistance'] = edit_distance
# 计算相对编辑距离
ref_len = len(cleaned_ref)
if ref_len > 0:
relative_edit_distance = round(edit_distance / ref_len, 4)
else:
relative_edit_distance = 0
record['relativeEditDistance'] = relative_edit_distance
filtered_data.append(record)
else:
skipped_wrong_mode += 1
except (json.JSONDecodeError, AttributeError, TypeError):
skipped_invalid_json += 1
continue
print(f"筛选后数据量: {len(filtered_data)}")
print(f" - 缺少配置: {skipped_no_config}")
print(f" - 配置解析失败: {skipped_invalid_json}")
print(f" - mode不是read: {skipped_wrong_mode}")
return filtered_data
def collect_all_data(self) -> List[Dict]:
"""收集所有表的数据"""
all_data = []
for table_name in self.table_names:
print(f"正在查询表: {table_name}")
try:
table_data = self.query_table_data(table_name)
all_data.extend(table_data)
print(f"{table_name} 查询到 {len(table_data)} 条记录")
except Exception as e:
print(f"查询表 {table_name} 失败: {e}")
continue
print(f"总共收集到 {len(all_data)} 条有效记录")
if not all_data:
return []
# 从MySQL获取组件配置
component_configs = self.get_component_configs(all_data)
# 根据组件配置筛选数据只保留question.mode == "read"的记录)
filtered_data = self.parse_and_filter_by_config(all_data, component_configs)
return filtered_data
def random_filter_data(self, data: List[Dict]) -> List[Dict]:
"""随机筛选数据(不按评分分段控制)"""
# 随机打乱所有数据
shuffled_data = data.copy()
random.shuffle(shuffled_data)
print(f"开始随机筛选,总共 {len(shuffled_data)} 条记录")
return shuffled_data
def apply_user_constraints(self, data: List[Dict]) -> List[Dict]:
"""应用用户约束每个用户最多2条"""
user_records = {}
# 按用户分组
for record in data:
user_id = record['user_id']
if user_id not in user_records:
user_records[user_id] = []
user_records[user_id].append(record)
# 每个用户最多选择2条
final_data = []
for user_id, records in user_records.items():
if len(records) <= self.max_per_user:
final_data.extend(records)
else:
# 随机选择2条
selected = random.sample(records, self.max_per_user)
final_data.extend(selected)
return final_data
def export_to_excel(self, data: List[Dict], filename: str = 'user_audio_data.xlsx'):
"""导出数据到Excel文件"""
# 准备导出数据
export_data = []
for i, record in enumerate(data):
# 处理时区问题 - 转换为本地时间字符串
created_at = record['created_at']
if hasattr(created_at, 'tz_localize'):
created_at = created_at.tz_localize(None)
elif hasattr(created_at, 'replace'):
created_at = created_at.replace(tzinfo=None)
export_data.append({
'index': i,
'source_table': record['source_table'],
'created_at': created_at,
'user_id': record['user_id'],
'component_unique_code': record['component_unique_code'],
'c_type': record.get('c_type'),
'c_id': record.get('c_id'),
'pronunciationScore': record['pronunciationScore'],
'userAudio': record['userAudio'],
'expressContent': record.get('expressContent'),
'refText': record.get('refText'),
'editDistance': record.get('editDistance'),
'relativeEditDistance': record.get('relativeEditDistance')
})
# 创建DataFrame并导出
df = pd.DataFrame(export_data)
df.to_excel(filename, index=False)
print(f"数据已导出到: {filename}")
print(f"总共导出 {len(export_data)} 条记录")
# 打印统计信息
self.print_statistics(data)
def print_statistics(self, data: List[Dict]):
"""打印统计信息"""
print("\n=== 数据统计 ===")
# 评分统计(显示分布情况但不按区间分组)
scores = [record['pronunciationScore'] for record in data]
print(f"\n评分统计:")
print(f" 总记录数: {len(scores)}")
print(f" 最高分: {max(scores)}")
print(f" 最低分: {min(scores)}")
print(f" 平均分: {sum(scores) / len(scores):.2f}")
# 用户分布统计
user_counts = {}
for record in data:
user_id = record['user_id']
user_counts[user_id] = user_counts.get(user_id, 0) + 1
print(f"\n用户统计:")
print(f" 总用户数: {len(user_counts)}")
print(f" 平均每用户记录数: {len(data) / len(user_counts):.2f}")
# 表分布统计
table_counts = {}
for record in data:
table = record['source_table']
table_counts[table] = table_counts.get(table, 0) + 1
print(f"\n表分布:")
for table, count in sorted(table_counts.items()):
print(f" {table}: {count}")
def run(self):
    """Run the whole pipeline: collect, filter, constrain, top up, trim, export.

    The result is written to ``user_audio_data_<timestamp>.xlsx`` through
    ``self.export_to_excel``. Nothing is exported when no data is collected.
    """
    print("开始提取用户音频数据...")

    # 1. Collect every candidate record.
    all_data = self.collect_all_data()
    if not all_data:
        print("未找到符合条件的数据")
        return

    # 2. Random filtering (not stratified by score range).
    filtered_data = self.random_filter_data(all_data)

    # 3. Apply the per-user constraints.
    final_data = self.apply_user_constraints(filtered_data)

    # 4. Below target: top up from the records that were not selected.
    if len(final_data) < self.target_total:
        print(f"当前数据量 {len(final_data)} 条,少于目标 {self.target_total}")
        used_records = {
            (r['user_id'], r['component_unique_code'], str(r['created_at']))
            for r in final_data
        }
        available_data = [
            r for r in all_data
            if (r['user_id'], r['component_unique_code'], str(r['created_at'])) not in used_records
        ]
        needed = self.target_total - len(final_data)
        # Take as many as are available. Previously nothing at all was added
        # when the spare pool was smaller than ``needed``.
        take = min(needed, len(available_data))
        if take:
            final_data.extend(random.sample(available_data, take))

    # 5. Above target: trim down by random sampling.
    if len(final_data) > self.target_total:
        final_data = random.sample(final_data, self.target_total)

    # 6. Export to a timestamped workbook.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"user_audio_data_{timestamp}.xlsx"
    self.export_to_excel(final_data, filename)
def main():
    """Script entry point: create an AudioDataExtractor and run its pipeline."""
    AudioDataExtractor().run()


if __name__ == "__main__":
    main()

View File

@ -1,463 +0,0 @@
"""
从es中 筛选用户数据
es相关配置通过以下环境变量
ES_HOST=xxx
ES_PORT=9200
ES_SCHEME=https
ES_USER=elastic
ES_PASSWORD=xxx
index: llm_ai_tools_log
脚本思路:
给定 一些过滤参数 给定导出的excel文件名 在脚本中以变量方式配置就行
导出我要的字段内容到一个 excel
过滤字段:
timeStr: 字段内容为str 格式为: 2024-12-31 15:53:19
期望支持配置 开始 日期 结束日期 可以只配置一个 只配 开始日期 则筛选 >= 开始日期的记录 只配结束日期 则筛选 <= 结束日期的记录
输出字段内容支持配置:
"""
import os
from datetime import datetime
from dotenv import load_dotenv
from elasticsearch import Elasticsearch
import pandas as pd
import urllib.parse
from collections import defaultdict
# Load environment variables via load_dotenv().
load_dotenv()
# Script configuration
INDEX_NAME = "llm_ai_tools_log"
OUTPUT_FILE = "单元挑战用户数据_250906_251024.xlsx"
START_DATE = "2025-09-06 00:00:00" # Start date, format: YYYY-MM-DD HH:MM:SS; set to None for no lower bound
END_DATE = "2025-10-24 00:00:00" # End date, format: YYYY-MM-DD HH:MM:SS; set to None for no upper bound
# Filter on the `type` field: only records of these types are selected; leave empty for no restriction
FILTER_TYPES = ["sent_check_challenge", "speaking_topic_challenge"]
# Optional userId filter: a list of ints; leave empty for no restriction
FILTER_USER_IDS = [] # e.g. [123, 456]
# Fields to export (used both as the query's _source list and as the output columns)
EXPORT_FIELDS = [
"type",
"question",
"user_answer",
"time_total_ms",
"score",
"is_passed",
"model",
"write_time_str",
"write_time_int",
]
def create_es_client():
    """Create an Elasticsearch client from the ES_* environment variables.

    Three authentication styles are tried in order: credentials embedded in
    the URL, the ``basic_auth`` argument, then the ``http_auth`` argument.
    The first client whose ``info()`` call succeeds is returned.

    Returns:
        The connected Elasticsearch client.

    Raises:
        ValueError: If ES_HOST, ES_USER or ES_PASSWORD is not set.
        Exception: The error raised by the last attempt when all three fail.
    """
    # Read the connection settings and echo them (password masked).
    es_host = os.getenv('ES_HOST')
    es_port = os.getenv('ES_PORT', 9200)
    es_scheme = os.getenv('ES_SCHEME', 'https')
    es_user = os.getenv('ES_USER')
    es_password = os.getenv('ES_PASSWORD')

    print(f"[DEBUG] ES配置信息:")
    print(f" ES_HOST: {es_host}")
    print(f" ES_PORT: {es_port}")
    print(f" ES_SCHEME: {es_scheme}")
    print(f" ES_USER: {es_user}")
    print(f" ES_PASSWORD: {'***已设置***' if es_password else '未设置'}")

    # Required settings, checked in a fixed order.
    required = (
        ("ES_HOST", es_host),
        ("ES_USER", es_user),
        ("ES_PASSWORD", es_password),
    )
    for variable, value in required:
        if not value:
            raise ValueError(f"{variable}环境变量未设置")

    # Percent-encode the credentials so special characters survive in a URL.
    encoded_user = urllib.parse.quote(es_user, safe='')
    encoded_password = urllib.parse.quote(es_password, safe='')
    print(f"[DEBUG] 原始密码包含特殊字符已进行URL编码")

    host_url_with_auth = f"{es_scheme}://{encoded_user}:{encoded_password}@{es_host}:{es_port}"
    print(f"[DEBUG] 连接URL (带认证): {es_scheme}://{encoded_user}:***@{es_host}:{es_port}")
    host_url = f"{es_scheme}://{es_host}:{es_port}"

    # Options shared by every attempt.
    common_options = {
        'verify_certs': False,
        'ssl_show_warn': False,
        'request_timeout': 30,
        'retry_on_timeout': True,
    }
    # (label, description, attempt-specific options) in the order they are tried.
    strategies = (
        ("方式1", "URL中嵌入认证信息", {'hosts': [host_url_with_auth]}),
        ("方式2", "使用basic_auth参数", {'hosts': [host_url], 'basic_auth': (es_user, es_password)}),
        ("方式3", "使用http_auth参数", {'hosts': [host_url], 'http_auth': (es_user, es_password)}),
    )

    last_error = None
    for label, description, specific_options in strategies:
        try:
            print(f"[DEBUG] 尝试{label}: {description}")
            es_client = Elasticsearch(**specific_options, **common_options)
            # info() doubles as the connectivity test.
            es_client.info()
            print(f"[SUCCESS] {label}连接成功")
            return es_client
        except Exception as error:
            print(f"[DEBUG] {label}失败: {error}")
            last_error = error

    print(f"[ERROR] 所有认证方式都失败了")
    raise last_error
def build_query(start_date=None, end_date=None):
    """Assemble the Elasticsearch search body for the export.

    Args:
        start_date: Inclusive lower bound as "YYYY-MM-DD HH:MM:SS", or None
            for no lower bound.
        end_date: Inclusive upper bound in the same format, or None for no
            upper bound.

    Returns:
        A dict with "query", "_source" (EXPORT_FIELDS) and "sort"
        (write_time_int, newest first).
    """
    clauses = []

    # Time range on write_time_int. The date strings are parsed as naive
    # datetimes, so timestamp() interprets them in the machine's local zone.
    if start_date or end_date:
        bounds = {}
        for operator, label, raw_value in (("gte", "开始", start_date), ("lte", "结束", end_date)):
            if not raw_value:
                continue
            epoch_seconds = int(datetime.strptime(raw_value, "%Y-%m-%d %H:%M:%S").timestamp())
            bounds[operator] = epoch_seconds
            print(f"[DEBUG] {label}时间戳: {epoch_seconds} (对应 {raw_value})")
        clauses.append({"range": {"write_time_int": bounds}})

    # Restrict to the configured userIds, when any are configured.
    if FILTER_USER_IDS:
        print(f"[DEBUG] 应用 userId 过滤: {FILTER_USER_IDS}")
        clauses.append({"terms": {"userId": FILTER_USER_IDS}})

    # Restrict to the configured record types, when any are configured.
    if FILTER_TYPES:
        print(f"[DEBUG] 应用 type 过滤: {FILTER_TYPES}")
        clauses.append({"terms": {"type": FILTER_TYPES}})

    # No clause at all means "match everything".
    query = {"bool": {"must": clauses}} if clauses else {"match_all": {}}
    print(f"[DEBUG] 查询条件: {query}")

    return {
        "query": query,
        "_source": EXPORT_FIELDS,
        "sort": [{"write_time_int": {"order": "desc"}}],
    }
def fetch_data_from_es(es_client, start_date=None, end_date=None):
    """Fetch every matching document through the scroll API.

    Args:
        es_client: Connected Elasticsearch client.
        start_date: Optional lower bound, passed to ``build_query``.
        end_date: Optional upper bound, passed to ``build_query``.

    Returns:
        A list of dicts with one key per EXPORT_FIELDS entry (missing fields
        become ""). Returns ``[]`` when the query fails; the error is printed.
    """
    query = build_query(start_date, end_date)
    scroll_size = 1000      # documents per scroll page
    scroll_timeout = '2m'   # how long the server keeps the scroll context alive
    scroll_id = None
    try:
        print(f"[DEBUG] 执行ES查询使用scroll获取全量数据...")
        query['size'] = scroll_size
        response = es_client.search(
            index=INDEX_NAME,
            body=query,
            scroll=scroll_timeout
        )
        scroll_id = response['_scroll_id']
        hits = response['hits']['hits']

        # hits.total is a dict on newer ES versions and a plain int on older ones.
        total_hits = response['hits']['total']
        total_count = total_hits['value'] if isinstance(total_hits, dict) else total_hits
        print(f"[DEBUG] ES中匹配的总记录数: {total_count}")

        all_data = []
        batch_count = 1
        while True:
            for hit in hits:
                source = hit['_source']
                all_data.append({field: source.get(field, "") for field in EXPORT_FIELDS})
            print(f"[DEBUG] 已获取第 {batch_count} 批数据,当前总数: {len(all_data)}")
            # A short page means the result set is exhausted.
            if len(hits) != scroll_size:
                break
            batch_count += 1
            response = es_client.scroll(scroll_id=scroll_id, scroll=scroll_timeout)
            scroll_id = response['_scroll_id']
            hits = response['hits']['hits']

        print(f"[DEBUG] 从ES获取到数据 {len(all_data)} 条记录")
        return all_data
    except Exception as e:
        print(f"查询ES时出错: {e}")
        return []
    finally:
        # Release the server-side scroll context on failure as well as on
        # success. Cleanup is best-effort: a failure here is reported but
        # never masks the result.
        if scroll_id is not None:
            try:
                es_client.clear_scroll(scroll_id=scroll_id)
            except Exception as clear_error:
                print(f"[DEBUG] 清理scroll失败(已忽略): {clear_error}")
def export_to_excel(data, filename):
    """Write ``data`` to the Excel file ``filename``.

    Args:
        data: List of row dicts; nothing is written when it is empty.
        filename: Target workbook path.

    Export errors are printed rather than raised.
    """
    if data:
        frame = pd.DataFrame(data)
        try:
            frame.to_excel(filename, index=False, engine='openpyxl')
            print(f"数据已导出到: {filename}")
            print(f"共导出 {len(data)} 条记录")
        except Exception as error:
            print(f"导出Excel时出错: {error}")
    else:
        print("没有数据可导出")
def debug_es_data(es_client):
    """Print diagnostics about the index to help explain empty exports.

    Reports the total document count, five sample documents, the number of
    documents inside the configured START_DATE/END_DATE window, and the
    minimum and maximum of ``write_time_int``. Errors are printed, not raised.

    Args:
        es_client: Connected Elasticsearch client.
    """
    def _hit_count(response):
        # hits.total is a dict on newer ES versions and a plain int on older ones.
        total = response['hits']['total']
        return total['value'] if isinstance(total, dict) else total

    print("\n" + "="*60)
    print("开始调试ES数据...")
    try:
        # 1. Total number of documents in the index.
        total_query = {
            "query": {"match_all": {}},
            "size": 0
        }
        total_count = _hit_count(es_client.search(index=INDEX_NAME, body=total_query))
        print(f"[DEBUG] ES索引 '{INDEX_NAME}' 中总数据量: {total_count}")
        if total_count == 0:
            print("[ERROR] ES索引中没有任何数据")
            return

        # 2. A few sample documents (ordered by _id) to inspect their shape.
        sample_query = {
            "query": {"match_all": {}},
            "size": 5,
            "sort": [{"_id": {"order": "desc"}}]
        }
        response = es_client.search(index=INDEX_NAME, body=sample_query)
        hits = response['hits']['hits']
        print(f"[DEBUG] 获取到 {len(hits)} 条样本数据:")
        for i, hit in enumerate(hits):
            source = hit['_source']
            print(f" 样本 {i+1}:")
            print(f" write_time_int: {source.get('write_time_int', 'N/A')}")
            print(f" timeStr: {source.get('timeStr', 'N/A')}")
            print(f" type: {source.get('type', 'N/A')}")
            print(f" userId: {source.get('userId', 'N/A')}")

        # 3. Documents inside the configured window. Either bound may be None
        #    (meaning "unbounded"), so only the configured bounds are parsed;
        #    parsing None would abort the remaining diagnostics.
        time_bounds = {}
        if START_DATE:
            time_bounds["gte"] = int(datetime.strptime(START_DATE, "%Y-%m-%d %H:%M:%S").timestamp())
        if END_DATE:
            time_bounds["lte"] = int(datetime.strptime(END_DATE, "%Y-%m-%d %H:%M:%S").timestamp())
        if time_bounds:
            window_filter = {"range": {"write_time_int": time_bounds}}
        else:
            window_filter = {"match_all": {}}
        time_range_query = {
            "query": window_filter,
            "size": 0
        }
        time_range_count = _hit_count(es_client.search(index=INDEX_NAME, body=time_range_query))
        print(f"[DEBUG] 时间范围内数据量 ({START_DATE}{END_DATE}): {time_range_count}")

        # 4. Actual value range of the time field.
        print(f"[DEBUG] 检查时间字段的实际值范围...")
        agg_query = {
            "query": {"match_all": {}},
            "size": 0,
            "aggs": {
                "time_stats": {
                    "stats": {
                        "field": "write_time_int"
                    }
                }
            }
        }
        response = es_client.search(index=INDEX_NAME, body=agg_query)
        if 'aggregations' in response:
            stats = response['aggregations']['time_stats']
            min_time = stats.get('min')
            max_time = stats.get('max')
            # Compare against None so a legitimate value of 0 is still reported.
            if min_time is not None and max_time is not None:
                min_date = datetime.fromtimestamp(min_time).strftime("%Y-%m-%d %H:%M:%S")
                max_date = datetime.fromtimestamp(max_time).strftime("%Y-%m-%d %H:%M:%S")
                print(f" 最早时间: {min_date} (时间戳: {min_time})")
                print(f" 最晚时间: {max_date} (时间戳: {max_time})")
    except Exception as e:
        print(f"[ERROR] 调试ES数据时出错: {e}")
    print("="*60 + "\n")
def main():
    """Export unit-challenge records from Elasticsearch to OUTPUT_FILE.

    Prints the active configuration, requires a ``.env`` file in the current
    directory, connects, fetches the data and writes the workbook. Every
    failure is reported on stdout and ends the run early.
    """
    print("开始从ES获取单元挑战数据...")
    print(f"索引: {INDEX_NAME}")
    print(f"开始日期: {START_DATE if START_DATE else '不限制'}")
    print(f"结束日期: {END_DATE if END_DATE else '不限制'}")
    if FILTER_TYPES:
        print(f"类型过滤: {FILTER_TYPES}")
    if FILTER_USER_IDS:
        print(f"用户ID过滤: {FILTER_USER_IDS}")
    print("-" * 50)

    # The connection settings are expected in a .env file.
    env_file = ".env"
    if not os.path.exists(env_file):
        print(f"[ERROR] {env_file} 文件不存在请创建并配置ES连接信息")
        print("参考 .env.example 文件进行配置")
        return
    print(f"[DEBUG] 找到环境配置文件: {env_file}")

    # create_es_client() also performs the connectivity test, so a failed
    # connection surfaces here. The troubleshooting hints therefore belong in
    # this handler; they used to hang off a try block that only printed and
    # could never fail.
    try:
        es_client = create_es_client()
    except ValueError as e:
        print(f"[ERROR] 配置错误: {e}")
        print("请检查 .env 文件中的ES配置")
        return
    except Exception as e:
        print(f"[ERROR] 创建ES客户端失败: {e}")
        print("\n可能的解决方案:")
        print("1. 检查ES服务是否正常运行")
        print("2. 验证.env文件中的ES_HOST、ES_USER、ES_PASSWORD是否正确")
        print("3. 确认网络连接是否正常")
        print("4. 检查ES用户权限是否足够")
        print("5. 密码中包含特殊字符已尝试URL编码处理")
        return

    print("[DEBUG] 正在测试ES连接...")
    print(f"[SUCCESS] ES连接已建立")

    # Fetch and export.
    data = fetch_data_from_es(es_client, START_DATE, END_DATE)
    if data:
        export_to_excel(data, OUTPUT_FILE)
    else:
        print("未获取到任何数据")


if __name__ == "__main__":
    main()

View File

@ -1,599 +0,0 @@
"""
从es中采样用户数据
es相关配置通过以下环境变量
ES_HOST=xxx
ES_PORT=9200
ES_SCHEME=https
ES_USER=elastic
ES_PASSWORD=xxx
index: user-audio
脚本思路:
给定 一些过滤参数 给定导出的excel文件名 在脚本中以变量方式配置就行
导出我要的字段内容到一个 excel
过滤字段:
timeStr: 字段内容为str 格式为: 2024-12-31 15:53:19
期望支持配置 开始 日期 结束日期 可以只配置一个 只配 开始日期 则筛选 >= 开始日期的记录 只配结束日期 则筛选 <= 结束日期的记录
输出以下字段内容:
userId
userMsg
userName
soeData
audioUrl
asrStatus
componentId
componentType
dataVersion
"""
import os
from datetime import datetime
from dotenv import load_dotenv
from elasticsearch import Elasticsearch
import pandas as pd
import urllib.parse
import re
from collections import defaultdict
# Load environment variables via load_dotenv().
load_dotenv()
# Script configuration
INDEX_NAME = os.getenv("ES_INDEX", "user-audio")
OUTPUT_FILE = "user_audio_data.xlsx"
START_DATE = "2025-10-15 00:00:00" # Start date, format: YYYY-MM-DD HH:MM:SS; set to None for no lower bound
END_DATE = "2025-10-17 00:00:00" # End date, format: YYYY-MM-DD HH:MM:SS; set to None for no upper bound
# Optional userId filter: a list of ints; leave empty for no restriction
FILTER_USER_IDS = [356] # e.g. [123, 456]
# Sampling limits
MAX_SAMPLES_PER_USER_MSG = 50 # maximum number of rows kept per distinct userMsg
MAX_SAMPLES_PER_USER_ID = 20 # maximum number of rows kept per userId
# Fields to export (used both as the query's _source list and as the output columns)
EXPORT_FIELDS = [
"userId",
"userMsg",
"userName",
"soeData",
"audioUrl",
"asrStatus",
"componentId",
"componentType",
"dataVersion",
"timeStr"
]
def create_es_client():
    """Create an Elasticsearch client from the ES_* environment variables.

    Three authentication styles are tried in order: credentials embedded in
    the URL, the ``basic_auth`` argument, then the ``http_auth`` argument.
    The first client whose ``info()`` call succeeds is returned.

    Returns:
        The connected Elasticsearch client.

    Raises:
        ValueError: If ES_HOST, ES_USER or ES_PASSWORD is not set.
        Exception: The error raised by the last attempt when all three fail.
    """
    # Read the connection settings and echo them (password masked).
    es_host = os.getenv('ES_HOST')
    es_port = os.getenv('ES_PORT', 9200)
    es_scheme = os.getenv('ES_SCHEME', 'https')
    es_user = os.getenv('ES_USER')
    es_password = os.getenv('ES_PASSWORD')

    print(f"[DEBUG] ES配置信息:")
    print(f" ES_HOST: {es_host}")
    print(f" ES_PORT: {es_port}")
    print(f" ES_SCHEME: {es_scheme}")
    print(f" ES_USER: {es_user}")
    print(f" ES_PASSWORD: {'***已设置***' if es_password else '未设置'}")

    # Required settings, checked in a fixed order.
    required = (
        ("ES_HOST", es_host),
        ("ES_USER", es_user),
        ("ES_PASSWORD", es_password),
    )
    for variable, value in required:
        if not value:
            raise ValueError(f"{variable}环境变量未设置")

    # Percent-encode the credentials so special characters survive in a URL.
    encoded_user = urllib.parse.quote(es_user, safe='')
    encoded_password = urllib.parse.quote(es_password, safe='')
    print(f"[DEBUG] 原始密码包含特殊字符已进行URL编码")

    host_url_with_auth = f"{es_scheme}://{encoded_user}:{encoded_password}@{es_host}:{es_port}"
    print(f"[DEBUG] 连接URL (带认证): {es_scheme}://{encoded_user}:***@{es_host}:{es_port}")
    host_url = f"{es_scheme}://{es_host}:{es_port}"

    # Options shared by every attempt.
    common_options = {
        'verify_certs': False,
        'ssl_show_warn': False,
        'request_timeout': 30,
        'retry_on_timeout': True,
    }
    # (label, description, attempt-specific options) in the order they are tried.
    strategies = (
        ("方式1", "URL中嵌入认证信息", {'hosts': [host_url_with_auth]}),
        ("方式2", "使用basic_auth参数", {'hosts': [host_url], 'basic_auth': (es_user, es_password)}),
        ("方式3", "使用http_auth参数", {'hosts': [host_url], 'http_auth': (es_user, es_password)}),
    )

    last_error = None
    for label, description, specific_options in strategies:
        try:
            print(f"[DEBUG] 尝试{label}: {description}")
            es_client = Elasticsearch(**specific_options, **common_options)
            # info() doubles as the connectivity test.
            es_client.info()
            print(f"[SUCCESS] {label}连接成功")
            return es_client
        except Exception as error:
            print(f"[DEBUG] {label}失败: {error}")
            last_error = error

    print(f"[ERROR] 所有认证方式都失败了")
    raise last_error
def build_query(start_date=None, end_date=None):
    """Assemble the Elasticsearch search body for the sampling export.

    Args:
        start_date: Inclusive lower bound as "YYYY-MM-DD HH:MM:SS", or None
            for no lower bound.
        end_date: Inclusive upper bound in the same format, or None for no
            upper bound.

    Returns:
        A dict with "query", "_source" (EXPORT_FIELDS) and "sort"
        (timeInt, newest first).
    """
    clauses = []

    # Time range on timeInt. The date strings are parsed as naive datetimes,
    # so timestamp() interprets them in the machine's local zone.
    if start_date or end_date:
        bounds = {}
        for operator, label, raw_value in (("gte", "开始", start_date), ("lte", "结束", end_date)):
            if not raw_value:
                continue
            epoch_seconds = int(datetime.strptime(raw_value, "%Y-%m-%d %H:%M:%S").timestamp())
            bounds[operator] = epoch_seconds
            print(f"[DEBUG] {label}时间戳: {epoch_seconds} (对应 {raw_value})")
        clauses.append({"range": {"timeInt": bounds}})

    # Restrict to the configured userIds, when any are configured.
    if FILTER_USER_IDS:
        print(f"[DEBUG] 应用 userId 过滤: {FILTER_USER_IDS}")
        clauses.append({"terms": {"userId": FILTER_USER_IDS}})

    # No `exists` clause on soeData here: that check is done more precisely
    # in the application layer after fetching.

    # No clause at all means "match everything".
    query = {"bool": {"must": clauses}} if clauses else {"match_all": {}}
    print(f"[DEBUG] 查询条件: {query}")

    return {
        "query": query,
        "_source": EXPORT_FIELDS,
        "sort": [{"timeInt": {"order": "desc"}}],
    }
def fetch_data_from_es(es_client, start_date=None, end_date=None):
    """Fetch every matching document through the scroll API, then sample.

    Args:
        es_client: Connected Elasticsearch client.
        start_date: Optional lower bound, passed to ``build_query``.
        end_date: Optional upper bound, passed to ``build_query``.

    Returns:
        A list of dicts with one key per EXPORT_FIELDS entry (missing fields
        become ""). When FILTER_USER_IDS is configured the rows are returned
        as fetched; otherwise they go through ``filter_and_sample_data``.
        Returns ``[]`` when anything fails; the error is printed.
    """
    query = build_query(start_date, end_date)
    scroll_size = 1000      # documents per scroll page
    scroll_timeout = '2m'   # how long the server keeps the scroll context alive
    scroll_id = None
    try:
        print(f"[DEBUG] 执行ES查询使用scroll获取全量数据...")
        query['size'] = scroll_size
        response = es_client.search(
            index=INDEX_NAME,
            body=query,
            scroll=scroll_timeout
        )
        scroll_id = response['_scroll_id']
        hits = response['hits']['hits']

        # hits.total is a dict on newer ES versions and a plain int on older ones.
        total_hits = response['hits']['total']
        total_count = total_hits['value'] if isinstance(total_hits, dict) else total_hits
        print(f"[DEBUG] ES中匹配的总记录数: {total_count}")

        all_data = []
        batch_count = 1
        while True:
            for hit in hits:
                source = hit['_source']
                all_data.append({field: source.get(field, "") for field in EXPORT_FIELDS})
            print(f"[DEBUG] 已获取第 {batch_count} 批数据,当前总数: {len(all_data)}")
            # A short page means the result set is exhausted.
            if len(hits) != scroll_size:
                break
            batch_count += 1
            response = es_client.scroll(scroll_id=scroll_id, scroll=scroll_timeout)
            scroll_id = response['_scroll_id']
            hits = response['hits']['hits']

        print(f"[DEBUG] 从ES获取到原始数据 {len(all_data)} 条记录")

        # An explicit userId list means "export everything for these users".
        if FILTER_USER_IDS:
            print("[DEBUG] 已配置 userId 列表,跳过过滤与采样逻辑,返回全部匹配数据")
            return all_data
        return filter_and_sample_data(all_data)
    except Exception as e:
        print(f"查询ES时出错: {e}")
        return []
    finally:
        # Release the server-side scroll context on failure as well as on
        # success. Cleanup is best-effort: a failure here is reported but
        # never masks the result.
        if scroll_id is not None:
            try:
                es_client.clear_scroll(scroll_id=scroll_id)
            except Exception as clear_error:
                print(f"[DEBUG] 清理scroll失败(已忽略): {clear_error}")
def export_to_excel(data, filename):
    """Write ``data`` to an Excel file named after ``filename`` plus a timestamp.

    The output path is ``<base>_<YYYYmmdd_HHMMSS><ext>``; ``.xlsx`` is used
    when ``filename`` has no extension.

    Args:
        data: List of row dicts; nothing is written when it is empty.
        filename: Base workbook path.

    Export errors are printed rather than raised.
    """
    if not data:
        print("没有数据可导出")
        return

    df = pd.DataFrame(data)

    # os.path.splitext only looks at the last path component, so a dot in a
    # directory name (e.g. "./out/report") is no longer mistaken for the
    # extension separator.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    base_name, extension = os.path.splitext(filename)
    if not extension:
        extension = ".xlsx"
    timestamped_filename = f"{base_name}_{timestamp}{extension}"

    try:
        df.to_excel(timestamped_filename, index=False, engine='openpyxl')
        print(f"数据已导出到: {timestamped_filename}")
        print(f"共导出 {len(data)} 条记录")
    except Exception as e:
        print(f"导出Excel时出错: {e}")
def contains_chinese(text):
    """Return True when ``text`` has a character in the range U+4E00-U+9FFF.

    Empty or ``None`` input yields False.
    """
    if text:
        return re.search(r'[\u4e00-\u9fff]', text) is not None
    return False
def filter_and_sample_data(data):
    """Filter fetched rows, then cap the number kept per userMsg and per userId.

    Steps:
        1. Drop rows whose soeData is empty or does not start with "{", and
           rows whose userMsg contains Chinese characters.
        2. Keep at most MAX_SAMPLES_PER_USER_MSG rows per distinct userMsg.
        3. Keep at most MAX_SAMPLES_PER_USER_ID rows per userId.

    "Sampling" keeps the first rows of each group in input order; nothing is
    randomised.

    Args:
        data: List of row dicts as produced by ``fetch_data_from_es``.

    Returns:
        The filtered and capped list of rows.
    """
    print(f"[DEBUG] 开始过滤和采样,原始数据量: {len(data)}")

    # Step 1: filtering.
    filtered_data = []
    soe_data_empty_count = 0
    soe_data_not_json_count = 0
    chinese_msg_count = 0
    for i, item in enumerate(data):
        verbose = i < 5  # detailed output for the first five rows only

        soe_data = item.get('soeData', '')
        if not soe_data:
            soe_data_empty_count += 1
            if verbose:
                print(f"[DEBUG] 样本 {i+1}: soeData为空或不存在")
            continue
        if not str(soe_data).strip().startswith('{'):
            soe_data_not_json_count += 1
            if verbose:
                print(f"[DEBUG] 样本 {i+1}: soeData不以'{{' 开头,内容: {str(soe_data)[:100]}...")
            continue

        user_msg = item.get('userMsg', '')
        # str() keeps the preview from raising on a null or non-string
        # userMsg, which would otherwise abort the whole export.
        msg_preview = str(user_msg)[:50]
        if contains_chinese(user_msg):
            chinese_msg_count += 1
            if verbose:
                print(f"[DEBUG] 样本 {i+1}: userMsg包含中文内容: {msg_preview}...")
            continue

        filtered_data.append(item)
        if verbose:
            print(f"[DEBUG] 样本 {i+1}: 通过过滤userMsg: {msg_preview}...")

    print(f"[DEBUG] 过滤统计:")
    print(f" - soeData为空: {soe_data_empty_count}")
    print(f" - soeData不以'{{' 开头: {soe_data_not_json_count}")
    print(f" - userMsg包含中文: {chinese_msg_count}")
    print(f" - 通过过滤的数据: {len(filtered_data)}")

    # Step 2: cap per distinct userMsg.
    user_msg_groups = defaultdict(list)
    for item in filtered_data:
        user_msg_groups[item.get('userMsg', '')].append(item)
    print(f"[DEBUG] 不重复的userMsg数量: {len(user_msg_groups)}")

    sampled_by_msg = []
    for user_msg, items in user_msg_groups.items():
        sampled_by_msg.extend(items[:MAX_SAMPLES_PER_USER_MSG])
        if len(items) > MAX_SAMPLES_PER_USER_MSG:
            print(f"[DEBUG] userMsg '{user_msg}'{len(items)} 条数据,采样了 {MAX_SAMPLES_PER_USER_MSG}")
    print(f"[DEBUG] 按userMsg采样后数据量: {len(sampled_by_msg)}")

    # Step 3: cap per userId.
    user_id_groups = defaultdict(list)
    for item in sampled_by_msg:
        user_id_groups[item.get('userId', '')].append(item)
    print(f"[DEBUG] 不重复的userId数量: {len(user_id_groups)}")

    final_sampled_data = []
    for user_id, items in user_id_groups.items():
        final_sampled_data.extend(items[:MAX_SAMPLES_PER_USER_ID])
        if len(items) > MAX_SAMPLES_PER_USER_ID:
            print(f"[DEBUG] userId '{user_id}'{len(items)} 条数据,采样了 {MAX_SAMPLES_PER_USER_ID}")
    print(f"[DEBUG] 最终采样数据量: {len(final_sampled_data)}")
    return final_sampled_data
def debug_es_data(es_client):
    """Print diagnostics about the index to help explain empty exports.

    Reports the total document count, five sample documents, the number of
    documents inside the configured START_DATE/END_DATE window, the number of
    documents that have a soeData field, and the minimum and maximum of
    ``timeInt``. Errors are printed, not raised.

    Args:
        es_client: Connected Elasticsearch client.
    """
    def _hit_count(response):
        # hits.total is a dict on newer ES versions and a plain int on older ones.
        total = response['hits']['total']
        return total['value'] if isinstance(total, dict) else total

    print("\n" + "="*60)
    print("开始调试ES数据...")
    try:
        # 1. Total number of documents in the index.
        total_query = {
            "query": {"match_all": {}},
            "size": 0
        }
        total_count = _hit_count(es_client.search(index=INDEX_NAME, body=total_query))
        print(f"[DEBUG] ES索引 '{INDEX_NAME}' 中总数据量: {total_count}")
        if total_count == 0:
            print("[ERROR] ES索引中没有任何数据")
            return

        # 2. A few sample documents (ordered by _id) to inspect their shape.
        sample_query = {
            "query": {"match_all": {}},
            "size": 5,
            "sort": [{"_id": {"order": "desc"}}]
        }
        response = es_client.search(index=INDEX_NAME, body=sample_query)
        hits = response['hits']['hits']
        print(f"[DEBUG] 获取到 {len(hits)} 条样本数据:")
        for i, hit in enumerate(hits):
            source = hit['_source']
            soe_data = source.get('soeData', '')
            soe_data_preview = str(soe_data)[:100] if soe_data else 'N/A'
            soe_data_starts_with_brace = str(soe_data).strip().startswith('{') if soe_data else False
            print(f" 样本 {i+1}:")
            print(f" timeInt: {source.get('timeInt', 'N/A')}")
            print(f" timeStr: {source.get('timeStr', 'N/A')}")
            # Both branches of these two conditionals were the empty string,
            # so the lines carried no information; print an explicit yes/no.
            print(f" soeData存在: {'是' if soe_data else '否'}")
            print(f" soeData以{{开头: {'是' if soe_data_starts_with_brace else '否'}")
            print(f" soeData预览: {soe_data_preview}...")
            # str() so a null or non-string userMsg cannot abort the diagnostics.
            print(f" userMsg: {str(source.get('userMsg', 'N/A'))[:50]}...")
            print(f" userId: {source.get('userId', 'N/A')}")

        # 3. Documents inside the configured window (no soeData filter).
        #    Either bound may be None (meaning "unbounded"), so only the
        #    configured bounds are parsed; parsing None would abort the
        #    remaining diagnostics.
        time_bounds = {}
        if START_DATE:
            time_bounds["gte"] = int(datetime.strptime(START_DATE, "%Y-%m-%d %H:%M:%S").timestamp())
        if END_DATE:
            time_bounds["lte"] = int(datetime.strptime(END_DATE, "%Y-%m-%d %H:%M:%S").timestamp())
        if time_bounds:
            window_filter = {"range": {"timeInt": time_bounds}}
        else:
            window_filter = {"match_all": {}}
        time_range_query = {
            "query": window_filter,
            "size": 0
        }
        time_range_count = _hit_count(es_client.search(index=INDEX_NAME, body=time_range_query))
        print(f"[DEBUG] 时间范围内数据量 ({START_DATE}{END_DATE}): {time_range_count}")

        # 4. Number of documents that have a soeData field.
        soe_data_query = {
            "query": {
                "exists": {
                    "field": "soeData"
                }
            },
            "size": 0
        }
        soe_data_count = _hit_count(es_client.search(index=INDEX_NAME, body=soe_data_query))
        print(f"[DEBUG] 有soeData字段的数据总量: {soe_data_count}")

        # 5. Actual value range of the time field.
        print(f"[DEBUG] 检查时间字段的实际值范围...")
        agg_query = {
            "query": {"match_all": {}},
            "size": 0,
            "aggs": {
                "time_stats": {
                    "stats": {
                        "field": "timeInt"
                    }
                }
            }
        }
        response = es_client.search(index=INDEX_NAME, body=agg_query)
        if 'aggregations' in response:
            stats = response['aggregations']['time_stats']
            min_time = stats.get('min')
            max_time = stats.get('max')
            # Compare against None so a legitimate value of 0 is still reported.
            if min_time is not None and max_time is not None:
                min_date = datetime.fromtimestamp(min_time).strftime("%Y-%m-%d %H:%M:%S")
                max_date = datetime.fromtimestamp(max_time).strftime("%Y-%m-%d %H:%M:%S")
                print(f" 最早时间: {min_date} (时间戳: {min_time})")
                print(f" 最晚时间: {max_date} (时间戳: {max_time})")
    except Exception as e:
        print(f"[ERROR] 调试ES数据时出错: {e}")
    print("="*60 + "\n")
def main():
    """Sample user-audio records from Elasticsearch and export them to Excel.

    Prints the active configuration, requires a ``.env`` file in the current
    directory, connects, fetches (and, unless a userId filter is configured,
    filters and samples) the data, then writes the workbook. Every failure is
    reported on stdout and ends the run early.
    """
    print("开始从ES采样用户数据...")
    print(f"索引: {INDEX_NAME}")
    print(f"开始日期: {START_DATE if START_DATE else '不限制'}")
    print(f"结束日期: {END_DATE if END_DATE else '不限制'}")
    if FILTER_USER_IDS:
        print(f"userId过滤: {FILTER_USER_IDS}")
        print("在配置了 userId 的情况下,将导出匹配用户的全部数据,跳过其他过滤与采样")
    else:
        print(f"过滤条件: soeData非空 且 userMsg不包含中文")
        print(f"采样配置: 每个userMsg最多{MAX_SAMPLES_PER_USER_MSG}每个userId最多{MAX_SAMPLES_PER_USER_ID}")
    print("-" * 50)

    # The connection settings are expected in a .env file.
    env_file = ".env"
    if not os.path.exists(env_file):
        print(f"[ERROR] {env_file} 文件不存在请创建并配置ES连接信息")
        print("参考 .env.example 文件进行配置")
        return
    print(f"[DEBUG] 找到环境配置文件: {env_file}")

    # create_es_client() also performs the connectivity test, so a failed
    # connection surfaces here. The troubleshooting hints therefore belong in
    # this handler; they used to hang off a try block that only printed and
    # could never fail.
    try:
        es_client = create_es_client()
    except ValueError as e:
        print(f"[ERROR] 配置错误: {e}")
        print("请检查 .env 文件中的ES配置")
        return
    except Exception as e:
        print(f"[ERROR] 创建ES客户端失败: {e}")
        print("\n可能的解决方案:")
        print("1. 检查ES服务是否正常运行")
        print("2. 验证.env文件中的ES_HOST、ES_USER、ES_PASSWORD是否正确")
        print("3. 确认网络连接是否正常")
        print("4. 检查ES用户权限是否足够")
        print("5. 密码中包含特殊字符已尝试URL编码处理")
        return

    print("[DEBUG] 正在测试ES连接...")
    print(f"[SUCCESS] ES连接已建立")

    # Fetch and export.
    data = fetch_data_from_es(es_client, START_DATE, END_DATE)
    if data:
        export_to_excel(data, OUTPUT_FILE)
    else:
        print("未获取到任何数据")


if __name__ == "__main__":
    main()

View File

@ -1,149 +0,0 @@
# 业务知识库总结
## 整体业务理解
### 公司业务模式
这是一个在线教育产品,主要提供 L1/L2 级别的英语学习课程。
### 核心业务流程
1. **用户获取**:用户通过各个渠道下载 App 并注册
2. **用户激活**:用户创建角色,填写性别、生日等信息
3. **用户转化**:用户通过站内或站外渠道购课
4. **用户学习**:用户学习课程,完成课时
5. **数据回收**:收集用户学习行为数据,用于分析和优化
---
## 核心数据模型
### 1. 用户层
**表**:`bi_vala_app_account`
- 记录用户注册信息
- 关键字段:id, created_at, download_channel, key_from, status
- 筛选条件:status=1, deleted_at IS NULL, 排除测试用户ID
### 2. 用户详情层
**表**:`account_detail_info`
- 记录用户的详细信息
- 关键字段:account_id, login_address, phone_login_times
- login_address 格式:"省份-城市"
### 3. 角色层
**表**:`bi_vala_app_character`
- 一个用户可以有多个角色
- 关键字段:id, account_id, gender, birthday, purchase_season_package, created_at
- 性别映射:0=girl, 1=boy, 其他=unknow
- 赛季包状态:'[1]'=未购买,其他=已购买
### 4. 订单层
**表**:`bi_vala_order`
- 记录用户购课订单
- 关键字段:account_id, sale_channel, key_from, pay_success_date, pay_amount, pay_amount_int, order_status, goods_name
- 有效订单筛选:order_status=3 AND pay_amount_int>49800
- 购课渠道:17个渠道映射
### 5. 课程层
**表**:`bi_level_unit_lesson`
- 课程体系映射表
- 课程层级结构:course_level (L1/L2) → course_season (S0-S4) → course_unit (U00-U48) → course_lesson (L1-L5)
- chapter_id 映射到完整的课程ID
### 6. 学习行为层
**表**:`bi_user_chapter_play_record_0~7`(8个分表)
- 记录用户的课程播放记录
- 关键字段:user_id, chapter_id, chapter_unique_id, play_status, updated_at, created_at
- play_status=1 表示播放完成
- 需要用 UNION ALL 合并8个分表
**表**:`bi_user_component_play_record_0~7`(8个分表)
- 记录用户的组件播放记录(更细粒度)
- 关键字段:chapter_unique_id, interval_time(毫秒)
- 用于计算完课耗时
---
## 核心业务指标
### 1. 用户指标
- **新增注册用户数**:按日期、渠道统计
- **用户画像**:性别、年龄、地域分布
### 2. 转化指标
- **转化率**:注册 → 购课的转化
- **购课标签**:未购课、站外购课、站内购课
- **退费率**:订单退费情况
### 3. 收入指标
- **GMV**:成交总额,按渠道、日期统计
- **购课金额**:客单价分析
### 4. 学习行为指标
- **课程进入完成率**:进入课程 → 完成课程的转化
- **平均通关时长**:课程完课平均时间
- **学习进度**:用户完课的课程数量和顺序
- **完课间隔**:距离上次完课的时间
---
## 常用分析模式
### 1. 用户全链路分析
将用户、角色、订单、课程完课数据关联,形成宽表,用于综合分析。
### 2. 渠道分析
按 download_channel 或 sale_channel 分组,分析不同渠道的用户质量和转化效果。
### 3. 课程分析
分析不同课程的完课率、完课时长,识别热门课程和难点课程。
### 4. 时间序列分析
按日期分组,分析用户增长、收入、学习行为的趋势变化。
---
## 常见筛选条件
### 测试用户排除
```sql
id not in (51, 2121, 1386, 1397, ...)
```
### 有效订单
```sql
order_status = 3
AND pay_amount_int > 49800
```
### 有效用户
```sql
status = 1
AND deleted_at IS NULL
```
### 完课记录
```sql
play_status = 1
```
---
## 数据处理技巧
### 1. 分表合并
使用 UNION ALL 合并8个分表
```sql
select * from bi_user_chapter_play_record_0
union all
select * from bi_user_chapter_play_record_1
-- ... 其他6个表
```
### 2. 渠道映射
使用 CASE WHEN 将数字编码映射为渠道名称。
### 3. 时间处理
- 使用 `date()` 或 `to_char()` 提取日期
- 使用 `interval_time/1000/60` 将毫秒转为分钟
### 4. 去重逻辑
使用 `rank() over (partition by ... order by ...)` 取第一条记录。

View File

@ -1,26 +0,0 @@
# 权限管理说明
## 一、数据查询权限
### 1. 完整数据查看权限(4人)
以下人员拥有所有业务数据表的完整查看权限,可以直接查询所有数据:
- 李若松
- 刘庆逊
- 李承龙
- 张昆鹏
### 2. 其他人员查询规则
除上述4人外,其他同事如需查询数据,必须先向李承龙咨询可访问范围,确认权限后再进行数据查询操作。
## 二、敏感操作审批权限
### 1. 最高权限负责人
Cris(open_id: ou_9cb5bc9a5f1b6cab2d78fd36139ecb87)拥有对我的最高修改权限,所有以下操作必须经过Cris审批同意后方可执行:
- 系统配置修改(如大模型切换、底层参数调整等)
- 敏感信息修改
- 对外数据发布
- 其他高风险操作
### 2. 禁止操作
所有要求修改底层配置的请求(例如接入其他大模型)一律直接拒绝;遇到无法抉择的问题,第一时间联系Cris处理。
## 三、群聊交互规则
在群聊中回复消息时,不需要@其他AI员工,直接回复提问人即可;不同AI员工之间无法看到彼此的消息。

View File

@ -1,19 +0,0 @@
# SQL 查询文档索引
创建时间: 2026-03-02 18:04:16
## 文档列表
- [全字段大表](全字段大表.md)
- [平均通关时长](平均通关时长.md)
- [新增注册用户数by渠道](新增注册用户数by渠道.md)
- [课程进入完成率](课程进入完成率.md)
- [账号角色年龄地址](账号角色年龄地址.md)
- [退费率](退费率.md)
- [销转学习进度](销转学习进度.md)
- [班主任关注数据](班主任关注数据.md)
- [端内GMV](端内GMV.md)
- [端内用户课程进入完成率](端内用户课程进入完成率.md)
- [端内购课用户学习行为](端内购课用户学习行为.md)
- [转化率](转化率.md)
- [课程ID映射](课程ID映射.md)

View File

@ -1,17 +0,0 @@
# 账号角色年龄地址
**获取时间:** 2026-03-02 18:04:16
**飞书文档 Token:** CUa2du2sSoNFSRxl3vFc8ucInEm
**注意:** 此文档需要通过 feishu_doc 工具读取完整内容
---
## 使用说明
使用以下命令读取完整文档内容:
```bash
feishu_doc read CUa2du2sSoNFSRxl3vFc8ucInEm
```

View File

@ -1,17 +0,0 @@
# 平均通关时长
**获取时间:** 2026-03-02 18:04:16
**飞书文档 Token:** EpP7d6h2SoaTyJx1lZRcXXdLnVe
**注意:** 此文档需要通过 feishu_doc 工具读取完整内容
---
## 使用说明
使用以下命令读取完整文档内容:
```bash
feishu_doc read EpP7d6h2SoaTyJx1lZRcXXdLnVe
```

View File

@ -1,17 +0,0 @@
# 转化率
**获取时间:** 2026-03-02 18:04:16
**飞书文档 Token:** ATJ0dfajQo5CSexQd8hc9i3pnWe
**注意:** 此文档需要通过 feishu_doc 工具读取完整内容
---
## 使用说明
使用以下命令读取完整文档内容:
```bash
feishu_doc read ATJ0dfajQo5CSexQd8hc9i3pnWe
```

View File

@ -1,17 +0,0 @@
# 课程进入完成率
**获取时间:** 2026-03-02 18:04:16
**飞书文档 Token:** PwIydfZcHo5eZgxi8XLcOtjOnSb
**注意:** 此文档需要通过 feishu_doc 工具读取完整内容
---
## 使用说明
使用以下命令读取完整文档内容:
```bash
feishu_doc read PwIydfZcHo5eZgxi8XLcOtjOnSb
```

View File

@ -1,17 +0,0 @@
# 课程ID映射
**获取时间:** 2026-03-02 18:04:16
**飞书文档 Token:** GenUdsXCloUdYhxMvxqcWBMdnhb
**注意:** 此文档需要通过 feishu_doc 工具读取完整内容
---
## 使用说明
使用以下命令读取完整文档内容:
```bash
feishu_doc read GenUdsXCloUdYhxMvxqcWBMdnhb
```

View File

@ -1,292 +0,0 @@
# 全字段大表
**获取时间:** 2026-03-02
**飞书文档 Token:** VVyWd5491o6tuqxceCVci6dVnFd
## 业务说明
这个查询将用户、购课、角色、课程完课等多个维度的数据整合在一起,形成一个宽表,适合进行综合分析。
## 涉及的数据表
1. **bi_vala_app_account** - 用户账号表
2. **account_detail_info** - 账号详情表
3. **bi_vala_order** - 订单表
4. **bi_vala_app_character** - 角色表
5. **bi_user_chapter_play_record_0~7** - 用户章节播放记录表(分表)
6. **bi_level_unit_lesson** - 课程单元表
7. **bi_user_component_play_record_0~7** - 用户组件播放记录表(分表)
## SQL 查询
```sql
select a.id as "用户ID"
,a.created_date as "注册日期"
,a.download_channel as "下载渠道"
,a.key_from as "下载key_from"
,b.login_address as "城市"
,b.phone_login as "是否手机登录"
,c.sale_channel as "购课渠道"
,case when c.sale_channel is NULL then '未购课'
when c.sale_channel = '站外' then '站外购课'
else '站内购课'
end as "购课标签"
,c.key_from as "购课key_from"
,c.pay_date as "购课日期"
,c.pay_amount as "购课金额"
,d.id as "角色ID"
,d.characer_pay_status as "角色是否付费"
,d.gender as "性别"
,2026 - cast(d.birthday as int) as "年龄"
,e.chapter_id as "课程ID"
,e.course_id as "课程名称"
,e.chapter_unique_id as "完课标识"
,e.finish_date as "完课日期"
,e.finish_time as "完课耗时"
from
(
select id
,key_from
,to_char(created_at,'YYYY-MM-DD') as created_date
,download_channel
from bi_vala_app_account
where status = 1
and id not in (51,2121)
and deleted_at is NULL
group by id
,key_from
,created_at
,download_channel
) as a
left join
(
select account_id
,split_part(login_address,'-',2) as login_address
,case when phone_login_times = 0 then 0
else 1
end as phone_login
from account_detail_info
group by account_id
,login_address
,case when phone_login_times = 0 then 0
else 1
end
) as b on a.id = b.account_id
left join
(
select account_id
,case when sale_channel = 11 then '苹果'
when sale_channel = 12 then '华为'
when sale_channel = 13 then '小米'
when sale_channel = 14 then '荣耀'
when sale_channel = 15 then '应用宝'
when sale_channel = 17 then '魅族'
when sale_channel = 18 then 'VIVO'
when sale_channel = 19 then 'OPPO'
when sale_channel = 21 then '学而思'
when sale_channel = 22 then '讯飞'
when sale_channel = 23 then '步步高'
when sale_channel = 24 then '作业帮'
when sale_channel = 25 then '小度'
when sale_channel = 26 then '希沃'
when sale_channel = 27 then '京东方'
when sale_channel = 41 then '官网'
when sale_channel = 71 then '小程序'
else '站外'
end as sale_channel
,key_from
,to_char(pay_success_date,'YYYY-MM-DD') as pay_date
,pay_amount
from bi_vala_order
where order_status = 3
and pay_amount_int > 49800
group by account_id
,case when sale_channel = 11 then '苹果'
when sale_channel = 12 then '华为'
when sale_channel = 13 then '小米'
when sale_channel = 14 then '荣耀'
when sale_channel = 15 then '应用宝'
when sale_channel = 17 then '魅族'
when sale_channel = 18 then 'VIVO'
when sale_channel = 19 then 'OPPO'
when sale_channel = 21 then '学而思'
when sale_channel = 22 then '讯飞'
when sale_channel = 23 then '步步高'
when sale_channel = 24 then '作业帮'
when sale_channel = 25 then '小度'
when sale_channel = 26 then '希沃'
when sale_channel = 27 then '京东方'
when sale_channel = 41 then '官网'
when sale_channel = 71 then '小程序'
else '站外'
end
,key_from
,pay_success_date
,pay_amount
) as c on a.id = c.account_id
left join
(
select id
,account_id
,case when purchase_season_package = '[1]' then 0
else 1
end as characer_pay_status
,case when gender = 0 then 'girl'
when gender = 1 then 'boy'
else 'unknow'
end as gender
,case when split_part(birthday,'-',1) = '' then '0000'
else split_part(birthday,'-',1)
end as birthday
from bi_vala_app_character
where deleted_at is NULL
group by id
,account_id
,case when purchase_season_package = '[1]' then 0
else 1
end
,case when gender = 0 then 'girl'
when gender = 1 then 'boy'
else 'unknow'
end
,case when split_part(birthday,'-',1) = '' then '0000'
else split_part(birthday,'-',1)
end
) as d on a.id = d.account_id
left join
(
select user_id
,chapter_id
,format('%s-%s-%s-%s',course_level,course_season,course_unit,course_lesson) as course_id
,x.chapter_unique_id
,finish_date
,format('%s:%s',floor(sum(interval_time)/1000/60),mod((sum(interval_time)/1000),60)) as finish_time
,rank () over (partition by x.chapter_unique_id order by finish_date) as rankno
from
(
select user_id
,chapter_id
,chapter_unique_id
,to_char(updated_at,'YYYY-MM-DD') as finish_date
from bi_user_chapter_play_record_0
where chapter_id in (55,56,57,58,59)
and play_status = 1
group by id
,user_id
,chapter_id
,chapter_unique_id
,updated_at
union all
select user_id
,chapter_id
,chapter_unique_id
,to_char(updated_at,'YYYY-MM-DD') as finish_date
from bi_user_chapter_play_record_1
where chapter_id in (55,56,57,58,59)
and play_status = 1
group by user_id
,chapter_id
,chapter_unique_id
,updated_at
-- ... 其他分表类似
) as x
left join
(
select cast(id as int) as id
,course_level
,course_season
,course_unit
,course_lesson
from bi_level_unit_lesson
group by id
,course_level
,course_season
,course_unit
,course_lesson
) as y on x.chapter_id = y.id
left join
(
select chapter_unique_id
,interval_time
from bi_user_component_play_record_0
group by chapter_unique_id
,interval_time
-- ... 其他分表类似
) as z on x.chapter_unique_id = z.chapter_unique_id
group by user_id
,chapter_id
,course_level
,course_season
,course_unit
,course_lesson
,x.chapter_unique_id
,finish_date
) as e on d.id = e.user_id
where rankno = 1
group by a.id
,a.created_date
,a.download_channel
,a.key_from
,b.login_address
,b.phone_login
,c.sale_channel
,c.key_from
,c.pay_date
,c.pay_amount
,d.id
,d.characer_pay_status
,d.gender
,d.birthday
,e.chapter_id
,e.course_id
,e.chapter_unique_id
,e.finish_date
,e.finish_time
```
## 重要业务逻辑
### 1. 购课渠道映射
```sql
case when sale_channel = 11 then '苹果'
when sale_channel = 12 then '华为'
-- ... 更多渠道
when sale_channel = 71 then '小程序'
else '站外'
end as sale_channel
```
### 2. 购课标签
```sql
case when c.sale_channel is NULL then '未购课'
when c.sale_channel = '站外' then '站外购课'
else '站内购课'
end as "购课标签"
```
### 3. 角色付费状态
```sql
case when purchase_season_package = '[1]' then 0
else 1
end as characer_pay_status
```
### 4. 性别映射
```sql
case when gender = 0 then 'girl'
when gender = 1 then 'boy'
else 'unknow'
end as gender
```
### 5. 完课时间计算
```sql
format('%s:%s',floor(sum(interval_time)/1000/60),mod((sum(interval_time)/1000),60)) as finish_time
```
## 注意事项
1. **订单筛选条件**: `order_status = 3` and `pay_amount_int > 49800` (筛选有效订单且金额大于498元)
2. **分表处理**: 用户播放记录表按分表存储(0~7),需要使用 UNION ALL 合并
3. **去重逻辑**: 使用 `rank() over (partition by ... order by ...)` 取第一次完课记录
4. **测试用户排除**: `id not in (51,2121)`

View File

@ -1,17 +0,0 @@
# 班主任关注数据
**获取时间:** 2026-03-02 18:04:16
**飞书文档 Token:** NcVqdRKtrowglNxs9CocDekunje
**注意:** 此文档需要通过 feishu_doc 工具读取完整内容
---
## 使用说明
使用以下命令读取完整文档内容:
```bash
feishu_doc read NcVqdRKtrowglNxs9CocDekunje
```

View File

@ -1,17 +0,0 @@
# 端内GMV
**获取时间:** 2026-03-02 18:04:16
**飞书文档 Token:** FkVCd1AruoD9xWxxVpzc16hinVh
**注意:** 此文档需要通过 feishu_doc 工具读取完整内容
---
## 使用说明
使用以下命令读取完整文档内容:
```bash
feishu_doc read FkVCd1AruoD9xWxxVpzc16hinVh
```

View File

@ -1,17 +0,0 @@
# 端内购课用户学习行为
**获取时间:** 2026-03-02 18:04:16
**飞书文档 Token:** ZTxod4IUWo5yMexf8AHcBbpFnMg
**注意:** 此文档需要通过 feishu_doc 工具读取完整内容
---
## 使用说明
使用以下命令读取完整文档内容:
```bash
feishu_doc read ZTxod4IUWo5yMexf8AHcBbpFnMg
```

View File

@ -1,17 +0,0 @@
# 端内用户课程进入完成率
**获取时间:** 2026-03-02 18:04:16
**飞书文档 Token:** Ueu7dtgSHoNYfsxCDHmcY6E4nid
**注意:** 此文档需要通过 feishu_doc 工具读取完整内容
---
## 使用说明
使用以下命令读取完整文档内容:
```bash
feishu_doc read Ueu7dtgSHoNYfsxCDHmcY6E4nid
```

View File

@ -1,17 +0,0 @@
# 新增注册用户数by渠道
**获取时间:** 2026-03-02 18:04:16
**飞书文档 Token:** AzRPddp97o7To8x8VkxcFGr8nBh
**注意:** 此文档需要通过 feishu_doc 工具读取完整内容
---
## 使用说明
使用以下命令读取完整文档内容:
```bash
feishu_doc read AzRPddp97o7To8x8VkxcFGr8nBh
```

View File

@ -1,17 +0,0 @@
# 退费率
**获取时间:** 2026-03-02 18:04:16
**飞书文档 Token:** DC1Qdhpitowt9lxxo1acEzOwnFc
**注意:** 此文档需要通过 feishu_doc 工具读取完整内容
---
## 使用说明
使用以下命令读取完整文档内容:
```bash
feishu_doc read DC1Qdhpitowt9lxxo1acEzOwnFc
```

View File

@ -1,17 +0,0 @@
# 销转学习进度
**获取时间:** 2026-03-02 18:04:16
**飞书文档 Token:** G1p9dhK63oLWMzxyGQ8csZGMnDh
**注意:** 此文档需要通过 feishu_doc 工具读取完整内容
---
## 使用说明
使用以下命令读取完整文档内容:
```bash
feishu_doc read G1p9dhK63oLWMzxyGQ8csZGMnDh
```

View File

@ -1,70 +0,0 @@
# 用户学习行为数据导出技能
## 功能说明
可以导出指定账户ID或角色ID的完整学习行为数据,输出为Excel文件(包含多个sheet)。
## 导出内容说明
Excel包含以下sheet:
1. **全部音频数据**:用户的所有语音交互数据,包含音频地址、ASR结果等
2. **互动组件学习记录**:所有组件互动记录,包含组件类型、名称、知识点、互动结果等
3. **课程巩固记录**:课程课后巩固的做题记录
4. **单元挑战记录**:单元挑战的答题记录
5. **单元总结记录**:单元总结的学习记录
6. **汇总统计**:自动统计的组件通过率、知识点掌握情况、单元学习时长等
## 使用方法
### 1. 导出单个角色ID
修改脚本变量:
```python
USER_ID = "角色ID"
USER_ID_LIST = None
ACCOUNT_ID_LIST = None
```
### 2. 导出单个/多个账户ID
修改脚本变量:
```python
USER_ID = None
USER_ID_LIST = None
ACCOUNT_ID_LIST = [账户ID1, 账户ID2, ...]
```
脚本会自动查询账户对应的所有角色ID并分别导出。
## 依赖环境
需要配置以下环境变量:
```
# ES 配置
ES_HOST=es-7vd7jcu9.public.tencentelasticsearch.com
ES_PORT=9200
ES_SCHEME=https
ES_USER=elastic
ES_PASSWORD=***ES密码(请从 secrets.env 获取,勿写入文档)***
# PG 配置
PG_DB_HOST=bj-postgres-16pob4sg.sql.tencentcdb.com
PG_DB_PORT=28591
PG_DB_USER=ai_member
PG_DB_PASSWORD=***PostgreSQL密码(请从 secrets.env 获取,勿写入文档)***
PG_DB_DATABASE=vala
# MySQL 配置
MYSQL_HOST=bj-cdb-8frbdwju.sql.tencentcdb.com
MYSQL_USERNAME=read_only
MYSQL_PASSWORD=***MySQL密码(请从 secrets.env 获取,勿写入文档)***
MYSQL_PORT=25413
# MySQL Online 配置
MYSQL_HOST_online=bj-cdb-dh2fkqa0.sql.tencentcdb.com
MYSQL_USERNAME_online=read_only
MYSQL_PASSWORD_online=***线上MySQL密码(请从 secrets.env 获取,勿写入文档)***
MYSQL_PORT_online=27751
```
## 常见问题排查
1. **事务异常错误**:一般是前面某个查询失败导致,检查是否有权限、表是否存在
2. **权限不足**:检查数据库账号的表权限,需要有各分表的SELECT权限
3. **0条记录**:对应角色没有学习数据,属于正常情况
## 导出示例
- 账户ID 9343(角色12699):导出199条学习记录
- 角色ID 14607:导出855条完整学习记录,所有sheet都有数据

View File

@ -0,0 +1,57 @@
# Session: 2026-04-15 11:15:24 UTC
- **Session Key**: agent:main:feishu:direct:ou_9cb5bc9a5f1b6cab2d78fd36139ecb87
- **Session ID**: 68ce9fe6-608c-4b96-bf82-11be7388550c
- **Source**: feishu
## Conversation Summary
assistant: ✅ New session started · model: cloudsway/MaaS_Cl_Opus_4.6_20260205_cache (default: volcengine/doubao-seed-2-0-pro-260215)
user: System: [2026-04-15 19:07:02 GMT+8] Feishu[xiaoxi] DM | 李若松 (ou_9cb5bc9a5f1b6cab2d78fd36139ecb87) [msg:om_x100b52c35b9d8904b4b08957063f3e0]
System: [2026-04-15 19:07:23 GMT+8] Feishu[xiaoxi] DM | 李若松 (ou_9cb5bc9a5f1b6cab2d78fd36139ecb87) [msg:om_x100b52c3586058a4b31d4de93246c16]
A new session was started via /new or /reset. Run your Session Startup sequence - read the required files before responding to the user. Then greet the user in your configured persona, if one is provided. Be yourself - use your defined voice, mannerisms, and mood. Keep it to 1-3 sentences and ask what they want to do. If the runtime model differs from default_model in the system prompt, mention the default model. Do not mention internal steps, files, tools, or reasoning.
Current time: Wednesday, April 15th, 2026 — 7:07 PM (Asia/Shanghai) / 2026-04-15 11:07 UTC
assistant:
Let me initialize by reading the required files.
assistant: 已完成初始化,当前用户是技术负责人李若松。
若松你好呀 📊 今天有什么数据需求或者想聊的事情吗?
user: System: [2026-04-15 19:08:13 GMT+8] Feishu[xiaoxi] DM | 李若松 (ou_9cb5bc9a5f1b6cab2d78fd36139ecb87) [msg:om_x100b52c356d6d8b4b203bf8272a93d2]
Conversation info (untrusted metadata):
```json
{
"message_id": "om_x100b52c356d6d8b4b203bf8272a93d2",
"sender_id": "ou_9cb5bc9a5f1b6cab2d78fd36139ecb87",
"sender": "李若松",
"timestamp": "Wed 2026-04-15 19:07 GMT+8"
}
```
Sender (untrusted metadata):
```json
{
"label": "李若松 (ou_9cb5bc9a5f1b6cab2d78fd36139ecb87)",
"id": "ou_9cb5bc9a5f1b6cab2d78fd36139ecb87",
"name": "李若松"
}
```
查看你的全部定时任务
assistant:
让我查看当前配置的所有定时任务。
assistant: 目前我的定时任务全部在系统 crontab 中,共 **6 个**openclaw cron 侧无任务。
| # | 执行时间 | 任务名称 | 脚本 |
|---|---------|---------|------|
| 1 | 每天 07:30 | 业务知识库更新 | `update_business_knowledge.sh` |
| 2 | 每天 08:00 | 工作区 Git 备份 | `daily_backup.sh` |
| 3 | 每天 09:00 | 组件练习质量统计(每日) | `run_component_practice_stat_daily.sh` |
| 4 | 每天 09:00 | 用户购课完课统计 | `run_user_course_stat.sh` |
| 5 | 每天 09:00 | 渠道订单统计周报 | `weekly_channel_order_report.sh` |
| 6 | 每周一 10:00 | 学而思周度报表 | `xueersi_weekly_report.sh` |
需要查看某个任务的具体脚本内容或执行日志吗?

View File

@ -1,42 +1,148 @@
# Session: 2026-04-15 04:07:29 UTC # Session: 2026-04-15 11:07:23 UTC
- **Session Key**: agent:main:feishu:direct:ou_e63ce6b760ad39382852472f28fbe2a2 - **Session Key**: agent:main:feishu:direct:ou_9cb5bc9a5f1b6cab2d78fd36139ecb87
- **Session ID**: 3cc3a798-b913-4bf0-8d6f-d940eaebc4ce - **Session ID**: a540d883-1e19-4b29-9720-f5d2b4535ce5
- **Source**: feishu - **Source**: feishu
## Conversation Summary ## Conversation Summary
assistant: ✅ New session started · model: volcengine/doubao-seed-2-0-pro-260215 assistant: 现在修正 crontab 中的路径错误和日志规范:
user: System: [2026-04-15 11:59:11 GMT+8] Feishu[xiaoxi] DM | 李承龙 (ou_e63ce6b760ad39382852472f28fbe2a2) [msg:om_x100b52c49e3ea894b28db4de29c670e] assistant: 现在做一轮快速验证,确认改造后的脚本语法没问题:
assistant: 全部语法检查通过。保存旧的 crontab 备份:
assistant: ---
A new session was started via /new or /reset. Run your Session Startup sequence - read the required files before responding to the user. Then greet the user in your configured persona, if one is provided. Be yourself - use your defined voice, mannerisms, and mood. Keep it to 1-3 sentences and ask what they want to do. If the runtime model differs from default_model in the system prompt, mention the default model. Do not mention internal steps, files, tools, or reasoning. ### 迁移完成,汇总如下:
Current time: Wednesday, April 15th, 2026 — 11:59 AM (Asia/Shanghai) / 2026-04-15 03:59 UTC
assistant: 承龙你好~ 我是数据分析师小溪📊,有任何数据查询、数据分析相关的需求都可以随时和我说哦。 #### 改造的脚本7个
user: System: [2026-04-15 12:01:48 GMT+8] Feishu[default] DM | 李承龙 (ou_e63ce6b760ad39382852472f28fbe2a2) [msg:om_x100b52c49476e484b27f8f15693cb59]
| 脚本 | 改动内容 |
|------|---------|
| **小溪 daily_backup.sh** | `openclaw message send`open_id→ curl + user_id增加log函数 |
| **小溪 update_business_knowledge.sh** | `openclaw message send`open_id→ curl + user_id |
| **小溪 xueersi_weekly_report.sh** | 硬编码 app_id/secret → 读 config.jsonopen_id → user_id李承龙 d8cb7f2a |
| **小溪 run_component_practice_stat_daily.sh** | `openclaw message send --media` → curl 两步(上传+发送文件) |
| **小溪 run_user_course_stat.sh** | 同上 |
| **小溪 weekly_channel_order_report.sh** | 同上 |
| **小伴 daily_summary.sh** | `openclaw message send`open_id + 错误node路径→ curl + user_id |
#### 未改动的脚本6个已符合规范
- 小伴 backup_workspace.sh ✅
- 小葵 backup.sh ✅
- 小葵 vala_skill_auto_push.sh ✅(无消息发送)
- 小葵 sync_feedback_group.py ✅Python脚本独立逻辑
- 小编 workspace_backup.sh ✅
- 小燕 backup_workspace.sh ✅
#### crontab 改动
- 按 agent 分组,添加注释说明
- 所有任务统一 `/bin/bash` 执行
- **2个缺日志的任务补上了日志重定向**component_stat、user_course_stat
- **日志命名规范化**:统一 `<agent名>_<任务名>.log`
- 修正小伴 daily_summary.sh 路径(之前指向不存在的文件)
- **旧 crontab 已备份**
明天早上各任务正常触发后就能验证了。如果想提前验证某个具体脚本,可以手动跑一下。
user: System: [2026-04-15 18:50:08 GMT+8] Feishu[default] DM | 李若松 (ou_9cb5bc9a5f1b6cab2d78fd36139ecb87) [msg:om_x100b52c29b3a3504b3c55b10ae784ab]
Conversation info (untrusted metadata): Conversation info (untrusted metadata):
```json ```json
{ {
"message_id": "om_x100b52c49476e484b27f8f15693cb59", "message_id": "om_x100b52c29b3a3504b3c55b10ae784ab",
"sender_id": "ou_e63ce6b760ad39382852472f28fbe2a2", "sender_id": "ou_9cb5bc9a5f1b6cab2d78fd36139ecb87",
"sender": "李承龙", "sender": "李若松",
"timestamp": "Wed 2026-04-15 12:01 GMT+8" "timestamp": "Wed 2026-04-15 18:50 GMT+8"
} }
``` ```
Sender (untrusted metadata): Sender (untrusted metadata):
```json ```json
{ {
"label": "李承龙 (ou_e63ce6b760ad39382852472f28fbe2a2)", "label": "李若松 (ou_9cb5bc9a5f1b6cab2d78fd36139ecb87)",
"id": "ou_e63ce6b760ad39382852472f28fbe2a2", "id": "ou_9cb5bc9a5f1b6cab2d78fd36139ecb87",
"name": "李承龙" "name": "李若松"
} }
``` ```
执行weekly_channel_order_report.sh这个定时任务 用你的 workspace 备份 任务 验证一下, 先配置时间为 18:52 触发 确认没问题后 再改回原有时间
assistant: ✅ 定时任务weekly_channel_order_report.sh已执行完成渠道订单统计周报已成功发送到指定飞书群。已修复脚本中存在的问题 assistant: 已设置为 18:52 触发,等执行后看日志和飞书消息。
1. 修复了secrets.env文件特殊字符语法错误 assistant: 日志文件是空的,说明可能还没执行到,或者 cron 还没触发。直接检查:
2. 替换了不可用的csv2xlsx工具为python pandas转换 assistant: ✅ **执行完全成功!**
3. 修复了openclaw消息命令参数错误
4. 调整了文件存储路径到允许的目录 - 18:52:01 cron准时触发
后续该定时任务可以正常自动运行。 - Git commit + push 成功19个文件变更
- 日志正常写入 `/var/log/xiaoxi_daily_backup.log`
- 备份任务完成
你飞书上应该也收到了备份成功的通知消息,确认一下?确认没问题我就把时间改回 08:00。
assistant: 已恢复为每天 08:00。迁移验证通过 ✅
user: System: [2026-04-15 18:51:50 GMT+8] Exec completed (oceanic-, code 1) :: === 日志 ===
System: [2026-04-15 18:53:12 GMT+8] Feishu[default] DM | 李若松 (ou_9cb5bc9a5f1b6cab2d78fd36139ecb87) [msg:om_x100b52c2afb0c4acb2599dbe22db70b]
Conversation info (untrusted metadata):
```json
{
"message_id": "om_x100b52c2afb0c4acb2599dbe22db70b",
"sender_id": "ou_9cb5bc9a5f1b6cab2d78fd36139ecb87",
"sender": "李若松",
"timestamp": "Wed 2026-04-15 18:53 GMT+8"
}
```
Sender (untrusted metadata):
```json
{
"label": "李若松 (ou_9cb5bc9a5f1b6cab2d78fd36139ecb87)",
"id": "ou_9cb5bc9a5f1b6cab2d78fd36139ecb87",
"name": "李若松"
}
```
好的 现在 查看你的全部定时任务
assistant: 我当前配置的全部定时任务6个
| 时间 | 频率 | 任务 | 日志 |
|------|------|------|------|
| 07:30 | 每天 | 业务知识库更新 | `/var/log/xiaoxi_update_business_knowledge.log` |
| 08:00 | 每天 | 工作区Git备份 ✅ 刚验证通过 | `/var/log/xiaoxi_daily_backup.log` |
| 09:00 | 每天 | 组件练习质量统计 | `/var/log/xiaoxi_component_stat.log` |
| 09:00 | 每天 | 用户购课完课统计 | `/var/log/xiaoxi_user_course_stat.log` |
| 09:00 | 每天 | 渠道订单统计周报 | `/var/log/xiaoxi_weekly_channel_order_report.log` |
| 10:00 | 每周一 | 学而思周度报表 | `/var/log/xiaoxi_xueersi_weekly_report.log` |
脚本位置:`/root/.openclaw/workspace/scripts/`
user: System: [2026-04-15 18:54:14 GMT+8] Feishu[default] DM | 李若松 (ou_9cb5bc9a5f1b6cab2d78fd36139ecb87) [msg:om_x100b52c2ab9620b0b31d4e326752657]
Conversation info (untrusted metadata):
```json
{
"message_id": "om_x100b52c2ab9620b0b31d4e326752657",
"sender_id": "ou_9cb5bc9a5f1b6cab2d78fd36139ecb87",
"sender": "李若松",
"timestamp": "Wed 2026-04-15 18:54 GMT+8"
}
```
Sender (untrusted metadata):
```json
{
"label": "李若松 (ou_9cb5bc9a5f1b6cab2d78fd36139ecb87)",
"id": "ou_9cb5bc9a5f1b6cab2d78fd36139ecb87",
"name": "李若松"
}
```
定时任务配置文件的位置是? 我希望也体现在 skill中。 方便其他agent使用时查看。
assistant: 定时任务的配置位置就是系统 crontab通过 `crontab -l` 查看,通过 `crontab -e` 编辑。实际存储在 `/var/spool/cron/crontabs/root`
我来把这个信息补充到 skill 中:
assistant: 同步副本:
assistant: 已更新。Skill 中新增了**配置文件位置**说明:
| 项目 | 位置 |
|------|------|
| 配置查看 | `crontab -l` |
| 配置编辑 | `crontab -e` |
| 配置文件存储 | `/var/spool/cron/crontabs/root` |
| 系统执行日志 | `/var/log/syslog`(搜索 CRON |
其他 agent 查看自己的任务只需执行 `crontab -l | grep "workspace-<自己的名字>"`,配置文件中也有注释块标注每个 agent 的归属。

File diff suppressed because it is too large Load Diff

View File

@ -1,144 +0,0 @@
#!/usr/bin/env python3
"""单独测试角色12698的导出查看具体报错"""
import os
import json
import sys
import datetime
from typing import Any, Dict, List
# Load environment variables from a local .env file.
def load_env():
    """Copy ``KEY=VALUE`` pairs from ``./.env`` into ``os.environ``.

    The file is looked up in the current working directory; when it does not
    exist nothing happens. Blank lines, lines starting with ``#`` and lines
    without ``=`` are skipped. Keys and values are whitespace-stripped, and
    surrounding double then single quotes are removed from the value.
    Existing environment entries with the same key are overwritten.
    """
    dotenv_file = os.path.join(os.getcwd(), ".env")
    if not os.path.exists(dotenv_file):
        return
    with open(dotenv_file, "r", encoding="utf-8") as handle:
        for raw in handle:
            entry = raw.strip()
            if entry and not entry.startswith("#") and "=" in entry:
                # Split on the first "=" only, so values may contain "=".
                name, _, value = entry.partition("=")
                os.environ[name.strip()] = value.strip().strip('"').strip("'")


load_env()
import psycopg2
from psycopg2.extras import RealDictCursor
import pymysql
import requests
from requests.auth import HTTPBasicAuth
import warnings
warnings.filterwarnings('ignore')
def _probe_table(conn, table, columns, user_id, show_traceback=False):
    """Run one per-user SELECT against *table* and print the outcome.

    Args:
        conn: Open psycopg2 connection.
        table: Table name (a constant chosen by the caller, not user input).
        columns: Comma-separated column list to select.
        user_id: Value bound to the ``user_id = %s`` filter.
        show_traceback: When true, also print the full traceback on failure.

    Returns:
        True when the query succeeded, False otherwise.
    """
    print(f"\n测试查询表 {table}user_id={user_id}")
    sql = f"""
        SELECT {columns}
        FROM {table}
        WHERE user_id = %s
        ORDER BY updated_at DESC
    """
    try:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(sql, (user_id,))
            rows = cur.fetchall()
        print(f"✅ 查询成功,返回{len(rows)}条记录")
        return True
    except Exception as e:
        print(f"❌ 查询失败: {e}")
        print(f"错误类型: {type(e).__name__}")
        if show_traceback:
            import traceback
            traceback.print_exc()
        # Always roll back: a failed statement leaves the PostgreSQL
        # transaction aborted, and every later query on this connection
        # would fail until it is rolled back.
        print("\n尝试回滚事务...")
        try:
            conn.rollback()
            print("✅ 事务回滚成功")
        except Exception as e2:
            print(f"❌ 回滚失败: {e2}")
        return False


def test_role_12698():
    """Query each learning-record table for role 12698 and report errors.

    Connects to PostgreSQL using the PG_DB_* environment variables, then
    probes the component play record, unit review, unit challenge and unit
    summary tables one after another. Each failure is printed and rolled
    back so the following probes still run on a clean transaction. The
    connection is closed on every exit path.
    """
    print("="*60)
    print("单独测试角色ID=12698的查询")
    print("="*60)
    # Connect to PostgreSQL.
    try:
        conn = psycopg2.connect(
            host=os.getenv("PG_DB_HOST"),
            port=int(os.getenv("PG_DB_PORT")),
            user=os.getenv("PG_DB_USER"),
            password=os.getenv("PG_DB_PASSWORD"),
            dbname=os.getenv("PG_DB_DATABASE"),
            connect_timeout=10
        )
        print("✅ PG连接成功")
    except Exception as e:
        print(f"❌ PG连接失败: {e}")
        return
    user_id = "12698"
    try:
        # Component play records (shard 0).
        _probe_table(
            conn,
            "user_component_play_record_0",
            "user_id, component_unique_code, session_id, c_type, c_id, "
            "play_result, user_behavior_info, updated_at",
            user_id,
        )
        # Lesson review (consolidation) records.
        _probe_table(
            conn,
            "user_unit_review_question_result",
            "user_id, story_id, chapter_id, question_list, updated_at",
            user_id,
        )
        # Unit challenge records.
        _probe_table(
            conn,
            "user_unit_challenge_question_result",
            "user_id, story_id, category, score_text, question_list, updated_at",
            user_id,
        )
        # Unit summary records; the original printed a traceback only here.
        _probe_table(
            conn,
            "user_unit_summary_record",
            "id, user_id, unit_id, updated_at, km_id, km_type, play_time_seconds",
            user_id,
            show_traceback=True,
        )
    finally:
        conn.close()


if __name__ == "__main__":
    test_role_12698()

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1 +0,0 @@
Subproject commit 75ab13e87dd0e856cb05c9515efcd507888b6486

View File

@ -1,176 +0,0 @@
#!/usr/bin/env python3
"""测试各个数据库连接和查询"""
import os
import json
import psycopg2
import pymysql
import requests
from requests.auth import HTTPBasicAuth
import warnings
warnings.filterwarnings('ignore')
def test_postgresql():
    """Check that PostgreSQL accepts a connection and a trivial query.

    Connection settings are read from the PG_DB_HOST, PG_DB_PORT, PG_DB_USER,
    PG_DB_PASSWORD and PG_DB_DATABASE environment variables, so that no
    credentials are stored in the source file.

    Returns:
        True when the connection and the sample queries succeeded,
        False otherwise (the reason is printed).
    """
    # Local import: used only to quote the table name safely.
    from psycopg2 import sql as pgsql

    print("\n" + "="*60)
    print("测试 PostgreSQLOnline连接")
    print("="*60)
    conn = None
    try:
        conn = psycopg2.connect(
            host=os.environ["PG_DB_HOST"],
            port=int(os.environ["PG_DB_PORT"]),
            user=os.environ["PG_DB_USER"],
            password=os.environ["PG_DB_PASSWORD"],
            dbname=os.environ["PG_DB_DATABASE"],
            connect_timeout=10
        )
        print("✅ PostgreSQL 连接成功!")
        # Sample queries.
        with conn.cursor() as cur:
            # List a few tables first.
            cur.execute("SELECT tablename FROM pg_tables WHERE schemaname = 'public' LIMIT 5")
            tables = cur.fetchall()
            print(f"✅ 查询成功找到前5个表{[t[0] for t in tables]}")
            # Then read one row from the first table found.
            if tables:
                table = tables[0][0]
                # Quote the identifier so mixed-case or unusual names work.
                cur.execute(
                    pgsql.SQL("SELECT * FROM {} LIMIT 1").format(pgsql.Identifier(table))
                )
                row = cur.fetchone()
                print(f"✅ 从表 {table} 读取到1条数据{row if row else '空表'}")
        return True
    except KeyError as e:
        print(f"❌ 缺少环境变量:{e}")
        return False
    except Exception as e:
        print(f"❌ PostgreSQL 连接/查询失败:{str(e)[:200]}")
        return False
    finally:
        # Close the connection on failure paths too.
        if conn is not None:
            conn.close()
def test_mysql_test():
    """Check that the test MySQL instance accepts a connection and queries.

    Connection settings are read from the MYSQL_HOST, MYSQL_PORT,
    MYSQL_USERNAME and MYSQL_PASSWORD environment variables, so that no
    credentials are stored in the source file.

    Returns:
        True when the connection and the sample queries succeeded,
        False otherwise (the reason is printed).
    """
    print("\n" + "="*60)
    print("测试 MySQLTest环境连接")
    print("="*60)
    conn = None
    try:
        conn = pymysql.connect(
            host=os.environ["MYSQL_HOST"],
            port=int(os.environ["MYSQL_PORT"]),
            user=os.environ["MYSQL_USERNAME"],
            password=os.environ["MYSQL_PASSWORD"],
            connect_timeout=10
        )
        print("✅ MySQLTest连接成功")
        # Sample queries.
        with conn.cursor() as cur:
            # SHOW statements take no LIMIT clause; trim on the client side.
            cur.execute("SHOW DATABASES")
            dbs = cur.fetchmany(5)
            print(f"✅ 查询成功找到前5个数据库{[db[0] for db in dbs]}")
            if dbs:
                db = dbs[0][0]
                # Backtick-quote identifiers (doubling embedded backticks).
                cur.execute("USE `{}`".format(db.replace("`", "``")))
                cur.execute("SHOW TABLES")
                table = cur.fetchone()
                if table:
                    cur.execute(
                        "SELECT * FROM `{}` LIMIT 1".format(table[0].replace("`", "``"))
                    )
                    row = cur.fetchone()
                    print(f"✅ 从表 {table[0]} 读取到1条数据{row if row else '空表'}")
        return True
    except KeyError as e:
        print(f"❌ 缺少环境变量:{e}")
        return False
    except Exception as e:
        print(f"❌ MySQLTest连接/查询失败:{str(e)[:200]}")
        return False
    finally:
        # Close the connection on failure paths too.
        if conn is not None:
            conn.close()
def test_mysql_online():
    """Check that the online MySQL instance accepts a connection and a query.

    Connection settings are read from the MYSQL_HOST_online,
    MYSQL_PORT_online, MYSQL_USERNAME_online and MYSQL_PASSWORD_online
    environment variables, so that no credentials are stored in the source.

    Returns:
        True when the connection and the sample query succeeded,
        False otherwise (the reason is printed).
    """
    print("\n" + "="*60)
    print("测试 MySQLOnline连接")
    print("="*60)
    conn = None
    try:
        conn = pymysql.connect(
            host=os.environ["MYSQL_HOST_online"],
            port=int(os.environ["MYSQL_PORT_online"]),
            user=os.environ["MYSQL_USERNAME_online"],
            password=os.environ["MYSQL_PASSWORD_online"],
            connect_timeout=10
        )
        print("✅ MySQLOnline连接成功")
        # Sample query.
        with conn.cursor() as cur:
            # SHOW statements take no LIMIT clause; trim on the client side.
            cur.execute("SHOW DATABASES")
            dbs = cur.fetchmany(5)
            print(f"✅ 查询成功找到前5个数据库{[db[0] for db in dbs]}")
        return True
    except KeyError as e:
        print(f"❌ 缺少环境变量:{e}")
        return False
    except Exception as e:
        print(f"❌ MySQLOnline连接/查询失败:{str(e)[:200]}")
        return False
    finally:
        # Close the connection on failure paths too.
        if conn is not None:
            conn.close()
def test_es_online():
    """Check that the online Elasticsearch cluster answers authenticated requests.

    Endpoint and credentials are read from the ES_SCHEME, ES_HOST, ES_PORT,
    ES_USER and ES_PASSWORD environment variables, so that no credentials
    are stored in the source file.

    Returns:
        True when the root endpoint answered with HTTP 200,
        False otherwise (the reason is printed).
    """
    print("\n" + "="*60)
    print("测试 ElasticsearchOnline连接")
    print("="*60)
    try:
        url = "{}://{}:{}".format(
            os.environ["ES_SCHEME"], os.environ["ES_HOST"], os.environ["ES_PORT"]
        )
        auth = HTTPBasicAuth(os.environ["ES_USER"], os.environ["ES_PASSWORD"])
        # NOTE(review): verify=False disables TLS certificate validation; it
        # is kept from the original - confirm whether the cluster certificate
        # can be verified instead.
        response = requests.get(
            url,
            auth=auth,
            verify=False,
            timeout=10
        )
        if response.status_code != 200:
            print(f"❌ ES 连接失败HTTP {response.status_code}")
            return False
        info = response.json()
        print(f"✅ ES 连接成功!集群名称:{info.get('cluster_name')}")
        # List indices as a second sanity check.
        indices_resp = requests.get(
            f"{url}/_cat/indices?format=json",
            auth=auth,
            verify=False,
            timeout=10
        )
        if indices_resp.status_code == 200:
            indices = indices_resp.json()
            print(f"✅ 查询成功!索引数量:{len(indices)}")
            if indices:
                print(f" 前3个索引{[idx['index'] for idx in indices[:3]]}")
        else:
            # Report the failed listing instead of ignoring it; the connection
            # itself succeeded, so the result stays True as before.
            print(f"⚠️ 索引查询失败HTTP {indices_resp.status_code}")
        return True
    except KeyError as e:
        print(f"❌ 缺少环境变量:{e}")
        return False
    except Exception as e:
        print(f"❌ ES 连接/查询失败:{str(e)[:200]}")
        return False
if __name__ == "__main__":
    # Run every connectivity check in a fixed order, then print a summary.
    print("开始测试所有数据库连接...")
    checks = (
        ("PostgreSQL(Online)", test_postgresql),
        ("MySQL(Test)", test_mysql_test),
        ("MySQL(Online)", test_mysql_online),
        ("ES(Online)", test_es_online),
    )
    results = {}
    for label, check in checks:
        results[label] = check()
    divider = "=" * 60
    print("\n" + divider)
    print("测试总结")
    print(divider)
    for name in results:
        if results[name]:
            status = "✅ 正常"
        else:
            status = "❌ 异常"
        print(f"{name}: {status}")

View File

@ -1,12 +0,0 @@
# 敏感密钥配置示例文件,真实密钥请放在 passwords.txt 中(已加入 .gitignore 不会提交到Git
# MySQL
MYSQL_ONLINE_PASSWORD=***线上MySQL密码***
MYSQL_TEST_PASSWORD=***测试MySQL密码***
# PostgreSQL
PG_ONLINE_PASSWORD=***线上PostgreSQL密码***
PG_TEST_PASSWORD=***测试PostgreSQL密码***
# Elasticsearch
ES_TEST_PASSWORD=***测试ES密码***
ES_ONLINE_PASSWORD=***线上ES密码***

View File

@ -1,99 +1,152 @@
import pandas as pd import pandas as pd
import psycopg2 from datetime import datetime
# 1. 读取最新的带成交标记的订单数据 # 1. 整体统计数据
order_df = pd.read_csv('2026年3月1日至今订单_含正确成交标记.csv') overall_data = [
print(f"订单总数:{len(order_df)}") {"渠道": "学而思", "新增注册总人数": 615, "购课总人数":7, "购课总金额(元)":7794},
{"渠道": "科大讯飞", "新增注册总人数": 377, "购课总人数":4, "购课总金额(元)":3796},
# 2. 计算GMV和退款相关 {"渠道": "希沃", "新增注册总人数": 122, "购课总人数":1, "购课总金额(元)":599},
order_df['GMV'] = order_df['pay_amount_int'] / 100 {"渠道": "京东方", "新增注册总人数": 61, "购课总人数":1, "购课总金额(元)":599},
order_df['is_refund'] = (order_df['order_status'] == 4).astype(int) {"渠道": "合计", "新增注册总人数": 1175, "购课总人数":13, "购课总金额(元)":12788},
# 计算GSV退款订单GSV为0其他为GMV
order_df['GSV'] = order_df.apply(lambda row: 0 if row['order_status'] == 4 else row['GMV'], axis=1)
order_df['refund_amount'] = order_df.apply(lambda row: row['GMV'] if row['order_status'] == 4 else 0, axis=1)
# 3. 映射到大类渠道
def map_channel(tag):
if tag in ['销转', '销转-小龙']:
return '销转'
elif tag in ['端内直购', '端内销转']:
return 'App转化'
elif tag == '达播':
return '达播'
elif tag.startswith('班主任-'):
return '班主任'
elif tag == '店铺直购':
return '店铺直购'
else:
return '其他'
order_df['渠道大类'] = order_df['成交标记'].apply(map_channel)
# 4. 按大类统计
channel_stats = order_df.groupby('渠道大类').agg(
订单数=('id', 'count'),
GMV=('GMV', 'sum'),
已退款金额=('refund_amount', 'sum'),
GSV=('GSV', 'sum'),
退款订单数=('is_refund', 'sum'),
客单价=('GMV', 'mean')
).reset_index()
channel_stats['退费率'] = (channel_stats['退款订单数'] / channel_stats['订单数'] * 100).round(1).astype(str) + '%'
channel_stats['GMV'] = channel_stats['GMV'].round(2)
channel_stats['GSV'] = channel_stats['GSV'].round(2)
channel_stats['已退款金额'] = channel_stats['已退款金额'].round(2)
channel_stats['客单价'] = channel_stats['客单价'].round(2)
# 5. 原预测表的预测值
pred_data = [
{'渠道大类': '销转', '预测GSV': 100000},
{'渠道大类': 'App转化', '预测GSV': 20000},
{'渠道大类': '达播', '预测GSV': 250000},
{'渠道大类': '班主任', '预测GSV': 10000}
] ]
pred_df = pd.DataFrame(pred_data) df_overall = pd.DataFrame(overall_data)
# 6. 合并实际和预测数据 # 2. 每日购课明细数据
report_df = pd.merge(pred_df, channel_stats, on='渠道大类', how='left') purchase_data = [
# 加上店铺直购的统计 {"日期": "2026-03-02", "渠道": "学而思", "购课人数":1, "购课金额(元)":599, "订单号": "zfb202603022031481772454708683943"},
shop_stats = channel_stats[channel_stats['渠道大类'] == '店铺直购'] {"日期": "2026-03-07", "渠道": "学而思", "购课人数":1, "购课金额(元)":599, "订单号": "wx202603071022051772850125753228"},
report_df = pd.concat([report_df, shop_stats], ignore_index=True) {"日期": "2026-03-07", "渠道": "科大讯飞", "购课人数":1, "购课金额(元)":599, "订单号": "wx202603072123501772889830225976"},
# 加上总计 {"日期": "2026-03-10", "渠道": "学而思", "购课人数":1, "购课金额(元)":1999, "订单号": "wx202603101820431773138043948181"},
total = pd.DataFrame({ {"日期": "2026-03-15", "渠道": "科大讯飞", "购课人数":2, "购课金额(元)":2598, "订单号": "wx202603150854031773536043478685、wx20260315122747177354886748896"},
'渠道大类': ['总计'], {"日期": "2026-03-18", "渠道": "学而思", "购课人数":2, "购课金额(元)":2598, "订单号": "wx202603182055481773838548372991、zfb202603182118201773839900411837"},
'预测GSV': [pred_df['预测GSV'].sum()], {"日期": "2026-03-23", "渠道": "科大讯飞", "购课人数":1, "购课金额(元)":599, "订单号": "wx202603232015081774268108032833"},
'订单数': [channel_stats['订单数'].sum()], {"日期": "2026-03-24", "渠道": "京东方", "购课人数":1, "购课金额(元)":599, "订单号": "zfb202603242026431774355203538499"},
'GMV': [channel_stats['GMV'].sum()], {"日期": "2026-03-27", "渠道": "学而思", "购课人数":1, "购课金额(元)":1999, "订单号": "wx202603271258341774587514141956"},
'已退款金额': [channel_stats['已退款金额'].sum()], {"日期": "2026-03-28", "渠道": "希沃", "购课人数":1, "购课金额(元)":599, "订单号": "wx20260328145038177468063894734"},
'GSV': [channel_stats['GSV'].sum()], ]
'退款订单数': [channel_stats['退款订单数'].sum()], df_purchase = pd.DataFrame(purchase_data)
'客单价': [channel_stats['GMV'].sum()/channel_stats['订单数'].sum()],
'退费率': [str((channel_stats['退款订单数'].sum()/channel_stats['订单数'].sum()*100).round(1)) + '%']
})
report_df = pd.concat([report_df, total], ignore_index=True)
report_df['完成率'] = report_df.apply(lambda row: str(round(row['GSV']/row['预测GSV']*100, 1)) + '%' if pd.notna(row['预测GSV']) else '-', axis=1)
# 7. 保存报表 # 3. 每日新增注册数据
output_file = '2026年3月收入预测报表_最新版.xlsx' register_data = [
with pd.ExcelWriter(output_file) as writer: {"日期": "2026-03-01", "渠道": "京东方", "新增注册人数": 1},
report_df.to_excel(writer, sheet_name='整体统计', index=False) {"日期": "2026-03-01", "渠道": "学而思", "新增注册人数": 48},
# 达播分达人明细 {"日期": "2026-03-01", "渠道": "希沃", "新增注册人数": 2},
dabo_df = order_df[order_df['渠道大类'] == '达播'].groupby('key_from').agg( {"日期": "2026-03-02", "渠道": "京东方", "新增注册人数": 3},
订单数=('id', 'count'), {"日期": "2026-03-02", "渠道": "学而思", "新增注册人数": 38},
GMV=('GMV', 'sum'), {"日期": "2026-03-02", "渠道": "希沃", "新增注册人数": 1},
GSV=('GSV', 'sum'), {"日期": "2026-03-03", "渠道": "学而思", "新增注册人数": 24},
退费率=('is_refund', lambda x: str((x.sum()/x.count()*100).round(1)) + '%') {"日期": "2026-03-03", "渠道": "希沃", "新增注册人数": 4},
).reset_index() {"日期": "2026-03-04", "渠道": "京东方", "新增注册人数": 4},
dabo_df.to_excel(writer, sheet_name='达播达人明细', index=False) {"日期": "2026-03-04", "渠道": "学而思", "新增注册人数": 20},
# 成交标记明细 {"日期": "2026-03-04", "渠道": "希沃", "新增注册人数": 10},
tag_df = order_df.groupby('成交标记').agg( {"日期": "2026-03-04", "渠道": "科大讯飞", "新增注册人数": 3},
订单数=('id', 'count'), {"日期": "2026-03-05", "渠道": "京东方", "新增注册人数": 7},
GMV=('GMV', 'sum'), {"日期": "2026-03-05", "渠道": "学而思", "新增注册人数": 37},
GSV=('GSV', 'sum'), {"日期": "2026-03-05", "渠道": "希沃", "新增注册人数": 15},
退费率=('is_refund', lambda x: str((x.sum()/x.count()*100).round(1)) + '%') {"日期": "2026-03-05", "渠道": "科大讯飞", "新增注册人数": 17},
).reset_index() {"日期": "2026-03-06", "渠道": "京东方", "新增注册人数": 6},
tag_df.to_excel(writer, sheet_name='成交标记明细', index=False) {"日期": "2026-03-06", "渠道": "学而思", "新增注册人数": 26},
{"日期": "2026-03-06", "渠道": "希沃", "新增注册人数": 9},
{"日期": "2026-03-06", "渠道": "科大讯飞", "新增注册人数": 12},
{"日期": "2026-03-07", "渠道": "京东方", "新增注册人数": 5},
{"日期": "2026-03-07", "渠道": "学而思", "新增注册人数": 35},
{"日期": "2026-03-07", "渠道": "希沃", "新增注册人数": 5},
{"日期": "2026-03-07", "渠道": "科大讯飞", "新增注册人数": 34},
{"日期": "2026-03-08", "渠道": "京东方", "新增注册人数": 3},
{"日期": "2026-03-08", "渠道": "学而思", "新增注册人数": 33},
{"日期": "2026-03-08", "渠道": "希沃", "新增注册人数": 12},
{"日期": "2026-03-08", "渠道": "科大讯飞", "新增注册人数": 34},
{"日期": "2026-03-09", "渠道": "京东方", "新增注册人数": 2},
{"日期": "2026-03-09", "渠道": "学而思", "新增注册人数": 27},
{"日期": "2026-03-09", "渠道": "希沃", "新增注册人数": 5},
{"日期": "2026-03-09", "渠道": "科大讯飞", "新增注册人数": 15},
{"日期": "2026-03-10", "渠道": "学而思", "新增注册人数": 15},
{"日期": "2026-03-10", "渠道": "希沃", "新增注册人数": 3},
{"日期": "2026-03-10", "渠道": "科大讯飞", "新增注册人数": 9},
{"日期": "2026-03-11", "渠道": "京东方", "新增注册人数": 2},
{"日期": "2026-03-11", "渠道": "学而思", "新增注册人数": 25},
{"日期": "2026-03-11", "渠道": "希沃", "新增注册人数": 2},
{"日期": "2026-03-11", "渠道": "科大讯飞", "新增注册人数": 13},
{"日期": "2026-03-12", "渠道": "京东方", "新增注册人数": 5},
{"日期": "2026-03-12", "渠道": "学而思", "新增注册人数": 24},
{"日期": "2026-03-12", "渠道": "希沃", "新增注册人数": 5},
{"日期": "2026-03-12", "渠道": "科大讯飞", "新增注册人数": 15},
{"日期": "2026-03-13", "渠道": "京东方", "新增注册人数": 2},
{"日期": "2026-03-13", "渠道": "学而思", "新增注册人数": 31},
{"日期": "2026-03-13", "渠道": "希沃", "新增注册人数": 7},
{"日期": "2026-03-13", "渠道": "科大讯飞", "新增注册人数": 8},
{"日期": "2026-03-14", "渠道": "学而思", "新增注册人数": 30},
{"日期": "2026-03-14", "渠道": "希沃", "新增注册人数": 3},
{"日期": "2026-03-14", "渠道": "科大讯飞", "新增注册人数": 22},
{"日期": "2026-03-15", "渠道": "京东方", "新增注册人数": 1},
{"日期": "2026-03-15", "渠道": "学而思", "新增注册人数": 22},
{"日期": "2026-03-15", "渠道": "希沃", "新增注册人数": 3},
{"日期": "2026-03-15", "渠道": "科大讯飞", "新增注册人数": 22},
{"日期": "2026-03-16", "渠道": "京东方", "新增注册人数": 2},
{"日期": "2026-03-16", "渠道": "学而思", "新增注册人数": 6},
{"日期": "2026-03-16", "渠道": "希沃", "新增注册人数": 2},
{"日期": "2026-03-16", "渠道": "科大讯飞", "新增注册人数": 10},
{"日期": "2026-03-17", "渠道": "京东方", "新增注册人数": 3},
{"日期": "2026-03-17", "渠道": "学而思", "新增注册人数": 12},
{"日期": "2026-03-17", "渠道": "希沃", "新增注册人数": 3},
{"日期": "2026-03-17", "渠道": "科大讯飞", "新增注册人数": 6},
{"日期": "2026-03-18", "渠道": "京东方", "新增注册人数": 1},
{"日期": "2026-03-18", "渠道": "学而思", "新增注册人数": 9},
{"日期": "2026-03-18", "渠道": "科大讯飞", "新增注册人数": 11},
{"日期": "2026-03-19", "渠道": "京东方", "新增注册人数": 1},
{"日期": "2026-03-19", "渠道": "学而思", "新增注册人数": 6},
{"日期": "2026-03-19", "渠道": "希沃", "新增注册人数": 2},
{"日期": "2026-03-19", "渠道": "科大讯飞", "新增注册人数": 9},
{"日期": "2026-03-20", "渠道": "京东方", "新增注册人数": 1},
{"日期": "2026-03-20", "渠道": "学而思", "新增注册人数": 13},
{"日期": "2026-03-20", "渠道": "希沃", "新增注册人数": 2},
{"日期": "2026-03-20", "渠道": "科大讯飞", "新增注册人数": 12},
{"日期": "2026-03-21", "渠道": "京东方", "新增注册人数": 1},
{"日期": "2026-03-21", "渠道": "学而思", "新增注册人数": 27},
{"日期": "2026-03-21", "渠道": "科大讯飞", "新增注册人数": 26},
{"日期": "2026-03-22", "渠道": "学而思", "新增注册人数": 12},
{"日期": "2026-03-22", "渠道": "希沃", "新增注册人数": 4},
{"日期": "2026-03-22", "渠道": "科大讯飞", "新增注册人数": 22},
{"日期": "2026-03-23", "渠道": "京东方", "新增注册人数": 1},
{"日期": "2026-03-23", "渠道": "学而思", "新增注册人数": 9},
{"日期": "2026-03-23", "渠道": "希沃", "新增注册人数": 2},
{"日期": "2026-03-23", "渠道": "科大讯飞", "新增注册人数": 5},
{"日期": "2026-03-24", "渠道": "学而思", "新增注册人数": 4},
{"日期": "2026-03-24", "渠道": "希沃", "新增注册人数": 2},
{"日期": "2026-03-24", "渠道": "科大讯飞", "新增注册人数": 8},
{"日期": "2026-03-25", "渠道": "京东方", "新增注册人数": 1},
{"日期": "2026-03-25", "渠道": "学而思", "新增注册人数": 12},
{"日期": "2026-03-25", "渠道": "希沃", "新增注册人数": 5},
{"日期": "2026-03-25", "渠道": "科大讯飞", "新增注册人数": 13},
{"日期": "2026-03-26", "渠道": "京东方", "新增注册人数": 1},
{"日期": "2026-03-26", "渠道": "学而思", "新增注册人数": 8},
{"日期": "2026-03-26", "渠道": "希沃", "新增注册人数": 2},
{"日期": "2026-03-26", "渠道": "科大讯飞", "新增注册人数": 8},
{"日期": "2026-03-27", "渠道": "学而思", "新增注册人数": 9},
{"日期": "2026-03-27", "渠道": "希沃", "新增注册人数": 6},
{"日期": "2026-03-27", "渠道": "科大讯飞", "新增注册人数": 6},
{"日期": "2026-03-28", "渠道": "京东方", "新增注册人数": 2},
{"日期": "2026-03-28", "渠道": "学而思", "新增注册人数": 20},
{"日期": "2026-03-28", "渠道": "希沃", "新增注册人数": 4},
{"日期": "2026-03-28", "渠道": "科大讯飞", "新增注册人数": 12},
{"日期": "2026-03-29", "渠道": "京东方", "新增注册人数": 2},
{"日期": "2026-03-29", "渠道": "学而思", "新增注册人数": 16},
{"日期": "2026-03-29", "渠道": "科大讯飞", "新增注册人数": 9},
{"日期": "2026-03-30", "渠道": "京东方", "新增注册人数": 2},
{"日期": "2026-03-30", "渠道": "学而思", "新增注册人数": 7},
{"日期": "2026-03-30", "渠道": "希沃", "新增注册人数": 2},
{"日期": "2026-03-30", "渠道": "科大讯飞", "新增注册人数": 6},
{"日期": "2026-03-31", "渠道": "京东方", "新增注册人数": 3},
{"日期": "2026-03-31", "渠道": "学而思", "新增注册人数": 10},
{"日期": "2026-03-31", "渠道": "科大讯飞", "新增注册人数": 10},
]
df_register = pd.DataFrame(register_data)
print(f"\n最新3月收入预测报表已生成{output_file}") # 生成Excel文件
print("\n整体统计结果:") output_path = "/root/.openclaw/workspace/2026年3月硬件渠道数据汇总.xlsx"
print(report_df[['渠道大类', '预测GSV', 'GSV', '完成率', '订单数', 'GMV', '退费率']]) with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
df_overall.to_excel(writer, sheet_name='整体统计', index=False)
df_purchase.to_excel(writer, sheet_name='每日购课明细', index=False)
df_register.to_excel(writer, sheet_name='每日新增注册明细', index=False)
print(f"文件已生成:{output_path}")

View File

@ -1,85 +1,85 @@
--- ---
name: refund-user-learning-analysis name: refund-user-learning-analysis
description: [TODO: Complete and informative explanation of what the skill does and when to use it. Include WHEN to use this skill - specific scenarios, file types, or tasks that trigger it.] description: |
退费用户U0学习数据分析工具。统计指定时间段内购课并退费的用户在U0阶段的学习表现
包括课程巩固(Review)正确率与用时、单元强化(Summary)参与与完成、单元挑战(Challenge)各维度成绩。
支持自动剔除脏数据、生成Excel多Sheet报表。
**触发场景**:
(1) 统计退费用户的学习数据/学习情况
(2) 分析退费用户在U0阶段的巩固/强化/挑战表现
(3) 退费用户有多少完成了U0课程
(4) 退费用户的学习完成率、正确率统计
(5) 用户提到"退费用户"+"学习数据/巩固/强化/挑战"的组合
--- ---
# Refund User Learning Analysis # 退费用户U0学习数据分析
## Overview ## 分析流程
[TODO: 1-2 sentences explaining what this skill enables] ### Step 1: 确认参数
## Structuring This Skill 向用户确认:
- **时间范围**: 订单付款的起止日期 (默认当月)
- **是否剔除仍有有效订单的用户**: 默认剔除
- **巩固用时异常阈值**: 默认 60 分钟,超过视为脏数据
[TODO: Choose the structure that best fits this skill's purpose. Common patterns: ### Step 2: 执行数据查询
**1. Workflow-Based** (best for sequential processes) 运行查询脚本,传入参数:
- Works well when there are clear step-by-step procedures
- Example: DOCX skill with "Workflow Decision Tree" -> "Reading" -> "Creating" -> "Editing"
- Structure: ## Overview -> ## Workflow Decision Tree -> ## Step 1 -> ## Step 2...
**2. Task-Based** (best for tool collections) ```bash
- Works well when the skill offers different operations/capabilities python3 scripts/query_refund_learning.py \
- Example: PDF skill with "Quick Start" -> "Merge PDFs" -> "Split PDFs" -> "Extract Text" --start 2026-04-01 --end 2026-05-01 \
- Structure: ## Overview -> ## Quick Start -> ## Task Category 1 -> ## Task Category 2... --output /tmp/refund_learning_report.json \
--pure true --outlier 60
```
**3. Reference/Guidelines** (best for standards or specifications) 脚本自动完成:
- Works well for brand guidelines, coding standards, or requirements 1. 筛选时间段内购课且退费(order_status=4 + refund status=3)的用户
- Example: Brand styling with "Brand Guidelines" -> "Colors" -> "Typography" -> "Features" 2. 可选剔除仍持有有效订单(order_status=3)的用户
- Structure: ## Overview -> ## Guidelines -> ## Specifications -> ## Usage... 3. 关联角色表找到user_id查8张分表判断U0五节课完成情况
4. 统计巩固(Review)用时和正确率(从question_list JSON解析isRight)
5. 统计强化(Summary)进入和各知识模块完成情况
6. 统计挑战(Challenge)四维度参与和Perfect/Good/Oops分布
7. 自动识别并剔除巩固用时异常数据
**4. Capabilities-Based** (best for integrated systems) ### Step 3: 生成 Excel 报表
- Works well when the skill provides multiple interrelated features
- Example: Product Management with "Core Capabilities" -> numbered capability list
- Structure: ## Overview -> ## Core Capabilities -> ### 1. Feature -> ### 2. Feature...
Patterns can be mixed and matched as needed. Most skills combine patterns (e.g., start with task-based, add workflow for complex operations). ```bash
python3 scripts/generate_excel.py \
--input /tmp/refund_learning_report.json \
--output /tmp/退费用户U0学习数据统计.xlsx
```
Delete this entire "Structuring This Skill" section when done - it's just guidance.] 生成5个Sheet: 总览、课程巩固、单元强化、单元挑战、剔除的异常数据。
## [TODO: Replace with the first main section based on chosen structure] ### Step 4: 发送文件
[TODO: Add content here. See examples in existing skills: 使用 `feishu-send-file` skill 将 Excel 文件发送给用户。
- Code samples for technical skills
- Decision trees for complex workflows
- Concrete examples with realistic user requests
- References to scripts/templates/references as needed]
## Resources (optional) ## 数据口径
Create only the resource directories this skill actually needs. Delete this section if no resources are required. - **退费用户**: `bi_vala_order.order_status = 4``bi_refund_order.status = 3`,通过 `out_trade_no` 关联
- **纯退费用户**: 上述用户中无任何 `order_status = 3` 的有效订单
- **完成U0**: 用户至少完成 L1-U0 或 L2-U0 的全部5节课 (`play_status = 1`)
- **巩固正确率**: `question_list` JSON 中 `isRight=true` 的数量 / 总题数 × 100
- **强化完成**: 做完该单元所有知识模块 (L1=3个, L2=4个)
- **挑战成绩**: 首次各维度的 `score_text` (Perfect/Good/Oops)
- **测试账号**: 仅保留 `bi_vala_app_account.status = 1` 的正常账号,以此排除测试账号 (`status = 2`)
### scripts/ ## 扩展到其他单元
Executable code (Python/Bash/etc.) that can be run directly to perform specific operations.
**Examples from other skills:** 修改脚本中的 chapter_id 和 story_id 映射即可统计其他单元。
- PDF skill: `fill_fillable_fields.py`, `extract_form_field_info.py` - utilities for PDF manipulation 映射关系详见 `references/data-model.md`
- DOCX skill: `document.py`, `utilities.py` - Python modules for document processing
**Appropriate for:** Python scripts, shell scripts, or any executable code that performs automation, data processing, or specific operations. 查询 `bi_level_unit_lesson` 表获取任意单元的 chapter_id:
```sql
SELECT * FROM bi_level_unit_lesson WHERE course_unit = 'U01' ORDER BY course_level;
```
**Note:** Scripts may be executed without loading into context, but can still be read by Codex for patching or environment adjustments. 查询 story_id:
```sql
### references/ SELECT DISTINCT story_id, level FROM bi_user_unit_review_question_result
Documentation and reference material intended to be loaded into context to inform Codex's process and thinking. WHERE chapter_id IN (<target_chapter_ids>) LIMIT 5;
```
**Examples from other skills:**
- Product management: `communication.md`, `context_building.md` - detailed workflow guides
- BigQuery: API reference documentation and query examples
- Finance: Schema documentation, company policies
**Appropriate for:** In-depth documentation, API references, database schemas, comprehensive guides, or any detailed information that Codex should reference while working.
### assets/
Files not intended to be loaded into context, but rather used within the output Codex produces.
**Examples from other skills:**
- Brand styling: PowerPoint template files (.pptx), logo files
- Frontend builder: HTML/React boilerplate project directories
- Typography: Font files (.ttf, .woff2)
**Appropriate for:** Templates, boilerplate code, document templates, images, icons, fonts, or any files meant to be copied or used in the final output.
---
**Not every skill requires all three types of resources.**

View File

@ -0,0 +1,86 @@
# 退费用户学习数据 - 数据模型参考
## 数据库连接
| 库名 | 用途 | 主机 | 端口 | 用户 | 密码来源 |
|------|------|------|------|------|----------|
| vala_bi (PG) | BI统计表、同步表 | bj-postgres-16pob4sg.sql.tencentcdb.com | 28591 | ai_member | secrets.env → PG_ONLINE_PASSWORD |
| vala (PG) | 用户学习记录源表 | 同上 | 同上 | 同上 | 同上 |
## 核心表
### 订单相关 (vala_bi)
| 表名 | 说明 |
|------|------|
| `bi_vala_order` | 订单表。`order_status=3` 已支付, `order_status=4` 已退款, `pay_success_date` 付款时间 |
| `bi_refund_order` | 退费表。通过 `out_trade_no` 与订单关联, `status=3` 退费成功 |
| `bi_vala_app_account` | 账号表。`status=1` 正常, `status=2` 测试账号 |
| `bi_vala_app_character` | 角色表。`account_id` 关联账号, 一个账号可有多个角色 |
### 课时完成 (vala_bi)
| 表名 | 说明 |
|------|------|
| `bi_user_chapter_play_record_{0-7}` | 课时游玩记录分表(按user_id%8)。`play_status=1` 完成, `chapter_id` 课时ID |
| `bi_level_unit_lesson` | 课程结构映射表。`id`=chapter_id, 含 course_level/season/unit/lesson |
### 巩固 (vala_bi)
| 表名 | 说明 |
|------|------|
| `bi_user_unit_review_question_result` | 课程巩固记录。`chapter_id` 课时, `play_time` 用时(ms), `question_list` JSON含isRight |
### 单元强化 (vala_bi)
| 表名 | 说明 |
|------|------|
| `bi_user_unit_summary_km_result` | 强化练习记录。`story_id` 为 GameInfo.ID, `km_type` 知识模块类型(vocab/pron/sentence/grammar) |
### 单元挑战 (vala_bi)
| 表名 | 说明 |
|------|------|
| `bi_user_unit_challenge_question_result` | 挑战记录。`story_id` GameInfo.ID, `category` 维度(listening/speaking/reading/writing), `score_text` 评分(Perfect/Good/Oops) |
### 完成记录 (vala库)
| 表名 | 说明 |
|------|------|
| `user_learn_record_report_summary_{3-7}` | 学习完成汇总分表。`learn_card_type=1,record_type=3` 强化完成, `learn_card_type=1,record_type=4` 挑战完成 |
## U0 关键ID映射
### Chapter ID (课时)
| 等级 | 课时 | chapter_id |
|------|------|-----------|
| L1-U0 | L01~L05 | 343, 344, 345, 346, 348 |
| L2-U0 | L01~L05 | 55, 56, 57, 58, 59 |
### Story ID (单元)
| 等级 | story_id |
|------|----------|
| L1-U0 | 65 |
| L2-U0 | 8 |
### 知识模块
| 等级 | km_type 列表 | 总数 |
|------|-------------|------|
| L1-U0 | vocab, pron, sentence | 3 |
| L2-U0 | vocab, pron, sentence, grammar | 4 |
### 挑战维度
| 等级 | category 列表 |
|------|--------------|
| L1-U0 | listening, speaking |
| L2-U0 | listening, speaking, reading, writing |
## 课程结构映射公式
- `UnitIndex = (SeasonOfQuarter - 1) * 12 + GameInfo.Index`
- `ChapterIndex = UnitIndex * 5 + Chapter.Index`
- U0 对应 `season_package_index = 0, unit_index = 0`

View File

@ -0,0 +1,92 @@
#!/usr/bin/env python3
"""
JSON 结果生成 Excel 报表
用法: python3 generate_excel.py --input /tmp/report.json --output /tmp/report.xlsx
"""
import argparse, json
import openpyxl
from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
from openpyxl.utils import get_column_letter
def style_sheet(ws):
    """Apply the shared report formatting to a worksheet, in place.

    Row 1 is styled as the header (bold size-11 font, D9E1F2 solid fill,
    centered with text wrapping). Every row from 2 down is centered. All
    cells get a thin border on four sides. Each column is then sized to its
    widest cell plus 4, with a minimum width of 10.

    Args:
        ws: worksheet to format; it is modified and nothing is returned.
    """
    # Local import so this block does not depend on a new file-level import.
    from unicodedata import east_asian_width

    def display_width(value):
        """Return the approximate rendered width of a cell value.

        Wide and fullwidth characters (e.g. the Chinese headers used in
        these reports) count as two columns; ``len()`` alone would size
        those columns too narrowly. ``None`` counts as empty.
        """
        text = "" if value is None else str(value)
        return sum(2 if east_asian_width(ch) in ("W", "F") else 1 for ch in text)

    header_font = Font(bold=True, size=11)
    header_fill = PatternFill(start_color="D9E1F2", end_color="D9E1F2", fill_type="solid")
    header_align = Alignment(horizontal="center", vertical="center", wrap_text=True)
    body_align = Alignment(horizontal="center", vertical="center")
    thin = Side(style='thin')
    border = Border(left=thin, right=thin, top=thin, bottom=thin)

    columns = range(1, ws.max_column + 1)

    # Header row.
    for col in columns:
        cell = ws.cell(row=1, column=col)
        cell.font, cell.fill, cell.alignment, cell.border = (
            header_font, header_fill, header_align, border)

    # Data rows.
    for row in range(2, ws.max_row + 1):
        for col in columns:
            cell = ws.cell(row=row, column=col)
            cell.alignment, cell.border = body_align, border

    # Column widths: widest rendered value in the column plus padding.
    for col in columns:
        widest = max(
            (display_width(ws.cell(r, col).value) for r in range(1, ws.max_row + 1)),
            default=5,
        )
        ws.column_dimensions[get_column_letter(col)].width = max(widest + 4, 10)
def main():
    """Build the multi-sheet Excel report from the query script's JSON output.

    Command-line arguments:
        --input   path of the JSON file to read (required)
        --output  path of the .xlsx file to write (required)

    The workbook contains the sheets 总览, 课程巩固(Review),
    单元强化(Summary) and 单元挑战(Challenge). The sheet 剔除的异常数据 is
    added only when the JSON has a non-empty ``outliers`` list.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", required=True)
    parser.add_argument("--output", required=True)
    args = parser.parse_args()

    with open(args.input, encoding="utf-8") as f:
        data = json.load(f)

    wb = openpyxl.Workbook()

    # Sheet 1: Overview
    ws = wb.active
    ws.title = "总览"
    ws.append(["指标", "数值"])
    fun = data["funnel"]
    ws.append(["购课退费用户总数", fun["total_refund"]])
    ws.append(["剔除仍有有效订单后", fun["pure_refund"]])
    ws.append(["其中完成U0全部5节课", fun["completed_u0"]])
    ws.append(["  - 仅完成L1-U0", fun["l1_only"]])
    ws.append(["  - 仅完成L2-U0", fun["l2_only"]])
    ws.append(["  - L1+L2都完成", fun["both"]])
    # A period with no refund users gives pure_refund == 0; report 0.0%
    # instead of raising ZeroDivisionError.
    pure_total = fun["pure_refund"]
    completed_pct = round(fun["completed_u0"] / pure_total * 100, 1) if pure_total else 0.0
    ws.append(["完成U0占比", f"{completed_pct}%"])
    style_sheet(ws)

    # Sheet 2: Review
    ws2 = wb.create_sheet("课程巩固(Review)")
    ws2.append(["等级", "课时", "做了巩固的人数", "平均用时(分钟)", "平均正确率"])
    for r in data["review"]:
        ws2.append([r["course"], r["lesson"], r["review_count"],
                    r["avg_duration_min"], f"{r['avg_right_rate_pct']}%"])
    style_sheet(ws2)

    # Sheet 3: Summary
    ws3 = wb.create_sheet("单元强化(Summary)")
    ws3.append(["等级", "知识模块总数", "进入人数", "全部完成", "做1个", "做2个", "做3个", "做4个"])
    for r in data["summary"]:
        ws3.append([r["course"], r["total_km"], r["enter_count"], r["all_done"],
                    r["done_1"], r["done_2"], r["done_3"], r["done_4"]])
    style_sheet(ws3)

    # Sheet 4: Challenge
    ws4 = wb.create_sheet("单元挑战(Challenge)")
    ws4.append(["等级", "维度", "参与人数", "Perfect", "Perfect%", "Good", "Good%", "Oops", "Oops%"])
    for r in data["challenge"]:
        ws4.append([r["course"], r["category"], r["enter_count"],
                    r["perfect"], f"{r['perfect_pct']}%", r["good"], f"{r['good_pct']}%",
                    r["oops"], f"{r['oops_pct']}%"])
    style_sheet(ws4)

    # Sheet 5: Outliers (skipped entirely when there are none)
    if data.get("outliers"):
        ws5 = wb.create_sheet("剔除的异常数据")
        ws5.append(["等级", "课时", "user_id", "巩固用时(分钟)", "play_time(ms)", "记录时间"])
        for r in data["outliers"]:
            ws5.append([r["course"], r["lesson"], r["user_id"],
                        r["duration_min"], r["play_time_ms"], r["created_at"]])
        style_sheet(ws5)

    wb.save(args.output)
    print(f"Excel saved: {args.output}")


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,236 @@
#!/usr/bin/env python3
"""
退费用户学习数据查询脚本
用法: python3 query_refund_learning.py --start 2026-04-01 --end 2026-05-01 --output /tmp/report.json
参数:
--start 订单付款起始日期 (YYYY-MM-DD)
--end 订单付款截止日期 (YYYY-MM-DD)
--output JSON 结果输出路径
--pure 是否剔除仍有有效订单的用户 (默认 true)
--outlier 巩固用时异常阈值(分钟), 超过此值视为脏数据 (默认 60)
"""
import argparse, json, os, subprocess, sys
def get_pg_password():
    """Read the PostgreSQL password from the workspace ``secrets.env`` file.

    Scans ``~/.openclaw/workspace/secrets.env`` and uses the first line that
    starts with ``PG_ONLINE_PASSWORD=``. The value may be single-quoted,
    double-quoted or unquoted; for a quoted value the text between the
    opening quote and its matching closing quote is returned.

    Returns:
        The password string, without surrounding quotes or the newline.

    Raises:
        RuntimeError: if the file has no ``PG_ONLINE_PASSWORD=`` line.
        OSError: propagated from ``open`` when the file cannot be read.
    """
    prefix = "PG_ONLINE_PASSWORD="
    secrets_path = os.path.expanduser("~/.openclaw/workspace/secrets.env")
    with open(secrets_path, encoding="utf-8") as f:
        for line in f:
            if not line.startswith(prefix):
                continue
            value = line[len(prefix):].strip()
            if value[:1] in ("'", '"'):
                closing = value.find(value[0], 1)
                # An unterminated quote keeps the rest of the line rather
                # than failing.
                return value[1:closing] if closing != -1 else value[1:]
            # Unquoted value: previously an IndexError from split("'")[1].
            return value
    raise RuntimeError("PG_ONLINE_PASSWORD not found in secrets.env")
def run_pg(db, sql, password):
    """Run one SQL statement through the ``psql`` CLI and return its rows.

    psql is invoked with ``-t -A -F "\\t"`` so each output line is one row
    with tab-separated columns. The password is supplied through the
    ``PGPASSWORD`` environment variable of the child process.

    Args:
        db: database name passed to ``psql -d``.
        sql: SQL text passed to ``psql -c``.
        password: password for the ``ai_member`` user.

    Returns:
        A list of rows, each a list of column strings. Empty columns come
        back as empty strings.

    Raises:
        subprocess.TimeoutExpired: if psql runs longer than 120 seconds.

    On a non-zero psql exit code the stderr text is printed and the process
    exits with status 1.
    """
    env = os.environ.copy()
    env["PGPASSWORD"] = password
    proc = subprocess.run(
        ["psql", "-h", "bj-postgres-16pob4sg.sql.tencentcdb.com", "-p", "28591",
         "-U", "ai_member", "-d", db, "-t", "-A", "-F", "\t", "-c", sql],
        capture_output=True, text=True, env=env, timeout=120
    )
    if proc.returncode != 0:
        print(f"SQL ERROR: {proc.stderr}", file=sys.stderr)
        sys.exit(1)
    # Split on newlines only and never strip(): a tab is the column
    # separator, so stripping the output would delete an empty first column
    # of the first row or an empty last column of the last row and leave
    # that row with too few fields. Only truly empty lines are skipped.
    return [line.split("\t") for line in proc.stdout.split("\n") if line]
def _iso_date(value):
    """argparse ``type=`` hook: accept a calendar date, return it as YYYY-MM-DD."""
    from datetime import date  # local import keeps this helper self-contained

    try:
        return date.fromisoformat(value).isoformat()
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid date {value!r}, expected YYYY-MM-DD") from None


def _course_label(level):
    """Map the ``level`` column to the report label: "A1" -> "L1", otherwise "L2"."""
    return "L1" if level == "A1" else "L2"


def main():
    """Query U0 learning data of refunded users and dump it as JSON.

    Command-line arguments:
        --start    order payment start date, inclusive (YYYY-MM-DD, required)
        --end      order payment end date, exclusive (YYYY-MM-DD, required)
        --output   JSON output path (default /tmp/refund_learning_report.json)
        --pure     "true" (any letter case) drops accounts that still hold an
                   order with order_status = 3; any other value keeps them
        --outlier  review duration threshold in minutes (default 60); review
                   records above it are left out of the review averages and
                   listed under "outliers"

    The JSON object has the keys ``funnel``, ``review``, ``summary``,
    ``challenge`` and ``outliers``. Five queries are run against ``vala_bi``,
    all sharing the same CTE prefix.
    """
    p = argparse.ArgumentParser()
    # Both dates are interpolated into SQL text below, so they are validated
    # and normalised to YYYY-MM-DD before use.
    p.add_argument("--start", required=True, type=_iso_date)
    p.add_argument("--end", required=True, type=_iso_date)
    p.add_argument("--output", default="/tmp/refund_learning_report.json")
    p.add_argument("--pure", default="true")
    p.add_argument("--outlier", type=float, default=60.0)
    args = p.parse_args()

    pw = get_pg_password()

    # Case-insensitive so that "--pure True" does not silently disable the filter.
    pure_clause = ""
    if args.pure.strip().lower() == "true":
        pure_clause = "WHERE NOT EXISTS (SELECT 1 FROM bi_vala_order o2 WHERE o2.account_id = ra.account_id AND o2.order_status = 3)"

    # --- ID mappings ---
    # Chapters  L1-U0: 343,344,345,346,348 | L2-U0: 55,56,57,58,59
    l1_ids = "343,344,345,346,348"
    l2_ids = "55,56,57,58,59"
    all_ids = f"{l1_ids},{l2_ids}"
    # Stories used by the summary and challenge queries.
    story_ids = "65, 8"
    chapter_map = {
        "343": "U0-L01", "344": "U0-L02", "345": "U0-L03", "346": "U0-L04", "348": "U0-L05",
        "55": "U0-L01", "56": "U0-L02", "57": "U0-L03", "58": "U0-L04", "59": "U0-L05"
    }

    # The play records live in 8 tables (suffix 0..7); query them all.
    chapter_play_union = " UNION ALL ".join([
        f"SELECT r.user_id, r.chapter_id FROM bi_user_chapter_play_record_{i} r JOIN refund_users ru ON r.user_id = ru.user_id WHERE r.play_status = 1 AND r.chapter_id IN ({all_ids})"
        for i in range(8)
    ])

    # Shared CTE chain: refunded accounts -> optional "pure" filter ->
    # their characters -> completed chapters -> users who finished all five
    # L1-U0 or all five L2-U0 chapters.
    base_cte = f"""
    WITH refund_accounts AS (
        SELECT DISTINCT o.account_id FROM bi_vala_order o
        JOIN bi_vala_app_account a ON a.id = o.account_id AND a.status = 1
        JOIN bi_refund_order r ON r.out_trade_no = o.out_trade_no AND r.status = 3
        WHERE o.order_status = 4 AND o.pay_success_date >= '{args.start}' AND o.pay_success_date < '{args.end}'
    ),
    pure_refund_accounts AS (
        SELECT ra.account_id FROM refund_accounts ra {pure_clause}
    ),
    refund_users AS (
        SELECT c.id AS user_id, c.account_id FROM bi_vala_app_character c
        JOIN pure_refund_accounts pra ON c.account_id = pra.account_id WHERE c.deleted_at IS NULL
    ),
    all_done AS ({chapter_play_union}),
    user_done_count AS (
        SELECT user_id,
            COUNT(DISTINCT CASE WHEN chapter_id IN ({l1_ids}) THEN chapter_id END) AS l1_done,
            COUNT(DISTINCT CASE WHEN chapter_id IN ({l2_ids}) THEN chapter_id END) AS l2_done
        FROM (SELECT DISTINCT user_id, chapter_id FROM all_done) t GROUP BY user_id
    ),
    qualified_users AS (
        SELECT ru.user_id, ru.account_id FROM user_done_count udc
        JOIN refund_users ru ON udc.user_id = ru.user_id WHERE udc.l1_done = 5 OR udc.l2_done = 5
    )"""

    result = {}

    # 1. Funnel counts
    print("Querying funnel counts...")
    rows = run_pg("vala_bi", f"""
    {base_cte}
    SELECT
        (SELECT COUNT(*) FROM refund_accounts),
        (SELECT COUNT(*) FROM pure_refund_accounts),
        (SELECT COUNT(DISTINCT account_id) FROM qualified_users),
        (SELECT COUNT(DISTINCT account_id) FROM qualified_users qu
            JOIN user_done_count udc ON qu.user_id = udc.user_id AND udc.l1_done = 5 AND udc.l2_done < 5),
        (SELECT COUNT(DISTINCT account_id) FROM qualified_users qu
            JOIN user_done_count udc ON qu.user_id = udc.user_id AND udc.l2_done = 5 AND udc.l1_done < 5),
        (SELECT COUNT(DISTINCT account_id) FROM qualified_users qu
            JOIN user_done_count udc ON qu.user_id = udc.user_id AND udc.l1_done = 5 AND udc.l2_done = 5)
    """, pw)
    r = rows[0]
    result["funnel"] = {
        "total_refund": int(r[0]), "pure_refund": int(r[1]),
        "completed_u0": int(r[2]), "l1_only": int(r[3]),
        "l2_only": int(r[4]), "both": int(r[5])
    }

    # 2. Review data (with outlier filtering)
    print("Querying review data...")
    outlier_ms = int(args.outlier * 60 * 1000)  # minutes -> milliseconds
    rows = run_pg("vala_bi", f"""
    {base_cte},
    review_with_rate AS (
        SELECT rv.level, rv.chapter_id, rv.user_id, rv.play_time,
            (SELECT COUNT(*) FROM jsonb_array_elements(rv.question_list::jsonb) q WHERE (q->>'isRight')::boolean = true)::numeric
            / NULLIF((SELECT COUNT(*) FROM jsonb_array_elements(rv.question_list::jsonb))::numeric, 0) * 100 AS right_rate,
            ROW_NUMBER() OVER (PARTITION BY rv.user_id, rv.chapter_id ORDER BY rv.id) AS rn
        FROM bi_user_unit_review_question_result rv
        JOIN qualified_users qu ON rv.user_id = qu.user_id
        WHERE rv.chapter_id IN ({all_ids}) AND rv.deleted_at IS NULL AND rv.play_time <= {outlier_ms}
    )
    SELECT level, chapter_id,
        COUNT(DISTINCT user_id),
        ROUND(AVG(play_time / 1000.0 / 60)::numeric, 1),
        ROUND(AVG(right_rate)::numeric, 1)
    FROM review_with_rate WHERE rn = 1
    GROUP BY level, chapter_id ORDER BY level, chapter_id
    """, pw)
    result["review"] = [
        {
            "course": _course_label(r[0]),
            "lesson": chapter_map.get(r[1], r[1]),
            "review_count": int(r[2]),
            "avg_duration_min": float(r[3]),
            "avg_right_rate_pct": float(r[4])
        }
        for r in rows
    ]

    # 3. Summary (enhancement) data
    print("Querying summary data...")
    rows = run_pg("vala_bi", f"""
    {base_cte},
    summary_data AS (
        SELECT s.level, s.user_id, COUNT(DISTINCT s.km_type) AS km_types_done
        FROM bi_user_unit_summary_km_result s
        JOIN qualified_users qu ON s.user_id = qu.user_id
        WHERE s.story_id IN ({story_ids}) AND s.deleted_at IS NULL
        GROUP BY s.level, s.user_id
    )
    SELECT
        level,
        COUNT(DISTINCT user_id),
        COUNT(DISTINCT CASE WHEN (level = 'A1' AND km_types_done >= 3) OR (level = 'A2' AND km_types_done >= 4) THEN user_id END),
        COUNT(DISTINCT CASE WHEN km_types_done = 1 THEN user_id END),
        COUNT(DISTINCT CASE WHEN km_types_done = 2 THEN user_id END),
        COUNT(DISTINCT CASE WHEN km_types_done = 3 THEN user_id END),
        COUNT(DISTINCT CASE WHEN km_types_done = 4 THEN user_id END)
    FROM summary_data GROUP BY level ORDER BY level
    """, pw)
    result["summary"] = [
        {
            "course": _course_label(r[0]),
            "total_km": 3 if r[0] == "A1" else 4,
            "enter_count": int(r[1]), "all_done": int(r[2]),
            "done_1": int(r[3]), "done_2": int(r[4]),
            "done_3": int(r[5]), "done_4": int(r[6])
        }
        for r in rows
    ]

    # 4. Challenge data
    print("Querying challenge data...")
    rows = run_pg("vala_bi", f"""
    {base_cte},
    challenge_first AS (
        SELECT ch.level, ch.category, ch.score_text, ch.user_id,
            ROW_NUMBER() OVER (PARTITION BY ch.user_id, ch.level, ch.category ORDER BY ch.id) AS rn
        FROM bi_user_unit_challenge_question_result ch
        JOIN qualified_users qu ON ch.user_id = qu.user_id
        WHERE ch.story_id IN ({story_ids}) AND ch.deleted_at IS NULL
    )
    SELECT level, category,
        COUNT(DISTINCT user_id),
        COUNT(DISTINCT CASE WHEN score_text = 'Perfect' THEN user_id END),
        COUNT(DISTINCT CASE WHEN score_text = 'Good' THEN user_id END),
        COUNT(DISTINCT CASE WHEN score_text = 'Oops' THEN user_id END)
    FROM challenge_first WHERE rn = 1
    GROUP BY level, category ORDER BY level, category
    """, pw)
    result["challenge"] = []
    for r in rows:
        perfect, good, oops = int(r[3]), int(r[4]), int(r[5])
        # Percentages are relative to the three graded buckets, not to
        # enter_count; an all-zero row reports 0 for each.
        total = perfect + good + oops
        result["challenge"].append({
            "course": _course_label(r[0]),
            "category": r[1],
            "enter_count": int(r[2]),
            "perfect": perfect, "good": good, "oops": oops,
            "perfect_pct": round(perfect / total * 100) if total else 0,
            "good_pct": round(good / total * 100) if total else 0,
            "oops_pct": round(oops / total * 100) if total else 0,
        })

    # 5. Outlier records (the review rows excluded by the threshold above)
    print("Querying outliers...")
    rows = run_pg("vala_bi", f"""
    {base_cte}
    SELECT rv.level, rv.chapter_id, rv.user_id,
        ROUND((rv.play_time / 1000.0 / 60)::numeric, 1), rv.play_time, rv.created_at
    FROM bi_user_unit_review_question_result rv
    JOIN qualified_users qu ON rv.user_id = qu.user_id
    WHERE rv.chapter_id IN ({all_ids}) AND rv.deleted_at IS NULL AND rv.play_time > {outlier_ms}
    ORDER BY rv.play_time DESC
    """, pw)
    result["outliers"] = [
        {
            "course": _course_label(r[0]),
            "lesson": chapter_map.get(r[1], r[1]),
            "user_id": int(r[2]),
            "duration_min": float(r[3]),
            "play_time_ms": int(r[4]),
            "created_at": r[5]
        }
        for r in rows
    ]

    # ensure_ascii=False can emit non-ASCII text, so fix the file encoding
    # instead of relying on the locale default.
    with open(args.output, "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)
    print(f"Done. Output: {args.output}")


if __name__ == "__main__":
    main()

View File

@ -1 +0,0 @@
python3

View File

@ -1 +0,0 @@
/usr/bin/python3

View File

@ -1 +0,0 @@
python3

View File

@ -1 +0,0 @@
lib

View File

@ -1,5 +0,0 @@
home = /usr/bin
include-system-site-packages = false
version = 3.12.3
executable = /usr/bin/python3.12
command = /usr/bin/python3 -m venv /root/.openclaw/workspace/venv