- 新增 TOOLS.md,包含所有数据库连接信息
- 新增 business_knowledge/ 目录,包含:
  * 13个 SQL 查询文档
  * 业务术语表
  * 数据表说明
  * 16个数据抽取脚本
  * 知识总结文档
14 KiB
14 KiB
全字段大表
获取时间: 2026-03-02 飞书文档 Token: VVyWd5491o6tuqxceCVci6dVnFd
业务说明
这个查询将用户、购课、角色、课程完课等多个维度的数据整合在一起,形成一个宽表,适合进行综合分析。
涉及的数据表
- bi_vala_app_account - 用户账号表
- account_detail_info - 账号详情表
- bi_vala_order - 订单表
- bi_vala_app_character - 角色表
- bi_user_chapter_play_record_0~7 - 用户章节播放记录表(分表)
- bi_level_unit_lesson - 课程单元表
- bi_user_component_play_record_0~7 - 用户组件播放记录表(分表)
SQL 查询
-- Wide user table: joins account, account detail, qualifying orders, characters
-- and first chapter completions into one flat result for ad-hoc analysis.
--
-- Row grain: account x qualifying order x character x completed chapter.
-- Every join is a LEFT JOIN from the account, so accounts without an order,
-- a character or a completion are kept with NULLs in the missing columns.
--
-- NOTE(review): to_char / split_part / format suggest PostgreSQL - confirm the
-- target engine before porting.
with accounts as (
    -- Active, non-deleted accounts; ids 51 and 2121 are the excluded test users.
    select distinct
           id
          ,key_from
          ,to_char(created_at,'YYYY-MM-DD') as created_date
          ,download_channel
    from bi_vala_app_account
    where status = 1
      and id not in (51,2121)
      and deleted_at is NULL
)
,account_details as (
    -- City is the second '-'-separated part of login_address.
    -- phone_login is 0 only when phone_login_times = 0; any other value,
    -- including NULL, maps to 1.
    select distinct
           account_id
          ,split_part(login_address,'-',2) as login_address
          ,case when phone_login_times = 0 then 0
                else 1
           end as phone_login
    from account_detail_info
)
,paid_orders as (
    -- Orders with order_status = 3 and pay_amount_int > 49800
    -- (valid orders above 498 yuan, per the notes that accompany this query).
    -- Numeric sale_channel codes are mapped to labels; unlisted codes become '站外'.
    select distinct
           account_id
          ,case when sale_channel = 11 then '苹果'
                when sale_channel = 12 then '华为'
                when sale_channel = 13 then '小米'
                when sale_channel = 14 then '荣耀'
                when sale_channel = 15 then '应用宝'
                when sale_channel = 17 then '魅族'
                when sale_channel = 18 then 'VIVO'
                when sale_channel = 19 then 'OPPO'
                when sale_channel = 21 then '学而思'
                when sale_channel = 22 then '讯飞'
                when sale_channel = 23 then '步步高'
                when sale_channel = 24 then '作业帮'
                when sale_channel = 25 then '小度'
                when sale_channel = 26 then '希沃'
                when sale_channel = 27 then '京东方'
                when sale_channel = 41 then '官网'
                when sale_channel = 71 then '小程序'
                else '站外'
           end as sale_channel
          ,key_from
          ,to_char(pay_success_date,'YYYY-MM-DD') as pay_date
          ,pay_amount
    from bi_vala_order
    where order_status = 3
      and pay_amount_int > 49800
)
,characters as (
    -- Non-deleted characters. birthday is reduced to its year part; an empty
    -- year becomes the placeholder '0000' so the later integer cast cannot fail.
    select distinct
           id
          ,account_id
          ,case when purchase_season_package = '[1]' then 0
                else 1
           end as characer_pay_status
          ,case when gender = 0 then 'girl'
                when gender = 1 then 'boy'
                else 'unknow'
           end as gender
          ,case when split_part(birthday,'-',1) = '' then '0000'
                else split_part(birthday,'-',1)
           end as birthday
    from bi_vala_app_character
    where deleted_at is NULL
)
,chapter_finishes as (
    -- Finished plays (play_status = 1) of chapters 55-59, one row per
    -- user / chapter / chapter_unique_id / day. De-duplicating on the projected
    -- day (not the raw updated_at) keeps the later sum(interval_time) from
    -- being multiplied by repeated rows, and is applied identically per shard.
    select distinct
           user_id
          ,chapter_id
          ,chapter_unique_id
          ,to_char(updated_at,'YYYY-MM-DD') as finish_date
    from bi_user_chapter_play_record_0
    where chapter_id in (55,56,57,58,59)
      and play_status = 1
    union all
    select distinct
           user_id
          ,chapter_id
          ,chapter_unique_id
          ,to_char(updated_at,'YYYY-MM-DD') as finish_date
    from bi_user_chapter_play_record_1
    where chapter_id in (55,56,57,58,59)
      and play_status = 1
    -- ... repeat with "union all" for the remaining shards
    --     (bi_user_chapter_play_record_2 through _7)
)
,lessons as (
    -- De-duplicated on the cast id, which is the value actually used as the join key.
    select distinct
           cast(id as int) as id
          ,course_level
          ,course_season
          ,course_unit
          ,course_lesson
    from bi_level_unit_lesson
)
,component_intervals as (
    -- NOTE(review): de-duplicating on (chapter_unique_id, interval_time) also
    -- merges distinct components that happen to share the same interval_time,
    -- which would undercount the total - confirm whether a component key
    -- should be part of the de-duplication.
    select distinct
           chapter_unique_id
          ,interval_time
    from bi_user_component_play_record_0
    -- ... repeat with "union all" for the remaining shards
    --     (bi_user_component_play_record_1 through _7)
)
,chapter_completions as (
    -- finish_time is 'minutes:seconds' built from the summed interval_time
    -- (divided by 1000, so presumably milliseconds - confirm).
    -- rankno = 1 marks the earliest finish_date of each chapter_unique_id.
    select x.user_id
          ,x.chapter_id
          ,format('%s-%s-%s-%s',y.course_level,y.course_season,y.course_unit,y.course_lesson) as course_id
          ,x.chapter_unique_id
          ,x.finish_date
          ,format('%s:%s',floor(sum(z.interval_time)/1000/60),mod((sum(z.interval_time)/1000),60)) as finish_time
          ,rank() over (partition by x.chapter_unique_id order by x.finish_date) as rankno
    from chapter_finishes as x
    left join lessons as y
           on x.chapter_id = y.id
    left join component_intervals as z
           on x.chapter_unique_id = z.chapter_unique_id
    group by x.user_id
            ,x.chapter_id
            ,y.course_level
            ,y.course_season
            ,y.course_unit
            ,y.course_lesson
            ,x.chapter_unique_id
            ,x.finish_date
)
select a.id as "用户ID"
      ,a.created_date as "注册日期"
      ,a.download_channel as "下载渠道"
      ,a.key_from as "下载key_from"
      ,b.login_address as "城市"
      ,b.phone_login as "是否手机登录"
      ,c.sale_channel as "购课渠道"
      ,case when c.sale_channel is NULL then '未购课'
            when c.sale_channel = '站外' then '站外购课'
            else '站内购课'
       end as "购课标签"
      ,c.key_from as "购课key_from"
      ,c.pay_date as "购课日期"
      ,c.pay_amount as "购课金额"
      ,d.id as "角色ID"
      ,d.characer_pay_status as "角色是否付费"
      ,d.gender as "性别"
       -- Age is relative to the current year; the '0000' placeholder
       -- (unknown birth year) yields NULL instead of a bogus age.
      ,cast(extract(year from current_date) as int) - cast(nullif(d.birthday,'0000') as int) as "年龄"
      ,e.chapter_id as "课程ID"
      ,e.course_id as "课程名称"
      ,e.chapter_unique_id as "完课标识"
      ,e.finish_date as "完课日期"
      ,e.finish_time as "完课耗时"
from accounts as a
left join account_details as b
       on a.id = b.account_id
left join paid_orders as c
       on a.id = c.account_id
left join characters as d
       on a.id = d.account_id
-- The rankno filter lives in the ON clause: in a WHERE clause it would discard
-- every row whose completion side is NULL and turn the left joins into inner joins.
left join chapter_completions as e
       on d.id = e.user_id
      and e.rankno = 1
group by a.id
        ,a.created_date
        ,a.download_channel
        ,a.key_from
        ,b.login_address
        ,b.phone_login
        ,c.sale_channel
        ,c.key_from
        ,c.pay_date
        ,c.pay_amount
        ,d.id
        ,d.characer_pay_status
        ,d.gender
        ,d.birthday
        ,e.chapter_id
        ,e.course_id
        ,e.chapter_unique_id
        ,e.finish_date
        ,e.finish_time
重要业务逻辑
1. 购课渠道映射
case when sale_channel = 11 then '苹果'
when sale_channel = 12 then '华为'
-- ... 更多渠道
when sale_channel = 71 then '小程序'
else '站外'
end as sale_channel
2. 购课标签
case when c.sale_channel is NULL then '未购课'
when c.sale_channel = '站外' then '站外购课'
else '站内购课'
end as "购课标签"
3. 角色付费状态
case when purchase_season_package = '[1]' then 0
else 1
end as characer_pay_status
4. 性别映射
case when gender = 0 then 'girl'
when gender = 1 then 'boy'
else 'unknow'
end as gender
5. 完课时间计算
format('%s:%s',floor(sum(interval_time)/1000/60),mod((sum(interval_time)/1000),60)) as finish_time
注意事项
- 订单筛选条件: `order_status = 3 and pay_amount_int > 49800`(筛选有效订单且金额大于498元)
- 分表处理: 用户播放记录表按分表存储(0-7),需要使用 UNION ALL 合并
- 去重逻辑: 使用 `rank() over (partition by ... order by ...)` 取第一次完课记录
- 测试用户排除: `id not in (51,2121)`