每日总结更新 20260601
This commit is contained in:
parent
88b4518779
commit
7ea125c51e
19
.env
Normal file
19
.env
Normal file
@ -0,0 +1,19 @@
|
||||
# MySQL测试环境
|
||||
MYSQL_HOST=bj-cdb-8frbdwju.sql.tencentcdb.com
|
||||
MYSQL_PORT=25413
|
||||
MYSQL_USERNAME=read_only
|
||||
MYSQL_PASSWORD=fdsfiidier^$*hjfdijjd232
|
||||
|
||||
# PostgreSQL线上环境
|
||||
PG_DB_HOST=bj-postgres-16pob4sg.sql.tencentcdb.com
|
||||
PG_DB_PORT=28591
|
||||
PG_DB_USER=ai_member
|
||||
PG_DB_PASSWORD=LdfjdjL83h3h3^$&**YGG*
|
||||
PG_DB_DATABASE=vala
|
||||
|
||||
# Elasticsearch线上环境
|
||||
ES_HOST=es-7vd7jcu9.public.tencentelasticsearch.com
|
||||
ES_PORT=9200
|
||||
ES_SCHEME=https
|
||||
ES_USER=elastic
|
||||
ES_PASSWORD=F%?QDcWes7N2WTuiYD11
|
||||
@ -468,3 +468,11 @@ To https://git.valavala.com/ai_member_only/ai_member_xiaoban
|
||||
338e2f6..d523814 master -> master
|
||||
[2026-05-30 08:10:01] 工作区备份成功:自动备份 2026-05-30 08:10:01
|
||||
[2026-05-31 08:10:01] 开始备份工作区...
|
||||
[master 88b4518] 自动备份 2026-05-31 08:10:01
|
||||
2 files changed, 1 insertion(+), 3 deletions(-)
|
||||
delete mode 100644 tmp_daily_summary.md
|
||||
remote: . Processing 1 references
|
||||
remote: Processed 1 references in total
|
||||
To https://git.valavala.com/ai_member_only/ai_member_xiaoban
|
||||
d523814..88b4518 master -> master
|
||||
[2026-05-31 08:10:02] 工作区备份成功:自动备份 2026-05-31 08:10:01
|
||||
|
||||
@ -29,3 +29,6 @@
|
||||
{"type":"memory.recall.recorded","timestamp":"2026-05-30T12:39:47.963Z","query":"小红书数据需求群 chat_id oc_ 群ID","resultCount":6,"results":[{"path":"memory/2026-05-28.md","startLine":74,"endLine":100,"score":1},{"path":"memory/2026-05-28.md","startLine":240,"endLine":266,"score":1},{"path":"memory/2026-05-28.md","startLine":432,"endLine":458,"score":1},{"path":"memory/2026-05-28.md","startLine":94,"endLine":127,"score":1},{"path":"memory/2026-05-28.md","startLine":260,"endLine":293,"score":1},{"path":"memory/2026-05-28.md","startLine":452,"endLine":485,"score":1}]}
|
||||
{"type":"memory.recall.recorded","timestamp":"2026-05-30T14:07:53.580Z","query":"竞品 竞争对手 产品对比 市场分析","resultCount":2,"results":[{"path":"memory/2026-05-13.md","startLine":1,"endLine":16,"score":1},{"path":"memory/2026-05-24.md","startLine":66,"endLine":92,"score":1}]}
|
||||
{"type":"memory.recall.recorded","timestamp":"2026-05-30T14:07:53.580Z","query":"competitor rival English learning kids education product","resultCount":1,"results":[{"path":"memory/2026-03-01.md","startLine":1,"endLine":11,"score":1}]}
|
||||
{"type":"memory.recall.recorded","timestamp":"2026-05-31T23:25:36.480Z","query":"陈逸鸫 任务 进度 2026-05","resultCount":6,"results":[{"path":"memory/2026-05-28.md","startLine":504,"endLine":536,"score":1},{"path":"memory/2026-05-28.md","startLine":337,"endLine":366,"score":1},{"path":"memory/2026-05-28.md","startLine":74,"endLine":100,"score":1},{"path":"memory/2026-05-28.md","startLine":240,"endLine":266,"score":1},{"path":"memory/2026-05-28.md","startLine":432,"endLine":458,"score":1},{"path":"memory/2026-05-28.md","startLine":530,"endLine":557,"score":1}]}
|
||||
{"type":"memory.recall.recorded","timestamp":"2026-05-31T23:25:36.480Z","query":"pipeline 数据 同步 cron 定时","resultCount":5,"results":[{"path":"memory/2026-05-28.md","startLine":704,"endLine":717,"score":1},{"path":"memory/2026-05-28.md","startLine":596,"endLine":624,"score":1},{"path":"memory/2026-05-28.md","startLine":1,"endLine":36,"score":1},{"path":"memory/2026-05-28.md","startLine":359,"endLine":394,"score":1},{"path":"memory/2026-05-28.md","startLine":122,"endLine":152,"score":1}]}
|
||||
{"type":"memory.recall.recorded","timestamp":"2026-05-31T23:27:40.582Z","query":"视频 转译 翻译 评论 transcript 进度","resultCount":2,"results":[{"path":"memory/2026-05-28.md","startLine":641,"endLine":671,"score":1},{"path":"memory/2026-05-28.md","startLine":618,"endLine":649,"score":1}]}
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
{
|
||||
"version": 1,
|
||||
"updatedAt": "2026-05-30T14:07:53.580Z",
|
||||
"updatedAt": "2026-05-31T23:27:40.582Z",
|
||||
"entries": {
|
||||
"memory:memory/2026-05-24.md:1:30": {
|
||||
"key": "memory:memory/2026-05-24.md:1:30",
|
||||
@ -476,20 +476,22 @@
|
||||
"endLine": 152,
|
||||
"source": "memory",
|
||||
"snippet": "### 17:56 大麦查询输出表就绪 ✅ [陈逸鸫] 输出表 Token: `GCqNsqgzKhfQ5atFLnQcmLcGn4d`,Sheet: 查询快照 (`fd42b8`) - 实习虾 `cli_aa898f32d4799bea` 已分享编辑权限 - 写入验证通过 (revision 4) - env var: `XIAOBAN_QUERY_OUTPUT_TOKEN=GCqNsqgzKhfQ5atFLnQcmLcGn4d` - 同事查数结果 append `fd42b8!A:G`(请求时间/请求人/查询类型/cutoff/查询参数/结果摘要/备注) ### 17:48 5/27 日报推群成功 ✅ wrapper v0.4 实现 `im +messages-send`,推送 `message_id=om_x100b6eb54f9da0f4b115725198feede` ### 待办汇总 1. 📋 陈逸鸫发 `docs/xiaoban-data-boundary.md` §7 Agent 系统提示词 2. 📋 陈逸鸫发 `docs/xiaoban-runbook.md` §2 部署 checklist 3. 📋 建「大麦查询输出」飞书表 → 陈逸鸫提供 token 4. 📋 `~/xhs-tech-dashboard` 仓库 clone → 陈逸鸫提供访问方式 5. ⏳ 全量 pipeline 聚光步骤验证(子进程中) ### 18:30 Pipeline 全链路 dry-run 验证全部通过 [陈逸鸫] 收到微伴 xlsx,触发全链路 dry-r",
|
||||
"recallCount": 2,
|
||||
"recallCount": 3,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 2,
|
||||
"totalScore": 3,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-05-28T12:30:30.129Z",
|
||||
"lastRecalledAt": "2026-05-28T20:57:42.055Z",
|
||||
"lastRecalledAt": "2026-05-31T23:25:36.480Z",
|
||||
"queryHashes": [
|
||||
"4596e377d39b",
|
||||
"f139ebdae100"
|
||||
"f139ebdae100",
|
||||
"679cdd7bd3a8"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-05-28",
|
||||
"2026-05-29"
|
||||
"2026-05-29",
|
||||
"2026-06-01"
|
||||
],
|
||||
"conceptTags": [
|
||||
"cli-aa898f32d4799bea",
|
||||
@ -540,21 +542,23 @@
|
||||
"endLine": 536,
|
||||
"source": "memory",
|
||||
"snippet": "| 2b | 客户主表 → 订单 | ✅ | 公式就绪 | | 3 | 日报 C1HVN2 | ✅ | 四段刷新 | | 3a/3c | 口径审计 | ✅ | 推群全过 | | 4a | 结算月汇总 | ✅ | 公式就绪 | **结论:当前表内所有数据与重拉完全一致,零差异,无需刷写。** ### 18:36 小溪行课触发实跑 - 已推 1920 条到小溪查询表(新 Sheet) - 已 @小溪 到小红书数据需求群,请处理 ~5 条待查询记录 - 表格:`RFIJsXT8FhGHhctY4RwczcOfnac` - 等待小溪回填后跑 `pull_xiaoxi_results.py` 回收结果 ### 18:50 PG 实时行课数据全量覆盖 [陈逸鸫确认] 不用等小溪回填那 5 条,直接用 PG `user_course_detail` 表实时数据覆盖: - 448 人有新进展(相比小溪历史快照) - pull → 3PRySY 口径对齐 12/12 ✅ - lesson_cache → C1HVN2 16 格 ✅ - 变化有限(PG=正式课,小溪=体验课,口径不同) - sync_base (4b) → 多维表格 8 张 ✅ 21.2s ### 18:54 漏斗看板发布待解决 - funnel HTML 已构建(scripts/build_funnel_dashboard.py ✅) - 妙搭发布 `apps +html-publish` 需要 `--as user`,bot 模式不支持 - 系统 lark-cli `/usr/local/lib/node_mod",
|
||||
"recallCount": 4,
|
||||
"recallCount": 5,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 4,
|
||||
"totalScore": 5,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-05-28T20:51:03.908Z",
|
||||
"lastRecalledAt": "2026-05-29T06:11:40.432Z",
|
||||
"lastRecalledAt": "2026-05-31T23:25:36.480Z",
|
||||
"queryHashes": [
|
||||
"f22544a8757c",
|
||||
"2af907cea93d",
|
||||
"d9f1601110da",
|
||||
"82be33d1f911"
|
||||
"82be33d1f911",
|
||||
"cf12fd62a5e5"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-05-29"
|
||||
"2026-05-29",
|
||||
"2026-06-01"
|
||||
],
|
||||
"conceptTags": [
|
||||
"3a/3c",
|
||||
@ -574,20 +578,22 @@
|
||||
"endLine": 366,
|
||||
"source": "memory",
|
||||
"snippet": "- 448 人有新进展(相比小溪历史快照) - pull → 3PRySY 口径对齐 12/12 ✅ - lesson_cache → C1HVN2 16 格 ✅ - 变化有限(PG=正式课,小溪=体验课,口径不同) - sync_base (4b) → 多维表格 8 张 ✅ 21.2s ### 18:54 漏斗看板发布待解决 - funnel HTML 已构建(scripts/build_funnel_dashboard.py ✅) - 妙搭发布 `apps +html-publish` 需要 `--as user`,bot 模式不支持 - 系统 lark-cli `/usr/local/lib/node_modules/@anthropic/lark-cli` 可能支持 - 待确认:服务器是否已 `lark-cli auth login --as user` ### 19:00 陈逸鸫派 Image2 生图任务 **任务:** L1-S1-U1《秘密基地》5 课投放用小地图底图 - 模型:gpt-image-2 · 3:4 · 2K · 不要文字 - 风格:太阳朋克 + L1 场景 - 5 张 PNG:U1-L1~U1-L5 - FUNCLOUD_API_KEY 在小研 workspace `.env` 中可用(`fc_eea138933b02b4797ce0779ffb637d8b8a6368db7b435dfdab7b4be1cd254d98`) - Brief 文档 `/docx/KsVadUTmooO7yYxHaGmc1R0Bn5b` + 投放手册 `/do",
|
||||
"recallCount": 3,
|
||||
"recallCount": 4,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 3,
|
||||
"totalScore": 4,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-05-28T20:51:03.908Z",
|
||||
"lastRecalledAt": "2026-05-29T13:16:08.062Z",
|
||||
"lastRecalledAt": "2026-05-31T23:25:36.480Z",
|
||||
"queryHashes": [
|
||||
"f22544a8757c",
|
||||
"2af907cea93d",
|
||||
"3737f6af1445"
|
||||
"3737f6af1445",
|
||||
"cf12fd62a5e5"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-05-29"
|
||||
"2026-05-29",
|
||||
"2026-06-01"
|
||||
],
|
||||
"conceptTags": [
|
||||
"gpt",
|
||||
@ -607,22 +613,24 @@
|
||||
"endLine": 100,
|
||||
"source": "memory",
|
||||
"snippet": "3. **check_call→run**(`sync_juguang_notes.py` `sheet_write_range`):`subprocess.check_call` 不支持 `input=`, 改用 `subprocess.run(..., check=True, input=...)` 1b 单天验证通过:`sync_juguang_agents.py --start 2026-05-27 --end 2026-05-27` 写入 12 个单元格成功。全量 27 天在子进程跑,结果待出。 ### 17:12 数据服务边界规则部署 [陈逸鸫] **文档位置:** `docs/xiaoban-data-boundary.md`(git@github.com:chenyd11/feishu-database.git — 服务器无 SSH key 无法 pull) **三条核心规则(已写入 MEMORY.md):** 1. cron pipeline 和帮同事查数分轨,不能混用同一流程 2. 同事请求默认只读;写生产表 CYFTsu 必须 @陈逸鸫 确认 3. 查数结果写「输出区」副本,不改主表 **操作黑名单:** `pipeline.py` / `sync_*` / `run_juguang_*` / `sheets +write`(生产表) / `--promote` **待办:** 建「大麦查询输出」专用表(待陈逸鸫提供 token) **待办:** 获取 docs/xiaoban-data-boundary.md §7 Agent 系统提示词(完",
|
||||
"recallCount": 4,
|
||||
"recallCount": 5,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 4,
|
||||
"totalScore": 5,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-05-28T20:51:03.908Z",
|
||||
"lastRecalledAt": "2026-05-30T12:39:47.963Z",
|
||||
"lastRecalledAt": "2026-05-31T23:25:36.480Z",
|
||||
"queryHashes": [
|
||||
"f22544a8757c",
|
||||
"c3cb24be8923",
|
||||
"e3108bd5b94c",
|
||||
"340c1d46da26"
|
||||
"340c1d46da26",
|
||||
"cf12fd62a5e5"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-05-29",
|
||||
"2026-05-30"
|
||||
"2026-05-30",
|
||||
"2026-06-01"
|
||||
],
|
||||
"conceptTags": [
|
||||
"check-call",
|
||||
@ -642,22 +650,24 @@
|
||||
"endLine": 266,
|
||||
"source": "memory",
|
||||
"snippet": "3. **check_call→run**(`sync_juguang_notes.py` `sheet_write_range`):`subprocess.check_call` 不支持 `input=`, 改用 `subprocess.run(..., check=True, input=...)` 1b 单天验证通过:`sync_juguang_agents.py --start 2026-05-27 --end 2026-05-27` 写入 12 个单元格成功。全量 27 天在子进程跑,结果待出。 ### 17:12 数据服务边界规则部署 [陈逸鸫] **文档位置:** `docs/xiaoban-data-boundary.md`(git@github.com:chenyd11/feishu-database.git — 服务器无 SSH key 无法 pull) **三条核心规则(已写入 MEMORY.md):** 1. cron pipeline 和帮同事查数分轨,不能混用同一流程 2. 同事请求默认只读;写生产表 CYFTsu 必须 @陈逸鸫 确认 3. 查数结果写「输出区」副本,不改主表 **操作黑名单:** `pipeline.py` / `sync_*` / `run_juguang_*` / `sheets +write`(生产表) / `--promote` **待办:** 建「大麦查询输出」专用表(待陈逸鸫提供 token) **待办:** 获取 docs/xiaoban-data-boundary.md §7 Agent 系统提示词(完",
|
||||
"recallCount": 4,
|
||||
"recallCount": 5,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 4,
|
||||
"totalScore": 5,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-05-28T20:51:03.908Z",
|
||||
"lastRecalledAt": "2026-05-30T12:39:47.963Z",
|
||||
"lastRecalledAt": "2026-05-31T23:25:36.480Z",
|
||||
"queryHashes": [
|
||||
"f22544a8757c",
|
||||
"c3cb24be8923",
|
||||
"e3108bd5b94c",
|
||||
"340c1d46da26"
|
||||
"340c1d46da26",
|
||||
"cf12fd62a5e5"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-05-29",
|
||||
"2026-05-30"
|
||||
"2026-05-30",
|
||||
"2026-06-01"
|
||||
],
|
||||
"conceptTags": [
|
||||
"check-call",
|
||||
@ -677,22 +687,24 @@
|
||||
"endLine": 458,
|
||||
"source": "memory",
|
||||
"snippet": "3. **check_call→run**(`sync_juguang_notes.py` `sheet_write_range`):`subprocess.check_call` 不支持 `input=`, 改用 `subprocess.run(..., check=True, input=...)` 1b 单天验证通过:`sync_juguang_agents.py --start 2026-05-27 --end 2026-05-27` 写入 12 个单元格成功。全量 27 天在子进程跑,结果待出。 ### 17:12 数据服务边界规则部署 [陈逸鸫] **文档位置:** `docs/xiaoban-data-boundary.md`(git@github.com:chenyd11/feishu-database.git — 服务器无 SSH key 无法 pull) **三条核心规则(已写入 MEMORY.md):** 1. cron pipeline 和帮同事查数分轨,不能混用同一流程 2. 同事请求默认只读;写生产表 CYFTsu 必须 @陈逸鸫 确认 3. 查数结果写「输出区」副本,不改主表 **操作黑名单:** `pipeline.py` / `sync_*` / `run_juguang_*` / `sheets +write`(生产表) / `--promote` **待办:** 建「大麦查询输出」专用表(待陈逸鸫提供 token) **待办:** 获取 docs/xiaoban-data-boundary.md §7 Agent 系统提示词(完",
|
||||
"recallCount": 4,
|
||||
"recallCount": 5,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 4,
|
||||
"totalScore": 5,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-05-28T20:51:03.908Z",
|
||||
"lastRecalledAt": "2026-05-30T12:39:47.963Z",
|
||||
"lastRecalledAt": "2026-05-31T23:25:36.480Z",
|
||||
"queryHashes": [
|
||||
"f22544a8757c",
|
||||
"c3cb24be8923",
|
||||
"e3108bd5b94c",
|
||||
"340c1d46da26"
|
||||
"340c1d46da26",
|
||||
"cf12fd62a5e5"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-05-29",
|
||||
"2026-05-30"
|
||||
"2026-05-30",
|
||||
"2026-06-01"
|
||||
],
|
||||
"conceptTags": [
|
||||
"check-call",
|
||||
@ -712,22 +724,24 @@
|
||||
"endLine": 557,
|
||||
"source": "memory",
|
||||
"snippet": "- 妙搭发布 `apps +html-publish` 需要 `--as user`,bot 模式不支持 - 系统 lark-cli `/usr/local/lib/node_modules/@anthropic/lark-cli` 可能支持 - 待确认:服务器是否已 `lark-cli auth login --as user` ### 19:00 陈逸鸫派 Image2 生图任务 **任务:** L1-S1-U1《秘密基地》5 课投放用小地图底图 - 模型:gpt-image-2 · 3:4 · 2K · 不要文字 - 风格:太阳朋克 + L1 场景 - 5 张 PNG:U1-L1~U1-L5 - FUNCLOUD_API_KEY 在小研 workspace `.env` 中可用(`fc_eea138933b02b4797ce0779ffb637d8b8a6368db7b435dfdab7b4be1cd254d98`) - Brief 文档 `/docx/KsVadUTmooO7yYxHaGmc1R0Bn5b` + 投放手册 `/docx/QhYQdz7PvoN7Eaxmhu0c0Q5UnHe` — 均为个人文档,AGENTS.md 规则禁止读取 - 素材库入口:https://llm-dev.valavala.com/web_tools/material_prod --- ### 19:30 同事数据查询流程演练 [陈逸鸫测试] **场景:模拟王虹茗请求小龙 4/21-5/20 订单详情,验证三级查询流程** **小龙订单查询结果(数据源:3wcle8 销售订",
|
||||
"recallCount": 5,
|
||||
"recallCount": 6,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 5,
|
||||
"totalScore": 6,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-05-28T20:51:03.908Z",
|
||||
"lastRecalledAt": "2026-05-29T13:16:08.062Z",
|
||||
"lastRecalledAt": "2026-05-31T23:25:36.480Z",
|
||||
"queryHashes": [
|
||||
"f22544a8757c",
|
||||
"82be33d1f911",
|
||||
"2aa08c6652fb",
|
||||
"f7ae50ae228d",
|
||||
"3737f6af1445"
|
||||
"3737f6af1445",
|
||||
"cf12fd62a5e5"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-05-29"
|
||||
"2026-05-29",
|
||||
"2026-06-01"
|
||||
],
|
||||
"conceptTags": [
|
||||
"gpt",
|
||||
@ -1268,6 +1282,192 @@
|
||||
"5/5",
|
||||
"5/20"
|
||||
]
|
||||
},
|
||||
"memory:memory/2026-05-28.md:704:717": {
|
||||
"key": "memory:memory/2026-05-28.md:704:717",
|
||||
"path": "memory/2026-05-28.md",
|
||||
"startLine": 704,
|
||||
"endLine": 717,
|
||||
"source": "memory",
|
||||
"snippet": "**陈逸鸫提到 Cursor 可以写公式** — 待确认他用的是什么工具/API。可能路径:Cursor 用了不同端点(Sheets v3?)、不同 auth 方式、或者直接用 Feishu MCP。 ### ~22:05 日报「公式映射」现状 **事实:** C1HVN2 Section 三当前数字是 `compute_lesson_activation.py` / `pipeline.py step 3` 写入的静态值,不是真正的 Feishu 公式。API 层无法写入公式。 **正确的「公式映射」方案:** 不强求单元格公式,而是 pipeline 脚本作为\"公式\"—从源表取数→计算→写入。Section 一/二的现有自动化就是这样运行的。需验证 pipeline step 3 是否已覆盖新增的当日行课/7天首课行,如果没覆盖就加进去。 ### 当前 C1HVN2 待修复项 1. Row 18(企微新增)TEXT 格式 → 需陈逸鸫手动「清除格式」 2. Section 四(销转情况)Row 26-29 数字也是 text 格式 → 同样需手动清除 3. 当日行课/7天首课数据 → pipeline 脚本需覆盖 Section 三写入",
|
||||
"recallCount": 1,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 1,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-05-31T23:25:36.480Z",
|
||||
"lastRecalledAt": "2026-05-31T23:25:36.480Z",
|
||||
"queryHashes": [
|
||||
"679cdd7bd3a8"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-06-01"
|
||||
],
|
||||
"conceptTags": [
|
||||
"待确认他用的是什么工具/api",
|
||||
"compute-lesson-activation.py",
|
||||
"pipeline.py",
|
||||
"一/二的现有自动化就是这样运行的",
|
||||
"是否已覆盖新增的当日行课/7天首课行",
|
||||
"26-29",
|
||||
"当日行课/7天首课数据",
|
||||
"提到"
|
||||
]
|
||||
},
|
||||
"memory:memory/2026-05-28.md:596:624": {
|
||||
"key": "memory:memory/2026-05-28.md:596:624",
|
||||
"path": "memory/2026-05-28.md",
|
||||
"startLine": 596,
|
||||
"endLine": 624,
|
||||
"source": "memory",
|
||||
"snippet": "6. **build_pipeline 脚本** — 新增 `scripts/compute_lesson_activation.py`(PG→2aNzzy V/W) **关键数据源映射:** 进线=2aNzzy C列日期 → 用户ID=2aNzzy → PG user_course_detail 首课日期 → 比对同天 → 写回 2aNzzy V/W ### 待办汇总 1. 📋 陈逸鸫确认行课转化改动方案 → 一口气改 6 处 2. 📋 王虹茗 user_id 获取(需她发消息或陈逸鸫截图) 3. 📋 数据转发王虹茗 + 写入大麦查询输出表 fd42b8 4. 📋 销售看板 build 挂住问题排查 5. 📋 Image2 生图任务执行 6. ⏳ 全量 pipeline 聚光验证(子进程) ### 20:52 行课转化全量改动完成 [陈逸鸫确认] 行课记录新增指标:当日进线→当天行课 + 7天首课,6处改动已完成4处: | # | 位置 | 改动 | 状态 | |---|------|------|------| | 1 | 2aNzzy | V/W/X 三列(首课日期/当日行课/7日内行课) | ✅ | | 2 | 3PRySY | AE-AH 四列(当日行课/当日行课率/7日内首课/7日内首课率)| ✅ | | 3 | C1HVN2 | 合并 三+五 →「线索→行课转化」| ✅ | | 4 | Base | 行课销售月(4)/5月漏斗(3)/销转销售月(2) 加字段 | ⚠️ bot权限不足,需手动 | | 5 | funnel-daily 看板 |",
|
||||
"recallCount": 1,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 1,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-05-31T23:25:36.480Z",
|
||||
"lastRecalledAt": "2026-05-31T23:25:36.480Z",
|
||||
"queryHashes": [
|
||||
"679cdd7bd3a8"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-06-01"
|
||||
],
|
||||
"conceptTags": [
|
||||
"build-pipeline",
|
||||
"v/w",
|
||||
"user-course-detail",
|
||||
"user-id",
|
||||
"v/w/x",
|
||||
"首课日期/当日行课/7日内行课",
|
||||
"ae-ah",
|
||||
"当日行课/当日行课率/7日内首课/7日内首课率"
|
||||
]
|
||||
},
|
||||
"memory:memory/2026-05-28.md:1:36": {
|
||||
"key": "memory:memory/2026-05-28.md:1:36",
|
||||
"path": "memory/2026-05-28.md",
|
||||
"startLine": 1,
|
||||
"endLine": 36,
|
||||
"source": "memory",
|
||||
"snippet": "# 2026-05-28 工作日志 ## 品牌更名与定位升级 [Cris确认] - 姓名:小斑 → **大麦** - 定位:AI班主任 → **增长营销分析师** - 核心职能:增长数据分析、营销效果评估、转化漏斗分析、商业化洞察 - Emoji:📚 → 🌾 - 已更新文件:IDENTITY.md、AGENTS.md(@规则同步变更)、MEMORY.md - SOUL.md 无需改动(行为方法论为通用框架) ### 16:20 pipeline 非聚光部分验证完成 **lark-cli wrapper v0.3 支持的 action:** | 类别 | action | 状态 | |------|--------|------| | sheets | +read, +write, +append, +info, +meta | ✅ | | sheets | +create-sheet, +delete-sheet | ✅ | | sheets | +batch-set-style (stub) | ✅ | | sheets | +merge-cells, +unmerge-cells | ✅ | | sheets | +update-dimension | ✅ | | bitable | +app, +tables, +records, +create, +update | ✅ | | auth | status | ✅ | | im | +messages-send (stub) | ✅ | **pipeline 试跑结果(--dry-run):** | 步骤",
|
||||
"recallCount": 1,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 1,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-05-31T23:25:36.480Z",
|
||||
"lastRecalledAt": "2026-05-31T23:25:36.480Z",
|
||||
"queryHashes": [
|
||||
"679cdd7bd3a8"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-06-01"
|
||||
],
|
||||
"conceptTags": [
|
||||
"identity.md",
|
||||
"agents.md",
|
||||
"memory.md",
|
||||
"soul.md",
|
||||
"lark-cli",
|
||||
"v0.3",
|
||||
"create-sheet",
|
||||
"delete-sheet"
|
||||
]
|
||||
},
|
||||
"memory:memory/2026-05-28.md:359:394": {
|
||||
"key": "memory:memory/2026-05-28.md:359:394",
|
||||
"path": "memory/2026-05-28.md",
|
||||
"startLine": 359,
|
||||
"endLine": 394,
|
||||
"source": "memory",
|
||||
"snippet": "# 2026-05-28 工作日志 ## 品牌更名与定位升级 [Cris确认] - 姓名:小斑 → **大麦** - 定位:AI班主任 → **增长营销分析师** - 核心职能:增长数据分析、营销效果评估、转化漏斗分析、商业化洞察 - Emoji:📚 → 🌾 - 已更新文件:IDENTITY.md、AGENTS.md(@规则同步变更)、MEMORY.md - SOUL.md 无需改动(行为方法论为通用框架) ### 16:20 pipeline 非聚光部分验证完成 **lark-cli wrapper v0.3 支持的 action:** | 类别 | action | 状态 | |------|--------|------| | sheets | +read, +write, +append, +info, +meta | ✅ | | sheets | +create-sheet, +delete-sheet | ✅ | | sheets | +batch-set-style (stub) | ✅ | | sheets | +merge-cells, +unmerge-cells | ✅ | | sheets | +update-dimension | ✅ | | bitable | +app, +tables, +records, +create, +update | ✅ | | auth | status | ✅ | | im | +messages-send (stub) | ✅ | **pipeline 试跑结果(--dry-run):** | 步骤",
|
||||
"recallCount": 1,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 1,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-05-31T23:25:36.480Z",
|
||||
"lastRecalledAt": "2026-05-31T23:25:36.480Z",
|
||||
"queryHashes": [
|
||||
"679cdd7bd3a8"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-06-01"
|
||||
],
|
||||
"conceptTags": [
|
||||
"identity.md",
|
||||
"agents.md",
|
||||
"memory.md",
|
||||
"soul.md",
|
||||
"lark-cli",
|
||||
"v0.3",
|
||||
"create-sheet",
|
||||
"delete-sheet"
|
||||
]
|
||||
},
|
||||
"memory:memory/2026-05-28.md:641:671": {
|
||||
"key": "memory:memory/2026-05-28.md:641:671",
|
||||
"path": "memory/2026-05-28.md",
|
||||
"startLine": 641,
|
||||
"endLine": 671,
|
||||
"source": "memory",
|
||||
"snippet": "- compute_lesson_activation.py 进 cron 后每日自动刷新 **待办:** Base 字段需陈逸鸫在 UI 手动添加(行课销售月/5月漏斗/销转销售月各加 2-4 个数字字段) ## funnel-daily 看板行课激活指标 — 待修复 (21:49) ### Bug 1: `_read_activations` 未过滤汇总行 `snapshot_funnel_daily.py` 读 2aNzzy O2:X4000 时,只检查 O/P 是否有值,没检查 Q (用户ID),导致汇总行(无 user_id)也被计入。 - **修复**: 加 `if not uid: continue`,只统计有 user_id 的个体小溪记录 ### Bug 2: key 不匹配 `collect_snapshot` 合计用 `\"week_lesson\"`,但 `_read_activations` 存的是 `\"week\"`,导致 合计 wk=0。 - **修复**: 统一用 `\"week\"` key ### Bug 3: X列数据 O列分组失真 X列 (7日内行课) 是基于 B列(进线日期) 计算的,但按 O列(行课月) 聚合会导致跨月用户重复计入。5月 O列=505行但个体记录只有约108条/月。 - **初步方案**: 先按 O列+Q列过滤(过滤汇总行),暂时接受 O列分组。后续可考虑 B列分组。 ### 待执行 (下次回话) ```bash # 1. 修复 snapshot_funnel_daily.py 两处 bug # 2. 重建 sn",
|
||||
"recallCount": 1,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 1,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-05-31T23:27:40.582Z",
|
||||
"lastRecalledAt": "2026-05-31T23:27:40.582Z",
|
||||
"queryHashes": [
|
||||
"0be022b45645"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-06-01"
|
||||
],
|
||||
"conceptTags": [
|
||||
"compute-lesson-activation.py",
|
||||
"行课销售月/5月漏斗/销转销售月各加",
|
||||
"2-4",
|
||||
"funnel-daily",
|
||||
"read-activations",
|
||||
"snapshot-funnel-daily.py",
|
||||
"o/p",
|
||||
"user-id"
|
||||
]
|
||||
},
|
||||
"memory:memory/2026-05-28.md:618:649": {
|
||||
"key": "memory:memory/2026-05-28.md:618:649",
|
||||
"path": "memory/2026-05-28.md",
|
||||
"startLine": 618,
|
||||
"endLine": 649,
|
||||
"source": "memory",
|
||||
"snippet": "| 4 | Base | 行课销售月(4)/5月漏斗(3)/销转销售月(2) 加字段 | ⚠️ bot权限不足,需手动 | | 5 | funnel-daily 看板 | 加行课转化指标 | 📋 待 build 脚本改 | | 6 | build 脚本 | compute_lesson_activation.py | ✅ | **合并后的 C1HVN2 Section 三:** ``` 三、线索→行课转化(5月27日) 指标 小龙 吴迪 Bob Tom 日汇总 企微新增 13 6 14 15 48 当日行课 0 0 0 0 0 当日行课率 0% 0% 0% 0% 0% 7天线索→首课 1 2 2 2 10 7天首课率 7.7% 33.3% 14.3% 13.3% 20.8% ``` **5月全月数据(3PRySY 公式自动计算):** Bob 当日行课率 7.4% > Tom 3.7% > 其余 0%;7天首课率 吴迪 28.6% > 小龙 14.3% **关键实现细节:** - 2aNzzy V: PG chapter_settlement_data 最早日期 - 2aNzzy W: 进线当天有 chapter 活动=1 - 2aNzzy X: 进线 7 天内有 chapter 活动=1 - 3PRySY",
|
||||
"recallCount": 1,
|
||||
"dailyCount": 0,
|
||||
"groundedCount": 0,
|
||||
"totalScore": 1,
|
||||
"maxScore": 1,
|
||||
"firstRecalledAt": "2026-05-31T23:27:40.582Z",
|
||||
"lastRecalledAt": "2026-05-31T23:27:40.582Z",
|
||||
"queryHashes": [
|
||||
"0be022b45645"
|
||||
],
|
||||
"recallDays": [
|
||||
"2026-06-01"
|
||||
],
|
||||
"conceptTags": [
|
||||
"funnel-daily",
|
||||
"compute-lesson-activation.py",
|
||||
"7.7",
|
||||
"33.3",
|
||||
"14.3",
|
||||
"13.3",
|
||||
"20.8",
|
||||
"7.4"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
BIN
output/角色id_33943_导出时间_20260531.xlsx
Normal file
BIN
output/角色id_33943_导出时间_20260531.xlsx
Normal file
Binary file not shown.
BIN
output/角色id_35000_导出时间_20260531.xlsx
Normal file
BIN
output/角色id_35000_导出时间_20260531.xlsx
Normal file
Binary file not shown.
BIN
output/角色id_35946_导出时间_20260531.xlsx
Normal file
BIN
output/角色id_35946_导出时间_20260531.xlsx
Normal file
Binary file not shown.
200
scripts/upload_server.py
Normal file
200
scripts/upload_server.py
Normal file
@ -0,0 +1,200 @@
|
||||
#!/usr/bin/env python3
|
||||
"""简易文件上传服务 - 浏览器打开即可拖拽上传"""
|
||||
import os
|
||||
import cgi
|
||||
import html
|
||||
from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||
|
||||
# Directory where uploaded files are stored; "~" is expanded to the home
# directory of the user running the server.
UPLOAD_DIR = os.path.expanduser("~/.openclaw/workspace-xiaoban/tmp/uploads")
# TCP port the upload server listens on.
PORT = 18888
|
||||
|
||||
# Single-page upload UI served for "GET /".
# Every value interpolated into innerHTML (file names, server responses, error
# text) goes through esc() first: file names are attacker-controlled, so
# inserting them raw would let an uploaded file name inject markup/script into
# the page of everyone who opens the file list.
HTML = r"""<!DOCTYPE html>
<html lang="zh">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>上传文件到大麦</title>
<style>
*{margin:0;padding:0;box-sizing:border-box}
body{font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,sans-serif;background:#0f172a;color:#e2e8f0;min-height:100vh;display:flex;align-items:center;justify-content:center}
.container{text-align:center;max-width:600px;padding:40px}
h1{font-size:28px;margin-bottom:8px}
.sub{color:#94a3b8;margin-bottom:32px;font-size:14px}
.dropzone{background:#1e293b;border:2px dashed #475569;border-radius:16px;padding:60px 20px;cursor:pointer;transition:all .2s}
.dropzone:hover,.dropzone.dragover{border-color:#6366f1;background:#1e1b4b}
.dropzone p{font-size:16px;color:#94a3b8}
.dropzone .icon{font-size:48px;margin-bottom:16px}
input[type=file]{display:none}
#progress{display:none;margin-top:16px}
.bar-bg{background:#334155;border-radius:8px;height:8px;overflow:hidden}
.bar{background:linear-gradient(90deg,#6366f1,#a855f7);height:100%;width:0;transition:width .3s}
#result{margin-top:20px;font-size:14px}
.success{color:#4ade80}
.error{color:#f87171}
.recent{margin-top:32px;text-align:left;max-height:200px;overflow-y:auto}
.recent h3{font-size:14px;color:#64748b;margin-bottom:8px}
.recent li{font-size:12px;color:#94a3b8;padding:4px 0;border-bottom:1px solid #1e293b}
</style>
</head>
<body>
<div class="container">
<h1>📤 上传文件到大麦</h1>
<p class="sub">拖拽文件到下方区域 或 点击选择文件</p>
<div class="dropzone" id="dropzone" onclick="document.getElementById('file').click()">
<div class="icon">📁</div>
<p>点击或拖拽文件到此处上传</p>
</div>
<input type="file" id="file" multiple onchange="upload(this.files)">
<div id="progress">
<p id="status">上传中...</p>
<div class="bar-bg"><div class="bar" id="bar"></div></div>
</div>
<div id="result"></div>
<div class="recent" id="recent"><h3>📋 最近上传</h3><ul id="filelist"></ul></div>
</div>
<script>
const dropzone = document.getElementById('dropzone');
['dragenter','dragover','dragleave','drop'].forEach(e => {
  dropzone.addEventListener(e, ev => { ev.preventDefault(); ev.stopPropagation(); });
});
['dragenter','dragover'].forEach(e => dropzone.addEventListener(e, () => dropzone.classList.add('dragover')));
['dragleave','drop'].forEach(e => dropzone.addEventListener(e, () => dropzone.classList.remove('dragover')));
dropzone.addEventListener('drop', ev => upload(ev.dataTransfer.files));

function esc(s){
  return String(s).replace(/[&<>"']/g, c => ({'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'}[c]));
}

async function upload(files){
  if(!files.length) return;
  const progress = document.getElementById('progress');
  const bar = document.getElementById('bar');
  const status = document.getElementById('status');
  const result = document.getElementById('result');
  progress.style.display='block'; bar.style.width='0'; result.innerHTML='';

  let ok=0, fail=0;
  for(let i=0;i<files.length;i++){
    const f = files[i];
    status.textContent = `正在上传: ${f.name} (${(f.size/1024/1024).toFixed(1)}MB)...`;
    bar.style.width = ((i/files.length)*100)+'%';
    const fd = new FormData(); fd.append('file', f);
    try{
      const r = await fetch('/upload', {method:'POST', body:fd});
      const t = await r.text();
      if(r.ok){ ok++; } else { fail++; result.innerHTML+=`<div class="error">❌ ${esc(f.name)}: ${esc(t)}</div>`; }
    }catch(e){ fail++; result.innerHTML+=`<div class="error">❌ ${esc(f.name)}: ${esc(e)}</div>`; }
  }
  bar.style.width='100%';
  if(fail===0) result.innerHTML+=`<div class="success">✅ 全部 ${ok} 个文件上传成功!</div>`;
  else result.innerHTML+=`<div class="success">✅ ${ok} 个成功</div><div class="error">❌ ${fail} 个失败</div>`;
  status.textContent='上传完成!';
  loadFiles();
  setTimeout(()=>{ progress.style.display='none'; result.innerHTML=''; }, 5000);
}

async function loadFiles(){
  try{
    const r = await fetch('/files');
    const files = await r.json();
    document.getElementById('filelist').innerHTML = files.map(f =>
      `<li>📄 ${esc(f.name)} <span style="color:#64748b">${(f.size/1024/1024).toFixed(1)}MB · ${esc(f.time)}</span></li>`
    ).join('');
  }catch(e){}
}
loadFiles();
</script>
</body>
</html>"""
|
||||
|
||||
|
||||
class UploadHandler(BaseHTTPRequestHandler):
    """Request handler for the drag-and-drop upload service.

    Routes:
        GET  /        serve the upload page (module-level ``HTML``).
        GET  /files   JSON array describing up to 20 of the most recently
                      modified files in ``UPLOAD_DIR``.
        POST /upload  store the multipart ``file`` part(s) in ``UPLOAD_DIR``.

    Every other path is answered with 404.
    """

    def do_GET(self):
        """Dispatch GET requests by exact path match."""
        if self.path == '/':
            self._send_html(HTML)
        elif self.path == '/files':
            self._list_files()
        else:
            self.send_error(404)

    def do_POST(self):
        """Dispatch POST requests; only ``/upload`` is accepted."""
        if self.path == '/upload':
            self._handle_upload()
        else:
            self.send_error(404)

    def _send_bytes(self, body, content_type, code=200):
        """Write a complete response with an explicit Content-Length."""
        self.send_response(code)
        self.send_header('Content-Type', content_type)
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _send_html(self, content, code=200):
        """Send *content* (str) as a UTF-8 HTML response."""
        self._send_bytes(content.encode('utf-8'), 'text/html; charset=utf-8', code)

    def _send_text(self, code, text):
        """Send *text* (str) as a UTF-8 plain-text response.

        Used instead of ``send_error`` for messages that are not latin-1:
        ``send_error`` places the message in the HTTP status line, which is
        encoded as latin-1, so a Chinese message raises UnicodeEncodeError
        before anything reaches the client.
        """
        self._send_bytes(text.encode('utf-8'), 'text/plain; charset=utf-8', code)

    @staticmethod
    def _safe_filename(raw):
        """Reduce a client-supplied file name to a bare, non-empty base name.

        Backslashes are treated as path separators so Windows-style paths
        are stripped as well; NUL bytes are dropped. Names that collapse to
        nothing or to a directory reference fall back to ``'upload'``.
        """
        name = os.path.basename(raw.replace('\\', '/').replace('\x00', ''))
        return name if name not in ('', '.', '..') else 'upload'

    def _list_files(self):
        """Respond with JSON metadata for the newest files in ``UPLOAD_DIR``.

        Each item has ``name``, ``size`` (bytes) and ``time`` (local time,
        ``%m-%d %H:%M``). Items are ordered newest-first by modification
        time so the 20-item cut-off really keeps the latest uploads.
        """
        import json
        import time
        os.makedirs(UPLOAD_DIR, exist_ok=True)
        entries = []
        with os.scandir(UPLOAD_DIR) as it:
            for entry in it:
                try:
                    if not entry.is_file():
                        continue
                    st = entry.stat()
                except OSError:
                    # Entry disappeared between listing and stat; ignore it.
                    continue
                entries.append((st.st_mtime, entry.name, st.st_size))
        entries.sort(reverse=True)
        files = [
            {
                'name': name,
                'size': size,
                'time': time.strftime('%m-%d %H:%M', time.localtime(mtime)),
            }
            for mtime, name, size in entries[:20]
        ]
        self._send_bytes(json.dumps(files).encode('utf-8'), 'application/json')

    def _store(self, item):
        """Write one uploaded part into ``UPLOAD_DIR``; return the name used.

        The file is created with exclusive mode (``'xb'``), so an existing
        file is never overwritten; on a name clash ``_1``, ``_2``, ... is
        inserted before the extension. The payload is copied in chunks
        rather than loaded into memory in one piece.
        """
        safe_name = self._safe_filename(item.filename)
        base, ext = os.path.splitext(safe_name)
        candidate = safe_name
        counter = 1
        while True:
            try:
                out = open(os.path.join(UPLOAD_DIR, candidate), 'xb')
            except FileExistsError:
                candidate = f"{base}_{counter}{ext}"
                counter += 1
                continue
            with out:
                for chunk in iter(lambda: item.file.read(1 << 16), b''):
                    out.write(chunk)
            return candidate

    def _handle_upload(self):
        """Parse a multipart POST and save every ``file`` part that has a name.

        Responds 400 when the request is not multipart or carries no named
        ``file`` part, otherwise 200 with ``OK: <saved name>[, <saved name>...]``.
        """
        content_type = self.headers.get('Content-Type', '')
        if 'multipart/form-data' not in content_type:
            self._send_text(400, '需要 multipart/form-data')
            return

        # NOTE: the cgi module is deprecated (PEP 594) and removed in
        # Python 3.13; this parser needs replacing before upgrading.
        form = cgi.FieldStorage(
            fp=self.rfile,
            headers=self.headers,
            environ={'REQUEST_METHOD': 'POST', 'CONTENT_TYPE': content_type},
        )

        # form['file'] is a single item, or a list when the field repeats.
        parts = form['file'] if 'file' in form else []
        if not isinstance(parts, list):
            parts = [parts]
        uploads = [part for part in parts if part.filename]
        if not uploads:
            self._send_text(400, '未选择文件')
            return

        os.makedirs(UPLOAD_DIR, exist_ok=True)
        saved = [self._store(part) for part in uploads]
        self._send_text(200, 'OK: ' + ', '.join(saved))

    def log_message(self, format, *args):
        """Suppress the default per-request logging."""
        pass
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: make sure the upload directory exists, then serve
    # until interrupted.
    os.makedirs(UPLOAD_DIR, exist_ok=True)
    # '0.0.0.0' binds every network interface. NOTE(review): the handler
    # performs no authentication, so any client that can reach this port can
    # upload files and list the upload directory.
    server = HTTPServer(('0.0.0.0', PORT), UploadHandler)
    # The host in this URL is hard-coded and only informational; it is not
    # derived from the address the server actually bound to.
    print(f'上传服务已启动: http://115.190.225.235:{PORT}')
    print(f'上传目录: {UPLOAD_DIR}')
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print('\n已停止')
    finally:
        # Release the listening socket however the serve loop ends.
        server.server_close()
|
||||
142
scripts/whisper_batch.py
Normal file
142
scripts/whisper_batch.py
Normal file
@ -0,0 +1,142 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
批量 Whisper 转录脚本
|
||||
- 并行度: 4 workers (medium模型, 31GB RAM安全)
|
||||
- 输出: 每个视频的 .txt 逐字稿 + batch_summary.json
|
||||
"""
|
||||
import subprocess
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||
from pathlib import Path
|
||||
|
||||
# Directory scanned for source videos (*.mp4).
VIDEO_DIR = "/root/.openclaw/workspace-xiaoban/tmp/Lina先生_视频/Lina先生_作品下载"
# Directory where per-video .txt transcripts are written and looked up.
OUTPUT_DIR = "/root/.openclaw/workspace-xiaoban/tmp/whisper_output"
# Value passed to the whisper CLI as --model.
MODEL = "medium"
# Value passed to the whisper CLI as --language.
LANGUAGE = "Chinese"
WORKERS = 4 # conservative degree of parallelism

# Runs at import time, so the output directory exists before any function is called.
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||
|
||||
def _probe_duration(video_path):
    """Return the duration in seconds reported by ffprobe, or 0 if unknown."""
    try:
        probe = subprocess.run(
            ["ffprobe", "-v", "error", "-show_entries", "format=duration",
             "-of", "csv=p=0", str(video_path)],
            capture_output=True, text=True, timeout=10
        )
        raw = probe.stdout.strip()
        return float(raw) if raw else 0
    except (OSError, subprocess.SubprocessError, ValueError):
        # OSError: ffprobe missing or not executable.
        # SubprocessError: includes the 10 s timeout.
        # ValueError: output that is not a number (or cannot be decoded).
        # The duration is only used for estimates, so fall back to 0 rather
        # than abort; Ctrl+C is deliberately NOT swallowed here.
        return 0


def get_videos(video_dir=None, output_dir=None):
    """Collect the videos that still need to be transcribed.

    Args:
        video_dir: Directory searched (non-recursively) for ``*.mp4`` files.
            Defaults to the module-level ``VIDEO_DIR``.
        output_dir: Directory holding finished transcripts. Defaults to the
            module-level ``OUTPUT_DIR``.

    Returns:
        A list of dicts with keys ``"path"`` (str), ``"name"`` (file name)
        and ``"duration"`` (seconds, 0 when it could not be determined),
        ordered by sorted path. A video is omitted when a non-empty
        ``<stem>.txt`` already exists in the output directory.
    """
    source = Path(VIDEO_DIR if video_dir is None else video_dir)
    transcripts = Path(OUTPUT_DIR if output_dir is None else output_dir)

    pending = []
    for video in sorted(source.glob("*.mp4")):
        # Skip videos that already have a non-empty transcript.
        transcript = transcripts / (video.stem + ".txt")
        if transcript.exists() and transcript.stat().st_size > 0:
            continue
        pending.append({
            "path": str(video),
            "name": video.name,
            "duration": _probe_duration(video),
        })
    return pending
|
||||
|
||||
def _read_transcript(txt_path):
    """Read a transcript file as UTF-8, independent of the process locale."""
    with open(txt_path, 'r', encoding="utf-8") as fh:
        return fh.read()


def transcribe(video_info, output_dir=None):
    """Transcribe one video with the whisper CLI.

    Args:
        video_info: Dict with the keys ``"path"``, ``"name"`` and
            ``"duration"`` (the shape produced by ``get_videos``).
        output_dir: Directory for the ``.txt`` transcript. Defaults to the
            module-level ``OUTPUT_DIR``.

    Returns:
        A dict with keys ``"name"``, ``"duration"``, ``"text"``,
        ``"elapsed"`` (seconds spent) and ``"cached"``. Failures while
        running whisper are not raised; they are reported through ``"text"``
        as ``"[转录失败]"`` (no output file), ``"[超时]"`` (timeout) or
        ``"[错误: ...]"`` (any other exception).
    """
    video_path = video_info["path"]
    name = video_info["name"]
    duration = video_info["duration"]
    target_dir = os.fspath(OUTPUT_DIR if output_dir is None else output_dir)
    out_txt = os.path.join(target_dir, Path(video_path).stem + ".txt")

    def outcome(text, elapsed, cached=False):
        # Single place that defines the shape of the result record.
        return {"name": name, "duration": duration, "text": text,
                "elapsed": elapsed, "cached": cached}

    # Reuse an existing transcript when it holds more than 10 bytes.
    if os.path.exists(out_txt) and os.path.getsize(out_txt) > 10:
        return outcome(_read_transcript(out_txt), 0, cached=True)

    start = time.time()
    try:
        subprocess.run(
            ["/usr/local/bin/whisper", video_path,
             "--model", MODEL,
             "--language", LANGUAGE,
             "--output_dir", target_dir,
             "--output_format", "txt"],
            capture_output=True, text=True, timeout=1800  # 30 min max per video
        )
        elapsed = time.time() - start

        # Success is judged by the presence of the output file, not by the
        # exit status of the CLI.
        if os.path.exists(out_txt):
            text = _read_transcript(out_txt)
        else:
            text = "[转录失败]"
        return outcome(text, elapsed)
    except subprocess.TimeoutExpired:
        return outcome("[超时]", time.time() - start)
    except Exception as e:
        # Worker boundary: turn any failure into a result record so that one
        # bad video cannot take down the whole batch.
        return outcome(f"[错误: {e}]", time.time() - start)
|
||||
|
||||
def _write_progress(payload):
    """Overwrite ``_progress.json`` in OUTPUT_DIR with *payload*."""
    with open(os.path.join(OUTPUT_DIR, "_progress.json"), 'w') as fp:
        json.dump(payload, fp, indent=2)


def main():
    """Transcribe every pending video in parallel and report progress.

    Prints a plan, then one line per finished video with a running ETA.
    ``_progress.json`` is refreshed after every 20th finished video and once
    more at the end with ``"status": "complete"``.

    NOTE(review): the script header mentions a ``batch_summary.json`` output,
    but nothing in this function writes such a file - confirm whether it is
    still expected.
    """
    videos = get_videos()
    total = len(videos)
    total_duration = sum(v["duration"] for v in videos)

    print(f"待处理: {total} 个视频, 总时长: {total_duration/3600:.1f} 小时")
    print(f"模型: {MODEL}, 并行: {WORKERS} workers")
    # 2.6 looks like an empirical processing-time / audio-duration ratio -
    # TODO confirm.
    print(f"预计耗时: {total_duration * 2.6 / WORKERS / 3600:.1f} 小时")
    print("=" * 60)

    done = 0
    start_time = time.time()

    with ProcessPoolExecutor(max_workers=WORKERS) as executor:
        futures = {executor.submit(transcribe, v): v for v in videos}

        for future in as_completed(futures):
            v = futures[future]
            done += 1
            try:
                result = future.result()
                cached = result.get("cached", False)
                elapsed = result.get("elapsed", 0)

                pct = done / total * 100
                elapsed_all = time.time() - start_time
                # done >= 1 here, so the average time per video is defined.
                eta = elapsed_all / done * (total - done)

                tag = "♻️缓存" if cached else f"⏱{elapsed:.0f}s"
                print(f"[{done}/{total} {pct:.1f}%] {tag} | ETA: {eta/60:.0f}min | {result['name'][:50]}...")

                # Refresh the progress file after every 20th video.
                if done % 20 == 0:
                    _write_progress({
                        "done": done, "total": total,
                        "elapsed_total": elapsed_all,
                        "eta_seconds": eta,
                        "pct": pct
                    })

            except Exception as e:
                # Best effort: report the failing video and keep going.
                print(f"[{done}/{total}] ❌ 错误: {v['name'][:50]}... => {e}")

    total_time = time.time() - start_time
    print(f"\n✅ 全部完成! 总耗时: {total_time/3600:.1f} 小时")

    # Final progress record.
    _write_progress({"done": total, "total": total, "status": "complete"})


if __name__ == "__main__":
    main()
|
||||
3
tmp_daily_summary.md
Normal file
3
tmp_daily_summary.md
Normal file
@ -0,0 +1,3 @@
|
||||
=== 每日总结 20260601 ===
|
||||
## 昨日关键进展
|
||||
无昨日记忆记录
|
||||
Loading…
Reference in New Issue
Block a user