398 lines
14 KiB
Python
398 lines
14 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
刷新 5 月行课记录:查询学情数据并回填 Sheet1 D 列
|
||
"""
|
||
import json
|
||
import subprocess
|
||
import sys
|
||
import os
|
||
from datetime import datetime
|
||
|
||
# ========== 1. 读取目标行 ==========
|
||
target_rows = json.load(open('/tmp/target_rows.json'))
|
||
print(f"[1/4] 读取目标行: {len(target_rows)} 条")
|
||
|
||
# 提取 account_id 列表
|
||
uid_set = sorted(set(str(r[1]) for r in target_rows if r[1]), key=int)
|
||
print(f" 去重用户数: {len(uid_set)}")
|
||
uid_csv = ','.join(uid_set)
|
||
|
||
# ========== 2. 数据库查询 ==========
|
||
print(f"[2/4] 查询学情数据...")
|
||
|
||
# 从 secrets.env 获取密码
|
||
secrets = {}
|
||
with open('/root/.openclaw/workspace/secrets.env') as f:
|
||
for line in f:
|
||
line = line.strip()
|
||
if line and not line.startswith('#') and '=' in line:
|
||
k, v = line.split('=', 1)
|
||
secrets[k] = v.strip('"').strip("'")
|
||
|
||
pg_pass = secrets.get('PG_ONLINE_PASSWORD', '')
|
||
pg_host = 'bj-postgres-16pob4sg.sql.tencentcdb.com'
|
||
pg_port = '28591'
|
||
|
||
# 2a. 课程信息 + 角色ID
|
||
print(" 查询课程信息...")
|
||
course_sql = f"""
|
||
SELECT
|
||
a.id AS account_id,
|
||
c.id AS user_id,
|
||
d.course_level,
|
||
CASE WHEN d.expire_time IS NOT NULL THEN '正式课' ELSE '体验课' END AS course_type
|
||
FROM bi_vala_app_account a
|
||
JOIN bi_vala_app_character c ON c.account_id = a.id AND c.deleted_at IS NULL
|
||
LEFT JOIN bi_user_course_detail d ON d.user_id = c.id AND d.deleted_at IS NULL
|
||
WHERE a.id IN ({uid_csv})
|
||
AND a.status = 1
|
||
AND a.deleted_at IS NULL
|
||
ORDER BY a.id, c.id, d.course_level
|
||
"""
|
||
|
||
result = subprocess.run(
|
||
['psql', '-h', pg_host, '-p', pg_port, '-U', 'ai_member', '-d', 'vala_bi',
|
||
'-t', '-A', '-F', '\t'],
|
||
input=course_sql,
|
||
capture_output=True, text=True,
|
||
env={**os.environ, 'PGPASSWORD': pg_pass}
|
||
)
|
||
|
||
# 构建 account_id -> {user_id列表, 课程信息} 映射
|
||
user_courses = {} # account_id -> [{'user_id': ..., 'level': ..., 'type': ...}, ...]
|
||
for line in result.stdout.strip().split('\n'):
|
||
if not line:
|
||
continue
|
||
parts = line.split('\t')
|
||
if len(parts) >= 4:
|
||
acc_id, user_id, level, ctype = parts[0], parts[1], parts[2], parts[3]
|
||
if acc_id not in user_courses:
|
||
user_courses[acc_id] = []
|
||
user_courses[acc_id].append({
|
||
'user_id': user_id,
|
||
'level': level,
|
||
'type': ctype
|
||
})
|
||
|
||
print(f" 课程信息: {len(user_courses)} 个账号有角色")
|
||
|
||
# 2b. 最近行课记录 (分表查询)
|
||
print(" 查询最近行课记录...")
|
||
# 收集所有 user_id
|
||
all_user_ids = set()
|
||
for acc_id, chars in user_courses.items():
|
||
for c in chars:
|
||
all_user_ids.add(c['user_id'])
|
||
|
||
# 分表查询
|
||
play_records = {} # user_id -> {'recent_date': ..., 'chapter_id': ..., 'chapter_unique_id': ...}
|
||
tables = [f'bi_user_chapter_play_record_{i}' for i in range(8)]
|
||
for table in tables:
|
||
uid_subset = [u for u in all_user_ids if int(u) % 8 == int(table[-1])]
|
||
if not uid_subset:
|
||
continue
|
||
uid_csv_sub = ','.join(uid_subset)
|
||
sql = f"""
|
||
SELECT DISTINCT ON (user_id)
|
||
user_id,
|
||
created_at::date AS recent_date,
|
||
chapter_id,
|
||
chapter_unique_id
|
||
FROM {table}
|
||
WHERE user_id IN ({uid_csv_sub})
|
||
AND play_status = 1
|
||
ORDER BY user_id, created_at DESC
|
||
"""
|
||
result = subprocess.run(
|
||
['psql', '-h', pg_host, '-p', pg_port, '-U', 'ai_member', '-d', 'vala_bi',
|
||
'-t', '-A', '-F', '\t'],
|
||
input=sql,
|
||
capture_output=True, text=True,
|
||
env={**os.environ, 'PGPASSWORD': pg_pass}
|
||
)
|
||
for line in result.stdout.strip().split('\n'):
|
||
if not line:
|
||
continue
|
||
parts = line.split('\t')
|
||
if len(parts) >= 4:
|
||
play_records[parts[0]] = {
|
||
'recent_date': parts[1],
|
||
'chapter_id': parts[2],
|
||
'chapter_unique_id': parts[3]
|
||
}
|
||
|
||
print(f" 行课记录: {len(play_records)} 个角色有行课")
|
||
|
||
# 2c. 课程结构映射 (chapter_id -> 课程名称)
|
||
print(" 查询课程结构...")
|
||
chapter_ids = set(r['chapter_id'] for r in play_records.values())
|
||
chapter_map = {}
|
||
if chapter_ids:
|
||
ch_csv = ','.join(chapter_ids)
|
||
ch_sql = f"""
|
||
SELECT id, course_level, course_season, course_unit, course_lesson
|
||
FROM bi_level_unit_lesson
|
||
WHERE id IN ({ch_csv})
|
||
"""
|
||
result = subprocess.run(
|
||
['psql', '-h', pg_host, '-p', pg_port, '-U', 'ai_member', '-d', 'vala_bi',
|
||
'-t', '-A', '-F', '\t'],
|
||
input=ch_sql,
|
||
capture_output=True, text=True,
|
||
env={**os.environ, 'PGPASSWORD': pg_pass}
|
||
)
|
||
for line in result.stdout.strip().split('\n'):
|
||
if not line:
|
||
continue
|
||
parts = line.split('\t')
|
||
if len(parts) >= 5:
|
||
ch_id = parts[0]
|
||
chapter_map[ch_id] = f"{parts[1]}-{parts[2]}-{parts[3]}-{parts[4]}"
|
||
|
||
# 2d. 学习时长
|
||
print(" 查询学习时长...")
|
||
study_times = {} # user_id -> total_minutes
|
||
comp_tables = [f'bi_user_component_play_record_{i}' for i in range(8)]
|
||
for table in comp_tables:
|
||
uid_subset = [u for u in all_user_ids if int(u) % 8 == int(table[-1])]
|
||
if not uid_subset:
|
||
continue
|
||
uid_csv_sub = ','.join(uid_subset)
|
||
sql = f"""
|
||
SELECT user_id, COALESCE(SUM(interval_time), 0) / 60000.0 AS total_min
|
||
FROM {table}
|
||
WHERE user_id IN ({uid_csv_sub})
|
||
GROUP BY user_id
|
||
"""
|
||
result = subprocess.run(
|
||
['psql', '-h', pg_host, '-p', pg_port, '-U', 'ai_member', '-d', 'vala_bi',
|
||
'-t', '-A', '-F', '\t'],
|
||
input=sql,
|
||
capture_output=True, text=True,
|
||
env={**os.environ, 'PGPASSWORD': pg_pass}
|
||
)
|
||
for line in result.stdout.strip().split('\n'):
|
||
if not line:
|
||
continue
|
||
parts = line.split('\t')
|
||
if len(parts) >= 2:
|
||
study_times[parts[0]] = float(parts[1])
|
||
|
||
# 2e. 付费状态
|
||
print(" 查询付费状态...")
|
||
pay_sql = f"""
|
||
SELECT account_id,
|
||
CASE WHEN COUNT(*) > 0 THEN '已付费' ELSE '未付费' END AS pay_status
|
||
FROM bi_vala_order
|
||
WHERE account_id IN ({uid_csv})
|
||
AND pay_success_date IS NOT NULL
|
||
AND order_status = 3
|
||
GROUP BY account_id
|
||
"""
|
||
result = subprocess.run(
|
||
['psql', '-h', pg_host, '-p', pg_port, '-U', 'ai_member', '-d', 'vala_bi',
|
||
'-t', '-A', '-F', '\t'],
|
||
input=pay_sql,
|
||
capture_output=True, text=True,
|
||
env={**os.environ, 'PGPASSWORD': pg_pass}
|
||
)
|
||
paid_users = set()
|
||
for line in result.stdout.strip().split('\n'):
|
||
if not line:
|
||
continue
|
||
parts = line.split('\t')
|
||
if len(parts) >= 2 and parts[1] == '已付费':
|
||
paid_users.add(parts[0])
|
||
|
||
# 查询所有用户(包括未付费的)
|
||
all_paid = set()
|
||
for acc_id in uid_set:
|
||
if acc_id in paid_users:
|
||
all_paid.add(acc_id)
|
||
print(f" 付费用户: {len(paid_users)}")
|
||
|
||
# ========== 3. 组装学情数据 ==========
|
||
print(f"[3/4] 组装学情数据...")
|
||
|
||
def get_course_info(acc_id):
|
||
"""获取用户最优先的课程信息"""
|
||
chars = user_courses.get(acc_id, [])
|
||
if not chars:
|
||
return None, None, None
|
||
|
||
# 选最近有过行课记录的角色
|
||
best_char = None
|
||
best_date = None
|
||
for c in chars:
|
||
uid = c['user_id']
|
||
if uid in play_records:
|
||
d = play_records[uid]['recent_date']
|
||
if best_date is None or d > best_date:
|
||
best_date = d
|
||
best_char = c
|
||
|
||
if best_char is None and chars:
|
||
best_char = chars[0] # 回退到第一个角色
|
||
|
||
if best_char is None:
|
||
return None, None, None
|
||
|
||
uid = best_char['user_id']
|
||
level = best_char['level'] or '?'
|
||
ctype = best_char['type'] or '体验课'
|
||
|
||
# 获取当前进度
|
||
pr = play_records.get(uid)
|
||
if pr and pr['chapter_id'] in chapter_map:
|
||
chapter_name = chapter_map[pr['chapter_id']]
|
||
current = f"{level}{ctype}-{chapter_name}"
|
||
else:
|
||
current = f"{level}{ctype}-?"
|
||
|
||
recent_date = pr['recent_date'] if pr else '无记录'
|
||
study_min = study_times.get(uid, 0)
|
||
|
||
return current, recent_date, int(study_min)
|
||
|
||
# 为每个目标行生成 D 列文本
|
||
now_str = datetime.now().strftime('%Y-%m-%d %H:%M')
|
||
updates = [] # [(row_num, d_text)]
|
||
|
||
for row_num, acc_id, phone in target_rows:
|
||
acc_id = str(acc_id)
|
||
current, recent_date, study_min = get_course_info(acc_id)
|
||
pay_status = '已付费' if acc_id in paid_users else '未付费'
|
||
|
||
# 从原始表取销售名(通过B列用户ID)
|
||
# 销售名需要从原始表A列获取,但这里我们直接用target_rows中没有销售名
|
||
# 实际上我们需要重新读取A列。为了效率,暂时用"用户"占位
|
||
# 让我从之前的扫描中保留销售名...
|
||
|
||
# 实际上 target_rows 目前只有 (row_num, acc_id, phone),没有 sales_name
|
||
# 需要补读销售名列
|
||
# 先临时处理,后续补全
|
||
|
||
if current is None:
|
||
d_text = f"用户:{acc_id} | 未匹配到课程信息 | 最近行课:无记录 | 学习0min | {pay_status}"
|
||
else:
|
||
d_text = f"用户:{acc_id} | 当前:{current} | 最近行课:{recent_date} | 学习{study_min}min | {pay_status}"
|
||
|
||
updates.append((row_num, d_text, pay_status, recent_date, current))
|
||
|
||
print(f" 生成 {len(updates)} 条更新")
|
||
|
||
# ========== 需要补读A列(销售名)==========
|
||
# 从 target_rows 中批量读取A列
|
||
print(" 补读销售名列...")
|
||
import urllib.request
|
||
|
||
# 获取 TAT
|
||
config = json.load(open('/root/.openclaw/credentials/xiaoxi/config.json'))
|
||
app = config['apps'][0]
|
||
tat_data = json.dumps({"app_id": app['appId'], "app_secret": app['appSecret']}).encode()
|
||
tat_req = urllib.request.Request(
|
||
'https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal',
|
||
data=tat_data, headers={'Content-Type': 'application/json; charset=utf-8'})
|
||
tat = json.loads(urllib.request.urlopen(tat_req).read())['tenant_access_token']
|
||
|
||
# 读取所有目标行的 A 列
|
||
TOKEN = 'RFIJsXT8FhGHhctY4RwczcOfnac'
|
||
SHEET = '55b0eb'
|
||
|
||
# 按行号排序
|
||
target_rows_sorted = sorted(updates, key=lambda x: x[0])
|
||
sales_map = {} # row_num -> sales_name
|
||
|
||
# 分批读取A列
|
||
batch_size = 200
|
||
for i in range(0, len(target_rows_sorted), batch_size):
|
||
batch = target_rows_sorted[i:i+batch_size]
|
||
row_nums = [str(r[0]) for r in batch]
|
||
range_str = f"{SHEET}!A{min(row_nums, key=int)}:A{max(row_nums, key=int)}"
|
||
url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{TOKEN}/values/{range_str}"
|
||
req = urllib.request.Request(url)
|
||
req.add_header('Authorization', f'Bearer {tat}')
|
||
|
||
try:
|
||
resp = json.loads(urllib.request.urlopen(req).read())
|
||
values = resp.get('data', {}).get('valueRange', {}).get('values', [])
|
||
start_row = int(min(row_nums, key=int))
|
||
for j, v in enumerate(values):
|
||
row_num = start_row + j
|
||
if v:
|
||
sales_map[row_num] = v[0]
|
||
except Exception as e:
|
||
print(f" Error reading A col: {e}")
|
||
|
||
print(f" 销售名: {len(sales_map)} 条")
|
||
|
||
# 重新组装 D 列文本(加入销售名)
|
||
final_updates = []
|
||
for row_num, d_text, pay_status, recent_date, current in target_rows_sorted:
|
||
sales = sales_map.get(row_num, '?')
|
||
|
||
# 从原始 target_rows 中找到这个 row_num 对应的 acc_id
|
||
orig = next((r for r in target_rows if r[0] == row_num), None)
|
||
acc_id = str(orig[1]) if orig else '?'
|
||
|
||
if current and current != 'None':
|
||
d_text = f"销售:{sales} | 用户:{acc_id} | 当前:{current} | 最近行课:{recent_date} | 学习{study_min}min | {pay_status}"
|
||
else:
|
||
# 需要重新计算
|
||
current2, recent_date2, study_min2 = get_course_info(acc_id)
|
||
pay_status2 = '已付费' if acc_id in paid_users else '未付费'
|
||
if current2:
|
||
d_text = f"销售:{sales} | 用户:{acc_id} | 当前:{current2} | 最近行课:{recent_date2} | 学习{study_min2}min | {pay_status2}"
|
||
else:
|
||
d_text = f"销售:{sales} | 用户:{acc_id} | 未匹配到课程信息 | 最近行课:无记录 | 学习0min | {pay_status2}"
|
||
|
||
final_updates.append((row_num, d_text))
|
||
|
||
updates = final_updates
|
||
|
||
# ========== 4. 批量回填 ==========
|
||
print(f"[4/4] 批量回填 {len(updates)} 条...")
|
||
|
||
# 构建批量更新请求
|
||
# 飞书 API 支持范围更新,但 D 列不连续(有大量跳行)
|
||
# 策略:逐行更新,每50行一批
|
||
|
||
def update_cell(tat, token, sheet, row, col, value):
|
||
"""更新单个单元格"""
|
||
range_str = f"{sheet}!{col}{row}:{col}{row}"
|
||
url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{token}/values"
|
||
body = json.dumps({
|
||
"valueRange": {
|
||
"range": range_str,
|
||
"values": [[value]]
|
||
}
|
||
}).encode()
|
||
req = urllib.request.Request(url, data=body, method='PUT')
|
||
req.add_header('Authorization', f'Bearer {tat}')
|
||
req.add_header('Content-Type', 'application/json; charset=utf-8')
|
||
resp = json.loads(urllib.request.urlopen(req).read())
|
||
return resp.get('code') == 0
|
||
|
||
# 分批处理:每50个一批,D列和C列一起更新
|
||
batch_size = 50
|
||
success_d = 0
|
||
success_c = 0
|
||
|
||
for i in range(0, len(updates), batch_size):
|
||
batch = updates[i:i+batch_size]
|
||
|
||
# 收集 D 列和 C 列的更新
|
||
# 由于行不连续,需要逐行更新
|
||
for row_num, d_text in batch:
|
||
# 更新 D 列
|
||
if update_cell(tat, TOKEN, SHEET, row_num, 'D', d_text):
|
||
success_d += 1
|
||
|
||
# 更新 C 列为"已返回"
|
||
if update_cell(tat, TOKEN, SHEET, row_num, 'C', '已返回'):
|
||
success_c += 1
|
||
|
||
print(f" 进度: {min(i+batch_size, len(updates))}/{len(updates)} (D:{success_d}, C:{success_c})")
|
||
|
||
print(f"\n✅ 完成!D列更新: {success_d}/{len(updates)},C列更新: {success_c}/{len(updates)}")
|