ai_member_xiaoxi/scripts/may230_refresh.py
2026-06-02 08:00:01 +08:00

281 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""处理Sheet1 5月230条待查询查学情+回填D/C列"""
import json, subprocess, os, urllib.request, re, sys
from datetime import datetime
SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, SCRIPTS_DIR)
from phone_encrypt import encrypt_phone
# ===== 0. Setup =====
# Read KEY=VALUE pairs from the secrets file. Blank lines, '#' comment lines
# and lines without '=' are skipped; leading/trailing double quotes and then
# single quotes are stripped from each value.
secrets = {}
with open('/root/.openclaw/workspace/secrets.env') as env_file:
    for raw in env_file:
        entry = raw.strip()
        if not entry or entry.startswith('#') or '=' not in entry:
            continue
        key, _, value = entry.partition('=')
        secrets[key] = value.strip('"').strip("'")
PG_PASS = secrets['PG_ONLINE_PASSWORD']
def pg_query(sql):
    """Run *sql* through the ``psql`` CLI and return its stripped stdout.

    The statement is passed on stdin. Output is tuples-only (``-t``),
    unaligned (``-A``) and tab-separated (``-F`` with a tab), so callers can
    split the result on newlines and tabs.

    psql's stderr is captured, so a failed statement would otherwise be
    indistinguishable from an empty result. A non-zero exit status or any
    stderr output is therefore echoed to this process's stderr. The return
    value is unchanged (stdout, possibly empty) so existing callers keep
    working exactly as before.

    Args:
        sql: SQL text to execute.

    Returns:
        psql's standard output with surrounding whitespace removed.
    """
    r = subprocess.run(
        ['psql', '-h', 'bj-postgres-16pob4sg.sql.tencentcdb.com', '-p', '28591',
         '-U', 'ai_member', '-d', 'vala_bi', '-t', '-A', '-F', '\t'],
        input=sql, capture_output=True, text=True,
        # The password is handed to psql through its environment.
        env={**os.environ, 'PGPASSWORD': PG_PASS},
    )
    diagnostics = r.stderr.strip()
    if r.returncode != 0 or diagnostics:
        print(f"  psql warning (exit {r.returncode}): {diagnostics}", file=sys.stderr)
    return r.stdout.strip()
# Load the Feishu app credentials and exchange them for a tenant access token.
# Both the config file and the HTTP response are opened in `with` blocks so
# they are closed deterministically.
with open('/root/.openclaw/credentials/xiaoxi/config.json') as cfg_file:
    config = json.load(cfg_file)
app = config['apps'][0]
data = json.dumps({"app_id": app['appId'], "app_secret": app['appSecret']}).encode()
req = urllib.request.Request('https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal',
                             data=data, headers={'Content-Type': 'application/json; charset=utf-8'})
with urllib.request.urlopen(req) as auth_resp:
    TAT = json.loads(auth_resp.read())['tenant_access_token']
TOKEN = 'RFIJsXT8FhGHhctY4RwczcOfnac'  # spreadsheet token, placed in the values API URL
SHEET = '55b0eb'  # sheet id, used as the prefix of every range
# ===== 1. Load the target rows =====
# Use a context manager so the file handle is closed right after parsing.
with open('/tmp/may230_targets.json') as targets_file:
    targets = json.load(targets_file)
with_uid = targets['with_uid']  # [{row, uid, sales, phone}]
without_uid = targets['without_uid']  # [{row, uid:'', sales, phone}]
all_rows = with_uid + without_uid
print(f"[1] 目标: {len(all_rows)} 条 (有uid={len(with_uid)}, 无uid={len(without_uid)})")
# ===== 2. Resolve account_id via encrypted phone (rows without a uid) =====
print("[2] 手机号加密匹配 account_id...")
phone_to_acc = {}  # plaintext phone -> account_id
if without_uid:
    phones = sorted({entry['phone'] for entry in without_uid if entry['phone']})
    # Only 11-character all-digit numbers are encrypted; ciphertext -> plaintext.
    phone_enc_map = {
        encrypt_phone(number): number
        for number in phones
        if len(number) == 11 and number.isdigit()
    }
    enc_list = list(phone_enc_map)
    phone_to_acc_raw = {}  # tel_encrypt -> account id
    batch_size = 200
    for start in range(0, len(enc_list), batch_size):
        chunk = enc_list[start:start + batch_size]
        placeholders = ','.join(f"'{e}'" for e in chunk)
        # rn = 1 keeps, per tel_encrypt, the active account with the highest id.
        sql = f"""
SELECT id, tel_encrypt FROM (
SELECT id, tel_encrypt, ROW_NUMBER() OVER (PARTITION BY tel_encrypt ORDER BY id DESC) AS rn
FROM bi_vala_app_account
WHERE status = 1 AND deleted_at IS NULL
AND tel_encrypt IN ({placeholders})
) t WHERE rn = 1
"""
        for record in pg_query(sql).split('\n'):
            if not record:
                continue
            fields = record.split('\t')
            if len(fields) >= 2:
                phone_to_acc_raw[fields[1]] = fields[0]
    # Re-key the result by plaintext phone number.
    for enc, plain in phone_enc_map.items():
        acc = phone_to_acc_raw.get(enc)
        if acc:
            phone_to_acc[plain] = acc
    print(f" 匹配: {len(phone_to_acc)}/{len(phones)}")
# Build row number -> {acc_id, sales}. Rows that already carry a uid use it
# as the account id; the others take the phone lookup result ('' if unmatched).
row_info = {
    entry['row']: {'acc_id': entry['uid'], 'sales': entry['sales']}
    for entry in with_uid
}
for entry in without_uid:
    row_info[entry['row']] = {
        'acc_id': phone_to_acc.get(entry['phone'], ''),
        'sales': entry['sales'],
    }
matched = sum(bool(info['acc_id']) for info in row_info.values())
print(f" 最终有account_id: {matched}/{len(row_info)}")
# ===== 3. Batch-query learning data from the database =====
print("[3] 批量查询学情...")
acc_ids = sorted(set(v['acc_id'] for v in row_info.values() if v['acc_id']), key=int)
uid_csv = ','.join(acc_ids)
print(f" 去重用户: {len(acc_ids)}")
# 3a. Courses + characters
print(" 课程信息...")
if acc_ids:
    course_rows = pg_query(f"""
SELECT a.id, c.id, COALESCE(d.course_level,'?'),
CASE WHEN d.expire_time IS NOT NULL THEN '正式课' ELSE '体验课' END
FROM bi_vala_app_account a
JOIN bi_vala_app_character c ON c.account_id = a.id AND c.deleted_at IS NULL
LEFT JOIN bi_user_course_detail d ON d.user_id = c.id AND d.deleted_at IS NULL
WHERE a.id IN ({uid_csv}) AND a.status = 1 AND a.deleted_at IS NULL
ORDER BY a.id, c.id, d.course_level
""")
else:
    # With no account ids the list would render as "IN ()", which is invalid
    # SQL; skip the round trip and treat it as an empty result.
    course_rows = ''
user_courses = {}  # account id -> [{user_id, level, type}], one entry per result row
all_uids = set()  # character ids (c.id) seen for the queried accounts
for line in course_rows.split('\n'):
    if not line:
        continue
    parts = line.split('\t')
    if len(parts) >= 4:
        acc_id, uid, level, ctype = parts[0], parts[1], parts[2], parts[3]
        all_uids.add(uid)
        user_courses.setdefault(acc_id, []).append({'user_id': uid, 'level': level, 'type': ctype})
# 3b. Most recent lesson played
print(" 最近行课...")
play_records = {}  # character id -> {date, ch_id, ch_uid} of its latest record
for shard in range(8):
    # Table suffix `shard` is queried only for ids with id % 8 == shard.
    shard_ids = [u for u in all_uids if int(u) % 8 == shard]
    if not shard_ids:
        continue
    # DISTINCT ON + ORDER BY created_at DESC keeps one newest row per user_id.
    latest_sql = f"""
SELECT DISTINCT ON (user_id) user_id, created_at::date, chapter_id, chapter_unique_id
FROM bi_user_chapter_play_record_{shard}
WHERE user_id IN ({','.join(shard_ids)}) AND play_status = 1
ORDER BY user_id, created_at DESC
"""
    for record in pg_query(latest_sql).split('\n'):
        if not record:
            continue
        cols = record.split('\t')
        if len(cols) >= 4:
            play_records[cols[0]] = {'date': cols[1], 'ch_id': cols[2], 'ch_uid': cols[3]}
# 3c. Course structure
print(" 课程结构...")
# Drop empty chapter ids: psql prints a NULL column as an empty field, and a
# single empty entry would make the IN (...) list invalid SQL, failing the
# lookup for every chapter.
ch_ids = {r['ch_id'] for r in play_records.values() if r['ch_id']}
ch_map = {}  # chapter id -> "course_level-course_season-course_unit-course_lesson"
if ch_ids:
    for line in pg_query(f"SELECT id, course_level, course_season, course_unit, course_lesson FROM bi_level_unit_lesson WHERE id IN ({','.join(ch_ids)})").split('\n'):
        if not line:
            continue
        parts = line.split('\t')
        if len(parts) >= 5:
            ch_map[parts[0]] = f"{parts[1]}-{parts[2]}-{parts[3]}-{parts[4]}"
# 3d. Study time
print(" 学习时长...")
# character id -> SUM(interval_time) / 60000.0
# (presumably milliseconds converted to minutes; confirm against the schema)
study_map = {}
for shard in range(8):
    # Table suffix `shard` is queried only for ids with id % 8 == shard.
    shard_ids = [u for u in all_uids if int(u) % 8 == shard]
    if not shard_ids:
        continue
    duration_sql = f"SELECT user_id, COALESCE(SUM(interval_time),0)/60000.0 FROM bi_user_component_play_record_{shard} WHERE user_id IN ({','.join(shard_ids)}) GROUP BY user_id"
    for record in pg_query(duration_sql).split('\n'):
        if not record:
            continue
        cols = record.split('\t')
        if len(cols) >= 2:
            study_map[cols[0]] = float(cols[1])
# 3e. Payment status
print(" 付费状态...")
paid_set = set()  # account ids having an order with pay_success_date set and order_status = 3
if uid_csv:
    paid_rows = pg_query(f"SELECT DISTINCT account_id FROM bi_vala_order WHERE account_id IN ({uid_csv}) AND pay_success_date IS NOT NULL AND order_status = 3")
else:
    # An empty id list would render as "IN ()", which is invalid SQL; skip it.
    paid_rows = ''
for line in paid_rows.split('\n'):
    if line.strip():
        paid_set.add(line.strip())
print(f" 课程:{len(user_courses)} 行课:{len(play_records)} 付费:{len(paid_set)}")
# ===== 4. Build the column-D text =====
print("[4] 组装学情文本...")


def get_best_char(acc_id):
    """Pick one character of an account and summarise its learning progress.

    The character whose latest play record has the most recent date wins.
    If none of the account's characters has a play record, the first entry
    in ``user_courses[acc_id]`` is used.

    Args:
        acc_id: Account id, looked up in the module-level ``user_courses``.

    Returns:
        A ``(current, recent, study)`` tuple:

        * ``current`` - progress label, e.g. "L1体验课-U00-L01"; ends in "-?"
          when the chapter is not in ``ch_map`` and in "-无记录" when there is
          no play record.
        * ``recent`` - date string of the latest play record, or '无记录'.
        * ``study`` - value from ``study_map`` truncated to int (0 if absent).

        ``(None, None, None)`` when the account has no characters.
    """
    chars = user_courses.get(acc_id, [])
    if not chars:
        return None, None, None
    best, best_date = None, None
    for c in chars:
        pr = play_records.get(c['user_id'])
        # Dates are compared as strings; this assumes psql prints them in
        # ISO YYYY-MM-DD form so that lexical order equals date order.
        if pr and (best_date is None or pr['date'] > best_date):
            best_date = pr['date']
            best = c
    if best is None:
        best = chars[0]
    uid = best['user_id']
    pr = play_records.get(uid)
    # The '?' placeholder contains neither 'A1' nor 'A2', so it passes through.
    level = best['level'].replace('A1', 'L1').replace('A2', 'L2')
    ctype = best['type']
    if pr and pr['ch_id'] in ch_map:
        ch_name = ch_map[pr['ch_id']]
        # ch_map values are "level-season-unit-lesson" (four fields). The old
        # check required five fields, so the short "L1体验课-U00-L01" form was
        # never produced. Unit and lesson are the last two fields.
        parts = ch_name.split('-')
        if len(parts) >= 4:
            current = f"{level}{ctype}-{parts[-2]}-{parts[-1]}"
        else:
            current = f"{level}{ctype}-{ch_name}"
    elif pr:
        current = f"{level}{ctype}-?"
    else:
        current = f"{level}{ctype}-无记录"
    recent = pr['date'] if pr else '无记录'
    study = int(study_map.get(uid, 0))
    return current, recent, study
updates = {}  # row number -> text for column D
no_match = []  # row numbers for which no account id could be resolved
for row_num, info in row_info.items():
    acc_id, sales = info['acc_id'], info['sales']
    if acc_id:
        current, recent, study = get_best_char(acc_id)
        pay = '未付费' if acc_id not in paid_set else '已付费'
        if current:
            d_text = f"销售:{sales} | 用户:{acc_id} | 当前:{current} | 最近行课:{recent} | 学习{study}min | {pay}"
        else:
            # get_best_char found no character for this account.
            d_text = f"销售:{sales} | 用户:{acc_id} | 无课程角色 | 最近行课:无记录 | 学习0min | {pay}"
    else:
        no_match.append(row_num)
        d_text = f"销售:{sales} | 用户:未匹配 | 当前:无记录 | 最近行课:无记录 | 学习0min | 未付费"
    updates[row_num] = d_text
print(f" 未匹配手机号: {len(no_match)}")
# ===== 5. Batch write-back =====
print(f"[5] 批量回填 {len(updates)} 条...")
def api_put(tat, token, sheet, range_str, values):
    """PUT *values* into one range of a Feishu spreadsheet.

    Args:
        tat: Tenant access token, sent as the Bearer credential.
        token: Spreadsheet token, placed in the request URL.
        sheet: Sheet id, used as the prefix of the range.
        range_str: Cell range without the sheet prefix, e.g. "D2:D10".
        values: List of rows, each a list of cell values (callers in this
            file pass single-column rows).

    Returns:
        True when the response body's ``code`` is 0. False when the API
        reports a non-zero code or when the request/parsing raises; in both
        cases a diagnostic line is printed.
    """
    url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{token}/values"
    body = {"valueRange": {"range": f"{sheet}!{range_str}", "values": values}}
    data = json.dumps(body).encode()
    req = urllib.request.Request(url, data=data, method='PUT')
    req.add_header('Authorization', f'Bearer {tat}')
    req.add_header('Content-Type', 'application/json; charset=utf-8')
    try:
        # The context manager closes the HTTP response even if parsing fails.
        with urllib.request.urlopen(req) as http_resp:
            resp = json.loads(http_resp.read())
        if resp.get('code') != 0:
            # Surface API-level rejections instead of failing silently.
            print(f" API error: code={resp.get('code')} msg={resp.get('msg')}")
            return False
        return True
    except Exception as e:  # best-effort write: report and let the caller count it
        print(f" API error: {e}")
        return False
sorted_items = sorted(updates.items(), key=lambda x: x[0])
# Group the rows into runs of consecutive row numbers so each run can be
# written with a single range request. Starting from an empty run (rather
# than sorted_items[0]) keeps this safe when there is nothing to write.
blocks = []
cur_block = []
for item in sorted_items:
    # A gap in the row numbers closes the current run.
    if cur_block and item[0] != cur_block[-1][0] + 1:
        blocks.append(cur_block)
        cur_block = []
    cur_block.append(item)
if cur_block:
    blocks.append(cur_block)
print(f" {len(blocks)} 个连续块")
success_d = 0  # rows whose column-D text was written
success_c = 0  # rows whose column-C status was written
for bi, block in enumerate(blocks):
    rows = [r for r, _ in block]
    d_vals = [[v] for _, v in block]
    c_vals = [["已返回"] for _ in block]
    start_row, end_row = rows[0], rows[-1]
    if api_put(TAT, TOKEN, SHEET, f'D{start_row}:D{end_row}', d_vals):
        success_d += len(block)
        # Only mark the status column once the data column was really
        # written, so a failed block is not flagged as done.
        if api_put(TAT, TOKEN, SHEET, f'C{start_row}:C{end_row}', c_vals):
            success_c += len(block)
    if (bi+1) % 20 == 0:
        print(f"{bi+1}/{len(blocks)}: D={success_d} C={success_c}")
# Report against the real number of rows instead of a hard-coded 230.
total = len(updates)
print(f"\n✅ 完成: D列={success_d}/{total}, C列={success_c}/{total}")
if no_match:
    print(f"⚠️ 未匹配手机号 {len(no_match)} 行: {no_match[:20]}")