ai_member_xiaoxi/scripts/may230_refresh.py

#!/usr/bin/env python3
"""处理Sheet1 5月230条待查询：查学情+回填D/C列"""
import json, subprocess, os, urllib.request, re, sys
from datetime import datetime

SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, SCRIPTS_DIR)
from phone_encrypt import encrypt_phone

# ===== 0. Setup =====
# Parse KEY=VALUE lines from the secrets file into a dict. Blank lines,
# '#' comment lines and lines without '=' are skipped; wrapping double
# quotes and then single quotes are stripped from each value.
secrets = {}
with open('/root/.openclaw/workspace/secrets.env') as f:
    for line in f:
        line = line.strip()
        if not line or line.startswith('#') or '=' not in line:
            continue
        k, _, v = line.partition('=')
        secrets[k] = v.strip('"').strip("'")
PG_PASS = secrets['PG_ONLINE_PASSWORD']

def pg_query(sql):
    """Run ``sql`` through the ``psql`` CLI and return its stripped stdout.

    The statement is fed on stdin. Output is tuples-only, unaligned and
    tab-separated (``-t -A -F <tab>``), so callers split rows on newlines
    and fields on tabs. The password is supplied via ``PGPASSWORD``.

    Args:
        sql: SQL text to execute.

    Returns:
        The query output as a string with surrounding whitespace removed.

    Raises:
        RuntimeError: if ``psql`` exits with a non-zero status (for example
            when the connection to the server fails).
    """
    r = subprocess.run(
        ['psql', '-h', 'bj-postgres-16pob4sg.sql.tencentcdb.com', '-p', '28591',
         '-U', 'ai_member', '-d', 'vala_bi', '-t', '-A', '-F', '\t'],
        input=sql, capture_output=True, text=True,
        env={**os.environ, 'PGPASSWORD': PG_PASS})
    err = r.stderr.strip()
    if r.returncode != 0:
        # Failing loudly beats returning '' and letting callers treat a broken
        # connection as "no rows found".
        raise RuntimeError(f"psql exited with status {r.returncode}: {err}")
    if err:
        # Without ON_ERROR_STOP psql reports SQL errors on stderr but still
        # exits 0; surface the message instead of dropping it.
        print(f"psql: {err}", file=sys.stderr)
    return r.stdout.strip()

# Load the app credentials and exchange them for a Feishu tenant access token.
# Both the config file and the HTTP response are closed deterministically.
with open('/root/.openclaw/credentials/xiaoxi/config.json') as cfg_file:
    config = json.load(cfg_file)
app = config['apps'][0]
data = json.dumps({"app_id": app['appId'], "app_secret": app['appSecret']}).encode()
req = urllib.request.Request('https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal',
    data=data, headers={'Content-Type': 'application/json; charset=utf-8'})
with urllib.request.urlopen(req) as token_resp:
    TAT = json.loads(token_resp.read())['tenant_access_token']

# Spreadsheet token and sheet id used for the write-back requests.
TOKEN = 'RFIJsXT8FhGHhctY4RwczcOfnac'
SHEET = '55b0eb'

# ===== 1. Load the target rows =====
# Use a context manager so the file handle is closed right after parsing.
with open('/tmp/may230_targets.json') as targets_file:
    targets = json.load(targets_file)
with_uid = targets['with_uid']        # [{row, uid, sales, phone}]
without_uid = targets['without_uid']  # [{row, uid:'', sales, phone}]
all_rows = with_uid + without_uid
print(f"[1] 目标: {len(all_rows)} 条 (有uid={len(with_uid)}, 无uid={len(without_uid)})")

# ===== 2. Match account_id via encrypted phone number (rows without uid) =====
print("[2] 手机号加密匹配 account_id...")
phone_to_acc = {}  # plaintext phone -> account_id
if without_uid:
    phones = sorted({entry['phone'] for entry in without_uid if entry['phone']})

    # Encrypt the phone numbers; anything that is not exactly 11 digits is skipped.
    phone_enc_map = {}  # encrypted phone -> plaintext phone
    for plain in phones:
        if len(plain) != 11 or not plain.isdigit():
            continue
        phone_enc_map[encrypt_phone(plain)] = plain

    enc_list = list(phone_enc_map)
    phone_to_acc_raw = {}  # tel_encrypt -> account id
    batch_size = 200
    for offset in range(0, len(enc_list), batch_size):
        placeholders = ','.join(f"'{enc}'" for enc in enc_list[offset:offset + batch_size])
        # Per tel_encrypt, keep only the active, non-deleted account with the highest id.
        sql = f"""
        SELECT id, tel_encrypt FROM (
            SELECT id, tel_encrypt, ROW_NUMBER() OVER (PARTITION BY tel_encrypt ORDER BY id DESC) AS rn
            FROM bi_vala_app_account
            WHERE status = 1 AND deleted_at IS NULL
              AND tel_encrypt IN ({placeholders})
        ) t WHERE rn = 1
        """
        for row in pg_query(sql).split('\n'):
            fields = row.split('\t')
            if row and len(fields) >= 2:
                phone_to_acc_raw[fields[1]] = fields[0]

    # Re-key the matches by plaintext phone number.
    for enc, plain in phone_enc_map.items():
        acc = phone_to_acc_raw.get(enc)
        if not acc:
            continue
        phone_to_acc[plain] = acc

    print(f"  匹配: {len(phone_to_acc)}/{len(phones)}")

# Build the row number -> {acc_id, sales} mapping.
# Rows that already carry a uid use it as the account id; the remaining rows
# take the phone match, or '' when the phone was not matched.
row_info = {
    r['row']: {'acc_id': r['uid'], 'sales': r['sales']}
    for r in with_uid
}
row_info.update(
    (r['row'], {'acc_id': phone_to_acc.get(r['phone'], ''), 'sales': r['sales']})
    for r in without_uid
)

matched = sum(bool(entry['acc_id']) for entry in row_info.values())
print(f"  最终有account_id: {matched}/{len(row_info)}")

# ===== 3. Bulk-query learning data from the database =====
print("[3] 批量查询学情...")

# Distinct account ids, ordered numerically, joined for use in SQL IN lists.
acc_ids = sorted(set(v['acc_id'] for v in row_info.values() if v['acc_id']), key=int)
uid_csv = ','.join(acc_ids)
print(f"  去重用户: {len(acc_ids)}")

# 3a. Courses + characters
print("    课程信息...")
if acc_ids:
    course_rows = pg_query(f"""
SELECT a.id, c.id, COALESCE(d.course_level,'?'),
  CASE WHEN d.expire_time IS NOT NULL THEN '正式课' ELSE '体验课' END
FROM bi_vala_app_account a
JOIN bi_vala_app_character c ON c.account_id = a.id AND c.deleted_at IS NULL
LEFT JOIN bi_user_course_detail d ON d.user_id = c.id AND d.deleted_at IS NULL
WHERE a.id IN ({uid_csv}) AND a.status = 1 AND a.deleted_at IS NULL
ORDER BY a.id, c.id, d.course_level
""")
else:
    # With no account ids the list would render as "IN ()", which is not
    # valid SQL, so skip the query and treat the result as empty.
    course_rows = ''

user_courses = {}  # account id -> [{'user_id', 'level', 'type'}, ...]
all_uids = set()   # every character id returned by the query
for line in course_rows.split('\n'):
    if not line:
        continue
    parts = line.split('\t')
    if len(parts) >= 4:
        acc_id, uid, level, ctype = parts[0], parts[1], parts[2], parts[3]
        all_uids.add(uid)
        user_courses.setdefault(acc_id, []).append({'user_id': uid, 'level': level, 'type': ctype})

# 3b. Most recent lesson play
print("    最近行课...")
play_records = {}  # character id -> {'date', 'ch_id', 'ch_uid'} of the latest play
# The play-record table is split into 8 tables selected by user_id % 8.
for shard in range(8):
    shard_uids = [uid for uid in all_uids if int(uid) % 8 == shard]
    if not shard_uids:
        continue
    latest_sql = f"""
    SELECT DISTINCT ON (user_id) user_id, created_at::date, chapter_id, chapter_unique_id
    FROM bi_user_chapter_play_record_{shard}
    WHERE user_id IN ({','.join(shard_uids)}) AND play_status = 1
    ORDER BY user_id, created_at DESC
    """
    for row in pg_query(latest_sql).split('\n'):
        cols = row.split('\t')
        if row and len(cols) >= 4:
            play_records[cols[0]] = {'date': cols[1], 'ch_id': cols[2], 'ch_uid': cols[3]}

# 3c. Course structure
print("    课程结构...")
ch_ids = {rec['ch_id'] for rec in play_records.values()}
ch_map = {}  # chapter id -> "level-season-unit-lesson"
if ch_ids:
    structure_sql = f"SELECT id, course_level, course_season, course_unit, course_lesson FROM bi_level_unit_lesson WHERE id IN ({','.join(ch_ids)})"
    for row in pg_query(structure_sql).split('\n'):
        cols = row.split('\t')
        if row and len(cols) >= 5:
            ch_map[cols[0]] = '-'.join(cols[1:5])

# 3d. Study time
print("    学习时长...")
# character id -> SUM(interval_time) / 60000
# (presumably milliseconds converted to minutes — TODO confirm the unit)
study_map = {}
for shard in range(8):
    shard_uids = [uid for uid in all_uids if int(uid) % 8 == shard]
    if shard_uids:
        minutes_sql = f"SELECT user_id, COALESCE(SUM(interval_time),0)/60000.0 FROM bi_user_component_play_record_{shard} WHERE user_id IN ({','.join(shard_uids)}) GROUP BY user_id"
        for row in pg_query(minutes_sql).split('\n'):
            cols = row.split('\t')
            if row and len(cols) >= 2:
                study_map[cols[0]] = float(cols[1])

# 3e. Payment status
print("    付费状态...")
paid_set = set()  # account ids with at least one matching paid order
if uid_csv:
    # Only query when there are account ids: an empty list would render as
    # "IN ()", which is not valid SQL.
    paid_sql = f"SELECT DISTINCT account_id FROM bi_vala_order WHERE account_id IN ({uid_csv}) AND pay_success_date IS NOT NULL AND order_status = 3"
    for line in pg_query(paid_sql).split('\n'):
        if line.strip():
            paid_set.add(line.strip())

print(f"    课程:{len(user_courses)} 行课:{len(play_records)} 付费:{len(paid_set)}")

# ===== 4. Build the column-D text =====
print("[4] 组装学情文本...")

def get_best_char(acc_id):
    """Summarize the most recently active character of an account.

    Reads the module-level ``user_courses``, ``play_records``, ``ch_map`` and
    ``study_map`` lookups. Among the account's characters, the one with the
    latest play-record date is chosen; when none has a play record, the first
    character listed is used.

    Args:
        acc_id: Account id, as used for the keys of ``user_courses``.

    Returns:
        A ``(current, recent, study)`` tuple:
        ``current`` is the progress label, e.g. ``"L1体验课-U00-L01"``;
        ``recent`` is the latest play date, or ``'无记录'`` when there is none;
        ``study`` is the study time from ``study_map`` truncated to an int.
        ``(None, None, None)`` is returned when the account has no characters.
    """
    chars = user_courses.get(acc_id, [])
    if not chars:
        return None, None, None
    best, best_date = None, None
    for c in chars:
        pr = play_records.get(c['user_id'])
        # Dates are compared as strings, as returned by the query.
        if pr and (best_date is None or pr['date'] > best_date):
            best_date = pr['date']
            best = c
    if best is None:
        best = chars[0]
    uid = best['user_id']
    pr = play_records.get(uid)
    level = best['level'].replace('A1','L1').replace('A2','L2') if best['level'] != '?' else '?'
    ctype = best['type']
    if pr and pr['ch_id'] in ch_map:
        ch_name = ch_map[pr['ch_id']]
        # ch_name is "level-season-unit-lesson" (four parts); the label keeps
        # only the trailing unit and lesson: "L1体验课-U00-L01".
        parts = ch_name.split('-')
        if len(parts) >= 4:
            current = f"{level}{ctype}-{parts[-2]}-{parts[-1]}"
        else:
            current = f"{level}{ctype}-{ch_name}"
    elif pr:
        # A play record exists but its chapter is missing from ch_map.
        current = f"{level}{ctype}-?"
    else:
        current = f"{level}{ctype}-无记录"
    recent = pr['date'] if pr else '无记录'
    study = int(study_map.get(uid, 0))
    return current, recent, study

updates = {}   # row number -> text for column D
no_match = []  # row numbers that have no account id

for row_num, info in row_info.items():
    acc_id, sales = info['acc_id'], info['sales']

    if acc_id:
        current, recent, study = get_best_char(acc_id)
        pay = '未付费' if acc_id not in paid_set else '已付费'
        if not current:
            # The account exists but has no character/course rows.
            d_text = f"销售:{sales} | 用户:{acc_id} | 无课程角色 | 最近行课:无记录 | 学习0min | {pay}"
        else:
            d_text = f"销售:{sales} | 用户:{acc_id} | 当前:{current} | 最近行课:{recent} | 学习{study}min | {pay}"
    else:
        # No account id for this row: record it and write a placeholder line.
        no_match.append(row_num)
        d_text = f"销售:{sales} | 用户:未匹配 | 当前:无记录 | 最近行课:无记录 | 学习0min | 未付费"

    updates[row_num] = d_text

print(f"  未匹配手机号: {len(no_match)} 行")

# ===== 5. Bulk write-back =====
print(f"[5] 批量回填 {len(updates)} 条...")

def api_put(tat, token, sheet, range_str, values, timeout=30):
    """Write ``values`` into a range of a Feishu spreadsheet with one PUT.

    Args:
        tat: Access token sent as the ``Bearer`` credential.
        token: Spreadsheet token used in the request URL.
        sheet: Sheet id; prefixed to the range as ``{sheet}!{range_str}``.
        range_str: Cell range, e.g. ``"D2:D10"``.
        values: Rows of cell values (a list of lists).
        timeout: Seconds to wait for the HTTP request, so a stalled
            connection cannot hang the whole run.

    Returns:
        True when the response body reports ``code == 0``. False otherwise,
        including on any exception; the failure is printed, never raised.
    """
    url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{token}/values"
    body = {"valueRange": {"range": f"{sheet}!{range_str}", "values": values}}
    data = json.dumps(body).encode()
    req = urllib.request.Request(url, data=data, method='PUT')
    req.add_header('Authorization', f'Bearer {tat}')
    req.add_header('Content-Type', 'application/json; charset=utf-8')
    try:
        # The context manager closes the HTTP response even if parsing fails.
        with urllib.request.urlopen(req, timeout=timeout) as http_resp:
            resp = json.loads(http_resp.read())
        code = resp.get('code')
        if code != 0:
            # Report the rejection instead of returning False without a trace.
            print(f"      API error: code={code} msg={resp.get('msg')}")
        return code == 0
    except Exception as e:
        # Deliberately best-effort: report the error and let the caller count it.
        print(f"      API error: {e}")
        return False

sorted_items = sorted(updates.items(), key=lambda x: x[0])

# Group consecutive row numbers into blocks so each block is written as one
# range. Building from an empty list also copes with having no updates.
blocks = []
for item in sorted_items:
    if blocks and item[0] == blocks[-1][-1][0] + 1:
        blocks[-1].append(item)
    else:
        blocks.append([item])

print(f"  {len(blocks)} 个连续块")

success_d = 0
success_c = 0
for bi, block in enumerate(blocks):
    rows = [r for r, _ in block]
    d_vals = [[v] for _, v in block]
    c_vals = [["已返回"] for _ in block]
    start_row, end_row = rows[0], rows[-1]

    if api_put(TAT, TOKEN, SHEET, f'D{start_row}:D{end_row}', d_vals):
        success_d += len(block)
        # Mark the rows as returned only after their column-D text is written,
        # so a failed write is not recorded as done.
        if api_put(TAT, TOKEN, SHEET, f'C{start_row}:C{end_row}', c_vals):
            success_c += len(block)

    if (bi+1) % 20 == 0:
        print(f"    块 {bi+1}/{len(blocks)}: D={success_d} C={success_c}")

# Report against the real number of rows rather than a hard-coded total.
total = len(updates)
print(f"\n✅ 完成: D列={success_d}/{total}, C列={success_c}/{total}")
if no_match:
    print(f"⚠️ 未匹配手机号 {len(no_match)} 行: {no_match[:20]}")