#!/usr/bin/env python3
"""Back-fill learning status for the pending May rows of Sheet1.

For every target row listed in the targets JSON file the script:

1. resolves an account id (taken from the row, or matched by phone number),
2. queries the BI database (through ``psql``) for course, latest lesson,
   study time and payment status,
3. writes a summary line to column D of the Feishu sheet and marks column C
   as returned.
"""
import json
import os
import re  # noqa: F401  (kept: imported by the original script)
import subprocess
import urllib.request
from datetime import datetime  # noqa: F401  (kept: imported by the original script)

# ===== 0. Configuration =====
SECRETS_PATH = '/root/.openclaw/workspace/secrets.env'
FEISHU_CONFIG_PATH = '/root/.openclaw/credentials/xiaoxi/config.json'
TARGETS_PATH = '/tmp/may230_targets.json'

PG_HOST = 'bj-postgres-16pob4sg.sql.tencentcdb.com'
PG_PORT = '28591'
PG_USER = 'ai_member'
PG_DB = 'vala_bi'

FEISHU_AUTH_URL = 'https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal'
TOKEN = 'RFIJsXT8FhGHhctY4RwczcOfnac'
SHEET = '55b0eb'

SHARD_COUNT = 8          # play-record tables are split as <table>_{user_id % 8}
PHONE_BATCH_SIZE = 200   # number of phone conditions OR-ed into one query
HTTP_TIMEOUT = 30        # seconds, applied to every Feishu HTTP call


def load_secrets(path):
    """Parse a ``KEY=VALUE`` env file into a dict.

    Blank lines, ``#`` comment lines and lines without ``=`` are skipped.
    Surrounding whitespace and one layer of single/double quotes are removed
    from the value.
    """
    secrets = {}
    with open(path, encoding='utf-8') as f:
        for raw in f:
            line = raw.strip()
            if not line or line.startswith('#') or '=' not in line:
                continue
            key, value = line.split('=', 1)
            secrets[key.strip()] = value.strip().strip('"').strip("'")
    return secrets


def pg_query(sql, password):
    """Run *sql* through ``psql`` and return its tab-separated stdout.

    Raises:
        RuntimeError: if psql exits non-zero (connection failure or, because
            ``ON_ERROR_STOP`` is set, any SQL error). Failing loudly matters
            here: an empty result would otherwise be written to the sheet as
            "no data" for every row.
    """
    result = subprocess.run(
        ['psql', '-h', PG_HOST, '-p', PG_PORT, '-U', PG_USER, '-d', PG_DB,
         '-v', 'ON_ERROR_STOP=1', '-t', '-A', '-F', '\t'],
        input=sql,
        capture_output=True,
        text=True,
        encoding='utf-8',
        env={**os.environ, 'PGPASSWORD': password, 'PGCLIENTENCODING': 'UTF8'},
    )
    if result.returncode != 0:
        raise RuntimeError(
            f"psql exited with {result.returncode}: {result.stderr.strip()}"
        )
    # Strip newlines only: a plain strip() would also eat the tabs that
    # delimit empty leading/trailing fields of the first/last row.
    return result.stdout.strip('\r\n')


def parse_rows(output, min_fields):
    """Split psql output into rows of fields.

    Empty lines and rows with fewer than *min_fields* fields are dropped.
    """
    rows = []
    for line in output.split('\n'):
        if not line:
            continue
        fields = line.split('\t')
        if len(fields) >= min_fields:
            rows.append(fields)
    return rows


def normalize_id(value):
    """Return *value* as a stripped string, or ``''`` for ``None``."""
    if value is None:
        return ''
    return str(value).strip()


def id_csv(ids):
    """Join numeric ids into a comma-separated list for a SQL ``IN (...)``.

    Raises:
        ValueError: if any id is not made of ASCII digits only. The ids are
            interpolated into SQL text, so anything else is rejected.
    """
    items = [str(i) for i in ids]
    bad = [i for i in items if not (i.isascii() and i.isdigit())]
    if bad:
        raise ValueError(f"non-numeric id(s) in SQL list: {bad[:5]}")
    return ','.join(items)


def shard_ids(ids, shard_count=SHARD_COUNT):
    """Bucket ids by ``int(id) % shard_count``; returns a list of lists."""
    shards = [[] for _ in range(shard_count)]
    for uid in sorted(ids, key=int):
        shards[int(uid) % shard_count].append(uid)
    return shards


def is_valid_phone(phone):
    """Return True for an 11-character, ASCII-digit-only phone string."""
    return (
        isinstance(phone, str)
        and len(phone) == 11
        and phone.isascii()
        and phone.isdigit()
    )


def mask_phone(phone):
    """Return the masked form stored in the DB, e.g. ``130****1168``."""
    return f"{phone[:3]}****{phone[-4:]}"


def fetch_tenant_token(config_path):
    """Request a Feishu tenant access token for the first configured app.

    Raises:
        RuntimeError: if the response carries no ``tenant_access_token``.
    """
    with open(config_path, encoding='utf-8') as f:
        config = json.load(f)
    app = config['apps'][0]
    payload = json.dumps(
        {"app_id": app['appId'], "app_secret": app['appSecret']}
    ).encode()
    req = urllib.request.Request(
        FEISHU_AUTH_URL,
        data=payload,
        headers={'Content-Type': 'application/json; charset=utf-8'},
    )
    with urllib.request.urlopen(req, timeout=HTTP_TIMEOUT) as resp:
        body = json.loads(resp.read())
    token = body.get('tenant_access_token')
    if not token:
        raise RuntimeError(
            f"Feishu auth failed: code={body.get('code')} msg={body.get('msg')}"
        )
    return token


def match_phones(phones, password):
    """Map plain phone numbers to account ids.

    The DB stores ``tel`` masked (``130****1168``), so rows are selected by
    the first 3 and last 4 digits and the result is keyed back to the plain
    number. Phones that are not 11 digits are ignored.

    NOTE(review): two different numbers sharing prefix and suffix produce the
    same mask and therefore resolve to the same (newest) account.
    """
    valid = [p for p in phones if is_valid_phone(p)]
    conds = [
        f"(LEFT(tel,3)='{p[:3]}' AND RIGHT(tel,4)='{p[-4:]}')" for p in valid
    ]
    masked_to_acc = {}
    for start in range(0, len(conds), PHONE_BATCH_SIZE):
        batch = conds[start:start + PHONE_BATCH_SIZE]
        sql = f"""
            SELECT id, tel FROM (
                SELECT id, tel,
                       ROW_NUMBER() OVER (PARTITION BY tel ORDER BY id DESC) AS rn
                FROM bi_vala_app_account
                WHERE status = 1 AND deleted_at IS NULL AND ({' OR '.join(batch)})
            ) t WHERE rn = 1
        """
        for fields in parse_rows(pg_query(sql, password), 2):
            masked_to_acc[fields[1]] = fields[0]

    phone_to_acc = {}
    for p in valid:
        acc = masked_to_acc.get(mask_phone(p))
        if acc:
            phone_to_acc[p] = acc
    return phone_to_acc


def build_row_info(with_uid, without_uid, phone_to_acc):
    """Build ``{row_number: {'acc_id': ..., 'sales': ...}}`` for all rows.

    Rows without a uid get the account matched by phone, or ``''``.
    """
    row_info = {}
    for r in with_uid:
        row_info[r['row']] = {
            'acc_id': normalize_id(r['uid']),
            'sales': r['sales'],
        }
    for r in without_uid:
        row_info[r['row']] = {
            'acc_id': phone_to_acc.get(r['phone'], ''),
            'sales': r['sales'],
        }
    return row_info


def fetch_learning_data(acc_ids, password):
    """Query course, latest lesson, chapter, study time and payment data.

    Returns:
        ``(user_courses, play_records, ch_map, study_map, paid_set)`` where
        ``user_courses`` maps account id -> list of character/course dicts,
        ``play_records`` maps character id -> latest play record,
        ``ch_map`` maps chapter id -> ``(level, season, unit, lesson)``,
        ``study_map`` maps character id -> minutes studied (float),
        ``paid_set`` is the set of account ids with a paid order.
        All are empty when *acc_ids* is empty (no query is issued).
    """
    user_courses = {}
    play_records = {}
    ch_map = {}
    study_map = {}
    paid_set = set()
    if not acc_ids:
        # "IN ()" is not valid SQL, so there is nothing to ask the DB.
        return user_courses, play_records, ch_map, study_map, paid_set
    uid_csv = id_csv(acc_ids)

    # 3a. Courses and characters
    print(" 课程信息...")
    course_sql = f"""
        SELECT a.id, c.id, COALESCE(d.course_level,'?'),
               CASE WHEN d.expire_time IS NOT NULL THEN '正式课' ELSE '体验课' END
        FROM bi_vala_app_account a
        JOIN bi_vala_app_character c
          ON c.account_id = a.id AND c.deleted_at IS NULL
        LEFT JOIN bi_user_course_detail d
          ON d.user_id = c.id AND d.deleted_at IS NULL
        WHERE a.id IN ({uid_csv}) AND a.status = 1 AND a.deleted_at IS NULL
        ORDER BY a.id, c.id, d.course_level
    """
    all_uids = set()
    for acc_id, uid, level, ctype, *_ in parse_rows(pg_query(course_sql, password), 4):
        all_uids.add(uid)
        user_courses.setdefault(acc_id, []).append(
            {'user_id': uid, 'level': level, 'type': ctype}
        )
    shards = shard_ids(all_uids)

    # 3b. Latest played lesson per character (sharded tables)
    print(" 最近行课...")
    for i, subset in enumerate(shards):
        if not subset:
            continue
        play_sql = f"""
            SELECT DISTINCT ON (user_id)
                   user_id, created_at::date, chapter_id, chapter_unique_id
            FROM bi_user_chapter_play_record_{i}
            WHERE user_id IN ({id_csv(subset)}) AND play_status = 1
            ORDER BY user_id, created_at DESC
        """
        for fields in parse_rows(pg_query(play_sql, password), 4):
            play_records[fields[0]] = {
                'date': fields[1],
                'ch_id': fields[2],
                'ch_uid': fields[3],
            }

    # 3c. Chapter structure. Empty chapter ids (NULL in the DB) are skipped
    # so they cannot produce "IN (1,,2)".
    print(" 课程结构...")
    ch_ids = sorted({r['ch_id'] for r in play_records.values() if r['ch_id']}, key=int)
    if ch_ids:
        ch_sql = (
            "SELECT id, course_level, course_season, course_unit, course_lesson "
            f"FROM bi_level_unit_lesson WHERE id IN ({id_csv(ch_ids)})"
        )
        for fields in parse_rows(pg_query(ch_sql, password), 5):
            ch_map[fields[0]] = (fields[1], fields[2], fields[3], fields[4])

    # 3d. Study time in minutes (interval_time is divided by 60000)
    print(" 学习时长...")
    for i, subset in enumerate(shards):
        if not subset:
            continue
        study_sql = (
            "SELECT user_id, COALESCE(SUM(interval_time),0)/60000.0 "
            f"FROM bi_user_component_play_record_{i} "
            f"WHERE user_id IN ({id_csv(subset)}) GROUP BY user_id"
        )
        for fields in parse_rows(pg_query(study_sql, password), 2):
            study_map[fields[0]] = float(fields[1])

    # 3e. Payment status
    print(" 付费状态...")
    paid_sql = (
        "SELECT DISTINCT account_id FROM bi_vala_order "
        f"WHERE account_id IN ({uid_csv}) "
        "AND pay_success_date IS NOT NULL AND order_status = 3"
    )
    for fields in parse_rows(pg_query(paid_sql, password), 1):
        account = fields[0].strip()
        if account:
            paid_set.add(account)

    return user_courses, play_records, ch_map, study_map, paid_set


def get_best_char(acc_id, user_courses, play_records, ch_map, study_map):
    """Summarise the most recently active character of an account.

    The character with the latest play date wins; when none has a play
    record the first character is used.

    Returns:
        ``(current, recent, study)``: the current lesson label such as
        ``"L1体验课-U00-L01"``, the latest play date (or ``'无记录'``) and the
        whole minutes studied. ``(None, None, None)`` when the account has
        no character.
    """
    chars = user_courses.get(acc_id, [])
    if not chars:
        return None, None, None

    best = None
    best_date = None
    for c in chars:
        pr = play_records.get(c['user_id'])
        if pr and (best_date is None or pr['date'] > best_date):
            best, best_date = c, pr['date']
    if best is None:
        best = chars[0]

    uid = best['user_id']
    pr = play_records.get(uid)
    level = best['level'].replace('A1', 'L1').replace('A2', 'L2')
    prefix = f"{level}{best['type']}"

    if pr is None:
        current = f"{prefix}-无记录"
    elif pr['ch_id'] in ch_map:
        # Unit and lesson are kept as separate fields rather than re-split
        # from a joined string, so the label is always "<prefix>-<unit>-<lesson>".
        _, _, unit, lesson = ch_map[pr['ch_id']]
        current = f"{prefix}-{unit}-{lesson}"
    else:
        current = f"{prefix}-?"

    recent = pr['date'] if pr else '无记录'
    study = int(study_map.get(uid, 0))
    return current, recent, study


def build_updates(row_info, user_courses, play_records, ch_map, study_map, paid_set):
    """Compose the column-D text for every row.

    Returns:
        ``(updates, no_match)``: ``updates`` maps row number -> text and
        ``no_match`` lists the rows for which no account id is known.
    """
    updates = {}
    no_match = []
    for row_num, info in row_info.items():
        acc_id = info['acc_id']
        sales = info['sales']
        if not acc_id:
            no_match.append(row_num)
            updates[row_num] = (
                f"销售:{sales} | 用户:未匹配 | 当前:无记录 | 最近行课:无记录 | 学习0min | 未付费"
            )
            continue
        current, recent, study = get_best_char(
            acc_id, user_courses, play_records, ch_map, study_map
        )
        pay = '已付费' if acc_id in paid_set else '未付费'
        if current:
            updates[row_num] = (
                f"销售:{sales} | 用户:{acc_id} | 当前:{current} | 最近行课:{recent} | 学习{study}min | {pay}"
            )
        else:
            updates[row_num] = (
                f"销售:{sales} | 用户:{acc_id} | 无课程角色 | 最近行课:无记录 | 学习0min | {pay}"
            )
    return updates, no_match


def api_put(tat, token, sheet, range_str, values):
    """PUT *values* into ``sheet!range_str``; return True on API code 0.

    Failures are printed and reported as ``False`` instead of raised, so one
    bad block does not abort the remaining writes.
    """
    url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{token}/values"
    body = {"valueRange": {"range": f"{sheet}!{range_str}", "values": values}}
    req = urllib.request.Request(url, data=json.dumps(body).encode(), method='PUT')
    req.add_header('Authorization', f'Bearer {tat}')
    req.add_header('Content-Type', 'application/json; charset=utf-8')
    try:
        with urllib.request.urlopen(req, timeout=HTTP_TIMEOUT) as resp:
            result = json.loads(resp.read())
        code = result.get('code')
        msg = result.get('msg')
    except Exception as e:  # deliberate best-effort boundary, see docstring
        print(f" API error: {e}")
        return False
    if code != 0:
        print(f" API error: code={code} msg={msg}")
        return False
    return True


def group_contiguous(sorted_items):
    """Group ``(row, value)`` pairs, sorted by row, into runs of adjacent rows.

    Returns an empty list for empty input.
    """
    blocks = []
    for item in sorted_items:
        if blocks and item[0] == blocks[-1][-1][0] + 1:
            blocks[-1].append(item)
        else:
            blocks.append([item])
    return blocks


def write_back(updates, tat):
    """Write column D (summary) and column C (status) block by block.

    Column C is only marked once the matching column-D write succeeded, so a
    row is never flagged as returned without its data.

    Returns:
        ``(success_d, success_c)``: number of rows written per column.
    """
    blocks = group_contiguous(sorted(updates.items(), key=lambda item: item[0]))
    print(f" {len(blocks)} 个连续块")
    success_d = 0
    success_c = 0
    for bi, block in enumerate(blocks, start=1):
        start_row, end_row = block[0][0], block[-1][0]
        d_vals = [[text] for _, text in block]
        c_vals = [["已返回"] for _ in block]
        if api_put(tat, TOKEN, SHEET, f'D{start_row}:D{end_row}', d_vals):
            success_d += len(block)
            if api_put(tat, TOKEN, SHEET, f'C{start_row}:C{end_row}', c_vals):
                success_c += len(block)
        if bi % 20 == 0:
            print(f" 块 {bi}/{len(blocks)}: D={success_d} C={success_c}")
    return success_d, success_c


def main():
    """Run the whole pipeline; return 0 when every row was written, else 1."""
    # ===== 0. Setup =====
    password = load_secrets(SECRETS_PATH)['PG_ONLINE_PASSWORD']
    tat = fetch_tenant_token(FEISHU_CONFIG_PATH)

    # ===== 1. Load target rows =====
    with open(TARGETS_PATH, encoding='utf-8') as f:
        targets = json.load(f)
    with_uid = targets['with_uid']        # [{row, uid, sales, phone}]
    without_uid = targets['without_uid']  # [{row, uid:'', sales, phone}]
    all_rows = with_uid + without_uid
    print(f"[1] 目标: {len(all_rows)} 条 (有uid={len(with_uid)}, 无uid={len(without_uid)})")

    # ===== 2. Match phone -> account_id (rows without uid) =====
    print("[2] 手机号匹配 account_id...")
    phone_to_acc = {}
    if without_uid:
        phones = sorted({r['phone'] for r in without_uid if r['phone']})
        phone_to_acc = match_phones(phones, password)
        print(f" 匹配: {len(phone_to_acc)}/{len(phones)}")

    row_info = build_row_info(with_uid, without_uid, phone_to_acc)
    matched = sum(1 for v in row_info.values() if v['acc_id'])
    print(f" 最终有account_id: {matched}/{len(row_info)}")

    # ===== 3. Bulk DB queries =====
    print("[3] 批量查询学情...")
    acc_ids = sorted({v['acc_id'] for v in row_info.values() if v['acc_id']}, key=int)
    print(f" 去重用户: {len(acc_ids)}")
    user_courses, play_records, ch_map, study_map, paid_set = fetch_learning_data(
        acc_ids, password
    )
    print(f" 课程:{len(user_courses)} 行课:{len(play_records)} 付费:{len(paid_set)}")

    # ===== 4. Compose column-D text =====
    print("[4] 组装学情文本...")
    updates, no_match = build_updates(
        row_info, user_courses, play_records, ch_map, study_map, paid_set
    )
    print(f" 未匹配手机号: {len(no_match)} 行")

    # ===== 5. Bulk write-back =====
    print(f"[5] 批量回填 {len(updates)} 条...")
    success_d, success_c = write_back(updates, tat)
    total = len(updates)
    print(f"\n✅ 完成: D列={success_d}/{total}, C列={success_c}/{total}")
    if no_match:
        print(f"⚠️ 未匹配手机号 {len(no_match)} 行: {no_match[:20]}")
    return 0 if success_d == total and success_c == total else 1


if __name__ == "__main__":
    raise SystemExit(main())