#!/usr/bin/env python3
"""
Process 吴迪 sheet: match phones via XXTEA encryption, fill H/D/I/J columns.
"""

import json
import os
import sys
import time
import urllib.error
import urllib.request

# Import phone encryption
sys.path.insert(0, '/root/.openclaw/workspace/scripts')
from phone_encrypt import encrypt_phone

# --- Config ---
# The Feishu bearer token is taken from the first command-line argument;
# without it the script prints a usage line and exits with status 1.
try:
    FEISHU_TOKEN = sys.argv[1]
except IndexError:
    FEISHU_TOKEN = None

if FEISHU_TOKEN is None or FEISHU_TOKEN == "":
    print("Usage: python3 process_wudi_sheet.py <FEISHU_TOKEN>")
    sys.exit(1)

# Identifiers interpolated into the Feishu sheets API URLs below.
SHEET_ID = "f975f0"
SPREADSHEET_TOKEN = "NoZqsFi47hIOHEt9j8WcfRtbnug"

# PostgreSQL config
import psycopg2

# Keyword arguments handed to psycopg2.connect(**PG_CONFIG).
# Every field can be overridden with a VALA_PG_* environment variable so the
# script can be pointed at another database without editing this file; the
# literals are fallbacks that keep existing invocations working unchanged.
# NOTE(review): the fallback password is a credential committed to source.
# Rotate it and remove the literal once VALA_PG_PASSWORD is provisioned
# wherever this script runs.
PG_CONFIG = {
    "host": os.environ.get("VALA_PG_HOST", "bj-postgres-16pob4sg.sql.tencentcdb.com"),
    # Environment values are strings, so normalise the port back to int.
    "port": int(os.environ.get("VALA_PG_PORT", "28591")),
    "user": os.environ.get("VALA_PG_USER", "ai_member"),
    "password": os.environ.get("VALA_PG_PASSWORD", "LdfjdjL83h3h3^$&**YGG*"),
    "database": os.environ.get("VALA_PG_DATABASE", "vala_bi"),
}

# --- Step 1: Read all sheet data ---
def feishu_get(url, timeout=30):
    """Send an authenticated GET request and return the decoded JSON body.

    Args:
        url: Fully-formed request URL.
        timeout: Seconds to wait on a blocking network operation before
            giving up. Without it a stalled connection would hang the
            script indefinitely.

    Returns:
        The response body parsed with ``json.loads``.

    Raises:
        urllib.error.URLError: On connection failures; HTTP error statuses
            surface as its ``HTTPError`` subclass.
        ValueError: If the response body is not valid JSON.
    """
    req = urllib.request.Request(
        url,
        headers={
            "Authorization": f"Bearer {FEISHU_TOKEN}",
            "Content-Type": "application/json",
        },
    )
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        return json.loads(resp.read())


def feishu_put(url, body, timeout=30):
    """Send an authenticated PUT request with a JSON body and return the JSON reply.

    Args:
        url: Fully-formed request URL.
        body: JSON-serialisable object sent as the request payload.
        timeout: Seconds to wait on a blocking network operation before
            giving up. Without it a stalled connection would hang the
            script indefinitely.

    Returns:
        The response body parsed with ``json.loads``.

    Raises:
        urllib.error.URLError: On connection failures; HTTP error statuses
            surface as its ``HTTPError`` subclass.
        ValueError: If the response body is not valid JSON.
    """
    req = urllib.request.Request(
        url,
        data=json.dumps(body).encode(),
        method="PUT",
        headers={
            "Authorization": f"Bearer {FEISHU_TOKEN}",
            "Content-Type": "application/json",
        },
    )
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        return json.loads(resp.read())


print("Step 1: Reading sheet data...")
url = (
    f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}"
    f"/values/{SHEET_ID}!A3:J8016?valueRenderOption=ToString"
)
result = feishu_get(url)
rows = result.get("data", {}).get("valueRange", {}).get("values", [])
print(f"Read {len(rows)} rows")


def _cell_text(value):
    """Return the stripped string form of a cell, or '' when the cell is falsy."""
    return str(value).strip() if value else ''


# Parse rows
# The requested range starts at sheet row 3, so the first returned row is
# numbered 3. Short rows are right-padded so columns A..J always exist.
parsed = []
for row_number, raw_cells in enumerate(rows, start=3):
    cells = raw_cells + [''] * (10 - len(raw_cells))
    record = {"excel_row": row_number}
    record.update(
        (letter, _cell_text(cell)) for letter, cell in zip("ABCDEFGHIJ", cells)
    )
    parsed.append(record)

# --- Step 2: Identify rows needing processing ---
# Case A: column E holds an 11-digit phone and column H is not a numeric UID
#         -> the phone has to be matched to an account.
# Case B: column H already holds a numeric UID but column D is empty
#         -> D/I/J have to be filled from the database.
# Every parsed cell is a string, so str.isdigit() alone also rejects ''.

# (excel_row, phone)
phones_to_match = [
    (rec["excel_row"], rec["E"])
    for rec in parsed
    if len(rec["E"]) == 11 and rec["E"].isdigit() and not rec["H"].isdigit()
]

# (excel_row, uid)
rows_need_dij = [
    (rec["excel_row"], rec["H"])
    for rec in parsed
    if rec["H"].isdigit() and not rec["D"]
]

print("\nStep 2: Analysis")
print(f" Phones to match (H empty): {len(phones_to_match)}")
print(f" Rows with UID but D empty: {len(rows_need_dij)}")

# --- Step 3: Encrypt phones and query PostgreSQL ---
print("\nStep 3: Encrypting phones and querying DB...")


def _batches(items, size=500):
    """Yield consecutive slices of *items* holding at most *size* elements."""
    for start in range(0, len(items), size):
        yield items[start:start + size]


# Encrypt all phones (phone -> ciphertext), then invert the mapping so rows
# returned by the database can be traced back to the plain phone number.
phone_to_enc = {phone: encrypt_phone(phone) for _, phone in phones_to_match}
enc_to_phone = {enc: phone for phone, enc in phone_to_enc.items()}
enc_list = list(enc_to_phone)

phone_matches = {}  # phone -> account_id (as str)
uid_info = {}  # uid -> {d_count, created_at, download_channel}

conn = psycopg2.connect(**PG_CONFIG)
try:
    cur = conn.cursor()
    try:
        # Resolve encrypted phones to account IDs. Each batch is queried
        # exactly once and its result set is read exactly once; a second
        # fetchall() on the same cursor would only return an empty list.
        for batch_no, batch in enumerate(_batches(enc_list), start=1):
            placeholders = ','.join(['%s'] * len(batch))
            cur.execute(
                f"SELECT id, tel_encrypt FROM bi_vala_app_account WHERE tel_encrypt IN ({placeholders}) AND status=1 AND deleted_at IS NULL",
                batch
            )
            results = cur.fetchall()
            for account_id, tel_enc in results:
                phone = enc_to_phone.get(tel_enc)
                if phone:
                    phone_matches[phone] = str(account_id)
            print(f" Batch {batch_no}: {len(results)} results from {len(batch)} phones")

        print(f" Total phone matches: {len(phone_matches)}")

        # Get D/I/J for all matched UIDs + existing UIDs
        all_uids = {int(uid) for uid in phone_matches.values()}
        all_uids.update(int(uid) for _, uid in rows_need_dij)

        print(f"\n Fetching D/I/J for {len(all_uids)} unique UIDs...")

        uid_list = list(all_uids)

        # Get trial lesson count (D column)
        for batch in _batches(uid_list):
            placeholders = ','.join(['%s'] * len(batch))
            cur.execute(
                f"SELECT account_id, COUNT(*) FROM bi_user_course_detail WHERE account_id IN ({placeholders}) AND expire_time IS NULL AND deleted_at IS NULL GROUP BY account_id",
                batch
            )
            for uid, count in cur.fetchall():
                uid_info.setdefault(uid, {})['d_count'] = count

        # Get registration date and download channel
        for batch in _batches(uid_list):
            placeholders = ','.join(['%s'] * len(batch))
            cur.execute(
                f"SELECT id, created_at, download_channel FROM bi_vala_app_account WHERE id IN ({placeholders}) AND status=1 AND deleted_at IS NULL",
                batch
            )
            for uid, created_at, download_channel in cur.fetchall():
                info = uid_info.setdefault(uid, {})
                info['created_at'] = str(created_at.date()) if created_at else ''
                info['download_channel'] = download_channel or ''
    finally:
        # Release the cursor even when a query raises.
        cur.close()
finally:
    # Release the connection even when a query raises.
    conn.close()

print(f" Fetched info for {len(uid_info)} UIDs")

# --- Step 4: Prepare writes ---
print("\nStep 4: Preparing writes...")

# excel_row -> {column letter: value to write}
writes = {}

# excel_row -> uid (int) for every row whose D/I/J cells get filled.
# Rows that already carried a UID come first ...
rows_for_dij = {row: int(uid_text) for row, uid_text in rows_need_dij}

# ... followed by rows whose phone was matched; those also receive the
# matched UID in column H.
for row, number in phones_to_match:
    matched_uid = phone_matches.get(number)
    if not matched_uid:
        continue
    writes.setdefault(row, {})['H'] = matched_uid
    rows_for_dij[row] = int(matched_uid)

for row, uid in rows_for_dij.items():
    info = uid_info.get(uid, {})
    cells = writes.setdefault(row, {})

    # D is always written; a UID with no counted rows gets '0'.
    trial_count = info.get('d_count', 0)
    cells['D'] = '0' if trial_count <= 0 else str(trial_count)

    # I and J are written only when the database returned a non-empty value.
    for column, field in (('I', 'created_at'), ('J', 'download_channel')):
        value = info.get(field, '')
        if value:
            cells[column] = value

print(f" Total rows to write: {len(writes)}")

# --- Step 5: Execute writes in batches ---
print("\nStep 5: Writing to sheet...")

# Split the pending writes into one list per column. Each list holds
# (excel_row, value) pairs in ascending row order, limited to the rows that
# actually have a value for that column.
_rows_in_order = sorted(writes)
h_writes, d_writes, i_writes, j_writes = (
    [(row, writes[row][column]) for row in _rows_in_order if column in writes[row]]
    for column in 'HDIJ'
)

def write_batch(col_letter, row_values, col_name, max_rows=5000):
    """Write one column's values to the sheet, one request per run of consecutive rows.

    Args:
        col_letter: Column letter used to build the target range (e.g. 'H').
        row_values: List of (row_number, value) pairs in ascending row order.
        col_name: Label used in the progress output only.
        max_rows: Upper bound on the number of rows sent in a single request.
            A long run of consecutive rows is split into several requests so
            that no request exceeds the per-call row limit of the sheets
            "values" endpoint.

    Returns:
        The number of cells covered by requests whose reply had ``code == 0``.
        Failed requests are reported on stdout and skipped (best effort).
    """
    if not row_values:
        print(f" {col_name} ({col_letter}): nothing to write")
        return 0

    # The endpoint is identical for every request, so build it once.
    url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values"

    total = len(row_values)
    written = 0
    start = 0
    while start < total:
        # Extend the slice while rows stay consecutive and the cap allows it.
        stop = start + 1
        while (
            stop < total
            and stop - start < max_rows
            and row_values[stop][0] == row_values[stop - 1][0] + 1
        ):
            stop += 1

        batch = row_values[start:stop]
        start_row = batch[0][0]
        end_row = batch[-1][0]
        body = {
            "valueRange": {
                "range": f"{SHEET_ID}!{col_letter}{start_row}:{col_letter}{end_row}",
                "values": [[value] for _, value in batch],
            }
        }

        try:
            result = feishu_put(url, body)
            code = result.get("code", -1)
            if code == 0:
                written += len(batch)
                print(f" {col_name} rows {start_row}-{end_row}: OK ({len(batch)} cells)")
            else:
                print(f" {col_name} rows {start_row}-{end_row}: ERROR code={code} msg={result.get('msg','')}")
        except Exception as e:
            # Deliberately broad: one failed range must not abort the
            # remaining writes; the failure is reported and the loop goes on.
            print(f" {col_name} rows {start_row}-{end_row}: EXCEPTION {e}")

        # Short pause between requests.
        time.sleep(0.05)
        start = stop

    return written


# Push each column to the sheet and keep the per-column success counts.
h_count = write_batch('H', h_writes, 'H(UID)')
d_count = write_batch('D', d_writes, 'D(Trial)')
i_count = write_batch('I', i_writes, 'I(RegDate)')
j_count = write_batch('J', j_writes, 'J(Channel)')

# --- Summary ---
_banner = '=' * 60
print("\n" + _banner)
print("SUMMARY")
print(_banner)
for _label, _value in (
    ("Phones matched to UID", len(phone_matches)),
    ("H (UID) written", h_count),
    ("D (Trial count) written", d_count),
    ("I (Reg date) written", i_count),
    ("J (Channel) written", j_count),
    ("Total rows updated", len(writes)),
):
    print(f" {_label}: {_value}")

# Show unmatched phones (first 20 only, then a count of the remainder)
unmatched = [pair for pair in phones_to_match if pair[1] not in phone_matches]
if unmatched:
    print(f"\n Unmatched phones ({len(unmatched)}):")
    for _row, _number in unmatched[:20]:
        print(f" Row {_row}: {_number}")
    _overflow = len(unmatched) - 20
    if _overflow > 0:
        print(f" ... and {_overflow} more")