336 lines
12 KiB
Python
336 lines
12 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Process 小龙 sheet: match phones to UIDs, fill trial counts, registration dates, download channels.
|
|
"""
|
|
import json
import os
import sys
import time

# phone_encrypt lives outside the package path; extend sys.path before importing it.
sys.path.insert(0, '/root/.openclaw/workspace/scripts')

import psycopg2  # noqa: E402
import requests  # noqa: E402
from phone_encrypt import encrypt_phone  # noqa: E402
|
|
|
|
# === Config ===
# SECURITY NOTE(review): the literal values below are credentials committed to
# source control. Each setting can be overridden through an environment
# variable so deployments need not rely on the literals; the literals are kept
# only as backward-compatible fallbacks and should be rotated and removed.
PG_CONFIG = {
    'host': os.environ.get('VALA_PG_HOST', 'bj-postgres-16pob4sg.sql.tencentcdb.com'),
    'port': int(os.environ.get('VALA_PG_PORT', '28591')),
    'user': os.environ.get('VALA_PG_USER', 'ai_member'),
    'password': os.environ.get('VALA_PG_PASSWORD', 'LdfjdjL83h3h3^$&**YGG*'),
    'database': os.environ.get('VALA_PG_DATABASE', 'vala_bi'),
}

# Cached Feishu tenant access token; filled in by get_feishu_token() on first use.
FEISHU_TOKEN = None
# Identifiers of the Feishu spreadsheet and the sheet (tab) this script reads and writes.
SPREADSHEET_TOKEN = 'DU4dsUOJThfbPStMcgBcsMH5nyb'
SHEET_ID = 'dff8c7'
|
|
|
|
def get_feishu_token():
    """Return a Feishu tenant access token, requesting it on first use.

    The token is cached in the module-level ``FEISHU_TOKEN`` so later calls
    within the same run do not hit the auth endpoint again.

    Returns:
        The tenant access token string.

    Raises:
        requests.RequestException: if the request fails or times out.
        RuntimeError: if the response does not contain a token (for example
            when the app credentials are rejected).
    """
    global FEISHU_TOKEN
    if FEISHU_TOKEN:
        return FEISHU_TOKEN

    # SECURITY NOTE(review): the fallback literals are app credentials that
    # were committed to source; prefer the environment variables and rotate
    # the committed secret.
    app_id = os.environ.get('FEISHU_APP_ID', 'cli_a929ae22e0b8dcc8')
    app_secret = os.environ.get('FEISHU_APP_SECRET', 'OtFjMy7p3qE3VvLbMdcWidwgHOnGD4FJ')

    resp = requests.post(
        'https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal',
        json={'app_id': app_id, 'app_secret': app_secret},
        timeout=30,  # without a timeout a stalled connection blocks forever
    )
    payload = resp.json()
    token = payload.get('tenant_access_token')
    if not token:
        # Report the API's own code/msg instead of failing with a bare KeyError.
        raise RuntimeError(
            "Feishu token request failed: "
            f"code={payload.get('code')} msg={payload.get('msg')}"
        )

    FEISHU_TOKEN = token
    return FEISHU_TOKEN
|
|
|
|
def read_sheet():
    """Read the data rows of the sheet (fixed range A3:J1142).

    Values are requested with ``valueRenderOption=ToString``.

    Returns:
        A list of rows, each a list of cell values for columns A..J. An empty
        list is returned when the API reports an error (the error is printed)
        or when the range holds no values.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    token = get_feishu_token()
    url = f'https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values/{SHEET_ID}!A3:J1142?valueRenderOption=ToString'
    resp = requests.get(
        url,
        headers={'Authorization': f'Bearer {token}'},
        timeout=30,  # without a timeout a stalled connection blocks forever
    )
    data = resp.json()

    # Surface API-level failures the same way write_batch() does, rather than
    # silently handing the caller an empty sheet.
    if data.get('code', 0) != 0:
        print(f" ERROR reading sheet: {data}")
        return []

    # Any level may be absent or null in the payload; treat both as "no rows".
    value_range = (data.get('data') or {}).get('valueRange') or {}
    return value_range.get('values') or []
|
|
|
|
def write_batch(range_str, values):
    """Write a rectangular block of values to the sheet.

    Args:
        range_str: A1-style range without the sheet prefix, e.g. ``'H3:H10'``;
            ``SHEET_ID`` is prepended here.
        values: List of rows, each a list of cell values, matching the range.

    Returns:
        True when the API responds with ``code == 0``; False otherwise (the
        response is printed).

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    token = get_feishu_token()
    url = f'https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values'
    body = {"valueRange": {"range": f"{SHEET_ID}!{range_str}", "values": values}}
    headers = {
        'Authorization': f'Bearer {token}',
        'Content-Type': 'application/json',
    }
    # without a timeout a stalled connection blocks the whole run forever
    resp = requests.put(url, headers=headers, json=body, timeout=30)
    result = resp.json()
    if result.get('code') != 0:
        print(f" ERROR writing {range_str}: {result}")
        return False
    return True
|
|
|
|
# Sheet row number of the first row returned by read_sheet() (its range starts at A3).
_FIRST_SHEET_ROW = 3

# Maximum number of values bound into one SQL ``IN (...)`` list.
_SQL_CHUNK_SIZE = 500

# Active, non-deleted accounts for a list of encrypted phone numbers.
_ACCOUNT_SQL = """
    SELECT id, tel_encrypt, created_at, download_channel
    FROM bi_vala_app_account
    WHERE tel_encrypt IN ({placeholders})
      AND status = 1
      AND deleted_at IS NULL
"""

# Per-account count of non-deleted course rows that have no expiry time.
_TRIAL_COUNT_SQL = """
    SELECT account_id, COUNT(*) as trial_count
    FROM bi_user_course_detail
    WHERE account_id IN ({placeholders})
      AND expire_time IS NULL
      AND deleted_at IS NULL
    GROUP BY account_id
"""

# (sheet column, label used while writing, label used in the final report)
_OUTPUT_COLUMNS = (
    ('H', 'UIDs', 'UIDs'),
    ('D', 'trial counts', 'trial counts'),
    ('I', 'registration dates', 'reg dates'),
    ('J', 'download channels', 'channels'),
)


def _collect_phones(rows):
    """Map encrypted phone -> (phone, [row indices]) for valid column-E phones."""
    rows_by_phone = {}
    for row_idx, row in enumerate(rows):
        if len(row) > 4 and row[4]:
            phone = str(row[4]).strip()
            # Only process 11-digit phone numbers
            if len(phone) == 11 and phone.isdigit():
                rows_by_phone.setdefault(phone, []).append(row_idx)
    # Encrypt each distinct phone once; keep every row that carries it.
    return {
        encrypt_phone(phone): (phone, row_indices)
        for phone, row_indices in rows_by_phone.items()
    }


def _collect_existing_uids(rows):
    """Map row index -> normalized UID for rows with a numeric H and an empty D."""
    existing = {}
    for row_idx, row in enumerate(rows):
        if len(row) <= 7 or not row[7]:
            continue
        h_val = str(row[7]).strip()
        d_empty = len(row) <= 3 or not row[3]
        # isdecimal() (not isdigit()) so that int() below cannot fail on
        # characters such as superscript digits.
        if d_empty and h_val.isdecimal():
            existing[row_idx] = str(int(h_val))
    return existing


def _fetch_in_chunks(cur, sql_template, values, chunk_size=_SQL_CHUNK_SIZE):
    """Run an ``IN ({placeholders})`` query per chunk of values and yield all rows."""
    for start in range(0, len(values), chunk_size):
        chunk = values[start:start + chunk_size]
        placeholders = ','.join(['%s'] * len(chunk))
        # Only the placeholder list is formatted in; the values stay bound parameters.
        cur.execute(sql_template.format(placeholders=placeholders), chunk)
        yield from cur.fetchall()


def _build_row_data(phone_map, accounts, uid_to_trial_count, existing_uids):
    """Build row index -> {column letter: value} for every row that needs writes."""
    row_data = {}
    for enc, (_phone, row_indices) in phone_map.items():
        account = accounts.get(enc)
        if account is None:
            continue
        uid, created, channel = account
        trial_count = uid_to_trial_count.get(uid, 0)
        cells = {
            'H': uid,
            'D': str(trial_count) if trial_count > 0 else '',
            'I': created.strftime('%Y-%m-%d') if created else '',
            'J': channel,
        }
        for row_idx in row_indices:
            row_data[row_idx] = dict(cells)

    # Also fill D for rows where H already has a numeric UID but D is empty.
    # TODO: registration date (I) and channel (J) are not backfilled for these
    # rows because accounts are only looked up by phone, not by UID.
    for row_idx, uid in existing_uids.items():
        if row_idx in row_data:
            continue  # already handled through the phone match
        trial_count = uid_to_trial_count.get(uid, 0)
        if trial_count > 0:
            row_data[row_idx] = {'D': str(trial_count)}
    return row_data


def _group_consecutive(cells):
    """Group sorted (row_idx, value) pairs into (start_idx, [[value], ...]) runs."""
    runs = []
    for row_idx, value in cells:
        if runs and row_idx == runs[-1][0] + len(runs[-1][1]):
            runs[-1][1].append([value])
        else:
            runs.append((row_idx, [[value]]))
    return runs


def _write_column(column, row_data):
    """Write all non-empty values of one column; return the number of cells written."""
    cells = sorted(
        (row_idx, data[column])
        for row_idx, data in row_data.items()
        if data.get(column)
    )
    written = 0
    for start_idx, vals in _group_consecutive(cells):
        start_row = start_idx + _FIRST_SHEET_ROW
        end_row = start_row + len(vals) - 1
        if write_batch(f'{column}{start_row}:{column}{end_row}', vals):
            written += len(vals)
            print(f" Wrote {column}{start_row}:{column}{end_row} ({len(vals)} cells)")
        time.sleep(0.05)  # brief pause between API calls
    return written


def main():
    """Fill UID, trial count, registration date and channel columns of the sheet.

    Steps: read the sheet, encrypt the phone numbers in column E, match them
    to accounts in PostgreSQL, count trial lessons per account, then write
    columns H (UID), D (trial count), I (registration date) and J (download
    channel) back in batches of consecutive rows, and print a summary.

    A phone number that appears in several rows fills every one of those rows.
    Rows that already carry a numeric UID in H but have an empty D get their
    trial count looked up by that UID.
    """
    # 1. Read sheet data
    print("Reading sheet data...")
    rows = read_sheet()
    print(f" Got {len(rows)} rows")

    # 2. Extract phones and encrypt
    print("\nExtracting and encrypting phones...")
    phone_map = _collect_phones(rows)
    print(f" Found {len(phone_map)} valid 11-digit phones")
    existing_uids = _collect_existing_uids(rows)

    # 3./4. Query PostgreSQL; always release the cursor and connection.
    print("\nQuerying PostgreSQL for phone matching...")
    conn = psycopg2.connect(**PG_CONFIG)
    try:
        cur = conn.cursor()
        try:
            accounts = {}  # encrypted phone -> (uid, created_at, channel)
            for uid, enc, created, channel in _fetch_in_chunks(
                    cur, _ACCOUNT_SQL, list(phone_map)):
                accounts[enc] = (str(uid), created, channel or '')
            print(f" Matched {len(accounts)} phones to UIDs")

            print("\nQuerying trial lesson counts...")
            # Include UIDs already present in column H so their trial counts
            # are actually available for the backfill.
            uids = {account[0] for account in accounts.values()}
            uids.update(existing_uids.values())
            uid_to_trial_count = {
                str(account_id): count
                for account_id, count in _fetch_in_chunks(
                    cur, _TRIAL_COUNT_SQL, [int(uid) for uid in uids])
            }
            print(f" Found trial counts for {len(uid_to_trial_count)} users")
        finally:
            cur.close()
    finally:
        conn.close()

    # 5. Prepare writes
    print("\nPreparing writes...")
    print(" Checking rows with existing UIDs but empty D column...")
    row_data = _build_row_data(phone_map, accounts, uid_to_trial_count, existing_uids)

    # 6.-9. Write the H, D, I and J columns
    written = {}
    for column, write_label, _report_label in _OUTPUT_COLUMNS:
        print(f"\nWriting {column} column ({write_label})...")
        written[column] = _write_column(column, row_data)

    # 10. Report
    print("\n" + "="*60)
    print("FINAL REPORT")
    print("="*60)
    print(f"Total rows processed: {len(rows)}")
    print(f"Phones extracted: {len(phone_map)}")
    print(f"Phones matched to UIDs: {len(accounts)}")
    print(f"Phones NOT matched: {len(phone_map) - len(accounts)}")
    for column, _write_label, report_label in _OUTPUT_COLUMNS:
        print(f"{column} column ({report_label}) written: {written[column]}")

    # Show unmatched phones
    unmatched = []
    for enc, (phone, row_indices) in phone_map.items():
        if enc in accounts:
            continue
        for row_idx in row_indices:
            unmatched.append(f" Row {row_idx + _FIRST_SHEET_ROW}: {phone}")

    if unmatched:
        print(f"\nUnmatched phones ({len(unmatched)}):")
        for line in unmatched[:20]:
            print(line)
        if len(unmatched) > 20:
            print(f" ... and {len(unmatched)-20} more")
|
|
# Script entry point: run the workflow only when executed directly, not on import.
if __name__ == "__main__":
    main()
|