#!/usr/bin/env python3
"""Update the 小龙 sheet with UID, trial count, registration date, and download channel."""

import json
import os
import re
import sys
import time

import mysql.connector
import psycopg2
import requests

# NOTE(review): the secrets below are committed in plain text. The literals are
# kept only as fallback defaults so existing invocations keep working; each one
# can be overridden through the environment variable named next to it. Rotate
# these credentials and drop the literals once every caller sets the variables.

# Bearer token sent to the Feishu sheets API (override: VALA_FEISHU_TOKEN).
# Presumably a short-lived tenant access token, so expect to refresh it -- confirm.
FEISHU_TOKEN = os.environ.get(
    "VALA_FEISHU_TOKEN", "t-g10464c0UK5L67JVXSDDT3EWM4DPLSDY5C7R7NS6"
)
# Spreadsheet and sheet identifiers used to build the Feishu API URLs.
SPREADSHEET_TOKEN = "DU4dsUOJThfbPStMcgBcsMH5nyb"
SHEET_ID = "dff8c7"

# DB connections
# MySQL account database (override password: VALA_MYSQL_PASSWORD).
MYSQL_CONFIG = {
    "host": "bj-cdb-dh2fkqa0.sql.tencentcdb.com",
    "port": 27751,
    "user": "read_only",
    "password": os.environ.get("VALA_MYSQL_PASSWORD", "fsdo45ijfmfmuu77$%^&"),
    "database": "vala_user",
}

# PostgreSQL BI database (override password: VALA_PG_PASSWORD).
PG_CONFIG = {
    "host": "bj-postgres-16pob4sg.sql.tencentcdb.com",
    "port": 28591,
    "user": "ai_member",
    "password": os.environ.get("VALA_PG_PASSWORD", "LdfjdjL83h3h3^$&**YGG*"),
    "database": "vala_bi",
}

def read_sheet(cell_range="A3:J1142", timeout=30):
    """Read a block of cell values from the sheet.

    Args:
        cell_range: A1-style range within SHEET_ID. The default is the range
            this script has always read. Callers that derive sheet row numbers
            from list positions must account for the first row of the range.
        timeout: Seconds to wait for the Feishu API before giving up.

    Returns:
        The list of rows (each a list of cell values) from the response, or an
        empty list when the response carries no values.

    Exits the process with status 1 (after printing the reason) when the
    request fails, the body is not JSON, or the API reports a non-zero code.
    """
    url = (
        "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/"
        f"{SPREADSHEET_TOKEN}/values/{SHEET_ID}!{cell_range}"
        "?valueRenderOption=ToString"
    )
    headers = {"Authorization": f"Bearer {FEISHU_TOKEN}"}
    try:
        # Without a timeout a stalled connection would hang the script forever.
        r = requests.get(url, headers=headers, timeout=timeout)
        data = r.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a non-JSON body (e.g. a gateway error page).
        print(f"Error reading sheet: {exc}")
        sys.exit(1)
    if not isinstance(data, dict) or data.get("code") != 0:
        print(f"Error reading sheet: {data}")
        sys.exit(1)
    # A missing or null "values" is normalised to an empty list so callers can
    # always take len() of / iterate over the result.
    return data["data"]["valueRange"].get("values") or []

def write_batch(rows_data):
    """Placeholder for a batched sheet writer; currently a no-op.

    rows_data is meant to be a list of (row_num, col, value) tuples. The
    sketched plan is to group the updates and write them column by column as
    contiguous ranges, but none of that is implemented: the argument is
    ignored, nothing is sent to the sheet, and None is returned.
    """
    return None

def write_range(start_row, end_row, col_letter, values, timeout=30):
    """Write values into a single-column range of the sheet.

    Args:
        start_row: First sheet row of the range (1-based).
        end_row: Last sheet row of the range (1-based, inclusive).
        col_letter: Column letter, e.g. "H".
        values: List of one-element lists, one per row in the range.
        timeout: Seconds to wait for the Feishu API before giving up.

    Returns:
        True when the API reports success (code 0); False on any failure,
        including network errors and non-JSON responses, after printing the
        reason. Failures never raise, so a caller writing many ranges can
        carry on with the remaining ones.
    """
    range_str = f"{SHEET_ID}!{col_letter}{start_row}:{col_letter}{end_row}"
    url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values"
    headers = {
        "Authorization": f"Bearer {FEISHU_TOKEN}",
        "Content-Type": "application/json",
    }
    body = {
        "valueRange": {
            "range": range_str,
            "values": values,
        }
    }
    try:
        # Without a timeout a stalled connection would hang the script forever.
        r = requests.put(url, headers=headers, json=body, timeout=timeout)
        data = r.json()
    except (requests.RequestException, ValueError) as exc:
        # Report transport / decoding problems the same way as API errors
        # instead of aborting the whole run with a traceback.
        print(f"Error writing {range_str}: {exc}")
        return False
    if not isinstance(data, dict) or data.get("code") != 0:
        print(f"Error writing {range_str}: {data}")
        return False
    print(f" Wrote {range_str}: {len(values)} cells")
    return True

# Sheet layout (0-based column index within a row returned by read_sheet()):
# A=0(sheet), B=1(name), C=2(date), D=3(trial_count), E=4(phone), F=5(grade),
# G=6(notes), H=7(UID), I=8(reg_date), J=9(channel)
_COL_TRIAL = 3
_COL_PHONE = 4
_COL_UID = 7
_COL_REG_DATE = 8
_COL_CHANNEL = 9
_COLUMN_LETTERS = {_COL_TRIAL: "D", _COL_UID: "H", _COL_REG_DATE: "I", _COL_CHANNEL: "J"}

# The first list element returned by read_sheet() is sheet row 3.
_FIRST_DATA_ROW = 3
# Marker found in the H column for rows whose phone had no account.
_UNREGISTERED = "未注册"

# ASCII-only on purpose: str.isdigit() and \d also accept other Unicode digits
# (e.g. full-width ones), which must never reach the SQL queries.
_PHONE_RE = re.compile(r"[0-9]{11}")
_UID_RE = re.compile(r"[0-9]+")


def _cell_text(row, index):
    """Return the stripped text of row[index]; "" if the cell is absent or None."""
    if index >= len(row) or row[index] is None:
        return ""
    return str(row[index]).strip()


def _scan_rows(rows):
    """Return (phones needing a UID lookup, UIDs already present in column H).

    A phone is queued when it is exactly 11 digits and the row's H cell is
    empty or holds the "unregistered" marker. Duplicated phones are kept so
    the list length equals the number of such rows.
    """
    phones = []
    existing_uids = set()
    for row in rows:
        phone = _cell_text(row, _COL_PHONE)
        h_val = _cell_text(row, _COL_UID)
        if _UID_RE.fullmatch(h_val):
            existing_uids.add(h_val)
        if _PHONE_RE.fullmatch(phone) and (not h_val or h_val == _UNREGISTERED):
            phones.append(phone)
    return phones, existing_uids


def _match_phone(tel, phones):
    """Return the first phone in *phones* that the stored *tel* can belong to.

    A fully stored 11-digit tel must equal the phone exactly. Otherwise tel is
    treated as masked (like "138****3774") and compared on its first three and
    last four characters.

    NOTE(review): a masked tel cannot tell apart two phones sharing the same
    first-3/last-4 digits; the first one in sheet order wins, as before.
    """
    if not isinstance(tel, str):
        return None
    unmasked = _PHONE_RE.fullmatch(tel) is not None
    for phone in phones:
        if unmasked:
            if phone == tel:
                return phone
        elif phone[:3] == tel[:3] and phone[-4:] == tel[-4:]:
            return phone
    return None


def _lookup_uids_by_phone(phones, chunk_size=50):
    """Query MySQL for the account id of each phone; return {phone: uid_str}."""
    phone_to_uid = {}
    if not phones:
        return phone_to_uid

    conn = mysql.connector.connect(**MYSQL_CONFIG)
    try:
        cur = conn.cursor()
        try:
            for start in range(0, len(phones), chunk_size):
                chunk = phones[start:start + chunk_size]
                # One LIKE pattern per distinct first-3/last-4 pair. Patterns
                # are passed as parameters, never spliced into the SQL text.
                patterns = list(dict.fromkeys(f"{p[:3]}%{p[-4:]}" for p in chunk))
                where = " OR ".join(["tel LIKE %s"] * len(patterns))
                query = (
                    "SELECT id, tel FROM vala_app_account "
                    f"WHERE ({where}) AND deleted_at IS NULL"
                )
                cur.execute(query, tuple(patterns))
                for uid, tel in cur.fetchall():
                    phone = _match_phone(tel, chunk)
                    if phone is not None:
                        phone_to_uid[phone] = str(uid)
                time.sleep(0.05)  # brief pause between chunks
        finally:
            cur.close()
    finally:
        # Close the connection even when a query fails part-way through.
        conn.close()
    return phone_to_uid


def _fetch_account_info(uids, chunk_size=100):
    """Query PostgreSQL for registration info and trial counts.

    Returns (uid_reg_info, uid_trial_count) where uid_reg_info maps a UID
    string to {"created_at": str, "download_channel": str} and uid_trial_count
    maps a UID string to the row count found in bi_user_course_detail.
    """
    uid_reg_info = {}
    uid_trial_count = {}
    if not uids:
        return uid_reg_info, uid_trial_count

    uid_list = sorted(uids)  # deterministic chunking
    chunks = [uid_list[i:i + chunk_size] for i in range(0, len(uid_list), chunk_size)]

    conn = psycopg2.connect(**PG_CONFIG)
    try:
        cur = conn.cursor()
        try:
            # Registration date and download channel.
            for chunk in chunks:
                placeholders = ",".join(["%s"] * len(chunk))
                query = f"SELECT id, created_at::date, download_channel FROM bi_vala_app_account WHERE id IN ({placeholders}) AND status=1 AND deleted_at IS NULL"
                cur.execute(query, chunk)
                for uid, created_at, download_channel in cur.fetchall():
                    uid_reg_info[str(uid)] = {
                        "created_at": str(created_at) if created_at else "",
                        "download_channel": download_channel or "",
                    }
                time.sleep(0.05)

            # Trial count per account.
            for chunk in chunks:
                placeholders = ",".join(["%s"] * len(chunk))
                query = f"SELECT account_id, COUNT(*) FROM bi_user_course_detail WHERE account_id IN ({placeholders}) AND expire_time IS NULL AND deleted_at IS NULL GROUP BY account_id"
                cur.execute(query, chunk)
                for uid, cnt in cur.fetchall():
                    uid_trial_count[str(uid)] = cnt
                time.sleep(0.05)
        finally:
            cur.close()
    finally:
        # Close the connection even when a query fails part-way through.
        conn.close()
    return uid_reg_info, uid_trial_count


def _build_updates(rows, phone_to_uid, uid_reg_info, uid_trial_count):
    """Return the cell updates as a list of (row_num, col_index, value).

    col_index is 3 (D, trial count), 7 (H, UID), 8 (I, registration date) or
    9 (J, download channel). For each row the H entry, if any, comes first.
    """
    updates = []
    for offset, row in enumerate(rows):
        row_num = offset + _FIRST_DATA_ROW
        phone = _cell_text(row, _COL_PHONE)
        h_val = _cell_text(row, _COL_UID)

        if _UID_RE.fullmatch(h_val):
            # A UID already recorded in H is authoritative. Using a UID looked
            # up for the same phone on behalf of another row could fill D/I/J
            # from an account different from the one shown in H.
            uid = h_val
        elif phone in phone_to_uid:
            uid = phone_to_uid[phone]
            # Only fill H when it is blank or carries the unregistered marker.
            if not h_val or h_val == _UNREGISTERED:
                updates.append((row_num, _COL_UID, uid))
        else:
            continue

        if uid in uid_trial_count:
            updates.append((row_num, _COL_TRIAL, str(uid_trial_count[uid])))
        info = uid_reg_info.get(uid)
        if info:
            if info["created_at"]:
                updates.append((row_num, _COL_REG_DATE, info["created_at"]))
            if info["download_channel"]:
                updates.append((row_num, _COL_CHANNEL, info["download_channel"]))
    return updates


def _write_updates(updates):
    """Write the updates column by column; return {col_index: cells written}.

    Within each column, consecutive sheet rows are merged into one
    write_range() call. Only cells from successful calls are counted.
    """
    by_col = {col: [] for col in _COLUMN_LETTERS}
    for row_num, col_idx, value in updates:
        by_col[col_idx].append((row_num, value))

    written = dict.fromkeys(_COLUMN_LETTERS, 0)
    # Write H first, then D, I, J.
    for col_idx in (_COL_UID, _COL_TRIAL, _COL_REG_DATE, _COL_CHANNEL):
        items = sorted(by_col[col_idx], key=lambda item: item[0])
        letter = _COLUMN_LETTERS[col_idx]
        start = 0
        while start < len(items):
            # Extend the run while the row numbers stay consecutive.
            end = start + 1
            while end < len(items) and items[end][0] == items[end - 1][0] + 1:
                end += 1
            values = [[value] for _, value in items[start:end]]
            if write_range(items[start][0], items[end - 1][0], letter, values):
                written[col_idx] += len(values)
            time.sleep(0.05)  # brief pause between API calls
            start = end
    return written


def main():
    """Fill in UID, trial count, registration date and download channel.

    Steps: read the sheet, look up UIDs in MySQL for rows that have an
    11-digit phone but no UID, fetch registration info and trial counts from
    PostgreSQL for every known UID, then write columns H, D, I and J back and
    print a summary of what was actually written.
    """
    print("Reading sheet data...")
    rows = read_sheet() or []
    print(f"Got {len(rows)} rows")

    # Step 1: identify rows needing a phone lookup and UIDs already in H.
    phones, existing_uids = _scan_rows(rows)
    print(f"Rows with 11-digit phone and empty H: {len(phones)}")
    print(f"Existing UIDs in H column: {len(existing_uids)}")

    # Step 2: phone -> UID mapping from MySQL.
    phone_to_uid = _lookup_uids_by_phone(phones)
    if phones:
        print(f"Phone -> UID matches found: {len(phone_to_uid)}")

    # Step 3: every UID we need details for.
    all_uids = existing_uids | set(phone_to_uid.values())
    print(f"Total unique UIDs to query: {len(all_uids)}")

    # Step 4: registration info and trial counts from PostgreSQL.
    uid_reg_info, uid_trial_count = _fetch_account_info(all_uids)
    print(f"UIDs with reg info: {len(uid_reg_info)}")
    print(f"UIDs with trial count: {len(uid_trial_count)}")

    # Step 5: decide what to write for each row.
    updates = _build_updates(rows, phone_to_uid, uid_reg_info, uid_trial_count)
    print(f"Total updates to write: {len(updates)}")

    # Step 6: write, merging contiguous rows per column.
    written = _write_updates(updates)

    # Summary: per-column numbers count cells that were successfully written,
    # so a failed range is not reported as written.
    print("\n=== SUMMARY ===")
    print(f"Phones matched in MySQL: {len(phone_to_uid)}")
    print(f"H column (UID) written: {written[_COL_UID]}")
    print(f"D column (trial count) written: {written[_COL_TRIAL]}")
    print(f"I column (reg date) written: {written[_COL_REG_DATE]}")
    print(f"J column (channel) written: {written[_COL_CHANNEL]}")
    print(f"Total cells written: {sum(written.values())}")


if __name__ == "__main__":
    main()