201 lines
7.2 KiB
Python
201 lines
7.2 KiB
Python
#!/usr/bin/env python3
|
|
"""Update the 小龙 sheet with UID, trial count, registration date, and download channel.
|
|
V2: Fixed phone matching and batch writes."""
|
|
import json
import os
import re
import sys
import time

import mysql.connector
import psycopg2
import requests
|
|
|
|
# --- Connection settings -----------------------------------------------------
# SECURITY NOTE(review): live credentials are committed in this file. Every
# value below can be overridden with a VALA_* environment variable; the
# literals are kept only as fallbacks so existing invocations keep working.
# Rotate these secrets and delete the fallbacks once the environment is set up.
# NOTE(review): the Feishu token has the "t-" prefix of a tenant access token,
# which is presumably short-lived -- confirm, and prefer supplying a fresh one
# through VALA_FEISHU_TOKEN instead of editing this file.
FEISHU_TOKEN = os.environ.get(
    "VALA_FEISHU_TOKEN", "t-g10464c0UK5L67JVXSDDT3EWM4DPLSDY5C7R7NS6"
)
SPREADSHEET_TOKEN = os.environ.get(
    "VALA_SPREADSHEET_TOKEN", "DU4dsUOJThfbPStMcgBcsMH5nyb"
)
SHEET_ID = os.environ.get("VALA_SHEET_ID", "dff8c7")

# Keyword arguments passed verbatim to mysql.connector.connect().
MYSQL_CONFIG = {
    "host": os.environ.get("VALA_MYSQL_HOST", "bj-cdb-dh2fkqa0.sql.tencentcdb.com"),
    "port": int(os.environ.get("VALA_MYSQL_PORT", 27751)),
    "user": os.environ.get("VALA_MYSQL_USER", "read_only"),
    "password": os.environ.get("VALA_MYSQL_PASSWORD", "fsdo45ijfmfmuu77$%^&"),
    "database": os.environ.get("VALA_MYSQL_DATABASE", "vala_user"),
}

# Keyword arguments passed verbatim to psycopg2.connect().
PG_CONFIG = {
    "host": os.environ.get("VALA_PG_HOST", "bj-postgres-16pob4sg.sql.tencentcdb.com"),
    "port": int(os.environ.get("VALA_PG_PORT", 28591)),
    "user": os.environ.get("VALA_PG_USER", "ai_member"),
    "password": os.environ.get("VALA_PG_PASSWORD", "LdfjdjL83h3h3^$&**YGG*"),
    "database": os.environ.get("VALA_PG_DATABASE", "vala_bi"),
}
|
|
|
|
def read_sheet(cell_range="A3:J1142", timeout=30):
    """Fetch a block of cells from the configured Feishu sheet.

    Args:
        cell_range: A1-style range inside ``SHEET_ID``. Defaults to the
            range the script has always read (``A3:J1142``).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``values`` list from the API response (one list per row), or an
        empty list when the response carries no values.

    Exits the process with status 1 when the request fails, the response is
    not JSON, or the API reports a non-zero ``code``.
    """
    url = (
        "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/"
        f"{SPREADSHEET_TOKEN}/values/{SHEET_ID}!{cell_range}"
        "?valueRenderOption=ToString"
    )
    try:
        # Without a timeout requests waits indefinitely on a stalled socket.
        response = requests.get(
            url,
            headers={"Authorization": f"Bearer {FEISHU_TOKEN}"},
            timeout=timeout,
        )
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a non-JSON body (e.g. a gateway error page).
        print(f"Error reading sheet: {exc}")
        sys.exit(1)

    if data.get("code") != 0:
        print(f"Error reading sheet: {data}")
        sys.exit(1)
    return data["data"]["valueRange"].get("values") or []
|
|
|
|
def write_range(range_str, values, timeout=30):
    """Write a 2-D list of values into one range of the Feishu spreadsheet.

    Args:
        range_str: Target range including the sheet id, e.g. ``"<sheet>!H3:H10"``.
        values: List of rows, each row a list of cell values.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        True when the API reports ``code == 0``; False on any failure
        (transport error, non-JSON response, or non-zero API code). The
        outcome is also printed.
    """
    url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values"
    headers = {
        "Authorization": f"Bearer {FEISHU_TOKEN}",
        "Content-Type": "application/json",
    }
    payload = {"valueRange": {"range": range_str, "values": values}}
    try:
        # Without a timeout requests waits indefinitely on a stalled socket.
        response = requests.put(url, headers=headers, json=payload, timeout=timeout)
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Report transport/JSON failures through the same False contract as
        # API-level errors instead of aborting the caller with a traceback.
        print(f" ERROR {range_str}: {exc}")
        return False

    if data.get("code") != 0:
        print(f" ERROR {range_str}: {data}")
        return False
    print(f" OK {range_str}: {len(values)} cells")
    return True
|
|
|
|
# A column-E value is treated as a phone number only if it is exactly 11 digits.
_PHONE_RE = re.compile(r"^\d{11}$")
# Marker found in column H meaning "not registered"; such rows are re-checked.
_UNREGISTERED = "未注册"
# Zero-based indexes (within the A..J read range) of the columns this script fills.
_COLUMN_INDEX = {"H": 7, "D": 3, "I": 8, "J": 9}


def _cell_text(value):
    """Return a cell as stripped text; empty/None cells become ""."""
    if not value:
        return ""
    # str() first: the API may hand back non-string cells, which have no .strip().
    return str(value).strip()


def _parse_rows(rows):
    """Pad every sheet row to 10 columns and extract the fields used later."""
    parsed = []
    for idx, row in enumerate(rows):
        cells = list(row) + [""] * (10 - len(row))
        parsed.append({
            "idx": idx,
            "row_num": idx + 3,  # the read range starts at sheet row 3
            "phone": _cell_text(cells[4]),
            "h_val": _cell_text(cells[7]),
            "cells": cells,  # kept so untouched cells can be written back as-is
        })
    return parsed


def _lookup_uids_by_phone(phone_rows, chunk_size=50):
    """Map sheet phone numbers to account ids via MySQL ``vala_app_account``.

    Matching uses only the first 3 and last 4 digits of the phone.
    NOTE(review): presumably ``tel`` is stored masked; this match is ambiguous
    when two numbers share prefix and suffix -- confirm this is acceptable.
    """
    phone_to_uid = {}
    if not phone_rows:
        return phone_to_uid

    conn = mysql.connector.connect(**MYSQL_CONFIG)
    try:
        cur = conn.cursor()
        try:
            for start in range(0, len(phone_rows), chunk_size):
                chunk = phone_rows[start:start + chunk_size]
                # First phone in the chunk wins for a given (prefix, suffix),
                # mirroring a first-match scan over the chunk.
                phone_by_key = {}
                for entry in chunk:
                    phone = entry["phone"]
                    phone_by_key.setdefault((phone[:3], phone[-4:]), phone)

                patterns = [f"{e['phone'][:3]}%{e['phone'][-4:]}" for e in chunk]
                conditions = " OR ".join(["tel LIKE %s"] * len(patterns))
                # Values are bound as parameters rather than spliced into SQL.
                cur.execute(
                    "SELECT id, tel FROM vala_app_account "
                    f"WHERE ({conditions}) AND deleted_at IS NULL",
                    patterns,
                )
                for uid, tel in cur.fetchall():
                    phone = phone_by_key.get((tel[:3], tel[-4:]))
                    if phone is not None:
                        phone_to_uid[phone] = str(uid)
                time.sleep(0.05)
        finally:
            cur.close()
    finally:
        conn.close()
    return phone_to_uid


def _fetch_account_info(all_uids, chunk_size=100):
    """Query PostgreSQL for registration info and trial counts per uid.

    Returns ``(uid_reg, uid_trial)`` where ``uid_reg`` maps uid -> dict with
    ``created_at`` and ``download_channel`` strings, and ``uid_trial`` maps
    uid -> row count from ``bi_user_course_detail``.
    """
    uid_reg = {}
    uid_trial = {}
    if not all_uids:
        return uid_reg, uid_trial

    uid_list = sorted(all_uids)
    conn = psycopg2.connect(**PG_CONFIG)
    try:
        cur = conn.cursor()
        try:
            for start in range(0, len(uid_list), chunk_size):
                chunk = uid_list[start:start + chunk_size]
                placeholders = ",".join(["%s"] * len(chunk))
                cur.execute(
                    "SELECT id, created_at::date, download_channel "
                    f"FROM bi_vala_app_account WHERE id IN ({placeholders}) "
                    "AND status=1 AND deleted_at IS NULL",
                    chunk,
                )
                for uid, created, channel in cur.fetchall():
                    uid_reg[str(uid)] = {
                        "created_at": str(created) if created else "",
                        "download_channel": channel or "",
                    }
                time.sleep(0.05)

            for start in range(0, len(uid_list), chunk_size):
                chunk = uid_list[start:start + chunk_size]
                placeholders = ",".join(["%s"] * len(chunk))
                cur.execute(
                    "SELECT account_id, COUNT(*) FROM bi_user_course_detail "
                    f"WHERE account_id IN ({placeholders}) "
                    "AND expire_time IS NULL AND deleted_at IS NULL "
                    "GROUP BY account_id",
                    chunk,
                )
                for uid, count in cur.fetchall():
                    uid_trial[str(uid)] = count
                time.sleep(0.05)
        finally:
            cur.close()
    finally:
        conn.close()
    return uid_reg, uid_trial


def _build_columns(parsed, phone_to_uid, uid_reg, uid_trial):
    """Build full-length value lists for columns H, D, I and J.

    Each list starts from the sheet's current cell values so that writing the
    whole column back does not blank rows that received no new value.
    NOTE(review): cells are re-written with the value the API returned; if a
    column holds formulas they would be replaced by their rendered value.

    Returns ``(columns, counts)`` keyed by column letter; ``counts`` is the
    number of rows that received a new value in that column.
    """
    columns = {
        letter: ["" if p["cells"][col] is None else p["cells"][col] for p in parsed]
        for letter, col in _COLUMN_INDEX.items()
    }
    counts = dict.fromkeys(columns, 0)

    for p in parsed:
        i = p["idx"]
        h_val = p["h_val"]
        matched_uid = phone_to_uid.get(p["phone"])

        # A phone match takes precedence over a numeric id already in H.
        uid = matched_uid or (h_val if h_val.isdigit() else None)
        if not uid:
            continue

        # H: fill in the UID only where H was empty or marked unregistered.
        if matched_uid and (not h_val or h_val == _UNREGISTERED):
            columns["H"][i] = uid
            counts["H"] += 1

        # D: trial count
        if uid in uid_trial:
            columns["D"][i] = str(uid_trial[uid])
            counts["D"] += 1

        reg = uid_reg.get(uid)
        if reg:
            # I: registration date
            if reg["created_at"]:
                columns["I"][i] = reg["created_at"]
                counts["I"] += 1
            # J: download channel
            if reg["download_channel"]:
                columns["J"][i] = reg["download_channel"]
                counts["J"] += 1

    return columns, counts


def main():
    """Fill UID, trial count, registration date and channel into the sheet.

    Reads the sheet, resolves phone numbers to UIDs through MySQL, fetches
    registration info and trial counts from PostgreSQL, then writes columns
    H, D, I and J back in one request per column and prints a summary.
    """
    print("=== Reading sheet ===")
    rows = read_sheet()
    print(f"Rows: {len(rows)}")

    parsed = _parse_rows(rows)

    # Rows needing a phone lookup: valid phone and no UID recorded in H yet.
    phone_rows = [
        p for p in parsed
        if _PHONE_RE.match(p["phone"])
        and (not p["h_val"] or p["h_val"] == _UNREGISTERED)
    ]
    existing_uids = {p["h_val"] for p in parsed if p["h_val"].isdigit()}
    print(f"Need phone lookup: {len(phone_rows)}, existing UIDs: {len(existing_uids)}")

    # Step 2: MySQL phone -> UID
    phone_to_uid = _lookup_uids_by_phone(phone_rows)
    print(f"Phone->UID matches: {len(phone_to_uid)}")

    # Step 3: Collect all UIDs
    all_uids = existing_uids | set(phone_to_uid.values())
    print(f"Total UIDs to query PG: {len(all_uids)}")

    # Step 4: PostgreSQL queries
    uid_reg, uid_trial = _fetch_account_info(all_uids)
    print(f"Reg info: {len(uid_reg)}, Trial counts: {len(uid_trial)}")

    # Step 5: Build full column arrays for batch write
    columns, counts = _build_columns(parsed, phone_to_uid, uid_reg, uid_trial)

    # Step 6: Write in full column batches, skipping columns with no changes.
    failed = []
    for letter in ("H", "D", "I", "J"):
        if not counts[letter]:
            continue
        values = columns[letter]
        cell_range = f"{SHEET_ID}!{letter}3:{letter}{2 + len(values)}"
        if not write_range(cell_range, [[v] for v in values]):
            failed.append(letter)
        time.sleep(0.1)

    print("\n=== SUMMARY ===")
    print(f"Phones matched in MySQL: {len(phone_to_uid)}")
    print(f"H column (UID) written: {counts['H']}")
    print(f"D column (trial count) written: {counts['D']}")
    print(f"I column (reg date) written: {counts['I']}")
    print(f"J column (channel) written: {counts['J']}")
    if failed:
        # The counts above describe intended updates; flag writes that did not land.
        print(f"Columns that FAILED to write: {', '.join(failed)}")
|
|
|
|
# Run the update only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|