ai_member_xiaoxi/scripts/update_wudi_bot_sheet.py
2026-06-06 08:00:01 +08:00

208 lines
8.1 KiB
Python

#!/usr/bin/env python3
"""Update Bot 吴迪 sheet: H(UID), D(trial count), I(reg date), J(download channel).
Only fills empty cells and never overwrites existing data, with one exception:
an H cell containing the placeholder "未注册" is replaced once a UID is found."""
import json, re, time, sys, requests, mysql.connector, psycopg2
def get_token():
    """Fetch a Feishu tenant access token for the internal app.

    Returns:
        The ``tenant_access_token`` string from the auth response.

    Prints the error and exits with status 1 when the request fails, the
    response is not JSON, or the response carries no token. This mirrors how
    read_sheet reports Feishu errors instead of surfacing a bare KeyError.
    """
    # NOTE(review): the app credentials are hard-coded in source. Consider
    # loading them from the environment or a secret store and rotating them.
    try:
        # Without a timeout, requests can block forever on a stalled connection.
        r = requests.post(
            'https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal',
            json={"app_id": "cli_a929ae22e0b8dcc8", "app_secret": "OtFjMy7p3qE3VvLbMdcWidwgHOnGD4FJ"},
            timeout=30,
        )
        data = r.json()
    except (requests.RequestException, ValueError) as exc:
        print(f"Error fetching token: {exc}")
        sys.exit(1)
    token = data.get("tenant_access_token")
    if not token:
        print(f"Error fetching token: {data}")
        sys.exit(1)
    return token


# Fetched once at import time; read_sheet and write_range send it as the
# Bearer token on every request.
TOKEN = get_token()
# Feishu spreadsheet and worksheet that this script reads and fills.
SPREADSHEET_TOKEN = "NoZqsFi47hIOHEt9j8WcfRtbnug"
SHEET_ID = "f975f0"

# NOTE(review): both database passwords below are hard-coded in source.
# Consider loading them from the environment or a secret store.

# MySQL connection settings; main() queries vala_app_account here to map
# phone numbers to account ids.
MYSQL_CONFIG = {
    "host": "bj-cdb-dh2fkqa0.sql.tencentcdb.com",
    "port": 27751,
    "user": "read_only",
    "password": "fsdo45ijfmfmuu77$%^&",
    "database": "vala_user",
}

# PostgreSQL connection settings; main() queries the bi_* tables here for
# registration date, download channel and trial-course counts.
PG_CONFIG = {
    "host": "bj-postgres-16pob4sg.sql.tencentcdb.com",
    "port": 28591,
    "user": "ai_member",
    "password": "LdfjdjL83h3h3^$&**YGG*",
    "database": "vala_bi",
}

# Maps a raw download_channel value to the short label written to column J.
# Channels missing from this map are written unchanged.
CHANNEL_MAP = {
    "Apple App Store": "苹果",
    "科大讯飞学习机": "讯飞",
    "学而思学习机": "学而思",
    "华为应用市场": "华为",
    "小米应用市场": "小米",
    "应用宝应用市场": "应用宝",
    "希沃学习机": "希沃",
    "荣耀应用市场": "荣耀",
    "小度学习机": "小度",
    "oppo应用市场": "OPPO",
    "vivo应用市场": "VIVO",
    "京东方学习机": "京东方",
    "步步高学习机": "步步高",
    "作业帮学习机": "作业帮",
    "魅族应用市场": "魅族",
    "官网": "官网",
}
def read_sheet():
    """Read cells A3:J8045 of the target worksheet, rendered as strings.

    Returns:
        The ``values`` list from the Feishu response, one list of cells per
        sheet row; index 0 corresponds to sheet row 3.

    Prints the error and exits with status 1 when the request fails, the
    response is not JSON, or Feishu reports a non-zero ``code``.
    """
    url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values/{SHEET_ID}!A3:J8045?valueRenderOption=ToString"
    try:
        # Without a timeout, requests can block forever on a stalled connection.
        r = requests.get(url, headers={"Authorization": f"Bearer {TOKEN}"}, timeout=60)
        data = r.json()
    except (requests.RequestException, ValueError) as exc:
        # Route transport and decode failures through the same exit path as
        # API-level errors rather than dying with a traceback.
        print(f"Error reading sheet: {exc}")
        sys.exit(1)
    if data.get("code") != 0:
        print(f"Error reading sheet: {data}")
        sys.exit(1)
    return data["data"]["valueRange"]["values"]
def write_range(range_str, values):
    """Write *values* into one range of the spreadsheet via the Feishu API.

    Args:
        range_str: Target range in ``<sheet_id>!<A1>:<A1>`` form.
        values: Row-major list of rows, each row a list of cell values.

    Returns:
        True when Feishu reports ``code == 0``. False on any failure, which
        now includes transport errors, timeouts and non-JSON responses, so a
        single bad request no longer aborts the remaining writes.
    """
    url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{SPREADSHEET_TOKEN}/values"
    headers = {"Authorization": f"Bearer {TOKEN}", "Content-Type": "application/json"}
    body = {"valueRange": {"range": range_str, "values": values}}
    try:
        # Without a timeout, requests can block forever on a stalled connection.
        r = requests.put(url, headers=headers, json=body, timeout=30)
        data = r.json()
    except (requests.RequestException, ValueError) as exc:
        print(f" ERROR {range_str}: {exc}")
        return False
    if data.get("code") != 0:
        print(f" ERROR {range_str}: {data}")
        return False
    print(f" OK {range_str}: {len(values)} cells")
    return True
def write_contiguous(col_letter, items):
    """Write (row, value) pairs into one column, batching adjacent rows.

    Args:
        col_letter: Column letter to write into, e.g. ``"H"``.
        items: List of ``(row_number, value)`` entries. It is sorted in place
            by row number.

    Returns:
        The number of cells in the ranges that write_range reported as
        successfully written.
    """
    if not items:
        return 0

    items.sort(key=lambda entry: entry[0])

    written = 0
    run_start = 0
    count = len(items)
    for pos in range(1, count + 1):
        # Keep extending the current run while the next row is directly
        # below the previous one.
        if pos < count and items[pos][0] == items[pos - 1][0] + 1:
            continue

        run = items[run_start:pos]
        first_row = run[0][0]
        last_row = run[-1][0]
        cells = [[entry[1]] for entry in run]
        target = f"{SHEET_ID}!{col_letter}{first_row}:{col_letter}{last_row}"
        if write_range(target, cells):
            written += len(cells)
        # Short pause between requests.
        time.sleep(0.05)
        run_start = pos
    return written
def _is_phone(text):
    """Return True when *text* looks like an 11-digit phone number."""
    return re.match(r'^\d{11}$', text) is not None


def _parse_rows(rows):
    """Turn raw sheet rows into dicts holding the cells this script uses.

    ``row_num`` is the 1-based sheet row; the sheet is read starting at row 3,
    so list index 0 maps to row 3.
    """
    def text(value):
        return str(value).strip() if value is not None else ""

    parsed = []
    for i, row in enumerate(rows):
        # Rows may come back shorter than 10 cells; pad so column J is
        # always indexable.
        row = list(row) + [""] * (10 - len(row))
        parsed.append({
            "idx": i, "row_num": i + 3,
            "phone": text(row[4]),   # column E
            "h_val": text(row[7]),   # column H (UID)
            "d_val": text(row[3]),   # column D (trial count)
            "i_val": text(row[8]),   # column I (reg date)
            "j_val": text(row[9]),   # column J (download channel)
        })
    return parsed


def _lookup_uids_by_phone(phone_rows):
    """Map sheet phone numbers to account ids via MySQL ``vala_app_account``.

    NOTE(review): matching uses only the first 3 and last 4 digits of the
    phone, so numbers sharing those digits cannot be told apart. Presumably
    ``tel`` is stored masked - confirm.
    """
    phone_to_uid = {}
    if not phone_rows:
        return phone_to_uid

    conn = mysql.connector.connect(**MYSQL_CONFIG)
    try:
        cur = conn.cursor()
        for cs in range(0, len(phone_rows), 50):
            chunk = phone_rows[cs:cs + 50]
            # Bind the LIKE patterns as parameters instead of formatting
            # sheet data into the SQL text (the PG queries already do this).
            patterns = [f"{p['phone'][:3]}%{p['phone'][-4:]}" for p in chunk]
            where = " OR ".join(["tel LIKE %s"] * len(patterns))
            query = f"SELECT id, tel FROM vala_app_account WHERE ({where}) AND deleted_at IS NULL"
            cur.execute(query, patterns)
            for uid, tel in cur.fetchall():
                # Attribute each returned account to the first sheet phone in
                # this chunk with the same prefix and suffix.
                for p in chunk:
                    if p["phone"][:3] == tel[:3] and p["phone"][-4:] == tel[-4:]:
                        phone_to_uid[p["phone"]] = str(uid)
                        break
            time.sleep(0.05)
        cur.close()
    finally:
        # Release the connection even when a query raises.
        conn.close()
    return phone_to_uid


def _fetch_account_info(all_uids):
    """Query PostgreSQL for registration info and trial counts per UID.

    Returns:
        ``(uid_reg, uid_trial)``. ``uid_reg`` maps a UID string to
        ``{"created_at": str, "download_channel": str}``; ``uid_trial`` maps
        a UID string to its row count in ``bi_user_course_detail``.
    """
    uid_reg = {}
    uid_trial = {}
    if not all_uids:
        return uid_reg, uid_trial

    uid_list = list(all_uids)
    chunks = [uid_list[start:start + 100] for start in range(0, len(uid_list), 100)]

    conn = psycopg2.connect(**PG_CONFIG)
    try:
        cur = conn.cursor()
        for chunk in chunks:
            ph = ",".join(["%s"] * len(chunk))
            cur.execute(f"SELECT id, created_at::date, download_channel FROM bi_vala_app_account WHERE id IN ({ph}) AND status=1 AND deleted_at IS NULL", chunk)
            for uid, ca, dc in cur.fetchall():
                uid_reg[str(uid)] = {"created_at": str(ca) if ca else "", "download_channel": dc or ""}
            time.sleep(0.05)
        for chunk in chunks:
            ph = ",".join(["%s"] * len(chunk))
            cur.execute(f"SELECT account_id, COUNT(*) FROM bi_user_course_detail WHERE account_id IN ({ph}) AND expire_time IS NULL AND deleted_at IS NULL GROUP BY account_id", chunk)
            for uid, cnt in cur.fetchall():
                uid_trial[str(uid)] = cnt
            time.sleep(0.05)
        cur.close()
    finally:
        # Release the connection even when a query raises.
        conn.close()
    return uid_reg, uid_trial


def _build_updates(parsed, phone_to_uid, uid_reg, uid_trial):
    """Collect ``(row_num, value)`` writes per column, filling empty cells only.

    Returns:
        Dict with keys ``"H"``, ``"D"``, ``"I"``, ``"J"``, each a list of
        ``(row_num, value)`` tuples.
    """
    updates = {"H": [], "D": [], "I": [], "J": []}
    for p in parsed:
        row_num = p["row_num"]
        phone = p["phone"]
        h_val = p["h_val"]

        # A UID resolved from the phone wins over whatever column H holds.
        matched_uid = phone_to_uid.get(phone) if _is_phone(phone) else None
        if matched_uid is not None:
            uid = matched_uid
        elif h_val.isdigit():
            uid = h_val
        else:
            uid = None
        if not uid:
            continue

        # H: write UID only if phone matched and H is empty/未注册
        if matched_uid is not None and (not h_val or h_val == "未注册"):
            updates["H"].append((row_num, matched_uid))

        # D: trial count only if currently empty
        if uid in uid_trial and not p["d_val"]:
            updates["D"].append((row_num, str(uid_trial[uid])))

        reg = uid_reg.get(uid)
        if reg is None:
            continue
        # I: reg date only if currently empty
        if reg["created_at"] and not p["i_val"]:
            updates["I"].append((row_num, reg["created_at"]))
        # J: channel only if currently empty; unmapped channels pass through
        if reg["download_channel"] and not p["j_val"]:
            raw_ch = reg["download_channel"]
            updates["J"].append((row_num, CHANNEL_MAP.get(raw_ch, raw_ch)))
    return updates


def main():
    """Read the sheet, resolve UIDs and account info, and fill empty cells.

    Steps: read rows -> map phones to UIDs (MySQL) -> fetch registration
    date, channel and trial counts (PostgreSQL) -> build per-column updates
    -> write them back -> print a summary.
    """
    print("=== Reading sheet ===")
    rows = read_sheet()
    print(f"Rows: {len(rows)}")
    parsed = _parse_rows(rows)

    # Step 1: Phone -> UID via MySQL
    phone_rows = [p for p in parsed if _is_phone(p["phone"])]
    print(f"Rows with 11-digit phone: {len(phone_rows)}")
    phone_to_uid = _lookup_uids_by_phone(phone_rows)
    print(f"Phone->UID matches: {len(phone_to_uid)}")

    # Step 2: Collect all UIDs (already in column H plus newly resolved)
    all_uids = {p["h_val"] for p in parsed if p["h_val"].isdigit()}
    all_uids.update(phone_to_uid.values())
    print(f"Total UIDs to query PG: {len(all_uids)}")

    # Step 3: PG queries
    uid_reg, uid_trial = _fetch_account_info(all_uids)
    print(f"Reg info: {len(uid_reg)}, Trial counts: {len(uid_trial)}")

    # Step 4: Build updates - ONLY fill empty cells
    updates = _build_updates(parsed, phone_to_uid, uid_reg, uid_trial)
    print(f"H updates: {len(updates['H'])}, D updates: {len(updates['D'])}, I updates: {len(updates['I'])}, J updates: {len(updates['J'])}")

    # Step 5: Write. Keep the per-column counts that write_contiguous
    # returns so the summary reports cells actually written, not merely
    # planned, when some ranges fail.
    written = {col: write_contiguous(col, updates[col]) for col in ("H", "D", "I", "J")}
    total = sum(written.values())

    print("\n=== SUMMARY ===")
    print(f"Phones matched in MySQL: {len(phone_to_uid)}")
    print(f"H column (UID) written: {written['H']}")
    print(f"D column (trial count) written: {written['D']}")
    print(f"I column (reg date) written: {written['I']}")
    print(f"J column (channel) written: {written['J']}")
    print(f"Total cells written: {total}")


if __name__ == "__main__":
    main()