ai_member_xiaoxi/scripts/export_phone_md5.py
2026-06-04 08:00:01 +08:00

136 lines
4.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
导出 bi_vala_app_account 手机号 MD5 映射表
用途:
1. 全量导出python3 export_phone_md5.py → output/phone_md5_export.csv
2. 按 account_id 导出python3 export_phone_md5.py --ids 123,456,789
3. 被其他 skill 调用from scripts.export_phone_md5 import query_phone_md5
输出 CSV 字段account_id, tel_encrypt, phone_plain, phone_md5
"""
import csv
import os
import sys
import argparse
from datetime import datetime
import psycopg2
import psycopg2.extras
SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
WORKSPACE_DIR = os.path.dirname(SCRIPTS_DIR)
sys.path.insert(0, SCRIPTS_DIR)
from phone_encrypt import decrypt_phone, phone_md5, tel_encrypt_to_md5
# ── 数据库配置 ──
PG_HOST = "bj-postgres-16pob4sg.sql.tencentcdb.com"
PG_PORT = 28591
PG_USER = "ai_member"
PG_DB = "vala_bi"
# 从 secrets.env 读取密码
SECRETS_FILE = os.path.join(WORKSPACE_DIR, "secrets.env")
def _get_pg_password() -> str:
"""从 secrets.env 读取 PostgreSQL 密码"""
with open(SECRETS_FILE) as f:
for line in f:
line = line.strip()
if line.startswith("PG_ONLINE_PASSWORD="):
return line.split("=", 1)[1].strip().strip('"').strip("'")
raise RuntimeError("未找到 PG_ONLINE_PASSWORD 在 secrets.env 中")
def _get_conn():
return psycopg2.connect(
host=PG_HOST, port=PG_PORT, user=PG_USER,
password=_get_pg_password(), dbname=PG_DB
)
def query_phone_md5(account_ids: list[int] = None) -> list[dict]:
"""
查询手机号 MD5 映射
Args:
account_ids: 指定 account_id 列表None 表示全量
Returns:
[{account_id, tel_encrypt, phone_plain, phone_md5}, ...]
"""
conn = _get_conn()
try:
cur = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
if account_ids:
sql = """
SELECT id AS account_id, tel_encrypt
FROM bi_vala_app_account
WHERE id = ANY(%s) AND tel_encrypt IS NOT NULL AND tel_encrypt != ''
"""
cur.execute(sql, (account_ids,))
else:
sql = """
SELECT id AS account_id, tel_encrypt
FROM bi_vala_app_account
WHERE tel_encrypt IS NOT NULL AND tel_encrypt != ''
"""
cur.execute(sql)
rows = cur.fetchall()
results = []
for row in rows:
tel_enc = row["tel_encrypt"]
try:
phone = decrypt_phone(tel_enc)
md5 = phone_md5(phone)
except Exception:
phone = ""
md5 = ""
results.append({
"account_id": row["account_id"],
"tel_encrypt": tel_enc,
"phone_plain": phone,
"phone_md5": md5,
})
return results
finally:
conn.close()
def export_csv(account_ids: list[int] = None, output_path: str = None) -> str:
"""
导出为 CSV 文件
Returns:
输出文件路径
"""
if output_path is None:
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
output_path = os.path.join(WORKSPACE_DIR, "output", f"phone_md5_export_{ts}.csv")
os.makedirs(os.path.dirname(output_path), exist_ok=True)
results = query_phone_md5(account_ids)
with open(output_path, "w", newline="", encoding="utf-8") as f:
writer = csv.DictWriter(f, fieldnames=["account_id", "tel_encrypt", "phone_plain", "phone_md5"])
writer.writeheader()
writer.writerows(results)
print(f"导出完成: {len(results)} 条记录 → {output_path}")
return output_path
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="导出手机号 MD5 映射表")
parser.add_argument("--ids", help="指定 account_id逗号分隔不传则全量导出")
parser.add_argument("--output", help="输出文件路径,默认 output/phone_md5_export_<时间戳>.csv")
args = parser.parse_args()
ids = None
if args.ids:
ids = [int(x.strip()) for x in args.ids.split(",") if x.strip()]
export_csv(ids, args.output)