"""Collector configuration."""
|
|
|
|
import json
|
|
import os
|
|
from dataclasses import dataclass, field, asdict
|
|
from dotenv import load_dotenv
|
|
|
|
# Populate os.environ from a .env file (python-dotenv) before the
# os.environ lookups further down in this module are evaluated.
load_dotenv()
|
|
|
|
# Default location of the JSON config file, under the current user's home
# directory ("~" is expanded once, when this module is imported).
DEFAULT_CONFIG_PATH = os.path.expanduser("~/.wechat-cli/collect-chats/config.json")
|
|
|
|
|
|
@dataclass
class CollectorConfig:
    """Settings for the chat collector.

    Groups MySQL connection parameters, Tencent COS parameters, the scan
    strategy, group filters, backfill options and the log level.

    The MySQL and COS defaults are read from environment variables when the
    class body is executed (i.e. at import time); later changes to the
    environment do not affect them. Values loaded from a JSON config file via
    :meth:`load` take precedence over these defaults.
    """

    # ---- MySQL ----
    mysql_host: str = os.environ.get("MYSQL_HOST", "localhost")
    mysql_port: int = int(os.environ.get("MYSQL_PORT", "3306"))
    mysql_user: str = os.environ.get("MYSQL_USER", "root")
    mysql_password: str = os.environ.get("MYSQL_PASSWORD", "")
    mysql_database: str = os.environ.get("MYSQL_DATABASE", "")
    mysql_table: str = os.environ.get("MYSQL_TABLE", "wechat_group_message")

    # ---- Tencent COS ----
    cos_secret_id: str = os.environ.get("COS_SECRET_ID", "")
    cos_secret_key: str = os.environ.get("COS_SECRET_KEY", "")
    cos_bucket: str = os.environ.get("COS_BUCKET", "")
    cos_region: str = os.environ.get("COS_REGION", "ap-beijing")
    cos_download_domain: str = os.environ.get("COS_DOWNLOAD_DOMAIN", "")
    cos_base_path: str = os.environ.get("COS_BASE_PATH", "")

    # ---- Scan strategy ----
    min_interval: float = 15.0  # scan interval for hot groups (seconds)
    base_interval: float = 30.0  # scan interval for warm groups
    max_interval: float = 120.0  # max interval for cold groups (keeps ingestion within 2 min)
    backoff_factor: float = 1.2  # backoff factor for cold groups
    batch_size: int = 10  # max number of groups scanned per cycle
    messages_per_scan: int = 200  # max messages fetched per group per scan
    jitter_max: float = 1.0  # upper bound of the random delay between groups (seconds)
    cycle_sleep: float = 5.0  # sleep between cycles (seconds)
    discovery_interval: float = 180.0  # group-chat discovery interval (seconds)
    hot_threshold: int = 300  # latest message newer than N seconds => hot
    warm_threshold: int = 3600  # latest message newer than N seconds => warm

    # ---- Filters ----
    whitelist: list = field(default_factory=list)  # empty = collect every group
    blacklist: list = field(default_factory=list)  # groups to skip (regex supported)

    # ---- Backfill ----
    backfill_interval: float = 600.0  # periodic scan interval (seconds), default 10 minutes
    backfill_lookback_days: int = 7  # number of days to look back
    backfill_enabled: bool = True  # whether backfill is enabled

    # ---- Misc ----
    log_level: str = "DEBUG"

    @classmethod
    def load(cls, path=None) -> "CollectorConfig":
        """Build a config from a JSON file.

        Args:
            path: Path of the JSON config file. Falls back to
                ``DEFAULT_CONFIG_PATH`` when falsy.

        Returns:
            A ``CollectorConfig`` populated from the file. Keys that are not
            dataclass fields are ignored and fields missing from the file keep
            their defaults. If the file does not exist, an all-defaults
            instance is returned.
        """
        path = path or DEFAULT_CONFIG_PATH
        if not os.path.isfile(path):
            return cls()

        with open(path, encoding="utf-8") as f:
            data = json.load(f)

        # Drop unknown keys so a stale or hand-edited file cannot break __init__.
        known = {k: v for k, v in data.items() if k in cls.__dataclass_fields__}
        return cls(**known)

    def save(self, path=None) -> None:
        """Write this config to a JSON file (UTF-8, 2-space indent).

        Args:
            path: Destination file. Falls back to ``DEFAULT_CONFIG_PATH`` when
                falsy. Missing parent directories are created.

        NOTE: every field is written, including ``mysql_password`` and
        ``cos_secret_key``, in plain text.
        """
        path = path or DEFAULT_CONFIG_PATH

        # A bare filename ("config.json") has an empty dirname, and
        # os.makedirs("") raises FileNotFoundError; the current directory
        # already exists, so there is nothing to create in that case.
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)

        with open(path, "w", encoding="utf-8") as f:
            json.dump(asdict(self), f, ensure_ascii=False, indent=2)
|