"""采集器配置""" import json import os from dataclasses import dataclass, field, asdict from dotenv import load_dotenv load_dotenv() DEFAULT_CONFIG_PATH = os.path.expanduser("~/.wechat-cli/collect-chats/config.json") @dataclass class CollectorConfig: # ---- MySQL ---- mysql_host: str = os.environ.get("MYSQL_HOST", "localhost") mysql_port: int = int(os.environ.get("MYSQL_PORT", "3306")) mysql_user: str = os.environ.get("MYSQL_USER", "root") mysql_password: str = os.environ.get("MYSQL_PASSWORD", "") mysql_database: str = os.environ.get("MYSQL_DATABASE", "") mysql_table: str = os.environ.get("MYSQL_TABLE", "wechat_group_message") # ---- 腾讯 COS ---- cos_secret_id: str = os.environ.get("COS_SECRET_ID", "") cos_secret_key: str = os.environ.get("COS_SECRET_KEY", "") cos_bucket: str = os.environ.get("COS_BUCKET", "") cos_region: str = os.environ.get("COS_REGION", "ap-beijing") cos_download_domain: str = os.environ.get("COS_DOWNLOAD_DOMAIN", "") cos_base_path: str = os.environ.get("COS_BASE_PATH", "") # ---- 扫描策略 ---- min_interval: float = 15.0 # hot 群扫描间隔(秒) base_interval: float = 30.0 # warm 群扫描间隔 max_interval: float = 120.0 # cold 群最大间隔(保证 ≤2min 入库) backoff_factor: float = 1.2 # cold 退避系数 batch_size: int = 10 # 每轮最多扫描群数 messages_per_scan: int = 200 # 每群每次最多拉取消息数 jitter_max: float = 1.0 # 群间随机延迟上限(秒) cycle_sleep: float = 5.0 # 轮次间休眠(秒) discovery_interval: float = 180.0 # 群聊发现间隔(秒) hot_threshold: int = 300 # 最新消息 < N秒 算 hot warm_threshold: int = 3600 # 最新消息 < N秒 算 warm # ---- 过滤 ---- whitelist: list = field(default_factory=list) # 空=全部采集 blacklist: list = field(default_factory=list) # 跳过的群(支持正则) # ---- 回溯补录 ---- backfill_interval: float = 600.0 # 定时扫描间隔(秒), 默认 10 分钟 backfill_lookback_days: int = 7 # 回溯天数 backfill_enabled: bool = True # 是否启用回溯补录 # ---- 其他 ---- log_level: str = "DEBUG" @classmethod def load(cls, path=None): path = path or DEFAULT_CONFIG_PATH if os.path.isfile(path): with open(path, encoding="utf-8") as f: data = json.load(f) return cls(**{k: v for k, v in data.items() if k in cls.__dataclass_fields__}) return cls() def save(self, path=None): path = path or DEFAULT_CONFIG_PATH os.makedirs(os.path.dirname(path), exist_ok=True) with open(path, "w", encoding="utf-8") as f: json.dump(asdict(self), f, ensure_ascii=False, indent=2)