#!/usr/bin/env python3 """ 飞书群聊消息同步到电子表格 - Bot身份拉取群消息 - 非文本媒体上传到腾讯COS - 记录写入飞书电子表格 使用前修改下方配置常量。 """ import os import sys import json import subprocess import logging import re from datetime import datetime, timezone, timedelta from pathlib import Path # ============ 配置(使用前必须修改)============ # 飞书群 CHAT_ID = "oc_xxx" # 目标群ID # 飞书电子表格 SPREADSHEET_TOKEN = "xxx" # 电子表格token SHEET_ID = "xxx" # sheet页ID # Bot凭证 LARK_CLI_CONFIG = "/root/.openclaw/credentials/xiaokui" # Bot凭证目录 # 同步状态 LAST_SYNC_FILE = "/tmp/last_feedback_sync_time" # 同步时间记录文件 WORK_DIR = "/tmp/feedback_sync_workdir" # 临时工作目录 # COS(从 tencent-cos-upload skill 引用) COS_BASE_PATH = "vala_llm/user_feedback" # COS上的基础路径 # 时区 TZ = timezone(timedelta(hours=8)) # ============ 配置结束 ============ logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(message)s', datefmt='%Y-%m-%d %H:%M:%S') log = logging.getLogger(__name__) # 引入COS上传器 sys.path.insert(0, '/root/.openclaw/skills/tencent-cos-upload/scripts') from cos_upload import CosUploader def lark_cli(*args, cwd=None): """调用lark-cli并返回解析后的JSON""" env = os.environ.copy() env['LARKSUITE_CLI_CONFIG_DIR'] = LARK_CLI_CONFIG cmd = ['lark-cli'] + list(args) result = subprocess.run(cmd, capture_output=True, text=True, env=env, cwd=cwd or WORK_DIR) if result.returncode != 0 and not result.stdout: log.error(f"lark-cli error: {result.stderr}") return None try: return json.loads(result.stdout) except json.JSONDecodeError: log.error(f"lark-cli JSON parse error: {result.stdout[:200]}") return None def fetch_messages(start_iso: str, end_iso: str) -> list: """拉取群消息""" result = lark_cli( 'im', '+chat-messages-list', '--chat-id', CHAT_ID, '--start', start_iso, '--end', end_iso, '--sort', 'asc', '--page-size', '50', '--as', 'bot' ) if not result or not result.get('ok'): err = result.get('error', {}).get('message', 'unknown') if result else 'no response' log.error(f"拉取消息失败: {err}") return [] return result.get('data', {}).get('messages', []) def download_resource(msg_id: str, file_key: str, res_type: str, filename: str) -> str: """下载飞书消息中的资源,返回本地路径""" result = lark_cli( 'im', '+messages-resources-download', '--message-id', msg_id, '--file-key', file_key, '--type', res_type, '--output', filename, '--as', 'bot', cwd=WORK_DIR ) local_path = os.path.join(WORK_DIR, filename) if result and result.get('ok') and os.path.exists(local_path) and os.path.getsize(local_path) > 0: return local_path return None def append_to_sheet(rows: list): """追加行到电子表格""" if not rows: return values_json = json.dumps(rows, ensure_ascii=False) result = lark_cli( 'sheets', '+append', '--spreadsheet-token', SPREADSHEET_TOKEN, '--sheet-id', SHEET_ID, '--range', f'{SHEET_ID}!A:D', '--values', values_json, '--as', 'bot' ) if result and result.get('ok'): log.info(f"电子表格写入成功: {len(rows)} 行") else: err = result.get('error', {}).get('message', 'unknown') if result else 'no response' log.error(f"电子表格写入失败: {err}") def extract_file_key(msg_type: str, content: str) -> tuple: """从消息内容中提取file_key和资源类型""" if msg_type == 'image': match = re.search(r'img_[a-zA-Z0-9_-]+', content) return (match.group(0), 'image') if match else (None, None) elif msg_type in ('media', 'audio'): match = re.search(r'file_[a-zA-Z0-9_-]+', content) return (match.group(0), 'file') if match else (None, None) elif msg_type == 'file': try: c = json.loads(content) return (c.get('file_key'), 'file') except Exception: match = re.search(r'file_[a-zA-Z0-9_-]+', content) return (match.group(0), 'file') if match else (None, None) return None, None def get_media_info(msg_type: str, content: str) -> tuple: """返回 (cos子目录, 文件扩展名, content_type)""" if msg_type == 'image': return 'image', '.png', 'image/png' elif msg_type == 'media': name_match = re.search(r'name="([^"]*)"', content) ext = os.path.splitext(name_match.group(1))[1] if name_match else '.mp4' return 'video', ext or '.mp4', 'video/mp4' elif msg_type == 'audio': return 'audio', '.ogg', 'audio/ogg' elif msg_type == 'file': try: c = json.loads(content) ext = os.path.splitext(c.get('file_name', ''))[1] except Exception: ext = '' return 'file', ext or '.bin', 'application/octet-stream' return 'other', '', 'application/octet-stream' def process_message(msg: dict, cos_uploader, date_str: str) -> list: """处理单条消息,返回表格行 [时间, 反馈人, 类型, 内容/URL] 或 None""" msg_id = msg.get('message_id', '') sender_name = msg.get('sender', {}).get('name', '未知') create_time = msg.get('create_time', '') msg_type = msg.get('msg_type', '') content = msg.get('content', '') deleted = msg.get('deleted', False) if deleted or msg_type == 'system': return None # 文本消息 if msg_type in ('text', 'post'): text = re.sub(r'<[^>]*>', '', content).strip() return [create_time, sender_name, '文本', text] # 表情包 if msg_type == 'sticker': return [create_time, sender_name, '表情', '(表情包)'] # 媒体消息 cos_subdir, ext, content_type = get_media_info(msg_type, content) file_key, res_type = extract_file_key(msg_type, content) type_labels = {'image': '图片', 'media': '视频', 'audio': '语音', 'file': '文件'} type_label = type_labels.get(msg_type, msg_type) if msg_type == 'media': dur = re.search(r'duration="([^"]*)"', content) if dur: type_label += f'({dur.group(1)})' if not file_key: return [create_time, sender_name, type_label, f'(无法提取资源key)'] # 文件名:纯ASCII short_id = msg_id[-12:] if msg_type == 'media': name_match = re.search(r'name="([^"]*)"', content) orig_ext = os.path.splitext(name_match.group(1))[1] if name_match else ext filename = f'{short_id}{orig_ext or ext}' elif msg_type == 'file': try: c = json.loads(content) orig_ext = os.path.splitext(c.get('file_name', ''))[1] except Exception: orig_ext = ext filename = f'{short_id}{orig_ext or ext}' else: filename = f'{short_id}{ext}' cos_key = f'{COS_BASE_PATH}/{cos_subdir}/{date_str}/{filename}' # 下载 local_path = download_resource(msg_id, file_key, res_type, filename) if not local_path: log.warning(f"资源下载失败: {msg_id} ({msg_type})") return [create_time, sender_name, type_label, '(下载失败)'] # 上传COS try: url = cos_uploader.upload(local_path, cos_key, content_type) log.info(f"COS上传成功: {cos_key}") except Exception as e: log.error(f"COS上传失败: {cos_key} - {e}") url = f'(上传失败)' finally: try: os.remove(local_path) except Exception: pass return [create_time, sender_name, type_label, url] def main(): os.makedirs(WORK_DIR, exist_ok=True) # 读取上次同步时间 if os.path.exists(LAST_SYNC_FILE): with open(LAST_SYNC_FILE) as f: last_sync_iso = f.read().strip() else: last_sync_iso = (datetime.now(TZ) - timedelta(hours=1)).isoformat() current_iso = datetime.now(TZ).isoformat() log.info(f"开始同步: {last_sync_iso} -> {current_iso}") # 拉取消息 messages = fetch_messages(last_sync_iso, current_iso) if not messages: log.info("没有新消息,同步结束") with open(LAST_SYNC_FILE, 'w') as f: f.write(current_iso) return log.info(f"发现 {len(messages)} 条新消息") # 初始化COS cos_uploader = CosUploader() date_str = datetime.now(TZ).strftime('%Y-%m-%d') # 处理消息 rows = [] for msg in messages: row = process_message(msg, cos_uploader, date_str) if row: rows.append(row) # 写入表格 if rows: append_to_sheet(rows) log.info(f"同步完成: {len(rows)} 条记录") else: log.info("无有效消息需要写入") # 更新同步时间 with open(LAST_SYNC_FILE, 'w') as f: f.write(current_iso) if __name__ == '__main__': main()