#!/usr/bin/env python3 """L2剧本审校 - S3-U30-L4 幽灵宝藏 知识点统计(修正版)""" import re, json from collections import Counter with open("/tmp/l2_l4_review.md", "r") as f: content = f.read() # 找剧本表格(7列:类型|剧情内容|中文对白|翻译|知识点|组件|配置信息) tables = list(re.finditer(r']*>(.*?)', content, re.DOTALL)) script_table = tables[2].group(0) # 第3个表格是剧本表 tr_blocks = re.findall(r'(.*?)', script_table, re.DOTALL) print(f"剧本表总行数(含表头): {len(tr_blocks)}") data_rows = [] for tr in tr_blocks: tds = re.findall(r']*>(.*?)', tr, re.DOTALL) if len(tds) >= 7: clean = [] for td in tds: c = re.sub(r'<[^>]+>', '', td) c = re.sub(r'\*\*', '', c) c = re.sub(r'\{align="[^"]*"\}', '', c) c = re.sub(r'\{color="[^"]*"\}', '', c) c = c.strip() clean.append(c) data_rows.append(clean) # 跳过表头 header = data_rows[0] print(f"列结构: {header}") print(f"数据行: {len(data_rows)-1}") # 知识点列 = 索引4 kp_counter = Counter() kp_rows_map = {} for i, row in enumerate(data_rows[1:], start=2): row_type = row[0].strip() if len(row) > 0 else "" kp_text = row[4].strip() if len(row) > 4 else "" if kp_text: # 按换行分割 kps = [k.strip() for k in kp_text.split('\n') if k.strip()] for kp in kps: kp_counter[kp] += 1 if kp not in kp_rows_map: kp_rows_map[kp] = [] kp_rows_map[kp].append((i, row_type)) print("\n=== 知识点出现次数统计(以【知识点】列为准)===\n") print(f"{'知识点':<32} {'次数':>4} {'规范':>10} {'出现行(类型)'}") print("-" * 100) for kp, count in kp_counter.most_common(): status = "✅" if 2 <= count <= 3 else ("⚠️超标" if count > 3 else "⚠️不足") row_info = "; ".join([f"行{r}({t})" for r, t in kp_rows_map[kp]]) print(f"{kp:<32} {count:>4} {status:>10} {row_info}") # 汇总 print(f"\n=== 汇总 ===") total_kps = len(kp_counter) print(f"知识点总数: {total_kps}") for kp, count in kp_counter.most_common(): status = "✅" if 2 <= count <= 3 else ("⚠️超标" if count > 3 else "⚠️不足") print(f" {kp}: {count}次 {status}") # 删除线检查 print(f"\n=== 删除线行 ===") deleted = [] for i, row in enumerate(data_rows[1:], start=2): for col in [0, 1, 2, 3]: text = row[col] if len(row) > col else "" if '~~' in text: deleted.append(i) break print(f"含删除线: {deleted}") # 组件配置为空 print(f"\n=== 组件配置为空 ===") empty = [] for i, row in enumerate(data_rows[1:], start=2): row_type = row[0].strip() if len(row) > 0 else "" if row_type and 'TL' not in row_type: comp = row[5].strip() if len(row) > 5 else "" if not comp: empty.append((i, row_type)) print(f"组件配置为空的互动行: {len(empty)}个") for r, t in empty: print(f" 行{r}: {t}")