#!/usr/bin/env python3
"""L2剧本审校 - S3-U30-L4 幽灵宝藏 知识点统计(修正版)"""
import re, json
from collections import Counter
# Position (0-based) of the script table among the document's <table> elements:
# the third table is the script table.
SCRIPT_TABLE_INDEX = 2
# The script table has 7 columns:
# type | plot content | Chinese dialogue | translation | knowledge points |
# component | config info
MIN_SCRIPT_COLUMNS = 7


def clean_cell(cell_html):
    """Return the plain text of one table cell.

    ``<br>`` and ``</p>`` are turned into newlines before the remaining tags
    are removed, so that entries placed on separate visual lines stay
    separable by a later ``split('\\n')`` instead of being fused together.
    Markdown bold markers (``**``) and literal ``{align="..."}`` /
    ``{color="..."}`` markers are removed, then the text is stripped.
    """
    text = re.sub(r'<br\s*/?>|</p>', '\n', cell_html, flags=re.IGNORECASE)
    text = re.sub(r'<[^>]+>', '', text)
    text = re.sub(r'\*\*', '', text)
    text = re.sub(r'\{align="[^"]*"\}', '', text)
    text = re.sub(r'\{color="[^"]*"\}', '', text)
    return text.strip()


def find_table_rows(html, table_index=SCRIPT_TABLE_INDEX):
    """Return the inner HTML of every ``<tr>`` in the selected table.

    Args:
        html: Document text containing HTML tables.
        table_index: 0-based position of the wanted ``<table>`` element.

    Raises:
        IndexError: If the document has no table at ``table_index``.
    """
    flags = re.DOTALL | re.IGNORECASE
    tables = re.findall(r'<table\b[^>]*>.*?</table>', html, flags)
    if table_index >= len(tables):
        raise IndexError(
            f"expected at least {table_index + 1} <table> elements, "
            f"found {len(tables)}"
        )
    # \b keeps <tr ...> from matching longer tag names such as <track>.
    return re.findall(r'<tr\b[^>]*>(.*?)</tr>', tables[table_index], flags)


def parse_script_rows(tr_blocks, min_columns=MIN_SCRIPT_COLUMNS):
    """Turn ``<tr>`` bodies into lists of cleaned cell texts.

    Rows with fewer than ``min_columns`` cells are skipped. Both ``<td>`` and
    ``<th>`` cells are accepted so that a header row marked up with ``<th>``
    is still returned as the first row.
    """
    flags = re.DOTALL | re.IGNORECASE
    rows = []
    for tr in tr_blocks:
        cells = re.findall(r'<t[dh]\b[^>]*>(.*?)</t[dh]>', tr, flags)
        if len(cells) >= min_columns:
            rows.append([clean_cell(cell) for cell in cells])
    return rows


# Script section: runs only when the file is executed directly, and keeps the
# results in module-level names for the report sections that follow.
if __name__ == "__main__":
    # Explicit encoding: the review document contains Chinese text and must
    # not depend on the platform's default locale encoding.
    with open("/tmp/l2_l4_review.md", "r", encoding="utf-8") as f:
        content = f.read()

    tr_blocks = find_table_rows(content)
    print(f"剧本表总行数(含表头): {len(tr_blocks)}")

    data_rows = parse_script_rows(tr_blocks)
    if not data_rows:
        raise SystemExit(
            f"no table rows with at least {MIN_SCRIPT_COLUMNS} cells were found"
        )

    # The first parsed row is the header; the data rows follow it.
    header = data_rows[0]
    print(f"列结构: {header}")
    print(f"数据行: {len(data_rows)-1}")
def kp_status(count, min_count=2, max_count=3):
    """Classify how often a knowledge point appears.

    Returns the OK mark when ``min_count <= count <= max_count``, the
    "over limit" label when ``count > max_count`` and the "too few" label
    otherwise. The returned strings are the ones printed in the report.
    """
    if count > max_count:
        return "⚠️超标"
    if count < min_count:
        return "⚠️不足"
    return "✅"


def count_knowledge_points(rows, kp_col=4, first_row_number=2):
    """Count knowledge points listed in one column of the data rows.

    Args:
        rows: Data rows (header excluded), each a list of cell texts.
        kp_col: Index of the knowledge-point column (index 4 by default).
        first_row_number: Number reported for the first data row; 2 by
            default, so the header counts as row 1.

    Returns:
        ``(counter, occurrences)`` where ``counter`` is a ``Counter`` of
        knowledge point -> number of appearances and ``occurrences`` maps each
        knowledge point to a list of ``(row_number, row_type)`` tuples in the
        order they were seen.
    """
    counter = Counter()
    occurrences = {}
    for row_number, row in enumerate(rows, start=first_row_number):
        row_type = row[0].strip() if row else ""
        kp_text = row[kp_col].strip() if len(row) > kp_col else ""
        # One cell may list several knowledge points, one per line.
        for line in kp_text.split('\n'):
            kp = line.strip()
            if not kp:
                continue
            counter[kp] += 1
            occurrences.setdefault(kp, []).append((row_number, row_type))
    return counter, occurrences


# Script section: runs only when the file is executed directly; relies on the
# module-level ``data_rows`` (header first) built earlier in the script.
if __name__ == "__main__":
    kp_counter, kp_rows_map = count_knowledge_points(data_rows[1:])

    print("\n=== 知识点出现次数统计(以【知识点】列为准)===\n")
    print(f"{'知识点':<32} {'次数':>4} {'规范':>10} {'出现行(类型)'}")
    print("-" * 100)
    for kp, count in kp_counter.most_common():
        status = kp_status(count)
        row_info = "; ".join(f"行{r}({t})" for r, t in kp_rows_map[kp])
        print(f"{kp:<32} {count:>4} {status:>10} {row_info}")

    # Summary
    print("\n=== 汇总 ===")
    total_kps = len(kp_counter)
    print(f"知识点总数: {total_kps}")
    for kp, count in kp_counter.most_common():
        print(f" {kp}: {count}次 {kp_status(count)}")
def find_strikethrough_rows(rows, columns=(0, 1, 2, 3), first_row_number=2):
    """Return the numbers of rows containing a ``~~`` strikethrough marker.

    Args:
        rows: Data rows (header excluded), each a list of cell texts.
        columns: Column indexes to inspect; the first four by default.
            Indexes beyond the end of a row are ignored.
        first_row_number: Number reported for the first data row; 2 by
            default, so the header counts as row 1.

    Returns:
        Row numbers, in order, each listed once even when several of the
        inspected cells contain the marker.
    """
    return [
        row_number
        for row_number, row in enumerate(rows, start=first_row_number)
        if any('~~' in row[col] for col in columns if col < len(row))
    ]


# Script section: runs only when the file is executed directly; relies on the
# module-level ``data_rows`` (header first) built earlier in the script.
if __name__ == "__main__":
    print("\n=== 删除线行 ===")
    deleted = find_strikethrough_rows(data_rows[1:])
    print(f"含删除线: {deleted}")
def find_rows_missing_component(rows, component_col=5, skip_marker='TL',
                                first_row_number=2):
    """Return rows that should carry a component but whose cell is empty.

    A row is checked only when its type (column 0) is non-empty and does not
    contain ``skip_marker``. NOTE(review): the report label calls the checked
    rows interactive rows, so 'TL' presumably marks non-interactive rows;
    confirm against the script-table conventions.

    Args:
        rows: Data rows (header excluded), each a list of cell texts.
        component_col: Index of the component column (index 5 by default).
        skip_marker: Substring of the row type that exempts a row.
        first_row_number: Number reported for the first data row; 2 by
            default, so the header counts as row 1.

    Returns:
        A list of ``(row_number, row_type)`` tuples, in row order.
    """
    missing = []
    for row_number, row in enumerate(rows, start=first_row_number):
        row_type = row[0].strip() if row else ""
        if not row_type or skip_marker in row_type:
            continue
        component = row[component_col].strip() if len(row) > component_col else ""
        if not component:
            missing.append((row_number, row_type))
    return missing


# Script section: runs only when the file is executed directly; relies on the
# module-level ``data_rows`` (header first) built earlier in the script.
if __name__ == "__main__":
    print("\n=== 组件配置为空 ===")
    empty = find_rows_missing_component(data_rows[1:])
    print(f"组件配置为空的互动行: {len(empty)}个")
    for r, t in empty:
        print(f" 行{r}: {t}")