92 lines
3.1 KiB
Python
92 lines
3.1 KiB
Python
#!/usr/bin/env python3
"""L2 script review - S3-U30-L4 "Ghost Treasure" knowledge-point statistics (revised).

Reads an exported review document, picks the third ``<lark-table>`` in it
(the script table, 7 columns: type | plot content | Chinese dialogue |
translation | knowledge points | component | config info) and prints:

* how often each knowledge point occurs and on which rows,
* which rows contain strikethrough (``~~``) markup,
* which non-"TL" rows have an empty component column.
"""
import json  # noqa: F401 - not used below; retained from the original import list
import re
from collections import Counter, defaultdict

# Default location of the exported review document.
INPUT_PATH = "/tmp/l2_l4_review.md"

# The third <lark-table> in the document is the script table.
SCRIPT_TABLE_INDEX = 2
# Rows with fewer cells than this are ignored entirely.
MIN_COLUMNS = 7
# Column layout of the script table (0-based).
TYPE_COLUMN = 0
KP_COLUMN = 4
COMPONENT_COLUMN = 5
# Columns scanned for strikethrough markup.
STRIKE_COLUMNS = (0, 1, 2, 3)
# The header is row 1, so the first data row is reported as row 2.
FIRST_DATA_ROW = 2

# Compiled once at import time instead of on every cell.
_TABLE_RE = re.compile(r'<lark-table[^>]*>(.*?)</lark-table>', re.DOTALL)
_ROW_RE = re.compile(r'<lark-tr>(.*?)</lark-tr>', re.DOTALL)
_CELL_RE = re.compile(r'<lark-td[^>]*>(.*?)</lark-td>', re.DOTALL)
# Applied in this order: tags, bold markers, then align/color attributes.
_MARKUP_RES = (
    re.compile(r'<[^>]+>'),
    re.compile(r'\*\*'),
    re.compile(r'\{align="[^"]*"\}'),
    re.compile(r'\{color="[^"]*"\}'),
)


def _clean_cell(raw):
    """Return *raw* with tags, ``**`` and align/color attributes removed, stripped."""
    text = raw
    for pattern in _MARKUP_RES:
        text = pattern.sub('', text)
    return text.strip()


def _script_table_rows(content, table_index=SCRIPT_TABLE_INDEX):
    """Return the raw ``<lark-tr>`` bodies of the table at *table_index*.

    Raises:
        ValueError: if *content* holds fewer than ``table_index + 1`` tables.
    """
    tables = list(_TABLE_RE.finditer(content))
    if len(tables) <= table_index:
        raise ValueError(
            f"expected at least {table_index + 1} <lark-table> blocks, "
            f"found {len(tables)}"
        )
    return _ROW_RE.findall(tables[table_index].group(0))


def _parse_rows(tr_blocks, min_columns=MIN_COLUMNS):
    """Turn raw row bodies into lists of cleaned cells, dropping short rows."""
    rows = []
    for tr in tr_blocks:
        cells = _CELL_RE.findall(tr)
        if len(cells) >= min_columns:
            rows.append([_clean_cell(cell) for cell in cells])
    return rows


def _status_label(count):
    """Classify an occurrence count: 2-3 is OK, more is over, fewer is under."""
    if count > 3:
        return "⚠️超标"
    if count < 2:
        return "⚠️不足"
    return "✅"


def _count_knowledge_points(numbered_rows):
    """Count knowledge points, one per non-blank line of the knowledge-point column.

    Returns:
        A ``(Counter, dict)`` pair: occurrences per knowledge point, and for
        each knowledge point the list of ``(row_number, row_type)`` hits.
    """
    counter = Counter()
    occurrences = defaultdict(list)
    for number, row in numbered_rows:
        for line in row[KP_COLUMN].split('\n'):
            kp = line.strip()
            if kp:
                counter[kp] += 1
                occurrences[kp].append((number, row[TYPE_COLUMN]))
    return counter, occurrences


def build_report(content):
    """Yield the report for *content*, one ``print()`` argument at a time.

    The report is generated lazily so that a caller printing each item as it
    arrives emits the same partial output the script always did before a
    failure.

    Raises:
        ValueError: if the script table is missing or has no row with at
            least ``MIN_COLUMNS`` cells (so there is no header row).
    """
    tr_blocks = _script_table_rows(content)
    yield f"剧本表总行数(含表头): {len(tr_blocks)}"

    data_rows = _parse_rows(tr_blocks)
    if not data_rows:
        raise ValueError(
            f"script table has no row with at least {MIN_COLUMNS} cells"
        )

    # Skip the header row.  Row numbers count only rows that survived the
    # minimum-column filter, not every <lark-tr> in the table.
    header, body = data_rows[0], data_rows[1:]
    numbered = list(enumerate(body, start=FIRST_DATA_ROW))
    yield f"列结构: {header}"
    yield f"数据行: {len(body)}"

    # Knowledge-point statistics (column index 4).
    counter, occurrences = _count_knowledge_points(numbered)
    ranked = [(kp, n, _status_label(n)) for kp, n in counter.most_common()]

    yield "\n=== 知识点出现次数统计(以【知识点】列为准)===\n"
    yield f"{'知识点':<32} {'次数':>4} {'规范':>10} {'出现行(类型)'}"
    yield "-" * 100
    for kp, count, status in ranked:
        row_info = "; ".join(f"行{r}({t})" for r, t in occurrences[kp])
        yield f"{kp:<32} {count:>4} {status:>10} {row_info}"

    # Summary.
    yield "\n=== 汇总 ==="
    yield f"知识点总数: {len(counter)}"
    for kp, count, status in ranked:
        yield f" {kp}: {count}次 {status}"

    # Strikethrough check: a row is listed once if any scanned column has "~~".
    yield "\n=== 删除线行 ==="
    deleted = [
        number
        for number, row in numbered
        if any('~~' in row[col] for col in STRIKE_COLUMNS)
    ]
    yield f"含删除线: {deleted}"

    # Rows with a non-empty type that does not contain "TL" and no component.
    yield "\n=== 组件配置为空 ==="
    empty = [
        (number, row[TYPE_COLUMN])
        for number, row in numbered
        if row[TYPE_COLUMN]
        and 'TL' not in row[TYPE_COLUMN]
        and not row[COMPONENT_COLUMN]
    ]
    yield f"组件配置为空的互动行: {len(empty)}个"
    for number, row_type in empty:
        yield f" 行{number}: {row_type}"


def main(path=INPUT_PATH):
    """Read the review document at *path* and print the report to stdout."""
    # Explicit encoding: the document contains Chinese text and must not be
    # decoded with whatever the platform locale happens to be.
    with open(path, "r", encoding="utf-8") as f:
        content = f.read()
    for line in build_report(content):
        print(line)


if __name__ == "__main__":
    main()