Python代码
#!/usr/bin/env python3
# batch_merge_ai.py
import os
import re
import argparse
from pathlib import Path
# Matches a complete week label: four digits, "-W", then one or two digits
# (for example "2024-W24"). Anchored at both ends, so nothing may precede or
# follow the label.
WEEK_RE = re.compile(r'^(\d{4}-W\d{1,2})$')
def safe_read(p: Path) -> str:
    """Return the text of *p*, decoded as UTF-8 with a GBK fallback.

    The file is first decoded as UTF-8. A leading byte-order mark, if
    present, is dropped so it cannot end up glued to the first line of the
    text (where it would stop a heading on that line from being recognised
    by a ``startswith`` check). If the bytes are not valid UTF-8, the file
    is read again as GBK, skipping any bytes that cannot be decoded.

    Args:
        p: Path of the file to read.

    Returns:
        The decoded file contents.

    Raises:
        OSError: If the file cannot be opened or read (for example, it does
            not exist).
    """
    try:
        # 'utf-8-sig' decodes plain UTF-8 too; it only differs by stripping
        # a leading BOM when one is present.
        return p.read_text(encoding='utf-8-sig')
    except UnicodeDecodeError:
        # Best-effort fallback: undecodable bytes are dropped, not raised.
        return p.read_text(encoding='gbk', errors='ignore')
def parse(md: str) -> dict:
    """Split Markdown text into ``{section: {subsection: body}}``.

    A line starting with ``'## '`` opens a section; a line starting with
    ``'### '`` opens a subsection of the current section. Every other line
    is collected as body text of the current subsection. Lines that appear
    before any subsection has been opened (including text directly under a
    ``##`` heading) are discarded, as are subsections that appear before
    the first section.

    Trailing whitespace is removed from every line, and each stored body is
    stripped of leading/trailing whitespace as a whole.

    If the same section heading occurs more than once, the subsections of
    all occurrences are collected under that one section instead of the
    later occurrence wiping out the earlier ones. A subsection name repeated
    within a section keeps the body of its last occurrence.

    Args:
        md: Markdown source text.

    Returns:
        A dict mapping section title to a dict mapping subsection title to
        its body text (``''`` when the subsection has no body).
    """
    sections = {}
    cur_sec = None
    cur_sub = None
    buf = []

    def flush():
        """Store the buffered lines under the current section/subsection."""
        if cur_sec and cur_sub is not None:
            sections[cur_sec][cur_sub] = '\n'.join(buf).strip()
        # Always reset the buffer, so lines gathered under a subsection that
        # has no enclosing section never leak into a later subsection.
        buf.clear()

    for ln in md.splitlines():
        ln = ln.rstrip()
        if ln.startswith('## '):
            flush()
            cur_sec, cur_sub = ln[3:].strip(), None
            # setdefault keeps subsections already parsed for this title.
            sections.setdefault(cur_sec, {})
        elif ln.startswith('### '):
            flush()
            cur_sub = ln[4:].strip()
            if cur_sec:
                # Register the subsection now so it exists even if empty.
                sections[cur_sec][cur_sub] = ''
        elif cur_sub is not None:
            buf.append(ln)
    flush()
    return sections
def build(struct) -> str:
    """Render a ``{section: {subsection: body}}`` mapping as Markdown.

    Each section becomes a ``## `` heading and each subsection a ``### ``
    heading followed by its body and one blank line. Sections and
    subsections are emitted in the mapping's iteration order.

    Args:
        struct: Mapping of section title to a mapping of subsection title
            to body text.

    Returns:
        The Markdown text, with trailing whitespace removed and exactly one
        newline at the end. An empty mapping yields ``'\\n'``.
    """
    lines = []
    for sec, subs in struct.items():
        lines.append(f'## {sec}')
        for sub, txt in subs.items():
            lines.append(f'### {sub}')
            lines.append(txt)
            # Blank separator line after every subsection body.
            lines.append('')
    return '\n'.join(lines).rstrip() + '\n'
def merge_one(base: Path):
    """Merge one week's per-AI Markdown files into ``<week>-merged.md``.

    For ``base`` named ``<week>`` the sibling files ``<week>-openai.md``,
    ``<week>-claude.md`` and ``<week>-deepseek.md`` are looked up next to
    it. Files that do not exist are skipped. The first file found (in that
    tag order) supplies the section/subsection structure; for every
    subsection of that structure, the non-empty bodies from each available
    file are concatenated, each prefixed with a line naming its source tag.
    The result is written as UTF-8 to ``<week>-merged.md`` beside the
    inputs and the output path is printed.

    If none of the three files exists, nothing is written.

    Args:
        base: Path whose final component is the week label (for example
            ``2024-W24``); only its name and parent directory are used.
    """
    week_name = base.name  # e.g. 2024-W24
    out_file = base.with_name(f'{week_name}-merged.md')
    ai_tags = ['openai', 'claude', 'deepseek']
    ai_files = {tag: base.with_name(f'{week_name}-{tag}.md') for tag in ai_tags}

    # Read and parse every file that exists exactly once. Missing files are
    # left out so they cannot raise FileNotFoundError, and nothing is
    # re-parsed per subsection.
    parsed = {
        tag: parse(safe_read(path))
        for tag, path in ai_files.items()
        if path.exists()
    }
    if not parsed:
        # No AI file for this week: nothing to merge.
        return

    # Use the first available file (ai_tags order) as the structure template.
    template = next(iter(parsed.values()))

    # Merge the bodies from all available AI files.
    merged = {}
    for sec, subs in template.items():
        merged[sec] = {}
        for sub in subs:
            pieces = []
            for tag, doc in parsed.items():
                txt = doc.get(sec, {}).get(sub, '').strip()
                if txt:
                    pieces.append(f'[来自 {tag}]\n{txt}')
            merged[sec][sub] = '\n\n'.join(pieces)

    out_file.write_text(build(merged), encoding='utf-8')
    print(f'生成 {out_file}')
def main():
    """Command-line entry point: merge the AI files of every week found.

    Takes one positional argument, the top-level directory. That directory
    is searched recursively for files named ``*-deepseek.md``; for each one
    whose name before the ``-deepseek`` suffix is a week label accepted by
    ``WEEK_RE``, ``merge_one`` is called with ``<parent>/<week label>``.

    Exits with an argparse usage error if the argument is not an existing
    directory, rather than silently doing nothing.
    """
    parser = argparse.ArgumentParser(description='批量把 AI 三件套合并成 -merged.md')
    parser.add_argument('directory', help='顶层目录')
    args = parser.parse_args()
    root = Path(args.directory)
    if not root.is_dir():
        parser.error(f'不是有效的目录: {root}')
    # Sorted so weeks are processed (and reported) in a stable order.
    for ai_file in sorted(root.rglob('*-deepseek.md')):
        # "2024-W24-deepseek" -> "2024-W24"
        m = WEEK_RE.match(ai_file.stem.rsplit('-', 1)[0])
        if m:
            merge_one(ai_file.parent / m.group(1))
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
主要功能和解释
使用示例