Python代码

#!/usr/bin/env python3
# batch_merge_ai.py
import os
import re
import argparse
from pathlib import Path
 
# Matches a complete week label such as "2024-W24": four digits, "-W", then a
# one- or two-digit week number. Group 1 captures the whole label.
WEEK_RE = re.compile(r'^(\d{4}-W\d{1,2})$')
 
def safe_read(p: Path):
    """Return the text content of *p*, tolerating non-UTF-8 files.

    The file is decoded as UTF-8 first. If that raises ``UnicodeDecodeError``
    the file is read again as GBK with ``errors='ignore'``, so bytes that are
    not valid GBK either are dropped instead of raising.

    Args:
        p: Path of the file to read.

    Returns:
        The decoded file content as a string.
    """
    try:
        content = p.read_text(encoding='utf-8')
    except UnicodeDecodeError:
        # Second attempt: legacy Chinese encoding, discarding undecodable bytes.
        content = p.read_text(encoding='gbk', errors='ignore')
    return content
 
def parse(md: str):
    """Split Markdown text into a ``{section: {subsection: body}}`` mapping.

    A line starting with ``'## '`` opens a section and a line starting with
    ``'### '`` opens a subsection inside the current section. All following
    lines, up to the next such heading, form the subsection body; the body is
    joined with newlines and stripped of surrounding whitespace.

    Lines that appear before the first ``'### '`` of a section are ignored,
    and a ``'### '`` block that appears before any ``'## '`` heading is
    discarded because it has no section to belong to.

    Repeated headings accumulate instead of overwriting: a second ``'## X'``
    keeps the subsections already collected under ``X``, and a second
    ``'### y'`` inside the same section appends its body to the earlier one,
    separated by a blank line.

    Args:
        md: The Markdown source text.

    Returns:
        A dict mapping each section title to a dict of subsection title ->
        body text. Sections without subsections map to an empty dict.
    """
    sections, cur_sec, cur_sub, buf = {}, None, None, []

    def flush():
        # Store the buffered lines under the current (section, subsection)
        # pair, then reset the buffer for the next block.
        if cur_sec and cur_sub is not None:
            text = '\n'.join(buf).strip()
            previous = sections[cur_sec].get(cur_sub, '')
            if previous and text:
                # Same subsection seen earlier: keep both bodies.
                text = f'{previous}\n\n{text}'
            elif previous:
                text = previous
            sections[cur_sec][cur_sub] = text
        buf.clear()

    for ln in md.splitlines():
        ln = ln.rstrip()
        if ln.startswith('## '):
            flush()
            cur_sec, cur_sub = ln[3:].strip(), None
            # setdefault, not assignment: a repeated section heading must not
            # wipe the subsections gathered under the same title before.
            sections.setdefault(cur_sec, {})
        elif ln.startswith('### '):
            flush()
            cur_sub = ln[4:].strip()
            if cur_sec:
                sections[cur_sec].setdefault(cur_sub, '')
        elif cur_sub is not None:
            buf.append(ln)
    flush()
    return sections
 
def build(struct):
    """Render a ``{section: {subsection: body}}`` mapping as Markdown text.

    Each section becomes a ``'## '`` heading. Each of its subsections becomes
    a ``'### '`` heading followed by the body and one blank line. Trailing
    whitespace of the whole document is removed and a single newline is
    appended, so an empty mapping renders as ``'\\n'``.

    Args:
        struct: Mapping of section title -> mapping of subsection title ->
            body text.

    Returns:
        The rendered Markdown document as a string ending in one newline.
    """
    out = []
    for sec in struct:
        out.append(f'## {sec}')
        for sub, txt in struct[sec].items():
            # Heading, body, then an empty line as separator.
            out.extend((f'### {sub}', txt, ''))
    document = '\n'.join(out)
    return document.rstrip() + '\n'
 
def merge_one(base: Path):
    """Merge the per-model Markdown files of one week into a single file.

    For a *base* whose final path component is a week label (for example
    ``2024-W24``), the inputs are the sibling files ``<week>-openai.md``,
    ``<week>-claude.md`` and ``<week>-deepseek.md``. Any subset of them may
    exist; missing files are skipped. The result is written next to them as
    ``<week>-merged.md`` (UTF-8) and the output path is printed.

    The first existing input, in the tag order above, provides the section /
    subsection layout. For every subsection of that layout, the non-empty
    bodies found in each input are concatenated, each prefixed by a line
    naming the model it came from. Sections or subsections that exist only in
    the other inputs are not included.

    Args:
        base: Directory path joined with the week label; only its name and
            parent directory are used.

    Returns:
        None. Nothing is written when none of the input files exists.
    """
    week_name = base.name                       # e.g. 2024-W24
    out_file = base.with_name(f'{week_name}-merged.md')

    ai_tags = ['openai', 'claude', 'deepseek']
    ai_files = {tag: base.with_name(f'{week_name}-{tag}.md') for tag in ai_tags}

    # Read and parse every input that exists exactly once. Skipping missing
    # files here prevents FileNotFoundError when only some models produced
    # output, and avoids re-parsing each file for every subsection.
    parsed = {
        tag: parse(safe_read(path))
        for tag, path in ai_files.items()
        if path.exists()
    }
    if not parsed:
        return

    # The first available file (in ai_tags order) is the structure template.
    template = next(iter(parsed.values()))

    # Merge the bodies of all available inputs, subsection by subsection.
    merged = {}
    for sec, subs in template.items():
        merged[sec] = {}
        for sub in subs:
            pieces = []
            for tag, doc in parsed.items():
                txt = doc.get(sec, {}).get(sub, '').strip()
                if txt:
                    pieces.append(f'[来自 {tag}]\n{txt}')
            merged[sec][sub] = '\n\n'.join(pieces)

    out_file.write_text(build(merged), encoding='utf-8')
    print(f'生成 {out_file}')
 
def main():
    """Command-line entry point: merge every week found under a directory.

    Takes one positional argument, the top-level directory. The tree below it
    is searched recursively for files named ``*-deepseek.md``. When the part
    of the file stem before the last ``-`` is a week label accepted by
    ``WEEK_RE``, ``merge_one`` is called for that week in the file's
    directory. Files whose prefix is not a week label are ignored.
    """
    cli = argparse.ArgumentParser(description='批量把 AI 三件套合并成 -merged.md')
    cli.add_argument('directory', help='顶层目录')
    top = Path(cli.parse_args().directory)

    for candidate in top.rglob('*-deepseek.md'):
        # "2024-W24-deepseek" -> "2024-W24"
        week_part = candidate.stem.rsplit('-', 1)[0]
        hit = WEEK_RE.match(week_part)
        if not hit:
            continue
        merge_one(candidate.parent / hit.group(1))

# Run the batch merge only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

主要功能和解释

使用示例