筆記一鍵修正¶

【條項款阿拉伯數字 + 清單空行】

本 Python Notebook 會做兩件事：

將 第…條、第…項、第…款 的中文數字（如：第六條、第一項、第一款）改為阿拉伯數字（第6條、第1項、第1款）。
確保 Markdown 中，任何清單（無序/有序）的"首個項目"前有一個空行（不改動程式碼區塊內文）。

使用方式：先在下方【設定】區調整 TARGET_PATHS / DRY_RUN / VERBOSE，再執行「Run All」。

安全建議：第一次請將 DRY_RUN = True 先預覽變更，再改為 False 實際寫入。

設定¶

TARGET_PATHS：要處理的檔案或資料夾（可多個，也可使用萬用字元，如 **/*.md），資料夾會遞迴掃描 .md/.markdown/.txt。相對路徑會以目前工作目錄下的 work 資料夾（Path.cwd() / "work"）為基準解析；絕對路徑則原樣使用。
DRY_RUN："True" 只預覽、"False" 會直接寫回檔案。
VERBOSE：設為 True 時，會顯示根目錄、各目標路徑及其下的檔案樹，以及每個有變更的檔案與變更數。

In [1]:

  Copied!     
 
# --- Settings (edit as needed) ---

# Files or directories to process; more than one entry may be listed.
TARGET_PATHS = [
    "./mkdocs",
]

# True = preview only; False = write the changes back to the files.
DRY_RUN = False

# True = print every changed file together with its change counts.
VERBOSE = False
# --- 設定（請依需要修改）--- TARGET_PATHS = ["./mkdocs"] # 檔案或資料夾路徑，可多個 DRY_RUN = False # True=僅預覽；False=寫入檔案 VERBOSE = False # 顯示每個檔案和變更數 

原理與限制¶

條/項轉換：僅處理緊貼形式 第[數字]條、第[數字]項、第[數字]款。數字可為中文（含十/百/千/萬、零、〇、○、◯、兩）或阿拉伯數字。
清單空行：當偵測到清單項目（以 -、*、+、1.、1) 等開頭），且上一行不是空白、也不是清單項目時，會自動在該項目前插入一行空白。
程式碼區塊（``` 或 ~~~）內文字不會被改動。

In [2]:

  Copied!     
 
# --- 轉換工具（自包含）---
import os, glob, re
from pathlib import Path
from typing import Optional, Tuple, List

# Chinese digit characters mapped to their values.  Several glyphs that are
# used to write zero are accepted, and '兩' is treated as 2.
CN_DIGIT = {
    '零': 0, '〇': 0, '○': 0, '◯': 0,
    '一': 1, '二': 2, '兩': 2, '三': 3, '四': 4, '五': 5,
    '六': 6, '七': 7, '八': 8, '九': 9,
}
# Multiplier characters that scale the digit written in front of them.
CN_UNIT = {'十': 10, '百': 100, '千': 1000}


def cn_to_int(text: str) -> Optional[int]:
    """Convert a Chinese or Arabic numeral string to an integer.

    Three notations are understood:

    * Arabic digits, e.g. ``"12"`` -> 12.
    * Positional Chinese digits with no unit characters, read one digit per
      decimal place, e.g. ``"一〇三"`` -> 103.
    * Unit-based Chinese numerals built with 十/百/千/萬, e.g.
      ``"一百零五"`` -> 105.  A unit with no digit in front of it counts as
      one of that unit (``"十"`` -> 10, a lone ``"萬"`` -> 10000).

    Args:
        text: The numeral to convert.

    Returns:
        The integer value, or ``None`` when ``text`` is empty, contains a
        character that is neither a known digit nor a known unit, or puts a
        digit directly after a non-zero digit inside a unit-based numeral
        (ambiguous, e.g. ``"一百二三"``).
    """
    if not text:
        return None
    if re.fullmatch(r'\d+', text):
        return int(text)

    # Digits only, no units: positional notation.  Accumulating per character
    # (instead of keeping only the last digit) is what makes "一〇三" 103.
    if all(ch in CN_DIGIT for ch in text):
        value = 0
        for ch in text:
            value = value * 10 + CN_DIGIT[ch]
        return value

    total = 0    # completed 萬 groups
    section = 0  # value accumulated below the current 萬 boundary
    number = 0   # digit waiting for its unit
    for ch in text:
        if ch in CN_DIGIT:
            if number != 0:
                # A digit right after a pending non-zero digit has no single
                # reading here; refuse rather than guess.
                return None
            number = CN_DIGIT[ch]
        elif ch in CN_UNIT:
            unit = CN_UNIT[ch]
            if number == 0:
                number = 1  # e.g. 十 = 10 (the leading 1 is omitted)
            section += number * unit
            number = 0
        elif ch == '萬':
            part = section + number
            if part == 0:
                part = 1  # a lone 萬 is read as 1 萬
            total += part * 10000
            section = 0
            number = 0
        else:
            return None
    return total + section + number

# Matches only the tightly-joined forms 第…條 / 第…項 / 第…款, where the number
# is written with Chinese numeral characters and/or ASCII digits.
PATTERN = re.compile(r'第([零〇○◯兩二一三四五六七八九十百千萬0-9]+)(條|項|款)')


def convert_ordinals(s: str) -> Tuple[str, int]:
    """Rewrite 第N條 / 第N項 / 第N款 so that N is in Arabic digits.

    Every match of ``PATTERN`` has its number passed through ``cn_to_int``.
    A match is left untouched when the number cannot be converted or when
    it is already written as the same Arabic digits.

    Args:
        s: The text to process.

    Returns:
        A ``(new_text, count)`` tuple, where ``count`` is the number of
        matches that were actually rewritten.
    """
    count = 0

    def repl(m: re.Match) -> str:
        nonlocal count
        numtxt = m.group(1)
        unit = m.group(2)
        val = cn_to_int(numtxt)
        if val is None:
            return m.group(0)
        if str(val) == numtxt:
            return m.group(0)  # already Arabic digits
        count += 1
        return f'第{val}{unit}'

    out = PATTERN.sub(repl, s)
    return out, count

# Insert a blank line before the first item of a list (text inside fenced
# code blocks is never touched).
# A list item: optional indent, any number of '>' blockquote markers, optional
# indent, then a bullet (-, *, +) or an ordered marker (digits followed by '.'
# or ')'), then whitespace.
LIST_ITEM_RE = re.compile(r'^([\t ]*)(?:> ?)*([\t ]*)(?:[-*+]|\d+[.)])\s+')
# A fence line: optional indent, then at least three backticks or tildes.
FENCE_RE = re.compile(r'^(?:[\t ]*)(```|~~~)')


def ensure_blank_line_before_lists(s: str) -> Tuple[str, int]:
    """Make sure the first item of every Markdown list follows a blank line.

    A blank line is inserted in front of a list item when the previous line
    is neither blank nor a list item itself.  Lines inside fenced code blocks
    (``` or ~~~) are not examined.  A fence is closed only by a run of the
    same character that is at least as long as the opening run, so a
    four-backtick block may contain three-backtick lines.

    Existing line terminators, including a trailing newline at the end of
    the text, are kept exactly as they are.  An inserted blank line reuses
    the terminator of the line before it.

    Args:
        s: The Markdown text.

    Returns:
        A ``(new_text, fixes)`` tuple, where ``fixes`` is the number of blank
        lines that were inserted.
    """
    # Same split twice: once with terminators (written back verbatim) and
    # once without (what the regexes are matched against).
    raw_lines = s.splitlines(keepends=True)
    contents = s.splitlines()

    out: List[str] = []
    fixes = 0
    fence_char: Optional[str] = None  # '`' or '~' while inside a code block
    fence_len = 0                     # length of the opening fence run
    prev = ''                         # previous line, without its terminator
    prev_eol = '\n'                   # terminator to use for an inserted line

    for raw, line in zip(raw_lines, contents):
        # Track where fenced code blocks start and end.
        m_f = FENCE_RE.match(line)
        if m_f:
            marker = m_f.group(1)[0]
            rest = line[m_f.start(1):]
            run_len = len(rest) - len(rest.lstrip(marker))
            if fence_char is None:
                fence_char, fence_len = marker, run_len
            elif marker == fence_char and run_len >= fence_len:
                fence_char, fence_len = None, 0

        # Outside code blocks, a list item whose previous line is ordinary
        # text is the first item of a list and needs a blank line before it.
        if fence_char is None and LIST_ITEM_RE.match(line):
            if prev.strip() != '' and not LIST_ITEM_RE.match(prev):
                out.append(prev_eol)
                fixes += 1

        out.append(raw)
        prev = line
        eol = raw[len(line):]
        prev_eol = eol if eol in ('\n', '\r\n', '\r') else '\n'

    return ''.join(out), fixes

def iter_target_files(paths: List[Path]) -> List[Path]:
    """Collect the text files to process from a list of targets.

    * A directory target is walked recursively (symlinks are followed).
    * A target containing ``*``, ``?`` or ``[`` is expanded with a recursive
      glob (e.g. ``**/*.md``); directories it matches are walked as well.
    * Only ``.md`` / ``.markdown`` / ``.txt`` files are kept
      (case-insensitive).
    * Each file is returned once even when targets overlap or a glob matches
      both a directory and the files inside it.  A directory that was
      already visited is not walked again, which also stops symlink cycles.

    Args:
        paths: Files, directories, or glob patterns.

    Returns:
        The matching files, in discovery order.  Targets that do not exist
        contribute nothing.
    """
    exts = {'.md', '.markdown', '.txt'}
    files: List[Path] = []
    seen_files = set()  # real paths of files already collected
    seen_dirs = set()   # real paths of directories already walked

    def maybe_add(fp: Path) -> None:
        # Best effort: a path that cannot be inspected is skipped.
        try:
            if not (fp.is_file() and fp.suffix.lower() in exts):
                return
            key = os.path.realpath(fp)
        except (OSError, ValueError):
            return
        if key not in seen_files:
            seen_files.add(key)
            files.append(fp)

    def walk(top) -> None:
        for root, dirs, names in os.walk(top, followlinks=True):
            real_root = os.path.realpath(root)
            if real_root in seen_dirs:
                dirs[:] = []  # prune: do not descend into it again
                continue
            seen_dirs.add(real_root)
            for name in names:
                maybe_add(Path(root) / name)

    for p in paths:
        pat = str(p)
        if any(ch in pat for ch in '*?['):
            for g in glob.glob(pat, recursive=True):
                gp = Path(g)
                if gp.is_dir():
                    walk(gp)
                else:
                    maybe_add(gp)
        elif Path(p).is_dir():
            walk(p)
        elif Path(p).exists():
            maybe_add(Path(p))
    return files
# --- 轉換工具（自包含）--- import os, glob, re from pathlib import Path from typing import Optional, Tuple, List # 中文數字對照 CN_DIGIT = { '零': 0, '〇': 0, '○': 0, '◯': 0, '一': 1, '二': 2, '兩': 2, '三': 3, '四': 4, '五': 5, '六': 6, '七': 7, '八': 8, '九': 9, } CN_UNIT = {'十': 10, '百': 100, '千': 1000} def cn_to_int(text: str) -> Optional[int]: """將中文數字（含 十/百/千/萬）或阿拉伯數字轉為整數。遇到未知字元則回傳 None。 """ if re.fullmatch(r'\d+', text): return int(text) total = 0 section = 0 number = 0 for ch in text: if ch in CN_DIGIT: number = CN_DIGIT[ch] elif ch in CN_UNIT: unit = CN_UNIT[ch] if number == 0: number = 1 # 例如：十=10（前面的 1 省略） section += number * unit number = 0 elif ch == '萬': part = section + number if part == 0: part = 1 # 單獨的「萬」視為 1 萬 total += part * 10000 section = 0 number = 0 else: return None return total + section + number # 僅匹配緊貼形式的 第…條 / 第…項 PATTERN = re.compile(r'第([零〇○◯兩二一三四五六七八九十百千萬0-9]+)(條|項)') def convert_ordinals(s: str) -> Tuple[str, int]: count = 0 def repl(m): nonlocal count numtxt = m.group(1) unit = m.group(2) val = cn_to_int(numtxt) if val is None: return m.group(0) if str(val) == numtxt: return m.group(0) # 已是阿拉伯數字 count += 1 return f'第{val}{unit}' out = PATTERN.sub(repl, s) return out, count # 清單首項前自動補空行（不處理程式碼區塊內文） LIST_ITEM_RE = re.compile(r'^([\t ]*)(?:> ?)*([\t ]*)(?:[-*+]|\d+[.)])\s+') FENCE_RE = re.compile(r'^(?:[\t ]*)(```|~~~)') def ensure_blank_line_before_lists(s: str) -> Tuple[str, int]: lines = s.splitlines() out: List[str] = [] fixes = 0 in_code = False fence_seq = None for i, line in enumerate(lines): # 追蹤程式碼區塊的開始/結束 m_f = FENCE_RE.match(line) if m_f: tick = m_f.group(1) if not in_code: in_code = True fence_seq = tick elif tick == fence_seq: in_code = False fence_seq = None # 非程式碼區塊時，檢查是否為清單首項，需要補空行 if not in_code and LIST_ITEM_RE.match(line): prev = out[-1] if out else '' if prev.strip() != '' and not LIST_ITEM_RE.match(prev): out.append('') fixes += 1 out.append(line) return '\n'.join(out), fixes def iter_target_files(paths: List[Path]) -> List[Path]: # 
Traverse every level under each target and collect files. # - Recursively walks all subdirectories (follows symlinks). # - Supports wildcard patterns in targets (e.g., **/*.md). # - Filters to .md/.markdown/.txt (case-insensitive). exts = {'.md', '.markdown', '.txt'} files: List[Path] = [] def maybe_add(fp: Path): try: if fp.is_file() and fp.suffix.lower() in exts: files.append(fp) except Exception: pass for p in paths: pat = str(p) if any(ch in pat for ch in '*?['): for g in glob.glob(pat, recursive=True): gp = Path(g) if gp.is_dir(): for root, dirs, names in os.walk(gp, followlinks=True): for name in names: maybe_add(Path(root) / name) else: maybe_add(gp) elif Path(p).is_dir(): for root, dirs, names in os.walk(p, followlinks=True): for name in names: maybe_add(Path(root) / name) elif Path(p).exists(): maybe_add(Path(p)) return files 

In [3]:

  Copied!     
 
# 如果要固定以 Notebook 掛載的工作資料夾當根目錄
_base = Path.cwd() / "work"


def _resolve_target(p: str | os.PathLike) -> Path:
    p = Path(p)
    return (_base / p).resolve() if not p.is_absolute() else p


target_paths: List[Path] = [_resolve_target(p) for p in TARGET_PATHS]


def _rel_to_base(path: Path) -> Path:
    """Return ``path`` relative to ``_base``, or ``path`` itself when it is not under ``_base``."""
    try:
        return path.relative_to(_base)
    except ValueError:  # raised by relative_to() for paths outside _base
        return path


if VERBOSE:
    print(f"[root] {_base}")
    for _t in target_paths:
        print(f"[target] {_rel_to_base(_t)}")

    # Recursively list every file and directory under each target; useful
    # for checking that the mount is visible and the paths are correct.
    for _t in target_paths:
        _t_rel = _rel_to_base(_t)
        print(f"[tree] Listing under {_t_rel}:")
        if _t.is_dir():
            for _root, _dirs, _names in os.walk(_t, followlinks=True):
                for _d in _dirs:
                    print(f"DIR  {_rel_to_base(Path(_root) / _d)}")
                for _n in _names:
                    print(f"FILE {_rel_to_base(Path(_root) / _n)}")
        elif _t.is_file():
            print(f"FILE {_t_rel}")
        else:
            print(f"[warn] {_t_rel} not found")

# ---- Collect the files (materialised as a list so emptiness can be checked) ----
files_list = list(iter_target_files(target_paths))
if not files_list and VERBOSE:
    print("[warn] No files found under targets. Check mounts and paths.")

ordinal_total = 0
list_fixes_total = 0
files_changed = 0

for f in files_list:
    try:
        text = f.read_text(encoding="utf-8")
    except (OSError, UnicodeError) as e:
        print(f"[skip ] {f} (read error: {e})")
        continue

    tmp_text, ord_changes = convert_ordinals(text)
    new_text, list_changes = ensure_blank_line_before_lists(tmp_text)

    if ord_changes == 0 and list_changes == 0:
        continue

    # Write first, count afterwards: a file whose write failed must not be
    # reported as updated in the summary.
    if not DRY_RUN:
        try:
            f.write_text(new_text, encoding="utf-8")
        except (OSError, UnicodeError) as e:
            print(f"[error] {f} (write error: {e})")
            continue

    files_changed += 1
    ordinal_total += ord_changes
    list_fixes_total += list_changes

    if VERBOSE:
        print(f"[update] {f} ({ord_changes} ordinals, {list_changes} list-fixes)")

summary = "Dry run" if DRY_RUN else "Done"
print(f"{summary}: {files_changed} file(s) "
      f"{'would be' if DRY_RUN else 'were'} updated, "
      f"{ordinal_total} ordinal-fix(es), {list_fixes_total} list-fix(es).")
# 如果要固定以 Notebook 掛載的工作資料夾當根目錄 _base = Path.cwd() / "work" def _resolve_target(p: str | os.PathLike) -> Path: p = Path(p) return (_base / p).resolve() if not p.is_absolute() else p target_paths: List[Path] = [_resolve_target(p) for p in TARGET_PATHS] if VERBOSE: print(f"[root] {_base}") for _t in target_paths: try: _rel = _t.relative_to(_base) except Exception: _rel = _t print(f"[target] {_rel}") # 列出目標目錄下的所有檔案與資料夾（遞迴），用於測試可見性與掛載是否正確 for _t in target_paths: try: _t_rel = _t.relative_to(_base) except Exception: _t_rel = _t print(f"[tree] Listing under {_t_rel}:") if _t.is_dir(): for _root, _dirs, _names in os.walk(_t, followlinks=True): for _d in _dirs: _p = Path(_root) / _d try: _r = _p.relative_to(_base) except Exception: _r = _p print(f"DIR {_r}") for _n in _names: _p = Path(_root) / _n try: _r = _p.relative_to(_base) except Exception: _r = _p print(f"FILE {_r}") elif _t.is_file(): print(f"FILE {_t_rel}") else: print(f"[warn] {_t_rel} not found") # ---- 收集檔案（把 iterator 先展開成 list 才能檢查是否為空）---- files_list = list(iter_target_files(target_paths)) if not files_list and VERBOSE: print("[warn] No files found under targets. 
Check mounts and paths.") ordinal_total = 0 list_fixes_total = 0 files_changed = 0 for f in files_list: try: text = f.read_text(encoding="utf-8") except Exception as e: print(f"[skip ] {f} (read error: {e})") continue tmp_text, ord_changes = convert_ordinals(text) new_text, list_changes = ensure_blank_line_before_lists(tmp_text) if ord_changes > 0 or list_changes > 0: files_changed += 1 ordinal_total += ord_changes list_fixes_total += list_changes if VERBOSE: print(f"[update] {f} ({ord_changes} ordinals, {list_changes} list-fixes)") if not DRY_RUN: try: f.write_text(new_text, encoding="utf-8") except Exception as e: print(f"[error] {f} (write error: {e})") summary = "Dry run" if DRY_RUN else "Done" print(f"{summary}: {files_changed} file(s) " f"{'would be' if DRY_RUN else 'were'} updated, " f"{ordinal_total} ordinal-fix(es), {list_fixes_total} list-fix(es).")

Done: 8 file(s) were updated, 39 ordinal-fix(es), 15 list-fix(es).

完成¶

若使用 DRY_RUN=True，僅會顯示"將會修改"的檔案與變更數。
確認結果後，將 DRY_RUN 改為 False 並再執行一次，即可寫回檔案。
需要擴充其他規則，歡迎在此 Notebook 繼續加上自己的處理邏輯。