筆記一鍵修正¶
【條項款阿拉伯數字 + 清單空行】
本 Python Notebook 會做兩件事:

- 將 `第…條`、`第…項`、`第…款` 的中文數字(如:第六條、第一項、第一款)改為阿拉伯數字(第6條、第1項、第1款)。
- 確保 Markdown 中,任何清單(無序/有序)的「首個項目」前有一個空行(不改動程式碼區塊內文)。
使用方式:先在下方【設定】區調整 `TARGET_PATHS` / `DRY_RUN` / `VERBOSE`,再執行「Run All」。

安全建議:第一次請先將 `DRY_RUN` 設為 `True` 預覽變更,確認無誤後再改為 `False` 實際寫入。
設定¶
- `TARGET_PATHS`:要處理的檔案或資料夾(可多個),資料夾會遞迴掃描 `.md` / `.markdown` / `.txt`。相對路徑會以目前工作目錄下的 `work` 資料夾(即 `Path.cwd() / "work"`)為基準解析。
- `DRY_RUN`:`True` 只預覽、`False` 會直接寫回檔案。
- `VERBOSE`:顯示每個有變更的檔案與變更數。
In [1]:
Copied!
# --- Settings (edit as needed) ---
TARGET_PATHS = ["./mkdocs"]  # files or directories to process; multiple entries allowed
DRY_RUN = False  # True = preview only; False = write changes back to the files
VERBOSE = False  # show each changed file together with its change counts
# --- 設定(請依需要修改)--- TARGET_PATHS = ["./mkdocs"] # 檔案或資料夾路徑,可多個 DRY_RUN = False # True=僅預覽;False=寫入檔案 VERBOSE = False # 顯示每個檔案和變更數
原理與限制¶
- 條/項轉換:僅處理緊貼形式 `第[數字]條`、`第[數字]項`、`第[數字]款`。數字可為中文(含 十/百/千/萬、零、〇、○、◯、兩)或阿拉伯數字。
- 清單空行:當偵測到清單(`-` / `*` / `+` / `1.` / `1)` 等)首個項目前,若上一行不是空白且不是清單,會自動插入一行空白。
- 程式碼區塊(``` 或 ~~~)內文字不會被改動。
In [2]:
Copied!
# --- 轉換工具(自包含)---
import os, glob, re
from pathlib import Path
from typing import Optional, Tuple, List
# Chinese numeral lookup tables
CN_DIGIT = {
    '零': 0, '〇': 0, '○': 0, '◯': 0,
    '一': 1, '二': 2, '兩': 2, '三': 3, '四': 4, '五': 5,
    '六': 6, '七': 7, '八': 8, '九': 9,
}
CN_UNIT = {'十': 10, '百': 100, '千': 1000}


def cn_to_int(text: str) -> Optional[int]:
    """Convert a Chinese or Arabic numeral string to an integer.

    Supported forms:

    * Arabic digits, e.g. ``"42"``.
    * Unit-based Chinese numerals using 十/百/千/萬, e.g. ``"一百零五"``.
    * Positional Chinese digits without any unit, e.g. ``"一〇一"`` -> 101.

    Args:
        text: The numeral to convert.

    Returns:
        The integer value, or ``None`` when *text* is empty or contains a
        character that is not a known digit or unit.
    """
    if not text:
        return None
    if re.fullmatch(r'\d+', text):
        return int(text)

    # Two or more digits with no unit between them are written positionally
    # (一〇一 = 101); the unit-based loop below would keep only the last digit.
    if len(text) > 1 and all(ch in CN_DIGIT for ch in text):
        value = 0
        for ch in text:
            value = value * 10 + CN_DIGIT[ch]
        return value

    total = 0    # value already closed off by a 萬
    section = 0  # value accumulated below the current 萬
    number = 0   # pending digit waiting for its unit
    for ch in text:
        if ch in CN_DIGIT:
            number = CN_DIGIT[ch]
        elif ch in CN_UNIT:
            # A bare unit implies a leading one, e.g. 十 = 10.
            section += (number or 1) * CN_UNIT[ch]
            number = 0
        elif ch == '萬':
            # A bare 萬 counts as one 萬.
            total += ((section + number) or 1) * 10000
            section = 0
            number = 0
        else:
            return None
    return total + section + number
# Match only the tightly-joined forms 第…條 / 第…項 / 第…款 (nothing between
# 第, the numeral and the unit character).
PATTERN = re.compile(r'第([零〇○◯兩二一三四五六七八九十百千萬0-9]+)(條|項|款)')


def convert_ordinals(s: str) -> Tuple[str, int]:
    """Rewrite Chinese numerals in 第…條 / 第…項 / 第…款 as Arabic digits.

    Args:
        s: The text to process.

    Returns:
        A ``(new_text, count)`` tuple, where *count* is the number of
        occurrences that were actually rewritten. Matches whose numeral cannot
        be parsed, or that already read as the plain Arabic number, are left
        untouched and not counted.
    """
    count = 0

    def repl(m):
        nonlocal count
        numtxt = m.group(1)
        val = cn_to_int(numtxt)
        # Unparseable numeral, or already in canonical Arabic form: keep as is.
        if val is None or str(val) == numtxt:
            return m.group(0)
        count += 1
        return f'第{val}{m.group(2)}'

    return PATTERN.sub(repl, s), count
# Insert a blank line before the first item of a list (fenced code is skipped)
LIST_ITEM_RE = re.compile(r'^([\t ]*)(?:> ?)*([\t ]*)(?:[-*+]|\d+[.)])\s+')
# Group 1 is the whole run of fence characters so fence lengths can be compared.
FENCE_RE = re.compile(r'^(?:[\t ]*)(`{3,}|~{3,})')
# Leading block-quote markers ("> ", ">> ", ...) of a line.
_QUOTE_PREFIX_RE = re.compile(r'^[\t ]*(?:> ?)+')


def ensure_blank_line_before_lists(s: str) -> Tuple[str, int]:
    """Make sure the first item of every Markdown list follows a blank line.

    A blank line is inserted before a list item when the previous line is
    neither blank nor itself a list item. Lines inside fenced code blocks
    (``` or ~~~) are never modified.

    Original line terminators, including a trailing newline or its absence,
    are preserved; an inserted line reuses the terminator of the line before
    it. Inside a block quote the inserted line carries the quote marker so the
    quote is not split in two, and a previous line holding only quote markers
    counts as blank.

    Args:
        s: The Markdown text to process.

    Returns:
        A ``(new_text, fixes)`` tuple, where *fixes* is the number of blank
        lines inserted.
    """
    out: List[str] = []
    fixes = 0
    open_fence = None  # fence run that opened the current code block, if any
    prev_body = ''     # previous line without its terminator
    prev_eol = '\n'    # terminator of the previous line

    # Walk each line's content alongside its raw form so terminators survive.
    for body, raw in zip(s.splitlines(), s.splitlines(keepends=True)):
        eol = raw[len(body):]

        m_f = FENCE_RE.match(body)
        if m_f:
            run = m_f.group(1)
            if open_fence is None:
                open_fence = run
            elif (run[0] == open_fence[0]
                  and len(run) >= len(open_fence)
                  and not body[m_f.end():].strip()):
                # Only a fence of the same character, at least as long as the
                # opener and with nothing after it, closes the block.
                open_fence = None

        if open_fence is None and LIST_ITEM_RE.match(body):
            prev_text = _QUOTE_PREFIX_RE.sub('', prev_body, count=1)
            if prev_text.strip() and not LIST_ITEM_RE.match(prev_body):
                quote = _QUOTE_PREFIX_RE.match(body)
                blank = quote.group(0).rstrip() if quote else ''
                out.append(blank + prev_eol)
                fixes += 1

        out.append(raw)
        prev_body, prev_eol = body, eol or prev_eol

    return ''.join(out), fixes
def iter_target_files(paths: List[Path]) -> List[Path]:
    """Collect the Markdown/text files found under the given targets.

    Each target may be a file, a directory (walked recursively, following
    symlinks) or a wildcard pattern such as ``**/*.md``. Only files whose
    suffix is ``.md``, ``.markdown`` or ``.txt`` (case-insensitive) are kept.

    Every file is returned at most once even when targets overlap, and a
    directory already visited through another path is not walked again, so
    symlink cycles cannot blow up the result. Entries are visited in sorted
    order for reproducible output.

    Args:
        paths: Files, directories or glob patterns to scan.

    Returns:
        The matching files, in discovery order.
    """
    exts = {'.md', '.markdown', '.txt'}
    files: List[Path] = []
    seen_files = set()

    def maybe_add(fp: Path) -> None:
        try:
            if not (fp.is_file() and fp.suffix.lower() in exts):
                return
        except OSError:
            # Unreadable entry (e.g. permission denied): best effort, skip it.
            return
        key = os.path.realpath(fp)
        if key not in seen_files:
            seen_files.add(key)
            files.append(fp)

    def walk(top) -> None:
        seen_dirs = {os.path.realpath(top)}
        for root, dirs, names in os.walk(top, followlinks=True):
            kept = []
            for d in sorted(dirs):
                real = os.path.realpath(os.path.join(root, d))
                if real not in seen_dirs:
                    seen_dirs.add(real)
                    kept.append(d)
            # os.walk does not track visited directories itself; pruning in
            # place stops it from descending into ones already seen.
            dirs[:] = kept
            for name in sorted(names):
                maybe_add(Path(root) / name)

    for p in paths:
        pat = str(p)
        if any(ch in pat for ch in '*?['):
            for g in sorted(glob.glob(pat, recursive=True)):
                gp = Path(g)
                if gp.is_dir():
                    walk(gp)
                else:
                    maybe_add(gp)
        elif Path(p).is_dir():
            walk(p)
        else:
            # maybe_add ignores anything that is not an existing regular file.
            maybe_add(Path(p))
    return files
# --- 轉換工具(自包含)--- import os, glob, re from pathlib import Path from typing import Optional, Tuple, List # 中文數字對照 CN_DIGIT = { '零': 0, '〇': 0, '○': 0, '◯': 0, '一': 1, '二': 2, '兩': 2, '三': 3, '四': 4, '五': 5, '六': 6, '七': 7, '八': 8, '九': 9, } CN_UNIT = {'十': 10, '百': 100, '千': 1000} def cn_to_int(text: str) -> Optional[int]: """將中文數字(含 十/百/千/萬)或阿拉伯數字轉為整數。遇到未知字元則回傳 None。 """ if re.fullmatch(r'\d+', text): return int(text) total = 0 section = 0 number = 0 for ch in text: if ch in CN_DIGIT: number = CN_DIGIT[ch] elif ch in CN_UNIT: unit = CN_UNIT[ch] if number == 0: number = 1 # 例如:十=10(前面的 1 省略) section += number * unit number = 0 elif ch == '萬': part = section + number if part == 0: part = 1 # 單獨的「萬」視為 1 萬 total += part * 10000 section = 0 number = 0 else: return None return total + section + number # 僅匹配緊貼形式的 第…條 / 第…項 PATTERN = re.compile(r'第([零〇○◯兩二一三四五六七八九十百千萬0-9]+)(條|項)') def convert_ordinals(s: str) -> Tuple[str, int]: count = 0 def repl(m): nonlocal count numtxt = m.group(1) unit = m.group(2) val = cn_to_int(numtxt) if val is None: return m.group(0) if str(val) == numtxt: return m.group(0) # 已是阿拉伯數字 count += 1 return f'第{val}{unit}' out = PATTERN.sub(repl, s) return out, count # 清單首項前自動補空行(不處理程式碼區塊內文) LIST_ITEM_RE = re.compile(r'^([\t ]*)(?:> ?)*([\t ]*)(?:[-*+]|\d+[.)])\s+') FENCE_RE = re.compile(r'^(?:[\t ]*)(```|~~~)') def ensure_blank_line_before_lists(s: str) -> Tuple[str, int]: lines = s.splitlines() out: List[str] = [] fixes = 0 in_code = False fence_seq = None for i, line in enumerate(lines): # 追蹤程式碼區塊的開始/結束 m_f = FENCE_RE.match(line) if m_f: tick = m_f.group(1) if not in_code: in_code = True fence_seq = tick elif tick == fence_seq: in_code = False fence_seq = None # 非程式碼區塊時,檢查是否為清單首項,需要補空行 if not in_code and LIST_ITEM_RE.match(line): prev = out[-1] if out else '' if prev.strip() != '' and not LIST_ITEM_RE.match(prev): out.append('') fixes += 1 out.append(line) return '\n'.join(out), fixes def iter_target_files(paths: List[Path]) -> List[Path]: # 
Traverse every level under each target and collect files. # - Recursively walks all subdirectories (follows symlinks). # - Supports wildcard patterns in targets (e.g., **/*.md). # - Filters to .md/.markdown/.txt (case-insensitive). exts = {'.md', '.markdown', '.txt'} files: List[Path] = [] def maybe_add(fp: Path): try: if fp.is_file() and fp.suffix.lower() in exts: files.append(fp) except Exception: pass for p in paths: pat = str(p) if any(ch in pat for ch in '*?['): for g in glob.glob(pat, recursive=True): gp = Path(g) if gp.is_dir(): for root, dirs, names in os.walk(gp, followlinks=True): for name in names: maybe_add(Path(root) / name) else: maybe_add(gp) elif Path(p).is_dir(): for root, dirs, names in os.walk(p, followlinks=True): for name in names: maybe_add(Path(root) / name) elif Path(p).exists(): maybe_add(Path(p)) return files
In [3]:
Copied!
# Anchor relative targets at the "work" folder under the current working
# directory (the folder the notebook is mounted with).
_base = Path.cwd() / "work"


def _resolve_target(p: str | os.PathLike) -> Path:
    """Return *p* as a Path; relative paths are resolved against ``_base``."""
    p = Path(p)
    if p.is_absolute():
        return p
    return (_base / p).resolve()


target_paths: List[Path] = list(map(_resolve_target, TARGET_PATHS))
if VERBOSE:
    def _shown(path: Path) -> Path:
        """Return *path* relative to ``_base`` when possible, else unchanged."""
        try:
            return path.relative_to(_base)
        except Exception:
            return path

    print(f"[root] {_base}")
    for _t in target_paths:
        print(f"[target] {_shown(_t)}")

    # Recursively list every directory and file under each target; useful for
    # checking that the mount is visible and the paths are right.
    for _t in target_paths:
        _t_rel = _shown(_t)
        print(f"[tree] Listing under {_t_rel}:")
        if _t.is_dir():
            for _root, _dirs, _names in os.walk(_t, followlinks=True):
                _here = Path(_root)
                for _d in _dirs:
                    print(f"DIR {_shown(_here / _d)}")
                for _n in _names:
                    print(f"FILE {_shown(_here / _n)}")
        elif _t.is_file():
            print(f"FILE {_t_rel}")
        else:
            print(f"[warn] {_t_rel} not found")
# ---- Collect files (materialised as a list so emptiness can be checked) ----
files_list = list(iter_target_files(target_paths))
if not files_list and VERBOSE:
    print("[warn] No files found under targets. Check mounts and paths.")

ordinal_total = 0
list_fixes_total = 0
files_changed = 0

for f in files_list:
    try:
        # newline="" turns off newline translation, so the file's own line
        # endings reach the converters unchanged.
        with f.open("r", encoding="utf-8", newline="") as fh:
            text = fh.read()
    except (OSError, UnicodeError) as e:
        print(f"[skip ] {f} (read error: {e})")
        continue

    tmp_text, ord_changes = convert_ordinals(text)
    new_text, list_changes = ensure_blank_line_before_lists(tmp_text)
    if ord_changes == 0 and list_changes == 0:
        continue

    if not DRY_RUN:
        try:
            with f.open("w", encoding="utf-8", newline="") as fh:
                fh.write(new_text)
        except (OSError, UnicodeError) as e:
            print(f"[error] {f} (write error: {e})")
            # A failed write must not be reported as an updated file.
            continue

    files_changed += 1
    ordinal_total += ord_changes
    list_fixes_total += list_changes
    if VERBOSE:
        print(f"[update] {f} ({ord_changes} ordinals, {list_changes} list-fixes)")

summary = "Dry run" if DRY_RUN else "Done"
print(f"{summary}: {files_changed} file(s) "
      f"{'would be' if DRY_RUN else 'were'} updated, "
      f"{ordinal_total} ordinal-fix(es), {list_fixes_total} list-fix(es).")
# 如果要固定以 Notebook 掛載的工作資料夾當根目錄 _base = Path.cwd() / "work" def _resolve_target(p: str | os.PathLike) -> Path: p = Path(p) return (_base / p).resolve() if not p.is_absolute() else p target_paths: List[Path] = [_resolve_target(p) for p in TARGET_PATHS] if VERBOSE: print(f"[root] {_base}") for _t in target_paths: try: _rel = _t.relative_to(_base) except Exception: _rel = _t print(f"[target] {_rel}") # 列出目標目錄下的所有檔案與資料夾(遞迴),用於測試可見性與掛載是否正確 for _t in target_paths: try: _t_rel = _t.relative_to(_base) except Exception: _t_rel = _t print(f"[tree] Listing under {_t_rel}:") if _t.is_dir(): for _root, _dirs, _names in os.walk(_t, followlinks=True): for _d in _dirs: _p = Path(_root) / _d try: _r = _p.relative_to(_base) except Exception: _r = _p print(f"DIR {_r}") for _n in _names: _p = Path(_root) / _n try: _r = _p.relative_to(_base) except Exception: _r = _p print(f"FILE {_r}") elif _t.is_file(): print(f"FILE {_t_rel}") else: print(f"[warn] {_t_rel} not found") # ---- 收集檔案(把 iterator 先展開成 list 才能檢查是否為空)---- files_list = list(iter_target_files(target_paths)) if not files_list and VERBOSE: print("[warn] No files found under targets. 
Check mounts and paths.") ordinal_total = 0 list_fixes_total = 0 files_changed = 0 for f in files_list: try: text = f.read_text(encoding="utf-8") except Exception as e: print(f"[skip ] {f} (read error: {e})") continue tmp_text, ord_changes = convert_ordinals(text) new_text, list_changes = ensure_blank_line_before_lists(tmp_text) if ord_changes > 0 or list_changes > 0: files_changed += 1 ordinal_total += ord_changes list_fixes_total += list_changes if VERBOSE: print(f"[update] {f} ({ord_changes} ordinals, {list_changes} list-fixes)") if not DRY_RUN: try: f.write_text(new_text, encoding="utf-8") except Exception as e: print(f"[error] {f} (write error: {e})") summary = "Dry run" if DRY_RUN else "Done" print(f"{summary}: {files_changed} file(s) " f"{'would be' if DRY_RUN else 'were'} updated, " f"{ordinal_total} ordinal-fix(es), {list_fixes_total} list-fix(es).")
Done: 8 file(s) were updated, 39 ordinal-fix(es), 15 list-fix(es).
完成¶
- 若使用 `DRY_RUN=True`,僅會顯示「將會修改」的檔案與變更數。
- 確認結果後,將 `DRY_RUN` 設為 `False` 並再執行一次,即可寫回檔案。
- 需要擴充其他規則,歡迎在此 Notebook 繼續加上自己的處理邏輯。