changemaker.lite/mkdocs/site/hooks/wikilinks_hook.py

200 lines
6.6 KiB
Python

"""
MkDocs Hook for Wiki-Link Resolution
Converts Obsidian-style [[wiki-links]] to standard Markdown links/images.
Supported syntax:
[[page-name]] → [page-name](../page-name/)
[[page-name|Display Text]] → [Display Text](../page-name/)
[[page-name#heading]] → [page-name#heading](../page-name/#heading)
![[image.png]] → ![image.png](../path/to/image.png)
![[image.png|alt text]] → ![alt text](../path/to/image.png)
Links inside fenced code blocks and inline code are skipped.
Unresolved links are left as-is with a warning logged.
"""
import os
import re
import logging
from typing import Dict, Any, Optional
# Logger for this hook, named after the module.
logger = logging.getLogger(__name__)

# Extensions (lowercase, with the leading dot) that count as image files.
_IMAGE_EXTENSIONS = {
    '.png',
    '.jpg',
    '.jpeg',
    '.gif',
    '.svg',
    '.webp',
    '.ico',
    '.bmp',
    '.tiff',
    '.avif',
}

# Lookup table filled by on_files(): lowercased file name → src_path of that file.
_file_index: Dict[str, str] = dict()

# Mirrors the site's `use_directory_urls` setting; refreshed by on_files().
_use_directory_urls: bool = True
def on_files(files: Any, config: Dict[str, Any]) -> None:
    """Rebuild the wiki-link file index from the site's file collection.

    Clears the module-level ``_file_index`` and refills it so that wiki-link
    names can be looked up case-insensitively. Also records the site's
    ``use_directory_urls`` setting for later URL computation.

    Indexing rules:
      * every file is indexed under its lowercased base name
        (e.g. ``installation.md``, ``logo.png``);
      * every ``.md`` file is additionally indexed under its stem
        (``installation``), unless that key is already taken;
      * when several files share a key, the first one encountered wins, so
        ``[[name]]`` and ``[[name.md]]`` always resolve to the same page;
      * an exact file name always takes precedence over a stem alias.

    Args:
        files: Iterable of file objects exposing a ``src_path`` attribute.
        config: Site configuration mapping; ``use_directory_urls`` is read
            from it and defaults to ``True`` when absent.

    Returns:
        None. The index is stored in module-level state.
    """
    global _use_directory_urls
    _file_index.clear()
    _use_directory_urls = config.get('use_directory_urls', True)

    # Stem aliases are collected separately and merged afterwards so that an
    # exact file name can never be shadowed by the stem of another file,
    # regardless of the order in which the files are listed.
    stem_aliases: Dict[str, str] = {}
    for f in files:
        # f.src_path is relative to docs_dir, e.g. "getting-started/installation.md"
        src = f.src_path
        name_lower = os.path.basename(src).lower()
        _file_index.setdefault(name_lower, src)
        if name_lower.endswith('.md'):
            stem_aliases.setdefault(name_lower[:-3], src)

    for stem, src in stem_aliases.items():
        _file_index.setdefault(stem, src)

    logger.info(
        "[wikilinks] Indexed %d files for wiki-link resolution",
        len(_file_index),
    )
def _resolve_link(name: str) -> Optional[str]:
    """Find the indexed source path for *name*, ignoring case.

    The name is lowercased and stripped of surrounding whitespace before the
    lookup in ``_file_index``. Returns the stored src_path, or ``None`` when
    the index has no entry for it.
    """
    lowered = name.lower()
    lookup_key = lowered.strip()
    try:
        return _file_index[lookup_key]
    except KeyError:
        return None
def _compute_relative_path(from_page_src: str, to_src: str, anchor: str = '') -> str:
    """Compute the relative URL that links one page to another file.

    Args:
        from_page_src: Source path of the page containing the link.
        to_src: Source path of the link target.
        anchor: Optional fragment (without ``#``) appended to page and
            other-file URLs. It is ignored for image targets.

    Returns:
        A ``/``-separated relative URL:
          * image targets → path to the image file, relative to the linking
            page's source directory;
          * ``.md`` targets with directory URLs → the target's directory URL
            with a trailing ``/`` (``index.md`` maps to its parent
            directory), relative to the directory the linking page is served
            from;
          * ``.md`` targets without directory URLs → the target path with its
            extension replaced by ``.html``;
          * anything else → path to the file, relative to the linking page's
            source directory.
    """
    target_root, target_ext = os.path.splitext(to_src)
    target_ext = target_ext.lower()
    from_dir = os.path.dirname(from_page_src)

    if target_ext in _IMAGE_EXTENSIONS:
        # Images: direct relative path to the file, no anchor.
        return os.path.relpath(to_src, from_dir).replace(os.sep, '/')

    if target_ext == '.md' and _use_directory_urls:
        # With directory URLs a page "a/b.md" becomes "a/b/index.html", so the
        # link must be relative to "a/b/" rather than to the source directory
        # "a/". Only index pages are served from their source directory.
        from_root, from_ext = os.path.splitext(from_page_src)
        from_is_index = os.path.basename(from_root).lower() == 'index'
        if from_ext.lower() == '.md' and not from_is_index:
            base_dir = from_root
        else:
            base_dir = from_dir

        # The target is addressed by its directory; index.md is the directory
        # that contains it.
        if os.path.basename(to_src).lower() == 'index.md':
            target_dir = os.path.dirname(to_src)
        else:
            target_dir = target_root

        # os.path.relpath raises ValueError for an empty path, which is what
        # the docs root looks like here — substitute "." for it.
        rel = os.path.relpath(target_dir or '.', base_dir or '.')
        url = rel.replace(os.sep, '/') + '/'
    elif target_ext == '.md':
        # Without directory URLs each page is rendered next to its source as
        # "<name>.html". Swap only the real extension; a blanket
        # str.replace('.md', ...) would also rewrite ".md" inside directory
        # names and would miss upper-case extensions.
        rel = os.path.relpath(target_root, from_dir)
        url = rel.replace(os.sep, '/') + '.html'
    else:
        # Other files: direct path.
        rel = os.path.relpath(to_src, from_dir)
        url = rel.replace(os.sep, '/')

    if anchor:
        url += '#' + anchor
    return url
# Regex to match wiki-links: optional ! prefix, then [[content]].
#   group(1): "!" when the link is written as an embed (![[...]]), otherwise "".
#   group(2): the text between the double brackets — one or more characters,
#             none of which is "]".
_WIKILINK_RE = re.compile(r'(!?)\[\[([^\]]+)\]\]')
# Inline code span delimited by single backticks. The capture group makes
# re.split() return the spans themselves, at the odd positions of the result.
_INLINE_CODE_RE = re.compile(r'(`[^`]+`)')


def _fence_marker(stripped: str) -> str:
    """Return the fence marker that opens a code block on this line, or ''.

    *stripped* is a line with its leading whitespace removed. A marker is a
    run of three or more backticks or tildes at the start of the line. A
    backtick run followed by more backticks on the same line (for example a
    one-line "```code```" span) is inline code and is not treated as a fence.
    """
    fence_char = stripped[:1]
    if fence_char not in ('`', '~'):
        return ''
    run_length = len(stripped) - len(stripped.lstrip(fence_char))
    if run_length < 3:
        return ''
    if fence_char == '`' and '`' in stripped[run_length:]:
        return ''
    return fence_char * run_length


def _replace_wikilinks(markdown: str, page_src: str) -> str:
    """Replace wiki-links in *markdown*, skipping fenced and inline code.

    The text is processed line by line. Lines inside a fenced code block
    (including the fence lines themselves) are copied through unchanged. On
    every other line the inline code spans are left alone and each wiki-link
    in the remaining text is passed to ``_resolve_wikilink``.

    A fenced block is closed only by a line consisting of the same marker
    that opened it (same character, same length, optional surrounding
    whitespace). A ``~~~`` line inside a backtick block, or a line such as
    "```python" inside a block, therefore does not end the block.

    Args:
        markdown: Markdown source of a page.
        page_src: Source path of that page, forwarded to the resolver so it
            can compute relative URLs and name the page in warnings.

    Returns:
        The markdown with resolvable wiki-links rewritten.
    """
    def substitute(match: "re.Match") -> str:
        return _resolve_wikilink(match, page_src)

    result_lines = []
    open_fence = ''  # marker of the block we are inside, '' when outside
    for line in markdown.split('\n'):
        stripped = line.lstrip()

        if open_fence:
            # Inside a block: only an exact repeat of the opening marker
            # closes it; everything else is code.
            if stripped.rstrip() == open_fence:
                open_fence = ''
            result_lines.append(line)
            continue

        open_fence = _fence_marker(stripped)
        if open_fence:
            # Opening fence line — copied through untouched.
            result_lines.append(line)
            continue

        # Split around inline code spans: even positions are ordinary text,
        # odd positions are the code spans and must stay as they are.
        parts = _INLINE_CODE_RE.split(line)
        for index in range(0, len(parts), 2):
            parts[index] = _WIKILINK_RE.sub(substitute, parts[index])
        result_lines.append(''.join(parts))

    return '\n'.join(result_lines)
def _resolve_wikilink(match: re.Match, page_src: str) -> str:
    """Convert one wiki-link regex match into standard Markdown.

    The bracket content is parsed as ``name[#anchor][|display]``. The name is
    looked up in the file index as written, then with ``.md`` appended, then
    with a few common image extensions appended.

    Args:
        match: A match of ``_WIKILINK_RE``; group 1 is the optional ``!``,
            group 2 the bracket content.
        page_src: Source path of the page containing the link.

    Returns:
        * ``![alt](url)`` for an embed whose target is an image file;
        * ``[label](url)`` for every other resolved link. This includes
          embeds of non-image targets, which would otherwise render as a
          broken image;
        * ``[label](#anchor)`` for a same-page link such as ``[[#heading]]``;
        * the original text, unchanged, when the target cannot be resolved
          (a warning is logged).
    """
    is_embed = match.group(1) == '!'
    inner = match.group(2).strip()

    # Parse "name#anchor|display". partition() yields '' for missing parts,
    # so no separate "is the separator present" checks are needed.
    target, _, display = inner.partition('|')
    name_part, _, anchor = target.partition('#')
    name_part = name_part.strip()
    anchor = anchor.strip()
    display = display.strip()

    # Same-page heading link: there is no file to resolve.
    if not name_part and anchor and not is_embed:
        return f'[{display or anchor}](#{anchor})'

    # Resolve the target file: exact name, then as a page, then as an image.
    target_src = None
    for suffix in ('', '.md', '.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp'):
        target_src = _resolve_link(name_part + suffix)
        if target_src is not None:
            break

    if target_src is None:
        logger.warning(
            "[wikilinks] Unresolved wiki-link: [[%s]] in %s", inner, page_src
        )
        return match.group(0)  # Leave as-is

    url = _compute_relative_path(page_src, target_src, anchor)

    target_ext = os.path.splitext(target_src)[1].lower()
    if is_embed and target_ext in _IMAGE_EXTENSIONS:
        # Image embed: ![[image.png]] or ![[image.png|alt text]]
        alt = display or name_part
        return f'![{alt}]({url})'

    # Document link: [[page]] or [[page|Display Text]]. Embeds of pages and
    # other non-image files also end up here, because image syntax pointing
    # at a non-image cannot be displayed.
    label = display or (name_part + (f'#{anchor}' if anchor else ''))
    return f'[{label}]({url})'
def on_page_markdown(markdown: str, page: Any, config: Dict[str, Any], files: Any) -> str:
    """Rewrite the wiki-links found in a page's markdown.

    Pages whose text contains no ``[[`` are returned untouched without any
    further work; all others are handed to ``_replace_wikilinks`` together
    with the page's source path (``page.file.src_path``).
    """
    has_wikilink_marker = '[[' in markdown
    if has_wikilink_marker:
        source_path = page.file.src_path
        return _replace_wikilinks(markdown, source_path)
    # Nothing that could be a wiki-link — hand the text back unchanged.
    return markdown