200 lines
6.6 KiB
Python
200 lines
6.6 KiB
Python
"""
|
|
MkDocs Hook for Wiki-Link Resolution
|
|
|
|
Converts Obsidian-style [[wiki-links]] to standard Markdown links/images.
|
|
|
|
Supported syntax:
|
|
[[page-name]] → [page-name](../page-name/)
|
|
[[page-name|Display Text]] → [Display Text](../page-name/)
|
|
[[page-name#heading]] → [page-name](../page-name/#heading)
|
|
![[image.png]] → ![image.png](path/to/image.png)
|
|
![[image.png|alt text]] → ![alt text](path/to/image.png)
|
|
|
|
Links inside fenced code blocks and inline code are skipped.
|
|
Unresolved links are left as-is with a warning logged.
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import logging
|
|
from typing import Dict, Any, Optional
|
|
|
|
# Module logger; messages emitted by this hook carry a "[wikilinks]" prefix.
logger = logging.getLogger(__name__)
|
|
|
|
# Module-level file index: filename (lowercase) → relative path from docs root
|
|
# Cleared and repopulated by on_files(). For ".md" files the extension-less
# stem is stored as an additional key, so both "page.md" and "page" resolve.
_file_index: Dict[str, str] = {}
|
|
|
|
# Whether directory URLs are enabled
|
|
# Copied from the "use_directory_urls" config value by on_files(); stays True
# until that hook has run.
_use_directory_urls: bool = True
|
|
|
|
# Image extensions
|
|
# Lowercase, dot-prefixed extensions. _compute_relative_path() links targets
# with one of these extensions directly to the file and ignores any anchor.
_IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp', '.ico', '.bmp', '.tiff', '.avif'}
|
|
|
|
|
|
def on_files(files: Any, config: Dict[str, Any]) -> None:
    """Rebuild the module-level filename index used to resolve wiki-links.

    Every file is indexed under its lowercased basename; ``.md`` files are
    additionally indexed under their lowercased stem (basename without the
    extension). When several files share a key, the first one encountered
    wins for both kinds of key, so ``[[page]]`` and ``[[page.md]]`` always
    resolve to the same file.

    Args:
        files: Iterable of objects exposing a ``src_path`` attribute
            (presumably the MkDocs files collection).
        config: Mapping-like object; ``use_directory_urls`` is read from it
            and defaults to ``True`` when absent.

    Returns:
        None. The results are stored in ``_file_index`` and
        ``_use_directory_urls``.
    """
    global _file_index, _use_directory_urls

    _file_index.clear()
    _use_directory_urls = config.get('use_directory_urls', True)

    for f in files:
        # f.src_path is relative to docs_dir, e.g. "getting-started/installation.md"
        src = f.src_path
        name_lower = os.path.basename(src).lower()

        # First wins: never overwrite an existing entry, so the full-name key
        # and the stem key cannot end up pointing at different files.
        _file_index.setdefault(name_lower, src)

        if name_lower.endswith('.md'):
            # Also index .md files without their extension.
            _file_index.setdefault(name_lower[:-3], src)

    logger.info(
        "[wikilinks] Indexed %d files for wiki-link resolution",
        len(_file_index),
    )
|
|
|
|
|
|
def _resolve_link(name: str) -> Optional[str]:
    """Return the indexed src_path for *name*, or None when it is unknown.

    The lookup key is *name* with surrounding whitespace removed and folded
    to lowercase, which makes the match case-insensitive.
    """
    return _file_index.get(name.strip().lower())
|
|
|
|
|
|
def _compute_relative_path(from_page_src: str, to_src: str, anchor: str = '') -> str:
    """Compute the relative URL from a page to another file.

    Args:
        from_page_src: Source path of the page that contains the link.
        to_src: Source path of the link target (page, image or other file).
        anchor: Optional fragment; appended as ``#anchor`` for non-image
            targets and ignored for images.

    Returns:
        A ``/``-separated relative URL:

        * images and other non-``.md`` files: path of the file relative to
          the linking page's source directory;
        * ``.md`` targets with directory URLs enabled: path of the target's
          URL directory relative to the linking page's URL directory, with a
          trailing ``/``;
        * ``.md`` targets otherwise: relative path with the extension
          replaced by ``.html``.
    """
    def _is_index(src: str) -> bool:
        # index.md (and README.md, which MkDocs treats as an index page) is
        # served from its containing directory rather than a sub-directory.
        stem = os.path.splitext(os.path.basename(src))[0]
        return stem.lower() == 'index' or stem == 'README'

    # An empty dirname means "docs root"; make that explicit for relpath().
    from_dir = os.path.dirname(from_page_src) or '.'

    ext = os.path.splitext(to_src)[1]
    ext_lower = ext.lower()

    if ext_lower in _IMAGE_EXTENSIONS:
        # Images: direct relative path to the file, no anchor.
        return os.path.relpath(to_src, from_dir).replace(os.sep, '/')

    is_md = ext_lower == '.md'

    if is_md and _use_directory_urls:
        # With directory URLs a page "x.md" is served from "x/", so both ends
        # of the link must be expressed as URL directories.
        if _is_index(to_src):
            # `or '.'` keeps relpath() from raising on a root-level index.
            target_dir = os.path.dirname(to_src) or '.'
        else:
            target_dir = to_src[:-len(ext)]

        if _is_index(from_page_src):
            base_dir = from_dir
        else:
            base_dir = os.path.splitext(from_page_src)[0]

        url = os.path.relpath(target_dir, base_dir).replace(os.sep, '/') + '/'
    elif is_md:
        # Without directory URLs the page sits next to its source as .html.
        # Only the trailing extension is swapped, never a ".md" that happens
        # to appear elsewhere in the path.
        rel = os.path.relpath(to_src, from_dir)
        url = rel[:-len(ext)].replace(os.sep, '/') + '.html'
    else:
        # Other files: direct path.
        url = os.path.relpath(to_src, from_dir).replace(os.sep, '/')

    if anchor:
        url += '#' + anchor

    return url
|
|
|
|
|
|
# Regex to match wiki-links: optional ! prefix, then [[content]]
|
|
# Group 1 is the optional "!" embed marker; group 2 is everything between the
# double brackets (one or more characters other than "]").
_WIKILINK_RE = re.compile(r'(!?)\[\[([^\]]+)\]\]')
|
|
|
|
|
|
# A fence line: optional indentation, a run of 3+ backticks or 3+ tildes,
# then the remainder of the line (the info string on an opening fence).
_FENCE_RE = re.compile(r'^\s*(`{3,}|~{3,})(.*)$')

# An inline code span delimited by single backticks.
_INLINE_CODE_RE = re.compile(r'(`[^`]+`)')


def _replace_wikilinks(markdown: str, page_src: str) -> str:
    """Replace wiki-links in *markdown*, skipping fenced and inline code.

    Args:
        markdown: Page content to process.
        page_src: Source path of the page, forwarded to the link resolver.

    Returns:
        The content with every wiki-link outside of code rewritten by
        ``_resolve_wikilink``; lines inside fenced code blocks and inline
        code spans are returned untouched.
    """
    result_lines = []
    # (fence character, fence length) of the currently open fenced block,
    # or None when outside of any block.
    open_fence = None

    for line in markdown.split('\n'):
        fence = _FENCE_RE.match(line)

        if open_fence is not None:
            # Only a fence of the same character, at least as long as the
            # opener and without an info string, closes the block. A "~~~"
            # inside a ``` block (or a "```lang" line) is ordinary content.
            if fence is not None:
                marker, rest = fence.groups()
                if (marker[0] == open_fence[0]
                        and len(marker) >= open_fence[1]
                        and not rest.strip()):
                    open_fence = None
            result_lines.append(line)
            continue

        if fence is not None:
            marker, rest = fence.groups()
            # A backtick run followed by more backticks on the same line is
            # an inline code span, not the start of a fenced block.
            if not (marker[0] == '`' and '`' in rest):
                open_fence = (marker[0], len(marker))
                result_lines.append(line)
                continue

        # Split on inline code spans; because the pattern has one capturing
        # group, odd-indexed segments are the code spans themselves.
        segments = _INLINE_CODE_RE.split(line)
        for idx in range(0, len(segments), 2):
            segments[idx] = _WIKILINK_RE.sub(
                lambda m: _resolve_wikilink(m, page_src),
                segments[idx],
            )
        result_lines.append(''.join(segments))

    return '\n'.join(result_lines)
|
|
|
|
|
|
def _resolve_wikilink(match: re.Match, page_src: str) -> str:
    """Resolve a single wiki-link match to Markdown.

    Args:
        match: A ``_WIKILINK_RE`` match; group 1 is the optional ``!`` embed
            marker and group 2 the text between the brackets.
        page_src: Source path of the page containing the link.

    Returns:
        ``![alt](url)`` for embeds, ``[label](url)`` for plain links, or the
        original matched text (with a warning logged) when the target cannot
        be found in the file index.
    """
    is_embed = match.group(1) == '!'
    inner = match.group(2).strip()

    # Parse "name|display" first, then "name#anchor" from the name half.
    name_part, pipe, display_text = inner.partition('|')
    display = display_text.strip() if pipe else None
    name_part, _, anchor = name_part.partition('#')
    name_part = name_part.strip()
    anchor = anchor.strip()

    # Lookup order: the name as written, then with ".md" appended, then with
    # each image extension. The first six extensions keep their historical
    # priority; the rest cover the remaining entries of _IMAGE_EXTENSIONS.
    candidates = [name_part, name_part + '.md']
    candidates.extend(
        name_part + ext
        for ext in ('.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp',
                    '.avif', '.bmp', '.ico', '.tiff')
    )

    target_src = None
    for candidate in candidates:
        target_src = _resolve_link(candidate)
        if target_src is not None:
            break

    if target_src is None:
        logger.warning(
            "[wikilinks] Unresolved wiki-link: [[%s]] in %s", inner, page_src
        )
        return match.group(0)  # Leave as-is

    url = _compute_relative_path(page_src, target_src, anchor)

    if is_embed:
        # Image embed: ![[image.png]] or ![[image.png|alt text]]
        alt = display or name_part
        return f'![{alt}]({url})'

    # Document link: [[page]] or [[page|Display Text]]
    label = display or (name_part + (f'#{anchor}' if anchor else ''))
    return f'[{label}]({url})'
|
|
|
|
|
|
def on_page_markdown(markdown: str, page: Any, config: Dict[str, Any], files: Any) -> str:
    """Rewrite the wiki-links found in a page's Markdown.

    Pages that contain no ``[[`` at all are returned unchanged without any
    further processing; otherwise the content is passed to
    ``_replace_wikilinks`` together with the page's source path.
    """
    has_wikilink_opener = '[[' in markdown
    if has_wikilink_opener:
        return _replace_wikilinks(markdown, page.file.src_path)

    # Fast path: nothing that could be a wiki-link.
    return markdown
|