"""Sphinx extension to rewrite internal links in reStructuredText files."""
from __future__ import annotations
from pathlib import PurePosixPath
from typing import Any
from urllib.parse import urlparse
from docutils import nodes
from sphinx.transforms import SphinxTransform
from sphinx.transforms.post_transforms import SphinxPostTransform
from sphinx.util import logging
logger = logging.getLogger(__name__)
[docs]
def _is_external(href: str) -> bool:
"""
Check if a given href is an external link.
Parameters
----------
href: str
The URL to check.
Returns
-------
bool
True if the URL is external, False otherwise.
"""
parsed = urlparse(href)
return bool(parsed.scheme and parsed.netloc)
[docs]
def _strip_docs_prefix(path_str: str, docs_relative_path: str) -> str:
"""
Remove the leading docs folder prefix from a path string.
This function handles both relative and absolute paths and normalizes
different prefix formats:
- Prefix "docs", "docs/", "/docs", "/docs/" all work consistently
- For relative paths like "docs/something", it removes the docs prefix
- For absolute paths like "/docs/something", it removes the docs prefix
Parameters
----------
path_str: str
The path string to modify.
docs_relative_path: str
The prefix to remove (can be in various formats).
Returns
-------
str
The modified path string with the prefix removed.
"""
if not path_str or not docs_relative_path:
return path_str
# Use PurePosixPath for cross-platform path normalization
original_path = PurePosixPath(path_str)
original_str = str(original_path)
# Normalize the prefix by removing leading/trailing slashes
# This makes "docs", "docs/", "/docs", "/docs/" all equivalent
normalized_prefix = docs_relative_path.strip("/")
if not normalized_prefix:
return original_str
# Create both relative and absolute versions of the prefix
relative_prefix = normalized_prefix + "/"
absolute_prefix = "/" + normalized_prefix + "/"
# Try to match relative prefix (e.g., "docs/")
if original_str.startswith(relative_prefix):
result = original_str[len(relative_prefix) :]
result = str(PurePosixPath(result)).lstrip("/")
return "" if result == "." else result
# Try to match absolute prefix (e.g., "/docs/")
if original_str.startswith(absolute_prefix):
result = original_str[len(absolute_prefix) :]
result = str(PurePosixPath(result)).lstrip("/")
return "" if result == "." else result
# Try to match exact prefix without trailing slash for edge cases
# like "docs" matching exactly "docs" or "/docs" matching exactly "/docs"
if original_str == normalized_prefix or original_str == "/" + normalized_prefix:
return ""
# If no prefix matched, return the normalized path
return original_str
[docs]
class RstImageRewriter(SphinxTransform):
    """
    Transform that strips the docs folder prefix from image URIs.

    Every ``nodes.image`` in the document whose ``uri`` is non-empty and not
    external is passed through ``_strip_docs_prefix``; the node is updated
    only when the result differs from the current value.
    """

    default_priority = 210

    # Builder names for which the rewrite is performed; any other builder
    # leaves the document untouched.
    supported_builders: tuple[str, ...] = (
        "html",
        "dirhtml",
        "singlehtml",
        "epub",
        "latex",
        "latexpdf",
    )

    def apply(self) -> None:
        """Strip the configured docs prefix from local image URIs."""
        active_builder = self.app.builder
        if active_builder.name not in self.supported_builders:
            return

        # Fall back to "docs/" when the config value is empty or unset.
        prefix = self.app.config.docs_relative_path or "docs/"

        rewritten = 0
        for image_node in list(self.document.findall(nodes.image)):
            current_uri = image_node.get("uri")
            if not current_uri:
                continue
            if _is_external(current_uri):
                continue

            candidate_uri = _strip_docs_prefix(current_uri, prefix)
            if candidate_uri == current_uri:
                continue

            image_node["uri"] = candidate_uri
            rewritten += 1

        if not rewritten:
            return

        logger.info(
            "[link_rewriter] %s: rewrote %d image path(s)",
            self.env.docname,
            rewritten,
        )
[docs]
class RstLinkRewriter(SphinxPostTransform):
    """
    Post-transform to rewrite internal links in reStructuredText files.

    For every ``nodes.reference`` whose ``refuri`` is a local path ending in
    one of the configured extensions, the docs folder prefix is stripped, the
    extension is dropped, and the result is passed to the builder's
    ``get_target_uri`` to obtain the new ``refuri``. Any ``#fragment`` is
    carried over (sanitized for the LaTeX builders).
    """

    default_priority = 999

    # Builder names for which links are rewritten; other builders are skipped.
    supported_builders: tuple[str, ...] = (
        "html",
        "dirhtml",
        "singlehtml",
        "epub",
        "latex",
        "latexpdf",
    )

    def _sanitize_fragment_for_latex(self, fragment: str) -> str:
        """
        Sanitize a fragment for LaTeX compatibility.

        Dots and spaces become hyphens; every remaining character that is
        not alphanumeric, ``-`` or ``_`` is dropped.

        Parameters
        ----------
        fragment: str
            The fragment to sanitize.

        Returns
        -------
        str
            The sanitized fragment (possibly empty).
        """
        if not fragment:
            return ""
        # LaTeX labels should be alphanumeric with hyphens/underscores
        safe_frag = fragment.replace(".", "-").replace(" ", "-")
        return "".join(c for c in safe_frag if c.isalnum() or c in "-_")

    @staticmethod
    def _docname_from_path(path_str: str) -> str:
        """
        Turn a (prefix-stripped) link path into a document name.

        The file extension is removed, and a leading root (``/``) and leading
        ``..`` components are dropped. Components are compared whole, so a
        name that merely starts with a dot (``.github/page``) is preserved;
        a character-set strip such as ``lstrip("./")`` would mangle it.

        Parameters
        ----------
        path_str: str
            Non-empty POSIX-style path, typically still carrying its suffix.

        Returns
        -------
        str
            The slash-separated document name.
        """
        doc_path = PurePosixPath(path_str).with_suffix("")
        parts = doc_path.parts
        if doc_path.anchor:
            # First part is the root ("/" or "//"); it is not a path component.
            parts = parts[1:]
        first = 0
        while first < len(parts) and parts[first] == "..":
            first += 1
        return "/".join(parts[first:])

    def _process_references(self, docs_relative_path: str, exts: tuple[str, ...]) -> int:
        """
        Process and rewrite reference nodes.

        Parameters
        ----------
        docs_relative_path: str
            Docs folder prefix to strip from link paths.
        exts: tuple[str, ...]
            File suffixes (including the dot) that mark a link as a document
            link to be rewritten.

        Returns
        -------
        int
            Number of reference nodes whose ``refuri`` was rewritten.
        """
        builder = self.app.builder
        is_latex = builder.name in ("latex", "latexpdf")
        changed = 0

        for ref in list(self.document.findall(nodes.reference)):
            uri = ref.get("refuri")
            if not uri or _is_external(uri):
                continue

            # Split off the fragment at the first "#", if any.
            path_str, _, frag = uri.partition("#")
            frag = frag.strip()

            if PurePosixPath(path_str).suffix not in exts:
                continue

            # Remove leading folder prefix
            stripped = _strip_docs_prefix(path_str, docs_relative_path)
            if not stripped:
                # Nothing left to name a document; with_suffix() would raise
                # ValueError on an empty path, so leave the link as it is.
                continue
            target_doc = self._docname_from_path(stripped)

            # NOTE(review): get_target_uri is given only the target docname;
            # confirm whether links from documents in sub-directories need
            # builder.get_relative_uri(self.env.docname, target_doc) instead.
            try:
                new_uri = builder.get_target_uri(target_doc)
            except Exception:
                # Best effort: log with traceback and keep the original link.
                logger.exception(
                    "[link_rewriter] %s: failed to resolve URI for %s",
                    self.env.docname,
                    target_doc,
                )
                continue

            # Handle fragments based on builder type
            if frag and is_latex:
                frag = self._sanitize_fragment_for_latex(frag)
            if frag:
                # Re-checked after sanitizing so an emptied fragment does not
                # leave a dangling "#" on the URI.
                new_uri = f"{new_uri}#{frag}"

            ref["refuri"] = new_uri
            changed += 1

        return changed

    def run(self) -> None:
        """Rewrite internal links in the document."""
        builder = self.app.builder
        # Check if the current builder is supported
        if builder.name not in self.supported_builders:
            logger.debug(
                "[link_rewriter] %s: skipping transformation for unsupported builder '%s'",
                self.env.docname,
                builder.name,
            )
            return

        # Fall back to defaults when the config values are empty or unset.
        docs_relative_path = self.app.config.docs_relative_path or "docs/"
        # Extensions to rewrite
        exts = tuple(self.app.config.sphinx_linkfix_extensions or (".rst", ".md", ".txt"))

        # Process references only (images are handled by RstImageRewriter)
        changed = self._process_references(docs_relative_path, exts)
        if changed:
            logger.info("[link_rewriter] %s: rewrote %d link(s)", self.env.docname, changed)
[docs]
def setup(app: Any) -> dict[str, str | bool]:
    """
    Register the extension's config values and transforms with Sphinx.

    Parameters
    ----------
    app: Any
        The Sphinx application object.

    Returns
    -------
    dict[str, str | bool]
        Extension metadata: the version string and the
        ``parallel_read_safe`` flag.
    """
    logger.info("[link_rewriter] extension loaded")

    # (option name, default value); both are registered with rebuild="env".
    config_defaults = (
        ("docs_relative_path", "docs/"),
        ("sphinx_linkfix_extensions", ()),
    )
    for option_name, default_value in config_defaults:
        app.add_config_value(option_name, default_value, "env")

    # Images are handled by a regular transform, references by a
    # post-transform.
    app.add_transform(RstImageRewriter)
    app.add_post_transform(RstLinkRewriter)

    metadata = {"version": "1.0", "parallel_read_safe": True}
    return metadata