Source code for sphinx_linkfix.extension

"""Sphinx extension to rewrite internal links in reStructuredText files."""

from __future__ import annotations

from pathlib import PurePosixPath
from typing import Any
from urllib.parse import urlparse

from docutils import nodes
from sphinx.transforms.post_transforms import SphinxPostTransform
from sphinx.util import logging

logger = logging.getLogger(__name__)


[docs] def _is_external(href: str) -> bool: """ Check if a given href is an external link. Parameters ---------- href: str The URL to check. Returns ------- bool True if the URL is external, False otherwise. """ parsed = urlparse(href) return bool(parsed.scheme and parsed.netloc)
[docs] def _strip_prefixes(path_str: str, prefixes: tuple[str, ...]) -> str: """ Remove leading folder prefixes from a path string. Parameters ---------- path_str: str The path string to modify. prefixes: tuple[str, ...] A tuple of prefixes to remove. Returns ------- str The modified path string with the prefixes removed. """ # For prefix matching, we need to work with the original path # but ensure cross-platform compatibility original_path = path_str.replace("\\", "/") # Convert backslashes to forward slashes # Find the longest matching prefix longest_match = "" for pref in prefixes: normalized_pref = pref.replace("\\", "/") # Normalize prefix too if original_path.startswith(normalized_pref) and len(normalized_pref) > len(longest_match): longest_match = normalized_pref if longest_match: result = original_path[len(longest_match) :] # Now normalize the result using PurePosixPath to clean up any .. or . components return str(PurePosixPath(result)) # If no prefix matched, normalize the whole path return str(PurePosixPath(original_path))
class RstLinkRewriter(SphinxPostTransform):
    """Post-transform to rewrite internal links in reStructuredText files."""

    default_priority = 999
    # Builder names for which run() rewrites links; any other builder is
    # skipped with a debug message.
    supported_builders: tuple[str, ...] = (
        "html",
        "dirhtml",
        "singlehtml",
        "epub",
        "latex",
        "latexpdf",
    )

    def _sanitize_fragment_for_latex(self, fragment: str) -> str:
        """
        Sanitize a fragment for LaTeX compatibility.

        Dots and spaces become hyphens; every remaining character that is
        not alphanumeric, ``-`` or ``_`` is dropped.

        Parameters
        ----------
        fragment: str
            The fragment to sanitize.

        Returns
        -------
        str
            The sanitized fragment, or an empty string for empty input.
        """
        if not fragment:
            return ""

        # LaTeX labels should be alphanumeric with hyphens/underscores.
        safe_frag = fragment.replace(".", "-").replace(" ", "-")
        return "".join(c for c in safe_frag if c.isalnum() or c in "-_")

    def run(self, **kwargs: Any) -> None:
        """
        Rewrite internal links in the document.

        For every reference node whose ``refuri`` is not external and whose
        path ends in one of the configured extensions, the configured
        prefix is stripped, the suffix is dropped, and the result is handed
        to the builder's ``get_target_uri``. Any ``#fragment`` is
        re-attached (sanitized first for LaTeX builders). Links that the
        builder fails to resolve are left unchanged and logged as warnings.

        Parameters
        ----------
        **kwargs: Any
            Accepted for compatibility with ``SphinxPostTransform.run``;
            unused here.
        """
        builder = self.app.builder

        # Check if the current builder is supported.
        if builder.name not in self.supported_builders:
            logger.debug(
                "[link_rewriter] %s: skipping transformation for unsupported builder '%s'",
                self.env.docname,
                builder.name,
            )
            return

        is_latex = builder.name in ("latex", "latexpdf")
        config = self.app.config
        # Empty config values fall back to the built-in defaults.
        prefixes = tuple(config.sphinx_linkfix_strip_prefixes or ("docs/", "./", "source/"))
        exts = tuple(config.sphinx_linkfix_extensions or (".rst", ".md", ".txt"))

        # Node.traverse() is deprecated in docutils in favour of
        # Node.findall(); fall back only when findall is unavailable.
        document = self.document
        find_nodes = document.findall if hasattr(document, "findall") else document.traverse

        changed = 0
        for ref in find_nodes(nodes.reference):
            uri = ref.get("refuri")
            if not uri or _is_external(uri):
                continue

            path_str, _, frag = uri.partition("#")
            frag = frag.strip()

            if PurePosixPath(path_str).suffix not in exts:
                continue

            # Remove leading folder prefixes like "docs/".
            path_str = _strip_prefixes(path_str, prefixes)
            # NOTE(review): lstrip("./") removes every leading "." and "/"
            # character, so "../page" and ".hidden/page" lose their prefix
            # as well -- confirm this is intended before tightening it.
            target_doc = str(PurePosixPath(path_str).with_suffix("")).lstrip("./")

            try:
                new_uri = builder.get_target_uri(target_doc)
            except Exception as e:  # noqa: BLE001
                # Best effort: keep the original link and report the failure.
                logger.warning(
                    "[link_rewriter] %s: failed to resolve URI for %s: %s",
                    self.env.docname,
                    target_doc,
                    e,
                )
                continue

            # Handle fragments based on builder type.
            if frag:
                if is_latex:
                    frag = self._sanitize_fragment_for_latex(frag)
                new_uri = f"{new_uri}#{frag}"

            ref["refuri"] = new_uri
            changed += 1

        if changed:
            logger.info("[link_rewriter] %s: rewrote %d link(s)", self.env.docname, changed)
def setup(app: Any) -> dict[str, str | bool]:
    """
    Register the link-rewriting extension with Sphinx.

    Adds the two ``sphinx_linkfix_*`` configuration values (both defaulting
    to an empty tuple, with the ``"env"`` rebuild setting) and installs
    ``RstLinkRewriter`` as a post-transform.

    Parameters
    ----------
    app: Any
        The Sphinx application object.

    Returns
    -------
    dict[str, str | bool]
        Extension metadata: the version string and the parallel-read flag.
    """
    logger.info("[link_rewriter] extension loaded")

    option_names = ("sphinx_linkfix_strip_prefixes", "sphinx_linkfix_extensions")
    for option_name in option_names:
        app.add_config_value(option_name, (), "env")

    app.add_post_transform(RstLinkRewriter)

    metadata: dict[str, str | bool] = {"version": "1.0", "parallel_read_safe": True}
    return metadata