summaryrefslogtreecommitdiff
path: root/Documentation/sphinx/kernel_include.py
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation/sphinx/kernel_include.py')
-rwxr-xr-xDocumentation/sphinx/kernel_include.py527
1 files changed, 527 insertions, 0 deletions
diff --git a/Documentation/sphinx/kernel_include.py b/Documentation/sphinx/kernel_include.py
new file mode 100755
index 000000000000..626762ff6af3
--- /dev/null
+++ b/Documentation/sphinx/kernel_include.py
@@ -0,0 +1,527 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# pylint: disable=R0903, R0912, R0914, R0915, C0209,W0707
+
+
+"""
+Implementation of the ``kernel-include`` reST-directive.
+
+:copyright: Copyright (C) 2016 Markus Heiser
+:license: GPL Version 2, June 1991 see linux/COPYING for details.
+
+The ``kernel-include`` reST-directive is a replacement for the ``include``
+directive. The ``kernel-include`` directive expand environment variables in
+the path name and allows to include files from arbitrary locations.
+
+.. hint::
+
+ Including files from arbitrary locations (e.g. from ``/etc``) is a
+ security risk for builders. This is why the ``include`` directive from
+ docutils *prohibit* pathnames pointing to locations *above* the filesystem
+ tree where the reST document with the include directive is placed.
+
+Substrings of the form $name or ${name} are replaced by the value of
+environment variable name. Malformed variable names and references to
+non-existing variables are left unchanged.
+
+**Supported Sphinx Include Options**:
+
+:param literal:
+ If present, the included file is inserted as a literal block.
+
+:param code:
+ Specify the language for syntax highlighting (e.g., 'c', 'python').
+
+:param encoding:
+ Specify the encoding of the included file (default: 'utf-8').
+
+:param tab-width:
+ Specify the number of spaces that a tab represents.
+
+:param start-line:
+ Line number at which to start including the file (1-based).
+
+:param end-line:
+ Line number at which to stop including the file (inclusive).
+
+:param start-after:
+ Include lines after the first line matching this text.
+
+:param end-before:
+ Include lines before the first line matching this text.
+
+:param number-lines:
+ Number the included lines (integer specifies start number).
+ Only effective with 'literal' or 'code' options.
+
+:param class:
+ Specify HTML class attribute for the included content.
+
+**Kernel-specific Extensions**:
+
+:param generate-cross-refs:
+ If present, instead of directly including the file, it calls
+ ParseDataStructs() to convert C data structures into cross-references
+ that link to comprehensive documentation in other ReST files.
+
+:param exception-file:
+ (Used with generate-cross-refs)
+
+ Path to a file containing rules for handling special cases:
+ - Ignore specific C data structures
+ - Use alternative reference names
+ - Specify different reference types
+
+:param warn-broken:
+ (Used with generate-cross-refs)
+
+ Enables warnings when auto-generated cross-references don't point to
+ existing documentation targets.
+"""
+
+# ==============================================================================
+# imports
+# ==============================================================================
+
+import os.path
+import re
+import sys
+
+from difflib import get_close_matches
+
+from docutils import io, nodes, statemachine
+from docutils.statemachine import ViewList
+from docutils.parsers.rst import Directive, directives
+from docutils.parsers.rst.directives.body import CodeBlock, NumberLines
+
+from sphinx.util import logging
+
+srctree = os.path.abspath(os.environ["srctree"])
+sys.path.insert(0, os.path.join(srctree, "tools/lib/python"))
+
+from kdoc.parse_data_structs import ParseDataStructs
+
+__version__ = "1.0"
+logger = logging.getLogger(__name__)
+
+RE_DOMAIN_REF = re.compile(r'\\ :(ref|c:type|c:func):`([^<`]+)(?:<([^>]+)>)?`\\')
+RE_SIMPLE_REF = re.compile(r'`([^`]+)`')
+RE_LINENO_REF = re.compile(r'^\s*-\s+LINENO_(\d+):\s+(.*)')
+RE_SPLIT_DOMAIN = re.compile(r"(.*)\.(.*)")
+
+def ErrorString(exc): # Shamelessly stolen from docutils
+ return f'{exc.__class__.__name}: {exc}'
+
+
+# ==============================================================================
+class KernelInclude(Directive):
+ """
+ KernelInclude (``kernel-include``) directive
+
+ Most of the stuff here came from Include directive defined at:
+ docutils/parsers/rst/directives/misc.py
+
+ Yet, overriding the class don't has any benefits: the original class
+ only have run() and argument list. Not all of them are implemented,
+ when checked against latest Sphinx version, as with time more arguments
+ were added.
+
+ So, keep its own list of supported arguments
+ """
+
+ required_arguments = 1
+ optional_arguments = 0
+ final_argument_whitespace = True
+ option_spec = {
+ 'literal': directives.flag,
+ 'code': directives.unchanged,
+ 'encoding': directives.encoding,
+ 'tab-width': int,
+ 'start-line': int,
+ 'end-line': int,
+ 'start-after': directives.unchanged_required,
+ 'end-before': directives.unchanged_required,
+ # ignored except for 'literal' or 'code':
+ 'number-lines': directives.unchanged, # integer or None
+ 'class': directives.class_option,
+
+ # Arguments that aren't from Sphinx Include directive
+ 'generate-cross-refs': directives.flag,
+ 'warn-broken': directives.flag,
+ 'toc': directives.flag,
+ 'exception-file': directives.unchanged,
+ }
+
+ def read_rawtext(self, path, encoding):
+ """Read and process file content with error handling"""
+ try:
+ self.state.document.settings.record_dependencies.add(path)
+ include_file = io.FileInput(source_path=path,
+ encoding=encoding,
+ error_handler=self.state.document.settings.input_encoding_error_handler)
+ except UnicodeEncodeError:
+ raise self.severe('Problems with directive path:\n'
+ 'Cannot encode input file path "%s" '
+ '(wrong locale?).' % path)
+ except IOError as error:
+ raise self.severe('Problems with directive path:\n%s.' % ErrorString(error))
+
+ try:
+ return include_file.read()
+ except UnicodeError as error:
+ raise self.severe('Problem with directive:\n%s' % ErrorString(error))
+
+ def apply_range(self, rawtext):
+ """
+ Handles start-line, end-line, start-after and end-before parameters
+ """
+
+ # Get to-be-included content
+ startline = self.options.get('start-line', None)
+ endline = self.options.get('end-line', None)
+ try:
+ if startline or (endline is not None):
+ lines = rawtext.splitlines()
+ rawtext = '\n'.join(lines[startline:endline])
+ except UnicodeError as error:
+ raise self.severe(f'Problem with "{self.name}" directive:\n'
+ + io.error_string(error))
+ # start-after/end-before: no restrictions on newlines in match-text,
+ # and no restrictions on matching inside lines vs. line boundaries
+ after_text = self.options.get("start-after", None)
+ if after_text:
+ # skip content in rawtext before *and incl.* a matching text
+ after_index = rawtext.find(after_text)
+ if after_index < 0:
+ raise self.severe('Problem with "start-after" option of "%s" '
+ "directive:\nText not found." % self.name)
+ rawtext = rawtext[after_index + len(after_text) :]
+ before_text = self.options.get("end-before", None)
+ if before_text:
+ # skip content in rawtext after *and incl.* a matching text
+ before_index = rawtext.find(before_text)
+ if before_index < 0:
+ raise self.severe('Problem with "end-before" option of "%s" '
+ "directive:\nText not found." % self.name)
+ rawtext = rawtext[:before_index]
+
+ return rawtext
+
+ def xref_text(self, env, path, tab_width):
+ """
+ Read and add contents from a C file parsed to have cross references.
+
+ There are two types of supported output here:
+ - A C source code with cross-references;
+ - a TOC table containing cross references.
+ """
+ parser = ParseDataStructs()
+
+ if 'exception-file' in self.options:
+ source_dir = os.path.dirname(os.path.abspath(
+ self.state_machine.input_lines.source(
+ self.lineno - self.state_machine.input_offset - 1)))
+ exceptions_file = os.path.join(source_dir, self.options['exception-file'])
+ else:
+ exceptions_file = None
+
+ parser.parse_file(path, exceptions_file)
+
+ # Store references on a symbol dict to be used at check time
+ if 'warn-broken' in self.options:
+ env._xref_files.add(path)
+
+ if "toc" not in self.options:
+
+ rawtext = ".. parsed-literal::\n\n" + parser.gen_output()
+ self.apply_range(rawtext)
+
+ include_lines = statemachine.string2lines(rawtext, tab_width,
+ convert_whitespace=True)
+
+ # Sphinx always blame the ".. <directive>", so placing
+ # line numbers here won't make any difference
+
+ self.state_machine.insert_input(include_lines, path)
+ return []
+
+ # TOC output is a ReST file, not a literal. So, we can add line
+ # numbers
+
+ startline = self.options.get('start-line', None)
+ endline = self.options.get('end-line', None)
+
+ relpath = os.path.relpath(path, srctree)
+
+ result = ViewList()
+ for line in parser.gen_toc().split("\n"):
+ match = RE_LINENO_REF.match(line)
+ if not match:
+ result.append(line, path)
+ continue
+
+ ln, ref = match.groups()
+ ln = int(ln)
+
+ # Filter line range if needed
+ if startline and (ln < startline):
+ continue
+
+ if endline and (ln > endline):
+ continue
+
+ # Sphinx numerates starting with zero, but text editors
+ # and other tools start from one
+ realln = ln + 1
+ result.append(f"- {ref}: {relpath}#{realln}", path, ln)
+
+ self.state_machine.insert_input(result, path)
+
+ return []
+
+ def literal(self, path, tab_width, rawtext):
+ """Output a literal block"""
+
+ # Convert tabs to spaces, if `tab_width` is positive.
+ if tab_width >= 0:
+ text = rawtext.expandtabs(tab_width)
+ else:
+ text = rawtext
+ literal_block = nodes.literal_block(rawtext, source=path,
+ classes=self.options.get("class", []))
+ literal_block.line = 1
+ self.add_name(literal_block)
+ if "number-lines" in self.options:
+ try:
+ startline = int(self.options["number-lines"] or 1)
+ except ValueError:
+ raise self.error(":number-lines: with non-integer start value")
+ endline = startline + len(include_lines)
+ if text.endswith("\n"):
+ text = text[:-1]
+ tokens = NumberLines([([], text)], startline, endline)
+ for classes, value in tokens:
+ if classes:
+ literal_block += nodes.inline(value, value,
+ classes=classes)
+ else:
+ literal_block += nodes.Text(value, value)
+ else:
+ literal_block += nodes.Text(text, text)
+ return [literal_block]
+
+ def code(self, path, tab_width):
+ """Output a code block"""
+
+ include_lines = statemachine.string2lines(rawtext, tab_width,
+ convert_whitespace=True)
+
+ self.options["source"] = path
+ codeblock = CodeBlock(self.name,
+ [self.options.pop("code")], # arguments
+ self.options,
+ include_lines,
+ self.lineno,
+ self.content_offset,
+ self.block_text,
+ self.state,
+ self.state_machine)
+ return codeblock.run()
+
+ def run(self):
+ """Include a file as part of the content of this reST file."""
+ env = self.state.document.settings.env
+
+ #
+ # The include logic accepts only patches relative to the
+ # Kernel source tree. The logic does check it to prevent
+ # directory traverse issues.
+ #
+
+ srctree = os.path.abspath(os.environ["srctree"])
+
+ path = os.path.expandvars(self.arguments[0])
+ src_path = os.path.join(srctree, path)
+
+ if os.path.isfile(src_path):
+ base = srctree
+ path = src_path
+ else:
+ raise self.warning(f'File "%s" doesn\'t exist', path)
+
+ abs_base = os.path.abspath(base)
+ abs_full_path = os.path.abspath(os.path.join(base, path))
+
+ try:
+ if os.path.commonpath([abs_full_path, abs_base]) != abs_base:
+ raise self.severe('Problems with "%s" directive, prohibited path: %s' %
+ (self.name, path))
+ except ValueError:
+ # Paths don't have the same drive (Windows) or other incompatibility
+ raise self.severe('Problems with "%s" directive, invalid path: %s' %
+ (self.name, path))
+
+ self.arguments[0] = path
+
+ #
+ # Add path location to Sphinx dependencies to ensure proper cache
+ # invalidation check.
+ #
+
+ env.note_dependency(os.path.abspath(path))
+
+ if not self.state.document.settings.file_insertion_enabled:
+ raise self.warning('"%s" directive disabled.' % self.name)
+ source = self.state_machine.input_lines.source(self.lineno -
+ self.state_machine.input_offset - 1)
+ source_dir = os.path.dirname(os.path.abspath(source))
+ path = directives.path(self.arguments[0])
+ if path.startswith("<") and path.endswith(">"):
+ path = os.path.join(self.standard_include_path, path[1:-1])
+ path = os.path.normpath(os.path.join(source_dir, path))
+
+ # HINT: this is the only line I had to change / commented out:
+ # path = utils.relative_path(None, path)
+
+ encoding = self.options.get("encoding",
+ self.state.document.settings.input_encoding)
+ tab_width = self.options.get("tab-width",
+ self.state.document.settings.tab_width)
+
+ # Get optional arguments to related to cross-references generation
+ if "generate-cross-refs" in self.options:
+ return self.xref_text(env, path, tab_width)
+
+ rawtext = self.read_rawtext(path, encoding)
+ rawtext = self.apply_range(rawtext)
+
+ if "code" in self.options:
+ return self.code(path, tab_width, rawtext)
+
+ return self.literal(path, tab_width, rawtext)
+
+# ==============================================================================
+
+reported = set()
+DOMAIN_INFO = {}
+all_refs = {}
+
+def fill_domain_info(env):
+ """
+ Get supported reference types for each Sphinx domain and C namespaces
+ """
+ if DOMAIN_INFO:
+ return
+
+ for domain_name, domain_instance in env.domains.items():
+ try:
+ object_types = list(domain_instance.object_types.keys())
+ DOMAIN_INFO[domain_name] = object_types
+ except AttributeError:
+ # Ignore domains that we can't retrieve object types, if any
+ pass
+
+ for domain in DOMAIN_INFO.keys():
+ domain_obj = env.get_domain(domain)
+ for name, dispname, objtype, docname, anchor, priority in domain_obj.get_objects():
+ ref_name = name.lower()
+
+ if domain == "c":
+ if '.' in ref_name:
+ ref_name = ref_name.split(".")[-1]
+
+ if not ref_name in all_refs:
+ all_refs[ref_name] = []
+
+ all_refs[ref_name].append(f"\t{domain}:{objtype}:`{name}` (from {docname})")
+
+def get_suggestions(app, env, node,
+ original_target, original_domain, original_reftype):
+ """Check if target exists in the other domain or with different reftypes."""
+ original_target = original_target.lower()
+
+ # Remove namespace if present
+ if original_domain == "c":
+ if '.' in original_target:
+ original_target = original_target.split(".")[-1]
+
+ suggestions = []
+
+ # If name exists, propose exact name match on different domains
+ if original_target in all_refs:
+ return all_refs[original_target]
+
+ # If not found, get a close match, using difflib.
+ # Such method is based on Ratcliff-Obershelp Algorithm, which seeks
+ # for a close match within a certain distance. We're using the defaults
+ # here, e.g. cutoff=0.6, proposing 3 alternatives
+ matches = get_close_matches(original_target, all_refs.keys())
+ for match in matches:
+ suggestions += all_refs[match]
+
+ return suggestions
+
+def check_missing_refs(app, env, node, contnode):
+ """Check broken refs for the files it creates xrefs"""
+ if not node.source:
+ return None
+
+ try:
+ xref_files = env._xref_files
+ except AttributeError:
+ logger.critical("FATAL: _xref_files not initialized!")
+ raise
+
+ # Only show missing references for kernel-include reference-parsed files
+ if node.source not in xref_files:
+ return None
+
+ fill_domain_info(env)
+
+ target = node.get('reftarget', '')
+ domain = node.get('refdomain', 'std')
+ reftype = node.get('reftype', '')
+
+ msg = f"Invalid xref: {domain}:{reftype}:`{target}`"
+
+ # Don't duplicate warnings
+ data = (node.source, msg)
+ if data in reported:
+ return None
+ reported.add(data)
+
+ suggestions = get_suggestions(app, env, node, target, domain, reftype)
+ if suggestions:
+ msg += ". Possible alternatives:\n" + '\n'.join(suggestions)
+
+ logger.warning(msg, location=node, type='ref', subtype='missing')
+
+ return None
+
+def merge_xref_info(app, env, docnames, other):
+ """
+ As each process modify env._xref_files, we need to merge them back.
+ """
+ if not hasattr(other, "_xref_files"):
+ return
+ env._xref_files.update(getattr(other, "_xref_files", set()))
+
+def init_xref_docs(app, env, docnames):
+ """Initialize a list of files that we're generating cross references¨"""
+ app.env._xref_files = set()
+
+# ==============================================================================
+
+def setup(app):
+ """Setup Sphinx exension"""
+
+ app.connect("env-before-read-docs", init_xref_docs)
+ app.connect("env-merge-info", merge_xref_info)
+ app.add_directive("kernel-include", KernelInclude)
+ app.connect("missing-reference", check_missing_refs)
+
+ return {
+ "version": __version__,
+ "parallel_read_safe": True,
+ "parallel_write_safe": True,
+ }