1 files changed, 527 insertions, 0 deletions
diff --git a/Documentation/sphinx/kernel_include.py b/Documentation/sphinx/kernel_include.py
new file mode 100755
index 000000000000..626762ff6af3
--- /dev/null
+++ b/Documentation/sphinx/kernel_include.py
@@ -0,0 +1,527 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# pylint: disable=R0903, R0912, R0914, R0915, C0209,W0707
+
+
+"""
+Implementation of the ``kernel-include`` reST-directive.
+
+:copyright:  Copyright (C) 2016  Markus Heiser
+:license:    GPL Version 2, June 1991 see linux/COPYING for details.
+
+The ``kernel-include`` reST-directive is a replacement for the ``include``
+directive. The ``kernel-include`` directive expand environment variables in
+the path name and allows to include files from arbitrary locations.
+
+.. hint::
+
+    Including files from arbitrary locations (e.g. from ``/etc``) is a
+    security risk for builders. This is why the ``include`` directive from
+    docutils *prohibit* pathnames pointing to locations *above* the filesystem
+    tree where the reST document with the include directive is placed.
+
+Substrings of the form $name or ${name} are replaced by the value of
+environment variable name. Malformed variable names and references to
+non-existing variables are left unchanged.
+
+**Supported Sphinx Include Options**:
+
+:param literal:
+    If present, the included file is inserted as a literal block.
+
+:param code:
+    Specify the language for syntax highlighting (e.g., 'c', 'python').
+
+:param encoding:
+    Specify the encoding of the included file (default: 'utf-8').
+
+:param tab-width:
+    Specify the number of spaces that a tab represents.
+
+:param start-line:
+    Line number at which to start including the file (1-based).
+
+:param end-line:
+    Line number at which to stop including the file (inclusive).
+
+:param start-after:
+    Include lines after the first line matching this text.
+
+:param end-before:
+    Include lines before the first line matching this text.
+
+:param number-lines:
+    Number the included lines (integer specifies start number).
+    Only effective with 'literal' or 'code' options.
+
+:param class:
+    Specify HTML class attribute for the included content.
+
+**Kernel-specific Extensions**:
+
+:param generate-cross-refs:
+    If present, instead of directly including the file, it calls
+    ParseDataStructs() to convert C data structures into cross-references
+    that link to comprehensive documentation in other ReST files.
+
+:param exception-file:
+    (Used with generate-cross-refs)
+
+    Path to a file containing rules for handling special cases:
+    - Ignore specific C data structures
+    - Use alternative reference names
+    - Specify different reference types
+
+:param warn-broken:
+    (Used with generate-cross-refs)
+
+    Enables warnings when auto-generated cross-references don't point to
+    existing documentation targets.
+"""
+
+# ==============================================================================
+# imports
+# ==============================================================================
+
+import os.path
+import re
+import sys
+
+from difflib import get_close_matches
+
+from docutils import io, nodes, statemachine
+from docutils.statemachine import ViewList
+from docutils.parsers.rst import Directive, directives
+from docutils.parsers.rst.directives.body import CodeBlock, NumberLines
+
+from sphinx.util import logging
+
+srctree = os.path.abspath(os.environ["srctree"])
+sys.path.insert(0, os.path.join(srctree, "tools/lib/python"))
+
+from kdoc.parse_data_structs import ParseDataStructs
+
+__version__ = "1.0"
+logger = logging.getLogger(__name__)
+
+RE_DOMAIN_REF = re.compile(r'\\ :(ref|c:type|c:func):`([^<`]+)(?:<([^>]+)>)?`\\')
+RE_SIMPLE_REF = re.compile(r'`([^`]+)`')
+RE_LINENO_REF = re.compile(r'^\s*-\s+LINENO_(\d+):\s+(.*)')
+RE_SPLIT_DOMAIN = re.compile(r"(.*)\.(.*)")
+
+def ErrorString(exc):  # Shamelessly stolen from docutils
+    return f'{exc.__class__.__name}: {exc}'
+
+
+# ==============================================================================
+class KernelInclude(Directive):
+    """
+    KernelInclude (``kernel-include``) directive
+
+    Most of the stuff here came from Include directive defined at:
+        docutils/parsers/rst/directives/misc.py
+
+    Yet, overriding the class don't has any benefits: the original class
+    only have run() and argument list. Not all of them are implemented,
+    when checked against latest Sphinx version, as with time more arguments
+    were added.
+
+    So, keep its own list of supported arguments
+    """
+
+    required_arguments = 1
+    optional_arguments = 0
+    final_argument_whitespace = True
+    option_spec = {
+        'literal': directives.flag,
+        'code': directives.unchanged,
+        'encoding': directives.encoding,
+        'tab-width': int,
+        'start-line': int,
+        'end-line': int,
+        'start-after': directives.unchanged_required,
+        'end-before': directives.unchanged_required,
+        # ignored except for 'literal' or 'code':
+        'number-lines': directives.unchanged,  # integer or None
+        'class': directives.class_option,
+
+        # Arguments that aren't from Sphinx Include directive
+        'generate-cross-refs': directives.flag,
+        'warn-broken': directives.flag,
+        'toc': directives.flag,
+        'exception-file': directives.unchanged,
+    }
+
+    def read_rawtext(self, path, encoding):
+            """Read and process file content with error handling"""
+            try:
+                self.state.document.settings.record_dependencies.add(path)
+                include_file = io.FileInput(source_path=path,
+                                            encoding=encoding,
+                                            error_handler=self.state.document.settings.input_encoding_error_handler)
+            except UnicodeEncodeError:
+                raise self.severe('Problems with directive path:\n'
+                                'Cannot encode input file path "%s" '
+                                '(wrong locale?).' % path)
+            except IOError as error:
+                raise self.severe('Problems with directive path:\n%s.' % ErrorString(error))
+
+            try:
+                return include_file.read()
+            except UnicodeError as error:
+                raise self.severe('Problem with directive:\n%s' % ErrorString(error))
+
+    def apply_range(self, rawtext):
+        """
+        Handles start-line, end-line, start-after and end-before parameters
+        """
+
+        # Get to-be-included content
+        startline = self.options.get('start-line', None)
+        endline = self.options.get('end-line', None)
+        try:
+            if startline or (endline is not None):
+                lines = rawtext.splitlines()
+                rawtext = '\n'.join(lines[startline:endline])
+        except UnicodeError as error:
+            raise self.severe(f'Problem with "{self.name}" directive:\n'
+                              + io.error_string(error))
+        # start-after/end-before: no restrictions on newlines in match-text,
+        # and no restrictions on matching inside lines vs. line boundaries
+        after_text = self.options.get("start-after", None)
+        if after_text:
+            # skip content in rawtext before *and incl.* a matching text
+            after_index = rawtext.find(after_text)
+            if after_index < 0:
+                raise self.severe('Problem with "start-after" option of "%s" '
+                                  "directive:\nText not found." % self.name)
+            rawtext = rawtext[after_index + len(after_text) :]
+        before_text = self.options.get("end-before", None)
+        if before_text:
+            # skip content in rawtext after *and incl.* a matching text
+            before_index = rawtext.find(before_text)
+            if before_index < 0:
+                raise self.severe('Problem with "end-before" option of "%s" '
+                                  "directive:\nText not found." % self.name)
+            rawtext = rawtext[:before_index]
+
+        return rawtext
+
+    def xref_text(self, env, path, tab_width):
+        """
+        Read and add contents from a C file parsed to have cross references.
+
+        There are two types of supported output here:
+        - A C source code with cross-references;
+        - a TOC table containing cross references.
+        """
+        parser = ParseDataStructs()
+
+        if 'exception-file' in self.options:
+            source_dir = os.path.dirname(os.path.abspath(
+                self.state_machine.input_lines.source(
+                    self.lineno - self.state_machine.input_offset - 1)))
+            exceptions_file = os.path.join(source_dir, self.options['exception-file'])
+        else:
+            exceptions_file = None
+
+        parser.parse_file(path, exceptions_file)
+
+        # Store references on a symbol dict to be used at check time
+        if 'warn-broken' in self.options:
+            env._xref_files.add(path)
+
+        if "toc" not in self.options:
+
+            rawtext = ".. parsed-literal::\n\n" + parser.gen_output()
+            self.apply_range(rawtext)
+
+            include_lines = statemachine.string2lines(rawtext, tab_width,
+                                                      convert_whitespace=True)
+
+            # Sphinx always blame the ".. <directive>", so placing
+            # line numbers here won't make any difference
+
+            self.state_machine.insert_input(include_lines, path)
+            return []
+
+        # TOC output is a ReST file, not a literal. So, we can add line
+        # numbers
+
+        startline = self.options.get('start-line', None)
+        endline = self.options.get('end-line', None)
+
+        relpath = os.path.relpath(path, srctree)
+
+        result = ViewList()
+        for line in parser.gen_toc().split("\n"):
+            match = RE_LINENO_REF.match(line)
+            if not match:
+                result.append(line, path)
+                continue
+
+            ln, ref = match.groups()
+            ln = int(ln)
+
+            # Filter line range if needed
+            if startline and (ln < startline):
+                continue
+
+            if endline and (ln > endline):
+                continue
+
+            # Sphinx numerates starting with zero, but text editors
+            # and other tools start from one
+            realln = ln + 1
+            result.append(f"- {ref}: {relpath}#{realln}", path, ln)
+
+        self.state_machine.insert_input(result, path)
+
+        return []
+
+    def literal(self, path, tab_width, rawtext):
+        """Output a literal block"""
+
+        # Convert tabs to spaces, if `tab_width` is positive.
+        if tab_width >= 0:
+            text = rawtext.expandtabs(tab_width)
+        else:
+            text = rawtext
+        literal_block = nodes.literal_block(rawtext, source=path,
+                                            classes=self.options.get("class", []))
+        literal_block.line = 1
+        self.add_name(literal_block)
+        if "number-lines" in self.options:
+            try:
+                startline = int(self.options["number-lines"] or 1)
+            except ValueError:
+                raise self.error(":number-lines: with non-integer start value")
+            endline = startline + len(include_lines)
+            if text.endswith("\n"):
+                text = text[:-1]
+            tokens = NumberLines([([], text)], startline, endline)
+            for classes, value in tokens:
+                if classes:
+                    literal_block += nodes.inline(value, value,
+                                                    classes=classes)
+                else:
+                    literal_block += nodes.Text(value, value)
+        else:
+            literal_block += nodes.Text(text, text)
+        return [literal_block]
+
+    def code(self, path, tab_width):
+        """Output a code block"""
+
+        include_lines = statemachine.string2lines(rawtext, tab_width,
+                                                  convert_whitespace=True)
+
+        self.options["source"] = path
+        codeblock = CodeBlock(self.name,
+                                [self.options.pop("code")],  # arguments
+                                self.options,
+                                include_lines,
+                                self.lineno,
+                                self.content_offset,
+                                self.block_text,
+                                self.state,
+                                self.state_machine)
+        return codeblock.run()
+
+    def run(self):
+        """Include a file as part of the content of this reST file."""
+        env = self.state.document.settings.env
+
+        #
+        # The include logic accepts only patches relative to the
+        # Kernel source tree.  The logic does check it to prevent
+        # directory traverse issues.
+        #
+
+        srctree = os.path.abspath(os.environ["srctree"])
+
+        path = os.path.expandvars(self.arguments[0])
+        src_path = os.path.join(srctree, path)
+
+        if os.path.isfile(src_path):
+            base = srctree
+            path = src_path
+        else:
+            raise self.warning(f'File "%s" doesn\'t exist', path)
+
+        abs_base = os.path.abspath(base)
+        abs_full_path = os.path.abspath(os.path.join(base, path))
+
+        try:
+            if os.path.commonpath([abs_full_path, abs_base]) != abs_base:
+                raise self.severe('Problems with "%s" directive, prohibited path: %s' %
+                                  (self.name, path))
+        except ValueError:
+            # Paths don't have the same drive (Windows) or other incompatibility
+            raise self.severe('Problems with "%s" directive, invalid path: %s' %
+                            (self.name, path))
+
+        self.arguments[0] = path
+
+        #
+        # Add path location to Sphinx dependencies to ensure proper cache
+        # invalidation check.
+        #
+
+        env.note_dependency(os.path.abspath(path))
+
+        if not self.state.document.settings.file_insertion_enabled:
+            raise self.warning('"%s" directive disabled.' % self.name)
+        source = self.state_machine.input_lines.source(self.lineno -
+                                                       self.state_machine.input_offset - 1)
+        source_dir = os.path.dirname(os.path.abspath(source))
+        path = directives.path(self.arguments[0])
+        if path.startswith("<") and path.endswith(">"):
+            path = os.path.join(self.standard_include_path, path[1:-1])
+        path = os.path.normpath(os.path.join(source_dir, path))
+
+        # HINT: this is the only line I had to change / commented out:
+        # path = utils.relative_path(None, path)
+
+        encoding = self.options.get("encoding",
+                                    self.state.document.settings.input_encoding)
+        tab_width = self.options.get("tab-width",
+                                     self.state.document.settings.tab_width)
+
+        # Get optional arguments to related to cross-references generation
+        if "generate-cross-refs" in self.options:
+            return self.xref_text(env, path, tab_width)
+
+        rawtext = self.read_rawtext(path, encoding)
+        rawtext = self.apply_range(rawtext)
+
+        if "code" in self.options:
+            return self.code(path, tab_width, rawtext)
+
+        return self.literal(path, tab_width, rawtext)
+
+# ==============================================================================
+
+reported = set()
+DOMAIN_INFO = {}
+all_refs = {}
+
+def fill_domain_info(env):
+    """
+    Get supported reference types for each Sphinx domain and C namespaces
+    """
+    if DOMAIN_INFO:
+        return
+
+    for domain_name, domain_instance in env.domains.items():
+        try:
+            object_types = list(domain_instance.object_types.keys())
+            DOMAIN_INFO[domain_name] = object_types
+        except AttributeError:
+            # Ignore domains that we can't retrieve object types, if any
+            pass
+
+    for domain in DOMAIN_INFO.keys():
+        domain_obj = env.get_domain(domain)
+        for name, dispname, objtype, docname, anchor, priority in domain_obj.get_objects():
+            ref_name = name.lower()
+
+            if domain == "c":
+                if '.' in ref_name:
+                    ref_name = ref_name.split(".")[-1]
+
+            if not ref_name in all_refs:
+                all_refs[ref_name] = []
+
+            all_refs[ref_name].append(f"\t{domain}:{objtype}:`{name}` (from {docname})")
+
+def get_suggestions(app, env, node,
+                    original_target, original_domain, original_reftype):
+    """Check if target exists in the other domain or with different reftypes."""
+    original_target = original_target.lower()
+
+    # Remove namespace if present
+    if original_domain == "c":
+        if '.' in original_target:
+            original_target = original_target.split(".")[-1]
+
+    suggestions = []
+
+    # If name exists, propose exact name match on different domains
+    if original_target in all_refs:
+        return all_refs[original_target]
+
+    # If not found, get a close match, using difflib.
+    # Such method is based on Ratcliff-Obershelp Algorithm, which seeks
+    # for a close match within a certain distance. We're using the defaults
+    # here, e.g. cutoff=0.6, proposing 3 alternatives
+    matches = get_close_matches(original_target, all_refs.keys())
+    for match in matches:
+        suggestions += all_refs[match]
+
+    return suggestions
+
+def check_missing_refs(app, env, node, contnode):
+    """Check broken refs for the files it creates xrefs"""
+    if not node.source:
+        return None
+
+    try:
+        xref_files = env._xref_files
+    except AttributeError:
+        logger.critical("FATAL: _xref_files not initialized!")
+        raise
+
+    # Only show missing references for kernel-include reference-parsed files
+    if node.source not in xref_files:
+        return None
+
+    fill_domain_info(env)
+
+    target = node.get('reftarget', '')
+    domain = node.get('refdomain', 'std')
+    reftype = node.get('reftype', '')
+
+    msg = f"Invalid xref: {domain}:{reftype}:`{target}`"
+
+    # Don't duplicate warnings
+    data = (node.source, msg)
+    if data in reported:
+        return None
+    reported.add(data)
+
+    suggestions = get_suggestions(app, env, node, target, domain, reftype)
+    if suggestions:
+        msg += ". Possible alternatives:\n" + '\n'.join(suggestions)
+
+    logger.warning(msg, location=node, type='ref', subtype='missing')
+
+    return None
+
+def merge_xref_info(app, env, docnames, other):
+    """
+    As each process modify env._xref_files, we need to merge them back.
+    """
+    if not hasattr(other, "_xref_files"):
+        return
+    env._xref_files.update(getattr(other, "_xref_files", set()))
+
+def init_xref_docs(app, env, docnames):
+    """Initialize a list of files that we're generating cross references¨"""
+    app.env._xref_files = set()
+
+# ==============================================================================
+
+def setup(app):
+    """Setup Sphinx exension"""
+
+    app.connect("env-before-read-docs", init_xref_docs)
+    app.connect("env-merge-info", merge_xref_info)
+    app.add_directive("kernel-include", KernelInclude)
+    app.connect("missing-reference", check_missing_refs)
+
+    return {
+        "version": __version__,
+        "parallel_read_safe": True,
+        "parallel_write_safe": True,
+    }