1 files changed, 95 insertions, 17 deletions
diff --git a/Documentation/sphinx/kernel_include.py b/Documentation/sphinx/kernel_include.py
index f94412cd17c9..626762ff6af3 100755
--- a/Documentation/sphinx/kernel_include.py
+++ b/Documentation/sphinx/kernel_include.py
@@ -87,6 +87,8 @@ import os.path
 import re
 import sys
 
+from difflib import get_close_matches
+
 from docutils import io, nodes, statemachine
 from docutils.statemachine import ViewList
 from docutils.parsers.rst import Directive, directives
@@ -95,15 +97,17 @@ from docutils.parsers.rst.directives.body import CodeBlock, NumberLines
 from sphinx.util import logging
 
 srctree = os.path.abspath(os.environ["srctree"])
-sys.path.insert(0, os.path.join(srctree, "tools/docs/lib"))
+sys.path.insert(0, os.path.join(srctree, "tools/lib/python"))
 
-from parse_data_structs import ParseDataStructs
+from kdoc.parse_data_structs import ParseDataStructs
 
 __version__ = "1.0"
 logger = logging.getLogger(__name__)
 
 RE_DOMAIN_REF = re.compile(r'\\ :(ref|c:type|c:func):`([^<`]+)(?:<([^>]+)>)?`\\')
 RE_SIMPLE_REF = re.compile(r'`([^`]+)`')
+RE_LINENO_REF = re.compile(r'^\s*-\s+LINENO_(\d+):\s+(.*)')
+RE_SPLIT_DOMAIN = re.compile(r"(.*)\.(.*)")
 
 def ErrorString(exc):  # Shamelessly stolen from docutils
     return f'{exc.__class__.__name}: {exc}'
@@ -212,14 +216,16 @@ class KernelInclude(Directive):
         - a TOC table containing cross references.
         """
         parser = ParseDataStructs()
-        parser.parse_file(path)
 
         if 'exception-file' in self.options:
             source_dir = os.path.dirname(os.path.abspath(
                 self.state_machine.input_lines.source(
                     self.lineno - self.state_machine.input_offset - 1)))
             exceptions_file = os.path.join(source_dir, self.options['exception-file'])
-            parser.process_exceptions(exceptions_file)
+        else:
+            exceptions_file = None
+
+        parser.parse_file(path, exceptions_file)
 
         # Store references on a symbol dict to be used at check time
         if 'warn-broken' in self.options:
@@ -242,23 +248,32 @@ class KernelInclude(Directive):
         # TOC output is a ReST file, not a literal. So, we can add line
         # numbers
 
-        rawtext = parser.gen_toc()
+        startline = self.options.get('start-line', None)
+        endline = self.options.get('end-line', None)
 
-        include_lines = statemachine.string2lines(rawtext, tab_width,
-                                                  convert_whitespace=True)
+        relpath = os.path.relpath(path, srctree)
 
-        # Append line numbers data
+        result = ViewList()
+        for line in parser.gen_toc().split("\n"):
+            match = RE_LINENO_REF.match(line)
+            if not match:
+                result.append(line, path)
+                continue
 
-        startline = self.options.get('start-line', None)
+            ln, ref = match.groups()
+            ln = int(ln)
 
-        result = ViewList()
-        if startline and startline > 0:
-            offset = startline - 1
-        else:
-            offset = 0
+            # Filter line range if needed
+            if startline and (ln < startline):
+                continue
+
+            if endline and (ln > endline):
+                continue
 
-        for ln, line in enumerate(include_lines, start=offset):
-            result.append(line, path, ln)
+            # Sphinx numerates starting with zero, but text editors
+            # and other tools start from one
+            realln = ln + 1
+            result.append(f"- {ref}: {relpath}#{realln}", path, ln)
 
         self.state_machine.insert_input(result, path)
 
@@ -388,6 +403,63 @@ class KernelInclude(Directive):
 # ==============================================================================
 
 reported = set()
+DOMAIN_INFO = {}
+all_refs = {}
+
+def fill_domain_info(env):
+    """
+    Get supported reference types for each Sphinx domain and C namespaces
+    """
+    if DOMAIN_INFO:
+        return
+
+    for domain_name, domain_instance in env.domains.items():
+        try:
+            object_types = list(domain_instance.object_types.keys())
+            DOMAIN_INFO[domain_name] = object_types
+        except AttributeError:
+            # Ignore domains that we can't retrieve object types, if any
+            pass
+
+    for domain in DOMAIN_INFO.keys():
+        domain_obj = env.get_domain(domain)
+        for name, dispname, objtype, docname, anchor, priority in domain_obj.get_objects():
+            ref_name = name.lower()
+
+            if domain == "c":
+                if '.' in ref_name:
+                    ref_name = ref_name.split(".")[-1]
+
+            if not ref_name in all_refs:
+                all_refs[ref_name] = []
+
+            all_refs[ref_name].append(f"\t{domain}:{objtype}:`{name}` (from {docname})")
+
+def get_suggestions(app, env, node,
+                    original_target, original_domain, original_reftype):
+    """Check if target exists in the other domain or with different reftypes."""
+    original_target = original_target.lower()
+
+    # Remove namespace if present
+    if original_domain == "c":
+        if '.' in original_target:
+            original_target = original_target.split(".")[-1]
+
+    suggestions = []
+
+    # If name exists, propose exact name match on different domains
+    if original_target in all_refs:
+        return all_refs[original_target]
+
+    # If not found, get a close match, using difflib.
+    # Such method is based on Ratcliff-Obershelp Algorithm, which seeks
+    # for a close match within a certain distance. We're using the defaults
+    # here, e.g. cutoff=0.6, proposing 3 alternatives
+    matches = get_close_matches(original_target, all_refs.keys())
+    for match in matches:
+        suggestions += all_refs[match]
+
+    return suggestions
 
 def check_missing_refs(app, env, node, contnode):
     """Check broken refs for the files it creates xrefs"""
@@ -404,11 +476,13 @@ def check_missing_refs(app, env, node, contnode):
     if node.source not in xref_files:
         return None
 
+    fill_domain_info(env)
+
     target = node.get('reftarget', '')
     domain = node.get('refdomain', 'std')
     reftype = node.get('reftype', '')
 
-    msg = f"can't link to: {domain}:{reftype}:: {target}"
+    msg = f"Invalid xref: {domain}:{reftype}:`{target}`"
 
     # Don't duplicate warnings
     data = (node.source, msg)
@@ -416,6 +490,10 @@ def check_missing_refs(app, env, node, contnode):
         return None
     reported.add(data)
 
+    suggestions = get_suggestions(app, env, node, target, domain, reftype)
+    if suggestions:
+        msg += ". Possible alternatives:\n" + '\n'.join(suggestions)
+
     logger.warning(msg, location=node, type='ref', subtype='missing')
 
     return None