diff options
Diffstat (limited to 'scripts/lib/kdoc/kdoc_parser.py')
| -rw-r--r-- | scripts/lib/kdoc/kdoc_parser.py | 858 | 
1 files changed, 391 insertions, 467 deletions
| diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 062453eefc7a..fe730099eca8 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -12,11 +12,12 @@ Read a C language source or header FILE and extract embedded  documentation comments  """ +import sys  import re  from pprint import pformat  from kdoc_re import NestedMatch, KernRe - +from kdoc_item import KdocItem  #  # Regular expressions used to parse kernel-doc markups at KernelDoc class. @@ -42,12 +43,13 @@ doc_decl = doc_com + KernRe(r'(\w+)', cache=False)  #         @{section-name}:  # while trying to not match literal block starts like "example::"  # +known_section_names = 'description|context|returns?|notes?|examples?' +known_sections = KernRe(known_section_names, flags = re.I)  doc_sect = doc_com + \ -            KernRe(r'\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$', -                flags=re.I, cache=False) +    KernRe(r'\s*(\@[.\w]+|\@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$', +           flags=re.I, cache=False)  doc_content = doc_com_body + KernRe(r'(.*)', cache=False) -doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False)  doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)  doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)  doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) @@ -60,6 +62,25 @@ export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"  type_param = KernRe(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) +# +# Tests for the beginning of a kerneldoc block in its various forms. +# +doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) +doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)", cache = False) +doc_begin_func = KernRe(str(doc_com) +			# initial " * ' +                        r"(?:\w+\s*\*\s*)?" + 		# type (not captured) +                        r'(?:define\s+)?' + 		# possible "define" (not captured) +                        r'(\w+)\s*(?:\(\w*\))?\s*' +	# name and optional "(...)" +                        r'(?:[-:].*)?$',		# description (not captured) +                        cache = False) + +# +# A little helper to get rid of excess white space +# +multi_space = KernRe(r'\s\s+') +def trim_whitespace(s): +    return multi_space.sub(' ', s.strip()) +  class state:      """      State machine enums @@ -68,40 +89,26 @@ class state:      # Parser states      NORMAL        = 0        # normal code      NAME          = 1        # looking for function name -    BODY_MAYBE    = 2        # body - or maybe more description +    DECLARATION   = 2        # We have seen a declaration which might not be done      BODY          = 3        # the body of the comment -    BODY_WITH_BLANK_LINE = 4 # the body which has a blank line +    SPECIAL_SECTION = 4      # doc section ending with a blank line      PROTO         = 5        # scanning prototype      DOCBLOCK      = 6        # documentation block -    INLINE        = 7        # gathering doc outside main block +    INLINE_NAME   = 7        # gathering doc outside main block +    INLINE_TEXT   = 8	     # reading the body of inline docs      name = [          "NORMAL",          "NAME", -        "BODY_MAYBE", +        "DECLARATION",          "BODY", -        "BODY_WITH_BLANK_LINE", +        "SPECIAL_SECTION",          "PROTO",          "DOCBLOCK", -        "INLINE", +        "INLINE_NAME", +        "INLINE_TEXT",      ] -    # Inline documentation state -    INLINE_NA     = 0 # not applicable ($state != INLINE) -    INLINE_NAME   = 1 # looking for member name (@foo:) -    INLINE_TEXT   = 2 # looking for member documentation -    INLINE_END    = 3 # done -    INLINE_ERROR  = 4 # error - Comment without header was found. -                      # Spit a warning as it's not -                      # proper kernel-doc and ignore the rest. - -    inline_name = [ -        "", -        "_NAME", -        "_TEXT", -        "_END", -        "_ERROR", -    ]  SECTION_DEFAULT = "Description"  # default section @@ -110,10 +117,7 @@ class KernelEntry:      def __init__(self, config, ln):          self.config = config -        self.contents = "" -        self.function = "" -        self.sectcheck = "" -        self.struct_actual = "" +        self._contents = []          self.prototype = ""          self.warnings = [] @@ -124,7 +128,6 @@ class KernelEntry:          self.parameterdesc_start_lines = {}          self.section_start_lines = {} -        self.sectionlist = []          self.sections = {}          self.anon_struct_union = False @@ -133,10 +136,17 @@ class KernelEntry:          # State flags          self.brcount = 0 - -        self.in_doc_sect = False          self.declaration_start_line = ln + 1 +    # +    # Management of section contents +    # +    def add_text(self, text): +        self._contents.append(text) + +    def contents(self): +        return '\n'.join(self._contents) + '\n' +      # TODO: rename to emit_message after removal of kernel-doc.pl      def emit_msg(self, log_msg, warning=True):          """Emit a message""" @@ -151,13 +161,27 @@ class KernelEntry:          self.warnings.append(log_msg)          return +    # +    # Begin a new section. +    # +    def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False): +        if dump: +            self.dump_section(start_new = True) +        self.section = title +        self.new_start_line = line_no +      def dump_section(self, start_new=True):          """          Dumps section contents to arrays/hashes intended for that purpose.          """ - +        # +        # If we have accumulated no contents in the default ("description") +        # section, don't bother. +        # +        if self.section == SECTION_DEFAULT and not self._contents: +            return          name = self.section -        contents = self.contents +        contents = self.contents()          if type_param.match(name):              name = type_param.group(1) @@ -165,14 +189,6 @@ class KernelEntry:              self.parameterdescs[name] = contents              self.parameterdesc_start_lines[name] = self.new_start_line -            self.sectcheck += name + " " -            self.new_start_line = 0 - -        elif name == "@...": -            name = "..." -            self.parameterdescs[name] = contents -            self.sectcheck += name + " " -            self.parameterdesc_start_lines[name] = self.new_start_line              self.new_start_line = 0          else: @@ -181,10 +197,10 @@ class KernelEntry:                  if name != SECTION_DEFAULT:                      self.emit_msg(self.new_start_line,                                    f"duplicate section name '{name}'\n") -                self.sections[name] += contents +                # Treat as a new paragraph - add a blank line +                self.sections[name] += '\n' + contents              else:                  self.sections[name] = contents -                self.sectionlist.append(name)                  self.section_start_lines[name] = self.new_start_line                  self.new_start_line = 0 @@ -192,7 +208,7 @@ class KernelEntry:          if start_new:              self.section = SECTION_DEFAULT -            self.contents = "" +            self._contents = []  class KernelDoc: @@ -203,7 +219,6 @@ class KernelDoc:      # Section names -    section_intro = "Introduction"      section_context = "Context"      section_return = "Return" @@ -217,7 +232,6 @@ class KernelDoc:          # Initial state for the state machines          self.state = state.NORMAL -        self.inline_doc_state = state.INLINE_NA          # Store entry currently being processed          self.entry = None @@ -225,6 +239,14 @@ class KernelDoc:          # Place all potential outputs into an array          self.entries = [] +        # +        # We need Python 3.7 for its "dicts remember the insertion +        # order" guarantee +        # +        if sys.version_info.major == 3 and sys.version_info.minor < 7: +            self.emit_msg(0, +                          'Python 3.7 or later is required for correct results') +      def emit_msg(self, ln, msg, warning=True):          """Emit a message""" @@ -255,32 +277,20 @@ class KernelDoc:          The actual output and output filters will be handled elsewhere          """ -        # The implementation here is different than the original kernel-doc: -        # instead of checking for output filters or actually output anything, -        # it just stores the declaration content at self.entries, as the -        # output will happen on a separate class. -        # -        # For now, we're keeping the same name of the function just to make -        # easier to compare the source code of both scripts - -        args["declaration_start_line"] = self.entry.declaration_start_line -        args["type"] = dtype -        args["warnings"] = self.entry.warnings - -        # TODO: use colletions.OrderedDict to remove sectionlist - -        sections = args.get('sections', {}) -        sectionlist = args.get('sectionlist', []) +        item = KdocItem(name, dtype, self.entry.declaration_start_line, **args) +        item.warnings = self.entry.warnings          # Drop empty sections          # TODO: improve empty sections logic to emit warnings +        sections = self.entry.sections          for section in ["Description", "Return"]: -            if section in sectionlist: -                if not sections[section].rstrip(): -                    del sections[section] -                    sectionlist.remove(section) - -        self.entries.append((name, args)) +            if section in sections and not sections[section].rstrip(): +                del sections[section] +        item.set_sections(sections, self.entry.section_start_lines) +        item.set_params(self.entry.parameterlist, self.entry.parameterdescs, +                        self.entry.parametertypes, +                        self.entry.parameterdesc_start_lines) +        self.entries.append(item)          self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) @@ -294,7 +304,6 @@ class KernelDoc:          # State flags          self.state = state.NORMAL -        self.inline_doc_state = state.INLINE_NA      def push_parameter(self, ln, decl_type, param, dtype,                         org_arg, declaration_name): @@ -367,15 +376,6 @@ class KernelDoc:          org_arg = KernRe(r'\s\s+').sub(' ', org_arg)          self.entry.parametertypes[param] = org_arg -    def save_struct_actual(self, actual): -        """ -        Strip all spaces from the actual param so that it looks like -        one string item. -        """ - -        actual = KernRe(r'\s*').sub("", actual, count=1) - -        self.entry.struct_actual += actual + " "      def create_parameter_list(self, ln, decl_type, args,                                splitter, declaration_name): @@ -421,7 +421,6 @@ class KernelDoc:                      param = arg                  dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) -                self.save_struct_actual(param)                  self.push_parameter(ln, decl_type, param, dtype,                                      arg, declaration_name) @@ -438,7 +437,6 @@ class KernelDoc:                  dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) -                self.save_struct_actual(param)                  self.push_parameter(ln, decl_type, param, dtype,                                      arg, declaration_name) @@ -471,7 +469,6 @@ class KernelDoc:                          param = r.group(1) -                        self.save_struct_actual(r.group(2))                          self.push_parameter(ln, decl_type, r.group(2),                                              f"{dtype} {r.group(1)}",                                              arg, declaration_name) @@ -483,52 +480,27 @@ class KernelDoc:                              continue                          if dtype != "":  # Skip unnamed bit-fields -                            self.save_struct_actual(r.group(1))                              self.push_parameter(ln, decl_type, r.group(1),                                                  f"{dtype}:{r.group(2)}",                                                  arg, declaration_name)                      else: -                        self.save_struct_actual(param)                          self.push_parameter(ln, decl_type, param, dtype,                                              arg, declaration_name) -    def check_sections(self, ln, decl_name, decl_type, sectcheck, prmscheck): +    def check_sections(self, ln, decl_name, decl_type):          """          Check for errors inside sections, emitting warnings if not found          parameters are described.          """ - -        sects = sectcheck.split() -        prms = prmscheck.split() -        err = False - -        for sx in range(len(sects)):                  # pylint: disable=C0200 -            err = True -            for px in range(len(prms)):               # pylint: disable=C0200 -                prm_clean = prms[px] -                prm_clean = KernRe(r'\[.*\]').sub('', prm_clean) -                prm_clean = attribute.sub('', prm_clean) - -                # ignore array size in a parameter string; -                # however, the original param string may contain -                # spaces, e.g.:  addr[6 + 2] -                # and this appears in @prms as "addr[6" since the -                # parameter list is split at spaces; -                # hence just ignore "[..." for the sections check; -                prm_clean = KernRe(r'\[.*').sub('', prm_clean) - -                if prm_clean == sects[sx]: -                    err = False -                    break - -            if err: +        for section in self.entry.sections: +            if section not in self.entry.parameterlist and \ +               not known_sections.search(section):                  if decl_type == 'function':                      dname = f"{decl_type} parameter"                  else:                      dname = f"{decl_type} member" -                  self.emit_msg(ln, -                              f"Excess {dname} '{sects[sx]}' description in '{decl_name}'") +                              f"Excess {dname} '{section}' description in '{decl_name}'")      def check_return_section(self, ln, declaration_name, return_type):          """ @@ -666,6 +638,7 @@ class KernelDoc:              (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'),              (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'),              (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'), +            (KernRe(r'VIRTIO_DECLARE_FEATURES\s*\(' + args_pattern + r'\)', re.S), r'u64 \1; u64 \1_array[VIRTIO_FEATURES_DWORDS]'),          ]          # Regexes here are guaranteed to have the end limiter matching @@ -782,8 +755,7 @@ class KernelDoc:          self.create_parameter_list(ln, decl_type, members, ';',                                     declaration_name) -        self.check_sections(ln, declaration_name, decl_type, -                            self.entry.sectcheck, self.entry.struct_actual) +        self.check_sections(ln, declaration_name, decl_type)          # Adjust declaration for better display          declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) @@ -819,15 +791,7 @@ class KernelDoc:                  level += 1          self.output_declaration(decl_type, declaration_name, -                                struct=declaration_name,                                  definition=declaration, -                                parameterlist=self.entry.parameterlist, -                                parameterdescs=self.entry.parameterdescs, -                                parametertypes=self.entry.parametertypes, -                                parameterdesc_start_lines=self.entry.parameterdesc_start_lines, -                                sectionlist=self.entry.sectionlist, -                                sections=self.entry.sections, -                                section_start_lines=self.entry.section_start_lines,                                  purpose=self.entry.declaration_purpose)      def dump_enum(self, ln, proto): @@ -845,39 +809,48 @@ class KernelDoc:          # Strip #define macros inside enums          proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) -        members = None -        declaration_name = None - +        # +        # Parse out the name and members of the enum.  Typedef form first. +        #          r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')          if r.search(proto):              declaration_name = r.group(2)              members = r.group(1).rstrip() +        # +        # Failing that, look for a straight enum +        #          else:              r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')              if r.match(proto):                  declaration_name = r.group(1)                  members = r.group(2).rstrip() - -        if not members: -            self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") -            return - +        # +        # OK, this isn't going to work. +        # +            else: +                self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") +                return +        # +        # Make sure we found what we were expecting. +        #          if self.entry.identifier != declaration_name:              if self.entry.identifier == "":                  self.emit_msg(ln,                                f"{proto}: wrong kernel-doc identifier on prototype")              else:                  self.emit_msg(ln, -                              f"expecting prototype for enum {self.entry.identifier}. Prototype was for enum {declaration_name} instead") +                              f"expecting prototype for enum {self.entry.identifier}. " +                              f"Prototype was for enum {declaration_name} instead")              return          if not declaration_name:              declaration_name = "(anonymous)" - +        # +        # Parse out the name of each enum member, and verify that we +        # have a description for it. +        #          member_set = set() - -        members = KernRe(r'\([^;]*?[\)]').sub('', members) - +        members = KernRe(r'\([^;)]*\)').sub('', members)          for arg in members.split(','):              if not arg:                  continue @@ -888,20 +861,15 @@ class KernelDoc:                  self.emit_msg(ln,                                f"Enum value '{arg}' not described in enum '{declaration_name}'")              member_set.add(arg) - +        # +        # Ensure that every described member actually exists in the enum. +        #          for k in self.entry.parameterdescs:              if k not in member_set:                  self.emit_msg(ln,                                f"Excess enum value '%{k}' description in '{declaration_name}'")          self.output_declaration('enum', declaration_name, -                                enum=declaration_name, -                                parameterlist=self.entry.parameterlist, -                                parameterdescs=self.entry.parameterdescs, -                                parameterdesc_start_lines=self.entry.parameterdesc_start_lines, -                                sectionlist=self.entry.sectionlist, -                                sections=self.entry.sections, -                                section_start_lines=self.entry.section_start_lines,                                  purpose=self.entry.declaration_purpose)      def dump_declaration(self, ln, prototype): @@ -911,18 +879,13 @@ class KernelDoc:          if self.entry.decl_type == "enum":              self.dump_enum(ln, prototype) -            return - -        if self.entry.decl_type == "typedef": +        elif self.entry.decl_type == "typedef":              self.dump_typedef(ln, prototype) -            return - -        if self.entry.decl_type in ["union", "struct"]: +        elif self.entry.decl_type in ["union", "struct"]:              self.dump_struct(ln, prototype) -            return - -        self.output_declaration(self.entry.decl_type, prototype, -                                entry=self.entry) +        else: +            # This would be a bug +            self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}')      def dump_function(self, ln, prototype):          """ @@ -1056,38 +1019,20 @@ class KernelDoc:                            f"expecting prototype for {self.entry.identifier}(). Prototype was for {declaration_name}() instead")              return -        prms = " ".join(self.entry.parameterlist) -        self.check_sections(ln, declaration_name, "function", -                            self.entry.sectcheck, prms) +        self.check_sections(ln, declaration_name, "function")          self.check_return_section(ln, declaration_name, return_type)          if 'typedef' in return_type:              self.output_declaration(decl_type, declaration_name, -                                    function=declaration_name,                                      typedef=True,                                      functiontype=return_type, -                                    parameterlist=self.entry.parameterlist, -                                    parameterdescs=self.entry.parameterdescs, -                                    parametertypes=self.entry.parametertypes, -                                    parameterdesc_start_lines=self.entry.parameterdesc_start_lines, -                                    sectionlist=self.entry.sectionlist, -                                    sections=self.entry.sections, -                                    section_start_lines=self.entry.section_start_lines,                                      purpose=self.entry.declaration_purpose,                                      func_macro=func_macro)          else:              self.output_declaration(decl_type, declaration_name, -                                    function=declaration_name,                                      typedef=False,                                      functiontype=return_type, -                                    parameterlist=self.entry.parameterlist, -                                    parameterdescs=self.entry.parameterdescs, -                                    parametertypes=self.entry.parametertypes, -                                    parameterdesc_start_lines=self.entry.parameterdesc_start_lines, -                                    sectionlist=self.entry.sectionlist, -                                    sections=self.entry.sections, -                                    section_start_lines=self.entry.section_start_lines,                                      purpose=self.entry.declaration_purpose,                                      func_macro=func_macro) @@ -1124,16 +1069,8 @@ class KernelDoc:              self.create_parameter_list(ln, decl_type, args, ',', declaration_name)              self.output_declaration(decl_type, declaration_name, -                                    function=declaration_name,                                      typedef=True,                                      functiontype=return_type, -                                    parameterlist=self.entry.parameterlist, -                                    parameterdescs=self.entry.parameterdescs, -                                    parametertypes=self.entry.parametertypes, -                                    parameterdesc_start_lines=self.entry.parameterdesc_start_lines, -                                    sectionlist=self.entry.sectionlist, -                                    sections=self.entry.sections, -                                    section_start_lines=self.entry.section_start_lines,                                      purpose=self.entry.declaration_purpose)              return @@ -1153,10 +1090,6 @@ class KernelDoc:                  return              self.output_declaration('typedef', declaration_name, -                                    typedef=declaration_name, -                                    sectionlist=self.entry.sectionlist, -                                    sections=self.entry.sections, -                                    section_start_lines=self.entry.section_start_lines,                                      purpose=self.entry.declaration_purpose)              return @@ -1171,17 +1104,28 @@ class KernelDoc:          with a staticmethod decorator.          """ +        # We support documenting some exported symbols with different +        # names.  A horrible hack. +        suffixes = [ '_noprof' ] +          # Note: it accepts only one EXPORT_SYMBOL* per line, as having          # multiple export lines would violate Kernel coding style.          if export_symbol.search(line):              symbol = export_symbol.group(2) -            function_set.add(symbol) -            return - -        if export_symbol_ns.search(line): +        elif export_symbol_ns.search(line):              symbol = export_symbol_ns.group(2) -            function_set.add(symbol) +        else: +            return False +        # +        # Found an export, trim out any special suffixes +        # +        for suffix in suffixes: +            # Be backward compatible with Python < 3.9 +            if symbol.endswith(suffix): +                symbol = symbol[:-len(suffix)] +        function_set.add(symbol) +        return True      def process_normal(self, ln, line):          """ @@ -1193,7 +1137,6 @@ class KernelDoc:          # start a new entry          self.reset_state(ln) -        self.entry.in_doc_sect = False          # next line is always the function name          self.state = state.NAME @@ -1202,81 +1145,61 @@ class KernelDoc:          """          STATE_NAME: Looking for the "name - description" line          """ - +        # +        # Check for a DOC: block and handle them specially. +        #          if doc_block.search(line): -            self.entry.new_start_line = ln              if not doc_block.group(1): -                self.entry.section = self.section_intro +                self.entry.begin_section(ln, "Introduction")              else: -                self.entry.section = doc_block.group(1) +                self.entry.begin_section(ln, doc_block.group(1))              self.entry.identifier = self.entry.section              self.state = state.DOCBLOCK -            return - -        if doc_decl.search(line): +        # +        # Otherwise we're looking for a normal kerneldoc declaration line. +        # +        elif doc_decl.search(line):              self.entry.identifier = doc_decl.group(1) -            self.entry.is_kernel_comment = False - -            decl_start = str(doc_com)       # comment block asterisk -            fn_type = r"(?:\w+\s*\*\s*)?"  # type (for non-functions) -            parenthesis = r"(?:\(\w*\))?"   # optional parenthesis on function -            decl_end = r"(?:[-:].*)"         # end of the name part - -            # test for pointer declaration type, foo * bar() - desc -            r = KernRe(fr"^{decl_start}([\w\s]+?){parenthesis}?\s*{decl_end}?$") -            if r.search(line): -                self.entry.identifier = r.group(1)              # Test for data declaration -            r = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)") -            if r.search(line): -                self.entry.decl_type = r.group(1) -                self.entry.identifier = r.group(2) -                self.entry.is_kernel_comment = True +            if doc_begin_data.search(line): +                self.entry.decl_type = doc_begin_data.group(1) +                self.entry.identifier = doc_begin_data.group(2) +            # +            # Look for a function description +            # +            elif doc_begin_func.search(line): +                self.entry.identifier = doc_begin_func.group(1) +                self.entry.decl_type = "function" +            # +            # We struck out. +            #              else: -                # Look for foo() or static void foo() - description; -                # or misspelt identifier - -                r1 = KernRe(fr"^{decl_start}{fn_type}(\w+)\s*{parenthesis}\s*{decl_end}?$") -                r2 = KernRe(fr"^{decl_start}{fn_type}(\w+[^-:]*){parenthesis}\s*{decl_end}$") - -                for r in [r1, r2]: -                    if r.search(line): -                        self.entry.identifier = r.group(1) -                        self.entry.decl_type = "function" - -                        r = KernRe(r"define\s+") -                        self.entry.identifier = r.sub("", self.entry.identifier) -                        self.entry.is_kernel_comment = True -                        break - -            self.entry.identifier = self.entry.identifier.strip(" ") - +                self.emit_msg(ln, +                              f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}") +                self.state = state.NORMAL +                return +            # +            # OK, set up for a new kerneldoc entry. +            #              self.state = state.BODY - +            self.entry.identifier = self.entry.identifier.strip(" ")              # if there's no @param blocks need to set up default section here -            self.entry.section = SECTION_DEFAULT -            self.entry.new_start_line = ln + 1 - +            self.entry.begin_section(ln + 1) +            # +            # Find the description portion, which *should* be there but +            # isn't always. +            # (We should be able to capture this from the previous parsing - someday) +            #              r = KernRe("[-:](.*)")              if r.search(line): -                # strip leading/trailing/multiple spaces -                self.entry.descr = r.group(1).strip(" ") - -                r = KernRe(r"\s+") -                self.entry.descr = r.sub(" ", self.entry.descr) -                self.entry.declaration_purpose = self.entry.descr -                self.state = state.BODY_MAYBE +                self.entry.declaration_purpose = trim_whitespace(r.group(1)) +                self.state = state.DECLARATION              else:                  self.entry.declaration_purpose = "" -            if not self.entry.is_kernel_comment: -                self.emit_msg(ln, -                              f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}") -                self.state = state.NORMAL -              if not self.entry.declaration_purpose and self.config.wshort_desc:                  self.emit_msg(ln,                                f"missing initial short description on line:\n{line}") @@ -1290,60 +1213,51 @@ class KernelDoc:                  self.emit_msg(ln,                                f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}",                                    warning=False) - -            return - +        #          # Failed to find an identifier. Emit a warning -        self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") - -    def process_body(self, ln, line): -        """ -        STATE_BODY and STATE_BODY_MAYBE: the bulk of a kerneldoc comment. -        """ - -        if self.state == state.BODY_WITH_BLANK_LINE: -            r = KernRe(r"\s*\*\s?\S") -            if r.match(line): -                self.dump_section() -                self.entry.section = SECTION_DEFAULT -                self.entry.new_start_line = ln -                self.entry.contents = "" +        # +        else: +            self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") +    # +    # Helper function to determine if a new section is being started. +    # +    def is_new_section(self, ln, line):          if doc_sect.search(line): -            self.entry.in_doc_sect = True +            self.state = state.BODY +            # +            # Pick out the name of our new section, tweaking it if need be. +            #              newsection = doc_sect.group(1) - -            if newsection.lower() in ["description", "context"]: -                newsection = newsection.title() - -            # Special case: @return is a section, not a param description -            if newsection.lower() in ["@return", "@returns", -                                      "return", "returns"]: +            if newsection.lower() == 'description': +                newsection = 'Description' +            elif newsection.lower() == 'context': +                newsection = 'Context' +                self.state = state.SPECIAL_SECTION +            elif newsection.lower() in ["@return", "@returns", +                                        "return", "returns"]:                  newsection = "Return" - -            # Perl kernel-doc has a check here for contents before sections. -            # the logic there is always false, as in_doc_sect variable is -            # always true. So, just don't implement Wcontents_before_sections - -            # .title() +                self.state = state.SPECIAL_SECTION +            elif newsection[0] == '@': +                self.state = state.SPECIAL_SECTION +            # +            # Initialize the contents, and get the new section going. +            #              newcontents = doc_sect.group(2)              if not newcontents:                  newcontents = "" - -            if self.entry.contents.strip("\n"): -                self.dump_section() - -            self.entry.new_start_line = ln -            self.entry.section = newsection +            self.dump_section() +            self.entry.begin_section(ln, newsection)              self.entry.leading_space = None -            self.entry.contents = newcontents.lstrip() -            if self.entry.contents: -                self.entry.contents += "\n" - -            self.state = state.BODY -            return +            self.entry.add_text(newcontents.lstrip()) +            return True +        return False +    # +    # Helper function to detect (and effect) the end of a kerneldoc comment. +    # +    def is_comment_end(self, ln, line):          if doc_end.search(line):              self.dump_section() @@ -1356,100 +1270,128 @@ class KernelDoc:              self.entry.new_start_line = ln + 1              self.state = state.PROTO -            return +            return True +        return False + +    def process_decl(self, ln, line): +        """ +        STATE_DECLARATION: We've seen the beginning of a declaration +        """ +        if self.is_new_section(ln, line) or self.is_comment_end(ln, line): +            return +        # +        # Look for anything with the " * " line beginning. +        #          if doc_content.search(line):              cont = doc_content.group(1) - +            # +            # A blank line means that we have moved out of the declaration +            # part of the comment (without any "special section" parameter +            # descriptions). +            #              if cont == "": -                if self.entry.section == self.section_context: -                    self.dump_section() - -                    self.entry.new_start_line = ln -                    self.state = state.BODY -                else: -                    if self.entry.section != SECTION_DEFAULT: -                        self.state = state.BODY_WITH_BLANK_LINE -                    else: -                        self.state = state.BODY - -                    self.entry.contents += "\n" - -            elif self.state == state.BODY_MAYBE: - -                # Continued declaration purpose -                self.entry.declaration_purpose = self.entry.declaration_purpose.rstrip() -                self.entry.declaration_purpose += " " + cont - -                r = KernRe(r"\s+") -                self.entry.declaration_purpose = r.sub(' ', -                                                       self.entry.declaration_purpose) - +                self.state = state.BODY +            # +            # Otherwise we have more of the declaration section to soak up. +            #              else: -                if self.entry.section.startswith('@') or        \ -                   self.entry.section == self.section_context: -                    if self.entry.leading_space is None: -                        r = KernRe(r'^(\s+)') -                        if r.match(cont): -                            self.entry.leading_space = len(r.group(1)) -                        else: -                            self.entry.leading_space = 0 - -                    # Double-check if leading space are realy spaces -                    pos = 0 -                    for i in range(0, self.entry.leading_space): -                        if cont[i] != " ": -                            break -                        pos += 1 - -                    cont = cont[pos:] +                self.entry.declaration_purpose = \ +                    trim_whitespace(self.entry.declaration_purpose + ' ' + cont) +        else: +            # Unknown line, ignore +            self.emit_msg(ln, f"bad line: {line}") -                    # NEW LOGIC: -                    # In case it is different, update it -                    if self.entry.leading_space != pos: -                        self.entry.leading_space = pos -                self.entry.contents += cont + "\n" +    def process_special(self, ln, line): +        """ +        STATE_SPECIAL_SECTION: a section ending with a blank line +        """ +        # +        # If we have hit a blank line (only the " * " marker), then this +        # section is done. +        # +        if KernRe(r"\s*\*\s*$").match(line): +            self.entry.begin_section(ln, dump = True) +            self.state = state.BODY +            return +        # +        # Not a blank line, look for the other ways to end the section. +        # +        if self.is_new_section(ln, line) or self.is_comment_end(ln, line):              return +        # +        # OK, we should have a continuation of the text for this section. +        # +        if doc_content.search(line): +            cont = doc_content.group(1) +            # +            # If the lines of text after the first in a special section have +            # leading white space, we need to trim it out or Sphinx will get +            # confused.  For the second line (the None case), see what we +            # find there and remember it. +            # +            if self.entry.leading_space is None: +                r = KernRe(r'^(\s+)') +                if r.match(cont): +                    self.entry.leading_space = len(r.group(1)) +                else: +                    self.entry.leading_space = 0 +            # +            # Otherwise, before trimming any leading chars, be *sure* +            # that they are white space.  We should maybe warn if this +            # isn't the case. +            # +            for i in range(0, self.entry.leading_space): +                if cont[i] != " ": +                    self.entry.leading_space = i +                    break +            # +            # Add the trimmed result to the section and we're done. +            # +            self.entry.add_text(cont[self.entry.leading_space:]) +        else: +            # Unknown line, ignore +            self.emit_msg(ln, f"bad line: {line}") -        # Unknown line, ignore -        self.emit_msg(ln, f"bad line: {line}") +    def process_body(self, ln, line): +        """ +        STATE_BODY: the bulk of a kerneldoc comment. +        """ +        if self.is_new_section(ln, line) or self.is_comment_end(ln, line): +            return -    def process_inline(self, ln, line): -        """STATE_INLINE: docbook comments within a prototype.""" +        if doc_content.search(line): +            cont = doc_content.group(1) +            self.entry.add_text(cont) +        else: +            # Unknown line, ignore +            self.emit_msg(ln, f"bad line: {line}") -        if self.inline_doc_state == state.INLINE_NAME and \ -           doc_inline_sect.search(line): -            self.entry.section = doc_inline_sect.group(1) -            self.entry.new_start_line = ln +    def process_inline_name(self, ln, line): +        """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" -            self.entry.contents = doc_inline_sect.group(2).lstrip() -            if self.entry.contents != "": -                self.entry.contents += "\n" +        if doc_inline_sect.search(line): +            self.entry.begin_section(ln, doc_inline_sect.group(1)) +            self.entry.add_text(doc_inline_sect.group(2).lstrip()) +            self.state = state.INLINE_TEXT +        elif doc_inline_end.search(line): +            self.dump_section() +            self.state = state.PROTO +        elif doc_content.search(line): +            self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") +            self.state = state.PROTO +        # else ... ?? -            self.inline_doc_state = state.INLINE_TEXT -            # Documentation block end */ -            return +    def process_inline_text(self, ln, line): +        """STATE_INLINE_TEXT: docbook comments within a prototype."""          if doc_inline_end.search(line): -            if self.entry.contents not in ["", "\n"]: -                self.dump_section() - +            self.dump_section()              self.state = state.PROTO -            self.inline_doc_state = state.INLINE_NA -            return - -        if doc_content.search(line): -            if self.inline_doc_state == state.INLINE_TEXT: -                self.entry.contents += doc_content.group(1) + "\n" -                if not self.entry.contents.strip(" ").rstrip("\n"): -                    self.entry.contents = "" - -            elif self.inline_doc_state == state.INLINE_NAME: -                self.emit_msg(ln, -                              f"Incorrect use of kernel-doc format: {line}") - -                self.inline_doc_state = state.INLINE_ERROR +        elif doc_content.search(line): +            self.entry.add_text(doc_content.group(1)) +        # else ... ??      def syscall_munge(self, ln, proto):         # pylint: disable=W0613          """ @@ -1531,105 +1473,94 @@ class KernelDoc:          """Ancillary routine to process a function prototype"""          # strip C99-style comments to end of line -        r = KernRe(r"\/\/.*$", re.S) -        line = r.sub('', line) - +        line = KernRe(r"\/\/.*$", re.S).sub('', line) +        # +        # Soak up the line's worth of prototype text, stopping at { or ; if present. +        #          if KernRe(r'\s*#\s*define').match(line):              self.entry.prototype = line -        elif line.startswith('#'): -            # Strip other macros like #ifdef/#ifndef/#endif/... -            pass -        else: +        elif not line.startswith('#'):   # skip other preprocessor stuff              r = KernRe(r'([^\{]*)')              if r.match(line):                  self.entry.prototype += r.group(1) + " " - +        # +        # If we now have the whole prototype, clean it up and declare victory. +        #          if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): -            # strip comments -            r = KernRe(r'/\*.*?\*/') -            self.entry.prototype = r.sub('', self.entry.prototype) - -            # strip newlines/cr's -            r = KernRe(r'[\r\n]+') -            self.entry.prototype = r.sub(' ', self.entry.prototype) - -            # strip leading spaces -            r = KernRe(r'^\s+') -            self.entry.prototype = r.sub('', self.entry.prototype) - +            # strip comments and surrounding spaces +            self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() +            #              # Handle self.entry.prototypes for function pointers like:              #       int (*pcs_config)(struct foo) - +            # by turning it into +            #	    int pcs_config(struct foo) +            #              r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)')              self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) - +            # +            # Handle special declaration syntaxes +            #              if 'SYSCALL_DEFINE' in self.entry.prototype:                  self.entry.prototype = self.syscall_munge(ln,                                                            self.entry.prototype) - -            r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') -            if r.search(self.entry.prototype): -                self.entry.prototype = self.tracepoint_munge(ln, -                                                             self.entry.prototype) - +            else: +                r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') +                if r.search(self.entry.prototype): +                    self.entry.prototype = self.tracepoint_munge(ln, +                                                                 self.entry.prototype) +            # +            # ... and we're done +            #              self.dump_function(ln, self.entry.prototype)              self.reset_state(ln)      def process_proto_type(self, ln, line):          """Ancillary routine to process a type""" -        # Strip newlines/cr's. -        line = KernRe(r'[\r\n]+', re.S).sub(' ', line) - -        # Strip leading spaces -        line = KernRe(r'^\s+', re.S).sub('', line) - -        # Strip trailing spaces -        line = KernRe(r'\s+$', re.S).sub('', line) - -        # Strip C99-style comments to the end of the line -        line = KernRe(r"\/\/.*$", re.S).sub('', line) +        # Strip C99-style comments and surrounding whitespace +        line = KernRe(r"//.*$", re.S).sub('', line).strip() +        if not line: +            return # nothing to see here          # To distinguish preprocessor directive from regular declaration later.          if line.startswith('#'):              line += ";" - -        r = KernRe(r'([^\{\};]*)([\{\};])(.*)') -        while True: -            if r.search(line): -                if self.entry.prototype: -                    self.entry.prototype += " " -                self.entry.prototype += r.group(1) + r.group(2) - -                self.entry.brcount += r.group(2).count('{') -                self.entry.brcount -= r.group(2).count('}') - -                self.entry.brcount = max(self.entry.brcount, 0) - -                if r.group(2) == ';' and self.entry.brcount == 0: +        # +        # Split the declaration on any of { } or ;, and accumulate pieces +        # until we hit a semicolon while not inside {brackets} +        # +        r = KernRe(r'(.*?)([{};])') +        for chunk in r.split(line): +            if chunk:  # Ignore empty matches +                self.entry.prototype += chunk +                # +                # This cries out for a match statement ... someday after we can +                # drop Python 3.9 ... +                # +                if chunk == '{': +                    self.entry.brcount += 1 +                elif chunk == '}': +                    self.entry.brcount -= 1 +                elif chunk == ';' and self.entry.brcount <= 0:                      self.dump_declaration(ln, self.entry.prototype)                      self.reset_state(ln) -                    break - -                line = r.group(3) -            else: -                self.entry.prototype += line -                break +                    return +        # +        # We hit the end of the line while still in the declaration; put +        # in a space to represent the newline. +        # +        self.entry.prototype += ' '      def process_proto(self, ln, line):          """STATE_PROTO: reading a function/whatever prototype."""          if doc_inline_oneline.search(line): -            self.entry.section = doc_inline_oneline.group(1) -            self.entry.contents = doc_inline_oneline.group(2) - -            if self.entry.contents != "": -                self.entry.contents += "\n" -                self.dump_section(start_new=False) +            self.entry.begin_section(ln, doc_inline_oneline.group(1)) +            self.entry.add_text(doc_inline_oneline.group(2)) +            self.dump_section()          elif doc_inline_start.search(line): -            self.state = state.INLINE -            self.inline_doc_state = state.INLINE_NAME +            self.state = state.INLINE_NAME          elif self.entry.decl_type == 'function':              self.process_proto_function(ln, line) @@ -1642,14 +1573,11 @@ class KernelDoc:          if doc_end.search(line):              self.dump_section() -            self.output_declaration("doc", self.entry.identifier, -                                    sectionlist=self.entry.sectionlist, -                                    sections=self.entry.sections, -                                    section_start_lines=self.entry.section_start_lines) +            self.output_declaration("doc", self.entry.identifier)              self.reset_state(ln)          elif doc_content.search(line): -            self.entry.contents += doc_content.group(1) + "\n" +            self.entry.add_text(doc_content.group(1))      def parse_export(self):          """ @@ -1670,6 +1598,22 @@ class KernelDoc:          return export_table +    # +    # The state/action table telling us which function to invoke in +    # each state. +    # +    state_actions = { +        state.NORMAL:			process_normal, +        state.NAME:			process_name, +        state.BODY:			process_body, +        state.DECLARATION:		process_decl, +        state.SPECIAL_SECTION:		process_special, +        state.INLINE_NAME:		process_inline_name, +        state.INLINE_TEXT:		process_inline_text, +        state.PROTO:			process_proto, +        state.DOCBLOCK:			process_docblock, +        } +      def parse_kdoc(self):          """          Open and process each line of a C source file. @@ -1680,7 +1624,6 @@ class KernelDoc:          Besides parsing kernel-doc tags, it also parses export symbols.          """ -        cont = False          prev = ""          prev_ln = None          export_table = set() @@ -1696,23 +1639,18 @@ class KernelDoc:                      if self.state == state.PROTO:                          if line.endswith("\\"):                              prev += line.rstrip("\\") -                            cont = True -                              if not prev_ln:                                  prev_ln = ln -                              continue -                        if cont: +                        if prev:                              ln = prev_ln                              line = prev + line                              prev = "" -                            cont = False                              prev_ln = None -                    self.config.log.debug("%d %s%s: %s", +                    self.config.log.debug("%d %s: %s",                                            ln, state.name[self.state], -                                          state.inline_name[self.inline_doc_state],                                            line)                      # This is an optimization over the original script. @@ -1720,25 +1658,11 @@ class KernelDoc:                      # it was read twice. Here, we use the already-existing                      # loop to parse exported symbols as well.                      # -                    # TODO: It should be noticed that not all states are -                    # needed here. On a future cleanup, process export only -                    # at the states that aren't handling comment markups. -                    self.process_export(export_table, line) +                    if (self.state != state.NORMAL) or \ +                       not self.process_export(export_table, line): +                        # Hand this line to the appropriate state handler +                        self.state_actions[self.state](self, ln, line) -                    # Hand this line to the appropriate state handler -                    if self.state == state.NORMAL: -                        self.process_normal(ln, line) -                    elif self.state == state.NAME: -                        self.process_name(ln, line) -                    elif self.state in [state.BODY, state.BODY_MAYBE, -                                        state.BODY_WITH_BLANK_LINE]: -                        self.process_body(ln, line) -                    elif self.state == state.INLINE:  # scanning for inline parameters -                        self.process_inline(ln, line) -                    elif self.state == state.PROTO: -                        self.process_proto(ln, line) -                    elif self.state == state.DOCBLOCK: -                        self.process_docblock(ln, line)          except OSError:              self.config.log.error(f"Error: Cannot open file {self.fname}") | 
