From 6656ae4df1a6ad6dfb5c4ce4c76136a42abb9bf4 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 7 Aug 2025 15:16:28 -0600 Subject: docs: kdoc: consolidate the stripping of private struct/union members There were two locations duplicating the logic of stripping private members and associated comments; coalesce them into one, and add some comments describing what's going on. Output change: we now no longer add extraneous white space around macro definitions. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250807211639.47286-2-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index c3fe4bd5eab4..93fcd8807aa8 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -81,6 +81,21 @@ multi_space = KernRe(r'\s\s+') def trim_whitespace(s): return multi_space.sub(' ', s.strip()) +# +# Remove struct/enum members that have been marked "private". +# +def trim_private_members(text): + # + # First look for a "public:" block that ends a private region, then + # handle the "private until the end" case. + # + text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text) + text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text) + # + # We needed the comments to do the above, but now we can take them out. 
+ # + return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip() + class state: """ State machine enums @@ -568,12 +583,6 @@ class KernelDoc: args_pattern = r'([^,)]+)' sub_prefixes = [ - (KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''), - (KernRe(r'\/\*\s*private:.*', re.S | re.I), ''), - - # Strip comments - (KernRe(r'\/\*.*?\*\/', re.S), ''), - # Strip attributes (attribute, ' '), (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), @@ -648,6 +657,7 @@ class KernelDoc: (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), ] + members = trim_private_members(members) for search, sub in sub_prefixes: members = search.sub(sub, members) @@ -797,24 +807,18 @@ class KernelDoc: """ Stores an enum inside self.entries array. """ - - # Ignore members marked private - proto = KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S).sub('', proto) - proto = KernRe(r'\/\*\s*private:.*}', flags=re.S).sub('}', proto) - - # Strip comments - proto = KernRe(r'\/\*.*?\*\/', flags=re.S).sub('', proto) - - # Strip #define macros inside enums + # + # Strip preprocessor directives. Note that this depends on the + # trailing semicolon we added in process_proto_type(). + # proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) - # # Parse out the name and members of the enum. Typedef form first. # r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') if r.search(proto): declaration_name = r.group(2) - members = r.group(1).rstrip() + members = trim_private_members(r.group(1)) # # Failing that, look for a straight enum # @@ -822,7 +826,7 @@ class KernelDoc: r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') if r.match(proto): declaration_name = r.group(1) - members = r.group(2).rstrip() + members = trim_private_members(r.group(2)) # # OK, this isn't going to work. 
# -- cgit From 259feba4dde78f165b03e231ea9985dfe600c202 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 7 Aug 2025 15:16:29 -0600 Subject: docs: kdoc: Move a regex line in dump_struct() The complex struct_members regex was defined far from its use; bring the two together. Remove some extraneous backslashes while making the move. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250807211639.47286-3-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 93fcd8807aa8..aa6d11bf29b1 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -551,7 +551,6 @@ class KernelDoc: ] definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" - struct_members = KernRe(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)') # Extract struct/union definition members = None @@ -683,6 +682,7 @@ class KernelDoc: # So, we need to have an extra loop on Python to override such # re limitation. + struct_members = KernRe(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\};]*)(;)') while True: tuples = struct_members.findall(members) if not tuples: -- cgit From 5fd513f01169ae93d202b8c30f0837096664e7d7 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 7 Aug 2025 15:16:30 -0600 Subject: docs: kdoc: backslashectomy in kdoc_parser A lot of the regular expressions in this file have extraneous backslashes that may have been needed in Perl, but aren't helpful here. Take them out to reduce slightly the visual noise. Escaping of (){}[] has been left in place, even when unnecessary, for visual clarity. 
Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250807211639.47286-4-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index aa6d11bf29b1..14ded23f11e0 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -46,7 +46,7 @@ doc_decl = doc_com + KernRe(r'(\w+)', cache=False) known_section_names = 'description|context|returns?|notes?|examples?' known_sections = KernRe(known_section_names, flags = re.I) doc_sect = doc_com + \ - KernRe(r'\s*(\@[.\w]+|\@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$', + KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$', flags=re.I, cache=False) doc_content = doc_com_body + KernRe(r'(.*)', cache=False) @@ -60,7 +60,7 @@ attribute = KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) -type_param = KernRe(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) +type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) # # Tests for the beginning of a kerneldoc block in its various forms. 
@@ -405,7 +405,7 @@ class KernelDoc: for arg in args.split(splitter): # Strip comments - arg = KernRe(r'\/\*.*\*\/').sub('', arg) + arg = KernRe(r'/\*.*\*/').sub('', arg) # Ignore argument attributes arg = KernRe(r'\sPOS0?\s').sub(' ', arg) @@ -428,7 +428,7 @@ class KernelDoc: arg = arg.replace('#', ',') - r = KernRe(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)') + r = KernRe(r'[^\(]+\(\*?\s*([\w\[\].]*)\s*\)') if r.match(arg): param = r.group(1) else: @@ -443,7 +443,7 @@ class KernelDoc: # Array-of-pointers arg = arg.replace('#', ',') - r = KernRe(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)') + r = KernRe(r'[^\(]+\(\s*\*\s*([\w\[\].]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)') if r.match(arg): param = r.group(1) else: @@ -709,7 +709,7 @@ class KernelDoc: if not arg: continue - r = KernRe(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)') + r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)') if r.match(arg): # Pointer-to-function dtype = r.group(1) @@ -1044,7 +1044,7 @@ class KernelDoc: Stores a typedef inside self.entries array. """ - typedef_type = r'((?:\s+[\w\*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' + typedef_type = r'((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' typedef_ident = r'\*?\s*(\w\S+)\s*' typedef_args = r'\s*\((.*)\);' @@ -1265,7 +1265,7 @@ class KernelDoc: self.dump_section() # Look for doc_com + + doc_end: - r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') + r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/') if r.match(line): self.emit_msg(ln, f"suspicious ending line: {line}") @@ -1476,7 +1476,7 @@ class KernelDoc: """Ancillary routine to process a function prototype""" # strip C99-style comments to end of line - line = KernRe(r"\/\/.*$", re.S).sub('', line) + line = KernRe(r"//.*$", re.S).sub('', line) # # Soak up the line's worth of prototype text, stopping at { or ; if present. 
# -- cgit From 64cf83bcd3217a9583caeb404ff136366a46705c Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 7 Aug 2025 15:16:31 -0600 Subject: docs: kdoc: move the prefix transforms out of dump_struct() dump_struct is one of the longest functions in the kdoc_parser class, making it hard to read and reason about. Move the definition of the prefix transformations out of the function, join them with the definition of "attribute" (which was defined at the top of the file but only used here), and reformat the code slightly for shorter line widths. Just code movement in the end. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250807211639.47286-5-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 179 +++++++++++++++++++++------------------- 1 file changed, 96 insertions(+), 83 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 14ded23f11e0..3d007d200da6 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -54,8 +54,6 @@ doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) -attribute = KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", - flags=re.I | re.S, cache=False) export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) @@ -74,6 +72,97 @@ doc_begin_func = KernRe(str(doc_com) + # initial " * ' r'(?:[-:].*)?$', # description (not captured) cache = False) +# +# Here begins a long set of transformations to turn structure member prefixes +# and macro invocations into something we can parse and generate kdoc for. 
+# +struct_attribute = KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", + flags=re.I | re.S, cache=False) +struct_args_pattern = r'([^,)]+)' + +struct_prefixes = [ + # Strip attributes + (struct_attribute, ' '), + (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__packed\s*', re.S), ' '), + (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned', re.S), ' '), + # + # Unwrap struct_group macros based on this definition: + # __struct_group(TAG, NAME, ATTRS, MEMBERS...) + # which has variants like: struct_group(NAME, MEMBERS...) + # Only MEMBERS arguments require documentation. + # + # Parsing them happens on two steps: + # + # 1. drop struct group arguments that aren't at MEMBERS, + # storing them as STRUCT_GROUP(MEMBERS) + # + # 2. remove STRUCT_GROUP() ancillary macro. + # + # The original logic used to remove STRUCT_GROUP() using an + # advanced regex: + # + # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; + # + # with two patterns that are incompatible with + # Python re module, as it has: + # + # - a recursive pattern: (?1) + # - an atomic grouping: (?>...) + # + # I tried a simpler version: but it didn't work either: + # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; + # + # As it doesn't properly match the end parenthesis on some cases. + # + # So, a better solution was crafted: there's now a NestedMatch + # class that ensures that delimiters after a search are properly + # matched. So, the implementation to drop STRUCT_GROUP() will be + # handled in separate. 
+ # + (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), + (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), + # + # Replace macros + # + # TODO: use NestedMatch for FOO($1, $2, ...) matches + # + # it is better to also move those to the NestedMatch logic, + # to ensure that parenthesis will be properly matched. + # + (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), + r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), + (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), + r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), + (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', + re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), + (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', + re.S), r'unsigned long \1[1 << ((\2) - 1)]'), + (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + + r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + + struct_args_pattern + r'\)', re.S), r'\1 \2[]'), + (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), + (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), +] +# +# Regexes here are guaranteed to have the end limiter matching +# the start delimiter. Yet, right now, only one replace group +# is allowed. 
+# +struct_nested_prefixes = [ + (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), +] + + # # A little helper to get rid of excess white space # @@ -578,91 +667,15 @@ class KernelDoc: self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. Prototype was for {decl_type} {declaration_name} instead\n") return - - args_pattern = r'([^,)]+)' - - sub_prefixes = [ - # Strip attributes - (attribute, ' '), - (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__packed\s*', re.S), ' '), - (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned', re.S), ' '), - - # Unwrap struct_group macros based on this definition: - # __struct_group(TAG, NAME, ATTRS, MEMBERS...) - # which has variants like: struct_group(NAME, MEMBERS...) - # Only MEMBERS arguments require documentation. - # - # Parsing them happens on two steps: - # - # 1. drop struct group arguments that aren't at MEMBERS, - # storing them as STRUCT_GROUP(MEMBERS) - # - # 2. remove STRUCT_GROUP() ancillary macro. - # - # The original logic used to remove STRUCT_GROUP() using an - # advanced regex: - # - # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; - # - # with two patterns that are incompatible with - # Python re module, as it has: - # - # - a recursive pattern: (?1) - # - an atomic grouping: (?>...) - # - # I tried a simpler version: but it didn't work either: - # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; - # - # As it doesn't properly match the end parenthesis on some cases. - # - # So, a better solution was crafted: there's now a NestedMatch - # class that ensures that delimiters after a search are properly - # matched. So, the implementation to drop STRUCT_GROUP() will be - # handled in separate. 
- - (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), - (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), - - # Replace macros - # - # TODO: use NestedMatch for FOO($1, $2, ...) matches - # - # it is better to also move those to the NestedMatch logic, - # to ensure that parenthesis will be properly matched. - - (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), - (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), - (KernRe(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), - (KernRe(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'), - (KernRe(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'), - (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'), - (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'), - ] - - # Regexes here are guaranteed to have the end limiter matching - # the start delimiter. Yet, right now, only one replace group - # is allowed. - - sub_nested_prefixes = [ - (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), - ] - + # + # Go through the list of members applying all of our transformations. 
+ # members = trim_private_members(members) - for search, sub in sub_prefixes: + for search, sub in struct_prefixes: members = search.sub(sub, members) nested = NestedMatch() - - for search, sub in sub_nested_prefixes: + for search, sub in struct_nested_prefixes: members = nested.sub(search, sub, members) # Keeps the original declaration as-is -- cgit From 0f7344129434a6b44d4abb5080a9d67dd734ee07 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 7 Aug 2025 15:16:32 -0600 Subject: docs: kdoc: split top-level prototype parsing out of dump_struct() Move the initial split of the prototype into its own function in the ongoing effort to cut dump_struct() down to size. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250807211639.47286-6-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 43 +++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 23 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 3d007d200da6..ab896dcd9572 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -624,13 +624,11 @@ class KernelDoc: self.emit_msg(ln, f"No description found for return value of '{declaration_name}'") - def dump_struct(self, ln, proto): - """ - Store an entry for an struct or union - """ - + # + # Split apart a structure prototype; returns (struct|union, name, members) or None + # + def split_struct_proto(self, proto): type_pattern = r'(struct|union)' - qualifiers = [ "__attribute__", "__packed", @@ -638,34 +636,33 @@ class KernelDoc: "____cacheline_aligned_in_smp", "____cacheline_aligned", ] - definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" 
- # Extract struct/union definition - members = None - declaration_name = None - decl_type = None - r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) if r.search(proto): - decl_type = r.group(1) - declaration_name = r.group(2) - members = r.group(3) + return (r.group(1), r.group(2), r.group(3)) else: r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') - if r.search(proto): - decl_type = r.group(1) - declaration_name = r.group(3) - members = r.group(2) + return (r.group(1), r.group(3), r.group(2)) + return None - if not members: + def dump_struct(self, ln, proto): + """ + Store an entry for an struct or union + """ + # + # Do the basic parse to get the pieces of the declaration. + # + struct_parts = self.split_struct_proto(proto) + if not struct_parts: self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") return + decl_type, declaration_name, members = struct_parts if self.entry.identifier != declaration_name: - self.emit_msg(ln, - f"expecting prototype for {decl_type} {self.entry.identifier}. Prototype was for {decl_type} {declaration_name} instead\n") + self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. " + f"Prototype was for {decl_type} {declaration_name} instead\n") return # # Go through the list of members applying all of our transformations. @@ -695,7 +692,7 @@ class KernelDoc: # So, we need to have an extra loop on Python to override such # re limitation. 
- struct_members = KernRe(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\};]*)(;)') + struct_members = KernRe(r'(struct|union)([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\};]*)(;)') while True: tuples = struct_members.findall(members) if not tuples: -- cgit From 77e3c875f0a83d9192079e88d8569ac36c6b6bea Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 7 Aug 2025 15:16:33 -0600 Subject: docs: kdoc: split struct-member rewriting out of dump_struct() The massive loop that massages struct members shares no data with the rest of dump_struct(); split it out into its own function. Code movement only, no other changes. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250807211639.47286-7-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 65 +++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 31 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index ab896dcd9572..fbd7f6ce3360 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -647,37 +647,7 @@ class KernelDoc: return (r.group(1), r.group(3), r.group(2)) return None - def dump_struct(self, ln, proto): - """ - Store an entry for an struct or union - """ - # - # Do the basic parse to get the pieces of the declaration. - # - struct_parts = self.split_struct_proto(proto) - if not struct_parts: - self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") - return - decl_type, declaration_name, members = struct_parts - - if self.entry.identifier != declaration_name: - self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. " - f"Prototype was for {decl_type} {declaration_name} instead\n") - return - # - # Go through the list of members applying all of our transformations. 
- # - members = trim_private_members(members) - for search, sub in struct_prefixes: - members = search.sub(sub, members) - - nested = NestedMatch() - for search, sub in struct_nested_prefixes: - members = nested.sub(search, sub, members) - - # Keeps the original declaration as-is - declaration = members - + def rewrite_struct_members(self, members): # Split nested struct/union elements # # This loop was simpler at the original kernel-doc perl version, as @@ -768,6 +738,39 @@ class KernelDoc: newmember += f"{dtype} {s_id}.{name}; " members = members.replace(oldmember, newmember) + return members + + def dump_struct(self, ln, proto): + """ + Store an entry for an struct or union + """ + # + # Do the basic parse to get the pieces of the declaration. + # + struct_parts = self.split_struct_proto(proto) + if not struct_parts: + self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") + return + decl_type, declaration_name, members = struct_parts + + if self.entry.identifier != declaration_name: + self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. " + f"Prototype was for {decl_type} {declaration_name} instead\n") + return + # + # Go through the list of members applying all of our transformations. 
+ # + members = trim_private_members(members) + for search, sub in struct_prefixes: + members = search.sub(sub, members) + + nested = NestedMatch() + for search, sub in struct_nested_prefixes: + members = nested.sub(search, sub, members) + + # Keeps the original declaration as-is + declaration = members + members = self.rewrite_struct_members(members) # Ignore other nested elements, like enums members = re.sub(r'(\{[^\{\}]*\})', '', members) -- cgit From f8208676c1c85c0b91e726954c05f5859b890ccb Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 7 Aug 2025 15:16:34 -0600 Subject: docs: kdoc: rework the rewrite_struct_members() main loop Adopt a more Pythonic form for the main loop of this function, getting rid of the "while True:" construction and making the actual loop invariant explicit. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250807211639.47286-8-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index fbd7f6ce3360..e11f3d6e9469 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -663,11 +663,8 @@ class KernelDoc: # re limitation. 
struct_members = KernRe(r'(struct|union)([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\};]*)(;)') - while True: - tuples = struct_members.findall(members) - if not tuples: - break - + tuples = struct_members.findall(members) + while tuples: for t in tuples: newmember = "" maintype = t[0] @@ -738,6 +735,7 @@ class KernelDoc: newmember += f"{dtype} {s_id}.{name}; " members = members.replace(oldmember, newmember) + tuples = struct_members.findall(members) return members def dump_struct(self, ln, proto): -- cgit From fb20e610393b02a832a0bf4964e12c20a7ffa2f8 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 7 Aug 2025 15:16:35 -0600 Subject: docs: kdoc: remove an extraneous strip() call ...the variable in question was already strip()ed at the top of the loop. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250807211639.47286-9-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 1 - 1 file changed, 1 deletion(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index e11f3d6e9469..0c279aa802a0 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -703,7 +703,6 @@ class KernelDoc: newmember += f"{dtype}{s_id}.{name}{extra}; " else: - arg = arg.strip() # Handle bitmaps arg = KernRe(r':\s*\d+\s*').sub('', arg) -- cgit From a8c4b0a8f1969e5ee0e8abb62cde8f0d7bcc2009 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 7 Aug 2025 15:16:36 -0600 Subject: docs: kdoc: Some rewrite_struct_members() commenting Add comments to rewrite_struct_members() describing what it is actually doing, and reformat/comment the main struct_members regex so that it is (more) comprehensible to humans. 
Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250807211639.47286-10-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 0c279aa802a0..e3d0270b1a19 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -647,22 +647,28 @@ class KernelDoc: return (r.group(1), r.group(3), r.group(2)) return None + # + # Rewrite the members of a structure or union for easier formatting later on. + # Among other things, this function will turn a member like: + # + # struct { inner_members; } foo; + # + # into: + # + # struct foo; inner_members; + # def rewrite_struct_members(self, members): - # Split nested struct/union elements - # - # This loop was simpler at the original kernel-doc perl version, as - # while ($members =~ m/$struct_members/) { ... } - # reads 'members' string on each interaction. # - # Python behavior is different: it parses 'members' only once, - # creating a list of tuples from the first interaction. + # Process struct/union members from the most deeply nested outward. The + # trick is in the ^{ below - it prevents a match of an outer struct/union + # until the inner one has been munged (removing the "{" in the process). # - # On other words, this won't get nested structs. - # - # So, we need to have an extra loop on Python to override such - # re limitation. 
- - struct_members = KernRe(r'(struct|union)([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\};]*)(;)') + struct_members = KernRe(r'(struct|union)' # 0: declaration type + r'([^\{\};]+)' # 1: possible name + r'(\{)' + r'([^\{\}]*)' # 3: Contents of declaration + r'(\})' + r'([^\{\};]*)(;)') # 5: Remaining stuff after declaration tuples = struct_members.findall(members) while tuples: for t in tuples: -- cgit From e6dd4e2a5ca1c1e2fb168249532da1d95b5b24af Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 7 Aug 2025 15:16:37 -0600 Subject: docs: kdoc: further rewrite_struct_members() cleanup Get rid of some redundant checks, and generally tighten up the code; no logical change. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250807211639.47286-11-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 86 ++++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 45 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index e3d0270b1a19..b3f937901037 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -673,73 +673,69 @@ class KernelDoc: while tuples: for t in tuples: newmember = "" - maintype = t[0] - s_ids = t[5] - content = t[3] - - oldmember = "".join(t) - - for s_id in s_ids.split(','): + oldmember = "".join(t) # Reconstruct the original formatting + dtype, name, lbr, content, rbr, rest, semi = t + # + # Pass through each field name, normalizing the form and formatting. + # + for s_id in rest.split(','): s_id = s_id.strip() - - newmember += f"{maintype} {s_id}; " + newmember += f"{dtype} {s_id}; " + # + # Remove bitfield/array/pointer info, getting the bare name. + # s_id = KernRe(r'[:\[].*').sub('', s_id) s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) - + # + # Pass through the members of this inner structure/union. 
+ # for arg in content.split(';'): arg = arg.strip() - - if not arg: - continue - + # + # Look for (type)(*name)(args) - pointer to function + # r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)') if r.match(arg): + dtype, name, extra = r.group(1), r.group(2), r.group(3) # Pointer-to-function - dtype = r.group(1) - name = r.group(2) - extra = r.group(3) - - if not name: - continue - if not s_id: # Anonymous struct/union newmember += f"{dtype}{name}{extra}; " else: newmember += f"{dtype}{s_id}.{name}{extra}; " - + # + # Otherwise a non-function member. + # else: - # Handle bitmaps + # + # Remove bitmap and array portions and spaces around commas + # arg = KernRe(r':\s*\d+\s*').sub('', arg) - - # Handle arrays arg = KernRe(r'\[.*\]').sub('', arg) - - # Handle multiple IDs arg = KernRe(r'\s*,\s*').sub(',', arg) - + # + # Look for a normal decl - "type name[,name...]" + # r = KernRe(r'(.*)\s+([\S+,]+)') - if r.search(arg): - dtype = r.group(1) - names = r.group(2) + for name in r.group(2).split(','): + name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name) + if not s_id: + # Anonymous struct/union + newmember += f"{r.group(1)} {name}; " + else: + newmember += f"{r.group(1)} {s_id}.{name}; " else: newmember += f"{arg}; " - continue - - for name in names.split(','): - name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip() - - if not name: - continue - - if not s_id: - # Anonymous struct/union - newmember += f"{dtype} {name}; " - else: - newmember += f"{dtype} {s_id}.{name}; " - + # + # At the end of the s_id loop, replace the original declaration with + # the munged version. + # members = members.replace(oldmember, newmember) + # + # End of the tuple loop - search again and see if there are outer members + # that now turn up. 
+ # tuples = struct_members.findall(members) return members -- cgit From 23c47b09315935df140ca5ce2ddddb85453ed64d Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 7 Aug 2025 15:16:38 -0600 Subject: docs: kdoc: extract output formatting from dump_struct() The last thing done in dump_struct() is to format the structure for printing. That, too, is a separate activity; split it out into its own function. dump_struct() now fits in a single, full-height editor screen. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250807211639.47286-12-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 72 +++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 35 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index b3f937901037..878fbfab4ac7 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -739,6 +739,42 @@ class KernelDoc: tuples = struct_members.findall(members) return members + # + # Format the struct declaration into a standard form for inclusion in the + # resulting docs. + # + def format_struct_decl(self, declaration): + # + # Insert newlines, get rid of extra spaces. + # + declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) + declaration = KernRe(r'\}\s+;').sub('};', declaration) + # + # Format inline enums with each member on its own line. + # + r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') + while r.search(declaration): + declaration = r.sub(r'\1,\n\2', declaration) + # + # Now go through and supply the right number of tabs + # for each line.
+ # + def_args = declaration.split('\n') + level = 1 + declaration = "" + for clause in def_args: + clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1) + if clause: + if '}' in clause and level > 1: + level -= 1 + if not clause.startswith('#'): + declaration += "\t" * level + declaration += "\t" + clause + "\n" + if "{" in clause and "}" not in clause: + level += 1 + return declaration + + def dump_struct(self, ln, proto): """ Store an entry for an struct or union @@ -777,42 +813,8 @@ class KernelDoc: self.create_parameter_list(ln, decl_type, members, ';', declaration_name) self.check_sections(ln, declaration_name, decl_type) - - # Adjust declaration for better display - declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) - declaration = KernRe(r'\}\s+;').sub('};', declaration) - - # Better handle inlined enums - while True: - r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') - if not r.search(declaration): - break - - declaration = r.sub(r'\1,\n\2', declaration) - - def_args = declaration.split('\n') - level = 1 - declaration = "" - for clause in def_args: - - clause = clause.strip() - clause = KernRe(r'\s+').sub(' ', clause, count=1) - - if not clause: - continue - - if '}' in clause and level > 1: - level -= 1 - - if not KernRe(r'^\s*#').match(clause): - declaration += "\t" * level - - declaration += "\t" + clause + "\n" - if "{" in clause and "}" not in clause: - level += 1 - self.output_declaration(decl_type, declaration_name, - definition=declaration, + definition=self.format_struct_decl(declaration), purpose=self.entry.declaration_purpose) def dump_enum(self, ln, proto): -- cgit From e282303e718b2007b3db77c6db75ecaf4419a1af Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 7 Aug 2025 15:16:39 -0600 Subject: docs: kdoc: a few final dump_struct() touches Add a couple more comments so that each phase of the process is now clearly marked. 
Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250807211639.47286-13-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 878fbfab4ac7..9b21fb86709a 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -802,14 +802,15 @@ class KernelDoc: nested = NestedMatch() for search, sub in struct_nested_prefixes: members = nested.sub(search, sub, members) - - # Keeps the original declaration as-is + # + # Deal with embedded struct and union members, and drop enums entirely. + # declaration = members members = self.rewrite_struct_members(members) - - # Ignore other nested elements, like enums members = re.sub(r'(\{[^\{\}]*\})', '', members) - + # + # Output the result and we are done. + # self.create_parameter_list(ln, decl_type, members, ';', declaration_name) self.check_sections(ln, declaration_name, decl_type) -- cgit