From 6656ae4df1a6ad6dfb5c4ce4c76136a42abb9bf4 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 7 Aug 2025 15:16:28 -0600 Subject: docs: kdoc: consolidate the stripping of private struct/union members There were two locations duplicating the logic of stripping private members and associated comments; coalesce them into one, and add some comments describing what's going on. Output change: we now no longer add extraneous white space around macro definitions. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250807211639.47286-2-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index c3fe4bd5eab4..93fcd8807aa8 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -81,6 +81,21 @@ multi_space = KernRe(r'\s\s+') def trim_whitespace(s): return multi_space.sub(' ', s.strip()) +# +# Remove struct/enum members that have been marked "private". +# +def trim_private_members(text): + # + # First look for a "public:" block that ends a private region, then + # handle the "private until the end" case. + # + text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text) + text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text) + # + # We needed the comments to do the above, but now we can take them out. 
+ # + return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip() + class state: """ State machine enums @@ -568,12 +583,6 @@ class KernelDoc: args_pattern = r'([^,)]+)' sub_prefixes = [ - (KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''), - (KernRe(r'\/\*\s*private:.*', re.S | re.I), ''), - - # Strip comments - (KernRe(r'\/\*.*?\*\/', re.S), ''), - # Strip attributes (attribute, ' '), (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), @@ -648,6 +657,7 @@ class KernelDoc: (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), ] + members = trim_private_members(members) for search, sub in sub_prefixes: members = search.sub(sub, members) @@ -797,24 +807,18 @@ class KernelDoc: """ Stores an enum inside self.entries array. """ - - # Ignore members marked private - proto = KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S).sub('', proto) - proto = KernRe(r'\/\*\s*private:.*}', flags=re.S).sub('}', proto) - - # Strip comments - proto = KernRe(r'\/\*.*?\*\/', flags=re.S).sub('', proto) - - # Strip #define macros inside enums + # + # Strip preprocessor directives. Note that this depends on the + # trailing semicolon we added in process_proto_type(). + # proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) - # # Parse out the name and members of the enum. Typedef form first. # r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') if r.search(proto): declaration_name = r.group(2) - members = r.group(1).rstrip() + members = trim_private_members(r.group(1)) # # Failing that, look for a straight enum # @@ -822,7 +826,7 @@ class KernelDoc: r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') if r.match(proto): declaration_name = r.group(1) - members = r.group(2).rstrip() + members = trim_private_members(r.group(2)) # # OK, this isn't going to work. 
# -- cgit From 259feba4dde78f165b03e231ea9985dfe600c202 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 7 Aug 2025 15:16:29 -0600 Subject: docs: kdoc: Move a regex line in dump_struct() The complex struct_members regex was defined far from its use; bring the two together. Remove some extraneous backslashes while making the move. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250807211639.47286-3-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 93fcd8807aa8..aa6d11bf29b1 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -551,7 +551,6 @@ class KernelDoc: ] definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" - struct_members = KernRe(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)') # Extract struct/union definition members = None @@ -683,6 +682,7 @@ class KernelDoc: # So, we need to have an extra loop on Python to override such # re limitation. + struct_members = KernRe(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\};]*)(;)') while True: tuples = struct_members.findall(members) if not tuples: -- cgit From 5fd513f01169ae93d202b8c30f0837096664e7d7 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 7 Aug 2025 15:16:30 -0600 Subject: docs: kdoc: backslashectomy in kdoc_parser A lot of the regular expressions in this file have extraneous backslashes that may have been needed in Perl, but aren't helpful here. Take them out to reduce slightly the visual noise. Escaping of (){}[] has been left in place, even when unnecessary, for visual clarity. 
Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250807211639.47286-4-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index aa6d11bf29b1..14ded23f11e0 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -46,7 +46,7 @@ doc_decl = doc_com + KernRe(r'(\w+)', cache=False) known_section_names = 'description|context|returns?|notes?|examples?' known_sections = KernRe(known_section_names, flags = re.I) doc_sect = doc_com + \ - KernRe(r'\s*(\@[.\w]+|\@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$', + KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$', flags=re.I, cache=False) doc_content = doc_com_body + KernRe(r'(.*)', cache=False) @@ -60,7 +60,7 @@ attribute = KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) -type_param = KernRe(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) +type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) # # Tests for the beginning of a kerneldoc block in its various forms. 
@@ -405,7 +405,7 @@ class KernelDoc: for arg in args.split(splitter): # Strip comments - arg = KernRe(r'\/\*.*\*\/').sub('', arg) + arg = KernRe(r'/\*.*\*/').sub('', arg) # Ignore argument attributes arg = KernRe(r'\sPOS0?\s').sub(' ', arg) @@ -428,7 +428,7 @@ class KernelDoc: arg = arg.replace('#', ',') - r = KernRe(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)') + r = KernRe(r'[^\(]+\(\*?\s*([\w\[\].]*)\s*\)') if r.match(arg): param = r.group(1) else: @@ -443,7 +443,7 @@ class KernelDoc: # Array-of-pointers arg = arg.replace('#', ',') - r = KernRe(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)') + r = KernRe(r'[^\(]+\(\s*\*\s*([\w\[\].]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)') if r.match(arg): param = r.group(1) else: @@ -709,7 +709,7 @@ class KernelDoc: if not arg: continue - r = KernRe(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)') + r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)') if r.match(arg): # Pointer-to-function dtype = r.group(1) @@ -1044,7 +1044,7 @@ class KernelDoc: Stores a typedef inside self.entries array. """ - typedef_type = r'((?:\s+[\w\*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' + typedef_type = r'((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' typedef_ident = r'\*?\s*(\w\S+)\s*' typedef_args = r'\s*\((.*)\);' @@ -1265,7 +1265,7 @@ class KernelDoc: self.dump_section() # Look for doc_com + + doc_end: - r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') + r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/') if r.match(line): self.emit_msg(ln, f"suspicious ending line: {line}") @@ -1476,7 +1476,7 @@ class KernelDoc: """Ancillary routine to process a function prototype""" # strip C99-style comments to end of line - line = KernRe(r"\/\/.*$", re.S).sub('', line) + line = KernRe(r"//.*$", re.S).sub('', line) # # Soak up the line's worth of prototype text, stopping at { or ; if present. 
# -- cgit From 64cf83bcd3217a9583caeb404ff136366a46705c Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 7 Aug 2025 15:16:31 -0600 Subject: docs: kdoc: move the prefix transforms out of dump_struct() dump_struct is one of the longest functions in the kdoc_parser class, making it hard to read and reason about. Move the definition of the prefix transformations out of the function, join them with the definition of "attribute" (which was defined at the top of the file but only used here), and reformat the code slightly for shorter line widths. Just code movement in the end. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250807211639.47286-5-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 179 +++++++++++++++++++++------------------- 1 file changed, 96 insertions(+), 83 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 14ded23f11e0..3d007d200da6 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -54,8 +54,6 @@ doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) -attribute = KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", - flags=re.I | re.S, cache=False) export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) @@ -74,6 +72,97 @@ doc_begin_func = KernRe(str(doc_com) + # initial " * ' r'(?:[-:].*)?$', # description (not captured) cache = False) +# +# Here begins a long set of transformations to turn structure member prefixes +# and macro invocations into something we can parse and generate kdoc for. 
+# +struct_attribute = KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", + flags=re.I | re.S, cache=False) +struct_args_pattern = r'([^,)]+)' + +struct_prefixes = [ + # Strip attributes + (struct_attribute, ' '), + (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__packed\s*', re.S), ' '), + (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned', re.S), ' '), + # + # Unwrap struct_group macros based on this definition: + # __struct_group(TAG, NAME, ATTRS, MEMBERS...) + # which has variants like: struct_group(NAME, MEMBERS...) + # Only MEMBERS arguments require documentation. + # + # Parsing them happens on two steps: + # + # 1. drop struct group arguments that aren't at MEMBERS, + # storing them as STRUCT_GROUP(MEMBERS) + # + # 2. remove STRUCT_GROUP() ancillary macro. + # + # The original logic used to remove STRUCT_GROUP() using an + # advanced regex: + # + # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; + # + # with two patterns that are incompatible with + # Python re module, as it has: + # + # - a recursive pattern: (?1) + # - an atomic grouping: (?>...) + # + # I tried a simpler version: but it didn't work either: + # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; + # + # As it doesn't properly match the end parenthesis on some cases. + # + # So, a better solution was crafted: there's now a NestedMatch + # class that ensures that delimiters after a search are properly + # matched. So, the implementation to drop STRUCT_GROUP() will be + # handled in separate. 
+ # + (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), + (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), + # + # Replace macros + # + # TODO: use NestedMatch for FOO($1, $2, ...) matches + # + # it is better to also move those to the NestedMatch logic, + # to ensure that parenthesis will be properly matched. + # + (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), + r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), + (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), + r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), + (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', + re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), + (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', + re.S), r'unsigned long \1[1 << ((\2) - 1)]'), + (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + + r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + + struct_args_pattern + r'\)', re.S), r'\1 \2[]'), + (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), + (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), +] +# +# Regexes here are guaranteed to have the end limiter matching +# the start delimiter. Yet, right now, only one replace group +# is allowed. 
+# +struct_nested_prefixes = [ + (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), +] + + # # A little helper to get rid of excess white space # @@ -578,91 +667,15 @@ class KernelDoc: self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. Prototype was for {decl_type} {declaration_name} instead\n") return - - args_pattern = r'([^,)]+)' - - sub_prefixes = [ - # Strip attributes - (attribute, ' '), - (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__packed\s*', re.S), ' '), - (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned', re.S), ' '), - - # Unwrap struct_group macros based on this definition: - # __struct_group(TAG, NAME, ATTRS, MEMBERS...) - # which has variants like: struct_group(NAME, MEMBERS...) - # Only MEMBERS arguments require documentation. - # - # Parsing them happens on two steps: - # - # 1. drop struct group arguments that aren't at MEMBERS, - # storing them as STRUCT_GROUP(MEMBERS) - # - # 2. remove STRUCT_GROUP() ancillary macro. - # - # The original logic used to remove STRUCT_GROUP() using an - # advanced regex: - # - # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; - # - # with two patterns that are incompatible with - # Python re module, as it has: - # - # - a recursive pattern: (?1) - # - an atomic grouping: (?>...) - # - # I tried a simpler version: but it didn't work either: - # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; - # - # As it doesn't properly match the end parenthesis on some cases. - # - # So, a better solution was crafted: there's now a NestedMatch - # class that ensures that delimiters after a search are properly - # matched. So, the implementation to drop STRUCT_GROUP() will be - # handled in separate. 
- - (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), - (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), - - # Replace macros - # - # TODO: use NestedMatch for FOO($1, $2, ...) matches - # - # it is better to also move those to the NestedMatch logic, - # to ensure that parenthesis will be properly matched. - - (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), - (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), - (KernRe(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), - (KernRe(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'), - (KernRe(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'), - (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'), - (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'), - ] - - # Regexes here are guaranteed to have the end limiter matching - # the start delimiter. Yet, right now, only one replace group - # is allowed. - - sub_nested_prefixes = [ - (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), - ] - + # + # Go through the list of members applying all of our transformations. 
+ # members = trim_private_members(members) - for search, sub in sub_prefixes: + for search, sub in struct_prefixes: members = search.sub(sub, members) nested = NestedMatch() - - for search, sub in sub_nested_prefixes: + for search, sub in struct_nested_prefixes: members = nested.sub(search, sub, members) # Keeps the original declaration as-is -- cgit From 0f7344129434a6b44d4abb5080a9d67dd734ee07 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 7 Aug 2025 15:16:32 -0600 Subject: docs: kdoc: split top-level prototype parsing out of dump_struct() Move the initial split of the prototype into its own function in the ongoing effort to cut dump_struct() down to size. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250807211639.47286-6-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 43 +++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 23 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 3d007d200da6..ab896dcd9572 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -624,13 +624,11 @@ class KernelDoc: self.emit_msg(ln, f"No description found for return value of '{declaration_name}'") - def dump_struct(self, ln, proto): - """ - Store an entry for an struct or union - """ - + # + # Split apart a structure prototype; returns (struct|union, name, members) or None + # + def split_struct_proto(self, proto): type_pattern = r'(struct|union)' - qualifiers = [ "__attribute__", "__packed", @@ -638,34 +636,33 @@ class KernelDoc: "____cacheline_aligned_in_smp", "____cacheline_aligned", ] - definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" 
- # Extract struct/union definition - members = None - declaration_name = None - decl_type = None - r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) if r.search(proto): - decl_type = r.group(1) - declaration_name = r.group(2) - members = r.group(3) + return (r.group(1), r.group(2), r.group(3)) else: r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') - if r.search(proto): - decl_type = r.group(1) - declaration_name = r.group(3) - members = r.group(2) + return (r.group(1), r.group(3), r.group(2)) + return None - if not members: + def dump_struct(self, ln, proto): + """ + Store an entry for an struct or union + """ + # + # Do the basic parse to get the pieces of the declaration. + # + struct_parts = self.split_struct_proto(proto) + if not struct_parts: self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") return + decl_type, declaration_name, members = struct_parts if self.entry.identifier != declaration_name: - self.emit_msg(ln, - f"expecting prototype for {decl_type} {self.entry.identifier}. Prototype was for {decl_type} {declaration_name} instead\n") + self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. " + f"Prototype was for {decl_type} {declaration_name} instead\n") return # # Go through the list of members applying all of our transformations. @@ -695,7 +692,7 @@ class KernelDoc: # So, we need to have an extra loop on Python to override such # re limitation. 
- struct_members = KernRe(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\};]*)(;)') + struct_members = KernRe(r'(struct|union)([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\};]*)(;)') while True: tuples = struct_members.findall(members) if not tuples: -- cgit From 77e3c875f0a83d9192079e88d8569ac36c6b6bea Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 7 Aug 2025 15:16:33 -0600 Subject: docs: kdoc: split struct-member rewriting out of dump_struct() The massive loop that massages struct members shares no data with the rest of dump_struct(); split it out into its own function. Code movement only, no other changes. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250807211639.47286-7-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 65 +++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 31 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index ab896dcd9572..fbd7f6ce3360 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -647,37 +647,7 @@ class KernelDoc: return (r.group(1), r.group(3), r.group(2)) return None - def dump_struct(self, ln, proto): - """ - Store an entry for an struct or union - """ - # - # Do the basic parse to get the pieces of the declaration. - # - struct_parts = self.split_struct_proto(proto) - if not struct_parts: - self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") - return - decl_type, declaration_name, members = struct_parts - - if self.entry.identifier != declaration_name: - self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. " - f"Prototype was for {decl_type} {declaration_name} instead\n") - return - # - # Go through the list of members applying all of our transformations. 
- # - members = trim_private_members(members) - for search, sub in struct_prefixes: - members = search.sub(sub, members) - - nested = NestedMatch() - for search, sub in struct_nested_prefixes: - members = nested.sub(search, sub, members) - - # Keeps the original declaration as-is - declaration = members - + def rewrite_struct_members(self, members): # Split nested struct/union elements # # This loop was simpler at the original kernel-doc perl version, as @@ -768,6 +738,39 @@ class KernelDoc: newmember += f"{dtype} {s_id}.{name}; " members = members.replace(oldmember, newmember) + return members + + def dump_struct(self, ln, proto): + """ + Store an entry for an struct or union + """ + # + # Do the basic parse to get the pieces of the declaration. + # + struct_parts = self.split_struct_proto(proto) + if not struct_parts: + self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") + return + decl_type, declaration_name, members = struct_parts + + if self.entry.identifier != declaration_name: + self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. " + f"Prototype was for {decl_type} {declaration_name} instead\n") + return + # + # Go through the list of members applying all of our transformations. 
+ # + members = trim_private_members(members) + for search, sub in struct_prefixes: + members = search.sub(sub, members) + + nested = NestedMatch() + for search, sub in struct_nested_prefixes: + members = nested.sub(search, sub, members) + + # Keeps the original declaration as-is + declaration = members + members = self.rewrite_struct_members(members) # Ignore other nested elements, like enums members = re.sub(r'(\{[^\{\}]*\})', '', members) -- cgit From f8208676c1c85c0b91e726954c05f5859b890ccb Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 7 Aug 2025 15:16:34 -0600 Subject: docs: kdoc: rework the rewrite_struct_members() main loop Adopt a more Pythonic form for the main loop of this function, getting rid of the "while True:" construction and making the actual loop invariant explicit. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250807211639.47286-8-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index fbd7f6ce3360..e11f3d6e9469 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -663,11 +663,8 @@ class KernelDoc: # re limitation. 
struct_members = KernRe(r'(struct|union)([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\};]*)(;)') - while True: - tuples = struct_members.findall(members) - if not tuples: - break - + tuples = struct_members.findall(members) + while tuples: for t in tuples: newmember = "" maintype = t[0] @@ -738,6 +735,7 @@ class KernelDoc: newmember += f"{dtype} {s_id}.{name}; " members = members.replace(oldmember, newmember) + tuples = struct_members.findall(members) return members def dump_struct(self, ln, proto): -- cgit From fb20e610393b02a832a0bf4964e12c20a7ffa2f8 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 7 Aug 2025 15:16:35 -0600 Subject: docs: kdoc: remove an extraneous strip() call ...the variable in question was already strip()ed at the top of the loop. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250807211639.47286-9-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 1 - 1 file changed, 1 deletion(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index e11f3d6e9469..0c279aa802a0 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -703,7 +703,6 @@ class KernelDoc: newmember += f"{dtype}{s_id}.{name}{extra}; " else: - arg = arg.strip() # Handle bitmaps arg = KernRe(r':\s*\d+\s*').sub('', arg) -- cgit From a8c4b0a8f1969e5ee0e8abb62cde8f0d7bcc2009 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 7 Aug 2025 15:16:36 -0600 Subject: docs: kdoc: Some rewrite_struct_members() commenting Add comments to rewrite_struct_members() describing what it is actually doing, and reformat/comment the main struct_members regex so that it is (more) comprehensible to humans. 
Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250807211639.47286-10-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 0c279aa802a0..e3d0270b1a19 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -647,22 +647,28 @@ class KernelDoc: return (r.group(1), r.group(3), r.group(2)) return None + # + # Rewrite the members of a structure or union for easier formatting later on. + # Among other things, this function will turn a member like: + # + # struct { inner_members; } foo; + # + # into: + # + # struct foo; inner_members; + # def rewrite_struct_members(self, members): - # Split nested struct/union elements - # - # This loop was simpler at the original kernel-doc perl version, as - # while ($members =~ m/$struct_members/) { ... } - # reads 'members' string on each interaction. # - # Python behavior is different: it parses 'members' only once, - # creating a list of tuples from the first interaction. + # Process struct/union members from the most deeply nested outward. The + # trick is in the ^{ below - it prevents a match of an outer struct/union + # until the inner one has been munged (removing the "{" in the process). # - # On other words, this won't get nested structs. - # - # So, we need to have an extra loop on Python to override such - # re limitation. 
- - struct_members = KernRe(r'(struct|union)([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\};]*)(;)') + struct_members = KernRe(r'(struct|union)' # 0: declaration type + r'([^\{\};]+)' # 1: possible name + r'(\{)' + r'([^\{\}]*)' # 3: Contents of declaration + r'(\})' + r'([^\{\};]*)(;)') # 5: Remaining stuff after declaration tuples = struct_members.findall(members) while tuples: for t in tuples: -- cgit From e6dd4e2a5ca1c1e2fb168249532da1d95b5b24af Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 7 Aug 2025 15:16:37 -0600 Subject: docs: kdoc: further rewrite_struct_members() cleanup Get rid of some redundant checks, and generally tighten up the code; no logical change. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250807211639.47286-11-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 86 ++++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 45 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index e3d0270b1a19..b3f937901037 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -673,73 +673,69 @@ class KernelDoc: while tuples: for t in tuples: newmember = "" - maintype = t[0] - s_ids = t[5] - content = t[3] - - oldmember = "".join(t) - - for s_id in s_ids.split(','): + oldmember = "".join(t) # Reconstruct the original formatting + dtype, name, lbr, content, rbr, rest, semi = t + # + # Pass through each field name, normalizing the form and formatting. + # + for s_id in rest.split(','): s_id = s_id.strip() - - newmember += f"{maintype} {s_id}; " + newmember += f"{dtype} {s_id}; " + # + # Remove bitfield/array/pointer info, getting the bare name. + # s_id = KernRe(r'[:\[].*').sub('', s_id) s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) - + # + # Pass through the members of this inner structure/union. 
+ # for arg in content.split(';'): arg = arg.strip() - - if not arg: - continue - + # + # Look for (type)(*name)(args) - pointer to function + # r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)') if r.match(arg): + dtype, name, extra = r.group(1), r.group(2), r.group(3) # Pointer-to-function - dtype = r.group(1) - name = r.group(2) - extra = r.group(3) - - if not name: - continue - if not s_id: # Anonymous struct/union newmember += f"{dtype}{name}{extra}; " else: newmember += f"{dtype}{s_id}.{name}{extra}; " - + # + # Otherwise a non-function member. + # else: - # Handle bitmaps + # + # Remove bitmap and array portions and spaces around commas + # arg = KernRe(r':\s*\d+\s*').sub('', arg) - - # Handle arrays arg = KernRe(r'\[.*\]').sub('', arg) - - # Handle multiple IDs arg = KernRe(r'\s*,\s*').sub(',', arg) - + # + # Look for a normal decl - "type name[,name...]" + # r = KernRe(r'(.*)\s+([\S+,]+)') - if r.search(arg): - dtype = r.group(1) - names = r.group(2) + for name in r.group(2).split(','): + name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name) + if not s_id: + # Anonymous struct/union + newmember += f"{r.group(1)} {name}; " + else: + newmember += f"{r.group(1)} {s_id}.{name}; " else: newmember += f"{arg}; " - continue - - for name in names.split(','): - name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip() - - if not name: - continue - - if not s_id: - # Anonymous struct/union - newmember += f"{dtype} {name}; " - else: - newmember += f"{dtype} {s_id}.{name}; " - + # + # At the end of the s_id loop, replace the original declaration with + # the munged version. + # members = members.replace(oldmember, newmember) + # + # End of the tuple loop - search again and see if there are outer members + # that now turn up. 
+ # tuples = struct_members.findall(members) return members -- cgit From 23c47b09315935df140ca5ce2ddddb85453ed64d Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 7 Aug 2025 15:16:38 -0600 Subject: docs: kdoc: extract output formatting from dump_struct() The last thing done in dump_struct() is to format the structure for printing. That, too, is a separate activity; split it out into its own function. dump_struct() now fits in a single, full-height editor screen. Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250807211639.47286-12-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 72 +++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 35 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index b3f937901037..878fbfab4ac7 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -739,6 +739,42 @@ class KernelDoc: tuples = struct_members.findall(members) return members + # + # Format the struct declaration into a standard form for inclusion in the + # resulting docs. + # + def format_struct_decl(self, declaration): + # + # Insert newlines, get rid of extra spaces. + # + declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) + declaration = KernRe(r'\}\s+;').sub('};', declaration) + # + # Format inline enums with each member on its own line. + # + r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') + while r.search(declaration): + declaration = r.sub(r'\1,\n\2', declaration) + # + # Now go through and supply the right number of tabs + # for each line. 
+ # + def_args = declaration.split('\n') + level = 1 + declaration = "" + for clause in def_args: + clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1) + if clause: + if '}' in clause and level > 1: + level -= 1 + if not clause.startswith('#'): + declaration += "\t" * level + declaration += "\t" + clause + "\n" + if "{" in clause and "}" not in clause: + level += 1 + return declaration + + def dump_struct(self, ln, proto): """ Store an entry for an struct or union @@ -777,42 +813,8 @@ class KernelDoc: self.create_parameter_list(ln, decl_type, members, ';', declaration_name) self.check_sections(ln, declaration_name, decl_type) - - # Adjust declaration for better display - declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) - declaration = KernRe(r'\}\s+;').sub('};', declaration) - - # Better handle inlined enums - while True: - r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') - if not r.search(declaration): - break - - declaration = r.sub(r'\1,\n\2', declaration) - - def_args = declaration.split('\n') - level = 1 - declaration = "" - for clause in def_args: - - clause = clause.strip() - clause = KernRe(r'\s+').sub(' ', clause, count=1) - - if not clause: - continue - - if '}' in clause and level > 1: - level -= 1 - - if not KernRe(r'^\s*#').match(clause): - declaration += "\t" * level - - declaration += "\t" + clause + "\n" - if "{" in clause and "}" not in clause: - level += 1 - self.output_declaration(decl_type, declaration_name, - definition=declaration, + definition=self.format_struct_decl(declaration), purpose=self.entry.declaration_purpose) def dump_enum(self, ln, proto): -- cgit From e282303e718b2007b3db77c6db75ecaf4419a1af Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 7 Aug 2025 15:16:39 -0600 Subject: docs: kdoc: a few final dump_struct() touches Add a couple more comments so that each phase of the process is now clearly marked. 
Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250807211639.47286-13-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 878fbfab4ac7..9b21fb86709a 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -802,14 +802,15 @@ class KernelDoc: nested = NestedMatch() for search, sub in struct_nested_prefixes: members = nested.sub(search, sub, members) - - # Keeps the original declaration as-is + # + # Deal with embedded struct and union members, and drop enums entirely. + # declaration = members members = self.rewrite_struct_members(members) - - # Ignore other nested elements, like enums members = re.sub(r'(\{[^\{\}]*\})', '', members) - + # + # Output the result and we are done. + # self.create_parameter_list(ln, decl_type, members, ';', declaration_name) self.check_sections(ln, declaration_name, decl_type) -- cgit From 670ec7333a2c4823ac777b70f045cf731525ae5e Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 14 Aug 2025 09:40:29 -0600 Subject: docs: kdoc: remove dead code create_parameter_list() tests an argument against the same regex twice, in two different locations; remove the pointless extra tests and the never-executed error cases that go with them. 
Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250814154035.328769-2-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 9e65948f8254..96e3fe4ec431 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -564,28 +564,18 @@ class KernelDoc: args.insert(0, first_arg.pop()) dtype = ' '.join(first_arg) + bitfield_re = KernRe(r'(.*?):(\w+)') for param in args: - if KernRe(r'^(\*+)\s*(.*)').match(param): - r = KernRe(r'^(\*+)\s*(.*)') - if not r.match(param): - self.emit_msg(ln, f"Invalid param: {param}") - continue - - param = r.group(1) - + r = KernRe(r'^(\*+)\s*(.*)') + if r.match(param): self.push_parameter(ln, decl_type, r.group(2), f"{dtype} {r.group(1)}", arg, declaration_name) - elif KernRe(r'(.*?):(\w+)').search(param): - r = KernRe(r'(.*?):(\w+)') - if not r.match(param): - self.emit_msg(ln, f"Invalid param: {param}") - continue - + elif bitfield_re.search(param): if dtype != "": # Skip unnamed bit-fields - self.push_parameter(ln, decl_type, r.group(1), - f"{dtype}:{r.group(2)}", + self.push_parameter(ln, decl_type, bitfield_re.group(1), + f"{dtype}:{bitfield_re.group(2)}", arg, declaration_name) else: self.push_parameter(ln, decl_type, param, dtype, -- cgit From f51b42b99e1d35698e0277337fde2c15ccc29a2b Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 14 Aug 2025 09:40:30 -0600 Subject: docs: kdoc: tidy up space removal in create_parameter_list() Remove a redundant test and add a comment describing what the space removal is doing. 
Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250814154035.328769-3-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 96e3fe4ec431..53051ce831ba 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -545,12 +545,14 @@ class KernelDoc: arg, declaration_name) elif arg: + # + # Clean up extraneous spaces and split the string at commas; the first + # element of the resulting list will also include the type information. + # arg = KernRe(r'\s*:\s*').sub(":", arg) arg = KernRe(r'\s*\[').sub('[', arg) - args = KernRe(r'\s*,\s*').split(arg) - if args[0] and '*' in args[0]: - args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) + args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) first_arg = [] r = KernRe(r'^(.*\s+)(.*?\[.*\].*)$') -- cgit From 05d72fe07242a8e4535aa52e0858f9198e668a41 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 14 Aug 2025 09:40:31 -0600 Subject: docs: kdoc: clean up the create_parameter_list() "first arg" logic The logic for finding the name of the first in a series of variable names is somewhat convoluted and, in the use of .extend(), actively buggy. Document what is happening and simplify the logic. 
Acked-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250814154035.328769-4-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 53051ce831ba..07234ce04409 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -553,18 +553,18 @@ class KernelDoc: arg = KernRe(r'\s*\[').sub('[', arg) args = KernRe(r'\s*,\s*').split(arg) args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) - - first_arg = [] - r = KernRe(r'^(.*\s+)(.*?\[.*\].*)$') - if args[0] and r.match(args[0]): - args.pop(0) - first_arg.extend(r.group(1)) - first_arg.append(r.group(2)) + # + # args[0] has a string of "type a". If "a" includes an [array] + # declaration, we want to not be fooled by any white space inside + # the brackets, so detect and handle that case specially. + # + r = KernRe(r'^([^[\]]*\s+)(.*)$') + if r.match(args[0]): + args[0] = r.group(2) + dtype = r.group(1) else: - first_arg = KernRe(r'\s+').split(args.pop(0)) - - args.insert(0, first_arg.pop()) - dtype = ' '.join(first_arg) + # No space in args[0]; this seems wrong but preserves previous behavior + dtype = '' bitfield_re = KernRe(r'(.*?):(\w+)') for param in args: -- cgit From 8f05fbc5afb86f0d4dcae33f3cb0cda561d4d93e Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 14 Aug 2025 09:40:32 -0600 Subject: docs: kdoc: add a couple more comments in create_parameter_list() Make what the final code is doing a bit more clear to slow readers like me. 
Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250814154035.328769-5-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 07234ce04409..29881757bf1c 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -568,12 +568,18 @@ class KernelDoc: bitfield_re = KernRe(r'(.*?):(\w+)') for param in args: + # + # For pointers, shift the star(s) from the variable name to the + # type declaration. + # r = KernRe(r'^(\*+)\s*(.*)') if r.match(param): self.push_parameter(ln, decl_type, r.group(2), f"{dtype} {r.group(1)}", arg, declaration_name) - + # + # Perform a similar shift for bitfields. + # elif bitfield_re.search(param): if dtype != "": # Skip unnamed bit-fields self.push_parameter(ln, decl_type, bitfield_re.group(1), -- cgit From bf6b310d1b7e31a1cd6951eb75608a1d2876c04a Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 14 Aug 2025 09:40:33 -0600 Subject: docs: kdoc: tighten up the array-of-pointers case Simplify one gnarly regex and remove another altogether; add a comment describing what is going on. There will be no #-substituted commas in this case, so don't bother trying to put them back. 
Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250814154035.328769-6-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 29881757bf1c..7f4d95dd47d4 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -527,23 +527,21 @@ class KernelDoc: dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) - + # + # The array-of-pointers case. Dig the parameter name out from the middle + # of the declaration. + # elif KernRe(r'\(.+\)\s*\[').search(arg): - # Array-of-pointers - - arg = arg.replace('#', ',') - r = KernRe(r'[^\(]+\(\s*\*\s*([\w\[\].]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)') + r = KernRe(r'[^\(]+\(\s*\*\s*' # Up to "(" and maybe "*" + r'([\w.]*?)' # The actual pointer name + r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion] if r.match(arg): param = r.group(1) else: self.emit_msg(ln, f"Invalid param: {arg}") param = arg - - dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) - - self.push_parameter(ln, decl_type, param, dtype, - arg, declaration_name) - + dtype = arg.replace(param, '') + self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) elif arg: # # Clean up extraneous spaces and split the string at commas; the first -- cgit From e5d91662fcbac251dd17f04dbacf4d997939316e Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 14 Aug 2025 09:40:34 -0600 Subject: docs: kdoc: tighten up the pointer-to-function case Tighten up the code and remove an unneeded regex operation. 
Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250814154035.328769-7-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 7f4d95dd47d4..998b1ece932a 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -511,22 +511,21 @@ class KernelDoc: # Treat preprocessor directive as a typeless variable self.push_parameter(ln, decl_type, arg, "", "", declaration_name) - + # + # The pointer-to-function case. + # elif KernRe(r'\(.+\)\s*\(').search(arg): - # Pointer-to-function - arg = arg.replace('#', ',') - - r = KernRe(r'[^\(]+\(\*?\s*([\w\[\].]*)\s*\)') + r = KernRe(r'[^\(]+\(\*?\s*' # Everything up to "(*" + r'([\w\[\].]*)' # Capture the name and possible [array] + r'\s*\)') # Make sure the trailing ")" is there if r.match(arg): param = r.group(1) else: self.emit_msg(ln, f"Invalid param: {arg}") param = arg - - dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) - self.push_parameter(ln, decl_type, param, dtype, - arg, declaration_name) + dtype = arg.replace(param, '') + self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) # # The array-of-pointers case. Dig the parameter name out from the middle # of the declaration. -- cgit From 1d8125e27323d8a378cb38f88a6c5a0d7fdb2f6c Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 14 Aug 2025 09:40:35 -0600 Subject: docs: kdoc: remove redundant comment stripping By the time stuff gets to create_parameter_list(), comments have long since been stripped out, so we do not need to do it again here. 
Acked-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20250814154035.328769-8-corbet@lwn.net --- scripts/lib/kdoc/kdoc_parser.py | 3 --- 1 file changed, 3 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 998b1ece932a..a560546c1867 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -493,9 +493,6 @@ class KernelDoc: args = arg_expr.sub(r"\1#", args) for arg in args.split(splitter): - # Strip comments - arg = KernRe(r'/\*.*\*/').sub('', arg) - # Ignore argument attributes arg = KernRe(r'\sPOS0?\s').sub(' ', arg) -- cgit From 8b00d6fe96960aaba1b923d4a8c1ddb173c9c1ff Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 4 Sep 2025 13:33:56 -0600 Subject: docs: kdoc: trim __cacheline_group_* with the other annotations The special case for __cacheline_group_begin/end() can be handled by just adding another pattern to the struct_prefixes, eliminating the need for a special case in push_parameter(). One change is that these annotations no longer appear in the rendered output, just like all the other annotations that we clean out. Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index a560546c1867..a90f77d6b669 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -90,6 +90,7 @@ struct_prefixes = [ (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), (KernRe(r'\s*____cacheline_aligned', re.S), ' '), + (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), # # Unwrap struct_group macros based on this definition: # __struct_group(TAG, NAME, ATTRS, MEMBERS...) 
@@ -447,12 +448,6 @@ class KernelDoc: self.entry.parameterdescs[param] = "anonymous\n" self.entry.anon_struct_union = True - # Handle cache group enforcing variables: they do not need - # to be described in header files - elif "__cacheline_group" in param: - # Ignore __cacheline_group_begin and __cacheline_group_end - return - # Warn if parameter has no description # (but ignore ones starting with # as these are not parameters # but inline preprocessor statements) -- cgit From e214cca38f1f35d42e63e990c610c96f993343c4 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 4 Sep 2025 14:01:20 -0600 Subject: docs: kdoc: tighten up the push_parameter() no-type case The handling of untyped parameters involved a number of redundant tests; restructure the code to remove them and be more compact. No output changes. Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 44 +++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 24 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index a90f77d6b669..2118c20b3056 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -423,30 +423,26 @@ class KernelDoc: param = KernRe(r'[\[\)].*').sub('', param, count=1) - if dtype == "" and param.endswith("..."): - if KernRe(r'\w\.\.\.$').search(param): - # For named variable parameters of the form `x...`, - # remove the dots - param = param[:-3] - else: - # Handles unnamed variable parameters - param = "..." 
- - if param not in self.entry.parameterdescs or \ - not self.entry.parameterdescs[param]: - - self.entry.parameterdescs[param] = "variable arguments" - - elif dtype == "" and (not param or param == "void"): - param = "void" - self.entry.parameterdescs[param] = "no arguments" - - elif dtype == "" and param in ["struct", "union"]: - # Handle unnamed (anonymous) union or struct - dtype = param - param = "{unnamed_" + param + "}" - self.entry.parameterdescs[param] = "anonymous\n" - self.entry.anon_struct_union = True + # + # Look at various "anonymous type" cases. + # + if dtype == '': + if param.endswith("..."): + if len(param) > 3: # there is a name provided, use that + param = param[:-3] + if not self.entry.parameterdescs.get(param): + self.entry.parameterdescs[param] = "variable arguments" + + elif (not param) or param == "void": + param = "void" + self.entry.parameterdescs[param] = "no arguments" + + elif param in ["struct", "union"]: + # Handle unnamed (anonymous) union or struct + dtype = param + param = "{unnamed_" + param + "}" + self.entry.parameterdescs[param] = "anonymous\n" + self.entry.anon_struct_union = True # Warn if parameter has no description # (but ignore ones starting with # as these are not parameters -- cgit From f853e83006ab39c3dafe085a488c14bb46906601 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 4 Sep 2025 14:27:00 -0600 Subject: docs: kdoc: remove a single-use variable struct_attribute is only used once, so just put its value there directly and drop the name. 
Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 2118c20b3056..b25c8d80b965 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -76,13 +76,11 @@ doc_begin_func = KernRe(str(doc_com) + # initial " * ' # Here begins a long set of transformations to turn structure member prefixes # and macro invocations into something we can parse and generate kdoc for. # -struct_attribute = KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", - flags=re.I | re.S, cache=False) struct_args_pattern = r'([^,)]+)' struct_prefixes = [ # Strip attributes - (struct_attribute, ' '), + (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), -- cgit From 4c232a81b0831e7bfa7518968e431d5db29b2cac Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 4 Sep 2025 16:40:49 -0600 Subject: docs: kdoc: move the function transform patterns out of dump_function() Move these definitions to file level, where they are executed once, and don't clutter the function itself. 
Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 78 ++++++++++++++++++----------------------- 1 file changed, 35 insertions(+), 43 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index b25c8d80b965..37811cddd55c 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -161,6 +161,37 @@ struct_nested_prefixes = [ (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), ] +# +# Transforms for function prototypes +# +function_xforms = [ + (r"^static +", "", 0), + (r"^extern +", "", 0), + (r"^asmlinkage +", "", 0), + (r"^inline +", "", 0), + (r"^__inline__ +", "", 0), + (r"^__inline +", "", 0), + (r"^__always_inline +", "", 0), + (r"^noinline +", "", 0), + (r"^__FORTIFY_INLINE +", "", 0), + (r"__init +", "", 0), + (r"__init_or_module +", "", 0), + (r"__deprecated +", "", 0), + (r"__flatten +", "", 0), + (r"__meminit +", "", 0), + (r"__must_check +", "", 0), + (r"__weak +", "", 0), + (r"__sched +", "", 0), + (r"_noprof", "", 0), + (r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +", "", 0), + (r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +", "", 0), + (r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +", "", 0), + (r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)", r"\1, \2", 0), + (r"__attribute_const__ +", "", 0), + (r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+", "", 0), +] + + # # A little helper to get rid of excess white space @@ -894,49 +925,10 @@ class KernelDoc: return_type = '' decl_type = 'function' - # Prefixes that would be removed - sub_prefixes = [ - (r"^static +", "", 0), - (r"^extern +", "", 0), - (r"^asmlinkage +", "", 0), - (r"^inline +", "", 0), - (r"^__inline__ +", "", 0), - (r"^__inline +", "", 0), - (r"^__always_inline +", "", 0), - (r"^noinline +", "", 0), - (r"^__FORTIFY_INLINE +", "", 0), - (r"__init +", "", 0), - (r"__init_or_module +", "", 0), - (r"__deprecated +", "", 0), - (r"__flatten +", "", 0), - 
(r"__meminit +", "", 0), - (r"__must_check +", "", 0), - (r"__weak +", "", 0), - (r"__sched +", "", 0), - (r"_noprof", "", 0), - (r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +", "", 0), - (r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +", "", 0), - (r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +", "", 0), - (r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)", r"\1, \2", 0), - (r"__attribute_const__ +", "", 0), - - # It seems that Python support for re.X is broken: - # At least for me (Python 3.13), this didn't work -# (r""" -# __attribute__\s*\(\( -# (?: -# [\w\s]+ # attribute name -# (?:\([^)]*\))? # attribute arguments -# \s*,? # optional comma at the end -# )+ -# \)\)\s+ -# """, "", re.X), - - # So, remove whitespaces and comments from it - (r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+", "", 0), - ] - - for search, sub, flags in sub_prefixes: + # + # Apply the initial transformations. + # + for search, sub, flags in function_xforms: prototype = KernRe(search, flags).sub(sub, prototype) # Macros are a special case, as they change the prototype format -- cgit From a2752f8c631201e189f501fc4d320354efa3e72e Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 4 Sep 2025 16:49:52 -0600 Subject: doc: kdoc: unify transform handling Both functions and structs are passed through a set of regex-based transforms, but the two were structured differently, despite being the same thing. Create a utility function to apply transformations and use it in both cases. 
Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 65 +++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 31 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 37811cddd55c..1a1558211acd 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -78,7 +78,7 @@ doc_begin_func = KernRe(str(doc_com) + # initial " * ' # struct_args_pattern = r'([^,)]+)' -struct_prefixes = [ +struct_xforms = [ # Strip attributes (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), @@ -165,33 +165,39 @@ struct_nested_prefixes = [ # Transforms for function prototypes # function_xforms = [ - (r"^static +", "", 0), - (r"^extern +", "", 0), - (r"^asmlinkage +", "", 0), - (r"^inline +", "", 0), - (r"^__inline__ +", "", 0), - (r"^__inline +", "", 0), - (r"^__always_inline +", "", 0), - (r"^noinline +", "", 0), - (r"^__FORTIFY_INLINE +", "", 0), - (r"__init +", "", 0), - (r"__init_or_module +", "", 0), - (r"__deprecated +", "", 0), - (r"__flatten +", "", 0), - (r"__meminit +", "", 0), - (r"__must_check +", "", 0), - (r"__weak +", "", 0), - (r"__sched +", "", 0), - (r"_noprof", "", 0), - (r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +", "", 0), - (r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +", "", 0), - (r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +", "", 0), - (r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)", r"\1, \2", 0), - (r"__attribute_const__ +", "", 0), - (r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+", "", 0), + (KernRe(r"^static +"), ""), + (KernRe(r"^extern +"), ""), + (KernRe(r"^asmlinkage +"), ""), + (KernRe(r"^inline +"), ""), + (KernRe(r"^__inline__ +"), ""), + (KernRe(r"^__inline +"), ""), + (KernRe(r"^__always_inline +"), ""), + (KernRe(r"^noinline +"), ""), + (KernRe(r"^__FORTIFY_INLINE +"), ""), + 
(KernRe(r"__init +"), ""), + (KernRe(r"__init_or_module +"), ""), + (KernRe(r"__deprecated +"), ""), + (KernRe(r"__flatten +"), ""), + (KernRe(r"__meminit +"), ""), + (KernRe(r"__must_check +"), ""), + (KernRe(r"__weak +"), ""), + (KernRe(r"__sched +"), ""), + (KernRe(r"_noprof"), ""), + (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), + (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), + (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), + (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), + (KernRe(r"__attribute_const__ +"), ""), + (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), ] - +# +# Apply a set of transforms to a block of text. +# +def apply_transforms(xforms, text): + for search, subst in xforms: + text = search.sub(subst, text) + return text # # A little helper to get rid of excess white space @@ -807,8 +813,7 @@ class KernelDoc: # Go through the list of members applying all of our transformations. # members = trim_private_members(members) - for search, sub in struct_prefixes: - members = search.sub(sub, members) + members = apply_transforms(struct_xforms, members) nested = NestedMatch() for search, sub in struct_nested_prefixes: @@ -924,12 +929,10 @@ class KernelDoc: func_macro = False return_type = '' decl_type = 'function' - # # Apply the initial transformations. 
# - for search, sub, flags in function_xforms: - prototype = KernRe(search, flags).sub(sub, prototype) + prototype = apply_transforms(function_xforms, prototype) # Macros are a special case, as they change the prototype format new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) -- cgit From fee63c8f10c2fe77f618f9955c2f5521ff9cc622 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 5 Sep 2025 15:53:05 -0600 Subject: docs: kdoc: remove a couple of spurious regex characters The "name" regex in dump_function() includes both the tilde and colon characters, but neither has any place in function prototypes. Remove the characters, after which the regex simplifies to "\w+" No output changes. Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 1a1558211acd..decd127df82e 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -958,7 +958,7 @@ class KernelDoc: # - atomic_set (macro) # - pci_match_device, __copy_to_user (long return type) - name = r'[a-zA-Z0-9_~:]+' + name = r'\w+' prototype_end1 = r'[^\(]*' prototype_end2 = r'[^\{]*' prototype_end = fr'\(({prototype_end1}|{prototype_end2})\)' -- cgit From 08b5228cf455d46a23bfb341766563d1a48e3c8f Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 5 Sep 2025 16:30:48 -0600 Subject: docs: kdoc: remove a useless empty capture group The is_define_proto case in dump_function() uses a regex with an empty capture group - () - that has no use; just take it out. 
Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index decd127df82e..f9be5414244d 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -972,11 +972,11 @@ class KernelDoc: found = False if is_define_proto: - r = KernRe(r'^()(' + name + r')\s+') + r = KernRe(r'^(' + name + r')\s+') if r.search(prototype): return_type = '' - declaration_name = r.group(2) + declaration_name = r.group(1) func_macro = True found = True -- cgit From ff1f2af341b72bd5b6b5d432da55faf2f6d24cfe Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 8 Sep 2025 13:22:42 -0600 Subject: docs: kdoc: Simplify the dump_function() prototype regexes The regexes for the parsing of function prototypes were more complicated than they needed to be and difficult to understand -- at least, I spent a fair amount of time bashing my head against them. Simplify them, and add some documentation comments as well. Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index f9be5414244d..ec2e6e83df05 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -959,15 +959,15 @@ class KernelDoc: # - pci_match_device, __copy_to_user (long return type) name = r'\w+' - prototype_end1 = r'[^\(]*' - prototype_end2 = r'[^\{]*' - prototype_end = fr'\(({prototype_end1}|{prototype_end2})\)' - - # Besides compiling, Perl qr{[\w\s]+} works as a non-capturing group. - # So, this needs to be mapped in Python with (?:...)? or (?:...)+ - type1 = r'(?:[\w\s]+)?' 
type2 = r'(?:[\w\s]+\*+)+' + # + # Attempt to match first on (args) with no internal parentheses; this + # lets us easily filter out __acquires() and other post-args stuff. If + # that fails, just grab the rest of the line to the last closing + # parenthesis. + # + proto_args = r'\(([^\(]*|.*)\)' found = False @@ -983,9 +983,9 @@ class KernelDoc: if not found: patterns = [ - rf'^()({name})\s*{prototype_end}', - rf'^({type1})\s+({name})\s*{prototype_end}', - rf'^({type2})\s*({name})\s*{prototype_end}', + rf'^()({name})\s*{proto_args}', + rf'^({type1})\s+({name})\s*{proto_args}', + rf'^({type2})\s*({name})\s*{proto_args}', ] for p in patterns: -- cgit From 370f430527ecd35938ad94167e45fc784f6e4d95 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 8 Sep 2025 16:01:15 -0600 Subject: docs: kdoc: consolidate some of the macro-processing logic The logic to handle macros is split in dump_function(); bring it all together into a single place and add a comment saying what's going on. Remove the unneeded is_define_proto variable, and tighten up the code a bit. Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 43 +++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 23 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index ec2e6e83df05..27329ce9b5e9 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -926,21 +926,31 @@ class KernelDoc: Stores a function of function macro inside self.entries array. """ - func_macro = False + found = func_macro = False return_type = '' decl_type = 'function' # # Apply the initial transformations. # prototype = apply_transforms(function_xforms, prototype) - - # Macros are a special case, as they change the prototype format + # + # If we have a macro, remove the "#define" at the front. 
+ # new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) if new_proto != prototype: - is_define_proto = True prototype = new_proto - else: - is_define_proto = False + # + # Dispense with the simple "#define A B" case here; the key + # is the space after the name of the symbol being defined. + # NOTE that the seemingly misnamed "func_macro" indicates a + # macro *without* arguments. + # + r = KernRe(r'^(\w+)\s+') + if r.search(prototype): + return_type = '' + declaration_name = r.group(1) + func_macro = True + found = True # Yes, this truly is vile. We are looking for: # 1. Return type (may be nothing if we're looking at a macro) @@ -968,19 +978,10 @@ class KernelDoc: # parenthesis. # proto_args = r'\(([^\(]*|.*)\)' - - found = False - - if is_define_proto: - r = KernRe(r'^(' + name + r')\s+') - - if r.search(prototype): - return_type = '' - declaration_name = r.group(1) - func_macro = True - - found = True - + # + # (Except for the simple macro case) attempt to split up the prototype + # in the various ways we understand. + # if not found: patterns = [ rf'^()({name})\s*{proto_args}', @@ -990,16 +991,12 @@ class KernelDoc: for p in patterns: r = KernRe(p) - if r.match(prototype): - return_type = r.group(1) declaration_name = r.group(2) args = r.group(3) - self.create_parameter_list(ln, decl_type, args, ',', declaration_name) - found = True break if not found: -- cgit From 3dff54410e56ddee2dad8824ea77e60cd5b16d5b Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 8 Sep 2025 16:21:28 -0600 Subject: docs: kdoc: final dump_function() cleanups Add some more comments to dump_function() and trim out an unneeded duplicate output_declaration() call.
Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 27329ce9b5e9..5e41acfef7b8 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -999,32 +999,28 @@ class KernelDoc: declaration_name) found = True break + # + # Parsing done; make sure that things are as we expect. + # if not found: self.emit_msg(ln, f"cannot understand function prototype: '{prototype}'") return - if self.entry.identifier != declaration_name: - self.emit_msg(ln, - f"expecting prototype for {self.entry.identifier}(). Prototype was for {declaration_name}() instead") + self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). " + f"Prototype was for {declaration_name}() instead") return - self.check_sections(ln, declaration_name, "function") - self.check_return_section(ln, declaration_name, return_type) + # + # Store the result. 
+ # + self.output_declaration(decl_type, declaration_name, + typedef=('typedef' in return_type), + functiontype=return_type, + purpose=self.entry.declaration_purpose, + func_macro=func_macro) - if 'typedef' in return_type: - self.output_declaration(decl_type, declaration_name, - typedef=True, - functiontype=return_type, - purpose=self.entry.declaration_purpose, - func_macro=func_macro) - else: - self.output_declaration(decl_type, declaration_name, - typedef=False, - functiontype=return_type, - purpose=self.entry.declaration_purpose, - func_macro=func_macro) def dump_typedef(self, ln, proto): """ -- cgit From 999a642d7e7d4241cc7dba942a13c67d0685284b Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 8 Sep 2025 16:32:10 -0600 Subject: docs: kdoc: remove some dead code in dump_typedef() The regex in this block of code makes no sense, and a quick test shows that it never matches anything; simply delete the code. No output changes. Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 5 ----- 1 file changed, 5 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 5e41acfef7b8..7c739b495d58 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1060,11 +1060,6 @@ class KernelDoc: purpose=self.entry.declaration_purpose) return - # Handle nested parentheses or brackets - r = KernRe(r'(\(*.\)\s*|\[*.\]\s*);$') - while r.search(proto): - proto = r.sub('', proto) - # Parse simple typedefs r = KernRe(r'typedef.*\s+(\w+)\s*;') if r.match(proto): -- cgit From 00fa9bc4e93cb336575a5f2c1da90d2443ff14c8 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 8 Sep 2025 16:49:58 -0600 Subject: docs: kdoc: remove redundant comment stripping in dump_typedef() By the time we get here, comments have long since been stripped out; there is no need to do it again. 
Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 3 --- 1 file changed, 3 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 7c739b495d58..ad9df0536bbf 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1034,9 +1034,6 @@ class KernelDoc: typedef1 = KernRe(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) typedef2 = KernRe(r'typedef' + typedef_type + typedef_ident + typedef_args) - # Strip comments - proto = KernRe(r'/\*.*?\*/', flags=re.S).sub('', proto) - # Parse function typedef prototypes for r in [typedef1, typedef2]: if not r.match(proto): -- cgit From c01878437739f86851da76235f394346c6bd8ce3 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 8 Sep 2025 17:12:14 -0600 Subject: docs: kdoc: a few more dump_typedef() tweaks Merge "typedef" into the typedef_type pattern rather than repeating it later, and add some comments. Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index ad9df0536bbf..2376f180b1fa 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1026,13 +1026,15 @@ class KernelDoc: """ Stores a typedef inside self.entries array. """ - - typedef_type = r'((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' + # + # We start by looking for function typedefs. 
+ # + typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' typedef_ident = r'\*?\s*(\w\S+)\s*' typedef_args = r'\s*\((.*)\);' - typedef1 = KernRe(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) - typedef2 = KernRe(r'typedef' + typedef_type + typedef_ident + typedef_args) + typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) + typedef2 = KernRe(typedef_type + typedef_ident + typedef_args) # Parse function typedef prototypes for r in [typedef1, typedef2]: @@ -1048,16 +1050,16 @@ class KernelDoc: f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") return - decl_type = 'function' - self.create_parameter_list(ln, decl_type, args, ',', declaration_name) + self.create_parameter_list(ln, 'function', args, ',', declaration_name) - self.output_declaration(decl_type, declaration_name, + self.output_declaration('function', declaration_name, typedef=True, functiontype=return_type, purpose=self.entry.declaration_purpose) return - - # Parse simple typedefs + # + # Not a function, try to parse a simple typedef. + # r = KernRe(r'typedef.*\s+(\w+)\s*;') if r.match(proto): declaration_name = r.group(1) -- cgit