diff options
Diffstat (limited to 'scripts/lib')
-rw-r--r-- | scripts/lib/kdoc/kdoc_parser.py | 848 |
1 files changed, 414 insertions, 434 deletions
diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index fe730099eca8..2376f180b1fa 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -46,7 +46,7 @@ doc_decl = doc_com + KernRe(r'(\w+)', cache=False) known_section_names = 'description|context|returns?|notes?|examples?' known_sections = KernRe(known_section_names, flags = re.I) doc_sect = doc_com + \ - KernRe(r'\s*(\@[.\w]+|\@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$', + KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$', flags=re.I, cache=False) doc_content = doc_com_body + KernRe(r'(.*)', cache=False) @@ -54,13 +54,11 @@ doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) -attribute = KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", - flags=re.I | re.S, cache=False) export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) -type_param = KernRe(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) +type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) # # Tests for the beginning of a kerneldoc block in its various forms. @@ -75,12 +73,154 @@ doc_begin_func = KernRe(str(doc_com) + # initial " * ' cache = False) # +# Here begins a long set of transformations to turn structure member prefixes +# and macro invocations into something we can parse and generate kdoc for. +# +struct_args_pattern = r'([^,)]+)' + +struct_xforms = [ + # Strip attributes + (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), + (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__packed\s*', re.S), ' '), + (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned', re.S), ' '), + (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), + # + # Unwrap struct_group macros based on this definition: + # __struct_group(TAG, NAME, ATTRS, MEMBERS...) + # which has variants like: struct_group(NAME, MEMBERS...) + # Only MEMBERS arguments require documentation. + # + # Parsing them happens on two steps: + # + # 1. drop struct group arguments that aren't at MEMBERS, + # storing them as STRUCT_GROUP(MEMBERS) + # + # 2. remove STRUCT_GROUP() ancillary macro. + # + # The original logic used to remove STRUCT_GROUP() using an + # advanced regex: + # + # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; + # + # with two patterns that are incompatible with + # Python re module, as it has: + # + # - a recursive pattern: (?1) + # - an atomic grouping: (?>...) + # + # I tried a simpler version: but it didn't work either: + # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; + # + # As it doesn't properly match the end parenthesis on some cases. + # + # So, a better solution was crafted: there's now a NestedMatch + # class that ensures that delimiters after a search are properly + # matched. So, the implementation to drop STRUCT_GROUP() will be + # handled in separate. + # + (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), + (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), + # + # Replace macros + # + # TODO: use NestedMatch for FOO($1, $2, ...) matches + # + # it is better to also move those to the NestedMatch logic, + # to ensure that parenthesis will be properly matched. + # + (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), + r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), + (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), + r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), + (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', + re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), + (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', + re.S), r'unsigned long \1[1 << ((\2) - 1)]'), + (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + + r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + + struct_args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + + struct_args_pattern + r'\)', re.S), r'\1 \2[]'), + (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), + (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), +] +# +# Regexes here are guaranteed to have the end limiter matching +# the start delimiter. Yet, right now, only one replace group +# is allowed. +# +struct_nested_prefixes = [ + (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), +] + +# +# Transforms for function prototypes +# +function_xforms = [ + (KernRe(r"^static +"), ""), + (KernRe(r"^extern +"), ""), + (KernRe(r"^asmlinkage +"), ""), + (KernRe(r"^inline +"), ""), + (KernRe(r"^__inline__ +"), ""), + (KernRe(r"^__inline +"), ""), + (KernRe(r"^__always_inline +"), ""), + (KernRe(r"^noinline +"), ""), + (KernRe(r"^__FORTIFY_INLINE +"), ""), + (KernRe(r"__init +"), ""), + (KernRe(r"__init_or_module +"), ""), + (KernRe(r"__deprecated +"), ""), + (KernRe(r"__flatten +"), ""), + (KernRe(r"__meminit +"), ""), + (KernRe(r"__must_check +"), ""), + (KernRe(r"__weak +"), ""), + (KernRe(r"__sched +"), ""), + (KernRe(r"_noprof"), ""), + (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), + (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), + (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), + (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), + (KernRe(r"__attribute_const__ +"), ""), + (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), +] + +# +# Apply a set of transforms to a block of text. +# +def apply_transforms(xforms, text): + for search, subst in xforms: + text = search.sub(subst, text) + return text + +# # A little helper to get rid of excess white space # multi_space = KernRe(r'\s\s+') def trim_whitespace(s): return multi_space.sub(' ', s.strip()) +# +# Remove struct/enum members that have been marked "private". +# +def trim_private_members(text): + # + # First look for a "public:" block that ends a private region, then + # handle the "private until the end" case. + # + text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text) + text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text) + # + # We needed the comments to do the above, but now we can take them out. + # + return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip() + class state: """ State machine enums @@ -318,36 +458,26 @@ class KernelDoc: param = KernRe(r'[\[\)].*').sub('', param, count=1) - if dtype == "" and param.endswith("..."): - if KernRe(r'\w\.\.\.$').search(param): - # For named variable parameters of the form `x...`, - # remove the dots - param = param[:-3] - else: - # Handles unnamed variable parameters - param = "..." - - if param not in self.entry.parameterdescs or \ - not self.entry.parameterdescs[param]: - - self.entry.parameterdescs[param] = "variable arguments" - - elif dtype == "" and (not param or param == "void"): - param = "void" - self.entry.parameterdescs[param] = "no arguments" - - elif dtype == "" and param in ["struct", "union"]: - # Handle unnamed (anonymous) union or struct - dtype = param - param = "{unnamed_" + param + "}" - self.entry.parameterdescs[param] = "anonymous\n" - self.entry.anon_struct_union = True - - # Handle cache group enforcing variables: they do not need - # to be described in header files - elif "__cacheline_group" in param: - # Ignore __cacheline_group_begin and __cacheline_group_end - return + # + # Look at various "anonymous type" cases. + # + if dtype == '': + if param.endswith("..."): + if len(param) > 3: # there is a name provided, use that + param = param[:-3] + if not self.entry.parameterdescs.get(param): + self.entry.parameterdescs[param] = "variable arguments" + + elif (not param) or param == "void": + param = "void" + self.entry.parameterdescs[param] = "no arguments" + + elif param in ["struct", "union"]: + # Handle unnamed (anonymous) union or struct + dtype = param + param = "{unnamed_" + param + "}" + self.entry.parameterdescs[param] = "anonymous\n" + self.entry.anon_struct_union = True # Warn if parameter has no description # (but ignore ones starting with # as these are not parameters @@ -389,9 +519,6 @@ class KernelDoc: args = arg_expr.sub(r"\1#", args) for arg in args.split(splitter): - # Strip comments - arg = KernRe(r'\/\*.*\*\/').sub('', arg) - # Ignore argument attributes arg = KernRe(r'\sPOS0?\s').sub(' ', arg) @@ -407,81 +534,76 @@ class KernelDoc: # Treat preprocessor directive as a typeless variable self.push_parameter(ln, decl_type, arg, "", "", declaration_name) - + # + # The pointer-to-function case. + # elif KernRe(r'\(.+\)\s*\(').search(arg): - # Pointer-to-function - arg = arg.replace('#', ',') - - r = KernRe(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)') + r = KernRe(r'[^\(]+\(\*?\s*' # Everything up to "(*" + r'([\w\[\].]*)' # Capture the name and possible [array] + r'\s*\)') # Make sure the trailing ")" is there if r.match(arg): param = r.group(1) else: self.emit_msg(ln, f"Invalid param: {arg}") param = arg - - dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) - self.push_parameter(ln, decl_type, param, dtype, - arg, declaration_name) - + dtype = arg.replace(param, '') + self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) + # + # The array-of-pointers case. Dig the parameter name out from the middle + # of the declaration. + # elif KernRe(r'\(.+\)\s*\[').search(arg): - # Array-of-pointers - - arg = arg.replace('#', ',') - r = KernRe(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)') + r = KernRe(r'[^\(]+\(\s*\*\s*' # Up to "(" and maybe "*" + r'([\w.]*?)' # The actual pointer name + r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion] if r.match(arg): param = r.group(1) else: self.emit_msg(ln, f"Invalid param: {arg}") param = arg - - dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) - - self.push_parameter(ln, decl_type, param, dtype, - arg, declaration_name) - + dtype = arg.replace(param, '') + self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) elif arg: + # + # Clean up extraneous spaces and split the string at commas; the first + # element of the resulting list will also include the type information. + # arg = KernRe(r'\s*:\s*').sub(":", arg) arg = KernRe(r'\s*\[').sub('[', arg) - args = KernRe(r'\s*,\s*').split(arg) - if args[0] and '*' in args[0]: - args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) - - first_arg = [] - r = KernRe(r'^(.*\s+)(.*?\[.*\].*)$') - if args[0] and r.match(args[0]): - args.pop(0) - first_arg.extend(r.group(1)) - first_arg.append(r.group(2)) + args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) + # + # args[0] has a string of "type a". If "a" includes an [array] + # declaration, we want to not be fooled by any white space inside + # the brackets, so detect and handle that case specially. + # + r = KernRe(r'^([^[\]]*\s+)(.*)$') + if r.match(args[0]): + args[0] = r.group(2) + dtype = r.group(1) else: - first_arg = KernRe(r'\s+').split(args.pop(0)) - - args.insert(0, first_arg.pop()) - dtype = ' '.join(first_arg) + # No space in args[0]; this seems wrong but preserves previous behavior + dtype = '' + bitfield_re = KernRe(r'(.*?):(\w+)') for param in args: - if KernRe(r'^(\*+)\s*(.*)').match(param): - r = KernRe(r'^(\*+)\s*(.*)') - if not r.match(param): - self.emit_msg(ln, f"Invalid param: {param}") - continue - - param = r.group(1) - + # + # For pointers, shift the star(s) from the variable name to the + # type declaration. + # + r = KernRe(r'^(\*+)\s*(.*)') + if r.match(param): self.push_parameter(ln, decl_type, r.group(2), f"{dtype} {r.group(1)}", arg, declaration_name) - - elif KernRe(r'(.*?):(\w+)').search(param): - r = KernRe(r'(.*?):(\w+)') - if not r.match(param): - self.emit_msg(ln, f"Invalid param: {param}") - continue - + # + # Perform a similar shift for bitfields. + # + elif bitfield_re.search(param): if dtype != "": # Skip unnamed bit-fields - self.push_parameter(ln, decl_type, r.group(1), - f"{dtype}:{r.group(2)}", + self.push_parameter(ln, decl_type, bitfield_re.group(1), + f"{dtype}:{bitfield_re.group(2)}", arg, declaration_name) else: self.push_parameter(ln, decl_type, param, dtype, @@ -520,13 +642,11 @@ class KernelDoc: self.emit_msg(ln, f"No description found for return value of '{declaration_name}'") - def dump_struct(self, ln, proto): - """ - Store an entry for an struct or union - """ - + # + # Split apart a structure prototype; returns (struct|union, name, members) or None + # + def split_struct_proto(self, proto): type_pattern = r'(struct|union)' - qualifiers = [ "__attribute__", "__packed", @@ -534,288 +654,202 @@ class KernelDoc: "____cacheline_aligned_in_smp", "____cacheline_aligned", ] - definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" - struct_members = KernRe(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)') - - # Extract struct/union definition - members = None - declaration_name = None - decl_type = None r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) if r.search(proto): - decl_type = r.group(1) - declaration_name = r.group(2) - members = r.group(3) + return (r.group(1), r.group(2), r.group(3)) else: r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') - if r.search(proto): - decl_type = r.group(1) - declaration_name = r.group(3) - members = r.group(2) - - if not members: - self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") - return - - if self.entry.identifier != declaration_name: - self.emit_msg(ln, - f"expecting prototype for {decl_type} {self.entry.identifier}. Prototype was for {decl_type} {declaration_name} instead\n") - return - - args_pattern = r'([^,)]+)' - - sub_prefixes = [ - (KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''), - (KernRe(r'\/\*\s*private:.*', re.S | re.I), ''), - - # Strip comments - (KernRe(r'\/\*.*?\*\/', re.S), ''), - - # Strip attributes - (attribute, ' '), - (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__packed\s*', re.S), ' '), - (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned', re.S), ' '), - - # Unwrap struct_group macros based on this definition: - # __struct_group(TAG, NAME, ATTRS, MEMBERS...) - # which has variants like: struct_group(NAME, MEMBERS...) - # Only MEMBERS arguments require documentation. - # - # Parsing them happens on two steps: - # - # 1. drop struct group arguments that aren't at MEMBERS, - # storing them as STRUCT_GROUP(MEMBERS) - # - # 2. remove STRUCT_GROUP() ancillary macro. - # - # The original logic used to remove STRUCT_GROUP() using an - # advanced regex: - # - # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; - # - # with two patterns that are incompatible with - # Python re module, as it has: - # - # - a recursive pattern: (?1) - # - an atomic grouping: (?>...) - # - # I tried a simpler version: but it didn't work either: - # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; - # - # As it doesn't properly match the end parenthesis on some cases. - # - # So, a better solution was crafted: there's now a NestedMatch - # class that ensures that delimiters after a search are properly - # matched. So, the implementation to drop STRUCT_GROUP() will be - # handled in separate. - - (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), - (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), - - # Replace macros - # - # TODO: use NestedMatch for FOO($1, $2, ...) matches - # - # it is better to also move those to the NestedMatch logic, - # to ensure that parenthesis will be properly matched. - - (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), - (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), - (KernRe(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), - (KernRe(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'), - (KernRe(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'), - (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'), - (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'), - (KernRe(r'VIRTIO_DECLARE_FEATURES\s*\(' + args_pattern + r'\)', re.S), r'u64 \1; u64 \1_array[VIRTIO_FEATURES_DWORDS]'), - ] - - # Regexes here are guaranteed to have the end limiter matching - # the start delimiter. Yet, right now, only one replace group - # is allowed. - - sub_nested_prefixes = [ - (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), - ] - - for search, sub in sub_prefixes: - members = search.sub(sub, members) - - nested = NestedMatch() - - for search, sub in sub_nested_prefixes: - members = nested.sub(search, sub, members) - - # Keeps the original declaration as-is - declaration = members - - # Split nested struct/union elements - # - # This loop was simpler at the original kernel-doc perl version, as - # while ($members =~ m/$struct_members/) { ... } - # reads 'members' string on each interaction. - # - # Python behavior is different: it parses 'members' only once, - # creating a list of tuples from the first interaction. + return (r.group(1), r.group(3), r.group(2)) + return None + # + # Rewrite the members of a structure or union for easier formatting later on. + # Among other things, this function will turn a member like: + # + # struct { inner_members; } foo; + # + # into: + # + # struct foo; inner_members; + # + def rewrite_struct_members(self, members): # - # On other words, this won't get nested structs. + # Process struct/union members from the most deeply nested outward. The + # trick is in the ^{ below - it prevents a match of an outer struct/union + # until the inner one has been munged (removing the "{" in the process). # - # So, we need to have an extra loop on Python to override such - # re limitation. - - while True: - tuples = struct_members.findall(members) - if not tuples: - break - + struct_members = KernRe(r'(struct|union)' # 0: declaration type + r'([^\{\};]+)' # 1: possible name + r'(\{)' + r'([^\{\}]*)' # 3: Contents of declaration + r'(\})' + r'([^\{\};]*)(;)') # 5: Remaining stuff after declaration + tuples = struct_members.findall(members) + while tuples: for t in tuples: newmember = "" - maintype = t[0] - s_ids = t[5] - content = t[3] - - oldmember = "".join(t) - - for s_id in s_ids.split(','): + oldmember = "".join(t) # Reconstruct the original formatting + dtype, name, lbr, content, rbr, rest, semi = t + # + # Pass through each field name, normalizing the form and formatting. + # + for s_id in rest.split(','): s_id = s_id.strip() - - newmember += f"{maintype} {s_id}; " + newmember += f"{dtype} {s_id}; " + # + # Remove bitfield/array/pointer info, getting the bare name. + # s_id = KernRe(r'[:\[].*').sub('', s_id) s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) - + # + # Pass through the members of this inner structure/union. + # for arg in content.split(';'): arg = arg.strip() - - if not arg: - continue - - r = KernRe(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)') + # + # Look for (type)(*name)(args) - pointer to function + # + r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)') if r.match(arg): + dtype, name, extra = r.group(1), r.group(2), r.group(3) # Pointer-to-function - dtype = r.group(1) - name = r.group(2) - extra = r.group(3) - - if not name: - continue - if not s_id: # Anonymous struct/union newmember += f"{dtype}{name}{extra}; " else: newmember += f"{dtype}{s_id}.{name}{extra}; " - + # + # Otherwise a non-function member. + # else: - arg = arg.strip() - # Handle bitmaps + # + # Remove bitmap and array portions and spaces around commas + # arg = KernRe(r':\s*\d+\s*').sub('', arg) - - # Handle arrays arg = KernRe(r'\[.*\]').sub('', arg) - - # Handle multiple IDs arg = KernRe(r'\s*,\s*').sub(',', arg) - + # + # Look for a normal decl - "type name[,name...]" + # r = KernRe(r'(.*)\s+([\S+,]+)') - if r.search(arg): - dtype = r.group(1) - names = r.group(2) + for name in r.group(2).split(','): + name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name) + if not s_id: + # Anonymous struct/union + newmember += f"{r.group(1)} {name}; " + else: + newmember += f"{r.group(1)} {s_id}.{name}; " else: newmember += f"{arg}; " - continue - - for name in names.split(','): - name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip() - - if not name: - continue - - if not s_id: - # Anonymous struct/union - newmember += f"{dtype} {name}; " - else: - newmember += f"{dtype} {s_id}.{name}; " - + # + # At the end of the s_id loop, replace the original declaration with + # the munged version. + # members = members.replace(oldmember, newmember) + # + # End of the tuple loop - search again and see if there are outer members + # that now turn up. + # + tuples = struct_members.findall(members) + return members - # Ignore other nested elements, like enums - members = re.sub(r'(\{[^\{\}]*\})', '', members) - - self.create_parameter_list(ln, decl_type, members, ';', - declaration_name) - self.check_sections(ln, declaration_name, decl_type) - - # Adjust declaration for better display + # + # Format the struct declaration into a standard form for inclusion in the + # resulting docs. + # + def format_struct_decl(self, declaration): + # + # Insert newlines, get rid of extra spaces. + # declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) declaration = KernRe(r'\}\s+;').sub('};', declaration) - - # Better handle inlined enums - while True: - r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') - if not r.search(declaration): - break - + # + # Format inline enums with each member on its own line. + # + r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') + while r.search(declaration): declaration = r.sub(r'\1,\n\2', declaration) - + # + # Now go through and supply the right number of tabs + # for each line. + # def_args = declaration.split('\n') level = 1 declaration = "" for clause in def_args: + clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1) + if clause: + if '}' in clause and level > 1: + level -= 1 + if not clause.startswith('#'): + declaration += "\t" * level + declaration += "\t" + clause + "\n" + if "{" in clause and "}" not in clause: + level += 1 + return declaration - clause = clause.strip() - clause = KernRe(r'\s+').sub(' ', clause, count=1) - - if not clause: - continue - - if '}' in clause and level > 1: - level -= 1 - if not KernRe(r'^\s*#').match(clause): - declaration += "\t" * level + def dump_struct(self, ln, proto): + """ + Store an entry for an struct or union + """ + # + # Do the basic parse to get the pieces of the declaration. + # + struct_parts = self.split_struct_proto(proto) + if not struct_parts: + self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") + return + decl_type, declaration_name, members = struct_parts - declaration += "\t" + clause + "\n" - if "{" in clause and "}" not in clause: - level += 1 + if self.entry.identifier != declaration_name: + self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. " + f"Prototype was for {decl_type} {declaration_name} instead\n") + return + # + # Go through the list of members applying all of our transformations. + # + members = trim_private_members(members) + members = apply_transforms(struct_xforms, members) + nested = NestedMatch() + for search, sub in struct_nested_prefixes: + members = nested.sub(search, sub, members) + # + # Deal with embedded struct and union members, and drop enums entirely. + # + declaration = members + members = self.rewrite_struct_members(members) + members = re.sub(r'(\{[^\{\}]*\})', '', members) + # + # Output the result and we are done. + # + self.create_parameter_list(ln, decl_type, members, ';', + declaration_name) + self.check_sections(ln, declaration_name, decl_type) self.output_declaration(decl_type, declaration_name, - definition=declaration, + definition=self.format_struct_decl(declaration), purpose=self.entry.declaration_purpose) def dump_enum(self, ln, proto): """ Stores an enum inside self.entries array. """ - - # Ignore members marked private - proto = KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S).sub('', proto) - proto = KernRe(r'\/\*\s*private:.*}', flags=re.S).sub('}', proto) - - # Strip comments - proto = KernRe(r'\/\*.*?\*\/', flags=re.S).sub('', proto) - - # Strip #define macros inside enums + # + # Strip preprocessor directives. Note that this depends on the + # trailing semicolon we added in process_proto_type(). + # proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) - # # Parse out the name and members of the enum. Typedef form first. # r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') if r.search(proto): declaration_name = r.group(2) - members = r.group(1).rstrip() + members = trim_private_members(r.group(1)) # # Failing that, look for a straight enum # @@ -823,7 +857,7 @@ class KernelDoc: r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') if r.match(proto): declaration_name = r.group(1) - members = r.group(2).rstrip() + members = trim_private_members(r.group(2)) # # OK, this isn't going to work. # @@ -892,62 +926,31 @@ class KernelDoc: Stores a function of function macro inside self.entries array. """ - func_macro = False + found = func_macro = False return_type = '' decl_type = 'function' - - # Prefixes that would be removed - sub_prefixes = [ - (r"^static +", "", 0), - (r"^extern +", "", 0), - (r"^asmlinkage +", "", 0), - (r"^inline +", "", 0), - (r"^__inline__ +", "", 0), - (r"^__inline +", "", 0), - (r"^__always_inline +", "", 0), - (r"^noinline +", "", 0), - (r"^__FORTIFY_INLINE +", "", 0), - (r"__init +", "", 0), - (r"__init_or_module +", "", 0), - (r"__deprecated +", "", 0), - (r"__flatten +", "", 0), - (r"__meminit +", "", 0), - (r"__must_check +", "", 0), - (r"__weak +", "", 0), - (r"__sched +", "", 0), - (r"_noprof", "", 0), - (r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +", "", 0), - (r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +", "", 0), - (r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +", "", 0), - (r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)", r"\1, \2", 0), - (r"__attribute_const__ +", "", 0), - - # It seems that Python support for re.X is broken: - # At least for me (Python 3.13), this didn't work -# (r""" -# __attribute__\s*\(\( -# (?: -# [\w\s]+ # attribute name -# (?:\([^)]*\))? # attribute arguments -# \s*,? # optional comma at the end -# )+ -# \)\)\s+ -# """, "", re.X), - - # So, remove whitespaces and comments from it - (r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+", "", 0), - ] - - for search, sub, flags in sub_prefixes: - prototype = KernRe(search, flags).sub(sub, prototype) - - # Macros are a special case, as they change the prototype format + # + # Apply the initial transformations. + # + prototype = apply_transforms(function_xforms, prototype) + # + # If we have a macro, remove the "#define" at the front. + # new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) if new_proto != prototype: - is_define_proto = True prototype = new_proto - else: - is_define_proto = False + # + # Dispense with the simple "#define A B" case here; the key + # is the space after the name of the symbol being defined. + # NOTE that the seemingly misnamed "func_macro" indicates a + # macro *without* arguments. + # + r = KernRe(r'^(\w+)\s+') + if r.search(prototype): + return_type = '' + declaration_name = r.group(1) + func_macro = True + found = True # Yes, this truly is vile. We are looking for: # 1. Return type (may be nothing if we're looking at a macro) @@ -965,91 +968,73 @@ class KernelDoc: # - atomic_set (macro) # - pci_match_device, __copy_to_user (long return type) - name = r'[a-zA-Z0-9_~:]+' - prototype_end1 = r'[^\(]*' - prototype_end2 = r'[^\{]*' - prototype_end = fr'\(({prototype_end1}|{prototype_end2})\)' - - # Besides compiling, Perl qr{[\w\s]+} works as a non-capturing group. - # So, this needs to be mapped in Python with (?:...)? or (?:...)+ - + name = r'\w+' type1 = r'(?:[\w\s]+)?' type2 = r'(?:[\w\s]+\*+)+' - - found = False - - if is_define_proto: - r = KernRe(r'^()(' + name + r')\s+') - - if r.search(prototype): - return_type = '' - declaration_name = r.group(2) - func_macro = True - - found = True - + # + # Attempt to match first on (args) with no internal parentheses; this + # lets us easily filter out __acquires() and other post-args stuff. If + # that fails, just grab the rest of the line to the last closing + # parenthesis. + # + proto_args = r'\(([^\(]*|.*)\)' + # + # (Except for the simple macro case) attempt to split up the prototype + # in the various ways we understand. + # if not found: patterns = [ - rf'^()({name})\s*{prototype_end}', - rf'^({type1})\s+({name})\s*{prototype_end}', - rf'^({type2})\s*({name})\s*{prototype_end}', + rf'^()({name})\s*{proto_args}', + rf'^({type1})\s+({name})\s*{proto_args}', + rf'^({type2})\s*({name})\s*{proto_args}', ] for p in patterns: r = KernRe(p) - if r.match(prototype): - return_type = r.group(1) declaration_name = r.group(2) args = r.group(3) - self.create_parameter_list(ln, decl_type, args, ',', declaration_name) - found = True break + # + # Parsing done; make sure that things are as we expect. + # if not found: self.emit_msg(ln, f"cannot understand function prototype: '{prototype}'") return - if self.entry.identifier != declaration_name: - self.emit_msg(ln, - f"expecting prototype for {self.entry.identifier}(). Prototype was for {declaration_name}() instead") + self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). " + f"Prototype was for {declaration_name}() instead") return - self.check_sections(ln, declaration_name, "function") - self.check_return_section(ln, declaration_name, return_type) + # + # Store the result. + # + self.output_declaration(decl_type, declaration_name, + typedef=('typedef' in return_type), + functiontype=return_type, + purpose=self.entry.declaration_purpose, + func_macro=func_macro) - if 'typedef' in return_type: - self.output_declaration(decl_type, declaration_name, - typedef=True, - functiontype=return_type, - purpose=self.entry.declaration_purpose, - func_macro=func_macro) - else: - self.output_declaration(decl_type, declaration_name, - typedef=False, - functiontype=return_type, - purpose=self.entry.declaration_purpose, - func_macro=func_macro) def dump_typedef(self, ln, proto): """ Stores a typedef inside self.entries array. """ - - typedef_type = r'((?:\s+[\w\*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' + # + # We start by looking for function typedefs. + # + typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' typedef_ident = r'\*?\s*(\w\S+)\s*' typedef_args = r'\s*\((.*)\);' - typedef1 = KernRe(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) - typedef2 = KernRe(r'typedef' + typedef_type + typedef_ident + typedef_args) - - # Strip comments - proto = KernRe(r'/\*.*?\*/', flags=re.S).sub('', proto) + typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) + typedef2 = KernRe(typedef_type + typedef_ident + typedef_args) # Parse function typedef prototypes for r in [typedef1, typedef2]: @@ -1065,21 +1050,16 @@ class KernelDoc: f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") return - decl_type = 'function' - self.create_parameter_list(ln, decl_type, args, ',', declaration_name) + self.create_parameter_list(ln, 'function', args, ',', declaration_name) - self.output_declaration(decl_type, declaration_name, + self.output_declaration('function', declaration_name, typedef=True, functiontype=return_type, purpose=self.entry.declaration_purpose) return - - # Handle nested parentheses or brackets - r = KernRe(r'(\(*.\)\s*|\[*.\]\s*);$') - while r.search(proto): - proto = r.sub('', proto) - - # Parse simple typedefs + # + # Not a function, try to parse a simple typedef. + # r = KernRe(r'typedef.*\s+(\w+)\s*;') if r.match(proto): declaration_name = r.group(1) @@ -1262,7 +1242,7 @@ class KernelDoc: self.dump_section() # Look for doc_com + <text> + doc_end: - r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') + r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/') if r.match(line): self.emit_msg(ln, f"suspicious ending line: {line}") @@ -1473,7 +1453,7 @@ class KernelDoc: """Ancillary routine to process a function prototype""" # strip C99-style comments to end of line - line = KernRe(r"\/\/.*$", re.S).sub('', line) + line = KernRe(r"//.*$", re.S).sub('', line) # # Soak up the line's worth of prototype text, stopping at { or ; if present. # |