From d966dc658ce381c56d85cd477e095944b8470379 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:11 +0800 Subject: scripts/kernel-doc.py: move KernelDoc class to a separate file In preparation for letting kerneldoc Sphinx extension to import Python libraries, move regex ancillary classes to a separate file. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/c76df228504e711c6b4bcd23d5a0ea1fda678cda.1744106241.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 1690 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 1690 insertions(+) create mode 100755 scripts/lib/kdoc/kdoc_parser.py (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py new file mode 100755 index 000000000000..3ce116595546 --- /dev/null +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -0,0 +1,1690 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab . +# +# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 + +""" +kdoc_parser +=========== + +Read a C language source or header FILE and extract embedded +documentation comments +""" + +import argparse +import re +from pprint import pformat + +from kdoc_re import NestedMatch, Re + + +# +# Regular expressions used to parse kernel-doc markups at KernelDoc class. +# +# Let's declare them in lowercase outside any class to make easier to +# convert from the python script. +# +# As those are evaluated at the beginning, no need to cache them +# + +# Allow whitespace at end of comment start. 
+doc_start = Re(r'^/\*\*\s*$', cache=False) + +doc_end = Re(r'\*/', cache=False) +doc_com = Re(r'\s*\*\s*', cache=False) +doc_com_body = Re(r'\s*\* ?', cache=False) +doc_decl = doc_com + Re(r'(\w+)', cache=False) + +# @params and a strictly limited set of supported section names +# Specifically: +# Match @word: +# @...: +# @{section-name}: +# while trying to not match literal block starts like "example::" +# +doc_sect = doc_com + \ + Re(r'\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$', + flags=re.I, cache=False) + +doc_content = doc_com_body + Re(r'(.*)', cache=False) +doc_block = doc_com + Re(r'DOC:\s*(.*)?', cache=False) +doc_inline_start = Re(r'^\s*/\*\*\s*$', cache=False) +doc_inline_sect = Re(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) +doc_inline_end = Re(r'^\s*\*/\s*$', cache=False) +doc_inline_oneline = Re(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) +attribute = Re(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", + flags=re.I | re.S, cache=False) + +export_symbol = Re(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) +export_symbol_ns = Re(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) + +type_param = Re(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) + + +class KernelDoc: + """ + Read a C language source or header FILE and extract embedded + documentation comments. 
+ """ + + # Parser states + STATE_NORMAL = 0 # normal code + STATE_NAME = 1 # looking for function name + STATE_BODY_MAYBE = 2 # body - or maybe more description + STATE_BODY = 3 # the body of the comment + STATE_BODY_WITH_BLANK_LINE = 4 # the body which has a blank line + STATE_PROTO = 5 # scanning prototype + STATE_DOCBLOCK = 6 # documentation block + STATE_INLINE = 7 # gathering doc outside main block + + st_name = [ + "NORMAL", + "NAME", + "BODY_MAYBE", + "BODY", + "BODY_WITH_BLANK_LINE", + "PROTO", + "DOCBLOCK", + "INLINE", + ] + + # Inline documentation state + STATE_INLINE_NA = 0 # not applicable ($state != STATE_INLINE) + STATE_INLINE_NAME = 1 # looking for member name (@foo:) + STATE_INLINE_TEXT = 2 # looking for member documentation + STATE_INLINE_END = 3 # done + STATE_INLINE_ERROR = 4 # error - Comment without header was found. + # Spit a warning as it's not + # proper kernel-doc and ignore the rest. + + st_inline_name = [ + "", + "_NAME", + "_TEXT", + "_END", + "_ERROR", + ] + + # Section names + + section_default = "Description" # default section + section_intro = "Introduction" + section_context = "Context" + section_return = "Return" + + undescribed = "-- undescribed --" + + def __init__(self, config, fname): + """Initialize internal variables""" + + self.fname = fname + self.config = config + + # Initial state for the state machines + self.state = self.STATE_NORMAL + self.inline_doc_state = self.STATE_INLINE_NA + + # Store entry currently being processed + self.entry = None + + # Place all potential outputs into an array + self.entries = [] + + def show_warnings(self, dtype, declaration_name): # pylint: disable=W0613 + """ + Allow filtering out warnings + """ + + # TODO: implement it + + return True + + # TODO: rename to emit_message + def emit_warning(self, ln, msg, warning=True): + """Emit a message""" + + if warning: + self.config.log.warning("%s:%d %s", self.fname, ln, msg) + else: + self.config.log.info("%s:%d %s", self.fname, ln, msg) + + def 
dump_section(self, start_new=True): + """ + Dumps section contents to arrays/hashes intended for that purpose. + """ + + name = self.entry.section + contents = self.entry.contents + + # TODO: we can prevent dumping empty sections here with: + # + # if self.entry.contents.strip("\n"): + # if start_new: + # self.entry.section = self.section_default + # self.entry.contents = "" + # + # return + # + # But, as we want to be producing the same output of the + # venerable kernel-doc Perl tool, let's just output everything, + # at least for now + + if type_param.match(name): + name = type_param.group(1) + + self.entry.parameterdescs[name] = contents + self.entry.parameterdesc_start_lines[name] = self.entry.new_start_line + + self.entry.sectcheck += name + " " + self.entry.new_start_line = 0 + + elif name == "@...": + name = "..." + self.entry.parameterdescs[name] = contents + self.entry.sectcheck += name + " " + self.entry.parameterdesc_start_lines[name] = self.entry.new_start_line + self.entry.new_start_line = 0 + + else: + if name in self.entry.sections and self.entry.sections[name] != "": + # Only warn on user-specified duplicate section names + if name != self.section_default: + self.emit_warning(self.entry.new_start_line, + f"duplicate section name '{name}'\n") + self.entry.sections[name] += contents + else: + self.entry.sections[name] = contents + self.entry.sectionlist.append(name) + self.entry.section_start_lines[name] = self.entry.new_start_line + self.entry.new_start_line = 0 + +# self.config.log.debug("Section: %s : %s", name, pformat(vars(self.entry))) + + if start_new: + self.entry.section = self.section_default + self.entry.contents = "" + + # TODO: rename it to store_declaration + def output_declaration(self, dtype, name, **args): + """ + Stores the entry into an entry array. 
+ + The actual output and output filters will be handled elsewhere + """ + + # The implementation here is different than the original kernel-doc: + # instead of checking for output filters or actually output anything, + # it just stores the declaration content at self.entries, as the + # output will happen on a separate class. + # + # For now, we're keeping the same name of the function just to make + # easier to compare the source code of both scripts + + if "declaration_start_line" not in args: + args["declaration_start_line"] = self.entry.declaration_start_line + + args["type"] = dtype + + # TODO: use colletions.OrderedDict + + sections = args.get('sections', {}) + sectionlist = args.get('sectionlist', []) + + # Drop empty sections + # TODO: improve it to emit warnings + for section in ["Description", "Return"]: + if section in sectionlist: + if not sections[section].rstrip(): + del sections[section] + sectionlist.remove(section) + + self.entries.append((name, args)) + + self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) + + def reset_state(self, ln): + """ + Ancillary routine to create a new entry. It initializes all + variables used by the state machine. 
+ """ + + self.entry = argparse.Namespace + + self.entry.contents = "" + self.entry.function = "" + self.entry.sectcheck = "" + self.entry.struct_actual = "" + self.entry.prototype = "" + + self.entry.parameterlist = [] + self.entry.parameterdescs = {} + self.entry.parametertypes = {} + self.entry.parameterdesc_start_lines = {} + + self.entry.section_start_lines = {} + self.entry.sectionlist = [] + self.entry.sections = {} + + self.entry.anon_struct_union = False + + self.entry.leading_space = None + + # State flags + self.state = self.STATE_NORMAL + self.inline_doc_state = self.STATE_INLINE_NA + self.entry.brcount = 0 + + self.entry.in_doc_sect = False + self.entry.declaration_start_line = ln + + def push_parameter(self, ln, decl_type, param, dtype, + org_arg, declaration_name): + """ + Store parameters and their descriptions at self.entry. + """ + + if self.entry.anon_struct_union and dtype == "" and param == "}": + return # Ignore the ending }; from anonymous struct/union + + self.entry.anon_struct_union = False + + param = Re(r'[\[\)].*').sub('', param, count=1) + + if dtype == "" and param.endswith("..."): + if Re(r'\w\.\.\.$').search(param): + # For named variable parameters of the form `x...`, + # remove the dots + param = param[:-3] + else: + # Handles unnamed variable parameters + param = "..." 
+ + if param not in self.entry.parameterdescs or \ + not self.entry.parameterdescs[param]: + + self.entry.parameterdescs[param] = "variable arguments" + + elif dtype == "" and (not param or param == "void"): + param = "void" + self.entry.parameterdescs[param] = "no arguments" + + elif dtype == "" and param in ["struct", "union"]: + # Handle unnamed (anonymous) union or struct + dtype = param + param = "{unnamed_" + param + "}" + self.entry.parameterdescs[param] = "anonymous\n" + self.entry.anon_struct_union = True + + # Handle cache group enforcing variables: they do not need + # to be described in header files + elif "__cacheline_group" in param: + # Ignore __cacheline_group_begin and __cacheline_group_end + return + + # Warn if parameter has no description + # (but ignore ones starting with # as these are not parameters + # but inline preprocessor statements) + if param not in self.entry.parameterdescs and not param.startswith("#"): + self.entry.parameterdescs[param] = self.undescribed + + if self.show_warnings(dtype, declaration_name) and "." not in param: + if decl_type == 'function': + dname = f"{decl_type} parameter" + else: + dname = f"{decl_type} member" + + self.emit_warning(ln, + f"{dname} '{param}' not described in '{declaration_name}'") + + # Strip spaces from param so that it is one continuous string on + # parameterlist. This fixes a problem where check_sections() + # cannot find a parameter like "addr[6 + 2]" because it actually + # appears as "addr[6", "+", "2]" on the parameter list. + # However, it's better to maintain the param string unchanged for + # output, so just weaken the string compare in check_sections() + # to ignore "[blah" in a parameter string. + + self.entry.parameterlist.append(param) + org_arg = Re(r'\s\s+').sub(' ', org_arg) + self.entry.parametertypes[param] = org_arg + + def save_struct_actual(self, actual): + """ + Strip all spaces from the actual param so that it looks like + one string item. 
+ """ + + actual = Re(r'\s*').sub("", actual, count=1) + + self.entry.struct_actual += actual + " " + + def create_parameter_list(self, ln, decl_type, args, + splitter, declaration_name): + """ + Creates a list of parameters, storing them at self.entry. + """ + + # temporarily replace all commas inside function pointer definition + arg_expr = Re(r'(\([^\),]+),') + while arg_expr.search(args): + args = arg_expr.sub(r"\1#", args) + + for arg in args.split(splitter): + # Strip comments + arg = Re(r'\/\*.*\*\/').sub('', arg) + + # Ignore argument attributes + arg = Re(r'\sPOS0?\s').sub(' ', arg) + + # Strip leading/trailing spaces + arg = arg.strip() + arg = Re(r'\s+').sub(' ', arg, count=1) + + if arg.startswith('#'): + # Treat preprocessor directive as a typeless variable just to fill + # corresponding data structures "correctly". Catch it later in + # output_* subs. + + # Treat preprocessor directive as a typeless variable + self.push_parameter(ln, decl_type, arg, "", + "", declaration_name) + + elif Re(r'\(.+\)\s*\(').search(arg): + # Pointer-to-function + + arg = arg.replace('#', ',') + + r = Re(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)') + if r.match(arg): + param = r.group(1) + else: + self.emit_warning(ln, f"Invalid param: {arg}") + param = arg + + dtype = Re(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) + self.save_struct_actual(param) + self.push_parameter(ln, decl_type, param, dtype, + arg, declaration_name) + + elif Re(r'\(.+\)\s*\[').search(arg): + # Array-of-pointers + + arg = arg.replace('#', ',') + r = Re(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)') + if r.match(arg): + param = r.group(1) + else: + self.emit_warning(ln, f"Invalid param: {arg}") + param = arg + + dtype = Re(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) + + self.save_struct_actual(param) + self.push_parameter(ln, decl_type, param, dtype, + arg, declaration_name) + + elif arg: + arg = Re(r'\s*:\s*').sub(":", arg) + arg = Re(r'\s*\[').sub('[', arg) + + args = 
Re(r'\s*,\s*').split(arg) + if args[0] and '*' in args[0]: + args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) + + first_arg = [] + r = Re(r'^(.*\s+)(.*?\[.*\].*)$') + if args[0] and r.match(args[0]): + args.pop(0) + first_arg.extend(r.group(1)) + first_arg.append(r.group(2)) + else: + first_arg = Re(r'\s+').split(args.pop(0)) + + args.insert(0, first_arg.pop()) + dtype = ' '.join(first_arg) + + for param in args: + if Re(r'^(\*+)\s*(.*)').match(param): + r = Re(r'^(\*+)\s*(.*)') + if not r.match(param): + self.emit_warning(ln, f"Invalid param: {param}") + continue + + param = r.group(1) + + self.save_struct_actual(r.group(2)) + self.push_parameter(ln, decl_type, r.group(2), + f"{dtype} {r.group(1)}", + arg, declaration_name) + + elif Re(r'(.*?):(\w+)').search(param): + r = Re(r'(.*?):(\w+)') + if not r.match(param): + self.emit_warning(ln, f"Invalid param: {param}") + continue + + if dtype != "": # Skip unnamed bit-fields + self.save_struct_actual(r.group(1)) + self.push_parameter(ln, decl_type, r.group(1), + f"{dtype}:{r.group(2)}", + arg, declaration_name) + else: + self.save_struct_actual(param) + self.push_parameter(ln, decl_type, param, dtype, + arg, declaration_name) + + def check_sections(self, ln, decl_name, decl_type, sectcheck, prmscheck): + """ + Check for errors inside sections, emitting warnings if not found + parameters are described. + """ + + sects = sectcheck.split() + prms = prmscheck.split() + err = False + + for sx in range(len(sects)): # pylint: disable=C0200 + err = True + for px in range(len(prms)): # pylint: disable=C0200 + prm_clean = prms[px] + prm_clean = Re(r'\[.*\]').sub('', prm_clean) + prm_clean = attribute.sub('', prm_clean) + + # ignore array size in a parameter string; + # however, the original param string may contain + # spaces, e.g.: addr[6 + 2] + # and this appears in @prms as "addr[6" since the + # parameter list is split at spaces; + # hence just ignore "[..." 
for the sections check; + prm_clean = Re(r'\[.*').sub('', prm_clean) + + if prm_clean == sects[sx]: + err = False + break + + if err: + if decl_type == 'function': + dname = f"{decl_type} parameter" + else: + dname = f"{decl_type} member" + + self.emit_warning(ln, + f"Excess {dname} '{sects[sx]}' description in '{decl_name}'") + + def check_return_section(self, ln, declaration_name, return_type): + """ + If the function doesn't return void, warns about the lack of a + return description. + """ + + if not self.config.wreturn: + return + + # Ignore an empty return type (It's a macro) + # Ignore functions with a "void" return type (but not "void *") + if not return_type or Re(r'void\s*\w*\s*$').search(return_type): + return + + if not self.entry.sections.get("Return", None): + self.emit_warning(ln, + f"No description found for return value of '{declaration_name}'") + + def dump_struct(self, ln, proto): + """ + Store an entry for an struct or union + """ + + type_pattern = r'(struct|union)' + + qualifiers = [ + "__attribute__", + "__packed", + "__aligned", + "____cacheline_aligned_in_smp", + "____cacheline_aligned", + ] + + definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" 
+ struct_members = Re(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)') + + # Extract struct/union definition + members = None + declaration_name = None + decl_type = None + + r = Re(type_pattern + r'\s+(\w+)\s*' + definition_body) + if r.search(proto): + decl_type = r.group(1) + declaration_name = r.group(2) + members = r.group(3) + else: + r = Re(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') + + if r.search(proto): + decl_type = r.group(1) + declaration_name = r.group(3) + members = r.group(2) + + if not members: + self.emit_warning(ln, f"{proto} error: Cannot parse struct or union!") + self.config.errors += 1 + return + + if self.entry.identifier != declaration_name: + self.emit_warning(ln, + f"expecting prototype for {decl_type} {self.entry.identifier}. Prototype was for {decl_type} {declaration_name} instead\n") + return + + args_pattern = r'([^,)]+)' + + sub_prefixes = [ + (Re(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''), + (Re(r'\/\*\s*private:.*', re.S | re.I), ''), + + # Strip comments + (Re(r'\/\*.*?\*\/', re.S), ''), + + # Strip attributes + (attribute, ' '), + (Re(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), + (Re(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), + (Re(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), + (Re(r'\s*__packed\s*', re.S), ' '), + (Re(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), + (Re(r'\s*____cacheline_aligned_in_smp', re.S), ' '), + (Re(r'\s*____cacheline_aligned', re.S), ' '), + + # Unwrap struct_group macros based on this definition: + # __struct_group(TAG, NAME, ATTRS, MEMBERS...) + # which has variants like: struct_group(NAME, MEMBERS...) + # Only MEMBERS arguments require documentation. + # + # Parsing them happens on two steps: + # + # 1. drop struct group arguments that aren't at MEMBERS, + # storing them as STRUCT_GROUP(MEMBERS) + # + # 2. remove STRUCT_GROUP() ancillary macro. 
+ # + # The original logic used to remove STRUCT_GROUP() using an + # advanced regex: + # + # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; + # + # with two patterns that are incompatible with + # Python re module, as it has: + # + # - a recursive pattern: (?1) + # - an atomic grouping: (?>...) + # + # I tried a simpler version: but it didn't work either: + # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; + # + # As it doesn't properly match the end parenthesis on some cases. + # + # So, a better solution was crafted: there's now a NestedMatch + # class that ensures that delimiters after a search are properly + # matched. So, the implementation to drop STRUCT_GROUP() will be + # handled in separate. + + (Re(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), + (Re(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), + (Re(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), + (Re(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), + + # Replace macros + # + # TODO: it is better to also move those to the NestedMatch logic, + # to ensure that parenthesis will be properly matched. 
+ + (Re(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), + (Re(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), + (Re(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), + (Re(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'), + (Re(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), + (Re(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), + (Re(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'), + (Re(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'), + (Re(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'), + ] + + # Regexes here are guaranteed to have the end limiter matching + # the start delimiter. Yet, right now, only one replace group + # is allowed. + + sub_nested_prefixes = [ + (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), + ] + + for search, sub in sub_prefixes: + members = search.sub(sub, members) + + nested = NestedMatch() + + for search, sub in sub_nested_prefixes: + members = nested.sub(search, sub, members) + + # Keeps the original declaration as-is + declaration = members + + # Split nested struct/union elements + # + # This loop was simpler at the original kernel-doc perl version, as + # while ($members =~ m/$struct_members/) { ... } + # reads 'members' string on each interaction. + # + # Python behavior is different: it parses 'members' only once, + # creating a list of tuples from the first interaction. + # + # On other words, this won't get nested structs. + # + # So, we need to have an extra loop on Python to override such + # re limitation. 
+ + while True: + tuples = struct_members.findall(members) + if not tuples: + break + + for t in tuples: + newmember = "" + maintype = t[0] + s_ids = t[5] + content = t[3] + + oldmember = "".join(t) + + for s_id in s_ids.split(','): + s_id = s_id.strip() + + newmember += f"{maintype} {s_id}; " + s_id = Re(r'[:\[].*').sub('', s_id) + s_id = Re(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) + + for arg in content.split(';'): + arg = arg.strip() + + if not arg: + continue + + r = Re(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)') + if r.match(arg): + # Pointer-to-function + dtype = r.group(1) + name = r.group(2) + extra = r.group(3) + + if not name: + continue + + if not s_id: + # Anonymous struct/union + newmember += f"{dtype}{name}{extra}; " + else: + newmember += f"{dtype}{s_id}.{name}{extra}; " + + else: + arg = arg.strip() + # Handle bitmaps + arg = Re(r':\s*\d+\s*').sub('', arg) + + # Handle arrays + arg = Re(r'\[.*\]').sub('', arg) + + # Handle multiple IDs + arg = Re(r'\s*,\s*').sub(',', arg) + + r = Re(r'(.*)\s+([\S+,]+)') + + if r.search(arg): + dtype = r.group(1) + names = r.group(2) + else: + newmember += f"{arg}; " + continue + + for name in names.split(','): + name = Re(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip() + + if not name: + continue + + if not s_id: + # Anonymous struct/union + newmember += f"{dtype} {name}; " + else: + newmember += f"{dtype} {s_id}.{name}; " + + members = members.replace(oldmember, newmember) + + # Ignore other nested elements, like enums + members = re.sub(r'(\{[^\{\}]*\})', '', members) + + self.create_parameter_list(ln, decl_type, members, ';', + declaration_name) + self.check_sections(ln, declaration_name, decl_type, + self.entry.sectcheck, self.entry.struct_actual) + + # Adjust declaration for better display + declaration = Re(r'([\{;])').sub(r'\1\n', declaration) + declaration = Re(r'\}\s+;').sub('};', declaration) + + # Better handle inlined enums + while True: + r = Re(r'(enum\s+\{[^\}]+),([^\n])') + if not r.search(declaration): + break 
+ + declaration = r.sub(r'\1,\n\2', declaration) + + def_args = declaration.split('\n') + level = 1 + declaration = "" + for clause in def_args: + + clause = clause.strip() + clause = Re(r'\s+').sub(' ', clause, count=1) + + if not clause: + continue + + if '}' in clause and level > 1: + level -= 1 + + if not Re(r'^\s*#').match(clause): + declaration += "\t" * level + + declaration += "\t" + clause + "\n" + if "{" in clause and "}" not in clause: + level += 1 + + self.output_declaration(decl_type, declaration_name, + struct=declaration_name, + module=self.entry.modulename, + definition=declaration, + parameterlist=self.entry.parameterlist, + parameterdescs=self.entry.parameterdescs, + parametertypes=self.entry.parametertypes, + sectionlist=self.entry.sectionlist, + sections=self.entry.sections, + purpose=self.entry.declaration_purpose) + + def dump_enum(self, ln, proto): + """ + Stores an enum inside self.entries array. + """ + + # Ignore members marked private + proto = Re(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S).sub('', proto) + proto = Re(r'\/\*\s*private:.*}', flags=re.S).sub('}', proto) + + # Strip comments + proto = Re(r'\/\*.*?\*\/', flags=re.S).sub('', proto) + + # Strip #define macros inside enums + proto = Re(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) + + members = None + declaration_name = None + + r = Re(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') + if r.search(proto): + declaration_name = r.group(2) + members = r.group(1).rstrip() + else: + r = Re(r'enum\s+(\w*)\s*\{(.*)\}') + if r.match(proto): + declaration_name = r.group(1) + members = r.group(2).rstrip() + + if not members: + self.emit_warning(ln, f"{proto}: error: Cannot parse enum!") + self.config.errors += 1 + return + + if self.entry.identifier != declaration_name: + if self.entry.identifier == "": + self.emit_warning(ln, + f"{proto}: wrong kernel-doc identifier on prototype") + else: + self.emit_warning(ln, + f"expecting prototype for enum 
{self.entry.identifier}. Prototype was for enum {declaration_name} instead") + return + + if not declaration_name: + declaration_name = "(anonymous)" + + member_set = set() + + members = Re(r'\([^;]*?[\)]').sub('', members) + + for arg in members.split(','): + if not arg: + continue + arg = Re(r'^\s*(\w+).*').sub(r'\1', arg) + self.entry.parameterlist.append(arg) + if arg not in self.entry.parameterdescs: + self.entry.parameterdescs[arg] = self.undescribed + if self.show_warnings("enum", declaration_name): + self.emit_warning(ln, + f"Enum value '{arg}' not described in enum '{declaration_name}'") + member_set.add(arg) + + for k in self.entry.parameterdescs: + if k not in member_set: + if self.show_warnings("enum", declaration_name): + self.emit_warning(ln, + f"Excess enum value '%{k}' description in '{declaration_name}'") + + self.output_declaration('enum', declaration_name, + enum=declaration_name, + module=self.config.modulename, + parameterlist=self.entry.parameterlist, + parameterdescs=self.entry.parameterdescs, + sectionlist=self.entry.sectionlist, + sections=self.entry.sections, + purpose=self.entry.declaration_purpose) + + def dump_declaration(self, ln, prototype): + """ + Stores a data declaration inside self.entries array. + """ + + if self.entry.decl_type == "enum": + self.dump_enum(ln, prototype) + return + + if self.entry.decl_type == "typedef": + self.dump_typedef(ln, prototype) + return + + if self.entry.decl_type in ["union", "struct"]: + self.dump_struct(ln, prototype) + return + + # TODO: handle other types + self.output_declaration(self.entry.decl_type, prototype, + entry=self.entry) + + def dump_function(self, ln, prototype): + """ + Stores a function of function macro inside self.entries array. 
+ """ + + func_macro = False + return_type = '' + decl_type = 'function' + + # Prefixes that would be removed + sub_prefixes = [ + (r"^static +", "", 0), + (r"^extern +", "", 0), + (r"^asmlinkage +", "", 0), + (r"^inline +", "", 0), + (r"^__inline__ +", "", 0), + (r"^__inline +", "", 0), + (r"^__always_inline +", "", 0), + (r"^noinline +", "", 0), + (r"^__FORTIFY_INLINE +", "", 0), + (r"__init +", "", 0), + (r"__init_or_module +", "", 0), + (r"__deprecated +", "", 0), + (r"__flatten +", "", 0), + (r"__meminit +", "", 0), + (r"__must_check +", "", 0), + (r"__weak +", "", 0), + (r"__sched +", "", 0), + (r"_noprof", "", 0), + (r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +", "", 0), + (r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +", "", 0), + (r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +", "", 0), + (r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)", r"\1, \2", 0), + (r"__attribute_const__ +", "", 0), + + # It seems that Python support for re.X is broken: + # At least for me (Python 3.13), this didn't work +# (r""" +# __attribute__\s*\(\( +# (?: +# [\w\s]+ # attribute name +# (?:\([^)]*\))? # attribute arguments +# \s*,? # optional comma at the end +# )+ +# \)\)\s+ +# """, "", re.X), + + # So, remove whitespaces and comments from it + (r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+", "", 0), + ] + + for search, sub, flags in sub_prefixes: + prototype = Re(search, flags).sub(sub, prototype) + + # Macros are a special case, as they change the prototype format + new_proto = Re(r"^#\s*define\s+").sub("", prototype) + if new_proto != prototype: + is_define_proto = True + prototype = new_proto + else: + is_define_proto = False + + # Yes, this truly is vile. We are looking for: + # 1. Return type (may be nothing if we're looking at a macro) + # 2. Function name + # 3. Function parameters. 
+ # + # All the while we have to watch out for function pointer parameters + # (which IIRC is what the two sections are for), C types (these + # regexps don't even start to express all the possibilities), and + # so on. + # + # If you mess with these regexps, it's a good idea to check that + # the following functions' documentation still comes out right: + # - parport_register_device (function pointer parameters) + # - atomic_set (macro) + # - pci_match_device, __copy_to_user (long return type) + + name = r'[a-zA-Z0-9_~:]+' + prototype_end1 = r'[^\(]*' + prototype_end2 = r'[^\{]*' + prototype_end = fr'\(({prototype_end1}|{prototype_end2})\)' + + # Besides compiling, Perl qr{[\w\s]+} works as a non-capturing group. + # So, this needs to be mapped in Python with (?:...)? or (?:...)+ + + type1 = r'(?:[\w\s]+)?' + type2 = r'(?:[\w\s]+\*+)+' + + found = False + + if is_define_proto: + r = Re(r'^()(' + name + r')\s+') + + if r.search(prototype): + return_type = '' + declaration_name = r.group(2) + func_macro = True + + found = True + + if not found: + patterns = [ + rf'^()({name})\s*{prototype_end}', + rf'^({type1})\s+({name})\s*{prototype_end}', + rf'^({type2})\s*({name})\s*{prototype_end}', + ] + + for p in patterns: + r = Re(p) + + if r.match(prototype): + + return_type = r.group(1) + declaration_name = r.group(2) + args = r.group(3) + + self.create_parameter_list(ln, decl_type, args, ',', + declaration_name) + + found = True + break + if not found: + self.emit_warning(ln, + f"cannot understand function prototype: '{prototype}'") + return + + if self.entry.identifier != declaration_name: + self.emit_warning(ln, + f"expecting prototype for {self.entry.identifier}(). 
Prototype was for {declaration_name}() instead") + return + + prms = " ".join(self.entry.parameterlist) + self.check_sections(ln, declaration_name, "function", + self.entry.sectcheck, prms) + + self.check_return_section(ln, declaration_name, return_type) + + if 'typedef' in return_type: + self.output_declaration(decl_type, declaration_name, + function=declaration_name, + typedef=True, + module=self.config.modulename, + functiontype=return_type, + parameterlist=self.entry.parameterlist, + parameterdescs=self.entry.parameterdescs, + parametertypes=self.entry.parametertypes, + sectionlist=self.entry.sectionlist, + sections=self.entry.sections, + purpose=self.entry.declaration_purpose, + func_macro=func_macro) + else: + self.output_declaration(decl_type, declaration_name, + function=declaration_name, + typedef=False, + module=self.config.modulename, + functiontype=return_type, + parameterlist=self.entry.parameterlist, + parameterdescs=self.entry.parameterdescs, + parametertypes=self.entry.parametertypes, + sectionlist=self.entry.sectionlist, + sections=self.entry.sections, + purpose=self.entry.declaration_purpose, + func_macro=func_macro) + + def dump_typedef(self, ln, proto): + """ + Stores a typedef inside self.entries array. + """ + + typedef_type = r'((?:\s+[\w\*]+\b){1,8})\s*' + typedef_ident = r'\*?\s*(\w\S+)\s*' + typedef_args = r'\s*\((.*)\);' + + typedef1 = Re(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) + typedef2 = Re(r'typedef' + typedef_type + typedef_ident + typedef_args) + + # Strip comments + proto = Re(r'/\*.*?\*/', flags=re.S).sub('', proto) + + # Parse function typedef prototypes + for r in [typedef1, typedef2]: + if not r.match(proto): + continue + + return_type = r.group(1).strip() + declaration_name = r.group(2) + args = r.group(3) + + if self.entry.identifier != declaration_name: + self.emit_warning(ln, + f"expecting prototype for typedef {self.entry.identifier}. 
Prototype was for typedef {declaration_name} instead\n") + return + + decl_type = 'function' + self.create_parameter_list(ln, decl_type, args, ',', declaration_name) + + self.output_declaration(decl_type, declaration_name, + function=declaration_name, + typedef=True, + module=self.entry.modulename, + functiontype=return_type, + parameterlist=self.entry.parameterlist, + parameterdescs=self.entry.parameterdescs, + parametertypes=self.entry.parametertypes, + sectionlist=self.entry.sectionlist, + sections=self.entry.sections, + purpose=self.entry.declaration_purpose) + return + + # Handle nested parentheses or brackets + r = Re(r'(\(*.\)\s*|\[*.\]\s*);$') + while r.search(proto): + proto = r.sub('', proto) + + # Parse simple typedefs + r = Re(r'typedef.*\s+(\w+)\s*;') + if r.match(proto): + declaration_name = r.group(1) + + if self.entry.identifier != declaration_name: + self.emit_warning(ln, f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") + return + + self.output_declaration('typedef', declaration_name, + typedef=declaration_name, + module=self.entry.modulename, + sectionlist=self.entry.sectionlist, + sections=self.entry.sections, + purpose=self.entry.declaration_purpose) + return + + self.emit_warning(ln, "error: Cannot parse typedef!") + self.config.errors += 1 + + @staticmethod + def process_export(function_table, line): + """ + process EXPORT_SYMBOL* tags + + This method is called both internally and externally, so, it + doesn't use self. + """ + + if export_symbol.search(line): + symbol = export_symbol.group(2) + function_table.add(symbol) + + if export_symbol_ns.search(line): + symbol = export_symbol_ns.group(2) + function_table.add(symbol) + + def process_normal(self, ln, line): + """ + STATE_NORMAL: looking for the /** to begin everything. 
+ """ + + if not doc_start.match(line): + return + + # start a new entry + self.reset_state(ln + 1) + self.entry.in_doc_sect = False + + # next line is always the function name + self.state = self.STATE_NAME + + def process_name(self, ln, line): + """ + STATE_NAME: Looking for the "name - description" line + """ + + if doc_block.search(line): + self.entry.new_start_line = ln + + if not doc_block.group(1): + self.entry.section = self.section_intro + else: + self.entry.section = doc_block.group(1) + + self.state = self.STATE_DOCBLOCK + return + + if doc_decl.search(line): + self.entry.identifier = doc_decl.group(1) + self.entry.is_kernel_comment = False + + decl_start = str(doc_com) # comment block asterisk + fn_type = r"(?:\w+\s*\*\s*)?" # type (for non-functions) + parenthesis = r"(?:\(\w*\))?" # optional parenthesis on function + decl_end = r"(?:[-:].*)" # end of the name part + + # test for pointer declaration type, foo * bar() - desc + r = Re(fr"^{decl_start}([\w\s]+?){parenthesis}?\s*{decl_end}?$") + if r.search(line): + self.entry.identifier = r.group(1) + + # Test for data declaration + r = Re(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)") + if r.search(line): + self.entry.decl_type = r.group(1) + self.entry.identifier = r.group(2) + self.entry.is_kernel_comment = True + else: + # Look for foo() or static void foo() - description; + # or misspelt identifier + + r1 = Re(fr"^{decl_start}{fn_type}(\w+)\s*{parenthesis}\s*{decl_end}?$") + r2 = Re(fr"^{decl_start}{fn_type}(\w+[^-:]*){parenthesis}\s*{decl_end}$") + + for r in [r1, r2]: + if r.search(line): + self.entry.identifier = r.group(1) + self.entry.decl_type = "function" + + r = Re(r"define\s+") + self.entry.identifier = r.sub("", self.entry.identifier) + self.entry.is_kernel_comment = True + break + + self.entry.identifier = self.entry.identifier.strip(" ") + + self.state = self.STATE_BODY + + # if there's no @param blocks need to set up default section here + self.entry.section = self.section_default + 
self.entry.new_start_line = ln + 1 + + r = Re("[-:](.*)") + if r.search(line): + # strip leading/trailing/multiple spaces + self.entry.descr = r.group(1).strip(" ") + + r = Re(r"\s+") + self.entry.descr = r.sub(" ", self.entry.descr) + self.entry.declaration_purpose = self.entry.descr + self.state = self.STATE_BODY_MAYBE + else: + self.entry.declaration_purpose = "" + + if not self.entry.is_kernel_comment: + self.emit_warning(ln, + f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}") + self.state = self.STATE_NORMAL + + if not self.entry.declaration_purpose and self.config.wshort_desc: + self.emit_warning(ln, + f"missing initial short description on line:\n{line}") + + if not self.entry.identifier and self.entry.decl_type != "enum": + self.emit_warning(ln, + f"wrong kernel-doc identifier on line:\n{line}") + self.state = self.STATE_NORMAL + + if self.config.verbose: + self.emit_warning(ln, + f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", + warning=False) + + return + + # Failed to find an identifier. Emit a warning + self.emit_warning(ln, f"Cannot find identifier on line:\n{line}") + + def process_body(self, ln, line): + """ + STATE_BODY and STATE_BODY_MAYBE: the bulk of a kerneldoc comment. + """ + + if self.state == self.STATE_BODY_WITH_BLANK_LINE: + r = Re(r"\s*\*\s?\S") + if r.match(line): + self.dump_section() + self.entry.section = self.section_default + self.entry.new_start_line = line + self.entry.contents = "" + + if doc_sect.search(line): + self.entry.in_doc_sect = True + newsection = doc_sect.group(1) + + if newsection.lower() in ["description", "context"]: + newsection = newsection.title() + + # Special case: @return is a section, not a param description + if newsection.lower() in ["@return", "@returns", + "return", "returns"]: + newsection = "Return" + + # Perl kernel-doc has a check here for contents before sections. 
+ # the logic there is always false, as in_doc_sect variable is + # always true. So, just don't implement Wcontents_before_sections + + # .title() + newcontents = doc_sect.group(2) + if not newcontents: + newcontents = "" + + if self.entry.contents.strip("\n"): + self.dump_section() + + self.entry.new_start_line = ln + self.entry.section = newsection + self.entry.leading_space = None + + self.entry.contents = newcontents.lstrip() + if self.entry.contents: + self.entry.contents += "\n" + + self.state = self.STATE_BODY + return + + if doc_end.search(line): + self.dump_section() + + # Look for doc_com + + doc_end: + r = Re(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') + if r.match(line): + self.emit_warning(ln, f"suspicious ending line: {line}") + + self.entry.prototype = "" + self.entry.new_start_line = ln + 1 + + self.state = self.STATE_PROTO + return + + if doc_content.search(line): + cont = doc_content.group(1) + + if cont == "": + if self.entry.section == self.section_context: + self.dump_section() + + self.entry.new_start_line = ln + self.state = self.STATE_BODY + else: + if self.entry.section != self.section_default: + self.state = self.STATE_BODY_WITH_BLANK_LINE + else: + self.state = self.STATE_BODY + + self.entry.contents += "\n" + + elif self.state == self.STATE_BODY_MAYBE: + + # Continued declaration purpose + self.entry.declaration_purpose = self.entry.declaration_purpose.rstrip() + self.entry.declaration_purpose += " " + cont + + r = Re(r"\s+") + self.entry.declaration_purpose = r.sub(' ', + self.entry.declaration_purpose) + + else: + if self.entry.section.startswith('@') or \ + self.entry.section == self.section_context: + if self.entry.leading_space is None: + r = Re(r'^(\s+)') + if r.match(cont): + self.entry.leading_space = len(r.group(1)) + else: + self.entry.leading_space = 0 + + # Double-check if leading space are realy spaces + pos = 0 + for i in range(0, self.entry.leading_space): + if cont[i] != " ": + break + pos += 1 + + cont = cont[pos:] + + # NEW LOGIC: 
+ # In case it is different, update it + if self.entry.leading_space != pos: + self.entry.leading_space = pos + + self.entry.contents += cont + "\n" + return + + # Unknown line, ignore + self.emit_warning(ln, f"bad line: {line}") + + def process_inline(self, ln, line): + """STATE_INLINE: docbook comments within a prototype.""" + + if self.inline_doc_state == self.STATE_INLINE_NAME and \ + doc_inline_sect.search(line): + self.entry.section = doc_inline_sect.group(1) + self.entry.new_start_line = ln + + self.entry.contents = doc_inline_sect.group(2).lstrip() + if self.entry.contents != "": + self.entry.contents += "\n" + + self.inline_doc_state = self.STATE_INLINE_TEXT + # Documentation block end */ + return + + if doc_inline_end.search(line): + if self.entry.contents not in ["", "\n"]: + self.dump_section() + + self.state = self.STATE_PROTO + self.inline_doc_state = self.STATE_INLINE_NA + return + + if doc_content.search(line): + if self.inline_doc_state == self.STATE_INLINE_TEXT: + self.entry.contents += doc_content.group(1) + "\n" + if not self.entry.contents.strip(" ").rstrip("\n"): + self.entry.contents = "" + + elif self.inline_doc_state == self.STATE_INLINE_NAME: + self.emit_warning(ln, + f"Incorrect use of kernel-doc format: {line}") + + self.inline_doc_state = self.STATE_INLINE_ERROR + + def syscall_munge(self, ln, proto): # pylint: disable=W0613 + """ + Handle syscall definitions + """ + + is_void = False + + # Strip newlines/CR's + proto = re.sub(r'[\r\n]+', ' ', proto) + + # Check if it's a SYSCALL_DEFINE0 + if 'SYSCALL_DEFINE0' in proto: + is_void = True + + # Replace SYSCALL_DEFINE with correct return type & function name + proto = Re(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) + + r = Re(r'long\s+(sys_.*?),') + if r.search(proto): + proto = proto.replace(',', '(', count=1) + elif is_void: + proto = proto.replace(')', '(void)', count=1) + + # Now delete all of the odd-numbered commas in the proto + # so that argument types & names don't have a comma 
between them + count = 0 + length = len(proto) + + if is_void: + length = 0 # skip the loop if is_void + + for ix in range(length): + if proto[ix] == ',': + count += 1 + if count % 2 == 1: + proto = proto[:ix] + ' ' + proto[ix + 1:] + + return proto + + def tracepoint_munge(self, ln, proto): + """ + Handle tracepoint definitions + """ + + tracepointname = None + tracepointargs = None + + # Match tracepoint name based on different patterns + r = Re(r'TRACE_EVENT\((.*?),') + if r.search(proto): + tracepointname = r.group(1) + + r = Re(r'DEFINE_SINGLE_EVENT\((.*?),') + if r.search(proto): + tracepointname = r.group(1) + + r = Re(r'DEFINE_EVENT\((.*?),(.*?),') + if r.search(proto): + tracepointname = r.group(2) + + if tracepointname: + tracepointname = tracepointname.lstrip() + + r = Re(r'TP_PROTO\((.*?)\)') + if r.search(proto): + tracepointargs = r.group(1) + + if not tracepointname or not tracepointargs: + self.emit_warning(ln, + f"Unrecognized tracepoint format:\n{proto}\n") + else: + proto = f"static inline void trace_{tracepointname}({tracepointargs})" + self.entry.identifier = f"trace_{self.entry.identifier}" + + return proto + + def process_proto_function(self, ln, line): + """Ancillary routine to process a function prototype""" + + # strip C99-style comments to end of line + r = Re(r"\/\/.*$", re.S) + line = r.sub('', line) + + if Re(r'\s*#\s*define').match(line): + self.entry.prototype = line + elif line.startswith('#'): + # Strip other macros like #ifdef/#ifndef/#endif/... 
+ pass + else: + r = Re(r'([^\{]*)') + if r.match(line): + self.entry.prototype += r.group(1) + " " + + if '{' in line or ';' in line or Re(r'\s*#\s*define').match(line): + # strip comments + r = Re(r'/\*.*?\*/') + self.entry.prototype = r.sub('', self.entry.prototype) + + # strip newlines/cr's + r = Re(r'[\r\n]+') + self.entry.prototype = r.sub(' ', self.entry.prototype) + + # strip leading spaces + r = Re(r'^\s+') + self.entry.prototype = r.sub('', self.entry.prototype) + + # Handle self.entry.prototypes for function pointers like: + # int (*pcs_config)(struct foo) + + r = Re(r'^(\S+\s+)\(\s*\*(\S+)\)') + self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) + + if 'SYSCALL_DEFINE' in self.entry.prototype: + self.entry.prototype = self.syscall_munge(ln, + self.entry.prototype) + + r = Re(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') + if r.search(self.entry.prototype): + self.entry.prototype = self.tracepoint_munge(ln, + self.entry.prototype) + + self.dump_function(ln, self.entry.prototype) + self.reset_state(ln) + + def process_proto_type(self, ln, line): + """Ancillary routine to process a type""" + + # Strip newlines/cr's. + line = Re(r'[\r\n]+', re.S).sub(' ', line) + + # Strip leading spaces + line = Re(r'^\s+', re.S).sub('', line) + + # Strip trailing spaces + line = Re(r'\s+$', re.S).sub('', line) + + # Strip C99-style comments to the end of the line + line = Re(r"\/\/.*$", re.S).sub('', line) + + # To distinguish preprocessor directive from regular declaration later. 
+ if line.startswith('#'): + line += ";" + + r = Re(r'([^\{\};]*)([\{\};])(.*)') + while True: + if r.search(line): + if self.entry.prototype: + self.entry.prototype += " " + self.entry.prototype += r.group(1) + r.group(2) + + self.entry.brcount += r.group(2).count('{') + self.entry.brcount -= r.group(2).count('}') + + self.entry.brcount = max(self.entry.brcount, 0) + + if r.group(2) == ';' and self.entry.brcount == 0: + self.dump_declaration(ln, self.entry.prototype) + self.reset_state(ln) + break + + line = r.group(3) + else: + self.entry.prototype += line + break + + def process_proto(self, ln, line): + """STATE_PROTO: reading a function/whatever prototype.""" + + if doc_inline_oneline.search(line): + self.entry.section = doc_inline_oneline.group(1) + self.entry.contents = doc_inline_oneline.group(2) + + if self.entry.contents != "": + self.entry.contents += "\n" + self.dump_section(start_new=False) + + elif doc_inline_start.search(line): + self.state = self.STATE_INLINE + self.inline_doc_state = self.STATE_INLINE_NAME + + elif self.entry.decl_type == 'function': + self.process_proto_function(ln, line) + + else: + self.process_proto_type(ln, line) + + def process_docblock(self, ln, line): + """STATE_DOCBLOCK: within a DOC: block.""" + + if doc_end.search(line): + self.dump_section() + self.output_declaration("doc", None, + sectionlist=self.entry.sectionlist, + sections=self.entry.sections, module=self.config.modulename) + self.reset_state(ln) + + elif doc_content.search(line): + self.entry.contents += doc_content.group(1) + "\n" + + def run(self): + """ + Open and process each line of a C source file. + he parsing is controlled via a state machine, and the line is passed + to a different process function depending on the state. The process + function may update the state as needed. 
+ """ + + cont = False + prev = "" + prev_ln = None + + try: + with open(self.fname, "r", encoding="utf8", + errors="backslashreplace") as fp: + for ln, line in enumerate(fp): + + line = line.expandtabs().strip("\n") + + # Group continuation lines on prototypes + if self.state == self.STATE_PROTO: + if line.endswith("\\"): + prev += line.removesuffix("\\") + cont = True + + if not prev_ln: + prev_ln = ln + + continue + + if cont: + ln = prev_ln + line = prev + line + prev = "" + cont = False + prev_ln = None + + self.config.log.debug("%d %s%s: %s", + ln, self.st_name[self.state], + self.st_inline_name[self.inline_doc_state], + line) + + # TODO: not all states allow EXPORT_SYMBOL*, so this + # can be optimized later on to speedup parsing + self.process_export(self.config.function_table, line) + + # Hand this line to the appropriate state handler + if self.state == self.STATE_NORMAL: + self.process_normal(ln, line) + elif self.state == self.STATE_NAME: + self.process_name(ln, line) + elif self.state in [self.STATE_BODY, self.STATE_BODY_MAYBE, + self.STATE_BODY_WITH_BLANK_LINE]: + self.process_body(ln, line) + elif self.state == self.STATE_INLINE: # scanning for inline parameters + self.process_inline(ln, line) + elif self.state == self.STATE_PROTO: + self.process_proto(ln, line) + elif self.state == self.STATE_DOCBLOCK: + self.process_docblock(ln, line) + except OSError: + self.config.log.error(f"Error: Cannot open file {self.fname}") + self.config.errors += 1 -- cgit From c3597ab27bc0e5eae23c74a76380000a0f8481e1 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:17 +0800 Subject: scripts/kernel-doc.py: fix line number output With the Python version, the actual output happens after parsing, from records stored at self.entries. Ensure that line numbers will be properly stored there and that they'll produce the desired results at the ReST output. 
Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/5182a531d14b5fe9e1fc5da5f9dae05d66852a60.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 3ce116595546..e8c86448d6b5 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -276,7 +276,7 @@ class KernelDoc: self.entry.brcount = 0 self.entry.in_doc_sect = False - self.entry.declaration_start_line = ln + self.entry.declaration_start_line = ln + 1 def push_parameter(self, ln, decl_type, param, dtype, org_arg, declaration_name): @@ -806,8 +806,10 @@ class KernelDoc: parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, parametertypes=self.entry.parametertypes, + parameterdesc_start_lines=self.entry.parameterdesc_start_lines, sectionlist=self.entry.sectionlist, sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose) def dump_enum(self, ln, proto): @@ -882,8 +884,10 @@ class KernelDoc: module=self.config.modulename, parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, + parameterdesc_start_lines=self.entry.parameterdesc_start_lines, sectionlist=self.entry.sectionlist, sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose) def dump_declaration(self, ln, prototype): @@ -1054,8 +1058,10 @@ class KernelDoc: parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, parametertypes=self.entry.parametertypes, + parameterdesc_start_lines=self.entry.parameterdesc_start_lines, sectionlist=self.entry.sectionlist, sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, 
purpose=self.entry.declaration_purpose, func_macro=func_macro) else: @@ -1067,8 +1073,10 @@ class KernelDoc: parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, parametertypes=self.entry.parametertypes, + parameterdesc_start_lines=self.entry.parameterdesc_start_lines, sectionlist=self.entry.sectionlist, sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose, func_macro=func_macro) @@ -1112,8 +1120,10 @@ class KernelDoc: parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, parametertypes=self.entry.parametertypes, + parameterdesc_start_lines=self.entry.parameterdesc_start_lines, sectionlist=self.entry.sectionlist, sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose) return @@ -1136,6 +1146,7 @@ class KernelDoc: module=self.entry.modulename, sectionlist=self.entry.sectionlist, sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, purpose=self.entry.declaration_purpose) return @@ -1168,7 +1179,7 @@ class KernelDoc: return # start a new entry - self.reset_state(ln + 1) + self.reset_state(ln) self.entry.in_doc_sect = False # next line is always the function name @@ -1281,7 +1292,7 @@ class KernelDoc: if r.match(line): self.dump_section() self.entry.section = self.section_default - self.entry.new_start_line = line + self.entry.new_start_line = ln self.entry.contents = "" if doc_sect.search(line): @@ -1619,7 +1630,9 @@ class KernelDoc: self.dump_section() self.output_declaration("doc", None, sectionlist=self.entry.sectionlist, - sections=self.entry.sections, module=self.config.modulename) + sections=self.entry.sections, + section_start_lines=self.entry.section_start_lines, + module=self.config.modulename) self.reset_state(ln) elif doc_content.search(line): -- cgit From 408269ae35d6b88d48477af56a2376ea05e619ca Mon Sep 17 00:00:00 2001 From: Mauro 
Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:18 +0800 Subject: scripts/kernel-doc.py: fix handling of doc output check The filtering logic was seeking for the DOC name to check for symbols, but such data is stored only inside a section. Add it to the output_declaration, as it is quicker/easier to check the declaration name than to check inside each section. While here, make sure that the output for both ReST and man after filtering will be similar to what kernel-doc Perl version does. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/6d8b77af85295452c0191863ea1041f4195aeaaf.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index e8c86448d6b5..74b311c8184c 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1198,6 +1198,7 @@ class KernelDoc: else: self.entry.section = doc_block.group(1) + self.entry.identifier = self.entry.section self.state = self.STATE_DOCBLOCK return @@ -1628,7 +1629,7 @@ class KernelDoc: if doc_end.search(line): self.dump_section() - self.output_declaration("doc", None, + self.output_declaration("doc", self.entry.identifier, sectionlist=self.entry.sectionlist, sections=self.entry.sections, section_start_lines=self.entry.section_start_lines, -- cgit From 9cbc2d3b137bfdb7937265c46e9e5d7e72952841 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:20 +0800 Subject: scripts/kernel-doc.py: postpone warnings to the output plugin We don't want to have warnings displayed for symbols that weren't output. So, postpone warnings print to the output plugin, where symbol output is validated. 
Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/e6344711e390cf22af02a56bb5dd51ca67c0afb6.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 74b311c8184c..3698ef625367 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -131,23 +131,23 @@ class KernelDoc: # Place all potential outputs into an array self.entries = [] - def show_warnings(self, dtype, declaration_name): # pylint: disable=W0613 - """ - Allow filtering out warnings - """ - - # TODO: implement it - - return True - # TODO: rename to emit_message def emit_warning(self, ln, msg, warning=True): """Emit a message""" + log_msg = f"{self.fname}:{ln} {msg}" + + if self.entry: + # Delegate warning output to output logic, as this way it + # will report warnings/info only for symbols that are output + + self.entry.warnings.append((warning, log_msg)) + return + if warning: - self.config.log.warning("%s:%d %s", self.fname, ln, msg) + self.config.log.warning(log_msg) else: - self.config.log.info("%s:%d %s", self.fname, ln, msg) + self.config.log.info(log_msg) def dump_section(self, start_new=True): """ @@ -221,10 +221,9 @@ class KernelDoc: # For now, we're keeping the same name of the function just to make # easier to compare the source code of both scripts - if "declaration_start_line" not in args: - args["declaration_start_line"] = self.entry.declaration_start_line - + args["declaration_start_line"] = self.entry.declaration_start_line args["type"] = dtype + args["warnings"] = self.entry.warnings # TODO: use colletions.OrderedDict @@ -257,6 +256,8 @@ class KernelDoc: self.entry.struct_actual = "" self.entry.prototype = "" + self.entry.warnings = [] + self.entry.parameterlist = [] 
self.entry.parameterdescs = {} self.entry.parametertypes = {} @@ -328,7 +329,7 @@ class KernelDoc: if param not in self.entry.parameterdescs and not param.startswith("#"): self.entry.parameterdescs[param] = self.undescribed - if self.show_warnings(dtype, declaration_name) and "." not in param: + if "." not in param: if decl_type == 'function': dname = f"{decl_type} parameter" else: @@ -868,16 +869,14 @@ class KernelDoc: self.entry.parameterlist.append(arg) if arg not in self.entry.parameterdescs: self.entry.parameterdescs[arg] = self.undescribed - if self.show_warnings("enum", declaration_name): - self.emit_warning(ln, - f"Enum value '{arg}' not described in enum '{declaration_name}'") + self.emit_warning(ln, + f"Enum value '{arg}' not described in enum '{declaration_name}'") member_set.add(arg) for k in self.entry.parameterdescs: if k not in member_set: - if self.show_warnings("enum", declaration_name): - self.emit_warning(ln, - f"Excess enum value '%{k}' description in '{declaration_name}'") + self.emit_warning(ln, + f"Excess enum value '%{k}' description in '{declaration_name}'") self.output_declaration('enum', declaration_name, enum=declaration_name, -- cgit From 485f6f7960c468d9e27665f61517dc5fc097ea98 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:26 +0800 Subject: scripts/kernel-doc.py: adjust some coding style issues Make pylint happier by adding some missing documentation and addressing a couple of pylint warnings. 
Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/0f9d5473105e4c09c6c41e3db72cc63f1d4d55f9.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 3698ef625367..dcb9515fc40b 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -131,7 +131,7 @@ class KernelDoc: # Place all potential outputs into an array self.entries = [] - # TODO: rename to emit_message + # TODO: rename to emit_message after removal of kernel-doc.pl def emit_warning(self, ln, msg, warning=True): """Emit a message""" @@ -157,19 +157,6 @@ class KernelDoc: name = self.entry.section contents = self.entry.contents - # TODO: we can prevent dumping empty sections here with: - # - # if self.entry.contents.strip("\n"): - # if start_new: - # self.entry.section = self.section_default - # self.entry.contents = "" - # - # return - # - # But, as we want to be producing the same output of the - # venerable kernel-doc Perl tool, let's just output everything, - # at least for now - if type_param.match(name): name = type_param.group(1) @@ -205,7 +192,7 @@ class KernelDoc: self.entry.section = self.section_default self.entry.contents = "" - # TODO: rename it to store_declaration + # TODO: rename it to store_declaration after removal of kernel-doc.pl def output_declaration(self, dtype, name, **args): """ Stores the entry into an entry array. 
@@ -225,13 +212,13 @@ class KernelDoc: args["type"] = dtype args["warnings"] = self.entry.warnings - # TODO: use colletions.OrderedDict + # TODO: use colletions.OrderedDict to remove sectionlist sections = args.get('sections', {}) sectionlist = args.get('sectionlist', []) # Drop empty sections - # TODO: improve it to emit warnings + # TODO: improve empty sections logic to emit warnings for section in ["Description", "Return"]: if section in sectionlist: if not sections[section].rstrip(): @@ -636,7 +623,9 @@ class KernelDoc: # Replace macros # - # TODO: it is better to also move those to the NestedMatch logic, + # TODO: use NestedMatch for FOO($1, $2, ...) matches + # + # it is better to also move those to the NestedMatch logic, # to ensure that parenthesis will be properly matched. (Re(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), @@ -906,7 +895,6 @@ class KernelDoc: self.dump_struct(ln, prototype) return - # TODO: handle other types self.output_declaration(self.entry.decl_type, prototype, entry=self.entry) @@ -1680,10 +1668,6 @@ class KernelDoc: self.st_inline_name[self.inline_doc_state], line) - # TODO: not all states allow EXPORT_SYMBOL*, so this - # can be optimized later on to speedup parsing - self.process_export(self.config.function_table, line) - # Hand this line to the appropriate state handler if self.state == self.STATE_NORMAL: self.process_normal(ln, line) -- cgit From 78ea748f7978d39a6ee29897d3bd32e6208f74ac Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:27 +0800 Subject: scripts/lib/kdoc/kdoc_parser.py: fix Python compat with < v3.13 - str.replace count was introduced only in Python 3.13; - before Python 3.13, f-string dict arguments can't use the same delimiter of the main string. 
Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/e2b8e8361294558dae09236e4b8fbea5d86be5a3.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index dcb9515fc40b..e48ed128ca04 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1444,9 +1444,9 @@ class KernelDoc: r = Re(r'long\s+(sys_.*?),') if r.search(proto): - proto = proto.replace(',', '(', count=1) + proto = Re(',').sub('(', proto, count=1) elif is_void: - proto = proto.replace(')', '(void)', count=1) + proto = Re(r'\)').sub('(void)', proto, count=1) # Now delete all of the odd-numbered commas in the proto # so that argument types & names don't have a comma between them -- cgit From 2ab867a4941de2e9d7804e76ab002ad74c73b078 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:28 +0800 Subject: scripts/kernel-doc.py: move modulename to man class Only man output requires a modulename. Move its definition to the man class. 
Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/583085e3885b0075d16ef9961b4f2ad870f30a55.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index e48ed128ca04..f923600561f8 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -791,7 +791,6 @@ class KernelDoc: self.output_declaration(decl_type, declaration_name, struct=declaration_name, - module=self.entry.modulename, definition=declaration, parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, @@ -869,7 +868,6 @@ class KernelDoc: self.output_declaration('enum', declaration_name, enum=declaration_name, - module=self.config.modulename, parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, parameterdesc_start_lines=self.entry.parameterdesc_start_lines, @@ -1040,7 +1038,6 @@ class KernelDoc: self.output_declaration(decl_type, declaration_name, function=declaration_name, typedef=True, - module=self.config.modulename, functiontype=return_type, parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, @@ -1055,7 +1052,6 @@ class KernelDoc: self.output_declaration(decl_type, declaration_name, function=declaration_name, typedef=False, - module=self.config.modulename, functiontype=return_type, parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, @@ -1102,7 +1098,6 @@ class KernelDoc: self.output_declaration(decl_type, declaration_name, function=declaration_name, typedef=True, - module=self.entry.modulename, functiontype=return_type, parameterlist=self.entry.parameterlist, parameterdescs=self.entry.parameterdescs, @@ -1130,7 +1125,6 @@ class KernelDoc: self.output_declaration('typedef', declaration_name, 
typedef=declaration_name, - module=self.entry.modulename, sectionlist=self.entry.sectionlist, sections=self.entry.sections, section_start_lines=self.entry.section_start_lines, @@ -1619,8 +1613,7 @@ class KernelDoc: self.output_declaration("doc", self.entry.identifier, sectionlist=self.entry.sectionlist, sections=self.entry.sections, - section_start_lines=self.entry.section_start_lines, - module=self.config.modulename) + section_start_lines=self.entry.section_start_lines) self.reset_state(ln) elif doc_content.search(line): -- cgit From e4b2bd908c3d8f071d4fac6e588fffc6110c1b1f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:30 +0800 Subject: scripts/lib/kdoc/kdoc_parser.py: remove a python 3.9 dependency str.removesuffix() was added on Python 3.9, but rstrip() actually does the same thing, as we just want to remove a single character. It is also shorter. So, use it. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/f64cc4adef107ada26da4bfb7e4b7002dd783173.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index f923600561f8..77e8bfeccc8e 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1641,7 +1641,7 @@ class KernelDoc: # Group continuation lines on prototypes if self.state == self.STATE_PROTO: if line.endswith("\\"): - prev += line.removesuffix("\\") + prev += line.rstrip("\\") cont = True if not prev_ln: -- cgit From 11afeab6d74d1be80420b47113c4893c88dcc04b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:31 +0800 Subject: scripts/kernel-doc.py: Properly handle Werror and exit codes The original kernel-doc script has a logic to return warnings as errors, and to report the number of warnings found, if in verbose 
mode. Implement it to be fully compatible with the original script. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/de33b0cebd9fdf82d8b221bcfe41db7269286222.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 77e8bfeccc8e..43e6ffbdcc2c 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -137,17 +137,18 @@ class KernelDoc: log_msg = f"{self.fname}:{ln} {msg}" + if not warning: + self.config.log.info(log_msg) + return + if self.entry: # Delegate warning output to output logic, as this way it # will report warnings/info only for symbols that are output - self.entry.warnings.append((warning, log_msg)) + self.entry.warnings.append(log_msg) return - if warning: - self.config.log.warning(log_msg) - else: - self.config.log.info(log_msg) + self.config.log.warning(log_msg) def dump_section(self, start_new=True): """ @@ -556,7 +557,6 @@ class KernelDoc: if not members: self.emit_warning(ln, f"{proto} error: Cannot parse struct or union!") - self.config.errors += 1 return if self.entry.identifier != declaration_name: @@ -831,7 +831,6 @@ class KernelDoc: if not members: self.emit_warning(ln, f"{proto}: error: Cannot parse enum!") - self.config.errors += 1 return if self.entry.identifier != declaration_name: @@ -1132,7 +1131,6 @@ class KernelDoc: return self.emit_warning(ln, "error: Cannot parse typedef!") - self.config.errors += 1 @staticmethod def process_export(function_table, line): @@ -1677,4 +1675,3 @@ class KernelDoc: self.process_docblock(ln, line) except OSError: self.config.log.error(f"Error: Cannot open file {self.fname}") - self.config.errors += 1 -- cgit From 16740c29dbf3275a22691d3d7c63701992872898 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: 
Tue, 8 Apr 2025 18:09:34 +0800 Subject: scripts/kernel_doc.py: better handle exported symbols Change the logic which detects internal/external symbols in a way that we can re-use it when calling via Sphinx extension. While here, remove an unused self.config var and let it clearer that self.config variables are read-only. This helps to allow handling multiple times in parallel if ever needed. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/6a69ba8d2b7ee6a6427abb53e60d09bd4d3565ee.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 52 +++++++++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 7 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 43e6ffbdcc2c..33f00c77dd5f 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1133,21 +1133,25 @@ class KernelDoc: self.emit_warning(ln, "error: Cannot parse typedef!") @staticmethod - def process_export(function_table, line): + def process_export(function_set, line): """ process EXPORT_SYMBOL* tags - This method is called both internally and externally, so, it - doesn't use self. + This method doesn't use any variable from the class, so declare it + with a staticmethod decorator. """ + # Note: it accepts only one EXPORT_SYMBOL* per line, as having + # multiple export lines would violate Kernel coding style. 
+ if export_symbol.search(line): symbol = export_symbol.group(2) - function_table.add(symbol) + function_set.add(symbol) + return if export_symbol_ns.search(line): symbol = export_symbol_ns.group(2) - function_table.add(symbol) + function_set.add(symbol) def process_normal(self, ln, line): """ @@ -1617,17 +1621,39 @@ class KernelDoc: elif doc_content.search(line): self.entry.contents += doc_content.group(1) + "\n" - def run(self): + def parse_export(self): + """ + Parses EXPORT_SYMBOL* macros from a single Kernel source file. + """ + + export_table = set() + + try: + with open(self.fname, "r", encoding="utf8", + errors="backslashreplace") as fp: + + for line in fp: + self.process_export(export_table, line) + + except IOError: + return None + + return export_table + + def parse_kdoc(self): """ Open and process each line of a C source file. - he parsing is controlled via a state machine, and the line is passed + The parsing is controlled via a state machine, and the line is passed to a different process function depending on the state. The process function may update the state as needed. + + Besides parsing kernel-doc tags, it also parses export symbols. """ cont = False prev = "" prev_ln = None + export_table = set() try: with open(self.fname, "r", encoding="utf8", @@ -1659,6 +1685,16 @@ class KernelDoc: self.st_inline_name[self.inline_doc_state], line) + # This is an optimization over the original script. + # There, when export_file was used for the same file, + # it was read twice. Here, we use the already-existing + # loop to parse exported symbols as well. + # + # TODO: It should be noticed that not all states are + # needed here. On a future cleanup, process export only + # at the states that aren't handling comment markups. 
+ self.process_export(export_table, line) + # Hand this line to the appropriate state handler if self.state == self.STATE_NORMAL: self.process_normal(ln, line) @@ -1675,3 +1711,5 @@ class KernelDoc: self.process_docblock(ln, line) except OSError: self.config.log.error(f"Error: Cannot open file {self.fname}") + + return export_table, self.entries -- cgit From 04a383ced6965fedc9c1b6c83d841acce076b53c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 8 Apr 2025 18:09:35 +0800 Subject: scripts/kernel-doc.py: Rename the kernel doc Re class to KernRe Using just "Re" makes it harder to distinguish from the native "re" class. So, let's rename it. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/4e095ecd5235a3e811ddcf5bad4cfb92f1da0a4a.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 264 ++++++++++++++++++++-------------------- 1 file changed, 132 insertions(+), 132 deletions(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 33f00c77dd5f..f60722bcc687 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -16,7 +16,7 @@ import argparse import re from pprint import pformat -from kdoc_re import NestedMatch, Re +from kdoc_re import NestedMatch, KernRe # @@ -29,12 +29,12 @@ from kdoc_re import NestedMatch, Re # # Allow whitespace at end of comment start. 
-doc_start = Re(r'^/\*\*\s*$', cache=False) +doc_start = KernRe(r'^/\*\*\s*$', cache=False) -doc_end = Re(r'\*/', cache=False) -doc_com = Re(r'\s*\*\s*', cache=False) -doc_com_body = Re(r'\s*\* ?', cache=False) -doc_decl = doc_com + Re(r'(\w+)', cache=False) +doc_end = KernRe(r'\*/', cache=False) +doc_com = KernRe(r'\s*\*\s*', cache=False) +doc_com_body = KernRe(r'\s*\* ?', cache=False) +doc_decl = doc_com + KernRe(r'(\w+)', cache=False) # @params and a strictly limited set of supported section names # Specifically: @@ -44,22 +44,22 @@ doc_decl = doc_com + Re(r'(\w+)', cache=False) # while trying to not match literal block starts like "example::" # doc_sect = doc_com + \ - Re(r'\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$', + KernRe(r'\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$', flags=re.I, cache=False) -doc_content = doc_com_body + Re(r'(.*)', cache=False) -doc_block = doc_com + Re(r'DOC:\s*(.*)?', cache=False) -doc_inline_start = Re(r'^\s*/\*\*\s*$', cache=False) -doc_inline_sect = Re(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) -doc_inline_end = Re(r'^\s*\*/\s*$', cache=False) -doc_inline_oneline = Re(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) -attribute = Re(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", +doc_content = doc_com_body + KernRe(r'(.*)', cache=False) +doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) +doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) +doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) +doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) +doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) +attribute = KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False) -export_symbol = Re(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) -export_symbol_ns = Re(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', 
cache=False) +export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) +export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) -type_param = Re(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) +type_param = KernRe(r"\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) class KernelDoc: @@ -278,10 +278,10 @@ class KernelDoc: self.entry.anon_struct_union = False - param = Re(r'[\[\)].*').sub('', param, count=1) + param = KernRe(r'[\[\)].*').sub('', param, count=1) if dtype == "" and param.endswith("..."): - if Re(r'\w\.\.\.$').search(param): + if KernRe(r'\w\.\.\.$').search(param): # For named variable parameters of the form `x...`, # remove the dots param = param[:-3] @@ -335,7 +335,7 @@ class KernelDoc: # to ignore "[blah" in a parameter string. self.entry.parameterlist.append(param) - org_arg = Re(r'\s\s+').sub(' ', org_arg) + org_arg = KernRe(r'\s\s+').sub(' ', org_arg) self.entry.parametertypes[param] = org_arg def save_struct_actual(self, actual): @@ -344,7 +344,7 @@ class KernelDoc: one string item. 
""" - actual = Re(r'\s*').sub("", actual, count=1) + actual = KernRe(r'\s*').sub("", actual, count=1) self.entry.struct_actual += actual + " " @@ -355,20 +355,20 @@ class KernelDoc: """ # temporarily replace all commas inside function pointer definition - arg_expr = Re(r'(\([^\),]+),') + arg_expr = KernRe(r'(\([^\),]+),') while arg_expr.search(args): args = arg_expr.sub(r"\1#", args) for arg in args.split(splitter): # Strip comments - arg = Re(r'\/\*.*\*\/').sub('', arg) + arg = KernRe(r'\/\*.*\*\/').sub('', arg) # Ignore argument attributes - arg = Re(r'\sPOS0?\s').sub(' ', arg) + arg = KernRe(r'\sPOS0?\s').sub(' ', arg) # Strip leading/trailing spaces arg = arg.strip() - arg = Re(r'\s+').sub(' ', arg, count=1) + arg = KernRe(r'\s+').sub(' ', arg, count=1) if arg.startswith('#'): # Treat preprocessor directive as a typeless variable just to fill @@ -379,63 +379,63 @@ class KernelDoc: self.push_parameter(ln, decl_type, arg, "", "", declaration_name) - elif Re(r'\(.+\)\s*\(').search(arg): + elif KernRe(r'\(.+\)\s*\(').search(arg): # Pointer-to-function arg = arg.replace('#', ',') - r = Re(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)') + r = KernRe(r'[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)') if r.match(arg): param = r.group(1) else: self.emit_warning(ln, f"Invalid param: {arg}") param = arg - dtype = Re(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) + dtype = KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) self.save_struct_actual(param) self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) - elif Re(r'\(.+\)\s*\[').search(arg): + elif KernRe(r'\(.+\)\s*\[').search(arg): # Array-of-pointers arg = arg.replace('#', ',') - r = Re(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)') + r = KernRe(r'[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)') if r.match(arg): param = r.group(1) else: self.emit_warning(ln, f"Invalid param: {arg}") param = arg - dtype = Re(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) + dtype = 
KernRe(r'([^\(]+\(\*?)\s*' + re.escape(param)).sub(r'\1', arg) self.save_struct_actual(param) self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) elif arg: - arg = Re(r'\s*:\s*').sub(":", arg) - arg = Re(r'\s*\[').sub('[', arg) + arg = KernRe(r'\s*:\s*').sub(":", arg) + arg = KernRe(r'\s*\[').sub('[', arg) - args = Re(r'\s*,\s*').split(arg) + args = KernRe(r'\s*,\s*').split(arg) if args[0] and '*' in args[0]: args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) first_arg = [] - r = Re(r'^(.*\s+)(.*?\[.*\].*)$') + r = KernRe(r'^(.*\s+)(.*?\[.*\].*)$') if args[0] and r.match(args[0]): args.pop(0) first_arg.extend(r.group(1)) first_arg.append(r.group(2)) else: - first_arg = Re(r'\s+').split(args.pop(0)) + first_arg = KernRe(r'\s+').split(args.pop(0)) args.insert(0, first_arg.pop()) dtype = ' '.join(first_arg) for param in args: - if Re(r'^(\*+)\s*(.*)').match(param): - r = Re(r'^(\*+)\s*(.*)') + if KernRe(r'^(\*+)\s*(.*)').match(param): + r = KernRe(r'^(\*+)\s*(.*)') if not r.match(param): self.emit_warning(ln, f"Invalid param: {param}") continue @@ -447,8 +447,8 @@ class KernelDoc: f"{dtype} {r.group(1)}", arg, declaration_name) - elif Re(r'(.*?):(\w+)').search(param): - r = Re(r'(.*?):(\w+)') + elif KernRe(r'(.*?):(\w+)').search(param): + r = KernRe(r'(.*?):(\w+)') if not r.match(param): self.emit_warning(ln, f"Invalid param: {param}") continue @@ -477,7 +477,7 @@ class KernelDoc: err = True for px in range(len(prms)): # pylint: disable=C0200 prm_clean = prms[px] - prm_clean = Re(r'\[.*\]').sub('', prm_clean) + prm_clean = KernRe(r'\[.*\]').sub('', prm_clean) prm_clean = attribute.sub('', prm_clean) # ignore array size in a parameter string; @@ -486,7 +486,7 @@ class KernelDoc: # and this appears in @prms as "addr[6" since the # parameter list is split at spaces; # hence just ignore "[..." 
for the sections check; - prm_clean = Re(r'\[.*').sub('', prm_clean) + prm_clean = KernRe(r'\[.*').sub('', prm_clean) if prm_clean == sects[sx]: err = False @@ -512,7 +512,7 @@ class KernelDoc: # Ignore an empty return type (It's a macro) # Ignore functions with a "void" return type (but not "void *") - if not return_type or Re(r'void\s*\w*\s*$').search(return_type): + if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type): return if not self.entry.sections.get("Return", None): @@ -535,20 +535,20 @@ class KernelDoc: ] definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" - struct_members = Re(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)') + struct_members = KernRe(type_pattern + r'([^\{\};]+)(\{)([^\{\}]*)(\})([^\{\}\;]*)(\;)') # Extract struct/union definition members = None declaration_name = None decl_type = None - r = Re(type_pattern + r'\s+(\w+)\s*' + definition_body) + r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) if r.search(proto): decl_type = r.group(1) declaration_name = r.group(2) members = r.group(3) else: - r = Re(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') + r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') if r.search(proto): decl_type = r.group(1) @@ -567,21 +567,21 @@ class KernelDoc: args_pattern = r'([^,)]+)' sub_prefixes = [ - (Re(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''), - (Re(r'\/\*\s*private:.*', re.S | re.I), ''), + (KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', re.S | re.I), ''), + (KernRe(r'\/\*\s*private:.*', re.S | re.I), ''), # Strip comments - (Re(r'\/\*.*?\*\/', re.S), ''), + (KernRe(r'\/\*.*?\*\/', re.S), ''), # Strip attributes (attribute, ' '), - (Re(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), - (Re(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), - (Re(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), - (Re(r'\s*__packed\s*', re.S), ' '), - (Re(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' 
'), - (Re(r'\s*____cacheline_aligned_in_smp', re.S), ' '), - (Re(r'\s*____cacheline_aligned', re.S), ' '), + (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), + (KernRe(r'\s*__packed\s*', re.S), ' '), + (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), + (KernRe(r'\s*____cacheline_aligned', re.S), ' '), # Unwrap struct_group macros based on this definition: # __struct_group(TAG, NAME, ATTRS, MEMBERS...) @@ -616,10 +616,10 @@ class KernelDoc: # matched. So, the implementation to drop STRUCT_GROUP() will be # handled in separate. - (Re(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), - (Re(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), - (Re(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), - (Re(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), + (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), + (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), # Replace macros # @@ -628,15 +628,15 @@ class KernelDoc: # it is better to also move those to the NestedMatch logic, # to ensure that parenthesis will be properly matched. 
- (Re(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), - (Re(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), - (Re(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), - (Re(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'), - (Re(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), - (Re(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), - (Re(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'), - (Re(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'), - (Re(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'), + (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), + (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), + (KernRe(r'DECLARE_BITMAP\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), + (KernRe(r'DECLARE_HASHTABLE\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'unsigned long \1[1 << ((\2) - 1)]'), + (KernRe(r'DECLARE_KFIFO\s*\(' + args_pattern + r',\s*' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\2 *\1'), + (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + args_pattern + r',\s*' + args_pattern + r'\)', re.S), r'\1 \2[]'), + (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + args_pattern + r'\)', re.S), r'dma_addr_t \1'), + (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + args_pattern + r'\)', re.S), r'__u32 \1'), ] # 
Regexes here are guaranteed to have the end limiter matching @@ -689,8 +689,8 @@ class KernelDoc: s_id = s_id.strip() newmember += f"{maintype} {s_id}; " - s_id = Re(r'[:\[].*').sub('', s_id) - s_id = Re(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) + s_id = KernRe(r'[:\[].*').sub('', s_id) + s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) for arg in content.split(';'): arg = arg.strip() @@ -698,7 +698,7 @@ class KernelDoc: if not arg: continue - r = Re(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)') + r = KernRe(r'^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)') if r.match(arg): # Pointer-to-function dtype = r.group(1) @@ -717,15 +717,15 @@ class KernelDoc: else: arg = arg.strip() # Handle bitmaps - arg = Re(r':\s*\d+\s*').sub('', arg) + arg = KernRe(r':\s*\d+\s*').sub('', arg) # Handle arrays - arg = Re(r'\[.*\]').sub('', arg) + arg = KernRe(r'\[.*\]').sub('', arg) # Handle multiple IDs - arg = Re(r'\s*,\s*').sub(',', arg) + arg = KernRe(r'\s*,\s*').sub(',', arg) - r = Re(r'(.*)\s+([\S+,]+)') + r = KernRe(r'(.*)\s+([\S+,]+)') if r.search(arg): dtype = r.group(1) @@ -735,7 +735,7 @@ class KernelDoc: continue for name in names.split(','): - name = Re(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip() + name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name).strip() if not name: continue @@ -757,12 +757,12 @@ class KernelDoc: self.entry.sectcheck, self.entry.struct_actual) # Adjust declaration for better display - declaration = Re(r'([\{;])').sub(r'\1\n', declaration) - declaration = Re(r'\}\s+;').sub('};', declaration) + declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) + declaration = KernRe(r'\}\s+;').sub('};', declaration) # Better handle inlined enums while True: - r = Re(r'(enum\s+\{[^\}]+),([^\n])') + r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') if not r.search(declaration): break @@ -774,7 +774,7 @@ class KernelDoc: for clause in def_args: clause = clause.strip() - clause = Re(r'\s+').sub(' ', clause, count=1) + clause = KernRe(r'\s+').sub(' ', clause, count=1) if not clause: continue @@ 
-782,7 +782,7 @@ class KernelDoc: if '}' in clause and level > 1: level -= 1 - if not Re(r'^\s*#').match(clause): + if not KernRe(r'^\s*#').match(clause): declaration += "\t" * level declaration += "\t" + clause + "\n" @@ -807,24 +807,24 @@ class KernelDoc: """ # Ignore members marked private - proto = Re(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S).sub('', proto) - proto = Re(r'\/\*\s*private:.*}', flags=re.S).sub('}', proto) + proto = KernRe(r'\/\*\s*private:.*?\/\*\s*public:.*?\*\/', flags=re.S).sub('', proto) + proto = KernRe(r'\/\*\s*private:.*}', flags=re.S).sub('}', proto) # Strip comments - proto = Re(r'\/\*.*?\*\/', flags=re.S).sub('', proto) + proto = KernRe(r'\/\*.*?\*\/', flags=re.S).sub('', proto) # Strip #define macros inside enums - proto = Re(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) + proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) members = None declaration_name = None - r = Re(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') + r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') if r.search(proto): declaration_name = r.group(2) members = r.group(1).rstrip() else: - r = Re(r'enum\s+(\w*)\s*\{(.*)\}') + r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') if r.match(proto): declaration_name = r.group(1) members = r.group(2).rstrip() @@ -847,12 +847,12 @@ class KernelDoc: member_set = set() - members = Re(r'\([^;]*?[\)]').sub('', members) + members = KernRe(r'\([^;]*?[\)]').sub('', members) for arg in members.split(','): if not arg: continue - arg = Re(r'^\s*(\w+).*').sub(r'\1', arg) + arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) self.entry.parameterlist.append(arg) if arg not in self.entry.parameterdescs: self.entry.parameterdescs[arg] = self.undescribed @@ -947,10 +947,10 @@ class KernelDoc: ] for search, sub, flags in sub_prefixes: - prototype = Re(search, flags).sub(sub, prototype) + prototype = KernRe(search, flags).sub(sub, prototype) # Macros are a special case, as they change the 
prototype format - new_proto = Re(r"^#\s*define\s+").sub("", prototype) + new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) if new_proto != prototype: is_define_proto = True prototype = new_proto @@ -987,7 +987,7 @@ class KernelDoc: found = False if is_define_proto: - r = Re(r'^()(' + name + r')\s+') + r = KernRe(r'^()(' + name + r')\s+') if r.search(prototype): return_type = '' @@ -1004,7 +1004,7 @@ class KernelDoc: ] for p in patterns: - r = Re(p) + r = KernRe(p) if r.match(prototype): @@ -1071,11 +1071,11 @@ class KernelDoc: typedef_ident = r'\*?\s*(\w\S+)\s*' typedef_args = r'\s*\((.*)\);' - typedef1 = Re(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) - typedef2 = Re(r'typedef' + typedef_type + typedef_ident + typedef_args) + typedef1 = KernRe(r'typedef' + typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) + typedef2 = KernRe(r'typedef' + typedef_type + typedef_ident + typedef_args) # Strip comments - proto = Re(r'/\*.*?\*/', flags=re.S).sub('', proto) + proto = KernRe(r'/\*.*?\*/', flags=re.S).sub('', proto) # Parse function typedef prototypes for r in [typedef1, typedef2]: @@ -1109,12 +1109,12 @@ class KernelDoc: return # Handle nested parentheses or brackets - r = Re(r'(\(*.\)\s*|\[*.\]\s*);$') + r = KernRe(r'(\(*.\)\s*|\[*.\]\s*);$') while r.search(proto): proto = r.sub('', proto) # Parse simple typedefs - r = Re(r'typedef.*\s+(\w+)\s*;') + r = KernRe(r'typedef.*\s+(\w+)\s*;') if r.match(proto): declaration_name = r.group(1) @@ -1195,12 +1195,12 @@ class KernelDoc: decl_end = r"(?:[-:].*)" # end of the name part # test for pointer declaration type, foo * bar() - desc - r = Re(fr"^{decl_start}([\w\s]+?){parenthesis}?\s*{decl_end}?$") + r = KernRe(fr"^{decl_start}([\w\s]+?){parenthesis}?\s*{decl_end}?$") if r.search(line): self.entry.identifier = r.group(1) # Test for data declaration - r = Re(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)") + r = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)") if 
r.search(line): self.entry.decl_type = r.group(1) self.entry.identifier = r.group(2) @@ -1209,15 +1209,15 @@ class KernelDoc: # Look for foo() or static void foo() - description; # or misspelt identifier - r1 = Re(fr"^{decl_start}{fn_type}(\w+)\s*{parenthesis}\s*{decl_end}?$") - r2 = Re(fr"^{decl_start}{fn_type}(\w+[^-:]*){parenthesis}\s*{decl_end}$") + r1 = KernRe(fr"^{decl_start}{fn_type}(\w+)\s*{parenthesis}\s*{decl_end}?$") + r2 = KernRe(fr"^{decl_start}{fn_type}(\w+[^-:]*){parenthesis}\s*{decl_end}$") for r in [r1, r2]: if r.search(line): self.entry.identifier = r.group(1) self.entry.decl_type = "function" - r = Re(r"define\s+") + r = KernRe(r"define\s+") self.entry.identifier = r.sub("", self.entry.identifier) self.entry.is_kernel_comment = True break @@ -1230,12 +1230,12 @@ class KernelDoc: self.entry.section = self.section_default self.entry.new_start_line = ln + 1 - r = Re("[-:](.*)") + r = KernRe("[-:](.*)") if r.search(line): # strip leading/trailing/multiple spaces self.entry.descr = r.group(1).strip(" ") - r = Re(r"\s+") + r = KernRe(r"\s+") self.entry.descr = r.sub(" ", self.entry.descr) self.entry.declaration_purpose = self.entry.descr self.state = self.STATE_BODY_MAYBE @@ -1272,7 +1272,7 @@ class KernelDoc: """ if self.state == self.STATE_BODY_WITH_BLANK_LINE: - r = Re(r"\s*\*\s?\S") + r = KernRe(r"\s*\*\s?\S") if r.match(line): self.dump_section() self.entry.section = self.section_default @@ -1318,7 +1318,7 @@ class KernelDoc: self.dump_section() # Look for doc_com + <text> + doc_end: - r = Re(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') + r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') if r.match(line): self.emit_warning(ln, f"suspicious ending line: {line}") @@ -1351,7 +1351,7 @@ class KernelDoc: self.entry.declaration_purpose = self.entry.declaration_purpose.rstrip() self.entry.declaration_purpose += " " + cont - r = Re(r"\s+") + r = KernRe(r"\s+") self.entry.declaration_purpose = r.sub(' ', self.entry.declaration_purpose) @@ -1359,7 +1359,7 @@ class KernelDoc: if 
self.entry.section.startswith('@') or \ self.entry.section == self.section_context: if self.entry.leading_space is None: - r = Re(r'^(\s+)') + r = KernRe(r'^(\s+)') if r.match(cont): self.entry.leading_space = len(r.group(1)) else: @@ -1436,13 +1436,13 @@ class KernelDoc: is_void = True # Replace SYSCALL_DEFINE with correct return type & function name - proto = Re(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) + proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) - r = Re(r'long\s+(sys_.*?),') + r = KernRe(r'long\s+(sys_.*?),') if r.search(proto): - proto = Re(',').sub('(', proto, count=1) + proto = KernRe(',').sub('(', proto, count=1) elif is_void: - proto = Re(r'\)').sub('(void)', proto, count=1) + proto = KernRe(r'\)').sub('(void)', proto, count=1) # Now delete all of the odd-numbered commas in the proto # so that argument types & names don't have a comma between them @@ -1469,22 +1469,22 @@ class KernelDoc: tracepointargs = None # Match tracepoint name based on different patterns - r = Re(r'TRACE_EVENT\((.*?),') + r = KernRe(r'TRACE_EVENT\((.*?),') if r.search(proto): tracepointname = r.group(1) - r = Re(r'DEFINE_SINGLE_EVENT\((.*?),') + r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') if r.search(proto): tracepointname = r.group(1) - r = Re(r'DEFINE_EVENT\((.*?),(.*?),') + r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') if r.search(proto): tracepointname = r.group(2) if tracepointname: tracepointname = tracepointname.lstrip() - r = Re(r'TP_PROTO\((.*?)\)') + r = KernRe(r'TP_PROTO\((.*?)\)') if r.search(proto): tracepointargs = r.group(1) @@ -1501,43 +1501,43 @@ class KernelDoc: """Ancillary routine to process a function prototype""" # strip C99-style comments to end of line - r = Re(r"\/\/.*$", re.S) + r = KernRe(r"\/\/.*$", re.S) line = r.sub('', line) - if Re(r'\s*#\s*define').match(line): + if KernRe(r'\s*#\s*define').match(line): self.entry.prototype = line elif line.startswith('#'): # Strip other macros like #ifdef/#ifndef/#endif/... 
pass else: - r = Re(r'([^\{]*)') + r = KernRe(r'([^\{]*)') if r.match(line): self.entry.prototype += r.group(1) + " " - if '{' in line or ';' in line or Re(r'\s*#\s*define').match(line): + if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): # strip comments - r = Re(r'/\*.*?\*/') + r = KernRe(r'/\*.*?\*/') self.entry.prototype = r.sub('', self.entry.prototype) # strip newlines/cr's - r = Re(r'[\r\n]+') + r = KernRe(r'[\r\n]+') self.entry.prototype = r.sub(' ', self.entry.prototype) # strip leading spaces - r = Re(r'^\s+') + r = KernRe(r'^\s+') self.entry.prototype = r.sub('', self.entry.prototype) # Handle self.entry.prototypes for function pointers like: # int (*pcs_config)(struct foo) - r = Re(r'^(\S+\s+)\(\s*\*(\S+)\)') + r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) if 'SYSCALL_DEFINE' in self.entry.prototype: self.entry.prototype = self.syscall_munge(ln, self.entry.prototype) - r = Re(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') + r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') if r.search(self.entry.prototype): self.entry.prototype = self.tracepoint_munge(ln, self.entry.prototype) @@ -1549,22 +1549,22 @@ class KernelDoc: """Ancillary routine to process a type""" # Strip newlines/cr's. - line = Re(r'[\r\n]+', re.S).sub(' ', line) + line = KernRe(r'[\r\n]+', re.S).sub(' ', line) # Strip leading spaces - line = Re(r'^\s+', re.S).sub('', line) + line = KernRe(r'^\s+', re.S).sub('', line) # Strip trailing spaces - line = Re(r'\s+$', re.S).sub('', line) + line = KernRe(r'\s+$', re.S).sub('', line) # Strip C99-style comments to the end of the line - line = Re(r"\/\/.*$", re.S).sub('', line) + line = KernRe(r"\/\/.*$", re.S).sub('', line) # To distinguish preprocessor directive from regular declaration later. 
if line.startswith('#'): line += ";" - r = Re(r'([^\{\};]*)([\{\};])(.*)') + r = KernRe(r'([^\{\};]*)([\{\};])(.*)') while True: if r.search(line): if self.entry.prototype: -- cgit From de258fa8ca8d72ef17f4d71162cfbbd2d9f397e6 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Tue, 8 Apr 2025 18:09:36 +0800 Subject: scripts: kernel-doc: fix parsing function-like typedefs (again) Typedefs like typedef struct phylink_pcs *(*pcs_xlate_t)(const u64 *args); have a typedef_type that ends with a * and therefore has no word boundary. Add an extra clause for the final group of the typedef_type so we only require a word boundary if we match a word. [mchehab: modify also kernel-doc.py, as we're deprecating the perl version] Fixes: 7d2c6b1edf79 ("scripts: kernel-doc: fix parsing function-like typedefs") Signed-off-by: Sean Anderson Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/e0abb103c73a96d76602d909f60ab8fd6e2fd0bd.1744106242.git.mchehab+huawei@kernel.org --- scripts/lib/kdoc/kdoc_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts/lib/kdoc/kdoc_parser.py') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index f60722bcc687..4f036c720b36 100755 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -1067,7 +1067,7 @@ class KernelDoc: Stores a typedef inside self.entries array. """ - typedef_type = r'((?:\s+[\w\*]+\b){1,8})\s*' + typedef_type = r'((?:\s+[\w\*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' typedef_ident = r'\*?\s*(\w\S+)\s*' typedef_args = r'\s*\((.*)\);' -- cgit