Diffstat (limited to 'tools/docs')
-rwxr-xr-x  tools/docs/gen-redirects.py            54
-rwxr-xr-x  tools/docs/gen-renames.py             130
-rw-r--r--  tools/docs/lib/__init__.py              0
-rw-r--r--  tools/docs/lib/enrich_formatter.py     70
-rwxr-xr-x  tools/docs/lib/parse_data_structs.py  452
-rwxr-xr-x  tools/docs/parse-headers.py            60
6 files changed, 766 insertions, 0 deletions
diff --git a/tools/docs/gen-redirects.py b/tools/docs/gen-redirects.py
new file mode 100755
index 000000000000..6a6ebf6f42dc
--- /dev/null
+++ b/tools/docs/gen-redirects.py
@@ -0,0 +1,54 @@
+#! /usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright © 2025, Oracle and/or its affiliates.
+# Author: Vegard Nossum <vegard.nossum@oracle.com>
+
+"""Generate HTML redirects for renamed Documentation/**.rst files using
+the output of tools/docs/gen-renames.py.
+
+Example:
+
+    tools/docs/gen-redirects.py --output Documentation/output/ < Documentation/.renames.txt
+"""
+
+import argparse
+import os
+import sys
+
+parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.add_argument('-o', '--output', help='output directory')
+
+args = parser.parse_args()
+
+for line in sys.stdin:
+    line = line.rstrip('\n')
+
+    old_name, new_name = line.split(' ', 2)
+
+    old_html_path = os.path.join(args.output, old_name + '.html')
+    new_html_path = os.path.join(args.output, new_name + '.html')
+
+    if not os.path.exists(new_html_path):
+        print(f"warning: target does not exist: {new_html_path} (redirect from {old_html_path})")
+        continue
+
+    old_html_dir = os.path.dirname(old_html_path)
+    if not os.path.exists(old_html_dir):
+        os.makedirs(old_html_dir)
+
+    relpath = os.path.relpath(new_name, os.path.dirname(old_name)) + '.html'
+
+    with open(old_html_path, 'w') as f:
+        print(f"""\
+<!DOCTYPE html>
+
+<html lang="en">
+<head>
+  <title>This page has moved</title>
+  <meta http-equiv="refresh" content="0; url={relpath}">
+</head>
+<body>
+<p>This page has moved to <a href="{relpath}">{new_name}</a>.</p>
+</body>
+</html>""", file=f)
diff --git a/tools/docs/gen-renames.py b/tools/docs/gen-renames.py
new file mode 100755
index 000000000000..8cb3b2157d83
--- /dev/null
+++ b/tools/docs/gen-renames.py
@@ -0,0 +1,130 @@
+#! /usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright © 2025, Oracle and/or its affiliates.
+# Author: Vegard Nossum <vegard.nossum@oracle.com>
+
+"""Trawl repository history for renames of Documentation/**.rst files.
+
+Example:
+
+    tools/docs/gen-renames.py --rev HEAD > Documentation/.renames.txt
+"""
+
+import argparse
+import itertools
+import os
+import subprocess
+import sys
+
+parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.add_argument('--rev', default='HEAD', help='generate renames up to this revision')
+
+args = parser.parse_args()
+
+def normalize(path):
+    prefix = 'Documentation/'
+    suffix = '.rst'
+
+    assert path.startswith(prefix)
+    assert path.endswith(suffix)
+
+    return path[len(prefix):-len(suffix)]
+
+class Name(object):
+    def __init__(self, name):
+        self.names = [name]
+
+    def rename(self, new_name):
+        self.names.append(new_name)
+
+names = {
+}
+
+for line in subprocess.check_output([
+    'git', 'log',
+    '--reverse',
+    '--oneline',
+    '--find-renames',
+    '--diff-filter=RD',
+    '--name-status',
+    '--format=commit %H',
+    # ~v4.8-ish is when Sphinx/.rst was added in the first place
+    f'v4.8..{args.rev}',
+    '--',
+    'Documentation/'
+], text=True).splitlines():
+    # rename
+    if line.startswith('R'):
+        _, old, new = line[1:].split('\t', 2)
+
+        if old.endswith('.rst') and new.endswith('.rst'):
+            old = normalize(old)
+            new = normalize(new)
+
+            name = names.get(old)
+            if name is None:
+                name = Name(old)
+            else:
+                del names[old]
+
+            name.rename(new)
+            names[new] = name
+
+        continue
+
+    # delete
+    if line.startswith('D'):
+        _, old = line.split('\t', 1)
+
+        if old.endswith('.rst'):
+            old = normalize(old)
+
+            # TODO: we could save added/modified files as well and propose
+            # them as alternatives
+            name = names.get(old)
+            if name is None:
+                pass
+            else:
+                del names[old]
+
+        continue
+
+#
+# Get the set of current files so we can sanity check that we aren't
+# redirecting any of those
+#
+
+current_files = set()
+for line in subprocess.check_output([
+    'git', 'ls-tree',
+    '-r',
+    '--name-only',
+    args.rev,
+    'Documentation/',
+], text=True).splitlines():
+    if line.endswith('.rst'):
+        current_files.add(normalize(line))
+
+#
+# Format/group/output result
+#
+
+result = []
+for _, v in names.items():
+    old_names = v.names[:-1]
+    new_name = v.names[-1]
+
+    for old_name in old_names:
+        if old_name == new_name:
+            # A file was renamed to its new name twice; don't redirect that
+            continue
+
+        if old_name in current_files:
+            # A file was recreated with a former name; don't redirect those
+            continue
+
+        result.append((old_name, new_name))
+
+for old_name, new_name in sorted(result):
+    print(f"{old_name} {new_name}")
diff --git a/tools/docs/lib/__init__.py b/tools/docs/lib/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/docs/lib/__init__.py
diff --git a/tools/docs/lib/enrich_formatter.py b/tools/docs/lib/enrich_formatter.py
new file mode 100644
index 000000000000..bb171567a4ca
--- /dev/null
+++ b/tools/docs/lib/enrich_formatter.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2025 by Mauro Carvalho Chehab <mchehab@kernel.org>.
+
+"""
+Ancillary argparse HelpFormatter class that works in a similar way to
+argparse.RawDescriptionHelpFormatter, i.e. the description keeps its line
+breaks, but it also implements transformations on the help text. The
+actual transformations are done by enrich_text(), if the output is a tty.
+
+Currently, the following transformations are done:
+
+  - Positional arguments are shown in upper case;
+  - if the output is a TTY, ``var`` and positional arguments are shown prepended
+    by an ANSI SGR code. This is usually translated to bold. On some
+    terminals, like konsole, this is translated into colored bold text.
+"""
+
+import argparse
+import re
+import sys
+
+class EnrichFormatter(argparse.HelpFormatter):
+    """
+    Better format the output, making it easier to identify the positional
+    arguments and how they're used in the __doc__ description.
+    """
+    def __init__(self, *args, **kwargs):
+        """Initialize the class and check whether the output is a TTY"""
+        super().__init__(*args, **kwargs)
+        self._tty = sys.stdout.isatty()
+
+    def enrich_text(self, text):
+        """Handle ReST markups (currently, only ``foo``)"""
+        if self._tty and text:
+            # Replace ``text`` with ANSI SGR (bold)
+            return re.sub(r'\`\`(.+?)\`\`',
+                          lambda m: f'\033[1m{m.group(1)}\033[0m', text)
+        return text
+
+    def _fill_text(self, text, width, indent):
+        """Enrich descriptions with markups on them"""
+        enriched = self.enrich_text(text)
+        return "\n".join(indent + line for line in enriched.splitlines())
+
+    def _format_usage(self, usage, actions, groups, prefix):
+        """Enrich positional arguments at the usage: line"""
+
+        prog = self._prog
+        parts = []
+
+        for action in actions:
+            if action.option_strings:
+                opt = action.option_strings[0]
+                if action.nargs != 0:
+                    opt += f" {action.dest.upper()}"
+                parts.append(f"[{opt}]")
+            else:
+                # Positional argument
+                parts.append(self.enrich_text(f"``{action.dest.upper()}``"))
+
+        usage_text = f"{prefix or 'usage: '} {prog} {' '.join(parts)}\n"
+        return usage_text
+
+    def _format_action_invocation(self, action):
+        """Enrich argument names"""
+        if not action.option_strings:
+            return self.enrich_text(f"``{action.dest.upper()}``")
+
+        return ", ".join(action.option_strings)
diff --git a/tools/docs/lib/parse_data_structs.py b/tools/docs/lib/parse_data_structs.py
new file mode 100755
index 000000000000..a5aa2e182052
--- /dev/null
+++ b/tools/docs/lib/parse_data_structs.py
@@ -0,0 +1,452 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2016-2025 by Mauro Carvalho Chehab <mchehab@kernel.org>.
+# pylint: disable=R0912,R0915
+
+"""
+Parse a source file or header, creating ReStructured Text cross references.
+
+It accepts an optional file to change the default symbol reference or to
+suppress symbols from the output.
+
+It is capable of identifying defines, functions, structs, typedefs,
+enums and enum symbols, and creates cross-references for all of them.
+It is also capable of distinguishing #defines used for specifying a Linux
+ioctl.
+
+The optional rules file contains a set of rules like:
+
+    ignore ioctl VIDIOC_ENUM_FMT
+    replace ioctl VIDIOC_DQBUF vidioc_qbuf
+    replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
+"""
+
+import os
+import re
+import sys
+
+
+class ParseDataStructs:
+    """
+    Creates an enriched version of a Kernel header file with cross-links
+    to each C data structure type.
+
+    It is meant to allow a more comprehensive documentation, where
+    uAPI headers will carry cross-reference links to the code.
+
+    It is capable of identifying defines, functions, structs, typedefs,
+    enums and enum symbols, and creates cross-references for all of them.
+    It is also capable of distinguishing #defines used for specifying a Linux
+    ioctl.
+
+    By default, it creates rules for all symbols and defines, but it also
+    allows parsing an exceptions file. Such a file contains a set of rules
+    using the syntax below:
+
+    1. Ignore rules:
+
+        ignore <type> <symbol>
+
+        Removes the symbol from reference generation.
+
+    2. Replace rules:
+
+        replace <type> <old_symbol> <new_reference>
+
+        Replaces old_symbol with a new reference. The new_reference can be:
+            - A simple symbol name;
+            - A full Sphinx reference.
+
+    In both cases, <type> can be:
+        - ioctl: for defines whose value starts with _IO*, i.e. ioctl definitions;
+        - define: for other defines;
+        - symbol: for symbols defined within enums;
+        - typedef: for typedefs;
+        - enum: for the name of a non-anonymous enum;
+        - struct: for structs.
+
+    Examples:
+
+        ignore define __LINUX_MEDIA_H
+        ignore ioctl VIDIOC_ENUM_FMT
+        replace ioctl VIDIOC_DQBUF vidioc_qbuf
+        replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
+    """
+
+    # Parser regexes with multiple ways to capture enums and structs
+    RE_ENUMS = [
+        re.compile(r"^\s*enum\s+([\w_]+)\s*\{"),
+        re.compile(r"^\s*enum\s+([\w_]+)\s*$"),
+        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"),
+        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"),
+    ]
+    RE_STRUCTS = [
+        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"),
+        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"),
+        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"),
+        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"),
+    ]
+
+    # FIXME: the original code was written a long time before the Sphinx C
+    # domain gained multiple namespaces. To avoid too much churn in the
+    # existing hyperlinks, the code kept using "c:type" instead of the
+    # right types. To change that, we need to change the types not only
+    # here, but also in the uAPI media documentation.
+    DEF_SYMBOL_TYPES = {
+        "ioctl": {
+            "prefix": "\\ ",
+            "suffix": "\\ ",
+            "ref_type": ":ref",
+            "description": "IOCTL Commands",
+        },
+        "define": {
+            "prefix": "\\ ",
+            "suffix": "\\ ",
+            "ref_type": ":ref",
+            "description": "Macros and Definitions",
+        },
+        # We call each definition inside an enum a "symbol"
+        "symbol": {
+            "prefix": "\\ ",
+            "suffix": "\\ ",
+            "ref_type": ":ref",
+            "description": "Enumeration values",
+        },
+        "typedef": {
+            "prefix": "\\ ",
+            "suffix": "\\ ",
+            "ref_type": ":c:type",
+            "description": "Type Definitions",
+        },
+        # This is the description of the enum itself
+        "enum": {
+            "prefix": "\\ ",
+            "suffix": "\\ ",
+            "ref_type": ":c:type",
+            "description": "Enumerations",
+        },
+        "struct": {
+            "prefix": "\\ ",
+            "suffix": "\\ ",
+            "ref_type": ":c:type",
+            "description": "Structures",
+        },
+    }
+
+    def __init__(self, debug: bool = False):
+        """Initialize internal vars"""
+        self.debug = debug
+        self.data = ""
+
+        self.symbols = {}
+
+        for symbol_type in self.DEF_SYMBOL_TYPES:
+            self.symbols[symbol_type] = {}
+
+    def store_type(self, symbol_type: str, symbol: str,
+                   ref_name: str = None, replace_underscores: bool = True):
+        """
+        Stores a new symbol at self.symbols under symbol_type.
+
+        By default, underscores in the reference name are replaced by "-".
+        """
+        defs = self.DEF_SYMBOL_TYPES[symbol_type]
+
+        prefix = defs.get("prefix", "")
+        suffix = defs.get("suffix", "")
+        ref_type = defs.get("ref_type")
+
+        # Determine ref_link based on symbol type
+        if ref_type:
+            if symbol_type == "enum":
+                ref_link = f"{ref_type}:`{symbol}`"
+            else:
+                if not ref_name:
+                    ref_name = symbol.lower()
+
+                # c-type references don't support hash
+                if ref_type == ":ref" and replace_underscores:
+                    ref_name = ref_name.replace("_", "-")
+
+                ref_link = f"{ref_type}:`{symbol} <{ref_name}>`"
+        else:
+            ref_link = symbol
+
+        self.symbols[symbol_type][symbol] = f"{prefix}{ref_link}{suffix}"
+
+    def store_line(self, line):
+        """Stores a line at self.data, properly indented"""
+        line = " " + line.expandtabs()
+        self.data += line.rstrip(" ")
+
+    def parse_file(self, file_in: str):
+        """Reads a C source file and collects identifiers"""
+        self.data = ""
+        is_enum = False
+        is_comment = False
+        multiline = ""
+
+        with open(file_in, "r",
+                  encoding="utf-8", errors="backslashreplace") as f:
+            for line_no, line in enumerate(f):
+                self.store_line(line)
+                line = line.strip("\n")
+
+                # Handle continuation lines
+                if line.endswith("\\"):
+                    multiline += line[:-1]
+                    continue
+
+                if multiline:
+                    line = multiline + line
+                    multiline = ""
+
+                # Handle comments. They can span multiple lines
+                if not is_comment:
+                    if re.search(r"/\*.*", line):
+                        is_comment = True
+                    else:
+                        # Strip C99-style comments
+                        line = re.sub(r"(//.*)", "", line)
+
+                if is_comment:
+                    if re.search(r".*\*/", line):
+                        is_comment = False
+                    else:
+                        multiline = line
+                        continue
+
+                # At this point, the line variable may hold a multi-line statement,
+                # if lines end with \ or contain multi-line comments.
+                # With that, entire comments can be safely removed here,
+                # and there's no need to use re.DOTALL for the logic below.
+
+                line = re.sub(r"(/\*.*\*/)", "", line)
+                if not line.strip():
+                    continue
+
+                # It can be useful for debugging purposes to print the file after
+                # having comments stripped and multi-lines grouped.
+                if self.debug > 1:
+                    print(f"line {line_no + 1}: {line}")
+
+                # Now the fun begins: parse each type and store it.
+
+                # We opted for a two-pass logic here because:
+                # 1. it makes it easier to debug issues with non-parsed symbols;
+                # 2. we want symbol replacement over the entire content, not
+                #    just where the symbol is detected.
+
+                if is_enum:
+                    match = re.match(r"^\s*([_\w][\w\d_]+)\s*[\,=]?", line)
+                    if match:
+                        self.store_type("symbol", match.group(1))
+                    if "}" in line:
+                        is_enum = False
+                    continue
+
+                match = re.match(r"^\s*#\s*define\s+([\w_]+)\s+_IO", line)
+                if match:
+                    self.store_type("ioctl", match.group(1),
+                                    replace_underscores=False)
+                    continue
+
+                match = re.match(r"^\s*#\s*define\s+([\w_]+)(\s+|$)", line)
+                if match:
+                    self.store_type("define", match.group(1))
+                    continue
+
+                match = re.match(r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);",
+                                 line)
+                if match:
+                    name = match.group(2).strip()
+                    symbol = match.group(3)
+                    self.store_type("typedef", symbol, ref_name=name)
+                    continue
+
+                for re_enum in self.RE_ENUMS:
+                    match = re_enum.match(line)
+                    if match:
+                        self.store_type("enum", match.group(1))
+                        is_enum = True
+                        break
+
+                for re_struct in self.RE_STRUCTS:
+                    match = re_struct.match(line)
+                    if match:
+                        self.store_type("struct", match.group(1))
+                        break
+
+    def process_exceptions(self, fname: str):
+        """
+        Process the exceptions file with rules to ignore or replace references.
+ """ + if not fname: + return + + name = os.path.basename(fname) + + with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f: + for ln, line in enumerate(f): + ln += 1 + line = line.strip() + if not line or line.startswith("#"): + continue + + # Handle ignore rules + match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line) + if match: + c_type = match.group(1) + symbol = match.group(2) + + if c_type not in self.DEF_SYMBOL_TYPES: + sys.exit(f"{name}:{ln}: {c_type} is invalid") + + d = self.symbols[c_type] + if symbol in d: + del d[symbol] + + continue + + # Handle replace rules + match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line) + if not match: + sys.exit(f"{name}:{ln}: invalid line: {line}") + + c_type, old, new = match.groups() + + if c_type not in self.DEF_SYMBOL_TYPES: + sys.exit(f"{name}:{ln}: {c_type} is invalid") + + reftype = None + + # Parse reference type when the type is specified + + match = re.match(r"^\:c\:(data|func|macro|type)\:\`(.+)\`", new) + if match: + reftype = f":c:{match.group(1)}" + new = match.group(2) + else: + match = re.search(r"(\:ref)\:\`(.+)\`", new) + if match: + reftype = match.group(1) + new = match.group(2) + + # If the replacement rule doesn't have a type, get default + if not reftype: + reftype = self.DEF_SYMBOL_TYPES[c_type].get("ref_type") + if not reftype: + reftype = self.DEF_SYMBOL_TYPES[c_type].get("real_type") + + new_ref = f"{reftype}:`{old} <{new}>`" + + # Change self.symbols to use the replacement rule + if old in self.symbols[c_type]: + self.symbols[c_type][old] = new_ref + else: + print(f"{name}:{ln}: Warning: can't find {old} {c_type}") + + def debug_print(self): + """ + Print debug information containing the replacement rules per symbol. + To make easier to check, group them per type. + """ + if not self.debug: + return + + for c_type, refs in self.symbols.items(): + if not refs: # Skip empty dictionaries + continue + + print(f"{c_type}:") + + for symbol, ref in sorted(refs.items()): + print(f" {symbol} -> {ref}") + + print() + + def gen_output(self): + """Write the formatted output to a file.""" + + # Avoid extra blank lines + text = re.sub(r"\s+$", "", self.data) + "\n" + text = re.sub(r"\n\s+\n", "\n\n", text) + + # Escape Sphinx special characters + text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text) + + # Source uAPI files may have special notes. Use bold font for them + text = re.sub(r"DEPRECATED", "**DEPRECATED**", text) + + # Delimiters to catch the entire symbol after escaped + start_delim = r"([ \n\t\(=\*\@])" + end_delim = r"(\s|,|\\=|\\:|\;|\)|\}|\{)" + + # Process all reference types + for ref_dict in self.symbols.values(): + for symbol, replacement in ref_dict.items(): + symbol = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])", r"\\\1", symbol)) + text = re.sub(fr'{start_delim}{symbol}{end_delim}', + fr'\1{replacement}\2', text) + + # Remove "\ " where not needed: before spaces and at the end of lines + text = re.sub(r"\\ ([\n ])", r"\1", text) + text = re.sub(r" \\ ", " ", text) + + return text + + def gen_toc(self): + """ + Create a TOC table pointing to each symbol from the header + """ + text = [] + + # Add header + text.append(".. 
+        text.append("   :depth: 2")
+        text.append("   :local:")
+        text.append("")
+
+        # Sort symbol types per description
+        symbol_descriptions = []
+        for k, v in self.DEF_SYMBOL_TYPES.items():
+            symbol_descriptions.append((v['description'], k))
+
+        symbol_descriptions.sort()
+
+        # Process each category
+        for description, c_type in symbol_descriptions:
+
+            refs = self.symbols[c_type]
+            if not refs:  # Skip empty categories
+                continue
+
+            text.append(f"{description}")
+            text.append("-" * len(description))
+            text.append("")
+
+            # Sort symbols alphabetically
+            for symbol, ref in sorted(refs.items()):
+                text.append(f"* :{ref}:")
+
+            text.append("")  # Add an empty line between categories
+
+        return "\n".join(text)
+
+    def write_output(self, file_in: str, file_out: str, toc: bool):
+        title = os.path.basename(file_in)
+
+        if toc:
+            text = self.gen_toc()
+        else:
+            text = self.gen_output()
+
+        with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f:
+            f.write(".. -*- coding: utf-8; mode: rst -*-\n\n")
+            f.write(f"{title}\n")
+            f.write("=" * len(title) + "\n\n")
+
+            if not toc:
+                f.write(".. parsed-literal::\n\n")
+
+            f.write(text)
diff --git a/tools/docs/parse-headers.py b/tools/docs/parse-headers.py
new file mode 100755
index 000000000000..bfa4e46a53e3
--- /dev/null
+++ b/tools/docs/parse-headers.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2016, 2025 by Mauro Carvalho Chehab <mchehab@kernel.org>.
+# pylint: disable=C0103
+
+"""
+Convert a C header or source file ``FILE_IN`` into ReStructured Text,
+included via a ``.. parsed-literal::`` block, with cross-references for the
+documentation files that describe the API. It accepts an optional
+``FILE_RULES`` file that describes which elements will be either ignored or
+pointed to a non-default reference type/name.
+
+The output is written to ``FILE_OUT``.
+
+It is capable of identifying defines, functions, structs, typedefs,
+enums and enum symbols, and creates cross-references for all of them.
+It is also capable of distinguishing #defines used for specifying a Linux
+ioctl.
+
+The optional ``FILE_RULES`` file contains a set of rules like:
+
+    ignore ioctl VIDIOC_ENUM_FMT
+    replace ioctl VIDIOC_DQBUF vidioc_qbuf
+    replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
+"""
+
+import argparse
+
+from lib.parse_data_structs import ParseDataStructs
+from lib.enrich_formatter import EnrichFormatter
+
+def main():
+    """Main function"""
+    parser = argparse.ArgumentParser(description=__doc__,
+                                     formatter_class=EnrichFormatter)
+
+    parser.add_argument("-d", "--debug", action="count", default=0,
+                        help="Increase debug level. Can be used multiple times")
+    parser.add_argument("-t", "--toc", action="store_true",
+                        help="instead of a literal block, output a TOC table in the RST file")
+
+    parser.add_argument("file_in", help="Input C file")
+    parser.add_argument("file_out", help="Output RST file")
+    parser.add_argument("file_rules", nargs="?",
+                        help="Exceptions file (optional)")
+
+    args = parser.parse_args()
+
+    parser = ParseDataStructs(debug=args.debug)
+    parser.parse_file(args.file_in)
+
+    if args.file_rules:
+        parser.process_exceptions(args.file_rules)
+
+    parser.debug_print()
+    parser.write_output(args.file_in, args.file_out, args.toc)
+
+
+if __name__ == "__main__":
+    main()
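
For reference, the two rename/redirect helpers are meant to be chained: gen-renames.py emits one "<old> <new>" pair per line, with the "Documentation/" prefix and ".rst" suffix stripped by normalize(), and gen-redirects.py turns each pair into a small meta-refresh HTML page next to the rendered output. A sketch of the flow follows; only the two command lines are taken from the scripts' docstrings, the renames shown are hypothetical:

    tools/docs/gen-renames.py --rev HEAD > Documentation/.renames.txt
    # Documentation/.renames.txt would then contain lines such as:
    #   media/uapi/v4l/old-page media/uapi/v4l/new-page
    tools/docs/gen-redirects.py --output Documentation/output/ < Documentation/.renames.txt
    # which would write Documentation/output/media/uapi/v4l/old-page.html
    # redirecting to new-page.html, provided the target page exists.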
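parse-headers.py above is the intended consumer of ParseDataStructs, but the class can also be driven directly. A minimal sketch, assuming it runs from tools/docs/ so that the lib package is importable; the file names are placeholders, not part of this patch:

    #!/usr/bin/env python3
    # Illustrative driver for ParseDataStructs (file names are placeholders)
    from lib.parse_data_structs import ParseDataStructs

    p = ParseDataStructs()
    p.parse_file("videodev2.h")                     # collect defines, ioctls, enums, structs, typedefs
    p.process_exceptions("videodev2.h.rst.exceptions")  # optional ignore/replace rules
    p.write_output("videodev2.h", "videodev2.h.rst", toc=False)  # emit the enriched parsed-literal RST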