summaryrefslogtreecommitdiff
path: root/tools/docs/lib/parse_data_structs.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/docs/lib/parse_data_structs.py')
-rwxr-xr-xtools/docs/lib/parse_data_structs.py452
1 files changed, 0 insertions, 452 deletions
diff --git a/tools/docs/lib/parse_data_structs.py b/tools/docs/lib/parse_data_structs.py
deleted file mode 100755
index a5aa2e182052..000000000000
--- a/tools/docs/lib/parse_data_structs.py
+++ /dev/null
@@ -1,452 +0,0 @@
-#!/usr/bin/env python3
-# SPDX-License-Identifier: GPL-2.0
-# Copyright (c) 2016-2025 by Mauro Carvalho Chehab <mchehab@kernel.org>.
-# pylint: disable=R0912,R0915
-
-"""
-Parse a source file or header, creating ReStructured Text cross references.
-
-It accepts an optional file to change the default symbol reference or to
-suppress symbols from the output.
-
-It is capable of identifying defines, functions, structs, typedefs,
-enums and enum symbols and create cross-references for all of them.
-It is also capable of distinguish #define used for specifying a Linux
-ioctl.
-
-The optional rules file contains a set of rules like:
-
- ignore ioctl VIDIOC_ENUM_FMT
- replace ioctl VIDIOC_DQBUF vidioc_qbuf
- replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
-"""
-
-import os
-import re
-import sys
-
-
class ParseDataStructs:
    """
    Creates an enriched version of a Kernel header file with cross-links
    to each C data structure type.

    It is meant to allow having a more comprehensive documentation, where
    uAPI headers will create cross-reference links to the code.

    It is capable of identifying defines, functions, structs, typedefs,
    enums and enum symbols and creating cross-references for all of them.
    It is also capable of distinguishing #defines used for specifying a
    Linux ioctl.

    By default, it creates rules for all symbols and defines, but it also
    allows parsing an exception file. Such file contains a set of rules
    using the syntax below:

    1. Ignore rules:

        ignore <type> <symbol>

    Removes the symbol from reference generation.

    2. Replace rules:

        replace <type> <old_symbol> <new_reference>

    Replaces how old_symbol is referenced. The new_reference can be:
        - A simple symbol name;
        - A full Sphinx reference.

    On both cases, <type> can be:
        - ioctl: for defines that end with _IO*, e.g. ioctl definitions
        - define: for other defines
        - symbol: for symbols defined within enums;
        - typedef: for typedefs;
        - enum: for the name of a non-anonymous enum;
        - struct: for structs.

    Examples:

        ignore define __LINUX_MEDIA_H
        ignore ioctl VIDIOC_ENUM_FMT
        replace ioctl VIDIOC_DQBUF vidioc_qbuf
        replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
    """

    # Parser regexes with multiple ways to capture enums and structs
    RE_ENUMS = [
        re.compile(r"^\s*enum\s+([\w_]+)\s*\{"),
        re.compile(r"^\s*enum\s+([\w_]+)\s*$"),
        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"),
        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"),
    ]
    RE_STRUCTS = [
        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"),
        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"),
        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"),
        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"),
    ]

    # Single-statement classifiers used by the parser
    RE_ENUM_SYMBOL = re.compile(r"^\s*([_\w][\w\d_]+)\s*[\,=]?")
    RE_IOCTL = re.compile(r"^\s*#\s*define\s+([\w_]+)\s+_IO")
    RE_DEFINE = re.compile(r"^\s*#\s*define\s+([\w_]+)(\s+|$)")
    RE_TYPEDEF = re.compile(r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);")

    # Comment strippers. The block-comment one is non-greedy on purpose:
    # a greedy match would also eat the code placed between two comments
    # that share the same line.
    RE_BLOCK_COMMENT = re.compile(r"/\*.*?\*/")
    RE_LINE_COMMENT = re.compile(r"//.*")

    # FIXME: the original code was written a long time before the Sphinx C
    # domain had multiple namespaces. To avoid too much churn at the
    # existing hyperlinks, the code kept using "c:type" instead of the
    # right types. To change that, we need to change the types not only
    # here, but also at the uAPI media documentation.
    DEF_SYMBOL_TYPES = {
        "ioctl": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "IOCTL Commands",
        },
        "define": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "Macros and Definitions",
        },
        # We're calling each definition inside an enum as "symbol"
        "symbol": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":ref",
            "description": "Enumeration values",
        },
        "typedef": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Type Definitions",
        },
        # This is the description of the enum itself
        "enum": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Enumerations",
        },
        "struct": {
            "prefix": "\\ ",
            "suffix": "\\ ",
            "ref_type": ":c:type",
            "description": "Structures",
        },
    }

    def __init__(self, debug: bool = False):
        """
        Initialize internal vars.

        ``debug`` enables debug_print(). parse_file() additionally dumps
        each logical line when ``debug`` compares greater than 1.
        """
        self.debug = debug
        self.data = ""

        # One dictionary per symbol type, mapping symbol -> reference text
        self.symbols = {}

        for symbol_type in self.DEF_SYMBOL_TYPES:
            self.symbols[symbol_type] = {}

    def store_type(self, symbol_type: str, symbol: str,
                   ref_name: str = None, replace_underscores: bool = True):
        """
        Stores a new symbol at self.symbols under symbol_type.

        The stored value is the reference text, surrounded by the prefix
        and suffix defined for the type. When ref_name is not given, the
        lower-case symbol is used as the reference target.

        By default, underscores are replaced by "-" on ":ref" targets.
        """
        defs = self.DEF_SYMBOL_TYPES[symbol_type]

        prefix = defs.get("prefix", "")
        suffix = defs.get("suffix", "")
        ref_type = defs.get("ref_type")

        # Determine ref_link based on symbol type
        if ref_type:
            if symbol_type == "enum":
                ref_link = f"{ref_type}:`{symbol}`"
            else:
                if not ref_name:
                    ref_name = symbol.lower()

                # c-type references don't support hash
                if ref_type == ":ref" and replace_underscores:
                    ref_name = ref_name.replace("_", "-")

                ref_link = f"{ref_type}:`{symbol} <{ref_name}>`"
        else:
            ref_link = symbol

        self.symbols[symbol_type][symbol] = f"{prefix}{ref_link}{suffix}"

    def store_line(self, line):
        """Stores a line at self.data, properly indented"""
        line = " " + line.expandtabs()
        self.data += line.rstrip(" ")

    def _strip_comments(self, line: str) -> str:
        """Remove complete /* ... */ comments, then any // comment."""
        line = self.RE_BLOCK_COMMENT.sub("", line)
        return self.RE_LINE_COMMENT.sub("", line)

    def _parse_statement(self, line: str, is_enum: bool) -> bool:
        """
        Classify one comment-free logical line and store what it declares.

        Returns the new "inside an enum body" state.
        """
        if is_enum:
            match = self.RE_ENUM_SYMBOL.match(line)
            if match:
                self.store_type("symbol", match.group(1))

            # The enum body ends at the first closing brace
            return "}" not in line

        match = self.RE_IOCTL.match(line)
        if match:
            self.store_type("ioctl", match.group(1),
                            replace_underscores=False)
            return False

        match = self.RE_DEFINE.match(line)
        if match:
            self.store_type("define", match.group(1))
            return False

        match = self.RE_TYPEDEF.match(line)
        if match:
            name = match.group(2).strip()
            symbol = match.group(3)
            self.store_type("typedef", symbol, ref_name=name)
            return False

        for re_enum in self.RE_ENUMS:
            match = re_enum.match(line)
            if match:
                self.store_type("enum", match.group(1))

                # An enum opened and closed on the same line must not
                # leave the parser waiting for a "}" that already passed
                is_enum = "}" not in line
                break

        for re_struct in self.RE_STRUCTS:
            match = re_struct.match(line)
            if match:
                self.store_type("struct", match.group(1))
                break

        return is_enum

    def parse_file(self, file_in: str):
        """
        Reads a C source file and get identifiers.

        Every raw line is kept at self.data via store_line(). For symbol
        detection, lines ending with a backslash and multi-line comments
        are first grouped into a single logical line, comments are
        removed, and the result is classified by _parse_statement().
        """
        self.data = ""
        is_enum = False
        pending = ""

        with open(file_in, "r",
                  encoding="utf-8", errors="backslashreplace") as f:
            for line_no, line in enumerate(f, start=1):
                self.store_line(line)
                line = pending + line.strip("\n")
                pending = ""

                # Handle continuation lines: drop the trailing backslash
                # and wait for the rest of the statement
                if line.endswith("\\"):
                    pending = line[:-1]
                    continue

                line = self._strip_comments(line)

                # A comment opened here and not closed yet: keep
                # accumulating until the line which has its "*/"
                if "/*" in line:
                    pending = line
                    continue

                if not line.strip():
                    continue

                # It can be useful for debug purposes to print the file after
                # having comments stripped and multi-lines grouped.
                if self.debug > 1:
                    print(f"line {line_no}: {line}")

                # Now the fun begins: parse each type and store it.

                # We opted for a two parsing logic here due to:
                # 1. it makes easier to debug issues not-parsed symbols;
                # 2. we want symbol replacement at the entire content, not
                #    just when the symbol is detected.
                is_enum = self._parse_statement(line, is_enum)

        # The file ended in the middle of a continuation or of a comment:
        # still parse whatever code came before the unterminated comment
        if pending:
            line = self._strip_comments(pending).split("/*", 1)[0]
            if line.strip():
                self._parse_statement(line, is_enum)

    def process_exceptions(self, fname: str):
        """
        Process exceptions file with rules to ignore or replace references.

        Blank lines and lines starting with "#" are skipped. An unknown
        type or a line which is neither an ignore nor a replace rule
        terminates the program via sys.exit(). A replace rule for a symbol
        that was not parsed only prints a warning.
        """
        if not fname:
            return

        name = os.path.basename(fname)

        with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f:
            for ln, line in enumerate(f, start=1):
                line = line.strip()
                if not line or line.startswith("#"):
                    continue

                # Handle ignore rules
                match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line)
                if match:
                    c_type = match.group(1)
                    symbol = match.group(2)

                    if c_type not in self.DEF_SYMBOL_TYPES:
                        sys.exit(f"{name}:{ln}: {c_type} is invalid")

                    # Ignoring a symbol which was not found is not an error
                    self.symbols[c_type].pop(symbol, None)
                    continue

                # Handle replace rules
                match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line)
                if not match:
                    sys.exit(f"{name}:{ln}: invalid line: {line}")

                c_type, old, new = match.groups()

                if c_type not in self.DEF_SYMBOL_TYPES:
                    sys.exit(f"{name}:{ln}: {c_type} is invalid")

                reftype = None

                # Parse reference type when the type is specified

                match = re.match(r"^\:c\:(data|func|macro|type)\:\`(.+)\`", new)
                if match:
                    reftype = f":c:{match.group(1)}"
                    new = match.group(2)
                else:
                    match = re.search(r"(\:ref)\:\`(.+)\`", new)
                    if match:
                        reftype = match.group(1)
                        new = match.group(2)

                # If the replacement rule doesn't have a type, get default
                if not reftype:
                    defs = self.DEF_SYMBOL_TYPES[c_type]
                    reftype = defs.get("ref_type") or defs.get("real_type")

                # Without any reference type, emit the plain name instead
                # of a bogus "None:`...`" role
                if reftype:
                    new_ref = f"{reftype}:`{old} <{new}>`"
                else:
                    new_ref = new

                # Change self.symbols to use the replacement rule
                if old in self.symbols[c_type]:
                    self.symbols[c_type][old] = new_ref
                else:
                    print(f"{name}:{ln}: Warning: can't find {old} {c_type}")

    def debug_print(self):
        """
        Print debug information containing the replacement rules per symbol.
        To make easier to check, group them per type.
        """
        if not self.debug:
            return

        for c_type, refs in self.symbols.items():
            if not refs:  # Skip empty dictionaries
                continue

            print(f"{c_type}:")

            for symbol, ref in sorted(refs.items()):
                print(f" {symbol} -> {ref}")

            print()

    def gen_output(self):
        """
        Return self.data with special characters escaped and with each
        known symbol replaced by its reference text.
        """

        # Avoid extra blank lines
        text = re.sub(r"\s+$", "", self.data) + "\n"
        text = re.sub(r"\n\s+\n", "\n\n", text)

        # Escape Sphinx special characters
        text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text)

        # Source uAPI files may have special notes. Use bold font for them
        text = re.sub(r"DEPRECATED", "**DEPRECATED**", text)

        # Delimiters to catch the entire symbol after escaped
        start_delim = r"([ \n\t\(=\*\@])"
        end_delim = r"(\s|,|\\=|\\:|\;|\)|\}|\{)"

        # Process all reference types
        for ref_dict in self.symbols.values():
            for symbol, replacement in ref_dict.items():
                # The text was escaped above, so escape the symbol the same
                # way before turning it into a regex
                escaped = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])",
                                           r"\\\1", symbol))

                # Use a callable: the reference text must be inserted
                # verbatim, and a replacement template would try to
                # interpret any backslash sequence inside it
                text = re.sub(f"{start_delim}{escaped}{end_delim}",
                              lambda m, ref=replacement:
                              m.group(1) + ref + m.group(2),
                              text)

        # Remove "\ " where not needed: before spaces and at the end of lines
        text = re.sub(r"\\ ([\n ])", r"\1", text)
        text = re.sub(r" \\ ", " ", text)

        return text

    def gen_toc(self):
        """
        Create a TOC table pointing to each symbol from the header.

        Returns the text with one section per non-empty symbol type,
        sections sorted by description and symbols sorted by name.
        """
        text = []

        # Add header
        text.append(".. contents:: Table of Contents")
        text.append(" :depth: 2")
        text.append(" :local:")
        text.append("")

        # Sort symbol types per description
        symbol_descriptions = sorted((v['description'], k)
                                     for k, v in self.DEF_SYMBOL_TYPES.items())

        # Process each category
        for description, c_type in symbol_descriptions:

            refs = self.symbols[c_type]
            if not refs:  # Skip empty categories
                continue

            text.append(f"{description}")
            text.append("-" * len(description))
            text.append("")

            # Sort symbols alphabetically
            for _, ref in sorted(refs.items()):
                text.append(f"* :{ref}:")

            text.append("")  # Add empty line between categories

        return "\n".join(text)

    def write_output(self, file_in: str, file_out: str, toc: bool):
        """
        Write the generated ReST document to file_out.

        The base name of file_in is used as the document title. When toc
        is true, the body is the table produced by gen_toc(); otherwise it
        is the text from gen_output(), placed inside a parsed-literal
        block.
        """
        title = os.path.basename(file_in)

        if toc:
            text = self.gen_toc()
        else:
            text = self.gen_output()

        with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f:
            f.write(".. -*- coding: utf-8; mode: rst -*-\n\n")
            f.write(f"{title}\n")
            f.write("=" * len(title) + "\n\n")

            if not toc:
                f.write(".. parsed-literal::\n\n")

            f.write(text)