diff options
Diffstat (limited to 'Documentation/netlink/specs/netdev.yaml')
| -rw-r--r-- | Documentation/netlink/specs/netdev.yaml | 808 |
1 files changed, 808 insertions, 0 deletions
diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml new file mode 100644 index 000000000000..82bf5cb2617d --- /dev/null +++ b/Documentation/netlink/specs/netdev.yaml @@ -0,0 +1,808 @@ +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) +--- +name: netdev + +doc: >- + netdev configuration over generic netlink. + +definitions: + - + type: flags + name: xdp-act + render-max: true + entries: + - + name: basic + doc: >- + XDP features set supported by all drivers + (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX) + - + name: redirect + doc: >- + The netdev supports XDP_REDIRECT + - + name: ndo-xmit + doc: >- + This feature informs if netdev implements ndo_xdp_xmit callback. + - + name: xsk-zerocopy + doc: >- + This feature informs if netdev supports AF_XDP in zero copy mode. + - + name: hw-offload + doc: >- + This feature informs if netdev supports XDP hw offloading. + - + name: rx-sg + doc: >- + This feature informs if netdev implements non-linear XDP buffer + support in the driver napi callback. + - + name: ndo-xmit-sg + doc: >- + This feature informs if netdev implements non-linear XDP buffer + support in ndo_xdp_xmit callback. + - + type: flags + name: xdp-rx-metadata + entries: + - + name: timestamp + doc: | + Device is capable of exposing receive HW timestamp via + bpf_xdp_metadata_rx_timestamp(). + - + name: hash + doc: | + Device is capable of exposing receive packet hash via + bpf_xdp_metadata_rx_hash(). + - + name: vlan-tag + doc: | + Device is capable of exposing receive packet VLAN tag via + bpf_xdp_metadata_rx_vlan_tag(). + - + type: flags + name: xsk-flags + entries: + - + name: tx-timestamp + doc: >- + HW timestamping egress packets is supported by the driver. + - + name: tx-checksum + doc: >- + L3 checksum HW offload is supported by the driver. + - + name: tx-launch-time-fifo + doc: >- + Launch time HW offload is supported by the driver. + - + name: queue-type + type: enum + entries: [rx, tx] + - + name: qstats-scope + type: flags + entries: [queue] + - + name: napi-threaded + type: enum + entries: [disabled, enabled, busy-poll] + +attribute-sets: + - + name: dev + attributes: + - + name: ifindex + doc: netdev ifindex + type: u32 + checks: + min: 1 + - + name: pad + type: pad + - + name: xdp-features + doc: Bitmask of enabled xdp-features. + type: u64 + enum: xdp-act + - + name: xdp-zc-max-segs + doc: max fragment count supported by ZC driver + type: u32 + checks: + min: 1 + - + name: xdp-rx-metadata-features + doc: Bitmask of supported XDP receive metadata features. + See Documentation/networking/xdp-rx-metadata.rst for more details. + type: u64 + enum: xdp-rx-metadata + - + name: xsk-features + doc: Bitmask of enabled AF_XDP features. + type: u64 + enum: xsk-flags + - + name: io-uring-provider-info + attributes: [] + - + name: page-pool + attributes: + - + name: id + doc: Unique ID of a Page Pool instance. + type: uint + checks: + min: 1 + max: u32-max + - + name: ifindex + doc: | + ifindex of the netdev to which the pool belongs. + May be reported as 0 if the page pool was allocated for a netdev + which got destroyed already (page pools may outlast their netdevs + because they wait for all memory to be returned). + type: u32 + checks: + min: 1 + max: s32-max + - + name: napi-id + doc: Id of NAPI using this Page Pool instance. + type: uint + checks: + min: 1 + max: u32-max + - + name: inflight + type: uint + doc: | + Number of outstanding references to this page pool (allocated + but yet to be freed pages). Allocated pages may be held in + socket receive queues, driver receive ring, page pool recycling + ring, the page pool cache, etc. + - + name: inflight-mem + type: uint + doc: | + Amount of memory held by inflight pages. + - + name: detach-time + type: uint + doc: | + Seconds in CLOCK_BOOTTIME of when Page Pool was detached by + the driver. Once detached Page Pool can no longer be used to + allocate memory. + Page Pools wait for all the memory allocated from them to be freed + before truly disappearing. "Detached" Page Pools cannot be + "re-attached", they are just waiting to disappear. + Attribute is absent if Page Pool has not been detached, and + can still be used to allocate new memory. + - + name: dmabuf + doc: ID of the dmabuf this page-pool is attached to. + type: u32 + - + name: io-uring + doc: io-uring memory provider information. + type: nest + nested-attributes: io-uring-provider-info + - + name: page-pool-info + subset-of: page-pool + attributes: + - + name: id + - + name: ifindex + - + name: page-pool-stats + doc: | + Page pool statistics, see docs for struct page_pool_stats + for information about individual statistics. + attributes: + - + name: info + doc: Page pool identifying information. + type: nest + nested-attributes: page-pool-info + - + name: alloc-fast + type: uint + value: 8 # reserve some attr ids in case we need more metadata later + - + name: alloc-slow + type: uint + - + name: alloc-slow-high-order + type: uint + - + name: alloc-empty + type: uint + - + name: alloc-refill + type: uint + - + name: alloc-waive + type: uint + - + name: recycle-cached + type: uint + - + name: recycle-cache-full + type: uint + - + name: recycle-ring + type: uint + - + name: recycle-ring-full + type: uint + - + name: recycle-released-refcnt + type: uint + + - + name: napi + attributes: + - + name: ifindex + doc: ifindex of the netdevice to which NAPI instance belongs. + type: u32 + checks: + min: 1 + - + name: id + doc: ID of the NAPI instance. + type: u32 + - + name: irq + doc: The associated interrupt vector number for the napi + type: u32 + - + name: pid + doc: PID of the napi thread, if NAPI is configured to operate in + threaded mode. If NAPI is not in threaded mode (i.e. uses normal + softirq context), the attribute will be absent. + type: u32 + - + name: defer-hard-irqs + doc: The number of consecutive empty polls before IRQ deferral ends + and hardware IRQs are re-enabled. + type: u32 + checks: + max: s32-max + - + name: gro-flush-timeout + doc: The timeout, in nanoseconds, of when to trigger the NAPI watchdog + timer which schedules NAPI processing. Additionally, a non-zero + value will also prevent GRO from flushing recent super-frames at + the end of a NAPI cycle. This may add receive latency in exchange + for reducing the number of frames processed by the network stack. + type: uint + - + name: irq-suspend-timeout + doc: The timeout, in nanoseconds, of how long to suspend irq + processing, if event polling finds events + type: uint + - + name: threaded + doc: Whether the NAPI is configured to operate in threaded polling + mode. If this is set to enabled then the NAPI context operates + in threaded polling mode. If this is set to busy-poll, then the + threaded polling mode also busy polls. + type: u32 + enum: napi-threaded + - + name: xsk-info + attributes: [] + - + name: queue + attributes: + - + name: id + doc: Queue index; most queue types are indexed like a C array, with + indexes starting at 0 and ending at queue count - 1. Queue indexes + are scoped to an interface and queue type. + type: u32 + - + name: ifindex + doc: ifindex of the netdevice to which the queue belongs. + type: u32 + checks: + min: 1 + - + name: type + doc: Queue type as rx, tx. Each queue type defines a separate ID space. + XDP TX queues allocated in the kernel are not linked to NAPIs and + thus not listed. AF_XDP queues will have more information set in + the xsk attribute. + type: u32 + enum: queue-type + - + name: napi-id + doc: ID of the NAPI instance which services this queue. + type: u32 + - + name: dmabuf + doc: ID of the dmabuf attached to this queue, if any. + type: u32 + - + name: io-uring + doc: io_uring memory provider information. + type: nest + nested-attributes: io-uring-provider-info + - + name: xsk + doc: XSK information for this queue, if any. + type: nest + nested-attributes: xsk-info + - + name: qstats + doc: | + Get device statistics, scoped to a device or a queue. + These statistics extend (and partially duplicate) statistics available + in struct rtnl_link_stats64. + Value of the `scope` attribute determines how statistics are + aggregated. When aggregated for the entire device the statistics + represent the total number of events since last explicit reset of + the device (i.e. not a reconfiguration like changing queue count). + When reported per-queue, however, the statistics may not add + up to the total number of events, will only be reported for currently + active objects, and will likely report the number of events since last + reconfiguration. + attributes: + - + name: ifindex + doc: ifindex of the netdevice to which stats belong. + type: u32 + checks: + min: 1 + - + name: queue-type + doc: Queue type as rx, tx, for queue-id. + type: u32 + enum: queue-type + - + name: queue-id + doc: Queue ID, if stats are scoped to a single queue instance. + type: u32 + - + name: scope + doc: | + What object type should be used to iterate over the stats. + type: uint + enum: qstats-scope + - + name: rx-packets + doc: | + Number of wire packets successfully received and passed to the stack. + For drivers supporting XDP, XDP is considered the first layer + of the stack, so packets consumed by XDP are still counted here. + type: uint + value: 8 # reserve some attr ids in case we need more metadata later + - + name: rx-bytes + doc: Successfully received bytes, see `rx-packets`. + type: uint + - + name: tx-packets + doc: | + Number of wire packets successfully sent. Packet is considered to be + successfully sent once it is in device memory (usually this means + the device has issued a DMA completion for the packet). + type: uint + - + name: tx-bytes + doc: Successfully sent bytes, see `tx-packets`. + type: uint + - + name: rx-alloc-fail + doc: | + Number of times skb or buffer allocation failed on the Rx datapath. + Allocation failure may, or may not result in a packet drop, depending + on driver implementation and whether system recovers quickly. + type: uint + - + name: rx-hw-drops + doc: | + Number of all packets which entered the device, but never left it, + including but not limited to: packets dropped due to lack of buffer + space, processing errors, explicit or implicit policies and packet + filters. + type: uint + - + name: rx-hw-drop-overruns + doc: | + Number of packets dropped due to transient lack of resources, such as + buffer space, host descriptors etc. + type: uint + - + name: rx-csum-complete + doc: Number of packets that were marked as CHECKSUM_COMPLETE. + type: uint + - + name: rx-csum-unnecessary + doc: Number of packets that were marked as CHECKSUM_UNNECESSARY. + type: uint + - + name: rx-csum-none + doc: Number of packets that were not checksummed by device. + type: uint + - + name: rx-csum-bad + doc: | + Number of packets with bad checksum. The packets are not discarded, + but still delivered to the stack. + type: uint + - + name: rx-hw-gro-packets + doc: | + Number of packets that were coalesced from smaller packets by the + device. Counts only packets coalesced with the HW-GRO netdevice + feature, LRO-coalesced packets are not counted. + type: uint + - + name: rx-hw-gro-bytes + doc: See `rx-hw-gro-packets`. + type: uint + - + name: rx-hw-gro-wire-packets + doc: | + Number of packets that were coalesced to bigger packetss with the + HW-GRO netdevice feature. LRO-coalesced packets are not counted. + type: uint + - + name: rx-hw-gro-wire-bytes + doc: See `rx-hw-gro-wire-packets`. + type: uint + - + name: rx-hw-drop-ratelimits + doc: | + Number of the packets dropped by the device due to the received + packets bitrate exceeding the device rate limit. + type: uint + - + name: tx-hw-drops + doc: | + Number of packets that arrived at the device but never left it, + encompassing packets dropped for reasons such as processing errors, as + well as those affected by explicitly defined policies and packet + filtering criteria. + type: uint + - + name: tx-hw-drop-errors + doc: Number of packets dropped because they were invalid or malformed. + type: uint + - + name: tx-csum-none + doc: | + Number of packets that did not require the device to calculate the + checksum. + type: uint + - + name: tx-needs-csum + doc: | + Number of packets that required the device to calculate the checksum. + This counter includes the number of GSO wire packets for which device + calculated the L4 checksum. + type: uint + - + name: tx-hw-gso-packets + doc: | + Number of packets that necessitated segmentation into smaller packets + by the device. + type: uint + - + name: tx-hw-gso-bytes + doc: See `tx-hw-gso-packets`. + type: uint + - + name: tx-hw-gso-wire-packets + doc: | + Number of wire-sized packets generated by processing + `tx-hw-gso-packets` + type: uint + - + name: tx-hw-gso-wire-bytes + doc: See `tx-hw-gso-wire-packets`. + type: uint + - + name: tx-hw-drop-ratelimits + doc: | + Number of the packets dropped by the device due to the transmit + packets bitrate exceeding the device rate limit. + type: uint + - + name: tx-stop + doc: | + Number of times driver paused accepting new tx packets + from the stack to this queue, because the queue was full. + Note that if BQL is supported and enabled on the device + the networking stack will avoid queuing a lot of data at once. + type: uint + - + name: tx-wake + doc: | + Number of times driver re-started accepting send + requests to this queue from the stack. + type: uint + - + name: queue-id + subset-of: queue + attributes: + - + name: id + - + name: type + - + name: dmabuf + attributes: + - + name: ifindex + doc: netdev ifindex to bind the dmabuf to. + type: u32 + checks: + min: 1 + - + name: queues + doc: receive queues to bind the dmabuf to. + type: nest + nested-attributes: queue-id + multi-attr: true + - + name: fd + doc: dmabuf file descriptor to bind. + type: u32 + - + name: id + doc: id of the dmabuf binding + type: u32 + checks: + min: 1 + +operations: + list: + - + name: dev-get + doc: Get / dump information about a netdev. + attribute-set: dev + do: + request: + attributes: + - ifindex + reply: &dev-all + attributes: + - ifindex + - xdp-features + - xdp-zc-max-segs + - xdp-rx-metadata-features + - xsk-features + dump: + reply: *dev-all + - + name: dev-add-ntf + doc: Notification about device appearing. + notify: dev-get + mcgrp: mgmt + - + name: dev-del-ntf + doc: Notification about device disappearing. + notify: dev-get + mcgrp: mgmt + - + name: dev-change-ntf + doc: Notification about device configuration being changed. + notify: dev-get + mcgrp: mgmt + - + name: page-pool-get + doc: | + Get / dump information about Page Pools. + (Only Page Pools associated with a net_device can be listed.) + attribute-set: page-pool + do: + request: + attributes: + - id + reply: &pp-reply + attributes: + - id + - ifindex + - napi-id + - inflight + - inflight-mem + - detach-time + - dmabuf + - io-uring + dump: + reply: *pp-reply + config-cond: page-pool + - + name: page-pool-add-ntf + doc: Notification about page pool appearing. + notify: page-pool-get + mcgrp: page-pool + config-cond: page-pool + - + name: page-pool-del-ntf + doc: Notification about page pool disappearing. + notify: page-pool-get + mcgrp: page-pool + config-cond: page-pool + - + name: page-pool-change-ntf + doc: Notification about page pool configuration being changed. + notify: page-pool-get + mcgrp: page-pool + config-cond: page-pool + - + name: page-pool-stats-get + doc: Get page pool statistics. + attribute-set: page-pool-stats + do: + request: + attributes: + - info + reply: &pp-stats-reply + attributes: + - info + - alloc-fast + - alloc-slow + - alloc-slow-high-order + - alloc-empty + - alloc-refill + - alloc-waive + - recycle-cached + - recycle-cache-full + - recycle-ring + - recycle-ring-full + - recycle-released-refcnt + dump: + reply: *pp-stats-reply + config-cond: page-pool-stats + - + name: queue-get + doc: Get queue information from the kernel. + Only configured queues will be reported (as opposed to all available + hardware queues). + attribute-set: queue + do: + request: + attributes: + - ifindex + - type + - id + reply: &queue-get-op + attributes: + - id + - type + - napi-id + - ifindex + - dmabuf + - io-uring + - xsk + dump: + request: + attributes: + - ifindex + reply: *queue-get-op + - + name: napi-get + doc: Get information about NAPI instances configured on the system. + attribute-set: napi + do: + request: + attributes: + - id + reply: &napi-get-op + attributes: + - id + - ifindex + - irq + - pid + - defer-hard-irqs + - gro-flush-timeout + - irq-suspend-timeout + - threaded + dump: + request: + attributes: + - ifindex + reply: *napi-get-op + - + name: qstats-get + doc: | + Get / dump fine grained statistics. Which statistics are reported + depends on the device and the driver, and whether the driver stores + software counters per-queue. + attribute-set: qstats + dump: + request: + attributes: + - ifindex + - scope + reply: + attributes: + - ifindex + - queue-type + - queue-id + - rx-packets + - rx-bytes + - tx-packets + - tx-bytes + - rx-alloc-fail + - rx-hw-drops + - rx-hw-drop-overruns + - rx-csum-complete + - rx-csum-unnecessary + - rx-csum-none + - rx-csum-bad + - rx-hw-gro-packets + - rx-hw-gro-bytes + - rx-hw-gro-wire-packets + - rx-hw-gro-wire-bytes + - rx-hw-drop-ratelimits + - tx-hw-drops + - tx-hw-drop-errors + - tx-csum-none + - tx-needs-csum + - tx-hw-gso-packets + - tx-hw-gso-bytes + - tx-hw-gso-wire-packets + - tx-hw-gso-wire-bytes + - tx-hw-drop-ratelimits + - tx-stop + - tx-wake + - + name: bind-rx + doc: Bind dmabuf to netdev + attribute-set: dmabuf + flags: [admin-perm] + do: + request: + attributes: + - ifindex + - fd + - queues + reply: + attributes: + - id + - + name: napi-set + doc: Set configurable NAPI instance settings. + attribute-set: napi + flags: [admin-perm] + do: + request: + attributes: + - id + - defer-hard-irqs + - gro-flush-timeout + - irq-suspend-timeout + - threaded + - + name: bind-tx + doc: Bind dmabuf to netdev for TX + attribute-set: dmabuf + do: + request: + attributes: + - ifindex + - fd + reply: + attributes: + - id + +kernel-family: + headers: ["net/netdev_netlink.h"] + sock-priv: struct netdev_nl_sock + +mcast-groups: + list: + - + name: mgmt + - + name: page-pool |
