summaryrefslogtreecommitdiff
path: root/Documentation/netlink/specs/netdev.yaml
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation/netlink/specs/netdev.yaml')
-rw-r--r--Documentation/netlink/specs/netdev.yaml808
1 files changed, 808 insertions, 0 deletions
diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
new file mode 100644
index 000000000000..82bf5cb2617d
--- /dev/null
+++ b/Documentation/netlink/specs/netdev.yaml
@@ -0,0 +1,808 @@
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+---
+name: netdev
+
+doc: >-
+ netdev configuration over generic netlink.
+
+definitions:
+ -
+ type: flags
+ name: xdp-act
+ render-max: true
+ entries:
+ -
+ name: basic
+ doc: >-
+ XDP features set supported by all drivers
+ (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX)
+ -
+ name: redirect
+ doc: >-
+ The netdev supports XDP_REDIRECT
+ -
+ name: ndo-xmit
+ doc: >-
+ This feature informs if netdev implements ndo_xdp_xmit callback.
+ -
+ name: xsk-zerocopy
+ doc: >-
+ This feature informs if netdev supports AF_XDP in zero copy mode.
+ -
+ name: hw-offload
+ doc: >-
+ This feature informs if netdev supports XDP hw offloading.
+ -
+ name: rx-sg
+ doc: >-
+ This feature informs if netdev implements non-linear XDP buffer
+ support in the driver napi callback.
+ -
+ name: ndo-xmit-sg
+ doc: >-
+ This feature informs if netdev implements non-linear XDP buffer
+ support in ndo_xdp_xmit callback.
+ -
+ type: flags
+ name: xdp-rx-metadata
+ entries:
+ -
+ name: timestamp
+ doc: |
+ Device is capable of exposing receive HW timestamp via
+ bpf_xdp_metadata_rx_timestamp().
+ -
+ name: hash
+ doc: |
+ Device is capable of exposing receive packet hash via
+ bpf_xdp_metadata_rx_hash().
+ -
+ name: vlan-tag
+ doc: |
+ Device is capable of exposing receive packet VLAN tag via
+ bpf_xdp_metadata_rx_vlan_tag().
+ -
+ type: flags
+ name: xsk-flags
+ entries:
+ -
+ name: tx-timestamp
+ doc: >-
+ HW timestamping egress packets is supported by the driver.
+ -
+ name: tx-checksum
+ doc: >-
+ L3 checksum HW offload is supported by the driver.
+ -
+ name: tx-launch-time-fifo
+ doc: >-
+ Launch time HW offload is supported by the driver.
+ -
+ name: queue-type
+ type: enum
+ entries: [rx, tx]
+ -
+ name: qstats-scope
+ type: flags
+ entries: [queue]
+ -
+ name: napi-threaded
+ type: enum
+ entries: [disabled, enabled, busy-poll]
+
+attribute-sets:
+ -
+ name: dev
+ attributes:
+ -
+ name: ifindex
+ doc: netdev ifindex
+ type: u32
+ checks:
+ min: 1
+ -
+ name: pad
+ type: pad
+ -
+ name: xdp-features
+ doc: Bitmask of enabled xdp-features.
+ type: u64
+ enum: xdp-act
+ -
+ name: xdp-zc-max-segs
+ doc: max fragment count supported by ZC driver
+ type: u32
+ checks:
+ min: 1
+ -
+ name: xdp-rx-metadata-features
+ doc: Bitmask of supported XDP receive metadata features.
+ See Documentation/networking/xdp-rx-metadata.rst for more details.
+ type: u64
+ enum: xdp-rx-metadata
+ -
+ name: xsk-features
+ doc: Bitmask of enabled AF_XDP features.
+ type: u64
+ enum: xsk-flags
+ -
+ name: io-uring-provider-info
+ attributes: []
+ -
+ name: page-pool
+ attributes:
+ -
+ name: id
+ doc: Unique ID of a Page Pool instance.
+ type: uint
+ checks:
+ min: 1
+ max: u32-max
+ -
+ name: ifindex
+ doc: |
+ ifindex of the netdev to which the pool belongs.
+ May be reported as 0 if the page pool was allocated for a netdev
+ which got destroyed already (page pools may outlast their netdevs
+ because they wait for all memory to be returned).
+ type: u32
+ checks:
+ min: 1
+ max: s32-max
+ -
+ name: napi-id
+ doc: Id of NAPI using this Page Pool instance.
+ type: uint
+ checks:
+ min: 1
+ max: u32-max
+ -
+ name: inflight
+ type: uint
+ doc: |
+ Number of outstanding references to this page pool (allocated
+ but yet to be freed pages). Allocated pages may be held in
+ socket receive queues, driver receive ring, page pool recycling
+ ring, the page pool cache, etc.
+ -
+ name: inflight-mem
+ type: uint
+ doc: |
+ Amount of memory held by inflight pages.
+ -
+ name: detach-time
+ type: uint
+ doc: |
+ Seconds in CLOCK_BOOTTIME of when Page Pool was detached by
+ the driver. Once detached Page Pool can no longer be used to
+ allocate memory.
+ Page Pools wait for all the memory allocated from them to be freed
+ before truly disappearing. "Detached" Page Pools cannot be
+ "re-attached", they are just waiting to disappear.
+ Attribute is absent if Page Pool has not been detached, and
+ can still be used to allocate new memory.
+ -
+ name: dmabuf
+ doc: ID of the dmabuf this page-pool is attached to.
+ type: u32
+ -
+ name: io-uring
+ doc: io-uring memory provider information.
+ type: nest
+ nested-attributes: io-uring-provider-info
+ -
+ name: page-pool-info
+ subset-of: page-pool
+ attributes:
+ -
+ name: id
+ -
+ name: ifindex
+ -
+ name: page-pool-stats
+ doc: |
+ Page pool statistics, see docs for struct page_pool_stats
+ for information about individual statistics.
+ attributes:
+ -
+ name: info
+ doc: Page pool identifying information.
+ type: nest
+ nested-attributes: page-pool-info
+ -
+ name: alloc-fast
+ type: uint
+ value: 8 # reserve some attr ids in case we need more metadata later
+ -
+ name: alloc-slow
+ type: uint
+ -
+ name: alloc-slow-high-order
+ type: uint
+ -
+ name: alloc-empty
+ type: uint
+ -
+ name: alloc-refill
+ type: uint
+ -
+ name: alloc-waive
+ type: uint
+ -
+ name: recycle-cached
+ type: uint
+ -
+ name: recycle-cache-full
+ type: uint
+ -
+ name: recycle-ring
+ type: uint
+ -
+ name: recycle-ring-full
+ type: uint
+ -
+ name: recycle-released-refcnt
+ type: uint
+
+ -
+ name: napi
+ attributes:
+ -
+ name: ifindex
+ doc: ifindex of the netdevice to which NAPI instance belongs.
+ type: u32
+ checks:
+ min: 1
+ -
+ name: id
+ doc: ID of the NAPI instance.
+ type: u32
+ -
+ name: irq
+ doc: The associated interrupt vector number for the napi
+ type: u32
+ -
+ name: pid
+ doc: PID of the napi thread, if NAPI is configured to operate in
+ threaded mode. If NAPI is not in threaded mode (i.e. uses normal
+ softirq context), the attribute will be absent.
+ type: u32
+ -
+ name: defer-hard-irqs
+ doc: The number of consecutive empty polls before IRQ deferral ends
+ and hardware IRQs are re-enabled.
+ type: u32
+ checks:
+ max: s32-max
+ -
+ name: gro-flush-timeout
+ doc: The timeout, in nanoseconds, of when to trigger the NAPI watchdog
+ timer which schedules NAPI processing. Additionally, a non-zero
+ value will also prevent GRO from flushing recent super-frames at
+ the end of a NAPI cycle. This may add receive latency in exchange
+ for reducing the number of frames processed by the network stack.
+ type: uint
+ -
+ name: irq-suspend-timeout
+ doc: The timeout, in nanoseconds, of how long to suspend irq
+ processing, if event polling finds events
+ type: uint
+ -
+ name: threaded
+ doc: Whether the NAPI is configured to operate in threaded polling
+ mode. If this is set to enabled then the NAPI context operates
+ in threaded polling mode. If this is set to busy-poll, then the
+ threaded polling mode also busy polls.
+ type: u32
+ enum: napi-threaded
+ -
+ name: xsk-info
+ attributes: []
+ -
+ name: queue
+ attributes:
+ -
+ name: id
+ doc: Queue index; most queue types are indexed like a C array, with
+ indexes starting at 0 and ending at queue count - 1. Queue indexes
+ are scoped to an interface and queue type.
+ type: u32
+ -
+ name: ifindex
+ doc: ifindex of the netdevice to which the queue belongs.
+ type: u32
+ checks:
+ min: 1
+ -
+ name: type
+ doc: Queue type as rx, tx. Each queue type defines a separate ID space.
+ XDP TX queues allocated in the kernel are not linked to NAPIs and
+ thus not listed. AF_XDP queues will have more information set in
+ the xsk attribute.
+ type: u32
+ enum: queue-type
+ -
+ name: napi-id
+ doc: ID of the NAPI instance which services this queue.
+ type: u32
+ -
+ name: dmabuf
+ doc: ID of the dmabuf attached to this queue, if any.
+ type: u32
+ -
+ name: io-uring
+ doc: io_uring memory provider information.
+ type: nest
+ nested-attributes: io-uring-provider-info
+ -
+ name: xsk
+ doc: XSK information for this queue, if any.
+ type: nest
+ nested-attributes: xsk-info
+ -
+ name: qstats
+ doc: |
+ Get device statistics, scoped to a device or a queue.
+ These statistics extend (and partially duplicate) statistics available
+ in struct rtnl_link_stats64.
+ Value of the `scope` attribute determines how statistics are
+ aggregated. When aggregated for the entire device the statistics
+ represent the total number of events since last explicit reset of
+ the device (i.e. not a reconfiguration like changing queue count).
+ When reported per-queue, however, the statistics may not add
+ up to the total number of events, will only be reported for currently
+ active objects, and will likely report the number of events since last
+ reconfiguration.
+ attributes:
+ -
+ name: ifindex
+ doc: ifindex of the netdevice to which stats belong.
+ type: u32
+ checks:
+ min: 1
+ -
+ name: queue-type
+ doc: Queue type as rx, tx, for queue-id.
+ type: u32
+ enum: queue-type
+ -
+ name: queue-id
+ doc: Queue ID, if stats are scoped to a single queue instance.
+ type: u32
+ -
+ name: scope
+ doc: |
+ What object type should be used to iterate over the stats.
+ type: uint
+ enum: qstats-scope
+ -
+ name: rx-packets
+ doc: |
+ Number of wire packets successfully received and passed to the stack.
+ For drivers supporting XDP, XDP is considered the first layer
+ of the stack, so packets consumed by XDP are still counted here.
+ type: uint
+ value: 8 # reserve some attr ids in case we need more metadata later
+ -
+ name: rx-bytes
+ doc: Successfully received bytes, see `rx-packets`.
+ type: uint
+ -
+ name: tx-packets
+ doc: |
+ Number of wire packets successfully sent. Packet is considered to be
+ successfully sent once it is in device memory (usually this means
+ the device has issued a DMA completion for the packet).
+ type: uint
+ -
+ name: tx-bytes
+ doc: Successfully sent bytes, see `tx-packets`.
+ type: uint
+ -
+ name: rx-alloc-fail
+ doc: |
+ Number of times skb or buffer allocation failed on the Rx datapath.
+ Allocation failure may, or may not result in a packet drop, depending
+ on driver implementation and whether system recovers quickly.
+ type: uint
+ -
+ name: rx-hw-drops
+ doc: |
+ Number of all packets which entered the device, but never left it,
+ including but not limited to: packets dropped due to lack of buffer
+ space, processing errors, explicit or implicit policies and packet
+ filters.
+ type: uint
+ -
+ name: rx-hw-drop-overruns
+ doc: |
+ Number of packets dropped due to transient lack of resources, such as
+ buffer space, host descriptors etc.
+ type: uint
+ -
+ name: rx-csum-complete
+ doc: Number of packets that were marked as CHECKSUM_COMPLETE.
+ type: uint
+ -
+ name: rx-csum-unnecessary
+ doc: Number of packets that were marked as CHECKSUM_UNNECESSARY.
+ type: uint
+ -
+ name: rx-csum-none
+ doc: Number of packets that were not checksummed by device.
+ type: uint
+ -
+ name: rx-csum-bad
+ doc: |
+ Number of packets with bad checksum. The packets are not discarded,
+ but still delivered to the stack.
+ type: uint
+ -
+ name: rx-hw-gro-packets
+ doc: |
+ Number of packets that were coalesced from smaller packets by the
+ device. Counts only packets coalesced with the HW-GRO netdevice
+ feature, LRO-coalesced packets are not counted.
+ type: uint
+ -
+ name: rx-hw-gro-bytes
+ doc: See `rx-hw-gro-packets`.
+ type: uint
+ -
+ name: rx-hw-gro-wire-packets
+ doc: |
+ Number of packets that were coalesced to bigger packetss with the
+ HW-GRO netdevice feature. LRO-coalesced packets are not counted.
+ type: uint
+ -
+ name: rx-hw-gro-wire-bytes
+ doc: See `rx-hw-gro-wire-packets`.
+ type: uint
+ -
+ name: rx-hw-drop-ratelimits
+ doc: |
+ Number of the packets dropped by the device due to the received
+ packets bitrate exceeding the device rate limit.
+ type: uint
+ -
+ name: tx-hw-drops
+ doc: |
+ Number of packets that arrived at the device but never left it,
+ encompassing packets dropped for reasons such as processing errors, as
+ well as those affected by explicitly defined policies and packet
+ filtering criteria.
+ type: uint
+ -
+ name: tx-hw-drop-errors
+ doc: Number of packets dropped because they were invalid or malformed.
+ type: uint
+ -
+ name: tx-csum-none
+ doc: |
+ Number of packets that did not require the device to calculate the
+ checksum.
+ type: uint
+ -
+ name: tx-needs-csum
+ doc: |
+ Number of packets that required the device to calculate the checksum.
+ This counter includes the number of GSO wire packets for which device
+ calculated the L4 checksum.
+ type: uint
+ -
+ name: tx-hw-gso-packets
+ doc: |
+ Number of packets that necessitated segmentation into smaller packets
+ by the device.
+ type: uint
+ -
+ name: tx-hw-gso-bytes
+ doc: See `tx-hw-gso-packets`.
+ type: uint
+ -
+ name: tx-hw-gso-wire-packets
+ doc: |
+ Number of wire-sized packets generated by processing
+ `tx-hw-gso-packets`
+ type: uint
+ -
+ name: tx-hw-gso-wire-bytes
+ doc: See `tx-hw-gso-wire-packets`.
+ type: uint
+ -
+ name: tx-hw-drop-ratelimits
+ doc: |
+ Number of the packets dropped by the device due to the transmit
+ packets bitrate exceeding the device rate limit.
+ type: uint
+ -
+ name: tx-stop
+ doc: |
+ Number of times driver paused accepting new tx packets
+ from the stack to this queue, because the queue was full.
+ Note that if BQL is supported and enabled on the device
+ the networking stack will avoid queuing a lot of data at once.
+ type: uint
+ -
+ name: tx-wake
+ doc: |
+ Number of times driver re-started accepting send
+ requests to this queue from the stack.
+ type: uint
+ -
+ name: queue-id
+ subset-of: queue
+ attributes:
+ -
+ name: id
+ -
+ name: type
+ -
+ name: dmabuf
+ attributes:
+ -
+ name: ifindex
+ doc: netdev ifindex to bind the dmabuf to.
+ type: u32
+ checks:
+ min: 1
+ -
+ name: queues
+ doc: receive queues to bind the dmabuf to.
+ type: nest
+ nested-attributes: queue-id
+ multi-attr: true
+ -
+ name: fd
+ doc: dmabuf file descriptor to bind.
+ type: u32
+ -
+ name: id
+ doc: id of the dmabuf binding
+ type: u32
+ checks:
+ min: 1
+
+operations:
+ list:
+ -
+ name: dev-get
+ doc: Get / dump information about a netdev.
+ attribute-set: dev
+ do:
+ request:
+ attributes:
+ - ifindex
+ reply: &dev-all
+ attributes:
+ - ifindex
+ - xdp-features
+ - xdp-zc-max-segs
+ - xdp-rx-metadata-features
+ - xsk-features
+ dump:
+ reply: *dev-all
+ -
+ name: dev-add-ntf
+ doc: Notification about device appearing.
+ notify: dev-get
+ mcgrp: mgmt
+ -
+ name: dev-del-ntf
+ doc: Notification about device disappearing.
+ notify: dev-get
+ mcgrp: mgmt
+ -
+ name: dev-change-ntf
+ doc: Notification about device configuration being changed.
+ notify: dev-get
+ mcgrp: mgmt
+ -
+ name: page-pool-get
+ doc: |
+ Get / dump information about Page Pools.
+ (Only Page Pools associated with a net_device can be listed.)
+ attribute-set: page-pool
+ do:
+ request:
+ attributes:
+ - id
+ reply: &pp-reply
+ attributes:
+ - id
+ - ifindex
+ - napi-id
+ - inflight
+ - inflight-mem
+ - detach-time
+ - dmabuf
+ - io-uring
+ dump:
+ reply: *pp-reply
+ config-cond: page-pool
+ -
+ name: page-pool-add-ntf
+ doc: Notification about page pool appearing.
+ notify: page-pool-get
+ mcgrp: page-pool
+ config-cond: page-pool
+ -
+ name: page-pool-del-ntf
+ doc: Notification about page pool disappearing.
+ notify: page-pool-get
+ mcgrp: page-pool
+ config-cond: page-pool
+ -
+ name: page-pool-change-ntf
+ doc: Notification about page pool configuration being changed.
+ notify: page-pool-get
+ mcgrp: page-pool
+ config-cond: page-pool
+ -
+ name: page-pool-stats-get
+ doc: Get page pool statistics.
+ attribute-set: page-pool-stats
+ do:
+ request:
+ attributes:
+ - info
+ reply: &pp-stats-reply
+ attributes:
+ - info
+ - alloc-fast
+ - alloc-slow
+ - alloc-slow-high-order
+ - alloc-empty
+ - alloc-refill
+ - alloc-waive
+ - recycle-cached
+ - recycle-cache-full
+ - recycle-ring
+ - recycle-ring-full
+ - recycle-released-refcnt
+ dump:
+ reply: *pp-stats-reply
+ config-cond: page-pool-stats
+ -
+ name: queue-get
+ doc: Get queue information from the kernel.
+ Only configured queues will be reported (as opposed to all available
+ hardware queues).
+ attribute-set: queue
+ do:
+ request:
+ attributes:
+ - ifindex
+ - type
+ - id
+ reply: &queue-get-op
+ attributes:
+ - id
+ - type
+ - napi-id
+ - ifindex
+ - dmabuf
+ - io-uring
+ - xsk
+ dump:
+ request:
+ attributes:
+ - ifindex
+ reply: *queue-get-op
+ -
+ name: napi-get
+ doc: Get information about NAPI instances configured on the system.
+ attribute-set: napi
+ do:
+ request:
+ attributes:
+ - id
+ reply: &napi-get-op
+ attributes:
+ - id
+ - ifindex
+ - irq
+ - pid
+ - defer-hard-irqs
+ - gro-flush-timeout
+ - irq-suspend-timeout
+ - threaded
+ dump:
+ request:
+ attributes:
+ - ifindex
+ reply: *napi-get-op
+ -
+ name: qstats-get
+ doc: |
+ Get / dump fine grained statistics. Which statistics are reported
+ depends on the device and the driver, and whether the driver stores
+ software counters per-queue.
+ attribute-set: qstats
+ dump:
+ request:
+ attributes:
+ - ifindex
+ - scope
+ reply:
+ attributes:
+ - ifindex
+ - queue-type
+ - queue-id
+ - rx-packets
+ - rx-bytes
+ - tx-packets
+ - tx-bytes
+ - rx-alloc-fail
+ - rx-hw-drops
+ - rx-hw-drop-overruns
+ - rx-csum-complete
+ - rx-csum-unnecessary
+ - rx-csum-none
+ - rx-csum-bad
+ - rx-hw-gro-packets
+ - rx-hw-gro-bytes
+ - rx-hw-gro-wire-packets
+ - rx-hw-gro-wire-bytes
+ - rx-hw-drop-ratelimits
+ - tx-hw-drops
+ - tx-hw-drop-errors
+ - tx-csum-none
+ - tx-needs-csum
+ - tx-hw-gso-packets
+ - tx-hw-gso-bytes
+ - tx-hw-gso-wire-packets
+ - tx-hw-gso-wire-bytes
+ - tx-hw-drop-ratelimits
+ - tx-stop
+ - tx-wake
+ -
+ name: bind-rx
+ doc: Bind dmabuf to netdev
+ attribute-set: dmabuf
+ flags: [admin-perm]
+ do:
+ request:
+ attributes:
+ - ifindex
+ - fd
+ - queues
+ reply:
+ attributes:
+ - id
+ -
+ name: napi-set
+ doc: Set configurable NAPI instance settings.
+ attribute-set: napi
+ flags: [admin-perm]
+ do:
+ request:
+ attributes:
+ - id
+ - defer-hard-irqs
+ - gro-flush-timeout
+ - irq-suspend-timeout
+ - threaded
+ -
+ name: bind-tx
+ doc: Bind dmabuf to netdev for TX
+ attribute-set: dmabuf
+ do:
+ request:
+ attributes:
+ - ifindex
+ - fd
+ reply:
+ attributes:
+ - id
+
+kernel-family:
+ headers: ["net/netdev_netlink.h"]
+ sock-priv: struct netdev_nl_sock
+
+mcast-groups:
+ list:
+ -
+ name: mgmt
+ -
+ name: page-pool