summaryrefslogtreecommitdiff
path: root/tools/include/uapi/linux
diff options
context:
space:
mode:
Diffstat (limited to 'tools/include/uapi/linux')
-rw-r--r--tools/include/uapi/linux/bits.h18
-rw-r--r--tools/include/uapi/linux/bpf.h179
-rw-r--r--tools/include/uapi/linux/btf.h3
-rw-r--r--tools/include/uapi/linux/const.h17
-rw-r--r--tools/include/uapi/linux/elf.h524
-rw-r--r--tools/include/uapi/linux/ethtool.h104
-rw-r--r--tools/include/uapi/linux/fanotify.h274
-rw-r--r--tools/include/uapi/linux/fcntl.h123
-rw-r--r--tools/include/uapi/linux/fs.h181
-rw-r--r--tools/include/uapi/linux/if_link.h556
-rw-r--r--tools/include/uapi/linux/if_xdp.h24
-rw-r--r--tools/include/uapi/linux/in.h6
-rw-r--r--tools/include/uapi/linux/kvm.h48
-rw-r--r--tools/include/uapi/linux/memfd.h39
-rw-r--r--tools/include/uapi/linux/mman.h1
-rw-r--r--tools/include/uapi/linux/mount.h28
-rw-r--r--tools/include/uapi/linux/netdev.h56
-rw-r--r--tools/include/uapi/linux/nsfs.h45
-rw-r--r--tools/include/uapi/linux/openat2.h43
-rw-r--r--tools/include/uapi/linux/perf_event.h664
-rw-r--r--tools/include/uapi/linux/prctl.h67
-rw-r--r--tools/include/uapi/linux/sched.h148
-rw-r--r--tools/include/uapi/linux/stat.h101
-rw-r--r--tools/include/uapi/linux/stddef.h15
-rw-r--r--tools/include/uapi/linux/types.h3
-rw-r--r--tools/include/uapi/linux/usbdevice_fs.h231
-rw-r--r--tools/include/uapi/linux/userfaultfd.h386
-rw-r--r--tools/include/uapi/linux/vhost.h230
28 files changed, 2831 insertions, 1283 deletions
diff --git a/tools/include/uapi/linux/bits.h b/tools/include/uapi/linux/bits.h
new file mode 100644
index 000000000000..5ee30f882736
--- /dev/null
+++ b/tools/include/uapi/linux/bits.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* bits.h: Macros for dealing with bitmasks. */
+
+#ifndef _UAPI_LINUX_BITS_H
+#define _UAPI_LINUX_BITS_H
+
+#define __GENMASK(h, l) \
+ (((~_UL(0)) - (_UL(1) << (l)) + 1) & \
+ (~_UL(0) >> (__BITS_PER_LONG - 1 - (h))))
+
+#define __GENMASK_ULL(h, l) \
+ (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \
+ (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h))))
+
+#define __GENMASK_U128(h, l) \
+ ((_BIT128((h)) << 1) - (_BIT128(l)))
+
+#endif /* _UAPI_LINUX_BITS_H */
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 3c42b9f1bada..85180e4aaa5a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -51,6 +51,9 @@
#define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */
#define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */
+#define BPF_LOAD_ACQ 0x100 /* load-acquire */
+#define BPF_STORE_REL 0x110 /* store-release */
+
enum bpf_cond_pseudo_jmp {
BPF_MAY_GOTO = 0,
};
@@ -1115,11 +1118,16 @@ enum bpf_attach_type {
BPF_CGROUP_UNIX_GETSOCKNAME,
BPF_NETKIT_PRIMARY,
BPF_NETKIT_PEER,
+ BPF_TRACE_KPROBE_SESSION,
+ BPF_TRACE_UPROBE_SESSION,
__MAX_BPF_ATTACH_TYPE
};
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
+/* Add BPF_LINK_TYPE(type, name) in bpf_types.h to keep bpf_link_type_strs[]
+ * in sync with the definitions below.
+ */
enum bpf_link_type {
BPF_LINK_TYPE_UNSPEC = 0,
BPF_LINK_TYPE_RAW_TRACEPOINT = 1,
@@ -1135,6 +1143,7 @@ enum bpf_link_type {
BPF_LINK_TYPE_TCX = 11,
BPF_LINK_TYPE_UPROBE_MULTI = 12,
BPF_LINK_TYPE_NETKIT = 13,
+ BPF_LINK_TYPE_SOCKMAP = 14,
__MAX_BPF_LINK_TYPE,
};
@@ -1201,6 +1210,7 @@ enum bpf_perf_event_type {
#define BPF_F_BEFORE (1U << 3)
#define BPF_F_AFTER (1U << 4)
#define BPF_F_ID (1U << 5)
+#define BPF_F_PREORDER (1U << 6)
#define BPF_F_LINK BPF_F_LINK /* 1 << 13 */
/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
@@ -1423,6 +1433,8 @@ enum {
#define BPF_F_TEST_RUN_ON_CPU (1U << 0)
/* If set, XDP frames will be transmitted after processing */
#define BPF_F_TEST_XDP_LIVE_FRAMES (1U << 1)
+/* If set, apply CHECKSUM_COMPLETE to skb and validate the checksum */
+#define BPF_F_TEST_SKB_CHECKSUM_COMPLETE (1U << 2)
/* type for BPF_ENABLE_STATS */
enum bpf_stats_type {
@@ -1494,7 +1506,7 @@ union bpf_attr {
__s32 map_token_fd;
};
- struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+ struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */
__u32 map_fd;
__aligned_u64 key;
union {
@@ -1565,6 +1577,16 @@ union bpf_attr {
* If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
*/
__s32 prog_token_fd;
+ /* The fd_array_cnt can be used to pass the length of the
+ * fd_array array. In this case all the [map] file descriptors
+ * passed in this array will be bound to the program, even if
+ * the maps are not referenced directly. The functionality is
+ * similar to the BPF_PROG_BIND_MAP syscall, but maps can be
+ * used by the verifier during the program load. If provided,
+ * then the fd_array[0,...,fd_array_cnt-1] is expected to be
+ * continuous.
+ */
+ __u32 fd_array_cnt;
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -1630,6 +1652,7 @@ union bpf_attr {
};
__u32 next_id;
__u32 open_flags;
+ __s32 fd_by_id_token_fd;
};
struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */
@@ -1662,8 +1685,10 @@ union bpf_attr {
} query;
struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
- __u64 name;
- __u32 prog_fd;
+ __u64 name;
+ __u32 prog_fd;
+ __u32 :32;
+ __aligned_u64 cookie;
} raw_tracepoint;
struct { /* anonymous struct for BPF_BTF_LOAD */
@@ -1964,15 +1989,21 @@ union bpf_attr {
* program.
* Return
* The SMP id of the processor running the program.
+ * Attributes
+ * __bpf_fastcall
*
* long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
* Description
* Store *len* bytes from address *from* into the packet
- * associated to *skb*, at *offset*. *flags* are a combination of
- * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the
- * checksum for the packet after storing the bytes) and
- * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
- * **->swhash** and *skb*\ **->l4hash** to 0).
+ * associated to *skb*, at *offset*. The *flags* are a combination
+ * of the following values:
+ *
+ * **BPF_F_RECOMPUTE_CSUM**
+ * Automatically update *skb*\ **->csum** after storing the
+ * bytes.
+ * **BPF_F_INVALIDATE_HASH**
+ * Set *skb*\ **->hash**, *skb*\ **->swhash** and *skb*\
+ * **->l4hash** to 0.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
@@ -2024,7 +2055,7 @@ union bpf_attr {
* untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
* for updates resulting in a null checksum the value is set to
* **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
- * the checksum is to be computed against a pseudo-header.
+ * that the modified header field is part of the pseudo-header.
*
* This helper works in combination with **bpf_csum_diff**\ (),
* which does not update the checksum in-place, but offers more
@@ -2845,7 +2876,7 @@ union bpf_attr {
* **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**,
* **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**,
* **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**,
- * **TCP_BPF_RTO_MIN**.
+ * **TCP_BPF_RTO_MIN**, **TCP_BPF_SOCK_OPS_CB_FLAGS**.
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
* * **IPPROTO_IPV6**, which supports the following *optname*\ s:
* **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**.
@@ -3095,10 +3126,6 @@ union bpf_attr {
* with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration
* option, and in this case it only works on functions tagged with
* **ALLOW_ERROR_INJECTION** in the kernel code.
- *
- * Also, the helper is only available for the architectures having
- * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing,
- * x86 architecture is the only one to support this feature.
* Return
* 0
*
@@ -3392,6 +3419,10 @@ union bpf_attr {
* for the nexthop. If the src addr cannot be derived,
* **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
* case, *params*->dmac and *params*->smac are not set either.
+ * **BPF_FIB_LOOKUP_MARK**
+ * Use the mark present in *params*->mark for the fib lookup.
+ * This option should not be used with BPF_FIB_LOOKUP_DIRECT,
+ * as it only has meaning for full lookups.
*
* *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs.
@@ -4941,6 +4972,9 @@ union bpf_attr {
* the netns switch takes place from ingress to ingress without
* going through the CPU's backlog queue.
*
+ * *skb*\ **->mark** and *skb*\ **->tstamp** are not cleared during
+ * the netns switch.
+ *
* The *flags* argument is reserved and must be 0. The helper is
* currently only supported for tc BPF program types at the
* ingress hook and for veth and netkit target device types. The
@@ -5020,7 +5054,7 @@ union bpf_attr {
* bytes will be copied to *dst*
* Return
* The **hash_algo** is returned on success,
- * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if
+ * **-EOPNOTSUPP** if IMA is disabled or **-EINVAL** if
* invalid arguments are passed.
*
* struct socket *bpf_sock_from_file(struct file *file)
@@ -5359,7 +5393,7 @@ union bpf_attr {
* Currently, the **flags** must be 0. Currently, nr_loops is
* limited to 1 << 23 (~8 million) loops.
*
- * long (\*callback_fn)(u32 index, void \*ctx);
+ * long (\*callback_fn)(u64 index, void \*ctx);
*
* where **index** is the current index in the loop. The index
* is zero-indexed.
@@ -5506,14 +5540,15 @@ union bpf_attr {
* bytes will be copied to *dst*
* Return
* The **hash_algo** is returned on success,
- * **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if
+ * **-EOPNOTSUPP** if the hash calculation failed or **-EINVAL** if
* invalid arguments are passed.
*
- * void *bpf_kptr_xchg(void *map_value, void *ptr)
+ * void *bpf_kptr_xchg(void *dst, void *ptr)
* Description
- * Exchange kptr at pointer *map_value* with *ptr*, and return the
- * old value. *ptr* can be NULL, otherwise it must be a referenced
- * pointer which will be released when this helper is called.
+ * Exchange kptr at pointer *dst* with *ptr*, and return the old value.
+ * *dst* can be map value or local kptr. *ptr* can be NULL, otherwise
+ * it must be a referenced pointer which will be released when this helper
+ * is called.
* Return
* The old value of kptr (which can be NULL). The returned pointer
* if not NULL, is a reference which must be released using its
@@ -5996,7 +6031,10 @@ union bpf_attr {
FN(user_ringbuf_drain, 209, ##ctx) \
FN(cgrp_storage_get, 210, ##ctx) \
FN(cgrp_storage_delete, 211, ##ctx) \
- /* */
+ /* This helper list is effectively frozen. If you are trying to \
+ * add a new helper, you should add a kfunc instead which has \
+ * less stability guarantees. See Documentation/bpf/kfuncs.rst \
+ */
/* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't
* know or care about integer value that is now passed as second argument
@@ -6036,11 +6074,6 @@ enum {
BPF_F_MARK_ENFORCE = (1ULL << 6),
};
-/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
-enum {
- BPF_F_INGRESS = (1ULL << 0),
-};
-
/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
enum {
BPF_F_TUNINFO_IPV6 = (1ULL << 0),
@@ -6187,10 +6220,12 @@ enum {
BPF_F_BPRM_SECUREEXEC = (1ULL << 0),
};
-/* Flags for bpf_redirect_map helper */
+/* Flags for bpf_redirect and bpf_redirect_map helpers */
enum {
- BPF_F_BROADCAST = (1ULL << 3),
- BPF_F_EXCLUDE_INGRESS = (1ULL << 4),
+ BPF_F_INGRESS = (1ULL << 0), /* used for skb path */
+ BPF_F_BROADCAST = (1ULL << 3), /* used for XDP path */
+ BPF_F_EXCLUDE_INGRESS = (1ULL << 4), /* used for XDP path */
+#define BPF_F_REDIRECT_FLAGS (BPF_F_INGRESS | BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS)
};
#define __bpf_md_ptr(type, name) \
@@ -6199,12 +6234,17 @@ union { \
__u64 :64; \
} __attribute__((aligned(8)))
+/* The enum used in skb->tstamp_type. It specifies the clock type
+ * of the time stored in the skb->tstamp.
+ */
enum {
- BPF_SKB_TSTAMP_UNSPEC,
- BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */
- /* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle,
- * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC
- * and try to deduce it by ingress, egress or skb->sk->sk_clockid.
+ BPF_SKB_TSTAMP_UNSPEC = 0, /* DEPRECATED */
+ BPF_SKB_TSTAMP_DELIVERY_MONO = 1, /* DEPRECATED */
+ BPF_SKB_CLOCK_REALTIME = 0,
+ BPF_SKB_CLOCK_MONOTONIC = 1,
+ BPF_SKB_CLOCK_TAI = 2,
+ /* For any future BPF_SKB_CLOCK_* that the bpf prog cannot handle,
+ * the bpf prog can try to deduce it by ingress/egress/skb->sk->sk_clockid.
*/
};
@@ -6687,6 +6727,7 @@ struct bpf_link_info {
__u32 name_len;
__u32 offset; /* offset from file_name */
__u64 cookie;
+ __u64 ref_ctr_offset;
} uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */
struct {
__aligned_u64 func_name; /* in/out */
@@ -6718,6 +6759,10 @@ struct bpf_link_info {
__u32 ifindex;
__u32 attach_type;
} netkit;
+ struct {
+ __u32 map_id;
+ __u32 attach_type;
+ } sockmap;
};
} __attribute__((aligned(8)));
@@ -6884,6 +6929,12 @@ enum {
BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F,
};
+enum {
+ SK_BPF_CB_TX_TIMESTAMPING = 1<<0,
+ SK_BPF_CB_MASK = (SK_BPF_CB_TX_TIMESTAMPING - 1) |
+ SK_BPF_CB_TX_TIMESTAMPING
+};
+
/* List of known BPF sock_ops operators.
* New entries can only be added at the end
*/
@@ -6936,6 +6987,8 @@ enum {
* socket transition to LISTEN state.
*/
BPF_SOCK_OPS_RTT_CB, /* Called on every RTT.
+ * Arg1: measured RTT input (mrtt)
+ * Arg2: updated srtt
*/
BPF_SOCK_OPS_PARSE_HDR_OPT_CB, /* Parse the header option.
* It will be called to handle
@@ -6994,6 +7047,29 @@ enum {
* by the kernel or the
* earlier bpf-progs.
*/
+ BPF_SOCK_OPS_TSTAMP_SCHED_CB, /* Called when skb is passing
+ * through dev layer when
+ * SK_BPF_CB_TX_TIMESTAMPING
+ * feature is on.
+ */
+ BPF_SOCK_OPS_TSTAMP_SND_SW_CB, /* Called when skb is about to send
+ * to the nic when SK_BPF_CB_TX_TIMESTAMPING
+ * feature is on.
+ */
+ BPF_SOCK_OPS_TSTAMP_SND_HW_CB, /* Called in hardware phase when
+ * SK_BPF_CB_TX_TIMESTAMPING feature
+ * is on.
+ */
+ BPF_SOCK_OPS_TSTAMP_ACK_CB, /* Called when all the skbs in the
+ * same sendmsg call are acked
+ * when SK_BPF_CB_TX_TIMESTAMPING
+ * feature is on.
+ */
+ BPF_SOCK_OPS_TSTAMP_SENDMSG_CB, /* Called when every sendmsg syscall
+ * is triggered. It's used to correlate
+ * sendmsg timestamp with corresponding
+ * tskey.
+ */
};
/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
@@ -7059,6 +7135,8 @@ enum {
TCP_BPF_SYN = 1005, /* Copy the TCP header */
TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */
TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */
+ TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */
+ SK_BPF_CB_FLAGS = 1009, /* Get or set sock ops flags in socket */
};
enum {
@@ -7118,6 +7196,7 @@ enum {
BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
BPF_FIB_LOOKUP_TBID = (1U << 3),
BPF_FIB_LOOKUP_SRC = (1U << 4),
+ BPF_FIB_LOOKUP_MARK = (1U << 5),
};
enum {
@@ -7150,7 +7229,7 @@ struct bpf_fib_lookup {
/* output: MTU value */
__u16 mtu_result;
- };
+ } __attribute__((packed, aligned(2)));
/* input: L3 device index for lookup
* output: device index from FIB lookup
*/
@@ -7195,8 +7274,19 @@ struct bpf_fib_lookup {
__u32 tbid;
};
- __u8 smac[6]; /* ETH_ALEN */
- __u8 dmac[6]; /* ETH_ALEN */
+ union {
+ /* input */
+ struct {
+ __u32 mark; /* policy routing */
+ /* 2 4-byte holes for input */
+ };
+
+ /* output: source and dest mac */
+ struct {
+ __u8 smac[6]; /* ETH_ALEN */
+ __u8 dmac[6]; /* ETH_ALEN */
+ };
+ };
};
struct bpf_redir_neigh {
@@ -7283,6 +7373,10 @@ struct bpf_timer {
__u64 __opaque[2];
} __attribute__((aligned(8)));
+struct bpf_wq {
+ __u64 __opaque[2];
+} __attribute__((aligned(8)));
+
struct bpf_dynptr {
__u64 __opaque[2];
} __attribute__((aligned(8)));
@@ -7475,4 +7569,13 @@ struct bpf_iter_num {
__u64 __opaque[1];
} __attribute__((aligned(8)));
+/*
+ * Flags to control BPF kfunc behaviour.
+ * - BPF_F_PAD_ZEROS: Pad destination buffer with zeros. (See the respective
+ * helper documentation for details.)
+ */
+enum bpf_kfunc_flags {
+ BPF_F_PAD_ZEROS = (1ULL << 0),
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index ec1798b6d3ff..266d4ffa6c07 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -36,7 +36,8 @@ struct btf_type {
* bits 24-28: kind (e.g. int, ptr, array...etc)
* bits 29-30: unused
* bit 31: kind_flag, currently used by
- * struct, union, enum, fwd and enum64
+ * struct, union, enum, fwd, enum64,
+ * decl_tag and type_tag
*/
__u32 info;
/* "size" is used by INT, ENUM, STRUCT, UNION, DATASEC and ENUM64.
diff --git a/tools/include/uapi/linux/const.h b/tools/include/uapi/linux/const.h
index a429381e7ca5..b8f629ef135f 100644
--- a/tools/include/uapi/linux/const.h
+++ b/tools/include/uapi/linux/const.h
@@ -28,6 +28,23 @@
#define _BITUL(x) (_UL(1) << (x))
#define _BITULL(x) (_ULL(1) << (x))
+#if !defined(__ASSEMBLY__)
+/*
+ * Missing asm support
+ *
+ * __BIT128() would not work in the asm code, as it shifts an
+ * 'unsigned __int128' data type as direct representation of
+ * 128 bit constants is not supported in the gcc compiler, as
+ * they get silently truncated.
+ *
+ * TODO: Please revisit this implementation when gcc compiler
+ * starts representing 128 bit constants directly like long
+ * and unsigned long etc. Subsequently drop the comment for
+ * GENMASK_U128() which would then start supporting asm code.
+ */
+#define _BIT128(x) ((unsigned __int128)(1) << (x))
+#endif
+
#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1)
#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask))
diff --git a/tools/include/uapi/linux/elf.h b/tools/include/uapi/linux/elf.h
new file mode 100644
index 000000000000..5834b83d7f9a
--- /dev/null
+++ b/tools/include/uapi/linux/elf.h
@@ -0,0 +1,524 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _LINUX_ELF_H
+#define _LINUX_ELF_H
+
+#include <linux/types.h>
+#include <linux/elf-em.h>
+
+/* 32-bit ELF base types. */
+typedef __u32 Elf32_Addr;
+typedef __u16 Elf32_Half;
+typedef __u32 Elf32_Off;
+typedef __s32 Elf32_Sword;
+typedef __u32 Elf32_Word;
+typedef __u16 Elf32_Versym;
+
+/* 64-bit ELF base types. */
+typedef __u64 Elf64_Addr;
+typedef __u16 Elf64_Half;
+typedef __s16 Elf64_SHalf;
+typedef __u64 Elf64_Off;
+typedef __s32 Elf64_Sword;
+typedef __u32 Elf64_Word;
+typedef __u64 Elf64_Xword;
+typedef __s64 Elf64_Sxword;
+typedef __u16 Elf64_Versym;
+
+/* These constants are for the segment types stored in the image headers */
+#define PT_NULL 0
+#define PT_LOAD 1
+#define PT_DYNAMIC 2
+#define PT_INTERP 3
+#define PT_NOTE 4
+#define PT_SHLIB 5
+#define PT_PHDR 6
+#define PT_TLS 7 /* Thread local storage segment */
+#define PT_LOOS 0x60000000 /* OS-specific */
+#define PT_HIOS 0x6fffffff /* OS-specific */
+#define PT_LOPROC 0x70000000
+#define PT_HIPROC 0x7fffffff
+#define PT_GNU_EH_FRAME (PT_LOOS + 0x474e550)
+#define PT_GNU_STACK (PT_LOOS + 0x474e551)
+#define PT_GNU_RELRO (PT_LOOS + 0x474e552)
+#define PT_GNU_PROPERTY (PT_LOOS + 0x474e553)
+
+
+/* ARM MTE memory tag segment type */
+#define PT_AARCH64_MEMTAG_MTE (PT_LOPROC + 0x2)
+
+/*
+ * Extended Numbering
+ *
+ * If the real number of program header table entries is larger than
+ * or equal to PN_XNUM(0xffff), it is set to sh_info field of the
+ * section header at index 0, and PN_XNUM is set to e_phnum
+ * field. Otherwise, the section header at index 0 is zero
+ * initialized, if it exists.
+ *
+ * Specifications are available in:
+ *
+ * - Oracle: Linker and Libraries.
+ * Part No: 817–1984–19, August 2011.
+ * https://docs.oracle.com/cd/E18752_01/pdf/817-1984.pdf
+ *
+ * - System V ABI AMD64 Architecture Processor Supplement
+ * Draft Version 0.99.4,
+ * January 13, 2010.
+ * http://www.cs.washington.edu/education/courses/cse351/12wi/supp-docs/abi.pdf
+ */
+#define PN_XNUM 0xffff
+
+/* These constants define the different elf file types */
+#define ET_NONE 0
+#define ET_REL 1
+#define ET_EXEC 2
+#define ET_DYN 3
+#define ET_CORE 4
+#define ET_LOPROC 0xff00
+#define ET_HIPROC 0xffff
+
+/* This is the info that is needed to parse the dynamic section of the file */
+#define DT_NULL 0
+#define DT_NEEDED 1
+#define DT_PLTRELSZ 2
+#define DT_PLTGOT 3
+#define DT_HASH 4
+#define DT_STRTAB 5
+#define DT_SYMTAB 6
+#define DT_RELA 7
+#define DT_RELASZ 8
+#define DT_RELAENT 9
+#define DT_STRSZ 10
+#define DT_SYMENT 11
+#define DT_INIT 12
+#define DT_FINI 13
+#define DT_SONAME 14
+#define DT_RPATH 15
+#define DT_SYMBOLIC 16
+#define DT_REL 17
+#define DT_RELSZ 18
+#define DT_RELENT 19
+#define DT_PLTREL 20
+#define DT_DEBUG 21
+#define DT_TEXTREL 22
+#define DT_JMPREL 23
+#define DT_ENCODING 32
+#define OLD_DT_LOOS 0x60000000
+#define DT_LOOS 0x6000000d
+#define DT_HIOS 0x6ffff000
+#define DT_VALRNGLO 0x6ffffd00
+#define DT_VALRNGHI 0x6ffffdff
+#define DT_ADDRRNGLO 0x6ffffe00
+#define DT_GNU_HASH 0x6ffffef5
+#define DT_ADDRRNGHI 0x6ffffeff
+#define DT_VERSYM 0x6ffffff0
+#define DT_RELACOUNT 0x6ffffff9
+#define DT_RELCOUNT 0x6ffffffa
+#define DT_FLAGS_1 0x6ffffffb
+#define DT_VERDEF 0x6ffffffc
+#define DT_VERDEFNUM 0x6ffffffd
+#define DT_VERNEED 0x6ffffffe
+#define DT_VERNEEDNUM 0x6fffffff
+#define OLD_DT_HIOS 0x6fffffff
+#define DT_LOPROC 0x70000000
+#define DT_HIPROC 0x7fffffff
+
+/* This info is needed when parsing the symbol table */
+#define STB_LOCAL 0
+#define STB_GLOBAL 1
+#define STB_WEAK 2
+
+#define STN_UNDEF 0
+
+#define STT_NOTYPE 0
+#define STT_OBJECT 1
+#define STT_FUNC 2
+#define STT_SECTION 3
+#define STT_FILE 4
+#define STT_COMMON 5
+#define STT_TLS 6
+
+#define VER_FLG_BASE 0x1
+#define VER_FLG_WEAK 0x2
+
+#define ELF_ST_BIND(x) ((x) >> 4)
+#define ELF_ST_TYPE(x) ((x) & 0xf)
+#define ELF32_ST_BIND(x) ELF_ST_BIND(x)
+#define ELF32_ST_TYPE(x) ELF_ST_TYPE(x)
+#define ELF64_ST_BIND(x) ELF_ST_BIND(x)
+#define ELF64_ST_TYPE(x) ELF_ST_TYPE(x)
+
+typedef struct {
+ Elf32_Sword d_tag;
+ union {
+ Elf32_Sword d_val;
+ Elf32_Addr d_ptr;
+ } d_un;
+} Elf32_Dyn;
+
+typedef struct {
+ Elf64_Sxword d_tag; /* entry tag value */
+ union {
+ Elf64_Xword d_val;
+ Elf64_Addr d_ptr;
+ } d_un;
+} Elf64_Dyn;
+
+/* The following are used with relocations */
+#define ELF32_R_SYM(x) ((x) >> 8)
+#define ELF32_R_TYPE(x) ((x) & 0xff)
+
+#define ELF64_R_SYM(i) ((i) >> 32)
+#define ELF64_R_TYPE(i) ((i) & 0xffffffff)
+
+typedef struct elf32_rel {
+ Elf32_Addr r_offset;
+ Elf32_Word r_info;
+} Elf32_Rel;
+
+typedef struct elf64_rel {
+ Elf64_Addr r_offset; /* Location at which to apply the action */
+ Elf64_Xword r_info; /* index and type of relocation */
+} Elf64_Rel;
+
+typedef struct elf32_rela {
+ Elf32_Addr r_offset;
+ Elf32_Word r_info;
+ Elf32_Sword r_addend;
+} Elf32_Rela;
+
+typedef struct elf64_rela {
+ Elf64_Addr r_offset; /* Location at which to apply the action */
+ Elf64_Xword r_info; /* index and type of relocation */
+ Elf64_Sxword r_addend; /* Constant addend used to compute value */
+} Elf64_Rela;
+
+typedef struct elf32_sym {
+ Elf32_Word st_name;
+ Elf32_Addr st_value;
+ Elf32_Word st_size;
+ unsigned char st_info;
+ unsigned char st_other;
+ Elf32_Half st_shndx;
+} Elf32_Sym;
+
+typedef struct elf64_sym {
+ Elf64_Word st_name; /* Symbol name, index in string tbl */
+ unsigned char st_info; /* Type and binding attributes */
+ unsigned char st_other; /* No defined meaning, 0 */
+ Elf64_Half st_shndx; /* Associated section index */
+ Elf64_Addr st_value; /* Value of the symbol */
+ Elf64_Xword st_size; /* Associated symbol size */
+} Elf64_Sym;
+
+
+#define EI_NIDENT 16
+
+typedef struct elf32_hdr {
+ unsigned char e_ident[EI_NIDENT];
+ Elf32_Half e_type;
+ Elf32_Half e_machine;
+ Elf32_Word e_version;
+ Elf32_Addr e_entry; /* Entry point */
+ Elf32_Off e_phoff;
+ Elf32_Off e_shoff;
+ Elf32_Word e_flags;
+ Elf32_Half e_ehsize;
+ Elf32_Half e_phentsize;
+ Elf32_Half e_phnum;
+ Elf32_Half e_shentsize;
+ Elf32_Half e_shnum;
+ Elf32_Half e_shstrndx;
+} Elf32_Ehdr;
+
+typedef struct elf64_hdr {
+ unsigned char e_ident[EI_NIDENT]; /* ELF "magic number" */
+ Elf64_Half e_type;
+ Elf64_Half e_machine;
+ Elf64_Word e_version;
+ Elf64_Addr e_entry; /* Entry point virtual address */
+ Elf64_Off e_phoff; /* Program header table file offset */
+ Elf64_Off e_shoff; /* Section header table file offset */
+ Elf64_Word e_flags;
+ Elf64_Half e_ehsize;
+ Elf64_Half e_phentsize;
+ Elf64_Half e_phnum;
+ Elf64_Half e_shentsize;
+ Elf64_Half e_shnum;
+ Elf64_Half e_shstrndx;
+} Elf64_Ehdr;
+
+/* These constants define the permissions on sections in the program
+ header, p_flags. */
+#define PF_R 0x4
+#define PF_W 0x2
+#define PF_X 0x1
+
+typedef struct elf32_phdr {
+ Elf32_Word p_type;
+ Elf32_Off p_offset;
+ Elf32_Addr p_vaddr;
+ Elf32_Addr p_paddr;
+ Elf32_Word p_filesz;
+ Elf32_Word p_memsz;
+ Elf32_Word p_flags;
+ Elf32_Word p_align;
+} Elf32_Phdr;
+
+typedef struct elf64_phdr {
+ Elf64_Word p_type;
+ Elf64_Word p_flags;
+ Elf64_Off p_offset; /* Segment file offset */
+ Elf64_Addr p_vaddr; /* Segment virtual address */
+ Elf64_Addr p_paddr; /* Segment physical address */
+ Elf64_Xword p_filesz; /* Segment size in file */
+ Elf64_Xword p_memsz; /* Segment size in memory */
+ Elf64_Xword p_align; /* Segment alignment, file & memory */
+} Elf64_Phdr;
+
+/* sh_type */
+#define SHT_NULL 0
+#define SHT_PROGBITS 1
+#define SHT_SYMTAB 2
+#define SHT_STRTAB 3
+#define SHT_RELA 4
+#define SHT_HASH 5
+#define SHT_DYNAMIC 6
+#define SHT_NOTE 7
+#define SHT_NOBITS 8
+#define SHT_REL 9
+#define SHT_SHLIB 10
+#define SHT_DYNSYM 11
+#define SHT_NUM 12
+#define SHT_LOPROC 0x70000000
+#define SHT_HIPROC 0x7fffffff
+#define SHT_LOUSER 0x80000000
+#define SHT_HIUSER 0xffffffff
+
+/* sh_flags */
+#define SHF_WRITE 0x1
+#define SHF_ALLOC 0x2
+#define SHF_EXECINSTR 0x4
+#define SHF_RELA_LIVEPATCH 0x00100000
+#define SHF_RO_AFTER_INIT 0x00200000
+#define SHF_MASKPROC 0xf0000000
+
+/* special section indexes */
+#define SHN_UNDEF 0
+#define SHN_LORESERVE 0xff00
+#define SHN_LOPROC 0xff00
+#define SHN_HIPROC 0xff1f
+#define SHN_LIVEPATCH 0xff20
+#define SHN_ABS 0xfff1
+#define SHN_COMMON 0xfff2
+#define SHN_HIRESERVE 0xffff
+
+typedef struct elf32_shdr {
+ Elf32_Word sh_name;
+ Elf32_Word sh_type;
+ Elf32_Word sh_flags;
+ Elf32_Addr sh_addr;
+ Elf32_Off sh_offset;
+ Elf32_Word sh_size;
+ Elf32_Word sh_link;
+ Elf32_Word sh_info;
+ Elf32_Word sh_addralign;
+ Elf32_Word sh_entsize;
+} Elf32_Shdr;
+
+typedef struct elf64_shdr {
+ Elf64_Word sh_name; /* Section name, index in string tbl */
+ Elf64_Word sh_type; /* Type of section */
+ Elf64_Xword sh_flags; /* Miscellaneous section attributes */
+ Elf64_Addr sh_addr; /* Section virtual addr at execution */
+ Elf64_Off sh_offset; /* Section file offset */
+ Elf64_Xword sh_size; /* Size of section in bytes */
+ Elf64_Word sh_link; /* Index of another section */
+ Elf64_Word sh_info; /* Additional section information */
+ Elf64_Xword sh_addralign; /* Section alignment */
+ Elf64_Xword sh_entsize; /* Entry size if section holds table */
+} Elf64_Shdr;
+
+#define EI_MAG0 0 /* e_ident[] indexes */
+#define EI_MAG1 1
+#define EI_MAG2 2
+#define EI_MAG3 3
+#define EI_CLASS 4
+#define EI_DATA 5
+#define EI_VERSION 6
+#define EI_OSABI 7
+#define EI_PAD 8
+
+#define ELFMAG0 0x7f /* EI_MAG */
+#define ELFMAG1 'E'
+#define ELFMAG2 'L'
+#define ELFMAG3 'F'
+#define ELFMAG "\177ELF"
+#define SELFMAG 4
+
+#define ELFCLASSNONE 0 /* EI_CLASS */
+#define ELFCLASS32 1
+#define ELFCLASS64 2
+#define ELFCLASSNUM 3
+
+#define ELFDATANONE 0 /* e_ident[EI_DATA] */
+#define ELFDATA2LSB 1
+#define ELFDATA2MSB 2
+
+#define EV_NONE 0 /* e_version, EI_VERSION */
+#define EV_CURRENT 1
+#define EV_NUM 2
+
+#define ELFOSABI_NONE 0
+#define ELFOSABI_LINUX 3
+
+#ifndef ELF_OSABI
+#define ELF_OSABI ELFOSABI_NONE
+#endif
+
+/*
+ * Notes used in ET_CORE. Architectures export some of the arch register sets
+ * using the corresponding note types via the PTRACE_GETREGSET and
+ * PTRACE_SETREGSET requests.
+ * The note name for these types is "LINUX", except NT_PRFPREG that is named
+ * "CORE".
+ */
+#define NT_PRSTATUS 1
+#define NT_PRFPREG 2
+#define NT_PRPSINFO 3
+#define NT_TASKSTRUCT 4
+#define NT_AUXV 6
+/*
+ * Note to userspace developers: size of NT_SIGINFO note may increase
+ * in the future to accomodate more fields, don't assume it is fixed!
+ */
+#define NT_SIGINFO 0x53494749
+#define NT_FILE 0x46494c45
+#define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */
+#define NT_PPC_VMX 0x100 /* PowerPC Altivec/VMX registers */
+#define NT_PPC_SPE 0x101 /* PowerPC SPE/EVR registers */
+#define NT_PPC_VSX 0x102 /* PowerPC VSX registers */
+#define NT_PPC_TAR 0x103 /* Target Address Register */
+#define NT_PPC_PPR 0x104 /* Program Priority Register */
+#define NT_PPC_DSCR 0x105 /* Data Stream Control Register */
+#define NT_PPC_EBB 0x106 /* Event Based Branch Registers */
+#define NT_PPC_PMU 0x107 /* Performance Monitor Registers */
+#define NT_PPC_TM_CGPR 0x108 /* TM checkpointed GPR Registers */
+#define NT_PPC_TM_CFPR 0x109 /* TM checkpointed FPR Registers */
+#define NT_PPC_TM_CVMX 0x10a /* TM checkpointed VMX Registers */
+#define NT_PPC_TM_CVSX 0x10b /* TM checkpointed VSX Registers */
+#define NT_PPC_TM_SPR 0x10c /* TM Special Purpose Registers */
+#define NT_PPC_TM_CTAR 0x10d /* TM checkpointed Target Address Register */
+#define NT_PPC_TM_CPPR 0x10e /* TM checkpointed Program Priority Register */
+#define NT_PPC_TM_CDSCR 0x10f /* TM checkpointed Data Stream Control Register */
+#define NT_PPC_PKEY 0x110 /* Memory Protection Keys registers */
+#define NT_PPC_DEXCR 0x111 /* PowerPC DEXCR registers */
+#define NT_PPC_HASHKEYR 0x112 /* PowerPC HASHKEYR register */
+#define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */
+#define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */
+#define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */
+/* Old binutils treats 0x203 as a CET state */
+#define NT_X86_SHSTK 0x204 /* x86 SHSTK state */
+#define NT_X86_XSAVE_LAYOUT 0x205 /* XSAVE layout description */
+#define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */
+#define NT_S390_TIMER 0x301 /* s390 timer register */
+#define NT_S390_TODCMP 0x302 /* s390 TOD clock comparator register */
+#define NT_S390_TODPREG 0x303 /* s390 TOD programmable register */
+#define NT_S390_CTRS 0x304 /* s390 control registers */
+#define NT_S390_PREFIX 0x305 /* s390 prefix register */
+#define NT_S390_LAST_BREAK 0x306 /* s390 breaking event address */
+#define NT_S390_SYSTEM_CALL 0x307 /* s390 system call restart data */
+#define NT_S390_TDB 0x308 /* s390 transaction diagnostic block */
+#define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 upper half */
+#define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */
+#define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */
+#define NT_S390_GS_BC 0x30c /* s390 guarded storage broadcast control block */
+#define NT_S390_RI_CB 0x30d /* s390 runtime instrumentation */
+#define NT_S390_PV_CPU_DATA 0x30e /* s390 protvirt cpu dump data */
+#define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */
+#define NT_ARM_TLS 0x401 /* ARM TLS register */
+#define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */
+#define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */
+#define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */
+#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension registers */
+#define NT_ARM_PAC_MASK 0x406 /* ARM pointer authentication code masks */
+#define NT_ARM_PACA_KEYS 0x407 /* ARM pointer authentication address keys */
+#define NT_ARM_PACG_KEYS 0x408 /* ARM pointer authentication generic key */
+#define NT_ARM_TAGGED_ADDR_CTRL 0x409 /* arm64 tagged address control (prctl()) */
+#define NT_ARM_PAC_ENABLED_KEYS 0x40a /* arm64 ptr auth enabled keys (prctl()) */
+#define NT_ARM_SSVE 0x40b /* ARM Streaming SVE registers */
+#define NT_ARM_ZA 0x40c /* ARM SME ZA registers */
+#define NT_ARM_ZT 0x40d /* ARM SME ZT registers */
+#define NT_ARM_FPMR 0x40e /* ARM floating point mode register */
+#define NT_ARM_POE 0x40f /* ARM POE registers */
+#define NT_ARM_GCS 0x410 /* ARM GCS state */
+#define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */
+#define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */
+#define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */
+#define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode */
+#define NT_MIPS_MSA 0x802 /* MIPS SIMD registers */
+#define NT_RISCV_CSR 0x900 /* RISC-V Control and Status Registers */
+#define NT_RISCV_VECTOR 0x901 /* RISC-V vector registers */
+#define NT_RISCV_TAGGED_ADDR_CTRL 0x902 /* RISC-V tagged address control (prctl()) */
+#define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers */
+#define NT_LOONGARCH_CSR 0xa01 /* LoongArch control and status registers */
+#define NT_LOONGARCH_LSX 0xa02 /* LoongArch Loongson SIMD Extension registers */
+#define NT_LOONGARCH_LASX 0xa03 /* LoongArch Loongson Advanced SIMD Extension registers */
+#define NT_LOONGARCH_LBT 0xa04 /* LoongArch Loongson Binary Translation registers */
+#define NT_LOONGARCH_HW_BREAK 0xa05 /* LoongArch hardware breakpoint registers */
+#define NT_LOONGARCH_HW_WATCH 0xa06 /* LoongArch hardware watchpoint registers */
+
+/* Note types with note name "GNU" */
+#define NT_GNU_PROPERTY_TYPE_0 5
+
+/* Note header in a PT_NOTE section */
+typedef struct elf32_note {
+ Elf32_Word n_namesz; /* Name size */
+ Elf32_Word n_descsz; /* Content size */
+ Elf32_Word n_type; /* Content type */
+} Elf32_Nhdr;
+
+/* Note header in a PT_NOTE section */
+typedef struct elf64_note {
+ Elf64_Word n_namesz; /* Name size */
+ Elf64_Word n_descsz; /* Content size */
+ Elf64_Word n_type; /* Content type */
+} Elf64_Nhdr;
+
+/* .note.gnu.property types for EM_AARCH64: */
+#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
+
+/* Bits for GNU_PROPERTY_AARCH64_FEATURE_1_BTI */
+#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0)
+
+typedef struct {
+ Elf32_Half vd_version;
+ Elf32_Half vd_flags;
+ Elf32_Half vd_ndx;
+ Elf32_Half vd_cnt;
+ Elf32_Word vd_hash;
+ Elf32_Word vd_aux;
+ Elf32_Word vd_next;
+} Elf32_Verdef;
+
+typedef struct {
+ Elf64_Half vd_version;
+ Elf64_Half vd_flags;
+ Elf64_Half vd_ndx;
+ Elf64_Half vd_cnt;
+ Elf64_Word vd_hash;
+ Elf64_Word vd_aux;
+ Elf64_Word vd_next;
+} Elf64_Verdef;
+
+typedef struct {
+ Elf32_Word vda_name;
+ Elf32_Word vda_next;
+} Elf32_Verdaux;
+
+typedef struct {
+ Elf64_Word vda_name;
+ Elf64_Word vda_next;
+} Elf64_Verdaux;
+
+#endif /* _LINUX_ELF_H */
diff --git a/tools/include/uapi/linux/ethtool.h b/tools/include/uapi/linux/ethtool.h
deleted file mode 100644
index 47afae3895ec..000000000000
--- a/tools/include/uapi/linux/ethtool.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * ethtool.h: Defines for Linux ethtool.
- *
- * Copyright (C) 1998 David S. Miller (davem@redhat.com)
- * Copyright 2001 Jeff Garzik <jgarzik@pobox.com>
- * Portions Copyright 2001 Sun Microsystems (thockin@sun.com)
- * Portions Copyright 2002 Intel (eli.kupermann@intel.com,
- * christopher.leech@intel.com,
- * scott.feldman@intel.com)
- * Portions Copyright (C) Sun Microsystems 2008
- */
-
-#ifndef _UAPI_LINUX_ETHTOOL_H
-#define _UAPI_LINUX_ETHTOOL_H
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/if_ether.h>
-
-#define ETHTOOL_GCHANNELS 0x0000003c /* Get no of channels */
-
-/**
- * struct ethtool_channels - configuring number of network channel
- * @cmd: ETHTOOL_{G,S}CHANNELS
- * @max_rx: Read only. Maximum number of receive channel the driver support.
- * @max_tx: Read only. Maximum number of transmit channel the driver support.
- * @max_other: Read only. Maximum number of other channel the driver support.
- * @max_combined: Read only. Maximum number of combined channel the driver
- * support. Set of queues RX, TX or other.
- * @rx_count: Valid values are in the range 1 to the max_rx.
- * @tx_count: Valid values are in the range 1 to the max_tx.
- * @other_count: Valid values are in the range 1 to the max_other.
- * @combined_count: Valid values are in the range 1 to the max_combined.
- *
- * This can be used to configure RX, TX and other channels.
- */
-
-struct ethtool_channels {
- __u32 cmd;
- __u32 max_rx;
- __u32 max_tx;
- __u32 max_other;
- __u32 max_combined;
- __u32 rx_count;
- __u32 tx_count;
- __u32 other_count;
- __u32 combined_count;
-};
-
-#define ETHTOOL_FWVERS_LEN 32
-#define ETHTOOL_BUSINFO_LEN 32
-#define ETHTOOL_EROMVERS_LEN 32
-
-/**
- * struct ethtool_drvinfo - general driver and device information
- * @cmd: Command number = %ETHTOOL_GDRVINFO
- * @driver: Driver short name. This should normally match the name
- * in its bus driver structure (e.g. pci_driver::name). Must
- * not be an empty string.
- * @version: Driver version string; may be an empty string
- * @fw_version: Firmware version string; may be an empty string
- * @erom_version: Expansion ROM version string; may be an empty string
- * @bus_info: Device bus address. This should match the dev_name()
- * string for the underlying bus device, if there is one. May be
- * an empty string.
- * @reserved2: Reserved for future use; see the note on reserved space.
- * @n_priv_flags: Number of flags valid for %ETHTOOL_GPFLAGS and
- * %ETHTOOL_SPFLAGS commands; also the number of strings in the
- * %ETH_SS_PRIV_FLAGS set
- * @n_stats: Number of u64 statistics returned by the %ETHTOOL_GSTATS
- * command; also the number of strings in the %ETH_SS_STATS set
- * @testinfo_len: Number of results returned by the %ETHTOOL_TEST
- * command; also the number of strings in the %ETH_SS_TEST set
- * @eedump_len: Size of EEPROM accessible through the %ETHTOOL_GEEPROM
- * and %ETHTOOL_SEEPROM commands, in bytes
- * @regdump_len: Size of register dump returned by the %ETHTOOL_GREGS
- * command, in bytes
- *
- * Users can use the %ETHTOOL_GSSET_INFO command to get the number of
- * strings in any string set (from Linux 2.6.34).
- *
- * Drivers should set at most @driver, @version, @fw_version and
- * @bus_info in their get_drvinfo() implementation. The ethtool
- * core fills in the other fields using other driver operations.
- */
-struct ethtool_drvinfo {
- __u32 cmd;
- char driver[32];
- char version[32];
- char fw_version[ETHTOOL_FWVERS_LEN];
- char bus_info[ETHTOOL_BUSINFO_LEN];
- char erom_version[ETHTOOL_EROMVERS_LEN];
- char reserved2[12];
- __u32 n_priv_flags;
- __u32 n_stats;
- __u32 testinfo_len;
- __u32 eedump_len;
- __u32 regdump_len;
-};
-
-#define ETHTOOL_GDRVINFO 0x00000003
-
-#endif /* _UAPI_LINUX_ETHTOOL_H */
diff --git a/tools/include/uapi/linux/fanotify.h b/tools/include/uapi/linux/fanotify.h
new file mode 100644
index 000000000000..e710967c7c26
--- /dev/null
+++ b/tools/include/uapi/linux/fanotify.h
@@ -0,0 +1,274 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_FANOTIFY_H
+#define _UAPI_LINUX_FANOTIFY_H
+
+#include <linux/types.h>
+
+/* the following events that user-space can register for */
+#define FAN_ACCESS 0x00000001 /* File was accessed */
+#define FAN_MODIFY 0x00000002 /* File was modified */
+#define FAN_ATTRIB 0x00000004 /* Metadata changed */
+#define FAN_CLOSE_WRITE 0x00000008 /* Writable file closed */
+#define FAN_CLOSE_NOWRITE 0x00000010 /* Unwritable file closed */
+#define FAN_OPEN 0x00000020 /* File was opened */
+#define FAN_MOVED_FROM 0x00000040 /* File was moved from X */
+#define FAN_MOVED_TO 0x00000080 /* File was moved to Y */
+#define FAN_CREATE 0x00000100 /* Subfile was created */
+#define FAN_DELETE 0x00000200 /* Subfile was deleted */
+#define FAN_DELETE_SELF 0x00000400 /* Self was deleted */
+#define FAN_MOVE_SELF 0x00000800 /* Self was moved */
+#define FAN_OPEN_EXEC 0x00001000 /* File was opened for exec */
+
+#define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */
+#define FAN_FS_ERROR 0x00008000 /* Filesystem error */
+
+#define FAN_OPEN_PERM 0x00010000 /* File open in perm check */
+#define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */
+#define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */
+/* #define FAN_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */
+
+#define FAN_PRE_ACCESS 0x00100000 /* Pre-content access hook */
+#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */
+#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */
+
+#define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */
+
+#define FAN_RENAME 0x10000000 /* File was renamed */
+
+#define FAN_ONDIR 0x40000000 /* Event occurred against dir */
+
+/* helper events */
+#define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */
+#define FAN_MOVE (FAN_MOVED_FROM | FAN_MOVED_TO) /* moves */
+
+/* flags used for fanotify_init() */
+#define FAN_CLOEXEC 0x00000001
+#define FAN_NONBLOCK 0x00000002
+
+/* These are NOT bitwise flags. Both bits are used together. */
+#define FAN_CLASS_NOTIF 0x00000000
+#define FAN_CLASS_CONTENT 0x00000004
+#define FAN_CLASS_PRE_CONTENT 0x00000008
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \
+ FAN_CLASS_PRE_CONTENT)
+
+#define FAN_UNLIMITED_QUEUE 0x00000010
+#define FAN_UNLIMITED_MARKS 0x00000020
+#define FAN_ENABLE_AUDIT 0x00000040
+
+/* Flags to determine fanotify event format */
+#define FAN_REPORT_PIDFD 0x00000080 /* Report pidfd for event->pid */
+#define FAN_REPORT_TID 0x00000100 /* event->pid is thread id */
+#define FAN_REPORT_FID 0x00000200 /* Report unique file id */
+#define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */
+#define FAN_REPORT_NAME 0x00000800 /* Report events with name */
+#define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */
+#define FAN_REPORT_FD_ERROR 0x00002000 /* event->fd can report error */
+#define FAN_REPORT_MNT 0x00004000 /* Report mount events */
+
+/* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */
+#define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME)
+/* Convenience macro - FAN_REPORT_TARGET_FID requires all other FID flags */
+#define FAN_REPORT_DFID_NAME_TARGET (FAN_REPORT_DFID_NAME | \
+ FAN_REPORT_FID | FAN_REPORT_TARGET_FID)
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \
+ FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE |\
+ FAN_UNLIMITED_MARKS)
+
+/* flags used for fanotify_modify_mark() */
+#define FAN_MARK_ADD 0x00000001
+#define FAN_MARK_REMOVE 0x00000002
+#define FAN_MARK_DONT_FOLLOW 0x00000004
+#define FAN_MARK_ONLYDIR 0x00000008
+/* FAN_MARK_MOUNT is 0x00000010 */
+#define FAN_MARK_IGNORED_MASK 0x00000020
+#define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040
+#define FAN_MARK_FLUSH 0x00000080
+/* FAN_MARK_FILESYSTEM is 0x00000100 */
+#define FAN_MARK_EVICTABLE 0x00000200
+/* This bit is mutually exclusive with FAN_MARK_IGNORED_MASK bit */
+#define FAN_MARK_IGNORE 0x00000400
+
+/* These are NOT bitwise flags. Both bits can be used togther. */
+#define FAN_MARK_INODE 0x00000000
+#define FAN_MARK_MOUNT 0x00000010
+#define FAN_MARK_FILESYSTEM 0x00000100
+#define FAN_MARK_MNTNS 0x00000110
+
+/*
+ * Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY
+ * for non-inode mark types.
+ */
+#define FAN_MARK_IGNORE_SURV (FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY)
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\
+ FAN_MARK_REMOVE |\
+ FAN_MARK_DONT_FOLLOW |\
+ FAN_MARK_ONLYDIR |\
+ FAN_MARK_MOUNT |\
+ FAN_MARK_IGNORED_MASK |\
+ FAN_MARK_IGNORED_SURV_MODIFY |\
+ FAN_MARK_FLUSH)
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_EVENTS (FAN_ACCESS |\
+ FAN_MODIFY |\
+ FAN_CLOSE |\
+ FAN_OPEN)
+
+/*
+ * All events which require a permission response from userspace
+ */
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_PERM_EVENTS (FAN_OPEN_PERM |\
+ FAN_ACCESS_PERM)
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_OUTGOING_EVENTS (FAN_ALL_EVENTS |\
+ FAN_ALL_PERM_EVENTS |\
+ FAN_Q_OVERFLOW)
+
+#define FANOTIFY_METADATA_VERSION 3
+
+struct fanotify_event_metadata {
+ __u32 event_len;
+ __u8 vers;
+ __u8 reserved;
+ __u16 metadata_len;
+ __aligned_u64 mask;
+ __s32 fd;
+ __s32 pid;
+};
+
+#define FAN_EVENT_INFO_TYPE_FID 1
+#define FAN_EVENT_INFO_TYPE_DFID_NAME 2
+#define FAN_EVENT_INFO_TYPE_DFID 3
+#define FAN_EVENT_INFO_TYPE_PIDFD 4
+#define FAN_EVENT_INFO_TYPE_ERROR 5
+#define FAN_EVENT_INFO_TYPE_RANGE 6
+#define FAN_EVENT_INFO_TYPE_MNT 7
+
+/* Special info types for FAN_RENAME */
+#define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10
+/* Reserved for FAN_EVENT_INFO_TYPE_OLD_DFID 11 */
+#define FAN_EVENT_INFO_TYPE_NEW_DFID_NAME 12
+/* Reserved for FAN_EVENT_INFO_TYPE_NEW_DFID 13 */
+
+/* Variable length info record following event metadata */
+struct fanotify_event_info_header {
+ __u8 info_type;
+ __u8 pad;
+ __u16 len;
+};
+
+/*
+ * Unique file identifier info record.
+ * This structure is used for records of types FAN_EVENT_INFO_TYPE_FID,
+ * FAN_EVENT_INFO_TYPE_DFID and FAN_EVENT_INFO_TYPE_DFID_NAME.
+ * For FAN_EVENT_INFO_TYPE_DFID_NAME there is additionally a null terminated
+ * name immediately after the file handle.
+ */
+struct fanotify_event_info_fid {
+ struct fanotify_event_info_header hdr;
+ __kernel_fsid_t fsid;
+ /*
+ * Following is an opaque struct file_handle that can be passed as
+ * an argument to open_by_handle_at(2).
+ */
+ unsigned char handle[];
+};
+
+/*
+ * This structure is used for info records of type FAN_EVENT_INFO_TYPE_PIDFD.
+ * It holds a pidfd for the pid that was responsible for generating an event.
+ */
+struct fanotify_event_info_pidfd {
+ struct fanotify_event_info_header hdr;
+ __s32 pidfd;
+};
+
+struct fanotify_event_info_error {
+ struct fanotify_event_info_header hdr;
+ __s32 error;
+ __u32 error_count;
+};
+
+struct fanotify_event_info_range {
+ struct fanotify_event_info_header hdr;
+ __u32 pad;
+ __u64 offset;
+ __u64 count;
+};
+
+struct fanotify_event_info_mnt {
+ struct fanotify_event_info_header hdr;
+ __u64 mnt_id;
+};
+
+/*
+ * User space may need to record additional information about its decision.
+ * The extra information type records what kind of information is included.
+ * The default is none. We also define an extra information buffer whose
+ * size is determined by the extra information type.
+ *
+ * If the information type is Audit Rule, then the information following
+ * is the rule number that triggered the user space decision that
+ * requires auditing.
+ */
+
+#define FAN_RESPONSE_INFO_NONE 0
+#define FAN_RESPONSE_INFO_AUDIT_RULE 1
+
+struct fanotify_response {
+ __s32 fd;
+ __u32 response;
+};
+
+struct fanotify_response_info_header {
+ __u8 type;
+ __u8 pad;
+ __u16 len;
+};
+
+struct fanotify_response_info_audit_rule {
+ struct fanotify_response_info_header hdr;
+ __u32 rule_number;
+ __u32 subj_trust;
+ __u32 obj_trust;
+};
+
+/* Legit userspace responses to a _PERM event */
+#define FAN_ALLOW 0x01
+#define FAN_DENY 0x02
+/* errno other than EPERM can specified in upper byte of deny response */
+#define FAN_ERRNO_BITS 8
+#define FAN_ERRNO_SHIFT (32 - FAN_ERRNO_BITS)
+#define FAN_ERRNO_MASK ((1 << FAN_ERRNO_BITS) - 1)
+#define FAN_DENY_ERRNO(err) \
+ (FAN_DENY | ((((__u32)(err)) & FAN_ERRNO_MASK) << FAN_ERRNO_SHIFT))
+
+#define FAN_AUDIT 0x10 /* Bitmask to create audit record for result */
+#define FAN_INFO 0x20 /* Bitmask to indicate additional information */
+
+/* No fd set in event */
+#define FAN_NOFD -1
+#define FAN_NOPIDFD FAN_NOFD
+#define FAN_EPIDFD -2
+
+/* Helper functions to deal with fanotify_event_metadata buffers */
+#define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata))
+
+#define FAN_EVENT_NEXT(meta, len) ((len) -= (meta)->event_len, \
+ (struct fanotify_event_metadata*)(((char *)(meta)) + \
+ (meta)->event_len))
+
+#define FAN_EVENT_OK(meta, len) ((long)(len) >= (long)FAN_EVENT_METADATA_LEN && \
+ (long)(meta)->event_len >= (long)FAN_EVENT_METADATA_LEN && \
+ (long)(meta)->event_len <= (long)(len))
+
+#endif /* _UAPI_LINUX_FANOTIFY_H */
diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h
deleted file mode 100644
index 282e90aeb163..000000000000
--- a/tools/include/uapi/linux/fcntl.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _UAPI_LINUX_FCNTL_H
-#define _UAPI_LINUX_FCNTL_H
-
-#include <asm/fcntl.h>
-#include <linux/openat2.h>
-
-#define F_SETLEASE (F_LINUX_SPECIFIC_BASE + 0)
-#define F_GETLEASE (F_LINUX_SPECIFIC_BASE + 1)
-
-/*
- * Cancel a blocking posix lock; internal use only until we expose an
- * asynchronous lock api to userspace:
- */
-#define F_CANCELLK (F_LINUX_SPECIFIC_BASE + 5)
-
-/* Create a file descriptor with FD_CLOEXEC set. */
-#define F_DUPFD_CLOEXEC (F_LINUX_SPECIFIC_BASE + 6)
-
-/*
- * Request nofications on a directory.
- * See below for events that may be notified.
- */
-#define F_NOTIFY (F_LINUX_SPECIFIC_BASE+2)
-
-/*
- * Set and get of pipe page size array
- */
-#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7)
-#define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8)
-
-/*
- * Set/Get seals
- */
-#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
-#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
-
-/*
- * Types of seals
- */
-#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
-#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
-#define F_SEAL_GROW 0x0004 /* prevent file from growing */
-#define F_SEAL_WRITE 0x0008 /* prevent writes */
-#define F_SEAL_FUTURE_WRITE 0x0010 /* prevent future writes while mapped */
-#define F_SEAL_EXEC 0x0020 /* prevent chmod modifying exec bits */
-/* (1U << 31) is reserved for signed error codes */
-
-/*
- * Set/Get write life time hints. {GET,SET}_RW_HINT operate on the
- * underlying inode, while {GET,SET}_FILE_RW_HINT operate only on
- * the specific file.
- */
-#define F_GET_RW_HINT (F_LINUX_SPECIFIC_BASE + 11)
-#define F_SET_RW_HINT (F_LINUX_SPECIFIC_BASE + 12)
-#define F_GET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 13)
-#define F_SET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 14)
-
-/*
- * Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
- * used to clear any hints previously set.
- */
-#define RWH_WRITE_LIFE_NOT_SET 0
-#define RWH_WRITE_LIFE_NONE 1
-#define RWH_WRITE_LIFE_SHORT 2
-#define RWH_WRITE_LIFE_MEDIUM 3
-#define RWH_WRITE_LIFE_LONG 4
-#define RWH_WRITE_LIFE_EXTREME 5
-
-/*
- * The originally introduced spelling is remained from the first
- * versions of the patch set that introduced the feature, see commit
- * v4.13-rc1~212^2~51.
- */
-#define RWF_WRITE_LIFE_NOT_SET RWH_WRITE_LIFE_NOT_SET
-
-/*
- * Types of directory notifications that may be requested.
- */
-#define DN_ACCESS 0x00000001 /* File accessed */
-#define DN_MODIFY 0x00000002 /* File modified */
-#define DN_CREATE 0x00000004 /* File created */
-#define DN_DELETE 0x00000008 /* File removed */
-#define DN_RENAME 0x00000010 /* File renamed */
-#define DN_ATTRIB 0x00000020 /* File changed attibutes */
-#define DN_MULTISHOT 0x80000000 /* Don't remove notifier */
-
-/*
- * The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS is
- * meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to
- * unlinkat. The two functions do completely different things and therefore,
- * the flags can be allowed to overlap. For example, passing AT_REMOVEDIR to
- * faccessat would be undefined behavior and thus treating it equivalent to
- * AT_EACCESS is valid undefined behavior.
- */
-#define AT_FDCWD -100 /* Special value used to indicate
- openat should use the current
- working directory. */
-#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */
-#define AT_EACCESS 0x200 /* Test access permitted for
- effective IDs, not real IDs. */
-#define AT_REMOVEDIR 0x200 /* Remove directory instead of
- unlinking file. */
-#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
-#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */
-#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */
-
-#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */
-#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */
-#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */
-#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */
-
-#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
-
-/* Flags for name_to_handle_at(2). We reuse AT_ flag space to save bits... */
-#define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to
- compare object identity and may not
- be usable to open_by_handle_at(2) */
-#if defined(__KERNEL__)
-#define AT_GETATTR_NOSEC 0x80000000
-#endif
-
-#endif /* _UAPI_LINUX_FCNTL_H */
diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h
index 45e4e64fd664..24ddf7bc4f25 100644
--- a/tools/include/uapi/linux/fs.h
+++ b/tools/include/uapi/linux/fs.h
@@ -28,8 +28,8 @@
* nr_file rlimit, so it's safe to set up a ridiculously high absolute
* upper limit on files-per-process.
*
- * Some programs (notably those using select()) may have to be
- * recompiled to take full advantage of the new limits..
+ * Some programs (notably those using select()) may have to be
+ * recompiled to take full advantage of the new limits..
*/
/* Fixed constants first: */
@@ -40,6 +40,15 @@
#define BLOCK_SIZE_BITS 10
#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
+/* flags for integrity meta */
+#define IO_INTEGRITY_CHK_GUARD (1U << 0) /* enforce guard check */
+#define IO_INTEGRITY_CHK_REFTAG (1U << 1) /* enforce ref check */
+#define IO_INTEGRITY_CHK_APPTAG (1U << 2) /* enforce app check */
+
+#define IO_INTEGRITY_VALID_FLAGS (IO_INTEGRITY_CHK_GUARD | \
+ IO_INTEGRITY_CHK_REFTAG | \
+ IO_INTEGRITY_CHK_APPTAG)
+
#define SEEK_SET 0 /* seek relative to beginning of file */
#define SEEK_CUR 1 /* seek relative to current file position */
#define SEEK_END 2 /* seek relative to end of file */
@@ -329,12 +338,21 @@ typedef int __bitwise __kernel_rwf_t;
/* per-IO negation of O_APPEND */
#define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020)
+/* Atomic Write */
+#define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040)
+
+/* buffered IO that drops the cache after reading or writing data */
+#define RWF_DONTCACHE ((__force __kernel_rwf_t)0x00000080)
+
/* mask of flags supported by the kernel */
#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
- RWF_APPEND | RWF_NOAPPEND)
+ RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\
+ RWF_DONTCACHE)
+
+#define PROCFS_IOCTL_MAGIC 'f'
/* Pagemap ioctl */
-#define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg)
+#define PAGEMAP_SCAN _IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg)
/* Bitmasks provided in pm_scan_args masks and reported in page_region.categories. */
#define PAGE_IS_WPALLOWED (1 << 0)
@@ -345,6 +363,7 @@ typedef int __bitwise __kernel_rwf_t;
#define PAGE_IS_PFNZERO (1 << 5)
#define PAGE_IS_HUGE (1 << 6)
#define PAGE_IS_SOFT_DIRTY (1 << 7)
+#define PAGE_IS_GUARD (1 << 8)
/*
* struct page_region - Page region with flags
@@ -393,4 +412,158 @@ struct pm_scan_arg {
__u64 return_mask;
};
+/* /proc/<pid>/maps ioctl */
+#define PROCMAP_QUERY _IOWR(PROCFS_IOCTL_MAGIC, 17, struct procmap_query)
+
+enum procmap_query_flags {
+ /*
+ * VMA permission flags.
+ *
+ * Can be used as part of procmap_query.query_flags field to look up
+ * only VMAs satisfying specified subset of permissions. E.g., specifying
+ * PROCMAP_QUERY_VMA_READABLE only will return both readable and read/write VMAs,
+ * while having PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_WRITABLE will only
+ * return read/write VMAs, though both executable/non-executable and
+ * private/shared will be ignored.
+ *
+ * PROCMAP_QUERY_VMA_* flags are also returned in procmap_query.vma_flags
+ * field to specify actual VMA permissions.
+ */
+ PROCMAP_QUERY_VMA_READABLE = 0x01,
+ PROCMAP_QUERY_VMA_WRITABLE = 0x02,
+ PROCMAP_QUERY_VMA_EXECUTABLE = 0x04,
+ PROCMAP_QUERY_VMA_SHARED = 0x08,
+ /*
+ * Query modifier flags.
+ *
+ * By default VMA that covers provided address is returned, or -ENOENT
+ * is returned. With PROCMAP_QUERY_COVERING_OR_NEXT_VMA flag set, closest
+ * VMA with vma_start > addr will be returned if no covering VMA is
+ * found.
+ *
+ * PROCMAP_QUERY_FILE_BACKED_VMA instructs query to consider only VMAs that
+ * have file backing. Can be combined with PROCMAP_QUERY_COVERING_OR_NEXT_VMA
+ * to iterate all VMAs with file backing.
+ */
+ PROCMAP_QUERY_COVERING_OR_NEXT_VMA = 0x10,
+ PROCMAP_QUERY_FILE_BACKED_VMA = 0x20,
+};
+
+/*
+ * Input/output argument structured passed into ioctl() call. It can be used
+ * to query a set of VMAs (Virtual Memory Areas) of a process.
+ *
+ * Each field can be one of three kinds, marked in a short comment to the
+ * right of the field:
+ * - "in", input argument, user has to provide this value, kernel doesn't modify it;
+ * - "out", output argument, kernel sets this field with VMA data;
+ * - "in/out", input and output argument; user provides initial value (used
+ * to specify maximum allowable buffer size), and kernel sets it to actual
+ * amount of data written (or zero, if there is no data).
+ *
+ * If matching VMA is found (according to criterias specified by
+ * query_addr/query_flags, all the out fields are filled out, and ioctl()
+ * returns 0. If there is no matching VMA, -ENOENT will be returned.
+ * In case of any other error, negative error code other than -ENOENT is
+ * returned.
+ *
+ * Most of the data is similar to the one returned as text in /proc/<pid>/maps
+ * file, but procmap_query provides more querying flexibility. There are no
+ * consistency guarantees between subsequent ioctl() calls, but data returned
+ * for matched VMA is self-consistent.
+ */
+struct procmap_query {
+ /* Query struct size, for backwards/forward compatibility */
+ __u64 size;
+ /*
+ * Query flags, a combination of enum procmap_query_flags values.
+ * Defines query filtering and behavior, see enum procmap_query_flags.
+ *
+ * Input argument, provided by user. Kernel doesn't modify it.
+ */
+ __u64 query_flags; /* in */
+ /*
+ * Query address. By default, VMA that covers this address will
+ * be looked up. PROCMAP_QUERY_* flags above modify this default
+ * behavior further.
+ *
+ * Input argument, provided by user. Kernel doesn't modify it.
+ */
+ __u64 query_addr; /* in */
+ /* VMA starting (inclusive) and ending (exclusive) address, if VMA is found. */
+ __u64 vma_start; /* out */
+ __u64 vma_end; /* out */
+ /* VMA permissions flags. A combination of PROCMAP_QUERY_VMA_* flags. */
+ __u64 vma_flags; /* out */
+ /* VMA backing page size granularity. */
+ __u64 vma_page_size; /* out */
+ /*
+ * VMA file offset. If VMA has file backing, this specifies offset
+ * within the file that VMA's start address corresponds to.
+ * Is set to zero if VMA has no backing file.
+ */
+ __u64 vma_offset; /* out */
+ /* Backing file's inode number, or zero, if VMA has no backing file. */
+ __u64 inode; /* out */
+ /* Backing file's device major/minor number, or zero, if VMA has no backing file. */
+ __u32 dev_major; /* out */
+ __u32 dev_minor; /* out */
+ /*
+ * If set to non-zero value, signals the request to return VMA name
+ * (i.e., VMA's backing file's absolute path, with " (deleted)" suffix
+ * appended, if file was unlinked from FS) for matched VMA. VMA name
+ * can also be some special name (e.g., "[heap]", "[stack]") or could
+ * be even user-supplied with prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME).
+ *
+ * Kernel will set this field to zero, if VMA has no associated name.
+ * Otherwise kernel will return actual amount of bytes filled in
+ * user-supplied buffer (see vma_name_addr field below), including the
+ * terminating zero.
+ *
+ * If VMA name is longer that user-supplied maximum buffer size,
+ * -E2BIG error is returned.
+ *
+ * If this field is set to non-zero value, vma_name_addr should point
+ * to valid user space memory buffer of at least vma_name_size bytes.
+ * If set to zero, vma_name_addr should be set to zero as well
+ */
+ __u32 vma_name_size; /* in/out */
+ /*
+ * If set to non-zero value, signals the request to extract and return
+ * VMA's backing file's build ID, if the backing file is an ELF file
+ * and it contains embedded build ID.
+ *
+ * Kernel will set this field to zero, if VMA has no backing file,
+ * backing file is not an ELF file, or ELF file has no build ID
+ * embedded.
+ *
+ * Build ID is a binary value (not a string). Kernel will set
+ * build_id_size field to exact number of bytes used for build ID.
+ * If build ID is requested and present, but needs more bytes than
+ * user-supplied maximum buffer size (see build_id_addr field below),
+ * -E2BIG error will be returned.
+ *
+ * If this field is set to non-zero value, build_id_addr should point
+ * to valid user space memory buffer of at least build_id_size bytes.
+ * If set to zero, build_id_addr should be set to zero as well
+ */
+ __u32 build_id_size; /* in/out */
+ /*
+ * User-supplied address of a buffer of at least vma_name_size bytes
+ * for kernel to fill with matched VMA's name (see vma_name_size field
+ * description above for details).
+ *
+ * Should be set to zero if VMA name should not be returned.
+ */
+ __u64 vma_name_addr; /* in */
+ /*
+ * User-supplied address of a buffer of at least build_id_size bytes
+ * for kernel to fill with matched VMA's ELF build ID, if available
+ * (see build_id_size field description above for details).
+ *
+ * Should be set to zero if build ID should not be returned.
+ */
+ __u64 build_id_addr; /* in */
+};
+
#endif /* _UAPI_LINUX_FS_H */
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index f0d71b2a3f1e..7e46ca4cd31b 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -377,6 +377,7 @@ enum {
IFLA_GSO_IPV4_MAX_SIZE,
IFLA_GRO_IPV4_MAX_SIZE,
IFLA_DPLL_PIN,
+ IFLA_MAX_PACING_OFFLOAD_HORIZON,
__IFLA_MAX
};
@@ -461,6 +462,286 @@ enum in6_addr_gen_mode {
/* Bridge section */
+/**
+ * DOC: Bridge enum definition
+ *
+ * Please *note* that the timer values in the following section are expected
+ * in clock_t format, which is seconds multiplied by USER_HZ (generally
+ * defined as 100).
+ *
+ * @IFLA_BR_FORWARD_DELAY
+ * The bridge forwarding delay is the time spent in LISTENING state
+ * (before moving to LEARNING) and in LEARNING state (before moving
+ * to FORWARDING). Only relevant if STP is enabled.
+ *
+ * The valid values are between (2 * USER_HZ) and (30 * USER_HZ).
+ * The default value is (15 * USER_HZ).
+ *
+ * @IFLA_BR_HELLO_TIME
+ * The time between hello packets sent by the bridge, when it is a root
+ * bridge or a designated bridge. Only relevant if STP is enabled.
+ *
+ * The valid values are between (1 * USER_HZ) and (10 * USER_HZ).
+ * The default value is (2 * USER_HZ).
+ *
+ * @IFLA_BR_MAX_AGE
+ * The hello packet timeout is the time until another bridge in the
+ * spanning tree is assumed to be dead, after reception of its last hello
+ * message. Only relevant if STP is enabled.
+ *
+ * The valid values are between (6 * USER_HZ) and (40 * USER_HZ).
+ * The default value is (20 * USER_HZ).
+ *
+ * @IFLA_BR_AGEING_TIME
+ * Configure the bridge's FDB entries aging time. It is the time a MAC
+ * address will be kept in the FDB after a packet has been received from
+ * that address. After this time has passed, entries are cleaned up.
+ * Allow values outside the 802.1 standard specification for special cases:
+ *
+ * * 0 - entry never ages (all permanent)
+ * * 1 - entry disappears (no persistence)
+ *
+ * The default value is (300 * USER_HZ).
+ *
+ * @IFLA_BR_STP_STATE
+ * Turn spanning tree protocol on (*IFLA_BR_STP_STATE* > 0) or off
+ * (*IFLA_BR_STP_STATE* == 0) for this bridge.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_PRIORITY
+ * Set this bridge's spanning tree priority, used during STP root bridge
+ * election.
+ *
+ * The valid values are between 0 and 65535.
+ *
+ * @IFLA_BR_VLAN_FILTERING
+ * Turn VLAN filtering on (*IFLA_BR_VLAN_FILTERING* > 0) or off
+ * (*IFLA_BR_VLAN_FILTERING* == 0). When disabled, the bridge will not
+ * consider the VLAN tag when handling packets.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_VLAN_PROTOCOL
+ * Set the protocol used for VLAN filtering.
+ *
+ * The valid values are 0x8100(802.1Q) or 0x88A8(802.1AD). The default value
+ * is 0x8100(802.1Q).
+ *
+ * @IFLA_BR_GROUP_FWD_MASK
+ * The group forwarding mask. This is the bitmask that is applied to
+ * decide whether to forward incoming frames destined to link-local
+ * addresses (of the form 01:80:C2:00:00:0X).
+ *
+ * The default value is 0, which means the bridge does not forward any
+ * link-local frames coming on this port.
+ *
+ * @IFLA_BR_ROOT_ID
+ * The bridge root id, read only.
+ *
+ * @IFLA_BR_BRIDGE_ID
+ * The bridge id, read only.
+ *
+ * @IFLA_BR_ROOT_PORT
+ * The bridge root port, read only.
+ *
+ * @IFLA_BR_ROOT_PATH_COST
+ * The bridge root path cost, read only.
+ *
+ * @IFLA_BR_TOPOLOGY_CHANGE
+ * The bridge topology change, read only.
+ *
+ * @IFLA_BR_TOPOLOGY_CHANGE_DETECTED
+ * The bridge topology change detected, read only.
+ *
+ * @IFLA_BR_HELLO_TIMER
+ * The bridge hello timer, read only.
+ *
+ * @IFLA_BR_TCN_TIMER
+ * The bridge tcn timer, read only.
+ *
+ * @IFLA_BR_TOPOLOGY_CHANGE_TIMER
+ * The bridge topology change timer, read only.
+ *
+ * @IFLA_BR_GC_TIMER
+ * The bridge gc timer, read only.
+ *
+ * @IFLA_BR_GROUP_ADDR
+ * Set the MAC address of the multicast group this bridge uses for STP.
+ * The address must be a link-local address in standard Ethernet MAC address
+ * format. It is an address of the form 01:80:C2:00:00:0X, with X in [0, 4..f].
+ *
+ * The default value is 0.
+ *
+ * @IFLA_BR_FDB_FLUSH
+ * Flush bridge's fdb dynamic entries.
+ *
+ * @IFLA_BR_MCAST_ROUTER
+ * Set bridge's multicast router if IGMP snooping is enabled.
+ * The valid values are:
+ *
+ * * 0 - disabled.
+ * * 1 - automatic (queried).
+ * * 2 - permanently enabled.
+ *
+ * The default value is 1.
+ *
+ * @IFLA_BR_MCAST_SNOOPING
+ * Turn multicast snooping on (*IFLA_BR_MCAST_SNOOPING* > 0) or off
+ * (*IFLA_BR_MCAST_SNOOPING* == 0).
+ *
+ * The default value is 1.
+ *
+ * @IFLA_BR_MCAST_QUERY_USE_IFADDR
+ * If enabled use the bridge's own IP address as source address for IGMP
+ * queries (*IFLA_BR_MCAST_QUERY_USE_IFADDR* > 0) or the default of 0.0.0.0
+ * (*IFLA_BR_MCAST_QUERY_USE_IFADDR* == 0).
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_MCAST_QUERIER
+ * Enable (*IFLA_BR_MULTICAST_QUERIER* > 0) or disable
+ * (*IFLA_BR_MULTICAST_QUERIER* == 0) IGMP querier, ie sending of multicast
+ * queries by the bridge.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_MCAST_HASH_ELASTICITY
+ * Set multicast database hash elasticity, It is the maximum chain length in
+ * the multicast hash table. This attribute is *deprecated* and the value
+ * is always 16.
+ *
+ * @IFLA_BR_MCAST_HASH_MAX
+ * Set maximum size of the multicast hash table
+ *
+ * The default value is 4096, the value must be a power of 2.
+ *
+ * @IFLA_BR_MCAST_LAST_MEMBER_CNT
+ * The Last Member Query Count is the number of Group-Specific Queries
+ * sent before the router assumes there are no local members. The Last
+ * Member Query Count is also the number of Group-and-Source-Specific
+ * Queries sent before the router assumes there are no listeners for a
+ * particular source.
+ *
+ * The default value is 2.
+ *
+ * @IFLA_BR_MCAST_STARTUP_QUERY_CNT
+ * The Startup Query Count is the number of Queries sent out on startup,
+ * separated by the Startup Query Interval.
+ *
+ * The default value is 2.
+ *
+ * @IFLA_BR_MCAST_LAST_MEMBER_INTVL
+ * The Last Member Query Interval is the Max Response Time inserted into
+ * Group-Specific Queries sent in response to Leave Group messages, and
+ * is also the amount of time between Group-Specific Query messages.
+ *
+ * The default value is (1 * USER_HZ).
+ *
+ * @IFLA_BR_MCAST_MEMBERSHIP_INTVL
+ * The interval after which the bridge will leave a group, if no membership
+ * reports for this group are received.
+ *
+ * The default value is (260 * USER_HZ).
+ *
+ * @IFLA_BR_MCAST_QUERIER_INTVL
+ * The interval between queries sent by other routers. if no queries are
+ * seen after this delay has passed, the bridge will start to send its own
+ * queries (as if *IFLA_BR_MCAST_QUERIER_INTVL* was enabled).
+ *
+ * The default value is (255 * USER_HZ).
+ *
+ * @IFLA_BR_MCAST_QUERY_INTVL
+ * The Query Interval is the interval between General Queries sent by
+ * the Querier.
+ *
+ * The default value is (125 * USER_HZ). The minimum value is (1 * USER_HZ).
+ *
+ * @IFLA_BR_MCAST_QUERY_RESPONSE_INTVL
+ * The Max Response Time used to calculate the Max Resp Code inserted
+ * into the periodic General Queries.
+ *
+ * The default value is (10 * USER_HZ).
+ *
+ * @IFLA_BR_MCAST_STARTUP_QUERY_INTVL
+ * The interval between queries in the startup phase.
+ *
+ * The default value is (125 * USER_HZ) / 4. The minimum value is (1 * USER_HZ).
+ *
+ * @IFLA_BR_NF_CALL_IPTABLES
+ * Enable (*NF_CALL_IPTABLES* > 0) or disable (*NF_CALL_IPTABLES* == 0)
+ * iptables hooks on the bridge.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_NF_CALL_IP6TABLES
+ * Enable (*NF_CALL_IP6TABLES* > 0) or disable (*NF_CALL_IP6TABLES* == 0)
+ * ip6tables hooks on the bridge.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_NF_CALL_ARPTABLES
+ * Enable (*NF_CALL_ARPTABLES* > 0) or disable (*NF_CALL_ARPTABLES* == 0)
+ * arptables hooks on the bridge.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_VLAN_DEFAULT_PVID
+ * VLAN ID applied to untagged and priority-tagged incoming packets.
+ *
+ * The default value is 1. Setting to the special value 0 makes all ports of
+ * this bridge not have a PVID by default, which means that they will
+ * not accept VLAN-untagged traffic.
+ *
+ * @IFLA_BR_PAD
+ * Bridge attribute padding type for netlink message.
+ *
+ * @IFLA_BR_VLAN_STATS_ENABLED
+ * Enable (*IFLA_BR_VLAN_STATS_ENABLED* == 1) or disable
+ * (*IFLA_BR_VLAN_STATS_ENABLED* == 0) per-VLAN stats accounting.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_MCAST_STATS_ENABLED
+ * Enable (*IFLA_BR_MCAST_STATS_ENABLED* > 0) or disable
+ * (*IFLA_BR_MCAST_STATS_ENABLED* == 0) multicast (IGMP/MLD) stats
+ * accounting.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_MCAST_IGMP_VERSION
+ * Set the IGMP version.
+ *
+ * The valid values are 2 and 3. The default value is 2.
+ *
+ * @IFLA_BR_MCAST_MLD_VERSION
+ * Set the MLD version.
+ *
+ * The valid values are 1 and 2. The default value is 1.
+ *
+ * @IFLA_BR_VLAN_STATS_PER_PORT
+ * Enable (*IFLA_BR_VLAN_STATS_PER_PORT* == 1) or disable
+ * (*IFLA_BR_VLAN_STATS_PER_PORT* == 0) per-VLAN per-port stats accounting.
+ * Can be changed only when there are no port VLANs configured.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_MULTI_BOOLOPT
+ * The multi_boolopt is used to control new boolean options to avoid adding
+ * new netlink attributes. You can look at ``enum br_boolopt_id`` for those
+ * options.
+ *
+ * @IFLA_BR_MCAST_QUERIER_STATE
+ * Bridge mcast querier states, read only.
+ *
+ * @IFLA_BR_FDB_N_LEARNED
+ * The number of dynamically learned FDB entries for the current bridge,
+ * read only.
+ *
+ * @IFLA_BR_FDB_MAX_LEARNED
+ * Set the number of max dynamically learned FDB entries for the current
+ * bridge.
+ */
enum {
IFLA_BR_UNSPEC,
IFLA_BR_FORWARD_DELAY,
@@ -510,6 +791,8 @@ enum {
IFLA_BR_VLAN_STATS_PER_PORT,
IFLA_BR_MULTI_BOOLOPT,
IFLA_BR_MCAST_QUERIER_STATE,
+ IFLA_BR_FDB_N_LEARNED,
+ IFLA_BR_FDB_MAX_LEARNED,
__IFLA_BR_MAX,
};
@@ -520,11 +803,252 @@ struct ifla_bridge_id {
__u8 addr[6]; /* ETH_ALEN */
};
+/**
+ * DOC: Bridge mode enum definition
+ *
+ * @BRIDGE_MODE_HAIRPIN
+ * Controls whether traffic may be sent back out of the port on which it
+ * was received. This option is also called reflective relay mode, and is
+ * used to support basic VEPA (Virtual Ethernet Port Aggregator)
+ * capabilities. By default, this flag is turned off and the bridge will
+ * not forward traffic back out of the receiving port.
+ */
enum {
BRIDGE_MODE_UNSPEC,
BRIDGE_MODE_HAIRPIN,
};
+/**
+ * DOC: Bridge port enum definition
+ *
+ * @IFLA_BRPORT_STATE
+ * The operation state of the port. Here are the valid values.
+ *
+ * * 0 - port is in STP *DISABLED* state. Make this port completely
+ * inactive for STP. This is also called BPDU filter and could be used
+ * to disable STP on an untrusted port, like a leaf virtual device.
+ * The traffic forwarding is also stopped on this port.
+ * * 1 - port is in STP *LISTENING* state. Only valid if STP is enabled
+ * on the bridge. In this state the port listens for STP BPDUs and
+ * drops all other traffic frames.
+ * * 2 - port is in STP *LEARNING* state. Only valid if STP is enabled on
+ * the bridge. In this state the port will accept traffic only for the
+ * purpose of updating MAC address tables.
+ * * 3 - port is in STP *FORWARDING* state. Port is fully active.
+ * * 4 - port is in STP *BLOCKING* state. Only valid if STP is enabled on
+ * the bridge. This state is used during the STP election process.
+ * In this state, port will only process STP BPDUs.
+ *
+ * @IFLA_BRPORT_PRIORITY
+ * The STP port priority. The valid values are between 0 and 255.
+ *
+ * @IFLA_BRPORT_COST
+ * The STP path cost of the port. The valid values are between 1 and 65535.
+ *
+ * @IFLA_BRPORT_MODE
+ * Set the bridge port mode. See *BRIDGE_MODE_HAIRPIN* for more details.
+ *
+ * @IFLA_BRPORT_GUARD
+ * Controls whether STP BPDUs will be processed by the bridge port. By
+ * default, the flag is turned off to allow BPDU processing. Turning this
+ * flag on will disable the bridge port if a STP BPDU packet is received.
+ *
+ * If the bridge has Spanning Tree enabled, hostile devices on the network
+ * may send BPDU on a port and cause network failure. Setting *guard on*
+ * will detect and stop this by disabling the port. The port will be
+ * restarted if the link is brought down, or removed and reattached.
+ *
+ * @IFLA_BRPORT_PROTECT
+ * Controls whether a given port is allowed to become a root port or not.
+ * Only used when STP is enabled on the bridge. By default the flag is off.
+ *
+ * This feature is also called root port guard. If BPDU is received from a
+ * leaf (edge) port, it should not be elected as root port. This could
+ * be used if using STP on a bridge and the downstream bridges are not fully
+ * trusted; this prevents a hostile guest from rerouting traffic.
+ *
+ * @IFLA_BRPORT_FAST_LEAVE
+ * This flag allows the bridge to immediately stop multicast traffic
+ * forwarding on a port that receives an IGMP Leave message. It is only used
+ * when IGMP snooping is enabled on the bridge. By default the flag is off.
+ *
+ * @IFLA_BRPORT_LEARNING
+ * Controls whether a given port will learn *source* MAC addresses from
+ * received traffic or not. Also controls whether dynamic FDB entries
+ * (which can also be added by software) will be refreshed by incoming
+ * traffic. By default this flag is on.
+ *
+ * @IFLA_BRPORT_UNICAST_FLOOD
+ * Controls whether unicast traffic for which there is no FDB entry will
+ * be flooded towards this port. By default this flag is on.
+ *
+ * @IFLA_BRPORT_PROXYARP
+ * Enable proxy ARP on this port.
+ *
+ * @IFLA_BRPORT_LEARNING_SYNC
+ * Controls whether a given port will sync MAC addresses learned on device
+ * port to bridge FDB.
+ *
+ * @IFLA_BRPORT_PROXYARP_WIFI
+ * Enable proxy ARP on this port which meets extended requirements by
+ * IEEE 802.11 and Hotspot 2.0 specifications.
+ *
+ * @IFLA_BRPORT_ROOT_ID
+ *
+ * @IFLA_BRPORT_BRIDGE_ID
+ *
+ * @IFLA_BRPORT_DESIGNATED_PORT
+ *
+ * @IFLA_BRPORT_DESIGNATED_COST
+ *
+ * @IFLA_BRPORT_ID
+ *
+ * @IFLA_BRPORT_NO
+ *
+ * @IFLA_BRPORT_TOPOLOGY_CHANGE_ACK
+ *
+ * @IFLA_BRPORT_CONFIG_PENDING
+ *
+ * @IFLA_BRPORT_MESSAGE_AGE_TIMER
+ *
+ * @IFLA_BRPORT_FORWARD_DELAY_TIMER
+ *
+ * @IFLA_BRPORT_HOLD_TIMER
+ *
+ * @IFLA_BRPORT_FLUSH
+ * Flush bridge ports' fdb dynamic entries.
+ *
+ * @IFLA_BRPORT_MULTICAST_ROUTER
+ * Configure the port's multicast router presence. A port with
+ * a multicast router will receive all multicast traffic.
+ * The valid values are:
+ *
+ * * 0 disable multicast routers on this port
+ * * 1 let the system detect the presence of routers (default)
+ * * 2 permanently enable multicast traffic forwarding on this port
+ * * 3 enable multicast routers temporarily on this port, not depending
+ * on incoming queries.
+ *
+ * @IFLA_BRPORT_PAD
+ *
+ * @IFLA_BRPORT_MCAST_FLOOD
+ * Controls whether a given port will flood multicast traffic for which
+ * there is no MDB entry. By default this flag is on.
+ *
+ * @IFLA_BRPORT_MCAST_TO_UCAST
+ * Controls whether a given port will replicate packets using unicast
+ * instead of multicast. By default this flag is off.
+ *
+ * This is done by copying the packet per host and changing the multicast
+ * destination MAC to a unicast one accordingly.
+ *
+ * *mcast_to_unicast* works on top of the multicast snooping feature of the
+ * bridge. Which means unicast copies are only delivered to hosts which
+ * are interested in unicast and signaled this via IGMP/MLD reports previously.
+ *
+ * This feature is intended for interface types which have a more reliable
+ * and/or efficient way to deliver unicast packets than broadcast ones
+ * (e.g. WiFi).
+ *
+ * However, it should only be enabled on interfaces where no IGMPv2/MLDv1
+ * report suppression takes place. IGMP/MLD report suppression issue is
+ * usually overcome by the network daemon (supplicant) enabling AP isolation
+ * and by that separating all STAs.
+ *
+ * Delivery of STA-to-STA IP multicast is made possible again by enabling
+ * and utilizing the bridge hairpin mode, which considers the incoming port
+ * as a potential outgoing port, too (see *BRIDGE_MODE_HAIRPIN* option).
+ * Hairpin mode is performed after multicast snooping, therefore leading
+ * to only deliver reports to STAs running a multicast router.
+ *
+ * @IFLA_BRPORT_VLAN_TUNNEL
+ * Controls whether vlan to tunnel mapping is enabled on the port.
+ * By default this flag is off.
+ *
+ * @IFLA_BRPORT_BCAST_FLOOD
+ * Controls flooding of broadcast traffic on the given port. By default
+ * this flag is on.
+ *
+ * @IFLA_BRPORT_GROUP_FWD_MASK
+ * Set the group forward mask. This is a bitmask that is applied to
+ * decide whether to forward incoming frames destined to link-local
+ * addresses. The addresses of the form are 01:80:C2:00:00:0X (defaults
+ * to 0, which means the bridge does not forward any link-local frames
+ * coming on this port).
+ *
+ * @IFLA_BRPORT_NEIGH_SUPPRESS
+ * Controls whether neighbor discovery (arp and nd) proxy and suppression
+ * is enabled on the port. By default this flag is off.
+ *
+ * @IFLA_BRPORT_ISOLATED
+ * Controls whether a given port will be isolated, which means it will be
+ * able to communicate with non-isolated ports only. By default this
+ * flag is off.
+ *
+ * @IFLA_BRPORT_BACKUP_PORT
+ * Set a backup port. If the port loses carrier all traffic will be
+ * redirected to the configured backup port. Set the value to 0 to disable
+ * it.
+ *
+ * @IFLA_BRPORT_MRP_RING_OPEN
+ *
+ * @IFLA_BRPORT_MRP_IN_OPEN
+ *
+ * @IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT
+ * The number of per-port EHT hosts limit. The default value is 512.
+ * Setting to 0 is not allowed.
+ *
+ * @IFLA_BRPORT_MCAST_EHT_HOSTS_CNT
+ * The current number of tracked hosts, read only.
+ *
+ * @IFLA_BRPORT_LOCKED
+ * Controls whether a port will be locked, meaning that hosts behind the
+ * port will not be able to communicate through the port unless an FDB
+ * entry with the unit's MAC address is in the FDB. The common use case is
+ * that hosts are allowed access through authentication with the IEEE 802.1X
+ * protocol or based on whitelists. By default this flag is off.
+ *
+ * Please note that secure 802.1X deployments should always use the
+ * *BR_BOOLOPT_NO_LL_LEARN* flag, to not permit the bridge to populate its
+ * FDB based on link-local (EAPOL) traffic received on the port.
+ *
+ * @IFLA_BRPORT_MAB
+ * Controls whether a port will use MAC Authentication Bypass (MAB), a
+ * technique through which select MAC addresses may be allowed on a locked
+ * port, without using 802.1X authentication. Packets with an unknown source
+ * MAC address generates a "locked" FDB entry on the incoming bridge port.
+ * The common use case is for user space to react to these bridge FDB
+ * notifications and optionally replace the locked FDB entry with a normal
+ * one, allowing traffic to pass for whitelisted MAC addresses.
+ *
+ * Setting this flag also requires *IFLA_BRPORT_LOCKED* and
+ * *IFLA_BRPORT_LEARNING*. *IFLA_BRPORT_LOCKED* ensures that unauthorized
+ * data packets are dropped, and *IFLA_BRPORT_LEARNING* allows the dynamic
+ * FDB entries installed by user space (as replacements for the locked FDB
+ * entries) to be refreshed and/or aged out.
+ *
+ * @IFLA_BRPORT_MCAST_N_GROUPS
+ *
+ * @IFLA_BRPORT_MCAST_MAX_GROUPS
+ * Sets the maximum number of MDB entries that can be registered for a
+ * given port. Attempts to register more MDB entries at the port than this
+ * limit allows will be rejected, whether they are done through netlink
+ * (e.g. the bridge tool), or IGMP or MLD membership reports. Setting a
+ * limit of 0 disables the limit. The default value is 0.
+ *
+ * @IFLA_BRPORT_NEIGH_VLAN_SUPPRESS
+ * Controls whether neighbor discovery (arp and nd) proxy and suppression is
+ * enabled for a given port. By default this flag is off.
+ *
+ * Note that this option only takes effect when *IFLA_BRPORT_NEIGH_SUPPRESS*
+ * is enabled for a given port.
+ *
+ * @IFLA_BRPORT_BACKUP_NHID
+ * The FDB nexthop object ID to attach to packets being redirected to a
+ * backup port that has VLAN tunnel mapping enabled (via the
+ * *IFLA_BRPORT_VLAN_TUNNEL* option). Setting a value of 0 (default) has
+ * the effect of not attaching any ID.
+ */
enum {
IFLA_BRPORT_UNSPEC,
IFLA_BRPORT_STATE, /* Spanning tree state */
@@ -769,6 +1293,19 @@ enum netkit_mode {
NETKIT_L3,
};
+/* NETKIT_SCRUB_NONE leaves clearing skb->{mark,priority} up to
+ * the BPF program if attached. This also means the latter can
+ * consume the two fields if they were populated earlier.
+ *
+ * NETKIT_SCRUB_DEFAULT zeroes skb->{mark,priority} fields before
+ * invoking the attached BPF program when the peer device resides
+ * in a different network namespace. This is the default behavior.
+ */
+enum netkit_scrub {
+ NETKIT_SCRUB_NONE,
+ NETKIT_SCRUB_DEFAULT,
+};
+
enum {
IFLA_NETKIT_UNSPEC,
IFLA_NETKIT_PEER_INFO,
@@ -776,6 +1313,10 @@ enum {
IFLA_NETKIT_POLICY,
IFLA_NETKIT_PEER_POLICY,
IFLA_NETKIT_MODE,
+ IFLA_NETKIT_SCRUB,
+ IFLA_NETKIT_PEER_SCRUB,
+ IFLA_NETKIT_HEADROOM,
+ IFLA_NETKIT_TAILROOM,
__IFLA_NETKIT_MAX,
};
#define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1)
@@ -854,6 +1395,7 @@ enum {
IFLA_VXLAN_DF,
IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */
IFLA_VXLAN_LOCALBYPASS,
+ IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
@@ -871,6 +1413,13 @@ enum ifla_vxlan_df {
VXLAN_DF_MAX = __VXLAN_DF_END - 1,
};
+enum ifla_vxlan_label_policy {
+ VXLAN_LABEL_FIXED = 0,
+ VXLAN_LABEL_INHERIT = 1,
+ __VXLAN_LABEL_END,
+ VXLAN_LABEL_MAX = __VXLAN_LABEL_END - 1,
+};
+
/* GENEVE section */
enum {
IFLA_GENEVE_UNSPEC,
@@ -935,6 +1484,8 @@ enum {
IFLA_GTP_ROLE,
IFLA_GTP_CREATE_SOCKETS,
IFLA_GTP_RESTART_COUNT,
+ IFLA_GTP_LOCAL,
+ IFLA_GTP_LOCAL6,
__IFLA_GTP_MAX,
};
#define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1)
@@ -1240,6 +1791,7 @@ enum {
IFLA_HSR_PROTOCOL, /* Indicate different protocol than
* HSR. For example PRP.
*/
+ IFLA_HSR_INTERLINK, /* HSR interlink network device */
__IFLA_HSR_MAX,
};
@@ -1417,7 +1969,9 @@ enum {
enum {
IFLA_DSA_UNSPEC,
- IFLA_DSA_MASTER,
+ IFLA_DSA_CONDUIT,
+ /* Deprecated, use IFLA_DSA_CONDUIT instead */
+ IFLA_DSA_MASTER = IFLA_DSA_CONDUIT,
__IFLA_DSA_MAX,
};
diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
index 638c606dfa74..44f2bb93e7e6 100644
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -7,8 +7,8 @@
* Magnus Karlsson <magnus.karlsson@intel.com>
*/
-#ifndef _LINUX_IF_XDP_H
-#define _LINUX_IF_XDP_H
+#ifndef _UAPI_LINUX_IF_XDP_H
+#define _UAPI_LINUX_IF_XDP_H
#include <linux/types.h>
@@ -41,6 +41,10 @@
*/
#define XDP_UMEM_TX_SW_CSUM (1 << 1)
+/* Request to reserve tx_metadata_len bytes of per-chunk metadata.
+ */
+#define XDP_UMEM_TX_METADATA_LEN (1 << 2)
+
struct sockaddr_xdp {
__u16 sxdp_family;
__u16 sxdp_flags;
@@ -113,16 +117,22 @@ struct xdp_options {
((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1)
/* Request transmit timestamp. Upon completion, put it into tx_timestamp
- * field of union xsk_tx_metadata.
+ * field of struct xsk_tx_metadata.
*/
#define XDP_TXMD_FLAGS_TIMESTAMP (1 << 0)
/* Request transmit checksum offload. Checksum start position and offset
- * are communicated via csum_start and csum_offset fields of union
+ * are communicated via csum_start and csum_offset fields of struct
* xsk_tx_metadata.
*/
#define XDP_TXMD_FLAGS_CHECKSUM (1 << 1)
+/* Request launch time hardware offload. The device will schedule the packet for
+ * transmission at a pre-determined time called launch time. The value of
+ * launch time is communicated via launch_time field of struct xsk_tx_metadata.
+ */
+#define XDP_TXMD_FLAGS_LAUNCH_TIME (1 << 2)
+
/* AF_XDP offloads request. 'request' union member is consumed by the driver
* when the packet is being transmitted. 'completion' union member is
* filled by the driver when the transmit completion arrives.
@@ -138,6 +148,10 @@ struct xsk_tx_metadata {
__u16 csum_start;
/* Offset from csum_start where checksum should be stored. */
__u16 csum_offset;
+
+ /* XDP_TXMD_FLAGS_LAUNCH_TIME */
+ /* Launch time in nanosecond against the PTP HW Clock */
+ __u64 launch_time;
} request;
struct {
@@ -166,4 +180,4 @@ struct xdp_desc {
/* TX packet carries valid metadata. */
#define XDP_TX_METADATA (1 << 1)
-#endif /* _LINUX_IF_XDP_H */
+#endif /* _UAPI_LINUX_IF_XDP_H */
diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
index e682ab628dfa..ced0fc3c3aa5 100644
--- a/tools/include/uapi/linux/in.h
+++ b/tools/include/uapi/linux/in.h
@@ -79,8 +79,12 @@ enum {
#define IPPROTO_MPLS IPPROTO_MPLS
IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */
#define IPPROTO_ETHERNET IPPROTO_ETHERNET
+ IPPROTO_AGGFRAG = 144, /* AGGFRAG in ESP (RFC 9347) */
+#define IPPROTO_AGGFRAG IPPROTO_AGGFRAG
IPPROTO_RAW = 255, /* Raw IP packets */
#define IPPROTO_RAW IPPROTO_RAW
+ IPPROTO_SMC = 256, /* Shared Memory Communications */
+#define IPPROTO_SMC IPPROTO_SMC
IPPROTO_MPTCP = 262, /* Multipath TCP connection */
#define IPPROTO_MPTCP IPPROTO_MPTCP
IPPROTO_MAX
@@ -139,7 +143,7 @@ struct in_addr {
*/
#define IP_PMTUDISC_INTERFACE 4
/* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get
- * fragmented if they exeed the interface mtu
+ * fragmented if they exceed the interface mtu
*/
#define IP_PMTUDISC_OMIT 5
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 2190adbe3002..b6ae8ad8934b 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -192,11 +192,24 @@ struct kvm_xen_exit {
/* Flags that describe what fields in emulation_failure hold valid data. */
#define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0)
+/*
+ * struct kvm_run can be modified by userspace at any time, so KVM must be
+ * careful to avoid TOCTOU bugs. In order to protect KVM, HINT_UNSAFE_IN_KVM()
+ * renames fields in struct kvm_run from <symbol> to <symbol>__unsafe when
+ * compiled into the kernel, ensuring that any use within KVM is obvious and
+ * gets extra scrutiny.
+ */
+#ifdef __KERNEL__
+#define HINT_UNSAFE_IN_KVM(_symbol) _symbol##__unsafe
+#else
+#define HINT_UNSAFE_IN_KVM(_symbol) _symbol
+#endif
+
/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run {
/* in */
__u8 request_interrupt_window;
- __u8 immediate_exit;
+ __u8 HINT_UNSAFE_IN_KVM(immediate_exit);
__u8 padding1[6];
/* out */
@@ -604,10 +617,6 @@ struct kvm_ioeventfd {
#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3)
-#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \
- KVM_X86_DISABLE_EXITS_HLT | \
- KVM_X86_DISABLE_EXITS_PAUSE | \
- KVM_X86_DISABLE_EXITS_CSTATE)
/* for KVM_ENABLE_CAP */
struct kvm_enable_cap {
@@ -917,6 +926,10 @@ struct kvm_enable_cap {
#define KVM_CAP_MEMORY_ATTRIBUTES 233
#define KVM_CAP_GUEST_MEMFD 234
#define KVM_CAP_VM_TYPES 235
+#define KVM_CAP_PRE_FAULT_MEMORY 236
+#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237
+#define KVM_CAP_X86_GUEST_MODE 238
+#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239
struct kvm_irq_routing_irqchip {
__u32 irqchip;
@@ -1054,6 +1067,10 @@ struct kvm_dirty_tlb {
#define KVM_REG_SIZE_SHIFT 52
#define KVM_REG_SIZE_MASK 0x00f0000000000000ULL
+
+#define KVM_REG_SIZE(id) \
+ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
#define KVM_REG_SIZE_U8 0x0000000000000000ULL
#define KVM_REG_SIZE_U16 0x0010000000000000ULL
#define KVM_REG_SIZE_U32 0x0020000000000000ULL
@@ -1142,7 +1159,15 @@ enum kvm_device_type {
#define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME
KVM_DEV_TYPE_RISCV_AIA,
#define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA
+ KVM_DEV_TYPE_LOONGARCH_IPI,
+#define KVM_DEV_TYPE_LOONGARCH_IPI KVM_DEV_TYPE_LOONGARCH_IPI
+ KVM_DEV_TYPE_LOONGARCH_EIOINTC,
+#define KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_EIOINTC
+ KVM_DEV_TYPE_LOONGARCH_PCHPIC,
+#define KVM_DEV_TYPE_LOONGARCH_PCHPIC KVM_DEV_TYPE_LOONGARCH_PCHPIC
+
KVM_DEV_TYPE_MAX,
+
};
struct kvm_vfio_spapr_tce {
@@ -1221,9 +1246,9 @@ struct kvm_vfio_spapr_tce {
/* Available with KVM_CAP_SPAPR_RESIZE_HPT */
#define KVM_PPC_RESIZE_HPT_PREPARE _IOR(KVMIO, 0xad, struct kvm_ppc_resize_hpt)
#define KVM_PPC_RESIZE_HPT_COMMIT _IOR(KVMIO, 0xae, struct kvm_ppc_resize_hpt)
-/* Available with KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3 */
+/* Available with KVM_CAP_PPC_MMU_RADIX or KVM_CAP_PPC_MMU_HASH_V3 */
#define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg)
-/* Available with KVM_CAP_PPC_RADIX_MMU */
+/* Available with KVM_CAP_PPC_MMU_RADIX */
#define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info)
/* Available with KVM_CAP_PPC_GET_CPU_CHAR */
#define KVM_PPC_GET_CPU_CHAR _IOR(KVMIO, 0xb1, struct kvm_ppc_cpu_char)
@@ -1548,4 +1573,13 @@ struct kvm_create_guest_memfd {
__u64 reserved[6];
};
+#define KVM_PRE_FAULT_MEMORY _IOWR(KVMIO, 0xd5, struct kvm_pre_fault_memory)
+
+struct kvm_pre_fault_memory {
+ __u64 gpa;
+ __u64 size;
+ __u64 flags;
+ __u64 padding[5];
+};
+
#endif /* __LINUX_KVM_H */
diff --git a/tools/include/uapi/linux/memfd.h b/tools/include/uapi/linux/memfd.h
new file mode 100644
index 000000000000..01c0324e7733
--- /dev/null
+++ b/tools/include/uapi/linux/memfd.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _LINUX_MEMFD_H
+#define _LINUX_MEMFD_H
+
+#include <asm-generic/hugetlb_encode.h>
+
+/* flags for memfd_create(2) (unsigned int) */
+#define MFD_CLOEXEC 0x0001U
+#define MFD_ALLOW_SEALING 0x0002U
+#define MFD_HUGETLB 0x0004U
+/* not executable and sealed to prevent changing to executable. */
+#define MFD_NOEXEC_SEAL 0x0008U
+/* executable */
+#define MFD_EXEC 0x0010U
+
+/*
+ * Huge page size encoding when MFD_HUGETLB is specified, and a huge page
+ * size other than the default is desired. See hugetlb_encode.h.
+ * All known huge page size encodings are provided here. It is the
+ * responsibility of the application to know which sizes are supported on
+ * the running system. See mmap(2) man page for details.
+ */
+#define MFD_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT
+#define MFD_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK
+
+#define MFD_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB
+#define MFD_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB
+#define MFD_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB
+#define MFD_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB
+#define MFD_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB
+#define MFD_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB
+#define MFD_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB
+#define MFD_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB
+#define MFD_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB
+#define MFD_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB
+#define MFD_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
+#define MFD_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB
+
+#endif /* _LINUX_MEMFD_H */
diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h
index a246e11988d5..e89d00528f2f 100644
--- a/tools/include/uapi/linux/mman.h
+++ b/tools/include/uapi/linux/mman.h
@@ -17,6 +17,7 @@
#define MAP_SHARED 0x01 /* Share changes */
#define MAP_PRIVATE 0x02 /* Changes are private */
#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */
+#define MAP_DROPPABLE 0x08 /* Zero memory under memory pressure. */
/*
* Huge page size encoding when MAP_HUGETLB is specified, and a huge page
diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h
index ad5478dbad00..7fa67c2031a5 100644
--- a/tools/include/uapi/linux/mount.h
+++ b/tools/include/uapi/linux/mount.h
@@ -154,7 +154,7 @@ struct mount_attr {
*/
struct statmount {
__u32 size; /* Total size, including strings */
- __u32 __spare1;
+ __u32 mnt_opts; /* [str] Options (comma separated, escaped) */
__u64 mask; /* What results were written */
__u32 sb_dev_major; /* Device ID */
__u32 sb_dev_minor;
@@ -172,7 +172,19 @@ struct statmount {
__u64 propagate_from; /* Propagation from in current namespace */
__u32 mnt_root; /* [str] Root of mount relative to root of fs */
__u32 mnt_point; /* [str] Mountpoint relative to current root */
- __u64 __spare2[50];
+ __u64 mnt_ns_id; /* ID of the mount namespace */
+ __u32 fs_subtype; /* [str] Subtype of fs_type (if any) */
+ __u32 sb_source; /* [str] Source string of the mount */
+ __u32 opt_num; /* Number of fs options */
+ __u32 opt_array; /* [str] Array of nul terminated fs options */
+ __u32 opt_sec_num; /* Number of security options */
+ __u32 opt_sec_array; /* [str] Array of nul terminated security options */
+ __u64 supported_mask; /* Mask flags that this kernel supports */
+ __u32 mnt_uidmap_num; /* Number of uid mappings */
+ __u32 mnt_uidmap; /* [str] Array of uid mappings (as seen from callers namespace) */
+ __u32 mnt_gidmap_num; /* Number of gid mappings */
+ __u32 mnt_gidmap; /* [str] Array of gid mappings (as seen from callers namespace) */
+ __u64 __spare2[43];
char str[]; /* Variable size part containing strings */
};
@@ -188,10 +200,12 @@ struct mnt_id_req {
__u32 spare;
__u64 mnt_id;
__u64 param;
+ __u64 mnt_ns_id;
};
/* List of all mnt_id_req versions. */
#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */
+#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */
/*
* @mask bits for statmount(2)
@@ -202,10 +216,20 @@ struct mnt_id_req {
#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */
#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */
#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */
+#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */
+#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */
+#define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */
+#define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */
+#define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */
+#define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */
+#define STATMOUNT_SUPPORTED_MASK 0x00001000U /* Want/got supported mask flags */
+#define STATMOUNT_MNT_UIDMAP 0x00002000U /* Want/got uidmap... */
+#define STATMOUNT_MNT_GIDMAP 0x00004000U /* Want/got gidmap... */
/*
* Special @mnt_id values that can be passed to listmount
*/
#define LSMT_ROOT 0xffffffffffffffff /* root mount */
+#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */
#endif /* _UAPI_LINUX_MOUNT_H */
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index bb65ee840cda..7eb9571786b8 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -59,10 +59,13 @@ enum netdev_xdp_rx_metadata {
* by the driver.
* @NETDEV_XSK_FLAGS_TX_CHECKSUM: L3 checksum HW offload is supported by the
* driver.
+ * @NETDEV_XSK_FLAGS_TX_LAUNCH_TIME_FIFO: Launch time HW offload is supported
+ * by the driver.
*/
enum netdev_xsk_flags {
NETDEV_XSK_FLAGS_TX_TIMESTAMP = 1,
NETDEV_XSK_FLAGS_TX_CHECKSUM = 2,
+ NETDEV_XSK_FLAGS_TX_LAUNCH_TIME_FIFO = 4,
};
enum netdev_queue_type {
@@ -87,12 +90,19 @@ enum {
};
enum {
+ __NETDEV_A_IO_URING_PROVIDER_INFO_MAX,
+ NETDEV_A_IO_URING_PROVIDER_INFO_MAX = (__NETDEV_A_IO_URING_PROVIDER_INFO_MAX - 1)
+};
+
+enum {
NETDEV_A_PAGE_POOL_ID = 1,
NETDEV_A_PAGE_POOL_IFINDEX,
NETDEV_A_PAGE_POOL_NAPI_ID,
NETDEV_A_PAGE_POOL_INFLIGHT,
NETDEV_A_PAGE_POOL_INFLIGHT_MEM,
NETDEV_A_PAGE_POOL_DETACH_TIME,
+ NETDEV_A_PAGE_POOL_DMABUF,
+ NETDEV_A_PAGE_POOL_IO_URING,
__NETDEV_A_PAGE_POOL_MAX,
NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1)
@@ -121,16 +131,27 @@ enum {
NETDEV_A_NAPI_ID,
NETDEV_A_NAPI_IRQ,
NETDEV_A_NAPI_PID,
+ NETDEV_A_NAPI_DEFER_HARD_IRQS,
+ NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT,
+ NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT,
__NETDEV_A_NAPI_MAX,
NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1)
};
enum {
+ __NETDEV_A_XSK_INFO_MAX,
+ NETDEV_A_XSK_INFO_MAX = (__NETDEV_A_XSK_INFO_MAX - 1)
+};
+
+enum {
NETDEV_A_QUEUE_ID = 1,
NETDEV_A_QUEUE_IFINDEX,
NETDEV_A_QUEUE_TYPE,
NETDEV_A_QUEUE_NAPI_ID,
+ NETDEV_A_QUEUE_DMABUF,
+ NETDEV_A_QUEUE_IO_URING,
+ NETDEV_A_QUEUE_XSK,
__NETDEV_A_QUEUE_MAX,
NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)
@@ -146,12 +167,44 @@ enum {
NETDEV_A_QSTATS_TX_PACKETS,
NETDEV_A_QSTATS_TX_BYTES,
NETDEV_A_QSTATS_RX_ALLOC_FAIL,
+ NETDEV_A_QSTATS_RX_HW_DROPS,
+ NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS,
+ NETDEV_A_QSTATS_RX_CSUM_COMPLETE,
+ NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY,
+ NETDEV_A_QSTATS_RX_CSUM_NONE,
+ NETDEV_A_QSTATS_RX_CSUM_BAD,
+ NETDEV_A_QSTATS_RX_HW_GRO_PACKETS,
+ NETDEV_A_QSTATS_RX_HW_GRO_BYTES,
+ NETDEV_A_QSTATS_RX_HW_GRO_WIRE_PACKETS,
+ NETDEV_A_QSTATS_RX_HW_GRO_WIRE_BYTES,
+ NETDEV_A_QSTATS_RX_HW_DROP_RATELIMITS,
+ NETDEV_A_QSTATS_TX_HW_DROPS,
+ NETDEV_A_QSTATS_TX_HW_DROP_ERRORS,
+ NETDEV_A_QSTATS_TX_CSUM_NONE,
+ NETDEV_A_QSTATS_TX_NEEDS_CSUM,
+ NETDEV_A_QSTATS_TX_HW_GSO_PACKETS,
+ NETDEV_A_QSTATS_TX_HW_GSO_BYTES,
+ NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS,
+ NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES,
+ NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS,
+ NETDEV_A_QSTATS_TX_STOP,
+ NETDEV_A_QSTATS_TX_WAKE,
__NETDEV_A_QSTATS_MAX,
NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1)
};
enum {
+ NETDEV_A_DMABUF_IFINDEX = 1,
+ NETDEV_A_DMABUF_QUEUES,
+ NETDEV_A_DMABUF_FD,
+ NETDEV_A_DMABUF_ID,
+
+ __NETDEV_A_DMABUF_MAX,
+ NETDEV_A_DMABUF_MAX = (__NETDEV_A_DMABUF_MAX - 1)
+};
+
+enum {
NETDEV_CMD_DEV_GET = 1,
NETDEV_CMD_DEV_ADD_NTF,
NETDEV_CMD_DEV_DEL_NTF,
@@ -164,6 +217,9 @@ enum {
NETDEV_CMD_QUEUE_GET,
NETDEV_CMD_NAPI_GET,
NETDEV_CMD_QSTATS_GET,
+ NETDEV_CMD_BIND_RX,
+ NETDEV_CMD_NAPI_SET,
+ NETDEV_CMD_BIND_TX,
__NETDEV_CMD_MAX,
NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
diff --git a/tools/include/uapi/linux/nsfs.h b/tools/include/uapi/linux/nsfs.h
new file mode 100644
index 000000000000..34127653fd00
--- /dev/null
+++ b/tools/include/uapi/linux/nsfs.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_NSFS_H
+#define __LINUX_NSFS_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+#define NSIO 0xb7
+
+/* Returns a file descriptor that refers to an owning user namespace */
+#define NS_GET_USERNS _IO(NSIO, 0x1)
+/* Returns a file descriptor that refers to a parent namespace */
+#define NS_GET_PARENT _IO(NSIO, 0x2)
+/* Returns the type of namespace (CLONE_NEW* value) referred to by
+ file descriptor */
+#define NS_GET_NSTYPE _IO(NSIO, 0x3)
+/* Get owner UID (in the caller's user namespace) for a user namespace */
+#define NS_GET_OWNER_UID _IO(NSIO, 0x4)
+/* Get the id for a mount namespace */
+#define NS_GET_MNTNS_ID _IOR(NSIO, 0x5, __u64)
+/* Translate pid from target pid namespace into the caller's pid namespace. */
+#define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int)
+/* Return thread-group leader id of pid in the callers pid namespace. */
+#define NS_GET_TGID_FROM_PIDNS _IOR(NSIO, 0x7, int)
+/* Translate pid from caller's pid namespace into a target pid namespace. */
+#define NS_GET_PID_IN_PIDNS _IOR(NSIO, 0x8, int)
+/* Return thread-group leader id of pid in the target pid namespace. */
+#define NS_GET_TGID_IN_PIDNS _IOR(NSIO, 0x9, int)
+
+struct mnt_ns_info {
+ __u32 size;
+ __u32 nr_mounts;
+ __u64 mnt_ns_id;
+};
+
+#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */
+
+/* Get information about namespace. */
+#define NS_MNT_GET_INFO _IOR(NSIO, 10, struct mnt_ns_info)
+/* Get next namespace. */
+#define NS_MNT_GET_NEXT _IOR(NSIO, 11, struct mnt_ns_info)
+/* Get previous namespace. */
+#define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info)
+
+#endif /* __LINUX_NSFS_H */
diff --git a/tools/include/uapi/linux/openat2.h b/tools/include/uapi/linux/openat2.h
deleted file mode 100644
index a5feb7604948..000000000000
--- a/tools/include/uapi/linux/openat2.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _UAPI_LINUX_OPENAT2_H
-#define _UAPI_LINUX_OPENAT2_H
-
-#include <linux/types.h>
-
-/*
- * Arguments for how openat2(2) should open the target path. If only @flags and
- * @mode are non-zero, then openat2(2) operates very similarly to openat(2).
- *
- * However, unlike openat(2), unknown or invalid bits in @flags result in
- * -EINVAL rather than being silently ignored. @mode must be zero unless one of
- * {O_CREAT, O_TMPFILE} are set.
- *
- * @flags: O_* flags.
- * @mode: O_CREAT/O_TMPFILE file mode.
- * @resolve: RESOLVE_* flags.
- */
-struct open_how {
- __u64 flags;
- __u64 mode;
- __u64 resolve;
-};
-
-/* how->resolve flags for openat2(2). */
-#define RESOLVE_NO_XDEV 0x01 /* Block mount-point crossings
- (includes bind-mounts). */
-#define RESOLVE_NO_MAGICLINKS 0x02 /* Block traversal through procfs-style
- "magic-links". */
-#define RESOLVE_NO_SYMLINKS 0x04 /* Block traversal through all symlinks
- (implies OEXT_NO_MAGICLINKS) */
-#define RESOLVE_BENEATH 0x08 /* Block "lexical" trickery like
- "..", symlinks, and absolute
- paths which escape the dirfd. */
-#define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".."
- be scoped inside the dirfd
- (similar to chroot(2)). */
-#define RESOLVE_CACHED 0x20 /* Only complete if resolution can be
- completed through cached lookup. May
- return -EAGAIN if that's not
- possible. */
-
-#endif /* _UAPI_LINUX_OPENAT2_H */
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 3a64499b0f5d..78a362b80027 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -39,18 +39,21 @@ enum perf_type_id {
/*
* attr.config layout for type PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE
+ *
* PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA
* AA: hardware event ID
* EEEEEEEE: PMU type ID
+ *
* PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB
* BB: hardware cache ID
* CC: hardware cache op ID
* DD: hardware cache op result ID
* EEEEEEEE: PMU type ID
- * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied.
+ *
+ * If the PMU type ID is 0, PERF_TYPE_RAW will be applied.
*/
-#define PERF_PMU_TYPE_SHIFT 32
-#define PERF_HW_EVENT_MASK 0xffffffff
+#define PERF_PMU_TYPE_SHIFT 32
+#define PERF_HW_EVENT_MASK 0xffffffff
/*
* Generalized performance event event_id types, used by the
@@ -112,7 +115,7 @@ enum perf_hw_cache_op_result_id {
/*
* Special "software" events provided by the kernel, even if the hardware
* does not support performance events. These events measure various
- * physical and sw events of the kernel (and allow the profiling of them as
+ * physical and SW events of the kernel (and allow the profiling of them as
* well):
*/
enum perf_sw_ids {
@@ -167,8 +170,9 @@ enum perf_event_sample_format {
};
#define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT)
+
/*
- * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set
+ * Values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set.
*
* If the user does not pass priv level information via branch_sample_type,
* the kernel uses the event's priv level. Branch and event priv levels do
@@ -178,20 +182,20 @@ enum perf_event_sample_format {
* of branches and therefore it supersedes all the other types.
*/
enum perf_branch_sample_type_shift {
- PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */
- PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */
- PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */
-
- PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */
- PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */
- PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */
- PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */
- PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */
- PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */
- PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */
+ PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */
+ PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */
+ PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */
+
+ PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */
+ PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */
+ PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */
+ PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */
+ PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */
+ PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */
+ PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */
PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */
- PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */
+ PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* CALL/RET stack */
PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */
PERF_SAMPLE_BRANCH_CALL_SHIFT = 13, /* direct call */
@@ -210,96 +214,95 @@ enum perf_branch_sample_type_shift {
};
enum perf_branch_sample_type {
- PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT,
- PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT,
- PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT,
+ PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT,
+ PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT,
+ PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT,
- PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT,
- PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT,
- PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT,
- PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT,
- PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT,
- PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT,
- PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT,
- PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT,
+ PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT,
+ PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT,
+ PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT,
- PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
- PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT,
- PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
+ PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT,
+ PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT,
- PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
- PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,
- PERF_SAMPLE_BRANCH_TYPE_SAVE =
- 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,
+ PERF_SAMPLE_BRANCH_TYPE_SAVE = 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,
- PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT,
+ PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT,
- PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
+ PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
- PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT,
+ PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT,
- PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
+ PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
};
/*
- * Common flow change classification
+ * Common control flow change classifications:
*/
enum {
- PERF_BR_UNKNOWN = 0, /* unknown */
- PERF_BR_COND = 1, /* conditional */
- PERF_BR_UNCOND = 2, /* unconditional */
- PERF_BR_IND = 3, /* indirect */
- PERF_BR_CALL = 4, /* function call */
- PERF_BR_IND_CALL = 5, /* indirect function call */
- PERF_BR_RET = 6, /* function return */
- PERF_BR_SYSCALL = 7, /* syscall */
- PERF_BR_SYSRET = 8, /* syscall return */
- PERF_BR_COND_CALL = 9, /* conditional function call */
- PERF_BR_COND_RET = 10, /* conditional function return */
- PERF_BR_ERET = 11, /* exception return */
- PERF_BR_IRQ = 12, /* irq */
- PERF_BR_SERROR = 13, /* system error */
- PERF_BR_NO_TX = 14, /* not in transaction */
- PERF_BR_EXTEND_ABI = 15, /* extend ABI */
+ PERF_BR_UNKNOWN = 0, /* Unknown */
+ PERF_BR_COND = 1, /* Conditional */
+ PERF_BR_UNCOND = 2, /* Unconditional */
+ PERF_BR_IND = 3, /* Indirect */
+ PERF_BR_CALL = 4, /* Function call */
+ PERF_BR_IND_CALL = 5, /* Indirect function call */
+ PERF_BR_RET = 6, /* Function return */
+ PERF_BR_SYSCALL = 7, /* Syscall */
+ PERF_BR_SYSRET = 8, /* Syscall return */
+ PERF_BR_COND_CALL = 9, /* Conditional function call */
+ PERF_BR_COND_RET = 10, /* Conditional function return */
+ PERF_BR_ERET = 11, /* Exception return */
+ PERF_BR_IRQ = 12, /* IRQ */
+ PERF_BR_SERROR = 13, /* System error */
+ PERF_BR_NO_TX = 14, /* Not in transaction */
+ PERF_BR_EXTEND_ABI = 15, /* Extend ABI */
PERF_BR_MAX,
};
/*
- * Common branch speculation outcome classification
+ * Common branch speculation outcome classifications:
*/
enum {
- PERF_BR_SPEC_NA = 0, /* Not available */
- PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */
- PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */
- PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */
+ PERF_BR_SPEC_NA = 0, /* Not available */
+ PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */
+ PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */
+ PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */
PERF_BR_SPEC_MAX,
};
enum {
- PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */
- PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */
- PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */
- PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */
- PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */
- PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */
- PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */
- PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */
+ PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */
+ PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */
+ PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */
+ PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */
+ PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */
+ PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */
+ PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */
+ PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */
PERF_BR_NEW_MAX,
};
enum {
- PERF_BR_PRIV_UNKNOWN = 0,
- PERF_BR_PRIV_USER = 1,
- PERF_BR_PRIV_KERNEL = 2,
- PERF_BR_PRIV_HV = 3,
+ PERF_BR_PRIV_UNKNOWN = 0,
+ PERF_BR_PRIV_USER = 1,
+ PERF_BR_PRIV_KERNEL = 2,
+ PERF_BR_PRIV_HV = 3,
};
-#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1
-#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2
-#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3
-#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4
-#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5
+#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1
+#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2
+#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3
+#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4
+#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5
#define PERF_SAMPLE_BRANCH_PLM_ALL \
(PERF_SAMPLE_BRANCH_USER|\
@@ -310,9 +313,9 @@ enum {
* Values to determine ABI of the registers dump.
*/
enum perf_sample_regs_abi {
- PERF_SAMPLE_REGS_ABI_NONE = 0,
- PERF_SAMPLE_REGS_ABI_32 = 1,
- PERF_SAMPLE_REGS_ABI_64 = 2,
+ PERF_SAMPLE_REGS_ABI_NONE = 0,
+ PERF_SAMPLE_REGS_ABI_32 = 1,
+ PERF_SAMPLE_REGS_ABI_64 = 2,
};
/*
@@ -320,21 +323,21 @@ enum perf_sample_regs_abi {
* abort events. Multiple bits can be set.
*/
enum {
- PERF_TXN_ELISION = (1 << 0), /* From elision */
- PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */
- PERF_TXN_SYNC = (1 << 2), /* Instruction is related */
- PERF_TXN_ASYNC = (1 << 3), /* Instruction not related */
- PERF_TXN_RETRY = (1 << 4), /* Retry possible */
- PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */
- PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */
- PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */
+ PERF_TXN_ELISION = (1 << 0), /* From elision */
+ PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */
+ PERF_TXN_SYNC = (1 << 2), /* Instruction is related */
+ PERF_TXN_ASYNC = (1 << 3), /* Instruction is not related */
+ PERF_TXN_RETRY = (1 << 4), /* Retry possible */
+ PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */
+ PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */
+ PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */
- PERF_TXN_MAX = (1 << 8), /* non-ABI */
+ PERF_TXN_MAX = (1 << 8), /* non-ABI */
- /* bits 32..63 are reserved for the abort code */
+ /* Bits 32..63 are reserved for the abort code */
- PERF_TXN_ABORT_MASK = (0xffffffffULL << 32),
- PERF_TXN_ABORT_SHIFT = 32,
+ PERF_TXN_ABORT_MASK = (0xffffffffULL << 32),
+ PERF_TXN_ABORT_SHIFT = 32,
};
/*
@@ -369,22 +372,22 @@ enum perf_event_read_format {
PERF_FORMAT_MAX = 1U << 5, /* non-ABI */
};
-#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
-#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */
-#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */
-#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */
- /* add: sample_stack_user */
-#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */
-#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */
-#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */
-#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */
-#define PERF_ATTR_SIZE_VER8 136 /* add: config3 */
+#define PERF_ATTR_SIZE_VER0 64 /* Size of first published 'struct perf_event_attr' */
+#define PERF_ATTR_SIZE_VER1 72 /* Add: config2 */
+#define PERF_ATTR_SIZE_VER2 80 /* Add: branch_sample_type */
+#define PERF_ATTR_SIZE_VER3 96 /* Add: sample_regs_user */
+ /* Add: sample_stack_user */
+#define PERF_ATTR_SIZE_VER4 104 /* Add: sample_regs_intr */
+#define PERF_ATTR_SIZE_VER5 112 /* Add: aux_watermark */
+#define PERF_ATTR_SIZE_VER6 120 /* Add: aux_sample_size */
+#define PERF_ATTR_SIZE_VER7 128 /* Add: sig_data */
+#define PERF_ATTR_SIZE_VER8 136 /* Add: config3 */
/*
- * Hardware event_id to monitor via a performance monitoring event:
- *
- * @sample_max_stack: Max number of frame pointers in a callchain,
- * should be < /proc/sys/kernel/perf_event_max_stack
+ * 'struct perf_event_attr' contains various attributes that define
+ * a performance event - most of them hardware related configuration
+ * details, but also a lot of behavioral switches and values implemented
+ * by the kernel.
*/
struct perf_event_attr {
@@ -394,7 +397,7 @@ struct perf_event_attr {
__u32 type;
/*
- * Size of the attr structure, for fwd/bwd compat.
+ * Size of the attr structure, for forward/backwards compatibility.
*/
__u32 size;
@@ -449,21 +452,21 @@ struct perf_event_attr {
comm_exec : 1, /* flag comm events that are due to an exec */
use_clockid : 1, /* use @clockid for time fields */
context_switch : 1, /* context switch data */
- write_backward : 1, /* Write ring buffer from end to beginning */
+ write_backward : 1, /* write ring buffer from end to beginning */
namespaces : 1, /* include namespaces data */
ksymbol : 1, /* include ksymbol events */
- bpf_event : 1, /* include bpf events */
+ bpf_event : 1, /* include BPF events */
aux_output : 1, /* generate AUX records instead of events */
cgroup : 1, /* include cgroup events */
text_poke : 1, /* include text poke events */
- build_id : 1, /* use build id in mmap2 events */
+ build_id : 1, /* use build ID in mmap2 events */
inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */
remove_on_exec : 1, /* event is removed from task on exec */
sigtrap : 1, /* send synchronous SIGTRAP on event */
__reserved_1 : 26;
union {
- __u32 wakeup_events; /* wakeup every n events */
+ __u32 wakeup_events; /* wake up every n events */
__u32 wakeup_watermark; /* bytes before wakeup */
};
@@ -472,13 +475,13 @@ struct perf_event_attr {
__u64 bp_addr;
__u64 kprobe_func; /* for perf_kprobe */
__u64 uprobe_path; /* for perf_uprobe */
- __u64 config1; /* extension of config */
+ __u64 config1; /* extension of config */
};
union {
__u64 bp_len;
- __u64 kprobe_addr; /* when kprobe_func == NULL */
+ __u64 kprobe_addr; /* when kprobe_func == NULL */
__u64 probe_offset; /* for perf_[k,u]probe */
- __u64 config2; /* extension of config1 */
+ __u64 config2; /* extension of config1 */
};
__u64 branch_sample_type; /* enum perf_branch_sample_type */
@@ -508,10 +511,28 @@ struct perf_event_attr {
* Wakeup watermark for AUX area
*/
__u32 aux_watermark;
+
+ /*
+ * Max number of frame pointers in a callchain, should be
+ * lower than /proc/sys/kernel/perf_event_max_stack.
+ *
+ * Max number of entries of branch stack should be lower
+ * than the hardware limit.
+ */
__u16 sample_max_stack;
+
__u16 __reserved_2;
__u32 aux_sample_size;
- __u32 __reserved_3;
+
+ union {
+ __u32 aux_action;
+ struct {
+ __u32 aux_start_paused : 1, /* start AUX area tracing paused */
+ aux_pause : 1, /* on overflow, pause AUX area tracing */
+ aux_resume : 1, /* on overflow, resume AUX area tracing */
+ __reserved_3 : 29;
+ };
+ };
/*
* User provided data if sigtrap=1, passed back to user via
@@ -526,7 +547,7 @@ struct perf_event_attr {
/*
* Structure used by below PERF_EVENT_IOC_QUERY_BPF command
- * to query bpf programs attached to the same perf tracepoint
+ * to query BPF programs attached to the same perf tracepoint
* as the given perf event.
*/
struct perf_event_query_bpf {
@@ -548,21 +569,21 @@ struct perf_event_query_bpf {
/*
* Ioctls that can be done on a perf event fd:
*/
-#define PERF_EVENT_IOC_ENABLE _IO ('$', 0)
-#define PERF_EVENT_IOC_DISABLE _IO ('$', 1)
-#define PERF_EVENT_IOC_REFRESH _IO ('$', 2)
-#define PERF_EVENT_IOC_RESET _IO ('$', 3)
-#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64)
-#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5)
-#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *)
-#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *)
-#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
-#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32)
+#define PERF_EVENT_IOC_ENABLE _IO ('$', 0)
+#define PERF_EVENT_IOC_DISABLE _IO ('$', 1)
+#define PERF_EVENT_IOC_REFRESH _IO ('$', 2)
+#define PERF_EVENT_IOC_RESET _IO ('$', 3)
+#define PERF_EVENT_IOC_PERIOD _IOW ('$', 4, __u64)
+#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5)
+#define PERF_EVENT_IOC_SET_FILTER _IOW ('$', 6, char *)
+#define PERF_EVENT_IOC_ID _IOR ('$', 7, __u64 *)
+#define PERF_EVENT_IOC_SET_BPF _IOW ('$', 8, __u32)
+#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW ('$', 9, __u32)
#define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *)
-#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW('$', 11, struct perf_event_attr *)
+#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW ('$', 11, struct perf_event_attr *)
enum perf_event_ioc_flags {
- PERF_IOC_FLAG_GROUP = 1U << 0,
+ PERF_IOC_FLAG_GROUP = 1U << 0,
};
/*
@@ -573,7 +594,7 @@ struct perf_event_mmap_page {
__u32 compat_version; /* lowest version this is compat with */
/*
- * Bits needed to read the hw events in user-space.
+ * Bits needed to read the HW events in user-space.
*
* u32 seq, time_mult, time_shift, index, width;
* u64 count, enabled, running;
@@ -611,7 +632,7 @@ struct perf_event_mmap_page {
__u32 index; /* hardware event identifier */
__s64 offset; /* add to hardware event value */
__u64 time_enabled; /* time event active */
- __u64 time_running; /* time event on cpu */
+ __u64 time_running; /* time event on CPU */
union {
__u64 capabilities;
struct {
@@ -639,7 +660,7 @@ struct perf_event_mmap_page {
/*
* If cap_usr_time the below fields can be used to compute the time
- * delta since time_enabled (in ns) using rdtsc or similar.
+ * delta since time_enabled (in ns) using RDTSC or similar.
*
* u64 quot, rem;
* u64 delta;
@@ -712,7 +733,7 @@ struct perf_event_mmap_page {
* after reading this value.
*
* When the mapping is PROT_WRITE the @data_tail value should be
- * written by userspace to reflect the last read data, after issueing
+ * written by user-space to reflect the last read data, after issuing
* an smp_mb() to separate the data read from the ->data_tail store.
* In this case the kernel will not over-write unread data.
*
@@ -728,7 +749,7 @@ struct perf_event_mmap_page {
/*
* AUX area is defined by aux_{offset,size} fields that should be set
- * by the userspace, so that
+ * by the user-space, so that
*
* aux_offset >= data_offset + data_size
*
@@ -802,7 +823,7 @@ struct perf_event_mmap_page {
* Indicates that thread was preempted in TASK_RUNNING state.
*
* PERF_RECORD_MISC_MMAP_BUILD_ID:
- * Indicates that mmap2 event carries build id data.
+ * Indicates that mmap2 event carries build ID data.
*/
#define PERF_RECORD_MISC_EXACT_IP (1 << 14)
#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14)
@@ -813,26 +834,26 @@ struct perf_event_mmap_page {
#define PERF_RECORD_MISC_EXT_RESERVED (1 << 15)
struct perf_event_header {
- __u32 type;
- __u16 misc;
- __u16 size;
+ __u32 type;
+ __u16 misc;
+ __u16 size;
};
struct perf_ns_link_info {
- __u64 dev;
- __u64 ino;
+ __u64 dev;
+ __u64 ino;
};
enum {
- NET_NS_INDEX = 0,
- UTS_NS_INDEX = 1,
- IPC_NS_INDEX = 2,
- PID_NS_INDEX = 3,
- USER_NS_INDEX = 4,
- MNT_NS_INDEX = 5,
- CGROUP_NS_INDEX = 6,
-
- NR_NAMESPACES, /* number of available namespaces */
+ NET_NS_INDEX = 0,
+ UTS_NS_INDEX = 1,
+ IPC_NS_INDEX = 2,
+ PID_NS_INDEX = 3,
+ USER_NS_INDEX = 4,
+ MNT_NS_INDEX = 5,
+ CGROUP_NS_INDEX = 6,
+
+ NR_NAMESPACES, /* number of available namespaces */
};
enum perf_event_type {
@@ -848,11 +869,11 @@ enum perf_event_type {
* optional fields being ignored.
*
* struct sample_id {
- * { u32 pid, tid; } && PERF_SAMPLE_TID
- * { u64 time; } && PERF_SAMPLE_TIME
- * { u64 id; } && PERF_SAMPLE_ID
- * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
- * { u32 cpu, res; } && PERF_SAMPLE_CPU
+ * { u32 pid, tid; } && PERF_SAMPLE_TID
+ * { u64 time; } && PERF_SAMPLE_TIME
+ * { u64 id; } && PERF_SAMPLE_ID
+ * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
+ * { u32 cpu, res; } && PERF_SAMPLE_CPU
* { u64 id; } && PERF_SAMPLE_IDENTIFIER
* } && perf_event_attr::sample_id_all
*
@@ -863,7 +884,7 @@ enum perf_event_type {
/*
* The MMAP events record the PROT_EXEC mappings so that we can
- * correlate userspace IPs to code. They have the following structure:
+ * correlate user-space IPs to code. They have the following structure:
*
* struct {
* struct perf_event_header header;
@@ -873,7 +894,7 @@ enum perf_event_type {
* u64 len;
* u64 pgoff;
* char filename[];
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_MMAP = 1,
@@ -883,7 +904,7 @@ enum perf_event_type {
* struct perf_event_header header;
* u64 id;
* u64 lost;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_LOST = 2,
@@ -894,7 +915,7 @@ enum perf_event_type {
*
* u32 pid, tid;
* char comm[];
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_COMM = 3,
@@ -905,7 +926,7 @@ enum perf_event_type {
* u32 pid, ppid;
* u32 tid, ptid;
* u64 time;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_EXIT = 4,
@@ -916,7 +937,7 @@ enum perf_event_type {
* u64 time;
* u64 id;
* u64 stream_id;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_THROTTLE = 5,
@@ -928,7 +949,7 @@ enum perf_event_type {
* u32 pid, ppid;
* u32 tid, ptid;
* u64 time;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_FORK = 7,
@@ -939,7 +960,7 @@ enum perf_event_type {
* u32 pid, tid;
*
* struct read_format values;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_READ = 8,
@@ -994,12 +1015,12 @@ enum perf_event_type {
* { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS
* } && PERF_SAMPLE_BRANCH_STACK
*
- * { u64 abi; # enum perf_sample_regs_abi
- * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
+ * { u64 abi; # enum perf_sample_regs_abi
+ * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
*
- * { u64 size;
- * char data[size];
- * u64 dyn_size; } && PERF_SAMPLE_STACK_USER
+ * { u64 size;
+ * char data[size];
+ * u64 dyn_size; } && PERF_SAMPLE_STACK_USER
*
* { union perf_sample_weight
* {
@@ -1024,10 +1045,11 @@ enum perf_event_type {
* { u64 abi; # enum perf_sample_regs_abi
* u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
* { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR
- * { u64 size;
- * char data[size]; } && PERF_SAMPLE_AUX
+ * { u64 cgroup;} && PERF_SAMPLE_CGROUP
* { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE
* { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE
+ * { u64 size;
+ * char data[size]; } && PERF_SAMPLE_AUX
* };
*/
PERF_RECORD_SAMPLE = 9,
@@ -1059,7 +1081,7 @@ enum perf_event_type {
* };
* u32 prot, flags;
* char filename[];
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_MMAP2 = 10,
@@ -1068,12 +1090,12 @@ enum perf_event_type {
* Records that new data landed in the AUX buffer part.
*
* struct {
- * struct perf_event_header header;
+ * struct perf_event_header header;
*
- * u64 aux_offset;
- * u64 aux_size;
+ * u64 aux_offset;
+ * u64 aux_size;
* u64 flags;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_AUX = 11,
@@ -1156,7 +1178,7 @@ enum perf_event_type {
PERF_RECORD_KSYMBOL = 17,
/*
- * Record bpf events:
+ * Record BPF events:
* enum perf_bpf_event_type {
* PERF_BPF_EVENT_UNKNOWN = 0,
* PERF_BPF_EVENT_PROG_LOAD = 1,
@@ -1234,179 +1256,181 @@ enum perf_record_ksymbol_type {
#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0)
enum perf_bpf_event_type {
- PERF_BPF_EVENT_UNKNOWN = 0,
- PERF_BPF_EVENT_PROG_LOAD = 1,
- PERF_BPF_EVENT_PROG_UNLOAD = 2,
- PERF_BPF_EVENT_MAX, /* non-ABI */
+ PERF_BPF_EVENT_UNKNOWN = 0,
+ PERF_BPF_EVENT_PROG_LOAD = 1,
+ PERF_BPF_EVENT_PROG_UNLOAD = 2,
+ PERF_BPF_EVENT_MAX, /* non-ABI */
};
-#define PERF_MAX_STACK_DEPTH 127
-#define PERF_MAX_CONTEXTS_PER_STACK 8
+#define PERF_MAX_STACK_DEPTH 127
+#define PERF_MAX_CONTEXTS_PER_STACK 8
enum perf_callchain_context {
- PERF_CONTEXT_HV = (__u64)-32,
- PERF_CONTEXT_KERNEL = (__u64)-128,
- PERF_CONTEXT_USER = (__u64)-512,
+ PERF_CONTEXT_HV = (__u64)-32,
+ PERF_CONTEXT_KERNEL = (__u64)-128,
+ PERF_CONTEXT_USER = (__u64)-512,
- PERF_CONTEXT_GUEST = (__u64)-2048,
- PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
- PERF_CONTEXT_GUEST_USER = (__u64)-2560,
+ PERF_CONTEXT_GUEST = (__u64)-2048,
+ PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
+ PERF_CONTEXT_GUEST_USER = (__u64)-2560,
- PERF_CONTEXT_MAX = (__u64)-4095,
+ PERF_CONTEXT_MAX = (__u64)-4095,
};
/**
* PERF_RECORD_AUX::flags bits
*/
-#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */
-#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */
-#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */
-#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */
+#define PERF_AUX_FLAG_TRUNCATED 0x0001 /* Record was truncated to fit */
+#define PERF_AUX_FLAG_OVERWRITE 0x0002 /* Snapshot from overwrite mode */
+#define PERF_AUX_FLAG_PARTIAL 0x0004 /* Record contains gaps */
+#define PERF_AUX_FLAG_COLLISION 0x0008 /* Sample collided with another */
#define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */
/* CoreSight PMU AUX buffer formats */
-#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */
-#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */
+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */
+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */
-#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
-#define PERF_FLAG_FD_OUTPUT (1UL << 1)
-#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
-#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
+#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
+#define PERF_FLAG_FD_OUTPUT (1UL << 1)
+#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup ID, per-CPU mode only */
+#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
#if defined(__LITTLE_ENDIAN_BITFIELD)
union perf_mem_data_src {
__u64 val;
struct {
- __u64 mem_op:5, /* type of opcode */
- mem_lvl:14, /* memory hierarchy level */
- mem_snoop:5, /* snoop mode */
- mem_lock:2, /* lock instr */
- mem_dtlb:7, /* tlb access */
- mem_lvl_num:4, /* memory hierarchy level number */
- mem_remote:1, /* remote */
- mem_snoopx:2, /* snoop mode, ext */
- mem_blk:3, /* access blocked */
- mem_hops:3, /* hop level */
- mem_rsvd:18;
+ __u64 mem_op : 5, /* Type of opcode */
+ mem_lvl : 14, /* Memory hierarchy level */
+ mem_snoop : 5, /* Snoop mode */
+ mem_lock : 2, /* Lock instr */
+ mem_dtlb : 7, /* TLB access */
+ mem_lvl_num : 4, /* Memory hierarchy level number */
+ mem_remote : 1, /* Remote */
+ mem_snoopx : 2, /* Snoop mode, ext */
+ mem_blk : 3, /* Access blocked */
+ mem_hops : 3, /* Hop level */
+ mem_rsvd : 18;
};
};
#elif defined(__BIG_ENDIAN_BITFIELD)
union perf_mem_data_src {
__u64 val;
struct {
- __u64 mem_rsvd:18,
- mem_hops:3, /* hop level */
- mem_blk:3, /* access blocked */
- mem_snoopx:2, /* snoop mode, ext */
- mem_remote:1, /* remote */
- mem_lvl_num:4, /* memory hierarchy level number */
- mem_dtlb:7, /* tlb access */
- mem_lock:2, /* lock instr */
- mem_snoop:5, /* snoop mode */
- mem_lvl:14, /* memory hierarchy level */
- mem_op:5; /* type of opcode */
+ __u64 mem_rsvd : 18,
+ mem_hops : 3, /* Hop level */
+ mem_blk : 3, /* Access blocked */
+ mem_snoopx : 2, /* Snoop mode, ext */
+ mem_remote : 1, /* Remote */
+ mem_lvl_num : 4, /* Memory hierarchy level number */
+ mem_dtlb : 7, /* TLB access */
+ mem_lock : 2, /* Lock instr */
+ mem_snoop : 5, /* Snoop mode */
+ mem_lvl : 14, /* Memory hierarchy level */
+ mem_op : 5; /* Type of opcode */
};
};
#else
-#error "Unknown endianness"
+# error "Unknown endianness"
#endif
-/* type of opcode (load/store/prefetch,code) */
-#define PERF_MEM_OP_NA 0x01 /* not available */
-#define PERF_MEM_OP_LOAD 0x02 /* load instruction */
-#define PERF_MEM_OP_STORE 0x04 /* store instruction */
-#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */
-#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */
-#define PERF_MEM_OP_SHIFT 0
+/* Type of memory opcode: */
+#define PERF_MEM_OP_NA 0x0001 /* Not available */
+#define PERF_MEM_OP_LOAD 0x0002 /* Load instruction */
+#define PERF_MEM_OP_STORE 0x0004 /* Store instruction */
+#define PERF_MEM_OP_PFETCH 0x0008 /* Prefetch */
+#define PERF_MEM_OP_EXEC 0x0010 /* Code (execution) */
+#define PERF_MEM_OP_SHIFT 0
/*
- * PERF_MEM_LVL_* namespace being depricated to some extent in the
+ * The PERF_MEM_LVL_* namespace is being deprecated to some extent in
* favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields.
- * Supporting this namespace inorder to not break defined ABIs.
+ * We support this namespace in order to not break defined ABIs.
*
- * memory hierarchy (memory level, hit or miss)
+ * Memory hierarchy (memory level, hit or miss)
*/
-#define PERF_MEM_LVL_NA 0x01 /* not available */
-#define PERF_MEM_LVL_HIT 0x02 /* hit level */
-#define PERF_MEM_LVL_MISS 0x04 /* miss level */
-#define PERF_MEM_LVL_L1 0x08 /* L1 */
-#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */
-#define PERF_MEM_LVL_L2 0x20 /* L2 */
-#define PERF_MEM_LVL_L3 0x40 /* L3 */
-#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */
-#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */
-#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */
-#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */
-#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */
-#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */
-#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
-#define PERF_MEM_LVL_SHIFT 5
-
-#define PERF_MEM_REMOTE_REMOTE 0x01 /* Remote */
-#define PERF_MEM_REMOTE_SHIFT 37
-
-#define PERF_MEM_LVLNUM_L1 0x01 /* L1 */
-#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */
-#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */
-#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */
-/* 5-0x7 available */
-#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */
-#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */
-#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */
-#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
-#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */
-#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */
-#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */
-#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */
-
-#define PERF_MEM_LVLNUM_SHIFT 33
-
-/* snoop mode */
-#define PERF_MEM_SNOOP_NA 0x01 /* not available */
-#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */
-#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */
-#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */
-#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */
-#define PERF_MEM_SNOOP_SHIFT 19
-
-#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */
-#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */
-#define PERF_MEM_SNOOPX_SHIFT 38
-
-/* locked instruction */
-#define PERF_MEM_LOCK_NA 0x01 /* not available */
-#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */
-#define PERF_MEM_LOCK_SHIFT 24
+#define PERF_MEM_LVL_NA 0x0001 /* Not available */
+#define PERF_MEM_LVL_HIT 0x0002 /* Hit level */
+#define PERF_MEM_LVL_MISS 0x0004 /* Miss level */
+#define PERF_MEM_LVL_L1 0x0008 /* L1 */
+#define PERF_MEM_LVL_LFB 0x0010 /* Line Fill Buffer */
+#define PERF_MEM_LVL_L2 0x0020 /* L2 */
+#define PERF_MEM_LVL_L3 0x0040 /* L3 */
+#define PERF_MEM_LVL_LOC_RAM 0x0080 /* Local DRAM */
+#define PERF_MEM_LVL_REM_RAM1 0x0100 /* Remote DRAM (1 hop) */
+#define PERF_MEM_LVL_REM_RAM2 0x0200 /* Remote DRAM (2 hops) */
+#define PERF_MEM_LVL_REM_CCE1 0x0400 /* Remote Cache (1 hop) */
+#define PERF_MEM_LVL_REM_CCE2 0x0800 /* Remote Cache (2 hops) */
+#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */
+#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
+#define PERF_MEM_LVL_SHIFT 5
+
+#define PERF_MEM_REMOTE_REMOTE 0x0001 /* Remote */
+#define PERF_MEM_REMOTE_SHIFT 37
+
+#define PERF_MEM_LVLNUM_L1 0x0001 /* L1 */
+#define PERF_MEM_LVLNUM_L2 0x0002 /* L2 */
+#define PERF_MEM_LVLNUM_L3 0x0003 /* L3 */
+#define PERF_MEM_LVLNUM_L4 0x0004 /* L4 */
+#define PERF_MEM_LVLNUM_L2_MHB 0x0005 /* L2 Miss Handling Buffer */
+#define PERF_MEM_LVLNUM_MSC 0x0006 /* Memory-side Cache */
+/* 0x007 available */
+#define PERF_MEM_LVLNUM_UNC 0x0008 /* Uncached */
+#define PERF_MEM_LVLNUM_CXL 0x0009 /* CXL */
+#define PERF_MEM_LVLNUM_IO 0x000a /* I/O */
+#define PERF_MEM_LVLNUM_ANY_CACHE 0x000b /* Any cache */
+#define PERF_MEM_LVLNUM_LFB 0x000c /* LFB / L1 Miss Handling Buffer */
+#define PERF_MEM_LVLNUM_RAM 0x000d /* RAM */
+#define PERF_MEM_LVLNUM_PMEM 0x000e /* PMEM */
+#define PERF_MEM_LVLNUM_NA 0x000f /* N/A */
+
+#define PERF_MEM_LVLNUM_SHIFT 33
+
+/* Snoop mode */
+#define PERF_MEM_SNOOP_NA 0x0001 /* Not available */
+#define PERF_MEM_SNOOP_NONE 0x0002 /* No snoop */
+#define PERF_MEM_SNOOP_HIT 0x0004 /* Snoop hit */
+#define PERF_MEM_SNOOP_MISS 0x0008 /* Snoop miss */
+#define PERF_MEM_SNOOP_HITM 0x0010 /* Snoop hit modified */
+#define PERF_MEM_SNOOP_SHIFT 19
+
+#define PERF_MEM_SNOOPX_FWD 0x0001 /* Forward */
+#define PERF_MEM_SNOOPX_PEER 0x0002 /* Transfer from peer */
+#define PERF_MEM_SNOOPX_SHIFT 38
+
+/* Locked instruction */
+#define PERF_MEM_LOCK_NA 0x0001 /* Not available */
+#define PERF_MEM_LOCK_LOCKED 0x0002 /* Locked transaction */
+#define PERF_MEM_LOCK_SHIFT 24
/* TLB access */
-#define PERF_MEM_TLB_NA 0x01 /* not available */
-#define PERF_MEM_TLB_HIT 0x02 /* hit level */
-#define PERF_MEM_TLB_MISS 0x04 /* miss level */
-#define PERF_MEM_TLB_L1 0x08 /* L1 */
-#define PERF_MEM_TLB_L2 0x10 /* L2 */
-#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/
-#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */
-#define PERF_MEM_TLB_SHIFT 26
+#define PERF_MEM_TLB_NA 0x0001 /* Not available */
+#define PERF_MEM_TLB_HIT 0x0002 /* Hit level */
+#define PERF_MEM_TLB_MISS 0x0004 /* Miss level */
+#define PERF_MEM_TLB_L1 0x0008 /* L1 */
+#define PERF_MEM_TLB_L2 0x0010 /* L2 */
+#define PERF_MEM_TLB_WK 0x0020 /* Hardware Walker*/
+#define PERF_MEM_TLB_OS 0x0040 /* OS fault handler */
+#define PERF_MEM_TLB_SHIFT 26
/* Access blocked */
-#define PERF_MEM_BLK_NA 0x01 /* not available */
-#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */
-#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */
-#define PERF_MEM_BLK_SHIFT 40
-
-/* hop level */
-#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */
-#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */
-#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */
-#define PERF_MEM_HOPS_3 0x04 /* remote board */
+#define PERF_MEM_BLK_NA 0x0001 /* Not available */
+#define PERF_MEM_BLK_DATA 0x0002 /* Data could not be forwarded */
+#define PERF_MEM_BLK_ADDR 0x0004 /* Address conflict */
+#define PERF_MEM_BLK_SHIFT 40
+
+/* Hop level */
+#define PERF_MEM_HOPS_0 0x0001 /* Remote core, same node */
+#define PERF_MEM_HOPS_1 0x0002 /* Remote node, same socket */
+#define PERF_MEM_HOPS_2 0x0003 /* Remote socket, same board */
+#define PERF_MEM_HOPS_3 0x0004 /* Remote board */
/* 5-7 available */
-#define PERF_MEM_HOPS_SHIFT 43
+#define PERF_MEM_HOPS_SHIFT 43
#define PERF_MEM_S(a, s) \
(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
/*
- * single taken branch record layout:
+ * Layout of single taken branch records:
*
* from: source instruction (may not always be a branch insn)
* to: branch target
@@ -1425,37 +1449,37 @@ union perf_mem_data_src {
struct perf_branch_entry {
__u64 from;
__u64 to;
- __u64 mispred:1, /* target mispredicted */
- predicted:1,/* target predicted */
- in_tx:1, /* in transaction */
- abort:1, /* transaction abort */
- cycles:16, /* cycle count to last branch */
- type:4, /* branch type */
- spec:2, /* branch speculation info */
- new_type:4, /* additional branch type */
- priv:3, /* privilege level */
- reserved:31;
+ __u64 mispred : 1, /* target mispredicted */
+ predicted : 1, /* target predicted */
+ in_tx : 1, /* in transaction */
+ abort : 1, /* transaction abort */
+ cycles : 16, /* cycle count to last branch */
+ type : 4, /* branch type */
+ spec : 2, /* branch speculation info */
+ new_type : 4, /* additional branch type */
+ priv : 3, /* privilege level */
+ reserved : 31;
};
/* Size of used info bits in struct perf_branch_entry */
#define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33
union perf_sample_weight {
- __u64 full;
+ __u64 full;
#if defined(__LITTLE_ENDIAN_BITFIELD)
struct {
- __u32 var1_dw;
- __u16 var2_w;
- __u16 var3_w;
+ __u32 var1_dw;
+ __u16 var2_w;
+ __u16 var3_w;
};
#elif defined(__BIG_ENDIAN_BITFIELD)
struct {
- __u16 var3_w;
- __u16 var2_w;
- __u32 var1_dw;
+ __u16 var3_w;
+ __u16 var2_w;
+ __u32 var1_dw;
};
#else
-#error "Unknown endianness"
+# error "Unknown endianness"
#endif
};
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index 370ed14b1ae0..43dec6eed559 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -230,7 +230,7 @@ struct prctl_mm_map {
# define PR_PAC_APDBKEY (1UL << 3)
# define PR_PAC_APGAKEY (1UL << 4)
-/* Tagged user address controls for arm64 */
+/* Tagged user address controls for arm64 and RISC-V */
#define PR_SET_TAGGED_ADDR_CTRL 55
#define PR_GET_TAGGED_ADDR_CTRL 56
# define PR_TAGGED_ADDR_ENABLE (1UL << 0)
@@ -244,6 +244,9 @@ struct prctl_mm_map {
# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT)
/* Unused; kept only for source compatibility */
# define PR_MTE_TCF_SHIFT 1
+/* RISC-V pointer masking tag length */
+# define PR_PMLEN_SHIFT 24
+# define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT)
/* Control reclaim behavior when allocating memory */
#define PR_SET_IO_FLUSHER 57
@@ -306,4 +309,66 @@ struct prctl_mm_map {
# define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK 0xc
# define PR_RISCV_V_VSTATE_CTRL_MASK 0x1f
+#define PR_RISCV_SET_ICACHE_FLUSH_CTX 71
+# define PR_RISCV_CTX_SW_FENCEI_ON 0
+# define PR_RISCV_CTX_SW_FENCEI_OFF 1
+# define PR_RISCV_SCOPE_PER_PROCESS 0
+# define PR_RISCV_SCOPE_PER_THREAD 1
+
+/* PowerPC Dynamic Execution Control Register (DEXCR) controls */
+#define PR_PPC_GET_DEXCR 72
+#define PR_PPC_SET_DEXCR 73
+/* DEXCR aspect to act on */
+# define PR_PPC_DEXCR_SBHE 0 /* Speculative branch hint enable */
+# define PR_PPC_DEXCR_IBRTPD 1 /* Indirect branch recurrent target prediction disable */
+# define PR_PPC_DEXCR_SRAPD 2 /* Subroutine return address prediction disable */
+# define PR_PPC_DEXCR_NPHIE 3 /* Non-privileged hash instruction enable */
+/* Action to apply / return */
+# define PR_PPC_DEXCR_CTRL_EDITABLE 0x1 /* Aspect can be modified with PR_PPC_SET_DEXCR */
+# define PR_PPC_DEXCR_CTRL_SET 0x2 /* Set the aspect for this process */
+# define PR_PPC_DEXCR_CTRL_CLEAR 0x4 /* Clear the aspect for this process */
+# define PR_PPC_DEXCR_CTRL_SET_ONEXEC 0x8 /* Set the aspect on exec */
+# define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on exec */
+# define PR_PPC_DEXCR_CTRL_MASK 0x1f
+
+/*
+ * Get the current shadow stack configuration for the current thread,
+ * this will be the value configured via PR_SET_SHADOW_STACK_STATUS.
+ */
+#define PR_GET_SHADOW_STACK_STATUS 74
+
+/*
+ * Set the current shadow stack configuration. Enabling the shadow
+ * stack will cause a shadow stack to be allocated for the thread.
+ */
+#define PR_SET_SHADOW_STACK_STATUS 75
+# define PR_SHADOW_STACK_ENABLE (1UL << 0)
+# define PR_SHADOW_STACK_WRITE (1UL << 1)
+# define PR_SHADOW_STACK_PUSH (1UL << 2)
+
+/*
+ * Prevent further changes to the specified shadow stack
+ * configuration. All bits may be locked via this call, including
+ * undefined bits.
+ */
+#define PR_LOCK_SHADOW_STACK_STATUS 76
+
+/*
+ * Controls the mode of timer_create() for CRIU restore operations.
+ * Enabling this allows CRIU to restore timers with explicit IDs.
+ *
+ * Don't use for normal operations as the result might be undefined.
+ */
+#define PR_TIMER_CREATE_RESTORE_IDS 77
+# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0
+# define PR_TIMER_CREATE_RESTORE_IDS_ON 1
+# define PR_TIMER_CREATE_RESTORE_IDS_GET 2
+
+/* FUTEX hash management */
+#define PR_FUTEX_HASH 78
+# define PR_FUTEX_HASH_SET_SLOTS 1
+# define FH_FLAG_IMMUTABLE (1ULL << 0)
+# define PR_FUTEX_HASH_GET_SLOTS 2
+# define PR_FUTEX_HASH_GET_IMMUTABLE 3
+
#endif /* _LINUX_PRCTL_H */
diff --git a/tools/include/uapi/linux/sched.h b/tools/include/uapi/linux/sched.h
deleted file mode 100644
index 3bac0a8ceab2..000000000000
--- a/tools/include/uapi/linux/sched.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _UAPI_LINUX_SCHED_H
-#define _UAPI_LINUX_SCHED_H
-
-#include <linux/types.h>
-
-/*
- * cloning flags:
- */
-#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */
-#define CLONE_VM 0x00000100 /* set if VM shared between processes */
-#define CLONE_FS 0x00000200 /* set if fs info shared between processes */
-#define CLONE_FILES 0x00000400 /* set if open files shared between processes */
-#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */
-#define CLONE_PIDFD 0x00001000 /* set if a pidfd should be placed in parent */
-#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */
-#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */
-#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */
-#define CLONE_THREAD 0x00010000 /* Same thread group? */
-#define CLONE_NEWNS 0x00020000 /* New mount namespace group */
-#define CLONE_SYSVSEM 0x00040000 /* share system V SEM_UNDO semantics */
-#define CLONE_SETTLS 0x00080000 /* create a new TLS for the child */
-#define CLONE_PARENT_SETTID 0x00100000 /* set the TID in the parent */
-#define CLONE_CHILD_CLEARTID 0x00200000 /* clear the TID in the child */
-#define CLONE_DETACHED 0x00400000 /* Unused, ignored */
-#define CLONE_UNTRACED 0x00800000 /* set if the tracing process can't force CLONE_PTRACE on this clone */
-#define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */
-#define CLONE_NEWCGROUP 0x02000000 /* New cgroup namespace */
-#define CLONE_NEWUTS 0x04000000 /* New utsname namespace */
-#define CLONE_NEWIPC 0x08000000 /* New ipc namespace */
-#define CLONE_NEWUSER 0x10000000 /* New user namespace */
-#define CLONE_NEWPID 0x20000000 /* New pid namespace */
-#define CLONE_NEWNET 0x40000000 /* New network namespace */
-#define CLONE_IO 0x80000000 /* Clone io context */
-
-/* Flags for the clone3() syscall. */
-#define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */
-#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
-
-/*
- * cloning flags intersect with CSIGNAL so can be used with unshare and clone3
- * syscalls only:
- */
-#define CLONE_NEWTIME 0x00000080 /* New time namespace */
-
-#ifndef __ASSEMBLY__
-/**
- * struct clone_args - arguments for the clone3 syscall
- * @flags: Flags for the new process as listed above.
- * All flags are valid except for CSIGNAL and
- * CLONE_DETACHED.
- * @pidfd: If CLONE_PIDFD is set, a pidfd will be
- * returned in this argument.
- * @child_tid: If CLONE_CHILD_SETTID is set, the TID of the
- * child process will be returned in the child's
- * memory.
- * @parent_tid: If CLONE_PARENT_SETTID is set, the TID of
- * the child process will be returned in the
- * parent's memory.
- * @exit_signal: The exit_signal the parent process will be
- * sent when the child exits.
- * @stack: Specify the location of the stack for the
- * child process.
- * Note, @stack is expected to point to the
- * lowest address. The stack direction will be
- * determined by the kernel and set up
- * appropriately based on @stack_size.
- * @stack_size: The size of the stack for the child process.
- * @tls: If CLONE_SETTLS is set, the tls descriptor
- * is set to tls.
- * @set_tid: Pointer to an array of type *pid_t. The size
- * of the array is defined using @set_tid_size.
- * This array is used to select PIDs/TIDs for
- * newly created processes. The first element in
- * this defines the PID in the most nested PID
- * namespace. Each additional element in the array
- * defines the PID in the parent PID namespace of
- * the original PID namespace. If the array has
- * less entries than the number of currently
- * nested PID namespaces only the PIDs in the
- * corresponding namespaces are set.
- * @set_tid_size: This defines the size of the array referenced
- * in @set_tid. This cannot be larger than the
- * kernel's limit of nested PID namespaces.
- * @cgroup: If CLONE_INTO_CGROUP is specified set this to
- * a file descriptor for the cgroup.
- *
- * The structure is versioned by size and thus extensible.
- * New struct members must go at the end of the struct and
- * must be properly 64bit aligned.
- */
-struct clone_args {
- __aligned_u64 flags;
- __aligned_u64 pidfd;
- __aligned_u64 child_tid;
- __aligned_u64 parent_tid;
- __aligned_u64 exit_signal;
- __aligned_u64 stack;
- __aligned_u64 stack_size;
- __aligned_u64 tls;
- __aligned_u64 set_tid;
- __aligned_u64 set_tid_size;
- __aligned_u64 cgroup;
-};
-#endif
-
-#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */
-#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */
-#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */
-
-/*
- * Scheduling policies
- */
-#define SCHED_NORMAL 0
-#define SCHED_FIFO 1
-#define SCHED_RR 2
-#define SCHED_BATCH 3
-/* SCHED_ISO: reserved but not implemented yet */
-#define SCHED_IDLE 5
-#define SCHED_DEADLINE 6
-
-/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
-#define SCHED_RESET_ON_FORK 0x40000000
-
-/*
- * For the sched_{set,get}attr() calls
- */
-#define SCHED_FLAG_RESET_ON_FORK 0x01
-#define SCHED_FLAG_RECLAIM 0x02
-#define SCHED_FLAG_DL_OVERRUN 0x04
-#define SCHED_FLAG_KEEP_POLICY 0x08
-#define SCHED_FLAG_KEEP_PARAMS 0x10
-#define SCHED_FLAG_UTIL_CLAMP_MIN 0x20
-#define SCHED_FLAG_UTIL_CLAMP_MAX 0x40
-
-#define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \
- SCHED_FLAG_KEEP_PARAMS)
-
-#define SCHED_FLAG_UTIL_CLAMP (SCHED_FLAG_UTIL_CLAMP_MIN | \
- SCHED_FLAG_UTIL_CLAMP_MAX)
-
-#define SCHED_FLAG_ALL (SCHED_FLAG_RESET_ON_FORK | \
- SCHED_FLAG_RECLAIM | \
- SCHED_FLAG_DL_OVERRUN | \
- SCHED_FLAG_KEEP_ALL | \
- SCHED_FLAG_UTIL_CLAMP)
-
-#endif /* _UAPI_LINUX_SCHED_H */
diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h
index 2f2ee82d5517..f78ee3670dd5 100644
--- a/tools/include/uapi/linux/stat.h
+++ b/tools/include/uapi/linux/stat.h
@@ -98,36 +98,93 @@ struct statx_timestamp {
*/
struct statx {
/* 0x00 */
- __u32 stx_mask; /* What results were written [uncond] */
- __u32 stx_blksize; /* Preferred general I/O size [uncond] */
- __u64 stx_attributes; /* Flags conveying information about the file [uncond] */
+ /* What results were written [uncond] */
+ __u32 stx_mask;
+
+ /* Preferred general I/O size [uncond] */
+ __u32 stx_blksize;
+
+ /* Flags conveying information about the file [uncond] */
+ __u64 stx_attributes;
+
/* 0x10 */
- __u32 stx_nlink; /* Number of hard links */
- __u32 stx_uid; /* User ID of owner */
- __u32 stx_gid; /* Group ID of owner */
- __u16 stx_mode; /* File mode */
+ /* Number of hard links */
+ __u32 stx_nlink;
+
+ /* User ID of owner */
+ __u32 stx_uid;
+
+ /* Group ID of owner */
+ __u32 stx_gid;
+
+ /* File mode */
+ __u16 stx_mode;
__u16 __spare0[1];
+
/* 0x20 */
- __u64 stx_ino; /* Inode number */
- __u64 stx_size; /* File size */
- __u64 stx_blocks; /* Number of 512-byte blocks allocated */
- __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */
+ /* Inode number */
+ __u64 stx_ino;
+
+ /* File size */
+ __u64 stx_size;
+
+ /* Number of 512-byte blocks allocated */
+ __u64 stx_blocks;
+
+ /* Mask to show what's supported in stx_attributes */
+ __u64 stx_attributes_mask;
+
/* 0x40 */
- struct statx_timestamp stx_atime; /* Last access time */
- struct statx_timestamp stx_btime; /* File creation time */
- struct statx_timestamp stx_ctime; /* Last attribute change time */
- struct statx_timestamp stx_mtime; /* Last data modification time */
+ /* Last access time */
+ struct statx_timestamp stx_atime;
+
+ /* File creation time */
+ struct statx_timestamp stx_btime;
+
+ /* Last attribute change time */
+ struct statx_timestamp stx_ctime;
+
+ /* Last data modification time */
+ struct statx_timestamp stx_mtime;
+
/* 0x80 */
- __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */
+ /* Device ID of special file [if bdev/cdev] */
+ __u32 stx_rdev_major;
__u32 stx_rdev_minor;
- __u32 stx_dev_major; /* ID of device containing file [uncond] */
+
+ /* ID of device containing file [uncond] */
+ __u32 stx_dev_major;
__u32 stx_dev_minor;
+
/* 0x90 */
__u64 stx_mnt_id;
- __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */
- __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */
+
+ /* Memory buffer alignment for direct I/O */
+ __u32 stx_dio_mem_align;
+
+ /* File offset alignment for direct I/O */
+ __u32 stx_dio_offset_align;
+
/* 0xa0 */
- __u64 __spare3[12]; /* Spare space for future expansion */
+ /* Subvolume identifier */
+ __u64 stx_subvol;
+
+ /* Min atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_min;
+
+ /* Max atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_max;
+
+ /* 0xb0 */
+ /* Max atomic write segment count */
+ __u32 stx_atomic_write_segments_max;
+
+ /* File offset alignment for direct I/O reads */
+ __u32 stx_dio_read_offset_align;
+
+ /* 0xb8 */
+ __u64 __spare3[9]; /* Spare space for future expansion */
+
/* 0x100 */
};
@@ -155,6 +212,9 @@ struct statx {
#define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */
#define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */
#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */
+#define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */
+#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */
+#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
@@ -190,6 +250,7 @@ struct statx {
#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */
#define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */
#define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */
+#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */
#endif /* _UAPI_LINUX_STAT_H */
diff --git a/tools/include/uapi/linux/stddef.h b/tools/include/uapi/linux/stddef.h
index bb6ea517efb5..c53cde425406 100644
--- a/tools/include/uapi/linux/stddef.h
+++ b/tools/include/uapi/linux/stddef.h
@@ -8,6 +8,13 @@
#define __always_inline __inline__
#endif
+/* Not all C++ standards support type declarations inside an anonymous union */
+#ifndef __cplusplus
+#define __struct_group_tag(TAG) TAG
+#else
+#define __struct_group_tag(TAG)
+#endif
+
/**
* __struct_group() - Create a mirrored named and anonyomous struct
*
@@ -20,14 +27,14 @@
* and size: one anonymous and one named. The former's members can be used
* normally without sub-struct naming, and the latter can be used to
* reason about the start, end, and size of the group of struct members.
- * The named struct can also be explicitly tagged for layer reuse, as well
- * as both having struct attributes appended.
+ * The named struct can also be explicitly tagged for layer reuse (C only),
+ * as well as both having struct attributes appended.
*/
#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \
union { \
struct { MEMBERS } ATTRS; \
- struct TAG { MEMBERS } ATTRS NAME; \
- }
+ struct __struct_group_tag(TAG) { MEMBERS } ATTRS NAME; \
+ } ATTRS
/**
* __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union
diff --git a/tools/include/uapi/linux/types.h b/tools/include/uapi/linux/types.h
index 91fa51a9c31d..85aa327245c6 100644
--- a/tools/include/uapi/linux/types.h
+++ b/tools/include/uapi/linux/types.h
@@ -4,6 +4,8 @@
#include <asm-generic/int-ll64.h>
+#ifndef __ASSEMBLER__
+
/* copied from linux:include/uapi/linux/types.h */
#define __bitwise
typedef __u16 __bitwise __le16;
@@ -20,4 +22,5 @@ typedef __u32 __bitwise __wsum;
#define __aligned_be64 __be64 __attribute__((aligned(8)))
#define __aligned_le64 __le64 __attribute__((aligned(8)))
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI_LINUX_TYPES_H */
diff --git a/tools/include/uapi/linux/usbdevice_fs.h b/tools/include/uapi/linux/usbdevice_fs.h
deleted file mode 100644
index 74a84e02422a..000000000000
--- a/tools/include/uapi/linux/usbdevice_fs.h
+++ /dev/null
@@ -1,231 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
-/*****************************************************************************/
-
-/*
- * usbdevice_fs.h -- USB device file system.
- *
- * Copyright (C) 2000
- * Thomas Sailer (sailer@ife.ee.ethz.ch)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * History:
- * 0.1 04.01.2000 Created
- */
-
-/*****************************************************************************/
-
-#ifndef _UAPI_LINUX_USBDEVICE_FS_H
-#define _UAPI_LINUX_USBDEVICE_FS_H
-
-#include <linux/types.h>
-#include <linux/magic.h>
-
-/* --------------------------------------------------------------------- */
-
-/* usbdevfs ioctl codes */
-
-struct usbdevfs_ctrltransfer {
- __u8 bRequestType;
- __u8 bRequest;
- __u16 wValue;
- __u16 wIndex;
- __u16 wLength;
- __u32 timeout; /* in milliseconds */
- void __user *data;
-};
-
-struct usbdevfs_bulktransfer {
- unsigned int ep;
- unsigned int len;
- unsigned int timeout; /* in milliseconds */
- void __user *data;
-};
-
-struct usbdevfs_setinterface {
- unsigned int interface;
- unsigned int altsetting;
-};
-
-struct usbdevfs_disconnectsignal {
- unsigned int signr;
- void __user *context;
-};
-
-#define USBDEVFS_MAXDRIVERNAME 255
-
-struct usbdevfs_getdriver {
- unsigned int interface;
- char driver[USBDEVFS_MAXDRIVERNAME + 1];
-};
-
-struct usbdevfs_connectinfo {
- unsigned int devnum;
- unsigned char slow;
-};
-
-struct usbdevfs_conninfo_ex {
- __u32 size; /* Size of the structure from the kernel's */
- /* point of view. Can be used by userspace */
- /* to determine how much data can be */
- /* used/trusted. */
- __u32 busnum; /* USB bus number, as enumerated by the */
- /* kernel, the device is connected to. */
- __u32 devnum; /* Device address on the bus. */
- __u32 speed; /* USB_SPEED_* constants from ch9.h */
- __u8 num_ports; /* Number of ports the device is connected */
- /* to on the way to the root hub. It may */
- /* be bigger than size of 'ports' array so */
- /* userspace can detect overflows. */
- __u8 ports[7]; /* List of ports on the way from the root */
- /* hub to the device. Current limit in */
- /* USB specification is 7 tiers (root hub, */
- /* 5 intermediate hubs, device), which */
- /* gives at most 6 port entries. */
-};
-
-#define USBDEVFS_URB_SHORT_NOT_OK 0x01
-#define USBDEVFS_URB_ISO_ASAP 0x02
-#define USBDEVFS_URB_BULK_CONTINUATION 0x04
-#define USBDEVFS_URB_NO_FSBR 0x20 /* Not used */
-#define USBDEVFS_URB_ZERO_PACKET 0x40
-#define USBDEVFS_URB_NO_INTERRUPT 0x80
-
-#define USBDEVFS_URB_TYPE_ISO 0
-#define USBDEVFS_URB_TYPE_INTERRUPT 1
-#define USBDEVFS_URB_TYPE_CONTROL 2
-#define USBDEVFS_URB_TYPE_BULK 3
-
-struct usbdevfs_iso_packet_desc {
- unsigned int length;
- unsigned int actual_length;
- unsigned int status;
-};
-
-struct usbdevfs_urb {
- unsigned char type;
- unsigned char endpoint;
- int status;
- unsigned int flags;
- void __user *buffer;
- int buffer_length;
- int actual_length;
- int start_frame;
- union {
- int number_of_packets; /* Only used for isoc urbs */
- unsigned int stream_id; /* Only used with bulk streams */
- };
- int error_count;
- unsigned int signr; /* signal to be sent on completion,
- or 0 if none should be sent. */
- void __user *usercontext;
- struct usbdevfs_iso_packet_desc iso_frame_desc[];
-};
-
-/* ioctls for talking directly to drivers */
-struct usbdevfs_ioctl {
- int ifno; /* interface 0..N ; negative numbers reserved */
- int ioctl_code; /* MUST encode size + direction of data so the
- * macros in <asm/ioctl.h> give correct values */
- void __user *data; /* param buffer (in, or out) */
-};
-
-/* You can do most things with hubs just through control messages,
- * except find out what device connects to what port. */
-struct usbdevfs_hub_portinfo {
- char nports; /* number of downstream ports in this hub */
- char port [127]; /* e.g. port 3 connects to device 27 */
-};
-
-/* System and bus capability flags */
-#define USBDEVFS_CAP_ZERO_PACKET 0x01
-#define USBDEVFS_CAP_BULK_CONTINUATION 0x02
-#define USBDEVFS_CAP_NO_PACKET_SIZE_LIM 0x04
-#define USBDEVFS_CAP_BULK_SCATTER_GATHER 0x08
-#define USBDEVFS_CAP_REAP_AFTER_DISCONNECT 0x10
-#define USBDEVFS_CAP_MMAP 0x20
-#define USBDEVFS_CAP_DROP_PRIVILEGES 0x40
-#define USBDEVFS_CAP_CONNINFO_EX 0x80
-#define USBDEVFS_CAP_SUSPEND 0x100
-
-/* USBDEVFS_DISCONNECT_CLAIM flags & struct */
-
-/* disconnect-and-claim if the driver matches the driver field */
-#define USBDEVFS_DISCONNECT_CLAIM_IF_DRIVER 0x01
-/* disconnect-and-claim except when the driver matches the driver field */
-#define USBDEVFS_DISCONNECT_CLAIM_EXCEPT_DRIVER 0x02
-
-struct usbdevfs_disconnect_claim {
- unsigned int interface;
- unsigned int flags;
- char driver[USBDEVFS_MAXDRIVERNAME + 1];
-};
-
-struct usbdevfs_streams {
- unsigned int num_streams; /* Not used by USBDEVFS_FREE_STREAMS */
- unsigned int num_eps;
- unsigned char eps[];
-};
-
-/*
- * USB_SPEED_* values returned by USBDEVFS_GET_SPEED are defined in
- * linux/usb/ch9.h
- */
-
-#define USBDEVFS_CONTROL _IOWR('U', 0, struct usbdevfs_ctrltransfer)
-#define USBDEVFS_CONTROL32 _IOWR('U', 0, struct usbdevfs_ctrltransfer32)
-#define USBDEVFS_BULK _IOWR('U', 2, struct usbdevfs_bulktransfer)
-#define USBDEVFS_BULK32 _IOWR('U', 2, struct usbdevfs_bulktransfer32)
-#define USBDEVFS_RESETEP _IOR('U', 3, unsigned int)
-#define USBDEVFS_SETINTERFACE _IOR('U', 4, struct usbdevfs_setinterface)
-#define USBDEVFS_SETCONFIGURATION _IOR('U', 5, unsigned int)
-#define USBDEVFS_GETDRIVER _IOW('U', 8, struct usbdevfs_getdriver)
-#define USBDEVFS_SUBMITURB _IOR('U', 10, struct usbdevfs_urb)
-#define USBDEVFS_SUBMITURB32 _IOR('U', 10, struct usbdevfs_urb32)
-#define USBDEVFS_DISCARDURB _IO('U', 11)
-#define USBDEVFS_REAPURB _IOW('U', 12, void *)
-#define USBDEVFS_REAPURB32 _IOW('U', 12, __u32)
-#define USBDEVFS_REAPURBNDELAY _IOW('U', 13, void *)
-#define USBDEVFS_REAPURBNDELAY32 _IOW('U', 13, __u32)
-#define USBDEVFS_DISCSIGNAL _IOR('U', 14, struct usbdevfs_disconnectsignal)
-#define USBDEVFS_DISCSIGNAL32 _IOR('U', 14, struct usbdevfs_disconnectsignal32)
-#define USBDEVFS_CLAIMINTERFACE _IOR('U', 15, unsigned int)
-#define USBDEVFS_RELEASEINTERFACE _IOR('U', 16, unsigned int)
-#define USBDEVFS_CONNECTINFO _IOW('U', 17, struct usbdevfs_connectinfo)
-#define USBDEVFS_IOCTL _IOWR('U', 18, struct usbdevfs_ioctl)
-#define USBDEVFS_IOCTL32 _IOWR('U', 18, struct usbdevfs_ioctl32)
-#define USBDEVFS_HUB_PORTINFO _IOR('U', 19, struct usbdevfs_hub_portinfo)
-#define USBDEVFS_RESET _IO('U', 20)
-#define USBDEVFS_CLEAR_HALT _IOR('U', 21, unsigned int)
-#define USBDEVFS_DISCONNECT _IO('U', 22)
-#define USBDEVFS_CONNECT _IO('U', 23)
-#define USBDEVFS_CLAIM_PORT _IOR('U', 24, unsigned int)
-#define USBDEVFS_RELEASE_PORT _IOR('U', 25, unsigned int)
-#define USBDEVFS_GET_CAPABILITIES _IOR('U', 26, __u32)
-#define USBDEVFS_DISCONNECT_CLAIM _IOR('U', 27, struct usbdevfs_disconnect_claim)
-#define USBDEVFS_ALLOC_STREAMS _IOR('U', 28, struct usbdevfs_streams)
-#define USBDEVFS_FREE_STREAMS _IOR('U', 29, struct usbdevfs_streams)
-#define USBDEVFS_DROP_PRIVILEGES _IOW('U', 30, __u32)
-#define USBDEVFS_GET_SPEED _IO('U', 31)
-/*
- * Returns struct usbdevfs_conninfo_ex; length is variable to allow
- * extending size of the data returned.
- */
-#define USBDEVFS_CONNINFO_EX(len) _IOC(_IOC_READ, 'U', 32, len)
-#define USBDEVFS_FORBID_SUSPEND _IO('U', 33)
-#define USBDEVFS_ALLOW_SUSPEND _IO('U', 34)
-#define USBDEVFS_WAIT_FOR_RESUME _IO('U', 35)
-
-#endif /* _UAPI_LINUX_USBDEVICE_FS_H */
diff --git a/tools/include/uapi/linux/userfaultfd.h b/tools/include/uapi/linux/userfaultfd.h
new file mode 100644
index 000000000000..4283de22d5b6
--- /dev/null
+++ b/tools/include/uapi/linux/userfaultfd.h
@@ -0,0 +1,386 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * include/linux/userfaultfd.h
+ *
+ * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org>
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ */
+
+#ifndef _LINUX_USERFAULTFD_H
+#define _LINUX_USERFAULTFD_H
+
+#include <linux/types.h>
+
+/* ioctls for /dev/userfaultfd */
+#define USERFAULTFD_IOC 0xAA
+#define USERFAULTFD_IOC_NEW _IO(USERFAULTFD_IOC, 0x00)
+
+/*
+ * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and
+ * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In
+ * userfaultfd.h we assumed the kernel was reading (instead _IOC_READ
+ * means the userland is reading).
+ */
+#define UFFD_API ((__u64)0xAA)
+#define UFFD_API_REGISTER_MODES (UFFDIO_REGISTER_MODE_MISSING | \
+ UFFDIO_REGISTER_MODE_WP | \
+ UFFDIO_REGISTER_MODE_MINOR)
+#define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \
+ UFFD_FEATURE_EVENT_FORK | \
+ UFFD_FEATURE_EVENT_REMAP | \
+ UFFD_FEATURE_EVENT_REMOVE | \
+ UFFD_FEATURE_EVENT_UNMAP | \
+ UFFD_FEATURE_MISSING_HUGETLBFS | \
+ UFFD_FEATURE_MISSING_SHMEM | \
+ UFFD_FEATURE_SIGBUS | \
+ UFFD_FEATURE_THREAD_ID | \
+ UFFD_FEATURE_MINOR_HUGETLBFS | \
+ UFFD_FEATURE_MINOR_SHMEM | \
+ UFFD_FEATURE_EXACT_ADDRESS | \
+ UFFD_FEATURE_WP_HUGETLBFS_SHMEM | \
+ UFFD_FEATURE_WP_UNPOPULATED | \
+ UFFD_FEATURE_POISON | \
+ UFFD_FEATURE_WP_ASYNC | \
+ UFFD_FEATURE_MOVE)
+#define UFFD_API_IOCTLS \
+ ((__u64)1 << _UFFDIO_REGISTER | \
+ (__u64)1 << _UFFDIO_UNREGISTER | \
+ (__u64)1 << _UFFDIO_API)
+#define UFFD_API_RANGE_IOCTLS \
+ ((__u64)1 << _UFFDIO_WAKE | \
+ (__u64)1 << _UFFDIO_COPY | \
+ (__u64)1 << _UFFDIO_ZEROPAGE | \
+ (__u64)1 << _UFFDIO_MOVE | \
+ (__u64)1 << _UFFDIO_WRITEPROTECT | \
+ (__u64)1 << _UFFDIO_CONTINUE | \
+ (__u64)1 << _UFFDIO_POISON)
+#define UFFD_API_RANGE_IOCTLS_BASIC \
+ ((__u64)1 << _UFFDIO_WAKE | \
+ (__u64)1 << _UFFDIO_COPY | \
+ (__u64)1 << _UFFDIO_WRITEPROTECT | \
+ (__u64)1 << _UFFDIO_CONTINUE | \
+ (__u64)1 << _UFFDIO_POISON)
+
+/*
+ * Valid ioctl command number range with this API is from 0x00 to
+ * 0x3F. UFFDIO_API is the fixed number, everything else can be
+ * changed by implementing a different UFFD_API. If sticking to the
+ * same UFFD_API more ioctl can be added and userland will be aware of
+ * which ioctl the running kernel implements through the ioctl command
+ * bitmask written by the UFFDIO_API.
+ */
+#define _UFFDIO_REGISTER (0x00)
+#define _UFFDIO_UNREGISTER (0x01)
+#define _UFFDIO_WAKE (0x02)
+#define _UFFDIO_COPY (0x03)
+#define _UFFDIO_ZEROPAGE (0x04)
+#define _UFFDIO_MOVE (0x05)
+#define _UFFDIO_WRITEPROTECT (0x06)
+#define _UFFDIO_CONTINUE (0x07)
+#define _UFFDIO_POISON (0x08)
+#define _UFFDIO_API (0x3F)
+
+/* userfaultfd ioctl ids */
+#define UFFDIO 0xAA
+#define UFFDIO_API _IOWR(UFFDIO, _UFFDIO_API, \
+ struct uffdio_api)
+#define UFFDIO_REGISTER _IOWR(UFFDIO, _UFFDIO_REGISTER, \
+ struct uffdio_register)
+#define UFFDIO_UNREGISTER _IOR(UFFDIO, _UFFDIO_UNREGISTER, \
+ struct uffdio_range)
+#define UFFDIO_WAKE _IOR(UFFDIO, _UFFDIO_WAKE, \
+ struct uffdio_range)
+#define UFFDIO_COPY _IOWR(UFFDIO, _UFFDIO_COPY, \
+ struct uffdio_copy)
+#define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \
+ struct uffdio_zeropage)
+#define UFFDIO_MOVE _IOWR(UFFDIO, _UFFDIO_MOVE, \
+ struct uffdio_move)
+#define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \
+ struct uffdio_writeprotect)
+#define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \
+ struct uffdio_continue)
+#define UFFDIO_POISON _IOWR(UFFDIO, _UFFDIO_POISON, \
+ struct uffdio_poison)
+
+/* read() structure */
+struct uffd_msg {
+ __u8 event;
+
+ __u8 reserved1;
+ __u16 reserved2;
+ __u32 reserved3;
+
+ union {
+ struct {
+ __u64 flags;
+ __u64 address;
+ union {
+ __u32 ptid;
+ } feat;
+ } pagefault;
+
+ struct {
+ __u32 ufd;
+ } fork;
+
+ struct {
+ __u64 from;
+ __u64 to;
+ __u64 len;
+ } remap;
+
+ struct {
+ __u64 start;
+ __u64 end;
+ } remove;
+
+ struct {
+ /* unused reserved fields */
+ __u64 reserved1;
+ __u64 reserved2;
+ __u64 reserved3;
+ } reserved;
+ } arg;
+} __attribute__((packed));
+
+/*
+ * Start at 0x12 and not at 0 to be more strict against bugs.
+ */
+#define UFFD_EVENT_PAGEFAULT 0x12
+#define UFFD_EVENT_FORK 0x13
+#define UFFD_EVENT_REMAP 0x14
+#define UFFD_EVENT_REMOVE 0x15
+#define UFFD_EVENT_UNMAP 0x16
+
+/* flags for UFFD_EVENT_PAGEFAULT */
+#define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */
+#define UFFD_PAGEFAULT_FLAG_WP (1<<1) /* If reason is VM_UFFD_WP */
+#define UFFD_PAGEFAULT_FLAG_MINOR (1<<2) /* If reason is VM_UFFD_MINOR */
+
+struct uffdio_api {
+ /* userland asks for an API number and the features to enable */
+ __u64 api;
+ /*
+ * Kernel answers below with the all available features for
+ * the API, this notifies userland of which events and/or
+ * which flags for each event are enabled in the current
+ * kernel.
+ *
+ * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE
+ * are to be considered implicitly always enabled in all kernels as
+ * long as the uffdio_api.api requested matches UFFD_API.
+ *
+ * UFFD_FEATURE_MISSING_HUGETLBFS means an UFFDIO_REGISTER
+ * with UFFDIO_REGISTER_MODE_MISSING mode will succeed on
+ * hugetlbfs virtual memory ranges. Adding or not adding
+ * UFFD_FEATURE_MISSING_HUGETLBFS to uffdio_api.features has
+ * no real functional effect after UFFDIO_API returns, but
+ * it's only useful for an initial feature set probe at
+ * UFFDIO_API time. There are two ways to use it:
+ *
+ * 1) by adding UFFD_FEATURE_MISSING_HUGETLBFS to the
+ * uffdio_api.features before calling UFFDIO_API, an error
+ * will be returned by UFFDIO_API on a kernel without
+ * hugetlbfs missing support
+ *
+ * 2) the UFFD_FEATURE_MISSING_HUGETLBFS can not be added in
+ * uffdio_api.features and instead it will be set by the
+ * kernel in the uffdio_api.features if the kernel supports
+ * it, so userland can later check if the feature flag is
+ * present in uffdio_api.features after UFFDIO_API
+ * succeeded.
+ *
+ * UFFD_FEATURE_MISSING_SHMEM works the same as
+ * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem
+ * (i.e. tmpfs and other shmem based APIs).
+ *
+ * UFFD_FEATURE_SIGBUS feature means no page-fault
+ * (UFFD_EVENT_PAGEFAULT) event will be delivered, instead
+ * a SIGBUS signal will be sent to the faulting process.
+ *
+ * UFFD_FEATURE_THREAD_ID pid of the page faulted task_struct will
+ * be returned, if feature is not requested 0 will be returned.
+ *
+ * UFFD_FEATURE_MINOR_HUGETLBFS indicates that minor faults
+ * can be intercepted (via REGISTER_MODE_MINOR) for
+ * hugetlbfs-backed pages.
+ *
+ * UFFD_FEATURE_MINOR_SHMEM indicates the same support as
+ * UFFD_FEATURE_MINOR_HUGETLBFS, but for shmem-backed pages instead.
+ *
+ * UFFD_FEATURE_EXACT_ADDRESS indicates that the exact address of page
+ * faults would be provided and the offset within the page would not be
+ * masked.
+ *
+ * UFFD_FEATURE_WP_HUGETLBFS_SHMEM indicates that userfaultfd
+ * write-protection mode is supported on both shmem and hugetlbfs.
+ *
+ * UFFD_FEATURE_WP_UNPOPULATED indicates that userfaultfd
+ * write-protection mode will always apply to unpopulated pages
+ * (i.e. empty ptes). This will be the default behavior for shmem
+ * & hugetlbfs, so this flag only affects anonymous memory behavior
+ * when userfault write-protection mode is registered.
+ *
+ * UFFD_FEATURE_WP_ASYNC indicates that userfaultfd write-protection
+ * asynchronous mode is supported in which the write fault is
+ * automatically resolved and write-protection is un-set.
+ * It implies UFFD_FEATURE_WP_UNPOPULATED.
+ *
+ * UFFD_FEATURE_MOVE indicates that the kernel supports moving an
+ * existing page contents from userspace.
+ */
+#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
+#define UFFD_FEATURE_EVENT_FORK (1<<1)
+#define UFFD_FEATURE_EVENT_REMAP (1<<2)
+#define UFFD_FEATURE_EVENT_REMOVE (1<<3)
+#define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4)
+#define UFFD_FEATURE_MISSING_SHMEM (1<<5)
+#define UFFD_FEATURE_EVENT_UNMAP (1<<6)
+#define UFFD_FEATURE_SIGBUS (1<<7)
+#define UFFD_FEATURE_THREAD_ID (1<<8)
+#define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9)
+#define UFFD_FEATURE_MINOR_SHMEM (1<<10)
+#define UFFD_FEATURE_EXACT_ADDRESS (1<<11)
+#define UFFD_FEATURE_WP_HUGETLBFS_SHMEM (1<<12)
+#define UFFD_FEATURE_WP_UNPOPULATED (1<<13)
+#define UFFD_FEATURE_POISON (1<<14)
+#define UFFD_FEATURE_WP_ASYNC (1<<15)
+#define UFFD_FEATURE_MOVE (1<<16)
+ __u64 features;
+
+ __u64 ioctls;
+};
+
+struct uffdio_range {
+ __u64 start;
+ __u64 len;
+};
+
+struct uffdio_register {
+ struct uffdio_range range;
+#define UFFDIO_REGISTER_MODE_MISSING ((__u64)1<<0)
+#define UFFDIO_REGISTER_MODE_WP ((__u64)1<<1)
+#define UFFDIO_REGISTER_MODE_MINOR ((__u64)1<<2)
+ __u64 mode;
+
+ /*
+ * kernel answers which ioctl commands are available for the
+ * range, keep at the end as the last 8 bytes aren't read.
+ */
+ __u64 ioctls;
+};
+
+struct uffdio_copy {
+ __u64 dst;
+ __u64 src;
+ __u64 len;
+#define UFFDIO_COPY_MODE_DONTWAKE ((__u64)1<<0)
+ /*
+ * UFFDIO_COPY_MODE_WP will map the page write protected on
+ * the fly. UFFDIO_COPY_MODE_WP is available only if the
+ * write protected ioctl is implemented for the range
+ * according to the uffdio_register.ioctls.
+ */
+#define UFFDIO_COPY_MODE_WP ((__u64)1<<1)
+ __u64 mode;
+
+ /*
+ * "copy" is written by the ioctl and must be at the end: the
+ * copy_from_user will not read the last 8 bytes.
+ */
+ __s64 copy;
+};
+
+struct uffdio_zeropage {
+ struct uffdio_range range;
+#define UFFDIO_ZEROPAGE_MODE_DONTWAKE ((__u64)1<<0)
+ __u64 mode;
+
+ /*
+ * "zeropage" is written by the ioctl and must be at the end:
+ * the copy_from_user will not read the last 8 bytes.
+ */
+ __s64 zeropage;
+};
+
+struct uffdio_writeprotect {
+ struct uffdio_range range;
+/*
+ * UFFDIO_WRITEPROTECT_MODE_WP: set the flag to write protect a range,
+ * unset the flag to undo protection of a range which was previously
+ * write protected.
+ *
+ * UFFDIO_WRITEPROTECT_MODE_DONTWAKE: set the flag to avoid waking up
+ * any wait thread after the operation succeeds.
+ *
+ * NOTE: Write protecting a region (WP=1) is unrelated to page faults,
+ * therefore DONTWAKE flag is meaningless with WP=1. Removing write
+ * protection (WP=0) in response to a page fault wakes the faulting
+ * task unless DONTWAKE is set.
+ */
+#define UFFDIO_WRITEPROTECT_MODE_WP ((__u64)1<<0)
+#define UFFDIO_WRITEPROTECT_MODE_DONTWAKE ((__u64)1<<1)
+ __u64 mode;
+};
+
+struct uffdio_continue {
+ struct uffdio_range range;
+#define UFFDIO_CONTINUE_MODE_DONTWAKE ((__u64)1<<0)
+ /*
+ * UFFDIO_CONTINUE_MODE_WP will map the page write protected on
+ * the fly. UFFDIO_CONTINUE_MODE_WP is available only if the
+ * write protected ioctl is implemented for the range
+ * according to the uffdio_register.ioctls.
+ */
+#define UFFDIO_CONTINUE_MODE_WP ((__u64)1<<1)
+ __u64 mode;
+
+ /*
+ * Fields below here are written by the ioctl and must be at the end:
+ * the copy_from_user will not read past here.
+ */
+ __s64 mapped;
+};
+
+struct uffdio_poison {
+ struct uffdio_range range;
+#define UFFDIO_POISON_MODE_DONTWAKE ((__u64)1<<0)
+ __u64 mode;
+
+ /*
+ * Fields below here are written by the ioctl and must be at the end:
+ * the copy_from_user will not read past here.
+ */
+ __s64 updated;
+};
+
+struct uffdio_move {
+ __u64 dst;
+ __u64 src;
+ __u64 len;
+ /*
+ * Especially if used to atomically remove memory from the
+ * address space the wake on the dst range is not needed.
+ */
+#define UFFDIO_MOVE_MODE_DONTWAKE ((__u64)1<<0)
+#define UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES ((__u64)1<<1)
+ __u64 mode;
+ /*
+ * "move" is written by the ioctl and must be at the end: the
+ * copy_from_user will not read the last 8 bytes.
+ */
+ __s64 move;
+};
+
+/*
+ * Flags for the userfaultfd(2) system call itself.
+ */
+
+/*
+ * Create a userfaultfd that can handle page faults only in user mode.
+ */
+#define UFFD_USER_MODE_ONLY 1
+
+#endif /* _LINUX_USERFAULTFD_H */
diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h
deleted file mode 100644
index 649560c685f1..000000000000
--- a/tools/include/uapi/linux/vhost.h
+++ /dev/null
@@ -1,230 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _LINUX_VHOST_H
-#define _LINUX_VHOST_H
-/* Userspace interface for in-kernel virtio accelerators. */
-
-/* vhost is used to reduce the number of system calls involved in virtio.
- *
- * Existing virtio net code is used in the guest without modification.
- *
- * This header includes interface used by userspace hypervisor for
- * device configuration.
- */
-
-#include <linux/vhost_types.h>
-#include <linux/types.h>
-#include <linux/ioctl.h>
-
-#define VHOST_FILE_UNBIND -1
-
-/* ioctls */
-
-#define VHOST_VIRTIO 0xAF
-
-/* Features bitmask for forward compatibility. Transport bits are used for
- * vhost specific features. */
-#define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64)
-#define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64)
-
-/* Set current process as the (exclusive) owner of this file descriptor. This
- * must be called before any other vhost command. Further calls to
- * VHOST_OWNER_SET fail until VHOST_OWNER_RESET is called. */
-#define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01)
-/* Give up ownership, and reset the device to default values.
- * Allows subsequent call to VHOST_OWNER_SET to succeed. */
-#define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02)
-
-/* Set up/modify memory layout */
-#define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory)
-
-/* Write logging setup. */
-/* Memory writes can optionally be logged by setting bit at an offset
- * (calculated from the physical address) from specified log base.
- * The bit is set using an atomic 32 bit operation. */
-/* Set base address for logging. */
-#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64)
-/* Specify an eventfd file descriptor to signal on log write. */
-#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int)
-/* By default, a device gets one vhost_worker that its virtqueues share. This
- * command allows the owner of the device to create an additional vhost_worker
- * for the device. It can later be bound to 1 or more of its virtqueues using
- * the VHOST_ATTACH_VRING_WORKER command.
- *
- * This must be called after VHOST_SET_OWNER and the caller must be the owner
- * of the device. The new thread will inherit caller's cgroups and namespaces,
- * and will share the caller's memory space. The new thread will also be
- * counted against the caller's RLIMIT_NPROC value.
- *
- * The worker's ID used in other commands will be returned in
- * vhost_worker_state.
- */
-#define VHOST_NEW_WORKER _IOR(VHOST_VIRTIO, 0x8, struct vhost_worker_state)
-/* Free a worker created with VHOST_NEW_WORKER if it's not attached to any
- * virtqueue. If userspace is not able to call this for workers its created,
- * the kernel will free all the device's workers when the device is closed.
- */
-#define VHOST_FREE_WORKER _IOW(VHOST_VIRTIO, 0x9, struct vhost_worker_state)
-
-/* Ring setup. */
-/* Set number of descriptors in ring. This parameter can not
- * be modified while ring is running (bound to a device). */
-#define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state)
-/* Set addresses for the ring. */
-#define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr)
-/* Base value where queue looks for available descriptors */
-#define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
-/* Get accessor: reads index, writes value in num */
-#define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
-
-/* Set the vring byte order in num. Valid values are VHOST_VRING_LITTLE_ENDIAN
- * or VHOST_VRING_BIG_ENDIAN (other values return -EINVAL).
- * The byte order cannot be changed while the device is active: trying to do so
- * returns -EBUSY.
- * This is a legacy only API that is simply ignored when VIRTIO_F_VERSION_1 is
- * set.
- * Not all kernel configurations support this ioctl, but all configurations that
- * support SET also support GET.
- */
-#define VHOST_VRING_LITTLE_ENDIAN 0
-#define VHOST_VRING_BIG_ENDIAN 1
-#define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
-#define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
-/* Attach a vhost_worker created with VHOST_NEW_WORKER to one of the device's
- * virtqueues.
- *
- * This will replace the virtqueue's existing worker. If the replaced worker
- * is no longer attached to any virtqueues, it can be freed with
- * VHOST_FREE_WORKER.
- */
-#define VHOST_ATTACH_VRING_WORKER _IOW(VHOST_VIRTIO, 0x15, \
- struct vhost_vring_worker)
-/* Return the vring worker's ID */
-#define VHOST_GET_VRING_WORKER _IOWR(VHOST_VIRTIO, 0x16, \
- struct vhost_vring_worker)
-
-/* The following ioctls use eventfd file descriptors to signal and poll
- * for events. */
-
-/* Set eventfd to poll for added buffers */
-#define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file)
-/* Set eventfd to signal when buffers have beed used */
-#define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file)
-/* Set eventfd to signal an error */
-#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)
-/* Set busy loop timeout (in us) */
-#define VHOST_SET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x23, \
- struct vhost_vring_state)
-/* Get busy loop timeout (in us) */
-#define VHOST_GET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x24, \
- struct vhost_vring_state)
-
-/* Set or get vhost backend capability */
-
-#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
-#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
-
-/* VHOST_NET specific defines */
-
-/* Attach virtio net ring to a raw socket, or tap device.
- * The socket must be already bound to an ethernet device, this device will be
- * used for transmit. Pass fd -1 to unbind from the socket and the transmit
- * device. This can be used to stop the ring (e.g. for migration). */
-#define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file)
-
-/* VHOST_SCSI specific defines */
-
-#define VHOST_SCSI_SET_ENDPOINT _IOW(VHOST_VIRTIO, 0x40, struct vhost_scsi_target)
-#define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct vhost_scsi_target)
-/* Changing this breaks userspace. */
-#define VHOST_SCSI_GET_ABI_VERSION _IOW(VHOST_VIRTIO, 0x42, int)
-/* Set and get the events missed flag */
-#define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32)
-#define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32)
-
-/* VHOST_VSOCK specific defines */
-
-#define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u64)
-#define VHOST_VSOCK_SET_RUNNING _IOW(VHOST_VIRTIO, 0x61, int)
-
-/* VHOST_VDPA specific defines */
-
-/* Get the device id. The device ids follow the same definition of
- * the device id defined in virtio-spec.
- */
-#define VHOST_VDPA_GET_DEVICE_ID _IOR(VHOST_VIRTIO, 0x70, __u32)
-/* Get and set the status. The status bits follow the same definition
- * of the device status defined in virtio-spec.
- */
-#define VHOST_VDPA_GET_STATUS _IOR(VHOST_VIRTIO, 0x71, __u8)
-#define VHOST_VDPA_SET_STATUS _IOW(VHOST_VIRTIO, 0x72, __u8)
-/* Get and set the device config. The device config follows the same
- * definition of the device config defined in virtio-spec.
- */
-#define VHOST_VDPA_GET_CONFIG _IOR(VHOST_VIRTIO, 0x73, \
- struct vhost_vdpa_config)
-#define VHOST_VDPA_SET_CONFIG _IOW(VHOST_VIRTIO, 0x74, \
- struct vhost_vdpa_config)
-/* Enable/disable the ring. */
-#define VHOST_VDPA_SET_VRING_ENABLE _IOW(VHOST_VIRTIO, 0x75, \
- struct vhost_vring_state)
-/* Get the max ring size. */
-#define VHOST_VDPA_GET_VRING_NUM _IOR(VHOST_VIRTIO, 0x76, __u16)
-
-/* Set event fd for config interrupt*/
-#define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int)
-
-/* Get the valid iova range */
-#define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \
- struct vhost_vdpa_iova_range)
-/* Get the config size */
-#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32)
-
-/* Get the count of all virtqueues */
-#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32)
-
-/* Get the number of virtqueue groups. */
-#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32)
-
-/* Get the number of address spaces. */
-#define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int)
-
-/* Get the group for a virtqueue: read index, write group in num,
- * The virtqueue index is stored in the index field of
- * vhost_vring_state. The group for this specific virtqueue is
- * returned via num field of vhost_vring_state.
- */
-#define VHOST_VDPA_GET_VRING_GROUP _IOWR(VHOST_VIRTIO, 0x7B, \
- struct vhost_vring_state)
-/* Set the ASID for a virtqueue group. The group index is stored in
- * the index field of vhost_vring_state, the ASID associated with this
- * group is stored at num field of vhost_vring_state.
- */
-#define VHOST_VDPA_SET_GROUP_ASID _IOW(VHOST_VIRTIO, 0x7C, \
- struct vhost_vring_state)
-
-/* Suspend a device so it does not process virtqueue requests anymore
- *
- * After the return of ioctl the device must preserve all the necessary state
- * (the virtqueue vring base plus the possible device specific states) that is
- * required for restoring in the future. The device must not change its
- * configuration after that point.
- */
-#define VHOST_VDPA_SUSPEND _IO(VHOST_VIRTIO, 0x7D)
-
-/* Resume a device so it can resume processing virtqueue requests
- *
- * After the return of this ioctl the device will have restored all the
- * necessary states and it is fully operational to continue processing the
- * virtqueue descriptors.
- */
-#define VHOST_VDPA_RESUME _IO(VHOST_VIRTIO, 0x7E)
-
-/* Get the group for the descriptor table including driver & device areas
- * of a virtqueue: read index, write group in num.
- * The virtqueue index is stored in the index field of vhost_vring_state.
- * The group ID of the descriptor table for this specific virtqueue
- * is returned via num field of vhost_vring_state.
- */
-#define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \
- struct vhost_vring_state)
-#endif