summaryrefslogtreecommitdiff
path: root/tools/include/uapi/linux
diff options
context:
space:
mode:
Diffstat (limited to 'tools/include/uapi/linux')
-rw-r--r--tools/include/uapi/linux/bits.h18
-rw-r--r--tools/include/uapi/linux/bpf.h240
-rw-r--r--tools/include/uapi/linux/const.h17
-rw-r--r--tools/include/uapi/linux/ethtool.h104
-rw-r--r--tools/include/uapi/linux/fcntl.h123
-rw-r--r--tools/include/uapi/linux/fs.h192
-rw-r--r--tools/include/uapi/linux/if_link.h557
-rw-r--r--tools/include/uapi/linux/if_xdp.h8
-rw-r--r--tools/include/uapi/linux/in.h4
-rw-r--r--tools/include/uapi/linux/kvm.h728
-rw-r--r--tools/include/uapi/linux/memfd.h39
-rw-r--r--tools/include/uapi/linux/mman.h1
-rw-r--r--tools/include/uapi/linux/mount.h211
-rw-r--r--tools/include/uapi/linux/netdev.h59
-rw-r--r--tools/include/uapi/linux/openat2.h43
-rw-r--r--tools/include/uapi/linux/perf_event.h17
-rw-r--r--tools/include/uapi/linux/prctl.h22
-rw-r--r--tools/include/uapi/linux/sched.h148
-rw-r--r--tools/include/uapi/linux/stat.h12
-rw-r--r--tools/include/uapi/linux/stddef.h15
-rw-r--r--tools/include/uapi/linux/usbdevice_fs.h231
-rw-r--r--tools/include/uapi/linux/userfaultfd.h386
-rw-r--r--tools/include/uapi/linux/vhost.h230
23 files changed, 1576 insertions, 1829 deletions
diff --git a/tools/include/uapi/linux/bits.h b/tools/include/uapi/linux/bits.h
new file mode 100644
index 000000000000..5ee30f882736
--- /dev/null
+++ b/tools/include/uapi/linux/bits.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* bits.h: Macros for dealing with bitmasks. */
+
+#ifndef _UAPI_LINUX_BITS_H
+#define _UAPI_LINUX_BITS_H
+
+#define __GENMASK(h, l) \
+ (((~_UL(0)) - (_UL(1) << (l)) + 1) & \
+ (~_UL(0) >> (__BITS_PER_LONG - 1 - (h))))
+
+#define __GENMASK_ULL(h, l) \
+ (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \
+ (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h))))
+
+#define __GENMASK_U128(h, l) \
+ ((_BIT128((h)) << 1) - (_BIT128(l)))
+
+#endif /* _UAPI_LINUX_BITS_H */
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 7f24d898efbb..2acf9b336371 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -42,6 +42,7 @@
#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */
#define BPF_JSLT 0xc0 /* SLT is signed, '<' */
#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */
+#define BPF_JCOND 0xe0 /* conditional pseudo jumps: may_goto, goto_or_nop */
#define BPF_CALL 0x80 /* function call */
#define BPF_EXIT 0x90 /* function return */
@@ -50,6 +51,10 @@
#define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */
#define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */
+enum bpf_cond_pseudo_jmp {
+ BPF_MAY_GOTO = 0,
+};
+
/* Register numbers */
enum {
BPF_REG_0 = 0,
@@ -77,12 +82,29 @@ struct bpf_insn {
__s32 imm; /* signed immediate constant */
};
-/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */
+/* Deprecated: use struct bpf_lpm_trie_key_u8 (when the "data" member is needed for
+ * byte access) or struct bpf_lpm_trie_key_hdr (when using an alternative type for
+ * the trailing flexible array member) instead.
+ */
struct bpf_lpm_trie_key {
__u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */
__u8 data[0]; /* Arbitrary size */
};
+/* Header for bpf_lpm_trie_key structs */
+struct bpf_lpm_trie_key_hdr {
+ __u32 prefixlen;
+};
+
+/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry, with trailing byte array. */
+struct bpf_lpm_trie_key_u8 {
+ union {
+ struct bpf_lpm_trie_key_hdr hdr;
+ __u32 prefixlen;
+ };
+ __u8 data[]; /* Arbitrary size */
+};
+
struct bpf_cgroup_storage_key {
__u64 cgroup_inode_id; /* cgroup inode id */
__u32 attach_type; /* program attach type (enum bpf_attach_type) */
@@ -617,7 +639,11 @@ union bpf_iter_link_info {
* to NULL to begin the batched operation. After each subsequent
* **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant
* *out_batch* as the *in_batch* for the next operation to
- * continue iteration from the current point.
+ * continue iteration from the current point. Both *in_batch* and
+ * *out_batch* must point to memory large enough to hold a key,
+ * except for maps of type **BPF_MAP_TYPE_{HASH, PERCPU_HASH,
+ * LRU_HASH, LRU_PERCPU_HASH}**, for which batch parameters
+ * must be at least 4 bytes wide regardless of key size.
*
* The *keys* and *values* are output parameters which must point
* to memory large enough to hold *count* items based on the key
@@ -847,6 +873,36 @@ union bpf_iter_link_info {
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
+ * BPF_TOKEN_CREATE
+ * Description
+ * Create BPF token with embedded information about what
+ * BPF-related functionality it allows:
+ * - a set of allowed bpf() syscall commands;
+ * - a set of allowed BPF map types to be created with
+ * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed;
+ * - a set of allowed BPF program types and BPF program attach
+ * types to be loaded with BPF_PROG_LOAD command, if
+ * BPF_PROG_LOAD itself is allowed.
+ *
+ * BPF token is created (derived) from an instance of BPF FS,
+ * assuming it has necessary delegation mount options specified.
+ * This BPF token can be passed as an extra parameter to various
+ * bpf() syscall commands to grant BPF subsystem functionality to
+ * unprivileged processes.
+ *
+ * When created, BPF token is "associated" with the owning
+ * user namespace of BPF FS instance (super block) that it was
+ * derived from, and subsequent BPF operations performed with
+ * BPF token would be performing capabilities checks (i.e.,
+ * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within
+ * that user namespace. Without BPF token, such capabilities
+ * have to be granted in init user namespace, making bpf()
+ * syscall incompatible with user namespace, for the most part.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
* NOTES
* eBPF objects (maps and programs) can be shared between processes.
*
@@ -901,6 +957,8 @@ enum bpf_cmd {
BPF_ITER_CREATE,
BPF_LINK_DETACH,
BPF_PROG_BIND_MAP,
+ BPF_TOKEN_CREATE,
+ __MAX_BPF_CMD,
};
enum bpf_map_type {
@@ -951,6 +1009,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_BLOOM_FILTER,
BPF_MAP_TYPE_USER_RINGBUF,
BPF_MAP_TYPE_CGRP_STORAGE,
+ BPF_MAP_TYPE_ARENA,
+ __MAX_BPF_MAP_TYPE
};
/* Note that tracing related programs such as
@@ -995,6 +1055,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SK_LOOKUP,
BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
BPF_PROG_TYPE_NETFILTER,
+ __MAX_BPF_PROG_TYPE
};
enum bpf_attach_type {
@@ -1054,11 +1115,16 @@ enum bpf_attach_type {
BPF_CGROUP_UNIX_GETSOCKNAME,
BPF_NETKIT_PRIMARY,
BPF_NETKIT_PEER,
+ BPF_TRACE_KPROBE_SESSION,
+ BPF_TRACE_UPROBE_SESSION,
__MAX_BPF_ATTACH_TYPE
};
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
+/* Add BPF_LINK_TYPE(type, name) in bpf_types.h to keep bpf_link_type_strs[]
+ * in sync with the definitions below.
+ */
enum bpf_link_type {
BPF_LINK_TYPE_UNSPEC = 0,
BPF_LINK_TYPE_RAW_TRACEPOINT = 1,
@@ -1074,6 +1140,7 @@ enum bpf_link_type {
BPF_LINK_TYPE_TCX = 11,
BPF_LINK_TYPE_UPROBE_MULTI = 12,
BPF_LINK_TYPE_NETKIT = 13,
+ BPF_LINK_TYPE_SOCKMAP = 14,
__MAX_BPF_LINK_TYPE,
};
@@ -1278,6 +1345,10 @@ enum {
*/
#define BPF_PSEUDO_KFUNC_CALL 2
+enum bpf_addr_space_cast {
+ BPF_ADDR_SPACE_CAST = 1,
+};
+
/* flags for BPF_MAP_UPDATE_ELEM command */
enum {
BPF_ANY = 0, /* create new element or update existing */
@@ -1330,6 +1401,18 @@ enum {
/* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */
BPF_F_PATH_FD = (1U << 14),
+
+/* Flag for value_type_btf_obj_fd, the fd is available */
+ BPF_F_VTYPE_BTF_OBJ_FD = (1U << 15),
+
+/* BPF token FD is passed in a corresponding command's token_fd field */
+ BPF_F_TOKEN_FD = (1U << 16),
+
+/* When user space page faults in bpf_arena send SIGSEGV instead of inserting new page */
+ BPF_F_SEGV_ON_FAULT = (1U << 17),
+
+/* Do not translate kernel bpf_arena pointers to user pointers */
+ BPF_F_NO_USER_CONV = (1U << 18),
};
/* Flags for BPF_PROG_QUERY. */
@@ -1346,6 +1429,8 @@ enum {
#define BPF_F_TEST_RUN_ON_CPU (1U << 0)
/* If set, XDP frames will be transmitted after processing */
#define BPF_F_TEST_XDP_LIVE_FRAMES (1U << 1)
+/* If set, apply CHECKSUM_COMPLETE to skb and validate the checksum */
+#define BPF_F_TEST_SKB_CHECKSUM_COMPLETE (1U << 2)
/* type for BPF_ENABLE_STATS */
enum bpf_stats_type {
@@ -1401,8 +1486,20 @@ union bpf_attr {
* BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the
* number of hash functions (if 0, the bloom filter will default
* to using 5 hash functions).
+ *
+ * BPF_MAP_TYPE_ARENA - contains the address where user space
+ * is going to mmap() the arena. It has to be page aligned.
*/
__u64 map_extra;
+
+ __s32 value_type_btf_obj_fd; /* fd pointing to a BTF
+ * type data for
+ * btf_vmlinux_value_type_id.
+ */
+ /* BPF token FD to use with BPF_MAP_CREATE operation.
+ * If provided, map_flags should have BPF_F_TOKEN_FD flag set.
+ */
+ __s32 map_token_fd;
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -1472,6 +1569,20 @@ union bpf_attr {
* truncated), or smaller (if log buffer wasn't filled completely).
*/
__u32 log_true_size;
+ /* BPF token FD to use with BPF_PROG_LOAD operation.
+ * If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
+ */
+ __s32 prog_token_fd;
+ /* The fd_array_cnt can be used to pass the length of the
+ * fd_array array. In this case all the [map] file descriptors
+ * passed in this array will be bound to the program, even if
+ * the maps are not referenced directly. The functionality is
+ * similar to the BPF_PROG_BIND_MAP syscall, but maps can be
+ * used by the verifier during the program load. If provided,
+ * then the fd_array[0,...,fd_array_cnt-1] is expected to be
+ * continuous.
+ */
+ __u32 fd_array_cnt;
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -1569,8 +1680,10 @@ union bpf_attr {
} query;
struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
- __u64 name;
- __u32 prog_fd;
+ __u64 name;
+ __u32 prog_fd;
+ __u32 :32;
+ __aligned_u64 cookie;
} raw_tracepoint;
struct { /* anonymous struct for BPF_BTF_LOAD */
@@ -1584,6 +1697,11 @@ union bpf_attr {
* truncated), or smaller (if log buffer wasn't filled completely).
*/
__u32 btf_log_true_size;
+ __u32 btf_flags;
+ /* BPF token FD to use with BPF_BTF_LOAD operation.
+ * If provided, btf_flags should have BPF_F_TOKEN_FD flag set.
+ */
+ __s32 btf_token_fd;
};
struct {
@@ -1714,6 +1832,11 @@ union bpf_attr {
__u32 flags; /* extra flags */
} prog_bind_map;
+ struct { /* struct used by BPF_TOKEN_CREATE command */
+ __u32 flags;
+ __u32 bpffs_fd;
+ } token_create;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
@@ -1861,6 +1984,8 @@ union bpf_attr {
* program.
* Return
* The SMP id of the processor running the program.
+ * Attributes
+ * __bpf_fastcall
*
* long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
* Description
@@ -2742,7 +2867,7 @@ union bpf_attr {
* **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**,
* **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**,
* **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**,
- * **TCP_BPF_RTO_MIN**.
+ * **TCP_BPF_RTO_MIN**, **TCP_BPF_SOCK_OPS_CB_FLAGS**.
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
* * **IPPROTO_IPV6**, which supports the following *optname*\ s:
* **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**.
@@ -2992,10 +3117,6 @@ union bpf_attr {
* with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration
* option, and in this case it only works on functions tagged with
* **ALLOW_ERROR_INJECTION** in the kernel code.
- *
- * Also, the helper is only available for the architectures having
- * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing,
- * x86 architecture is the only one to support this feature.
* Return
* 0
*
@@ -3289,6 +3410,10 @@ union bpf_attr {
* for the nexthop. If the src addr cannot be derived,
* **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
* case, *params*->dmac and *params*->smac are not set either.
+ * **BPF_FIB_LOOKUP_MARK**
+ * Use the mark present in *params*->mark for the fib lookup.
+ * This option should not be used with BPF_FIB_LOOKUP_DIRECT,
+ * as it only has meaning for full lookups.
*
* *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs.
@@ -4839,9 +4964,9 @@ union bpf_attr {
* going through the CPU's backlog queue.
*
* The *flags* argument is reserved and must be 0. The helper is
- * currently only supported for tc BPF program types at the ingress
- * hook and for veth device types. The peer device must reside in a
- * different network namespace.
+ * currently only supported for tc BPF program types at the
+ * ingress hook and for veth and netkit target device types. The
+ * peer device must reside in a different network namespace.
* Return
* The helper returns **TC_ACT_REDIRECT** on success or
* **TC_ACT_SHOT** on error.
@@ -4917,7 +5042,7 @@ union bpf_attr {
* bytes will be copied to *dst*
* Return
* The **hash_algo** is returned on success,
- * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if
+ * **-EOPNOTSUPP** if IMA is disabled or **-EINVAL** if
* invalid arguments are passed.
*
* struct socket *bpf_sock_from_file(struct file *file)
@@ -5256,7 +5381,7 @@ union bpf_attr {
* Currently, the **flags** must be 0. Currently, nr_loops is
* limited to 1 << 23 (~8 million) loops.
*
- * long (\*callback_fn)(u32 index, void \*ctx);
+ * long (\*callback_fn)(u64 index, void \*ctx);
*
* where **index** is the current index in the loop. The index
* is zero-indexed.
@@ -5403,14 +5528,15 @@ union bpf_attr {
* bytes will be copied to *dst*
* Return
* The **hash_algo** is returned on success,
- * **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if
+ * **-EOPNOTSUPP** if the hash calculation failed or **-EINVAL** if
* invalid arguments are passed.
*
- * void *bpf_kptr_xchg(void *map_value, void *ptr)
+ * void *bpf_kptr_xchg(void *dst, void *ptr)
* Description
- * Exchange kptr at pointer *map_value* with *ptr*, and return the
- * old value. *ptr* can be NULL, otherwise it must be a referenced
- * pointer which will be released when this helper is called.
+ * Exchange kptr at pointer *dst* with *ptr*, and return the old value.
+ * *dst* can be map value or local kptr. *ptr* can be NULL, otherwise
+ * it must be a referenced pointer which will be released when this helper
+ * is called.
* Return
* The old value of kptr (which can be NULL). The returned pointer
* if not NULL, is a reference which must be released using its
@@ -5933,11 +6059,6 @@ enum {
BPF_F_MARK_ENFORCE = (1ULL << 6),
};
-/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
-enum {
- BPF_F_INGRESS = (1ULL << 0),
-};
-
/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
enum {
BPF_F_TUNINFO_IPV6 = (1ULL << 0),
@@ -6084,10 +6205,12 @@ enum {
BPF_F_BPRM_SECUREEXEC = (1ULL << 0),
};
-/* Flags for bpf_redirect_map helper */
+/* Flags for bpf_redirect and bpf_redirect_map helpers */
enum {
- BPF_F_BROADCAST = (1ULL << 3),
- BPF_F_EXCLUDE_INGRESS = (1ULL << 4),
+ BPF_F_INGRESS = (1ULL << 0), /* used for skb path */
+ BPF_F_BROADCAST = (1ULL << 3), /* used for XDP path */
+ BPF_F_EXCLUDE_INGRESS = (1ULL << 4), /* used for XDP path */
+#define BPF_F_REDIRECT_FLAGS (BPF_F_INGRESS | BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS)
};
#define __bpf_md_ptr(type, name) \
@@ -6096,12 +6219,17 @@ union { \
__u64 :64; \
} __attribute__((aligned(8)))
+/* The enum used in skb->tstamp_type. It specifies the clock type
+ * of the time stored in the skb->tstamp.
+ */
enum {
- BPF_SKB_TSTAMP_UNSPEC,
- BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */
- /* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle,
- * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC
- * and try to deduce it by ingress, egress or skb->sk->sk_clockid.
+ BPF_SKB_TSTAMP_UNSPEC = 0, /* DEPRECATED */
+ BPF_SKB_TSTAMP_DELIVERY_MONO = 1, /* DEPRECATED */
+ BPF_SKB_CLOCK_REALTIME = 0,
+ BPF_SKB_CLOCK_MONOTONIC = 1,
+ BPF_SKB_CLOCK_TAI = 2,
+ /* For any future BPF_SKB_CLOCK_* that the bpf prog cannot handle,
+ * the bpf prog can try to deduce it by ingress/egress/skb->sk->sk_clockid.
*/
};
@@ -6487,7 +6615,7 @@ struct bpf_map_info {
__u32 btf_id;
__u32 btf_key_type_id;
__u32 btf_value_type_id;
- __u32 :32; /* alignment pad */
+ __u32 btf_vmlinux_id;
__u64 map_extra;
} __attribute__((aligned(8)));
@@ -6563,6 +6691,7 @@ struct bpf_link_info {
__u32 count; /* in/out: kprobe_multi function count */
__u32 flags;
__u64 missed;
+ __aligned_u64 cookies;
} kprobe_multi;
struct {
__aligned_u64 path;
@@ -6582,6 +6711,7 @@ struct bpf_link_info {
__aligned_u64 file_name; /* in/out */
__u32 name_len;
__u32 offset; /* offset from file_name */
+ __u64 cookie;
} uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */
struct {
__aligned_u64 func_name; /* in/out */
@@ -6589,14 +6719,19 @@ struct bpf_link_info {
__u32 offset; /* offset from func_name */
__u64 addr;
__u64 missed;
+ __u64 cookie;
} kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */
struct {
__aligned_u64 tp_name; /* in/out */
__u32 name_len;
+ __u32 :32;
+ __u64 cookie;
} tracepoint; /* BPF_PERF_EVENT_TRACEPOINT */
struct {
__u64 config;
__u32 type;
+ __u32 :32;
+ __u64 cookie;
} event; /* BPF_PERF_EVENT_EVENT */
};
} perf_event;
@@ -6608,6 +6743,10 @@ struct bpf_link_info {
__u32 ifindex;
__u32 attach_type;
} netkit;
+ struct {
+ __u32 map_id;
+ __u32 attach_type;
+ } sockmap;
};
} __attribute__((aligned(8)));
@@ -6826,6 +6965,8 @@ enum {
* socket transition to LISTEN state.
*/
BPF_SOCK_OPS_RTT_CB, /* Called on every RTT.
+ * Arg1: measured RTT input (mrtt)
+ * Arg2: updated srtt
*/
BPF_SOCK_OPS_PARSE_HDR_OPT_CB, /* Parse the header option.
* It will be called to handle
@@ -6904,6 +7045,7 @@ enum {
BPF_TCP_LISTEN,
BPF_TCP_CLOSING, /* Now a valid state */
BPF_TCP_NEW_SYN_RECV,
+ BPF_TCP_BOUND_INACTIVE,
BPF_TCP_MAX_STATES /* Leave at the end! */
};
@@ -6948,6 +7090,7 @@ enum {
TCP_BPF_SYN = 1005, /* Copy the TCP header */
TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */
TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */
+ TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */
};
enum {
@@ -7007,6 +7150,7 @@ enum {
BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
BPF_FIB_LOOKUP_TBID = (1U << 3),
BPF_FIB_LOOKUP_SRC = (1U << 4),
+ BPF_FIB_LOOKUP_MARK = (1U << 5),
};
enum {
@@ -7039,7 +7183,7 @@ struct bpf_fib_lookup {
/* output: MTU value */
__u16 mtu_result;
- };
+ } __attribute__((packed, aligned(2)));
/* input: L3 device index for lookup
* output: device index from FIB lookup
*/
@@ -7084,8 +7228,19 @@ struct bpf_fib_lookup {
__u32 tbid;
};
- __u8 smac[6]; /* ETH_ALEN */
- __u8 dmac[6]; /* ETH_ALEN */
+ union {
+ /* input */
+ struct {
+ __u32 mark; /* policy routing */
+ /* 2 4-byte holes for input */
+ };
+
+ /* output: source and dest mac */
+ struct {
+ __u8 smac[6]; /* ETH_ALEN */
+ __u8 dmac[6]; /* ETH_ALEN */
+ };
+ };
};
struct bpf_redir_neigh {
@@ -7172,6 +7327,10 @@ struct bpf_timer {
__u64 __opaque[2];
} __attribute__((aligned(8)));
+struct bpf_wq {
+ __u64 __opaque[2];
+} __attribute__((aligned(8)));
+
struct bpf_dynptr {
__u64 __opaque[2];
} __attribute__((aligned(8)));
@@ -7364,4 +7523,13 @@ struct bpf_iter_num {
__u64 __opaque[1];
} __attribute__((aligned(8)));
+/*
+ * Flags to control BPF kfunc behaviour.
+ * - BPF_F_PAD_ZEROS: Pad destination buffer with zeros. (See the respective
+ * helper documentation for details.)
+ */
+enum bpf_kfunc_flags {
+ BPF_F_PAD_ZEROS = (1ULL << 0),
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/include/uapi/linux/const.h b/tools/include/uapi/linux/const.h
index a429381e7ca5..e16be0d37746 100644
--- a/tools/include/uapi/linux/const.h
+++ b/tools/include/uapi/linux/const.h
@@ -28,6 +28,23 @@
#define _BITUL(x) (_UL(1) << (x))
#define _BITULL(x) (_ULL(1) << (x))
+#if !defined(__ASSEMBLY__)
+/*
+ * Missing asm support
+ *
+ * __BIT128() would not work in the asm code, as it shifts an
+ * 'unsigned __init128' data type as direct representation of
+ * 128 bit constants is not supported in the gcc compiler, as
+ * they get silently truncated.
+ *
+ * TODO: Please revisit this implementation when gcc compiler
+ * starts representing 128 bit constants directly like long
+ * and unsigned long etc. Subsequently drop the comment for
+ * GENMASK_U128() which would then start supporting asm code.
+ */
+#define _BIT128(x) ((unsigned __int128)(1) << (x))
+#endif
+
#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1)
#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask))
diff --git a/tools/include/uapi/linux/ethtool.h b/tools/include/uapi/linux/ethtool.h
deleted file mode 100644
index 47afae3895ec..000000000000
--- a/tools/include/uapi/linux/ethtool.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * ethtool.h: Defines for Linux ethtool.
- *
- * Copyright (C) 1998 David S. Miller (davem@redhat.com)
- * Copyright 2001 Jeff Garzik <jgarzik@pobox.com>
- * Portions Copyright 2001 Sun Microsystems (thockin@sun.com)
- * Portions Copyright 2002 Intel (eli.kupermann@intel.com,
- * christopher.leech@intel.com,
- * scott.feldman@intel.com)
- * Portions Copyright (C) Sun Microsystems 2008
- */
-
-#ifndef _UAPI_LINUX_ETHTOOL_H
-#define _UAPI_LINUX_ETHTOOL_H
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/if_ether.h>
-
-#define ETHTOOL_GCHANNELS 0x0000003c /* Get no of channels */
-
-/**
- * struct ethtool_channels - configuring number of network channel
- * @cmd: ETHTOOL_{G,S}CHANNELS
- * @max_rx: Read only. Maximum number of receive channel the driver support.
- * @max_tx: Read only. Maximum number of transmit channel the driver support.
- * @max_other: Read only. Maximum number of other channel the driver support.
- * @max_combined: Read only. Maximum number of combined channel the driver
- * support. Set of queues RX, TX or other.
- * @rx_count: Valid values are in the range 1 to the max_rx.
- * @tx_count: Valid values are in the range 1 to the max_tx.
- * @other_count: Valid values are in the range 1 to the max_other.
- * @combined_count: Valid values are in the range 1 to the max_combined.
- *
- * This can be used to configure RX, TX and other channels.
- */
-
-struct ethtool_channels {
- __u32 cmd;
- __u32 max_rx;
- __u32 max_tx;
- __u32 max_other;
- __u32 max_combined;
- __u32 rx_count;
- __u32 tx_count;
- __u32 other_count;
- __u32 combined_count;
-};
-
-#define ETHTOOL_FWVERS_LEN 32
-#define ETHTOOL_BUSINFO_LEN 32
-#define ETHTOOL_EROMVERS_LEN 32
-
-/**
- * struct ethtool_drvinfo - general driver and device information
- * @cmd: Command number = %ETHTOOL_GDRVINFO
- * @driver: Driver short name. This should normally match the name
- * in its bus driver structure (e.g. pci_driver::name). Must
- * not be an empty string.
- * @version: Driver version string; may be an empty string
- * @fw_version: Firmware version string; may be an empty string
- * @erom_version: Expansion ROM version string; may be an empty string
- * @bus_info: Device bus address. This should match the dev_name()
- * string for the underlying bus device, if there is one. May be
- * an empty string.
- * @reserved2: Reserved for future use; see the note on reserved space.
- * @n_priv_flags: Number of flags valid for %ETHTOOL_GPFLAGS and
- * %ETHTOOL_SPFLAGS commands; also the number of strings in the
- * %ETH_SS_PRIV_FLAGS set
- * @n_stats: Number of u64 statistics returned by the %ETHTOOL_GSTATS
- * command; also the number of strings in the %ETH_SS_STATS set
- * @testinfo_len: Number of results returned by the %ETHTOOL_TEST
- * command; also the number of strings in the %ETH_SS_TEST set
- * @eedump_len: Size of EEPROM accessible through the %ETHTOOL_GEEPROM
- * and %ETHTOOL_SEEPROM commands, in bytes
- * @regdump_len: Size of register dump returned by the %ETHTOOL_GREGS
- * command, in bytes
- *
- * Users can use the %ETHTOOL_GSSET_INFO command to get the number of
- * strings in any string set (from Linux 2.6.34).
- *
- * Drivers should set at most @driver, @version, @fw_version and
- * @bus_info in their get_drvinfo() implementation. The ethtool
- * core fills in the other fields using other driver operations.
- */
-struct ethtool_drvinfo {
- __u32 cmd;
- char driver[32];
- char version[32];
- char fw_version[ETHTOOL_FWVERS_LEN];
- char bus_info[ETHTOOL_BUSINFO_LEN];
- char erom_version[ETHTOOL_EROMVERS_LEN];
- char reserved2[12];
- __u32 n_priv_flags;
- __u32 n_stats;
- __u32 testinfo_len;
- __u32 eedump_len;
- __u32 regdump_len;
-};
-
-#define ETHTOOL_GDRVINFO 0x00000003
-
-#endif /* _UAPI_LINUX_ETHTOOL_H */
diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h
deleted file mode 100644
index 282e90aeb163..000000000000
--- a/tools/include/uapi/linux/fcntl.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _UAPI_LINUX_FCNTL_H
-#define _UAPI_LINUX_FCNTL_H
-
-#include <asm/fcntl.h>
-#include <linux/openat2.h>
-
-#define F_SETLEASE (F_LINUX_SPECIFIC_BASE + 0)
-#define F_GETLEASE (F_LINUX_SPECIFIC_BASE + 1)
-
-/*
- * Cancel a blocking posix lock; internal use only until we expose an
- * asynchronous lock api to userspace:
- */
-#define F_CANCELLK (F_LINUX_SPECIFIC_BASE + 5)
-
-/* Create a file descriptor with FD_CLOEXEC set. */
-#define F_DUPFD_CLOEXEC (F_LINUX_SPECIFIC_BASE + 6)
-
-/*
- * Request nofications on a directory.
- * See below for events that may be notified.
- */
-#define F_NOTIFY (F_LINUX_SPECIFIC_BASE+2)
-
-/*
- * Set and get of pipe page size array
- */
-#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7)
-#define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8)
-
-/*
- * Set/Get seals
- */
-#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
-#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
-
-/*
- * Types of seals
- */
-#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
-#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
-#define F_SEAL_GROW 0x0004 /* prevent file from growing */
-#define F_SEAL_WRITE 0x0008 /* prevent writes */
-#define F_SEAL_FUTURE_WRITE 0x0010 /* prevent future writes while mapped */
-#define F_SEAL_EXEC 0x0020 /* prevent chmod modifying exec bits */
-/* (1U << 31) is reserved for signed error codes */
-
-/*
- * Set/Get write life time hints. {GET,SET}_RW_HINT operate on the
- * underlying inode, while {GET,SET}_FILE_RW_HINT operate only on
- * the specific file.
- */
-#define F_GET_RW_HINT (F_LINUX_SPECIFIC_BASE + 11)
-#define F_SET_RW_HINT (F_LINUX_SPECIFIC_BASE + 12)
-#define F_GET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 13)
-#define F_SET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 14)
-
-/*
- * Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
- * used to clear any hints previously set.
- */
-#define RWH_WRITE_LIFE_NOT_SET 0
-#define RWH_WRITE_LIFE_NONE 1
-#define RWH_WRITE_LIFE_SHORT 2
-#define RWH_WRITE_LIFE_MEDIUM 3
-#define RWH_WRITE_LIFE_LONG 4
-#define RWH_WRITE_LIFE_EXTREME 5
-
-/*
- * The originally introduced spelling is remained from the first
- * versions of the patch set that introduced the feature, see commit
- * v4.13-rc1~212^2~51.
- */
-#define RWF_WRITE_LIFE_NOT_SET RWH_WRITE_LIFE_NOT_SET
-
-/*
- * Types of directory notifications that may be requested.
- */
-#define DN_ACCESS 0x00000001 /* File accessed */
-#define DN_MODIFY 0x00000002 /* File modified */
-#define DN_CREATE 0x00000004 /* File created */
-#define DN_DELETE 0x00000008 /* File removed */
-#define DN_RENAME 0x00000010 /* File renamed */
-#define DN_ATTRIB 0x00000020 /* File changed attibutes */
-#define DN_MULTISHOT 0x80000000 /* Don't remove notifier */
-
-/*
- * The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS is
- * meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to
- * unlinkat. The two functions do completely different things and therefore,
- * the flags can be allowed to overlap. For example, passing AT_REMOVEDIR to
- * faccessat would be undefined behavior and thus treating it equivalent to
- * AT_EACCESS is valid undefined behavior.
- */
-#define AT_FDCWD -100 /* Special value used to indicate
- openat should use the current
- working directory. */
-#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */
-#define AT_EACCESS 0x200 /* Test access permitted for
- effective IDs, not real IDs. */
-#define AT_REMOVEDIR 0x200 /* Remove directory instead of
- unlinking file. */
-#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
-#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */
-#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */
-
-#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */
-#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */
-#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */
-#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */
-
-#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
-
-/* Flags for name_to_handle_at(2). We reuse AT_ flag space to save bits... */
-#define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to
- compare object identity and may not
- be usable to open_by_handle_at(2) */
-#if defined(__KERNEL__)
-#define AT_GETATTR_NOSEC 0x80000000
-#endif
-
-#endif /* _UAPI_LINUX_FCNTL_H */
diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h
index 48ad69f7722e..8a27bc5c7a7f 100644
--- a/tools/include/uapi/linux/fs.h
+++ b/tools/include/uapi/linux/fs.h
@@ -28,8 +28,8 @@
* nr_file rlimit, so it's safe to set up a ridiculously high absolute
* upper limit on files-per-process.
*
- * Some programs (notably those using select()) may have to be
- * recompiled to take full advantage of the new limits..
+ * Some programs (notably those using select()) may have to be
+ * recompiled to take full advantage of the new limits..
*/
/* Fixed constants first: */
@@ -64,6 +64,24 @@ struct fstrim_range {
__u64 minlen;
};
+/*
+ * We include a length field because some filesystems (vfat) have an identifier
+ * that we do want to expose as a UUID, but doesn't have the standard length.
+ *
+ * We use a fixed size buffer beacuse this interface will, by fiat, never
+ * support "UUIDs" longer than 16 bytes; we don't want to force all downstream
+ * users to have to deal with that.
+ */
+struct fsuuid2 {
+ __u8 len;
+ __u8 uuid[16];
+};
+
+struct fs_sysfs_path {
+ __u8 len;
+ __u8 name[128];
+};
+
/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
#define FILE_DEDUPE_RANGE_SAME 0
#define FILE_DEDUPE_RANGE_DIFFERS 1
@@ -215,6 +233,13 @@ struct fsxattr {
#define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr)
#define FS_IOC_GETFSLABEL _IOR(0x94, 49, char[FSLABEL_MAX])
#define FS_IOC_SETFSLABEL _IOW(0x94, 50, char[FSLABEL_MAX])
+/* Returns the external filesystem UUID, the same one blkid returns */
+#define FS_IOC_GETFSUUID _IOR(0x15, 0, struct fsuuid2)
+/*
+ * Returns the path component under /sys/fs/ that refers to this filesystem;
+ * also /sys/kernel/debug/ for filesystems with debugfs exports
+ */
+#define FS_IOC_GETFSSYSFSPATH _IOR(0x15, 1, struct fs_sysfs_path)
/*
* Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
@@ -301,12 +326,17 @@ typedef int __bitwise __kernel_rwf_t;
/* per-IO O_APPEND */
#define RWF_APPEND ((__force __kernel_rwf_t)0x00000010)
+/* per-IO negation of O_APPEND */
+#define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020)
+
/* mask of flags supported by the kernel */
#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
- RWF_APPEND)
+ RWF_APPEND | RWF_NOAPPEND)
+
+#define PROCFS_IOCTL_MAGIC 'f'
/* Pagemap ioctl */
-#define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg)
+#define PAGEMAP_SCAN _IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg)
/* Bitmasks provided in pm_scan_args masks and reported in page_region.categories. */
#define PAGE_IS_WPALLOWED (1 << 0)
@@ -365,4 +395,158 @@ struct pm_scan_arg {
__u64 return_mask;
};
+/* /proc/<pid>/maps ioctl */
+#define PROCMAP_QUERY _IOWR(PROCFS_IOCTL_MAGIC, 17, struct procmap_query)
+
+enum procmap_query_flags {
+ /*
+ * VMA permission flags.
+ *
+ * Can be used as part of procmap_query.query_flags field to look up
+ * only VMAs satisfying specified subset of permissions. E.g., specifying
+ * PROCMAP_QUERY_VMA_READABLE only will return both readable and read/write VMAs,
+ * while having PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_WRITABLE will only
+ * return read/write VMAs, though both executable/non-executable and
+ * private/shared will be ignored.
+ *
+ * PROCMAP_QUERY_VMA_* flags are also returned in procmap_query.vma_flags
+ * field to specify actual VMA permissions.
+ */
+ PROCMAP_QUERY_VMA_READABLE = 0x01,
+ PROCMAP_QUERY_VMA_WRITABLE = 0x02,
+ PROCMAP_QUERY_VMA_EXECUTABLE = 0x04,
+ PROCMAP_QUERY_VMA_SHARED = 0x08,
+ /*
+ * Query modifier flags.
+ *
+ * By default VMA that covers provided address is returned, or -ENOENT
+ * is returned. With PROCMAP_QUERY_COVERING_OR_NEXT_VMA flag set, closest
+ * VMA with vma_start > addr will be returned if no covering VMA is
+ * found.
+ *
+ * PROCMAP_QUERY_FILE_BACKED_VMA instructs query to consider only VMAs that
+ * have file backing. Can be combined with PROCMAP_QUERY_COVERING_OR_NEXT_VMA
+ * to iterate all VMAs with file backing.
+ */
+ PROCMAP_QUERY_COVERING_OR_NEXT_VMA = 0x10,
+ PROCMAP_QUERY_FILE_BACKED_VMA = 0x20,
+};
+
+/*
+ * Input/output argument structured passed into ioctl() call. It can be used
+ * to query a set of VMAs (Virtual Memory Areas) of a process.
+ *
+ * Each field can be one of three kinds, marked in a short comment to the
+ * right of the field:
+ * - "in", input argument, user has to provide this value, kernel doesn't modify it;
+ * - "out", output argument, kernel sets this field with VMA data;
+ * - "in/out", input and output argument; user provides initial value (used
+ * to specify maximum allowable buffer size), and kernel sets it to actual
+ * amount of data written (or zero, if there is no data).
+ *
+ * If matching VMA is found (according to criterias specified by
+ * query_addr/query_flags, all the out fields are filled out, and ioctl()
+ * returns 0. If there is no matching VMA, -ENOENT will be returned.
+ * In case of any other error, negative error code other than -ENOENT is
+ * returned.
+ *
+ * Most of the data is similar to the one returned as text in /proc/<pid>/maps
+ * file, but procmap_query provides more querying flexibility. There are no
+ * consistency guarantees between subsequent ioctl() calls, but data returned
+ * for matched VMA is self-consistent.
+ */
+struct procmap_query {
+ /* Query struct size, for backwards/forward compatibility */
+ __u64 size;
+ /*
+ * Query flags, a combination of enum procmap_query_flags values.
+ * Defines query filtering and behavior, see enum procmap_query_flags.
+ *
+ * Input argument, provided by user. Kernel doesn't modify it.
+ */
+ __u64 query_flags; /* in */
+ /*
+ * Query address. By default, VMA that covers this address will
+ * be looked up. PROCMAP_QUERY_* flags above modify this default
+ * behavior further.
+ *
+ * Input argument, provided by user. Kernel doesn't modify it.
+ */
+ __u64 query_addr; /* in */
+ /* VMA starting (inclusive) and ending (exclusive) address, if VMA is found. */
+ __u64 vma_start; /* out */
+ __u64 vma_end; /* out */
+ /* VMA permissions flags. A combination of PROCMAP_QUERY_VMA_* flags. */
+ __u64 vma_flags; /* out */
+ /* VMA backing page size granularity. */
+ __u64 vma_page_size; /* out */
+ /*
+ * VMA file offset. If VMA has file backing, this specifies offset
+ * within the file that VMA's start address corresponds to.
+ * Is set to zero if VMA has no backing file.
+ */
+ __u64 vma_offset; /* out */
+ /* Backing file's inode number, or zero, if VMA has no backing file. */
+ __u64 inode; /* out */
+ /* Backing file's device major/minor number, or zero, if VMA has no backing file. */
+ __u32 dev_major; /* out */
+ __u32 dev_minor; /* out */
+ /*
+ * If set to non-zero value, signals the request to return VMA name
+ * (i.e., VMA's backing file's absolute path, with " (deleted)" suffix
+ * appended, if file was unlinked from FS) for matched VMA. VMA name
+ * can also be some special name (e.g., "[heap]", "[stack]") or could
+ * be even user-supplied with prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME).
+ *
+ * Kernel will set this field to zero, if VMA has no associated name.
+ * Otherwise kernel will return actual amount of bytes filled in
+ * user-supplied buffer (see vma_name_addr field below), including the
+ * terminating zero.
+ *
+ * If VMA name is longer that user-supplied maximum buffer size,
+ * -E2BIG error is returned.
+ *
+ * If this field is set to non-zero value, vma_name_addr should point
+ * to valid user space memory buffer of at least vma_name_size bytes.
+ * If set to zero, vma_name_addr should be set to zero as well
+ */
+ __u32 vma_name_size; /* in/out */
+ /*
+ * If set to non-zero value, signals the request to extract and return
+ * VMA's backing file's build ID, if the backing file is an ELF file
+ * and it contains embedded build ID.
+ *
+ * Kernel will set this field to zero, if VMA has no backing file,
+ * backing file is not an ELF file, or ELF file has no build ID
+ * embedded.
+ *
+ * Build ID is a binary value (not a string). Kernel will set
+ * build_id_size field to exact number of bytes used for build ID.
+ * If build ID is requested and present, but needs more bytes than
+ * user-supplied maximum buffer size (see build_id_addr field below),
+ * -E2BIG error will be returned.
+ *
+ * If this field is set to non-zero value, build_id_addr should point
+ * to valid user space memory buffer of at least build_id_size bytes.
+ * If set to zero, build_id_addr should be set to zero as well
+ */
+ __u32 build_id_size; /* in/out */
+ /*
+ * User-supplied address of a buffer of at least vma_name_size bytes
+ * for kernel to fill with matched VMA's name (see vma_name_size field
+ * description above for details).
+ *
+ * Should be set to zero if VMA name should not be returned.
+ */
+ __u64 vma_name_addr; /* in */
+ /*
+ * User-supplied address of a buffer of at least build_id_size bytes
+ * for kernel to fill with matched VMA's ELF build ID, if available
+ * (see build_id_size field description above for details).
+ *
+ * Should be set to zero if build ID should not be returned.
+ */
+ __u64 build_id_addr; /* in */
+};
+
#endif /* _UAPI_LINUX_FS_H */
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index a0aa05a28cf2..7e46ca4cd31b 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -377,6 +377,7 @@ enum {
IFLA_GSO_IPV4_MAX_SIZE,
IFLA_GRO_IPV4_MAX_SIZE,
IFLA_DPLL_PIN,
+ IFLA_MAX_PACING_OFFLOAD_HORIZON,
__IFLA_MAX
};
@@ -461,6 +462,286 @@ enum in6_addr_gen_mode {
/* Bridge section */
+/**
+ * DOC: Bridge enum definition
+ *
+ * Please *note* that the timer values in the following section are expected
+ * in clock_t format, which is seconds multiplied by USER_HZ (generally
+ * defined as 100).
+ *
+ * @IFLA_BR_FORWARD_DELAY
+ * The bridge forwarding delay is the time spent in LISTENING state
+ * (before moving to LEARNING) and in LEARNING state (before moving
+ * to FORWARDING). Only relevant if STP is enabled.
+ *
+ * The valid values are between (2 * USER_HZ) and (30 * USER_HZ).
+ * The default value is (15 * USER_HZ).
+ *
+ * @IFLA_BR_HELLO_TIME
+ * The time between hello packets sent by the bridge, when it is a root
+ * bridge or a designated bridge. Only relevant if STP is enabled.
+ *
+ * The valid values are between (1 * USER_HZ) and (10 * USER_HZ).
+ * The default value is (2 * USER_HZ).
+ *
+ * @IFLA_BR_MAX_AGE
+ * The hello packet timeout is the time until another bridge in the
+ * spanning tree is assumed to be dead, after reception of its last hello
+ * message. Only relevant if STP is enabled.
+ *
+ * The valid values are between (6 * USER_HZ) and (40 * USER_HZ).
+ * The default value is (20 * USER_HZ).
+ *
+ * @IFLA_BR_AGEING_TIME
+ * Configure the bridge's FDB entries aging time. It is the time a MAC
+ * address will be kept in the FDB after a packet has been received from
+ * that address. After this time has passed, entries are cleaned up.
+ * Allow values outside the 802.1 standard specification for special cases:
+ *
+ * * 0 - entry never ages (all permanent)
+ * * 1 - entry disappears (no persistence)
+ *
+ * The default value is (300 * USER_HZ).
+ *
+ * @IFLA_BR_STP_STATE
+ * Turn spanning tree protocol on (*IFLA_BR_STP_STATE* > 0) or off
+ * (*IFLA_BR_STP_STATE* == 0) for this bridge.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_PRIORITY
+ * Set this bridge's spanning tree priority, used during STP root bridge
+ * election.
+ *
+ * The valid values are between 0 and 65535.
+ *
+ * @IFLA_BR_VLAN_FILTERING
+ * Turn VLAN filtering on (*IFLA_BR_VLAN_FILTERING* > 0) or off
+ * (*IFLA_BR_VLAN_FILTERING* == 0). When disabled, the bridge will not
+ * consider the VLAN tag when handling packets.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_VLAN_PROTOCOL
+ * Set the protocol used for VLAN filtering.
+ *
+ * The valid values are 0x8100(802.1Q) or 0x88A8(802.1AD). The default value
+ * is 0x8100(802.1Q).
+ *
+ * @IFLA_BR_GROUP_FWD_MASK
+ * The group forwarding mask. This is the bitmask that is applied to
+ * decide whether to forward incoming frames destined to link-local
+ * addresses (of the form 01:80:C2:00:00:0X).
+ *
+ * The default value is 0, which means the bridge does not forward any
+ * link-local frames coming on this port.
+ *
+ * @IFLA_BR_ROOT_ID
+ * The bridge root id, read only.
+ *
+ * @IFLA_BR_BRIDGE_ID
+ * The bridge id, read only.
+ *
+ * @IFLA_BR_ROOT_PORT
+ * The bridge root port, read only.
+ *
+ * @IFLA_BR_ROOT_PATH_COST
+ * The bridge root path cost, read only.
+ *
+ * @IFLA_BR_TOPOLOGY_CHANGE
+ * The bridge topology change, read only.
+ *
+ * @IFLA_BR_TOPOLOGY_CHANGE_DETECTED
+ * The bridge topology change detected, read only.
+ *
+ * @IFLA_BR_HELLO_TIMER
+ * The bridge hello timer, read only.
+ *
+ * @IFLA_BR_TCN_TIMER
+ * The bridge tcn timer, read only.
+ *
+ * @IFLA_BR_TOPOLOGY_CHANGE_TIMER
+ * The bridge topology change timer, read only.
+ *
+ * @IFLA_BR_GC_TIMER
+ * The bridge gc timer, read only.
+ *
+ * @IFLA_BR_GROUP_ADDR
+ * Set the MAC address of the multicast group this bridge uses for STP.
+ * The address must be a link-local address in standard Ethernet MAC address
+ * format. It is an address of the form 01:80:C2:00:00:0X, with X in [0, 4..f].
+ *
+ * The default value is 0.
+ *
+ * @IFLA_BR_FDB_FLUSH
+ * Flush bridge's fdb dynamic entries.
+ *
+ * @IFLA_BR_MCAST_ROUTER
+ * Set bridge's multicast router if IGMP snooping is enabled.
+ * The valid values are:
+ *
+ * * 0 - disabled.
+ * * 1 - automatic (queried).
+ * * 2 - permanently enabled.
+ *
+ * The default value is 1.
+ *
+ * @IFLA_BR_MCAST_SNOOPING
+ * Turn multicast snooping on (*IFLA_BR_MCAST_SNOOPING* > 0) or off
+ * (*IFLA_BR_MCAST_SNOOPING* == 0).
+ *
+ * The default value is 1.
+ *
+ * @IFLA_BR_MCAST_QUERY_USE_IFADDR
+ * If enabled use the bridge's own IP address as source address for IGMP
+ * queries (*IFLA_BR_MCAST_QUERY_USE_IFADDR* > 0) or the default of 0.0.0.0
+ * (*IFLA_BR_MCAST_QUERY_USE_IFADDR* == 0).
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_MCAST_QUERIER
+ * Enable (*IFLA_BR_MULTICAST_QUERIER* > 0) or disable
+ * (*IFLA_BR_MULTICAST_QUERIER* == 0) IGMP querier, ie sending of multicast
+ * queries by the bridge.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_MCAST_HASH_ELASTICITY
+ * Set multicast database hash elasticity, It is the maximum chain length in
+ * the multicast hash table. This attribute is *deprecated* and the value
+ * is always 16.
+ *
+ * @IFLA_BR_MCAST_HASH_MAX
+ * Set maximum size of the multicast hash table
+ *
+ * The default value is 4096, the value must be a power of 2.
+ *
+ * @IFLA_BR_MCAST_LAST_MEMBER_CNT
+ * The Last Member Query Count is the number of Group-Specific Queries
+ * sent before the router assumes there are no local members. The Last
+ * Member Query Count is also the number of Group-and-Source-Specific
+ * Queries sent before the router assumes there are no listeners for a
+ * particular source.
+ *
+ * The default value is 2.
+ *
+ * @IFLA_BR_MCAST_STARTUP_QUERY_CNT
+ * The Startup Query Count is the number of Queries sent out on startup,
+ * separated by the Startup Query Interval.
+ *
+ * The default value is 2.
+ *
+ * @IFLA_BR_MCAST_LAST_MEMBER_INTVL
+ * The Last Member Query Interval is the Max Response Time inserted into
+ * Group-Specific Queries sent in response to Leave Group messages, and
+ * is also the amount of time between Group-Specific Query messages.
+ *
+ * The default value is (1 * USER_HZ).
+ *
+ * @IFLA_BR_MCAST_MEMBERSHIP_INTVL
+ * The interval after which the bridge will leave a group, if no membership
+ * reports for this group are received.
+ *
+ * The default value is (260 * USER_HZ).
+ *
+ * @IFLA_BR_MCAST_QUERIER_INTVL
+ * The interval between queries sent by other routers. if no queries are
+ * seen after this delay has passed, the bridge will start to send its own
+ * queries (as if *IFLA_BR_MCAST_QUERIER_INTVL* was enabled).
+ *
+ * The default value is (255 * USER_HZ).
+ *
+ * @IFLA_BR_MCAST_QUERY_INTVL
+ * The Query Interval is the interval between General Queries sent by
+ * the Querier.
+ *
+ * The default value is (125 * USER_HZ). The minimum value is (1 * USER_HZ).
+ *
+ * @IFLA_BR_MCAST_QUERY_RESPONSE_INTVL
+ * The Max Response Time used to calculate the Max Resp Code inserted
+ * into the periodic General Queries.
+ *
+ * The default value is (10 * USER_HZ).
+ *
+ * @IFLA_BR_MCAST_STARTUP_QUERY_INTVL
+ * The interval between queries in the startup phase.
+ *
+ * The default value is (125 * USER_HZ) / 4. The minimum value is (1 * USER_HZ).
+ *
+ * @IFLA_BR_NF_CALL_IPTABLES
+ * Enable (*NF_CALL_IPTABLES* > 0) or disable (*NF_CALL_IPTABLES* == 0)
+ * iptables hooks on the bridge.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_NF_CALL_IP6TABLES
+ * Enable (*NF_CALL_IP6TABLES* > 0) or disable (*NF_CALL_IP6TABLES* == 0)
+ * ip6tables hooks on the bridge.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_NF_CALL_ARPTABLES
+ * Enable (*NF_CALL_ARPTABLES* > 0) or disable (*NF_CALL_ARPTABLES* == 0)
+ * arptables hooks on the bridge.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_VLAN_DEFAULT_PVID
+ * VLAN ID applied to untagged and priority-tagged incoming packets.
+ *
+ * The default value is 1. Setting to the special value 0 makes all ports of
+ * this bridge not have a PVID by default, which means that they will
+ * not accept VLAN-untagged traffic.
+ *
+ * @IFLA_BR_PAD
+ * Bridge attribute padding type for netlink message.
+ *
+ * @IFLA_BR_VLAN_STATS_ENABLED
+ * Enable (*IFLA_BR_VLAN_STATS_ENABLED* == 1) or disable
+ * (*IFLA_BR_VLAN_STATS_ENABLED* == 0) per-VLAN stats accounting.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_MCAST_STATS_ENABLED
+ * Enable (*IFLA_BR_MCAST_STATS_ENABLED* > 0) or disable
+ * (*IFLA_BR_MCAST_STATS_ENABLED* == 0) multicast (IGMP/MLD) stats
+ * accounting.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_MCAST_IGMP_VERSION
+ * Set the IGMP version.
+ *
+ * The valid values are 2 and 3. The default value is 2.
+ *
+ * @IFLA_BR_MCAST_MLD_VERSION
+ * Set the MLD version.
+ *
+ * The valid values are 1 and 2. The default value is 1.
+ *
+ * @IFLA_BR_VLAN_STATS_PER_PORT
+ * Enable (*IFLA_BR_VLAN_STATS_PER_PORT* == 1) or disable
+ * (*IFLA_BR_VLAN_STATS_PER_PORT* == 0) per-VLAN per-port stats accounting.
+ * Can be changed only when there are no port VLANs configured.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_MULTI_BOOLOPT
+ * The multi_boolopt is used to control new boolean options to avoid adding
+ * new netlink attributes. You can look at ``enum br_boolopt_id`` for those
+ * options.
+ *
+ * @IFLA_BR_MCAST_QUERIER_STATE
+ * Bridge mcast querier states, read only.
+ *
+ * @IFLA_BR_FDB_N_LEARNED
+ * The number of dynamically learned FDB entries for the current bridge,
+ * read only.
+ *
+ * @IFLA_BR_FDB_MAX_LEARNED
+ * Set the number of max dynamically learned FDB entries for the current
+ * bridge.
+ */
enum {
IFLA_BR_UNSPEC,
IFLA_BR_FORWARD_DELAY,
@@ -510,6 +791,8 @@ enum {
IFLA_BR_VLAN_STATS_PER_PORT,
IFLA_BR_MULTI_BOOLOPT,
IFLA_BR_MCAST_QUERIER_STATE,
+ IFLA_BR_FDB_N_LEARNED,
+ IFLA_BR_FDB_MAX_LEARNED,
__IFLA_BR_MAX,
};
@@ -520,11 +803,252 @@ struct ifla_bridge_id {
__u8 addr[6]; /* ETH_ALEN */
};
+/**
+ * DOC: Bridge mode enum definition
+ *
+ * @BRIDGE_MODE_HAIRPIN
+ * Controls whether traffic may be sent back out of the port on which it
+ * was received. This option is also called reflective relay mode, and is
+ * used to support basic VEPA (Virtual Ethernet Port Aggregator)
+ * capabilities. By default, this flag is turned off and the bridge will
+ * not forward traffic back out of the receiving port.
+ */
enum {
BRIDGE_MODE_UNSPEC,
BRIDGE_MODE_HAIRPIN,
};
+/**
+ * DOC: Bridge port enum definition
+ *
+ * @IFLA_BRPORT_STATE
+ * The operation state of the port. Here are the valid values.
+ *
+ * * 0 - port is in STP *DISABLED* state. Make this port completely
+ * inactive for STP. This is also called BPDU filter and could be used
+ * to disable STP on an untrusted port, like a leaf virtual device.
+ * The traffic forwarding is also stopped on this port.
+ * * 1 - port is in STP *LISTENING* state. Only valid if STP is enabled
+ * on the bridge. In this state the port listens for STP BPDUs and
+ * drops all other traffic frames.
+ * * 2 - port is in STP *LEARNING* state. Only valid if STP is enabled on
+ * the bridge. In this state the port will accept traffic only for the
+ * purpose of updating MAC address tables.
+ * * 3 - port is in STP *FORWARDING* state. Port is fully active.
+ * * 4 - port is in STP *BLOCKING* state. Only valid if STP is enabled on
+ * the bridge. This state is used during the STP election process.
+ * In this state, port will only process STP BPDUs.
+ *
+ * @IFLA_BRPORT_PRIORITY
+ * The STP port priority. The valid values are between 0 and 255.
+ *
+ * @IFLA_BRPORT_COST
+ * The STP path cost of the port. The valid values are between 1 and 65535.
+ *
+ * @IFLA_BRPORT_MODE
+ * Set the bridge port mode. See *BRIDGE_MODE_HAIRPIN* for more details.
+ *
+ * @IFLA_BRPORT_GUARD
+ * Controls whether STP BPDUs will be processed by the bridge port. By
+ * default, the flag is turned off to allow BPDU processing. Turning this
+ * flag on will disable the bridge port if a STP BPDU packet is received.
+ *
+ * If the bridge has Spanning Tree enabled, hostile devices on the network
+ * may send BPDU on a port and cause network failure. Setting *guard on*
+ * will detect and stop this by disabling the port. The port will be
+ * restarted if the link is brought down, or removed and reattached.
+ *
+ * @IFLA_BRPORT_PROTECT
+ * Controls whether a given port is allowed to become a root port or not.
+ * Only used when STP is enabled on the bridge. By default the flag is off.
+ *
+ * This feature is also called root port guard. If BPDU is received from a
+ * leaf (edge) port, it should not be elected as root port. This could
+ * be used if using STP on a bridge and the downstream bridges are not fully
+ * trusted; this prevents a hostile guest from rerouting traffic.
+ *
+ * @IFLA_BRPORT_FAST_LEAVE
+ * This flag allows the bridge to immediately stop multicast traffic
+ * forwarding on a port that receives an IGMP Leave message. It is only used
+ * when IGMP snooping is enabled on the bridge. By default the flag is off.
+ *
+ * @IFLA_BRPORT_LEARNING
+ * Controls whether a given port will learn *source* MAC addresses from
+ * received traffic or not. Also controls whether dynamic FDB entries
+ * (which can also be added by software) will be refreshed by incoming
+ * traffic. By default this flag is on.
+ *
+ * @IFLA_BRPORT_UNICAST_FLOOD
+ * Controls whether unicast traffic for which there is no FDB entry will
+ * be flooded towards this port. By default this flag is on.
+ *
+ * @IFLA_BRPORT_PROXYARP
+ * Enable proxy ARP on this port.
+ *
+ * @IFLA_BRPORT_LEARNING_SYNC
+ * Controls whether a given port will sync MAC addresses learned on device
+ * port to bridge FDB.
+ *
+ * @IFLA_BRPORT_PROXYARP_WIFI
+ * Enable proxy ARP on this port which meets extended requirements by
+ * IEEE 802.11 and Hotspot 2.0 specifications.
+ *
+ * @IFLA_BRPORT_ROOT_ID
+ *
+ * @IFLA_BRPORT_BRIDGE_ID
+ *
+ * @IFLA_BRPORT_DESIGNATED_PORT
+ *
+ * @IFLA_BRPORT_DESIGNATED_COST
+ *
+ * @IFLA_BRPORT_ID
+ *
+ * @IFLA_BRPORT_NO
+ *
+ * @IFLA_BRPORT_TOPOLOGY_CHANGE_ACK
+ *
+ * @IFLA_BRPORT_CONFIG_PENDING
+ *
+ * @IFLA_BRPORT_MESSAGE_AGE_TIMER
+ *
+ * @IFLA_BRPORT_FORWARD_DELAY_TIMER
+ *
+ * @IFLA_BRPORT_HOLD_TIMER
+ *
+ * @IFLA_BRPORT_FLUSH
+ * Flush bridge ports' fdb dynamic entries.
+ *
+ * @IFLA_BRPORT_MULTICAST_ROUTER
+ * Configure the port's multicast router presence. A port with
+ * a multicast router will receive all multicast traffic.
+ * The valid values are:
+ *
+ * * 0 disable multicast routers on this port
+ * * 1 let the system detect the presence of routers (default)
+ * * 2 permanently enable multicast traffic forwarding on this port
+ * * 3 enable multicast routers temporarily on this port, not depending
+ * on incoming queries.
+ *
+ * @IFLA_BRPORT_PAD
+ *
+ * @IFLA_BRPORT_MCAST_FLOOD
+ * Controls whether a given port will flood multicast traffic for which
+ * there is no MDB entry. By default this flag is on.
+ *
+ * @IFLA_BRPORT_MCAST_TO_UCAST
+ * Controls whether a given port will replicate packets using unicast
+ * instead of multicast. By default this flag is off.
+ *
+ * This is done by copying the packet per host and changing the multicast
+ * destination MAC to a unicast one accordingly.
+ *
+ * *mcast_to_unicast* works on top of the multicast snooping feature of the
+ * bridge. Which means unicast copies are only delivered to hosts which
+ * are interested in unicast and signaled this via IGMP/MLD reports previously.
+ *
+ * This feature is intended for interface types which have a more reliable
+ * and/or efficient way to deliver unicast packets than broadcast ones
+ * (e.g. WiFi).
+ *
+ * However, it should only be enabled on interfaces where no IGMPv2/MLDv1
+ * report suppression takes place. IGMP/MLD report suppression issue is
+ * usually overcome by the network daemon (supplicant) enabling AP isolation
+ * and by that separating all STAs.
+ *
+ * Delivery of STA-to-STA IP multicast is made possible again by enabling
+ * and utilizing the bridge hairpin mode, which considers the incoming port
+ * as a potential outgoing port, too (see *BRIDGE_MODE_HAIRPIN* option).
+ * Hairpin mode is performed after multicast snooping, therefore leading
+ * to only deliver reports to STAs running a multicast router.
+ *
+ * @IFLA_BRPORT_VLAN_TUNNEL
+ * Controls whether vlan to tunnel mapping is enabled on the port.
+ * By default this flag is off.
+ *
+ * @IFLA_BRPORT_BCAST_FLOOD
+ * Controls flooding of broadcast traffic on the given port. By default
+ * this flag is on.
+ *
+ * @IFLA_BRPORT_GROUP_FWD_MASK
+ * Set the group forward mask. This is a bitmask that is applied to
+ * decide whether to forward incoming frames destined to link-local
+ * addresses. The addresses of the form are 01:80:C2:00:00:0X (defaults
+ * to 0, which means the bridge does not forward any link-local frames
+ * coming on this port).
+ *
+ * @IFLA_BRPORT_NEIGH_SUPPRESS
+ * Controls whether neighbor discovery (arp and nd) proxy and suppression
+ * is enabled on the port. By default this flag is off.
+ *
+ * @IFLA_BRPORT_ISOLATED
+ * Controls whether a given port will be isolated, which means it will be
+ * able to communicate with non-isolated ports only. By default this
+ * flag is off.
+ *
+ * @IFLA_BRPORT_BACKUP_PORT
+ * Set a backup port. If the port loses carrier all traffic will be
+ * redirected to the configured backup port. Set the value to 0 to disable
+ * it.
+ *
+ * @IFLA_BRPORT_MRP_RING_OPEN
+ *
+ * @IFLA_BRPORT_MRP_IN_OPEN
+ *
+ * @IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT
+ * The number of per-port EHT hosts limit. The default value is 512.
+ * Setting to 0 is not allowed.
+ *
+ * @IFLA_BRPORT_MCAST_EHT_HOSTS_CNT
+ * The current number of tracked hosts, read only.
+ *
+ * @IFLA_BRPORT_LOCKED
+ * Controls whether a port will be locked, meaning that hosts behind the
+ * port will not be able to communicate through the port unless an FDB
+ * entry with the unit's MAC address is in the FDB. The common use case is
+ * that hosts are allowed access through authentication with the IEEE 802.1X
+ * protocol or based on whitelists. By default this flag is off.
+ *
+ * Please note that secure 802.1X deployments should always use the
+ * *BR_BOOLOPT_NO_LL_LEARN* flag, to not permit the bridge to populate its
+ * FDB based on link-local (EAPOL) traffic received on the port.
+ *
+ * @IFLA_BRPORT_MAB
+ * Controls whether a port will use MAC Authentication Bypass (MAB), a
+ * technique through which select MAC addresses may be allowed on a locked
+ * port, without using 802.1X authentication. Packets with an unknown source
+ * MAC address generates a "locked" FDB entry on the incoming bridge port.
+ * The common use case is for user space to react to these bridge FDB
+ * notifications and optionally replace the locked FDB entry with a normal
+ * one, allowing traffic to pass for whitelisted MAC addresses.
+ *
+ * Setting this flag also requires *IFLA_BRPORT_LOCKED* and
+ * *IFLA_BRPORT_LEARNING*. *IFLA_BRPORT_LOCKED* ensures that unauthorized
+ * data packets are dropped, and *IFLA_BRPORT_LEARNING* allows the dynamic
+ * FDB entries installed by user space (as replacements for the locked FDB
+ * entries) to be refreshed and/or aged out.
+ *
+ * @IFLA_BRPORT_MCAST_N_GROUPS
+ *
+ * @IFLA_BRPORT_MCAST_MAX_GROUPS
+ * Sets the maximum number of MDB entries that can be registered for a
+ * given port. Attempts to register more MDB entries at the port than this
+ * limit allows will be rejected, whether they are done through netlink
+ * (e.g. the bridge tool), or IGMP or MLD membership reports. Setting a
+ * limit of 0 disables the limit. The default value is 0.
+ *
+ * @IFLA_BRPORT_NEIGH_VLAN_SUPPRESS
+ * Controls whether neighbor discovery (arp and nd) proxy and suppression is
+ * enabled for a given port. By default this flag is off.
+ *
+ * Note that this option only takes effect when *IFLA_BRPORT_NEIGH_SUPPRESS*
+ * is enabled for a given port.
+ *
+ * @IFLA_BRPORT_BACKUP_NHID
+ * The FDB nexthop object ID to attach to packets being redirected to a
+ * backup port that has VLAN tunnel mapping enabled (via the
+ * *IFLA_BRPORT_VLAN_TUNNEL* option). Setting a value of 0 (default) has
+ * the effect of not attaching any ID.
+ */
enum {
IFLA_BRPORT_UNSPEC,
IFLA_BRPORT_STATE, /* Spanning tree state */
@@ -769,6 +1293,19 @@ enum netkit_mode {
NETKIT_L3,
};
+/* NETKIT_SCRUB_NONE leaves clearing skb->{mark,priority} up to
+ * the BPF program if attached. This also means the latter can
+ * consume the two fields if they were populated earlier.
+ *
+ * NETKIT_SCRUB_DEFAULT zeroes skb->{mark,priority} fields before
+ * invoking the attached BPF program when the peer device resides
+ * in a different network namespace. This is the default behavior.
+ */
+enum netkit_scrub {
+ NETKIT_SCRUB_NONE,
+ NETKIT_SCRUB_DEFAULT,
+};
+
enum {
IFLA_NETKIT_UNSPEC,
IFLA_NETKIT_PEER_INFO,
@@ -776,6 +1313,10 @@ enum {
IFLA_NETKIT_POLICY,
IFLA_NETKIT_PEER_POLICY,
IFLA_NETKIT_MODE,
+ IFLA_NETKIT_SCRUB,
+ IFLA_NETKIT_PEER_SCRUB,
+ IFLA_NETKIT_HEADROOM,
+ IFLA_NETKIT_TAILROOM,
__IFLA_NETKIT_MAX,
};
#define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1)
@@ -854,6 +1395,7 @@ enum {
IFLA_VXLAN_DF,
IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */
IFLA_VXLAN_LOCALBYPASS,
+ IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
@@ -871,6 +1413,13 @@ enum ifla_vxlan_df {
VXLAN_DF_MAX = __VXLAN_DF_END - 1,
};
+enum ifla_vxlan_label_policy {
+ VXLAN_LABEL_FIXED = 0,
+ VXLAN_LABEL_INHERIT = 1,
+ __VXLAN_LABEL_END,
+ VXLAN_LABEL_MAX = __VXLAN_LABEL_END - 1,
+};
+
/* GENEVE section */
enum {
IFLA_GENEVE_UNSPEC,
@@ -935,6 +1484,8 @@ enum {
IFLA_GTP_ROLE,
IFLA_GTP_CREATE_SOCKETS,
IFLA_GTP_RESTART_COUNT,
+ IFLA_GTP_LOCAL,
+ IFLA_GTP_LOCAL6,
__IFLA_GTP_MAX,
};
#define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1)
@@ -974,6 +1525,7 @@ enum {
IFLA_BOND_AD_LACP_ACTIVE,
IFLA_BOND_MISSED_MAX,
IFLA_BOND_NS_IP6_TARGET,
+ IFLA_BOND_COUPLED_CONTROL,
__IFLA_BOND_MAX,
};
@@ -1239,6 +1791,7 @@ enum {
IFLA_HSR_PROTOCOL, /* Indicate different protocol than
* HSR. For example PRP.
*/
+ IFLA_HSR_INTERLINK, /* HSR interlink network device */
__IFLA_HSR_MAX,
};
@@ -1416,7 +1969,9 @@ enum {
enum {
IFLA_DSA_UNSPEC,
- IFLA_DSA_MASTER,
+ IFLA_DSA_CONDUIT,
+ /* Deprecated, use IFLA_DSA_CONDUIT instead */
+ IFLA_DSA_MASTER = IFLA_DSA_CONDUIT,
__IFLA_DSA_MAX,
};
diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
index 638c606dfa74..42ec5ddaab8d 100644
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -41,6 +41,10 @@
*/
#define XDP_UMEM_TX_SW_CSUM (1 << 1)
+/* Request to reserve tx_metadata_len bytes of per-chunk metadata.
+ */
+#define XDP_UMEM_TX_METADATA_LEN (1 << 2)
+
struct sockaddr_xdp {
__u16 sxdp_family;
__u16 sxdp_flags;
@@ -113,12 +117,12 @@ struct xdp_options {
((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1)
/* Request transmit timestamp. Upon completion, put it into tx_timestamp
- * field of union xsk_tx_metadata.
+ * field of struct xsk_tx_metadata.
*/
#define XDP_TXMD_FLAGS_TIMESTAMP (1 << 0)
/* Request transmit checksum offload. Checksum start position and offset
- * are communicated via csum_start and csum_offset fields of union
+ * are communicated via csum_start and csum_offset fields of struct
* xsk_tx_metadata.
*/
#define XDP_TXMD_FLAGS_CHECKSUM (1 << 1)
diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
index e682ab628dfa..5d32d53508d9 100644
--- a/tools/include/uapi/linux/in.h
+++ b/tools/include/uapi/linux/in.h
@@ -81,6 +81,8 @@ enum {
#define IPPROTO_ETHERNET IPPROTO_ETHERNET
IPPROTO_RAW = 255, /* Raw IP packets */
#define IPPROTO_RAW IPPROTO_RAW
+ IPPROTO_SMC = 256, /* Shared Memory Communications */
+#define IPPROTO_SMC IPPROTO_SMC
IPPROTO_MPTCP = 262, /* Multipath TCP connection */
#define IPPROTO_MPTCP IPPROTO_MPTCP
IPPROTO_MAX
@@ -139,7 +141,7 @@ struct in_addr {
*/
#define IP_PMTUDISC_INTERFACE 4
/* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get
- * fragmented if they exeed the interface mtu
+ * fragmented if they exceed the interface mtu
*/
#define IP_PMTUDISC_OMIT 5
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index c3308536482b..502ea63b5d2e 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -16,6 +16,11 @@
#define KVM_API_VERSION 12
+/*
+ * Backwards-compatible definitions.
+ */
+#define __KVM_HAVE_GUEST_DEBUG
+
/* for KVM_SET_USER_MEMORY_REGION */
struct kvm_userspace_memory_region {
__u32 slot;
@@ -85,43 +90,6 @@ struct kvm_pit_config {
#define KVM_PIT_SPEAKER_DUMMY 1
-struct kvm_s390_skeys {
- __u64 start_gfn;
- __u64 count;
- __u64 skeydata_addr;
- __u32 flags;
- __u32 reserved[9];
-};
-
-#define KVM_S390_CMMA_PEEK (1 << 0)
-
-/**
- * kvm_s390_cmma_log - Used for CMMA migration.
- *
- * Used both for input and output.
- *
- * @start_gfn: Guest page number to start from.
- * @count: Size of the result buffer.
- * @flags: Control operation mode via KVM_S390_CMMA_* flags
- * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty
- * pages are still remaining.
- * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set
- * in the PGSTE.
- * @values: Pointer to the values buffer.
- *
- * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls.
- */
-struct kvm_s390_cmma_log {
- __u64 start_gfn;
- __u32 count;
- __u32 flags;
- union {
- __u64 remaining;
- __u64 mask;
- };
- __u64 values;
-};
-
struct kvm_hyperv_exit {
#define KVM_EXIT_HYPERV_SYNIC 1
#define KVM_EXIT_HYPERV_HCALL 2
@@ -224,11 +192,24 @@ struct kvm_xen_exit {
/* Flags that describe what fields in emulation_failure hold valid data. */
#define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0)
+/*
+ * struct kvm_run can be modified by userspace at any time, so KVM must be
+ * careful to avoid TOCTOU bugs. In order to protect KVM, HINT_UNSAFE_IN_KVM()
+ * renames fields in struct kvm_run from <symbol> to <symbol>__unsafe when
+ * compiled into the kernel, ensuring that any use within KVM is obvious and
+ * gets extra scrutiny.
+ */
+#ifdef __KERNEL__
+#define HINT_UNSAFE_IN_KVM(_symbol) _symbol##__unsafe
+#else
+#define HINT_UNSAFE_IN_KVM(_symbol) _symbol
+#endif
+
/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run {
/* in */
__u8 request_interrupt_window;
- __u8 immediate_exit;
+ __u8 HINT_UNSAFE_IN_KVM(immediate_exit);
__u8 padding1[6];
/* out */
@@ -315,11 +296,6 @@ struct kvm_run {
__u32 ipb;
} s390_sieic;
/* KVM_EXIT_S390_RESET */
-#define KVM_S390_RESET_POR 1
-#define KVM_S390_RESET_CLEAR 2
-#define KVM_S390_RESET_SUBSYSTEM 4
-#define KVM_S390_RESET_CPU_INIT 8
-#define KVM_S390_RESET_IPL 16
__u64 s390_reset_flags;
/* KVM_EXIT_S390_UCONTROL */
struct {
@@ -536,43 +512,6 @@ struct kvm_translation {
__u8 pad[5];
};
-/* for KVM_S390_MEM_OP */
-struct kvm_s390_mem_op {
- /* in */
- __u64 gaddr; /* the guest address */
- __u64 flags; /* flags */
- __u32 size; /* amount of bytes */
- __u32 op; /* type of operation */
- __u64 buf; /* buffer in userspace */
- union {
- struct {
- __u8 ar; /* the access register number */
- __u8 key; /* access key, ignored if flag unset */
- __u8 pad1[6]; /* ignored */
- __u64 old_addr; /* ignored if cmpxchg flag unset */
- };
- __u32 sida_offset; /* offset into the sida */
- __u8 reserved[32]; /* ignored */
- };
-};
-/* types for kvm_s390_mem_op->op */
-#define KVM_S390_MEMOP_LOGICAL_READ 0
-#define KVM_S390_MEMOP_LOGICAL_WRITE 1
-#define KVM_S390_MEMOP_SIDA_READ 2
-#define KVM_S390_MEMOP_SIDA_WRITE 3
-#define KVM_S390_MEMOP_ABSOLUTE_READ 4
-#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5
-#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6
-
-/* flags for kvm_s390_mem_op->flags */
-#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0)
-#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1)
-#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2)
-
-/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */
-#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0)
-#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1)
-
/* for KVM_INTERRUPT */
struct kvm_interrupt {
/* in */
@@ -637,124 +576,6 @@ struct kvm_mp_state {
__u32 mp_state;
};
-struct kvm_s390_psw {
- __u64 mask;
- __u64 addr;
-};
-
-/* valid values for type in kvm_s390_interrupt */
-#define KVM_S390_SIGP_STOP 0xfffe0000u
-#define KVM_S390_PROGRAM_INT 0xfffe0001u
-#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u
-#define KVM_S390_RESTART 0xfffe0003u
-#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u
-#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u
-#define KVM_S390_MCHK 0xfffe1000u
-#define KVM_S390_INT_CLOCK_COMP 0xffff1004u
-#define KVM_S390_INT_CPU_TIMER 0xffff1005u
-#define KVM_S390_INT_VIRTIO 0xffff2603u
-#define KVM_S390_INT_SERVICE 0xffff2401u
-#define KVM_S390_INT_EMERGENCY 0xffff1201u
-#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u
-/* Anything below 0xfffe0000u is taken by INT_IO */
-#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \
- (((schid)) | \
- ((ssid) << 16) | \
- ((cssid) << 18) | \
- ((ai) << 26))
-#define KVM_S390_INT_IO_MIN 0x00000000u
-#define KVM_S390_INT_IO_MAX 0xfffdffffu
-#define KVM_S390_INT_IO_AI_MASK 0x04000000u
-
-
-struct kvm_s390_interrupt {
- __u32 type;
- __u32 parm;
- __u64 parm64;
-};
-
-struct kvm_s390_io_info {
- __u16 subchannel_id;
- __u16 subchannel_nr;
- __u32 io_int_parm;
- __u32 io_int_word;
-};
-
-struct kvm_s390_ext_info {
- __u32 ext_params;
- __u32 pad;
- __u64 ext_params2;
-};
-
-struct kvm_s390_pgm_info {
- __u64 trans_exc_code;
- __u64 mon_code;
- __u64 per_address;
- __u32 data_exc_code;
- __u16 code;
- __u16 mon_class_nr;
- __u8 per_code;
- __u8 per_atmid;
- __u8 exc_access_id;
- __u8 per_access_id;
- __u8 op_access_id;
-#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01
-#define KVM_S390_PGM_FLAGS_ILC_0 0x02
-#define KVM_S390_PGM_FLAGS_ILC_1 0x04
-#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06
-#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08
- __u8 flags;
- __u8 pad[2];
-};
-
-struct kvm_s390_prefix_info {
- __u32 address;
-};
-
-struct kvm_s390_extcall_info {
- __u16 code;
-};
-
-struct kvm_s390_emerg_info {
- __u16 code;
-};
-
-#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01
-struct kvm_s390_stop_info {
- __u32 flags;
-};
-
-struct kvm_s390_mchk_info {
- __u64 cr14;
- __u64 mcic;
- __u64 failing_storage_address;
- __u32 ext_damage_code;
- __u32 pad;
- __u8 fixed_logout[16];
-};
-
-struct kvm_s390_irq {
- __u64 type;
- union {
- struct kvm_s390_io_info io;
- struct kvm_s390_ext_info ext;
- struct kvm_s390_pgm_info pgm;
- struct kvm_s390_emerg_info emerg;
- struct kvm_s390_extcall_info extcall;
- struct kvm_s390_prefix_info prefix;
- struct kvm_s390_stop_info stop;
- struct kvm_s390_mchk_info mchk;
- char reserved[64];
- } u;
-};
-
-struct kvm_s390_irq_state {
- __u64 buf;
- __u32 flags; /* will stay unused for compatibility reasons */
- __u32 len;
- __u32 reserved[4]; /* will stay unused for compatibility reasons */
-};
-
/* for KVM_SET_GUEST_DEBUG */
#define KVM_GUESTDBG_ENABLE 0x00000001
@@ -810,50 +631,6 @@ struct kvm_enable_cap {
__u8 pad[64];
};
-/* for KVM_PPC_GET_PVINFO */
-
-#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0)
-
-struct kvm_ppc_pvinfo {
- /* out */
- __u32 flags;
- __u32 hcall[4];
- __u8 pad[108];
-};
-
-/* for KVM_PPC_GET_SMMU_INFO */
-#define KVM_PPC_PAGE_SIZES_MAX_SZ 8
-
-struct kvm_ppc_one_page_size {
- __u32 page_shift; /* Page shift (or 0) */
- __u32 pte_enc; /* Encoding in the HPTE (>>12) */
-};
-
-struct kvm_ppc_one_seg_page_size {
- __u32 page_shift; /* Base page shift of segment (or 0) */
- __u32 slb_enc; /* SLB encoding for BookS */
- struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ];
-};
-
-#define KVM_PPC_PAGE_SIZES_REAL 0x00000001
-#define KVM_PPC_1T_SEGMENTS 0x00000002
-#define KVM_PPC_NO_HASH 0x00000004
-
-struct kvm_ppc_smmu_info {
- __u64 flags;
- __u32 slb_size;
- __u16 data_keys; /* # storage keys supported for data */
- __u16 instr_keys; /* # storage keys supported for instructions */
- struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
-};
-
-/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */
-struct kvm_ppc_resize_hpt {
- __u64 flags;
- __u32 shift;
- __u32 pad;
-};
-
#define KVMIO 0xAE
/* machine type bits, to be used as argument to KVM_CREATE_VM */
@@ -923,9 +700,7 @@ struct kvm_ppc_resize_hpt {
/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
#define KVM_CAP_USER_NMI 22
-#ifdef __KVM_HAVE_GUEST_DEBUG
#define KVM_CAP_SET_GUEST_DEBUG 23
-#endif
#ifdef __KVM_HAVE_PIT
#define KVM_CAP_REINJECT_CONTROL 24
#endif
@@ -1155,8 +930,9 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_MEMORY_ATTRIBUTES 233
#define KVM_CAP_GUEST_MEMFD 234
#define KVM_CAP_VM_TYPES 235
-
-#ifdef KVM_CAP_IRQ_ROUTING
+#define KVM_CAP_PRE_FAULT_MEMORY 236
+#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237
+#define KVM_CAP_X86_GUEST_MODE 238
struct kvm_irq_routing_irqchip {
__u32 irqchip;
@@ -1222,42 +998,6 @@ struct kvm_irq_routing {
struct kvm_irq_routing_entry entries[];
};
-#endif
-
-#ifdef KVM_CAP_MCE
-/* x86 MCE */
-struct kvm_x86_mce {
- __u64 status;
- __u64 addr;
- __u64 misc;
- __u64 mcg_status;
- __u8 bank;
- __u8 pad1[7];
- __u64 pad2[3];
-};
-#endif
-
-#ifdef KVM_CAP_XEN_HVM
-#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0)
-#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
-#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
-#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
-#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
-#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5)
-#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6)
-#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7)
-
-struct kvm_xen_hvm_config {
- __u32 flags;
- __u32 msr;
- __u64 blob_addr_32;
- __u64 blob_addr_64;
- __u8 blob_size_32;
- __u8 blob_size_64;
- __u8 pad2[30];
-};
-#endif
-
#define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
/*
* Available with KVM_CAP_IRQFD_RESAMPLE
@@ -1418,7 +1158,15 @@ enum kvm_device_type {
#define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME
KVM_DEV_TYPE_RISCV_AIA,
#define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA
+ KVM_DEV_TYPE_LOONGARCH_IPI,
+#define KVM_DEV_TYPE_LOONGARCH_IPI KVM_DEV_TYPE_LOONGARCH_IPI
+ KVM_DEV_TYPE_LOONGARCH_EIOINTC,
+#define KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_EIOINTC
+ KVM_DEV_TYPE_LOONGARCH_PCHPIC,
+#define KVM_DEV_TYPE_LOONGARCH_PCHPIC KVM_DEV_TYPE_LOONGARCH_PCHPIC
+
KVM_DEV_TYPE_MAX,
+
};
struct kvm_vfio_spapr_tce {
@@ -1442,11 +1190,6 @@ struct kvm_vfio_spapr_tce {
struct kvm_userspace_memory_region2)
/* enable ucontrol for s390 */
-struct kvm_s390_ucas_mapping {
- __u64 user_addr;
- __u64 vcpu_addr;
- __u64 length;
-};
#define KVM_S390_UCAS_MAP _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping)
#define KVM_S390_UCAS_UNMAP _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping)
#define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long)
@@ -1502,9 +1245,9 @@ struct kvm_s390_ucas_mapping {
/* Available with KVM_CAP_SPAPR_RESIZE_HPT */
#define KVM_PPC_RESIZE_HPT_PREPARE _IOR(KVMIO, 0xad, struct kvm_ppc_resize_hpt)
#define KVM_PPC_RESIZE_HPT_COMMIT _IOR(KVMIO, 0xae, struct kvm_ppc_resize_hpt)
-/* Available with KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3 */
+/* Available with KVM_CAP_PPC_MMU_RADIX or KVM_CAP_PPC_MMU_HASH_V3 */
#define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg)
-/* Available with KVM_CAP_PPC_RADIX_MMU */
+/* Available with KVM_CAP_PPC_MMU_RADIX */
#define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info)
/* Available with KVM_CAP_PPC_GET_CPU_CHAR */
#define KVM_PPC_GET_CPU_CHAR _IOR(KVMIO, 0xb1, struct kvm_ppc_cpu_char)
@@ -1641,89 +1384,6 @@ struct kvm_enc_region {
#define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3)
#define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4)
-struct kvm_s390_pv_sec_parm {
- __u64 origin;
- __u64 length;
-};
-
-struct kvm_s390_pv_unp {
- __u64 addr;
- __u64 size;
- __u64 tweak;
-};
-
-enum pv_cmd_dmp_id {
- KVM_PV_DUMP_INIT,
- KVM_PV_DUMP_CONFIG_STOR_STATE,
- KVM_PV_DUMP_COMPLETE,
- KVM_PV_DUMP_CPU,
-};
-
-struct kvm_s390_pv_dmp {
- __u64 subcmd;
- __u64 buff_addr;
- __u64 buff_len;
- __u64 gaddr; /* For dump storage state */
- __u64 reserved[4];
-};
-
-enum pv_cmd_info_id {
- KVM_PV_INFO_VM,
- KVM_PV_INFO_DUMP,
-};
-
-struct kvm_s390_pv_info_dump {
- __u64 dump_cpu_buffer_len;
- __u64 dump_config_mem_buffer_per_1m;
- __u64 dump_config_finalize_len;
-};
-
-struct kvm_s390_pv_info_vm {
- __u64 inst_calls_list[4];
- __u64 max_cpus;
- __u64 max_guests;
- __u64 max_guest_addr;
- __u64 feature_indication;
-};
-
-struct kvm_s390_pv_info_header {
- __u32 id;
- __u32 len_max;
- __u32 len_written;
- __u32 reserved;
-};
-
-struct kvm_s390_pv_info {
- struct kvm_s390_pv_info_header header;
- union {
- struct kvm_s390_pv_info_dump dump;
- struct kvm_s390_pv_info_vm vm;
- };
-};
-
-enum pv_cmd_id {
- KVM_PV_ENABLE,
- KVM_PV_DISABLE,
- KVM_PV_SET_SEC_PARMS,
- KVM_PV_UNPACK,
- KVM_PV_VERIFY,
- KVM_PV_PREP_RESET,
- KVM_PV_UNSHARE_ALL,
- KVM_PV_INFO,
- KVM_PV_DUMP,
- KVM_PV_ASYNC_CLEANUP_PREPARE,
- KVM_PV_ASYNC_CLEANUP_PERFORM,
-};
-
-struct kvm_pv_cmd {
- __u32 cmd; /* Command to be executed */
- __u16 rc; /* Ultravisor return code */
- __u16 rrc; /* Ultravisor return reason code */
- __u64 data; /* Data or address */
- __u32 flags; /* flags for future extensions. Must be 0 for now */
- __u32 reserved[3];
-};
-
/* Available with KVM_CAP_S390_PROTECTED */
#define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd)
@@ -1737,58 +1397,6 @@ struct kvm_pv_cmd {
#define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr)
#define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr)
-struct kvm_xen_hvm_attr {
- __u16 type;
- __u16 pad[3];
- union {
- __u8 long_mode;
- __u8 vector;
- __u8 runstate_update_flag;
- struct {
- __u64 gfn;
-#define KVM_XEN_INVALID_GFN ((__u64)-1)
- } shared_info;
- struct {
- __u32 send_port;
- __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */
- __u32 flags;
-#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0)
-#define KVM_XEN_EVTCHN_UPDATE (1 << 1)
-#define KVM_XEN_EVTCHN_RESET (1 << 2)
- /*
- * Events sent by the guest are either looped back to
- * the guest itself (potentially on a different port#)
- * or signalled via an eventfd.
- */
- union {
- struct {
- __u32 port;
- __u32 vcpu;
- __u32 priority;
- } port;
- struct {
- __u32 port; /* Zero for eventfd */
- __s32 fd;
- } eventfd;
- __u32 padding[4];
- } deliver;
- } evtchn;
- __u32 xen_version;
- __u64 pad[8];
- } u;
-};
-
-
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
-#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0
-#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1
-#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
-#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3
-#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */
-#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5
-
/* Per-vCPU Xen attributes */
#define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)
#define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr)
@@ -1799,242 +1407,6 @@ struct kvm_xen_hvm_attr {
#define KVM_GET_SREGS2 _IOR(KVMIO, 0xcc, struct kvm_sregs2)
#define KVM_SET_SREGS2 _IOW(KVMIO, 0xcd, struct kvm_sregs2)
-struct kvm_xen_vcpu_attr {
- __u16 type;
- __u16 pad[3];
- union {
- __u64 gpa;
-#define KVM_XEN_INVALID_GPA ((__u64)-1)
- __u64 pad[8];
- struct {
- __u64 state;
- __u64 state_entry_time;
- __u64 time_running;
- __u64 time_runnable;
- __u64 time_blocked;
- __u64 time_offline;
- } runstate;
- __u32 vcpu_id;
- struct {
- __u32 port;
- __u32 priority;
- __u64 expires_ns;
- } timer;
- __u8 vector;
- } u;
-};
-
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0
-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1
-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2
-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3
-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4
-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6
-#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7
-#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8
-
-/* Secure Encrypted Virtualization command */
-enum sev_cmd_id {
- /* Guest initialization commands */
- KVM_SEV_INIT = 0,
- KVM_SEV_ES_INIT,
- /* Guest launch commands */
- KVM_SEV_LAUNCH_START,
- KVM_SEV_LAUNCH_UPDATE_DATA,
- KVM_SEV_LAUNCH_UPDATE_VMSA,
- KVM_SEV_LAUNCH_SECRET,
- KVM_SEV_LAUNCH_MEASURE,
- KVM_SEV_LAUNCH_FINISH,
- /* Guest migration commands (outgoing) */
- KVM_SEV_SEND_START,
- KVM_SEV_SEND_UPDATE_DATA,
- KVM_SEV_SEND_UPDATE_VMSA,
- KVM_SEV_SEND_FINISH,
- /* Guest migration commands (incoming) */
- KVM_SEV_RECEIVE_START,
- KVM_SEV_RECEIVE_UPDATE_DATA,
- KVM_SEV_RECEIVE_UPDATE_VMSA,
- KVM_SEV_RECEIVE_FINISH,
- /* Guest status and debug commands */
- KVM_SEV_GUEST_STATUS,
- KVM_SEV_DBG_DECRYPT,
- KVM_SEV_DBG_ENCRYPT,
- /* Guest certificates commands */
- KVM_SEV_CERT_EXPORT,
- /* Attestation report */
- KVM_SEV_GET_ATTESTATION_REPORT,
- /* Guest Migration Extension */
- KVM_SEV_SEND_CANCEL,
-
- KVM_SEV_NR_MAX,
-};
-
-struct kvm_sev_cmd {
- __u32 id;
- __u64 data;
- __u32 error;
- __u32 sev_fd;
-};
-
-struct kvm_sev_launch_start {
- __u32 handle;
- __u32 policy;
- __u64 dh_uaddr;
- __u32 dh_len;
- __u64 session_uaddr;
- __u32 session_len;
-};
-
-struct kvm_sev_launch_update_data {
- __u64 uaddr;
- __u32 len;
-};
-
-
-struct kvm_sev_launch_secret {
- __u64 hdr_uaddr;
- __u32 hdr_len;
- __u64 guest_uaddr;
- __u32 guest_len;
- __u64 trans_uaddr;
- __u32 trans_len;
-};
-
-struct kvm_sev_launch_measure {
- __u64 uaddr;
- __u32 len;
-};
-
-struct kvm_sev_guest_status {
- __u32 handle;
- __u32 policy;
- __u32 state;
-};
-
-struct kvm_sev_dbg {
- __u64 src_uaddr;
- __u64 dst_uaddr;
- __u32 len;
-};
-
-struct kvm_sev_attestation_report {
- __u8 mnonce[16];
- __u64 uaddr;
- __u32 len;
-};
-
-struct kvm_sev_send_start {
- __u32 policy;
- __u64 pdh_cert_uaddr;
- __u32 pdh_cert_len;
- __u64 plat_certs_uaddr;
- __u32 plat_certs_len;
- __u64 amd_certs_uaddr;
- __u32 amd_certs_len;
- __u64 session_uaddr;
- __u32 session_len;
-};
-
-struct kvm_sev_send_update_data {
- __u64 hdr_uaddr;
- __u32 hdr_len;
- __u64 guest_uaddr;
- __u32 guest_len;
- __u64 trans_uaddr;
- __u32 trans_len;
-};
-
-struct kvm_sev_receive_start {
- __u32 handle;
- __u32 policy;
- __u64 pdh_uaddr;
- __u32 pdh_len;
- __u64 session_uaddr;
- __u32 session_len;
-};
-
-struct kvm_sev_receive_update_data {
- __u64 hdr_uaddr;
- __u32 hdr_len;
- __u64 guest_uaddr;
- __u32 guest_len;
- __u64 trans_uaddr;
- __u32 trans_len;
-};
-
-#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
-#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
-#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2)
-
-struct kvm_assigned_pci_dev {
- __u32 assigned_dev_id;
- __u32 busnr;
- __u32 devfn;
- __u32 flags;
- __u32 segnr;
- union {
- __u32 reserved[11];
- };
-};
-
-#define KVM_DEV_IRQ_HOST_INTX (1 << 0)
-#define KVM_DEV_IRQ_HOST_MSI (1 << 1)
-#define KVM_DEV_IRQ_HOST_MSIX (1 << 2)
-
-#define KVM_DEV_IRQ_GUEST_INTX (1 << 8)
-#define KVM_DEV_IRQ_GUEST_MSI (1 << 9)
-#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10)
-
-#define KVM_DEV_IRQ_HOST_MASK 0x00ff
-#define KVM_DEV_IRQ_GUEST_MASK 0xff00
-
-struct kvm_assigned_irq {
- __u32 assigned_dev_id;
- __u32 host_irq; /* ignored (legacy field) */
- __u32 guest_irq;
- __u32 flags;
- union {
- __u32 reserved[12];
- };
-};
-
-struct kvm_assigned_msix_nr {
- __u32 assigned_dev_id;
- __u16 entry_nr;
- __u16 padding;
-};
-
-#define KVM_MAX_MSIX_PER_DEV 256
-struct kvm_assigned_msix_entry {
- __u32 assigned_dev_id;
- __u32 gsi;
- __u16 entry; /* The index of entry in the MSI-X table */
- __u16 padding[3];
-};
-
-#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
-#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
-
-/* Available with KVM_CAP_ARM_USER_IRQ */
-
-/* Bits for run->s.regs.device_irq_level */
-#define KVM_ARM_DEV_EL1_VTIMER (1 << 0)
-#define KVM_ARM_DEV_EL1_PTIMER (1 << 1)
-#define KVM_ARM_DEV_PMU (1 << 2)
-
-struct kvm_hyperv_eventfd {
- __u32 conn_id;
- __s32 fd;
- __u32 flags;
- __u32 padding[3];
-};
-
-#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff
-#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0)
-
#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0)
#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1)
@@ -2180,33 +1552,6 @@ struct kvm_stats_desc {
/* Available with KVM_CAP_S390_ZPCI_OP */
#define KVM_S390_ZPCI_OP _IOW(KVMIO, 0xd1, struct kvm_s390_zpci_op)
-struct kvm_s390_zpci_op {
- /* in */
- __u32 fh; /* target device */
- __u8 op; /* operation to perform */
- __u8 pad[3];
- union {
- /* for KVM_S390_ZPCIOP_REG_AEN */
- struct {
- __u64 ibv; /* Guest addr of interrupt bit vector */
- __u64 sb; /* Guest addr of summary bit */
- __u32 flags;
- __u32 noi; /* Number of interrupts */
- __u8 isc; /* Guest interrupt subclass */
- __u8 sbo; /* Offset of guest summary bit vector */
- __u16 pad;
- } reg_aen;
- __u64 reserved[8];
- } u;
-};
-
-/* types for kvm_s390_zpci_op->op */
-#define KVM_S390_ZPCIOP_REG_AEN 0
-#define KVM_S390_ZPCIOP_DEREG_AEN 1
-
-/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
-#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0)
-
/* Available with KVM_CAP_MEMORY_ATTRIBUTES */
#define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd2, struct kvm_memory_attributes)
@@ -2227,4 +1572,13 @@ struct kvm_create_guest_memfd {
__u64 reserved[6];
};
+#define KVM_PRE_FAULT_MEMORY _IOWR(KVMIO, 0xd5, struct kvm_pre_fault_memory)
+
+struct kvm_pre_fault_memory {
+ __u64 gpa;
+ __u64 size;
+ __u64 flags;
+ __u64 padding[5];
+};
+
#endif /* __LINUX_KVM_H */
diff --git a/tools/include/uapi/linux/memfd.h b/tools/include/uapi/linux/memfd.h
new file mode 100644
index 000000000000..01c0324e7733
--- /dev/null
+++ b/tools/include/uapi/linux/memfd.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _LINUX_MEMFD_H
+#define _LINUX_MEMFD_H
+
+#include <asm-generic/hugetlb_encode.h>
+
+/* flags for memfd_create(2) (unsigned int) */
+#define MFD_CLOEXEC 0x0001U
+#define MFD_ALLOW_SEALING 0x0002U
+#define MFD_HUGETLB 0x0004U
+/* not executable and sealed to prevent changing to executable. */
+#define MFD_NOEXEC_SEAL 0x0008U
+/* executable */
+#define MFD_EXEC 0x0010U
+
+/*
+ * Huge page size encoding when MFD_HUGETLB is specified, and a huge page
+ * size other than the default is desired. See hugetlb_encode.h.
+ * All known huge page size encodings are provided here. It is the
+ * responsibility of the application to know which sizes are supported on
+ * the running system. See mmap(2) man page for details.
+ */
+#define MFD_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT
+#define MFD_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK
+
+#define MFD_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB
+#define MFD_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB
+#define MFD_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB
+#define MFD_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB
+#define MFD_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB
+#define MFD_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB
+#define MFD_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB
+#define MFD_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB
+#define MFD_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB
+#define MFD_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB
+#define MFD_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
+#define MFD_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB
+
+#endif /* _LINUX_MEMFD_H */
diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h
index a246e11988d5..e89d00528f2f 100644
--- a/tools/include/uapi/linux/mman.h
+++ b/tools/include/uapi/linux/mman.h
@@ -17,6 +17,7 @@
#define MAP_SHARED 0x01 /* Share changes */
#define MAP_PRIVATE 0x02 /* Changes are private */
#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */
+#define MAP_DROPPABLE 0x08 /* Zero memory under memory pressure. */
/*
* Huge page size encoding when MAP_HUGETLB is specified, and a huge page
diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h
deleted file mode 100644
index ad5478dbad00..000000000000
--- a/tools/include/uapi/linux/mount.h
+++ /dev/null
@@ -1,211 +0,0 @@
-#ifndef _UAPI_LINUX_MOUNT_H
-#define _UAPI_LINUX_MOUNT_H
-
-#include <linux/types.h>
-
-/*
- * These are the fs-independent mount-flags: up to 32 flags are supported
- *
- * Usage of these is restricted within the kernel to core mount(2) code and
- * callers of sys_mount() only. Filesystems should be using the SB_*
- * equivalent instead.
- */
-#define MS_RDONLY 1 /* Mount read-only */
-#define MS_NOSUID 2 /* Ignore suid and sgid bits */
-#define MS_NODEV 4 /* Disallow access to device special files */
-#define MS_NOEXEC 8 /* Disallow program execution */
-#define MS_SYNCHRONOUS 16 /* Writes are synced at once */
-#define MS_REMOUNT 32 /* Alter flags of a mounted FS */
-#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */
-#define MS_DIRSYNC 128 /* Directory modifications are synchronous */
-#define MS_NOSYMFOLLOW 256 /* Do not follow symlinks */
-#define MS_NOATIME 1024 /* Do not update access times. */
-#define MS_NODIRATIME 2048 /* Do not update directory access times */
-#define MS_BIND 4096
-#define MS_MOVE 8192
-#define MS_REC 16384
-#define MS_VERBOSE 32768 /* War is peace. Verbosity is silence.
- MS_VERBOSE is deprecated. */
-#define MS_SILENT 32768
-#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */
-#define MS_UNBINDABLE (1<<17) /* change to unbindable */
-#define MS_PRIVATE (1<<18) /* change to private */
-#define MS_SLAVE (1<<19) /* change to slave */
-#define MS_SHARED (1<<20) /* change to shared */
-#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */
-#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */
-#define MS_I_VERSION (1<<23) /* Update inode I_version field */
-#define MS_STRICTATIME (1<<24) /* Always perform atime updates */
-#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
-
-/* These sb flags are internal to the kernel */
-#define MS_SUBMOUNT (1<<26)
-#define MS_NOREMOTELOCK (1<<27)
-#define MS_NOSEC (1<<28)
-#define MS_BORN (1<<29)
-#define MS_ACTIVE (1<<30)
-#define MS_NOUSER (1<<31)
-
-/*
- * Superblock flags that can be altered by MS_REMOUNT
- */
-#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\
- MS_LAZYTIME)
-
-/*
- * Old magic mount flag and mask
- */
-#define MS_MGC_VAL 0xC0ED0000
-#define MS_MGC_MSK 0xffff0000
-
-/*
- * open_tree() flags.
- */
-#define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */
-#define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */
-
-/*
- * move_mount() flags.
- */
-#define MOVE_MOUNT_F_SYMLINKS 0x00000001 /* Follow symlinks on from path */
-#define MOVE_MOUNT_F_AUTOMOUNTS 0x00000002 /* Follow automounts on from path */
-#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
-#define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */
-#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */
-#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */
-#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */
-#define MOVE_MOUNT_BENEATH 0x00000200 /* Mount beneath top mount */
-#define MOVE_MOUNT__MASK 0x00000377
-
-/*
- * fsopen() flags.
- */
-#define FSOPEN_CLOEXEC 0x00000001
-
-/*
- * fspick() flags.
- */
-#define FSPICK_CLOEXEC 0x00000001
-#define FSPICK_SYMLINK_NOFOLLOW 0x00000002
-#define FSPICK_NO_AUTOMOUNT 0x00000004
-#define FSPICK_EMPTY_PATH 0x00000008
-
-/*
- * The type of fsconfig() call made.
- */
-enum fsconfig_command {
- FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */
- FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */
- FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */
- FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */
- FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */
- FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */
- FSCONFIG_CMD_CREATE = 6, /* Create new or reuse existing superblock */
- FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */
- FSCONFIG_CMD_CREATE_EXCL = 8, /* Create new superblock, fail if reusing existing superblock */
-};
-
-/*
- * fsmount() flags.
- */
-#define FSMOUNT_CLOEXEC 0x00000001
-
-/*
- * Mount attributes.
- */
-#define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */
-#define MOUNT_ATTR_NOSUID 0x00000002 /* Ignore suid and sgid bits */
-#define MOUNT_ATTR_NODEV 0x00000004 /* Disallow access to device special files */
-#define MOUNT_ATTR_NOEXEC 0x00000008 /* Disallow program execution */
-#define MOUNT_ATTR__ATIME 0x00000070 /* Setting on how atime should be updated */
-#define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */
-#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */
-#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */
-#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */
-#define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */
-#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000 /* Do not follow symlinks */
-
-/*
- * mount_setattr()
- */
-struct mount_attr {
- __u64 attr_set;
- __u64 attr_clr;
- __u64 propagation;
- __u64 userns_fd;
-};
-
-/* List of all mount_attr versions. */
-#define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */
-
-
-/*
- * Structure for getting mount/superblock/filesystem info with statmount(2).
- *
- * The interface is similar to statx(2): individual fields or groups can be
- * selected with the @mask argument of statmount(). Kernel will set the @mask
- * field according to the supported fields.
- *
- * If string fields are selected, then the caller needs to pass a buffer that
- * has space after the fixed part of the structure. Nul terminated strings are
- * copied there and offsets relative to @str are stored in the relevant fields.
- * If the buffer is too small, then EOVERFLOW is returned. The actually used
- * size is returned in @size.
- */
-struct statmount {
- __u32 size; /* Total size, including strings */
- __u32 __spare1;
- __u64 mask; /* What results were written */
- __u32 sb_dev_major; /* Device ID */
- __u32 sb_dev_minor;
- __u64 sb_magic; /* ..._SUPER_MAGIC */
- __u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */
- __u32 fs_type; /* [str] Filesystem type */
- __u64 mnt_id; /* Unique ID of mount */
- __u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */
- __u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */
- __u32 mnt_parent_id_old;
- __u64 mnt_attr; /* MOUNT_ATTR_... */
- __u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */
- __u64 mnt_peer_group; /* ID of shared peer group */
- __u64 mnt_master; /* Mount receives propagation from this ID */
- __u64 propagate_from; /* Propagation from in current namespace */
- __u32 mnt_root; /* [str] Root of mount relative to root of fs */
- __u32 mnt_point; /* [str] Mountpoint relative to current root */
- __u64 __spare2[50];
- char str[]; /* Variable size part containing strings */
-};
-
-/*
- * Structure for passing mount ID and miscellaneous parameters to statmount(2)
- * and listmount(2).
- *
- * For statmount(2) @param represents the request mask.
- * For listmount(2) @param represents the last listed mount id (or zero).
- */
-struct mnt_id_req {
- __u32 size;
- __u32 spare;
- __u64 mnt_id;
- __u64 param;
-};
-
-/* List of all mnt_id_req versions. */
-#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */
-
-/*
- * @mask bits for statmount(2)
- */
-#define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */
-#define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... */
-#define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */
-#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */
-#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */
-#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */
-
-/*
- * Special @mnt_id values that can be passed to listmount
- */
-#define LSMT_ROOT 0xffffffffffffffff /* root mount */
-
-#endif /* _UAPI_LINUX_MOUNT_H */
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index 93cb411adf72..e4be227d3ad6 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -70,6 +70,10 @@ enum netdev_queue_type {
NETDEV_QUEUE_TYPE_TX,
};
+enum netdev_qstats_scope {
+ NETDEV_QSTATS_SCOPE_QUEUE = 1,
+};
+
enum {
NETDEV_A_DEV_IFINDEX = 1,
NETDEV_A_DEV_PAD,
@@ -89,6 +93,7 @@ enum {
NETDEV_A_PAGE_POOL_INFLIGHT,
NETDEV_A_PAGE_POOL_INFLIGHT_MEM,
NETDEV_A_PAGE_POOL_DETACH_TIME,
+ NETDEV_A_PAGE_POOL_DMABUF,
__NETDEV_A_PAGE_POOL_MAX,
NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1)
@@ -117,6 +122,9 @@ enum {
NETDEV_A_NAPI_ID,
NETDEV_A_NAPI_IRQ,
NETDEV_A_NAPI_PID,
+ NETDEV_A_NAPI_DEFER_HARD_IRQS,
+ NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT,
+ NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT,
__NETDEV_A_NAPI_MAX,
NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1)
@@ -127,12 +135,60 @@ enum {
NETDEV_A_QUEUE_IFINDEX,
NETDEV_A_QUEUE_TYPE,
NETDEV_A_QUEUE_NAPI_ID,
+ NETDEV_A_QUEUE_DMABUF,
__NETDEV_A_QUEUE_MAX,
NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)
};
enum {
+ NETDEV_A_QSTATS_IFINDEX = 1,
+ NETDEV_A_QSTATS_QUEUE_TYPE,
+ NETDEV_A_QSTATS_QUEUE_ID,
+ NETDEV_A_QSTATS_SCOPE,
+ NETDEV_A_QSTATS_RX_PACKETS = 8,
+ NETDEV_A_QSTATS_RX_BYTES,
+ NETDEV_A_QSTATS_TX_PACKETS,
+ NETDEV_A_QSTATS_TX_BYTES,
+ NETDEV_A_QSTATS_RX_ALLOC_FAIL,
+ NETDEV_A_QSTATS_RX_HW_DROPS,
+ NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS,
+ NETDEV_A_QSTATS_RX_CSUM_COMPLETE,
+ NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY,
+ NETDEV_A_QSTATS_RX_CSUM_NONE,
+ NETDEV_A_QSTATS_RX_CSUM_BAD,
+ NETDEV_A_QSTATS_RX_HW_GRO_PACKETS,
+ NETDEV_A_QSTATS_RX_HW_GRO_BYTES,
+ NETDEV_A_QSTATS_RX_HW_GRO_WIRE_PACKETS,
+ NETDEV_A_QSTATS_RX_HW_GRO_WIRE_BYTES,
+ NETDEV_A_QSTATS_RX_HW_DROP_RATELIMITS,
+ NETDEV_A_QSTATS_TX_HW_DROPS,
+ NETDEV_A_QSTATS_TX_HW_DROP_ERRORS,
+ NETDEV_A_QSTATS_TX_CSUM_NONE,
+ NETDEV_A_QSTATS_TX_NEEDS_CSUM,
+ NETDEV_A_QSTATS_TX_HW_GSO_PACKETS,
+ NETDEV_A_QSTATS_TX_HW_GSO_BYTES,
+ NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS,
+ NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES,
+ NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS,
+ NETDEV_A_QSTATS_TX_STOP,
+ NETDEV_A_QSTATS_TX_WAKE,
+
+ __NETDEV_A_QSTATS_MAX,
+ NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1)
+};
+
+enum {
+ NETDEV_A_DMABUF_IFINDEX = 1,
+ NETDEV_A_DMABUF_QUEUES,
+ NETDEV_A_DMABUF_FD,
+ NETDEV_A_DMABUF_ID,
+
+ __NETDEV_A_DMABUF_MAX,
+ NETDEV_A_DMABUF_MAX = (__NETDEV_A_DMABUF_MAX - 1)
+};
+
+enum {
NETDEV_CMD_DEV_GET = 1,
NETDEV_CMD_DEV_ADD_NTF,
NETDEV_CMD_DEV_DEL_NTF,
@@ -144,6 +200,9 @@ enum {
NETDEV_CMD_PAGE_POOL_STATS_GET,
NETDEV_CMD_QUEUE_GET,
NETDEV_CMD_NAPI_GET,
+ NETDEV_CMD_QSTATS_GET,
+ NETDEV_CMD_BIND_RX,
+ NETDEV_CMD_NAPI_SET,
__NETDEV_CMD_MAX,
NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
diff --git a/tools/include/uapi/linux/openat2.h b/tools/include/uapi/linux/openat2.h
deleted file mode 100644
index a5feb7604948..000000000000
--- a/tools/include/uapi/linux/openat2.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _UAPI_LINUX_OPENAT2_H
-#define _UAPI_LINUX_OPENAT2_H
-
-#include <linux/types.h>
-
-/*
- * Arguments for how openat2(2) should open the target path. If only @flags and
- * @mode are non-zero, then openat2(2) operates very similarly to openat(2).
- *
- * However, unlike openat(2), unknown or invalid bits in @flags result in
- * -EINVAL rather than being silently ignored. @mode must be zero unless one of
- * {O_CREAT, O_TMPFILE} are set.
- *
- * @flags: O_* flags.
- * @mode: O_CREAT/O_TMPFILE file mode.
- * @resolve: RESOLVE_* flags.
- */
-struct open_how {
- __u64 flags;
- __u64 mode;
- __u64 resolve;
-};
-
-/* how->resolve flags for openat2(2). */
-#define RESOLVE_NO_XDEV 0x01 /* Block mount-point crossings
- (includes bind-mounts). */
-#define RESOLVE_NO_MAGICLINKS 0x02 /* Block traversal through procfs-style
- "magic-links". */
-#define RESOLVE_NO_SYMLINKS 0x04 /* Block traversal through all symlinks
- (implies OEXT_NO_MAGICLINKS) */
-#define RESOLVE_BENEATH 0x08 /* Block "lexical" trickery like
- "..", symlinks, and absolute
- paths which escape the dirfd. */
-#define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".."
- be scoped inside the dirfd
- (similar to chroot(2)). */
-#define RESOLVE_CACHED 0x20 /* Only complete if resolution can be
- completed through cached lookup. May
- return -EAGAIN if that's not
- possible. */
-
-#endif /* _UAPI_LINUX_OPENAT2_H */
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 3a64499b0f5d..0524d541d4e3 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -511,7 +511,16 @@ struct perf_event_attr {
__u16 sample_max_stack;
__u16 __reserved_2;
__u32 aux_sample_size;
- __u32 __reserved_3;
+
+ union {
+ __u32 aux_action;
+ struct {
+ __u32 aux_start_paused : 1, /* start AUX area tracing paused */
+ aux_pause : 1, /* on overflow, pause AUX area tracing */
+ aux_resume : 1, /* on overflow, resume AUX area tracing */
+ __reserved_3 : 29;
+ };
+ };
/*
* User provided data if sigtrap=1, passed back to user via
@@ -1349,12 +1358,14 @@ union perf_mem_data_src {
#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */
#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */
#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */
-/* 5-0x7 available */
+#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */
+#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */
+/* 0x7 available */
#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */
#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */
#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */
#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
-#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */
+#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */
#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */
#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */
#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index 370ed14b1ae0..35791791a879 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -306,4 +306,26 @@ struct prctl_mm_map {
# define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK 0xc
# define PR_RISCV_V_VSTATE_CTRL_MASK 0x1f
+#define PR_RISCV_SET_ICACHE_FLUSH_CTX 71
+# define PR_RISCV_CTX_SW_FENCEI_ON 0
+# define PR_RISCV_CTX_SW_FENCEI_OFF 1
+# define PR_RISCV_SCOPE_PER_PROCESS 0
+# define PR_RISCV_SCOPE_PER_THREAD 1
+
+/* PowerPC Dynamic Execution Control Register (DEXCR) controls */
+#define PR_PPC_GET_DEXCR 72
+#define PR_PPC_SET_DEXCR 73
+/* DEXCR aspect to act on */
+# define PR_PPC_DEXCR_SBHE 0 /* Speculative branch hint enable */
+# define PR_PPC_DEXCR_IBRTPD 1 /* Indirect branch recurrent target prediction disable */
+# define PR_PPC_DEXCR_SRAPD 2 /* Subroutine return address prediction disable */
+# define PR_PPC_DEXCR_NPHIE 3 /* Non-privileged hash instruction enable */
+/* Action to apply / return */
+# define PR_PPC_DEXCR_CTRL_EDITABLE 0x1 /* Aspect can be modified with PR_PPC_SET_DEXCR */
+# define PR_PPC_DEXCR_CTRL_SET 0x2 /* Set the aspect for this process */
+# define PR_PPC_DEXCR_CTRL_CLEAR 0x4 /* Clear the aspect for this process */
+# define PR_PPC_DEXCR_CTRL_SET_ONEXEC 0x8 /* Set the aspect on exec */
+# define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on exec */
+# define PR_PPC_DEXCR_CTRL_MASK 0x1f
+
#endif /* _LINUX_PRCTL_H */
diff --git a/tools/include/uapi/linux/sched.h b/tools/include/uapi/linux/sched.h
deleted file mode 100644
index 3bac0a8ceab2..000000000000
--- a/tools/include/uapi/linux/sched.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _UAPI_LINUX_SCHED_H
-#define _UAPI_LINUX_SCHED_H
-
-#include <linux/types.h>
-
-/*
- * cloning flags:
- */
-#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */
-#define CLONE_VM 0x00000100 /* set if VM shared between processes */
-#define CLONE_FS 0x00000200 /* set if fs info shared between processes */
-#define CLONE_FILES 0x00000400 /* set if open files shared between processes */
-#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */
-#define CLONE_PIDFD 0x00001000 /* set if a pidfd should be placed in parent */
-#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */
-#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */
-#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */
-#define CLONE_THREAD 0x00010000 /* Same thread group? */
-#define CLONE_NEWNS 0x00020000 /* New mount namespace group */
-#define CLONE_SYSVSEM 0x00040000 /* share system V SEM_UNDO semantics */
-#define CLONE_SETTLS 0x00080000 /* create a new TLS for the child */
-#define CLONE_PARENT_SETTID 0x00100000 /* set the TID in the parent */
-#define CLONE_CHILD_CLEARTID 0x00200000 /* clear the TID in the child */
-#define CLONE_DETACHED 0x00400000 /* Unused, ignored */
-#define CLONE_UNTRACED 0x00800000 /* set if the tracing process can't force CLONE_PTRACE on this clone */
-#define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */
-#define CLONE_NEWCGROUP 0x02000000 /* New cgroup namespace */
-#define CLONE_NEWUTS 0x04000000 /* New utsname namespace */
-#define CLONE_NEWIPC 0x08000000 /* New ipc namespace */
-#define CLONE_NEWUSER 0x10000000 /* New user namespace */
-#define CLONE_NEWPID 0x20000000 /* New pid namespace */
-#define CLONE_NEWNET 0x40000000 /* New network namespace */
-#define CLONE_IO 0x80000000 /* Clone io context */
-
-/* Flags for the clone3() syscall. */
-#define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */
-#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
-
-/*
- * cloning flags intersect with CSIGNAL so can be used with unshare and clone3
- * syscalls only:
- */
-#define CLONE_NEWTIME 0x00000080 /* New time namespace */
-
-#ifndef __ASSEMBLY__
-/**
- * struct clone_args - arguments for the clone3 syscall
- * @flags: Flags for the new process as listed above.
- * All flags are valid except for CSIGNAL and
- * CLONE_DETACHED.
- * @pidfd: If CLONE_PIDFD is set, a pidfd will be
- * returned in this argument.
- * @child_tid: If CLONE_CHILD_SETTID is set, the TID of the
- * child process will be returned in the child's
- * memory.
- * @parent_tid: If CLONE_PARENT_SETTID is set, the TID of
- * the child process will be returned in the
- * parent's memory.
- * @exit_signal: The exit_signal the parent process will be
- * sent when the child exits.
- * @stack: Specify the location of the stack for the
- * child process.
- * Note, @stack is expected to point to the
- * lowest address. The stack direction will be
- * determined by the kernel and set up
- * appropriately based on @stack_size.
- * @stack_size: The size of the stack for the child process.
- * @tls: If CLONE_SETTLS is set, the tls descriptor
- * is set to tls.
- * @set_tid: Pointer to an array of type *pid_t. The size
- * of the array is defined using @set_tid_size.
- * This array is used to select PIDs/TIDs for
- * newly created processes. The first element in
- * this defines the PID in the most nested PID
- * namespace. Each additional element in the array
- * defines the PID in the parent PID namespace of
- * the original PID namespace. If the array has
- * less entries than the number of currently
- * nested PID namespaces only the PIDs in the
- * corresponding namespaces are set.
- * @set_tid_size: This defines the size of the array referenced
- * in @set_tid. This cannot be larger than the
- * kernel's limit of nested PID namespaces.
- * @cgroup: If CLONE_INTO_CGROUP is specified set this to
- * a file descriptor for the cgroup.
- *
- * The structure is versioned by size and thus extensible.
- * New struct members must go at the end of the struct and
- * must be properly 64bit aligned.
- */
-struct clone_args {
- __aligned_u64 flags;
- __aligned_u64 pidfd;
- __aligned_u64 child_tid;
- __aligned_u64 parent_tid;
- __aligned_u64 exit_signal;
- __aligned_u64 stack;
- __aligned_u64 stack_size;
- __aligned_u64 tls;
- __aligned_u64 set_tid;
- __aligned_u64 set_tid_size;
- __aligned_u64 cgroup;
-};
-#endif
-
-#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */
-#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */
-#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */
-
-/*
- * Scheduling policies
- */
-#define SCHED_NORMAL 0
-#define SCHED_FIFO 1
-#define SCHED_RR 2
-#define SCHED_BATCH 3
-/* SCHED_ISO: reserved but not implemented yet */
-#define SCHED_IDLE 5
-#define SCHED_DEADLINE 6
-
-/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
-#define SCHED_RESET_ON_FORK 0x40000000
-
-/*
- * For the sched_{set,get}attr() calls
- */
-#define SCHED_FLAG_RESET_ON_FORK 0x01
-#define SCHED_FLAG_RECLAIM 0x02
-#define SCHED_FLAG_DL_OVERRUN 0x04
-#define SCHED_FLAG_KEEP_POLICY 0x08
-#define SCHED_FLAG_KEEP_PARAMS 0x10
-#define SCHED_FLAG_UTIL_CLAMP_MIN 0x20
-#define SCHED_FLAG_UTIL_CLAMP_MAX 0x40
-
-#define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \
- SCHED_FLAG_KEEP_PARAMS)
-
-#define SCHED_FLAG_UTIL_CLAMP (SCHED_FLAG_UTIL_CLAMP_MIN | \
- SCHED_FLAG_UTIL_CLAMP_MAX)
-
-#define SCHED_FLAG_ALL (SCHED_FLAG_RESET_ON_FORK | \
- SCHED_FLAG_RECLAIM | \
- SCHED_FLAG_DL_OVERRUN | \
- SCHED_FLAG_KEEP_ALL | \
- SCHED_FLAG_UTIL_CLAMP)
-
-#endif /* _UAPI_LINUX_SCHED_H */
diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h
index 2f2ee82d5517..887a25286441 100644
--- a/tools/include/uapi/linux/stat.h
+++ b/tools/include/uapi/linux/stat.h
@@ -127,7 +127,14 @@ struct statx {
__u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */
__u32 stx_dio_offset_align; /* File offset alignment for direct I/O */
/* 0xa0 */
- __u64 __spare3[12]; /* Spare space for future expansion */
+ __u64 stx_subvol; /* Subvolume identifier */
+ __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */
+ /* 0xb0 */
+ __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */
+ __u32 __spare1[1];
+ /* 0xb8 */
+ __u64 __spare3[9]; /* Spare space for future expansion */
/* 0x100 */
};
@@ -155,6 +162,8 @@ struct statx {
#define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */
#define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */
#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */
+#define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */
+#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
@@ -190,6 +199,7 @@ struct statx {
#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */
#define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */
#define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */
+#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */
#endif /* _UAPI_LINUX_STAT_H */
diff --git a/tools/include/uapi/linux/stddef.h b/tools/include/uapi/linux/stddef.h
index bb6ea517efb5..c53cde425406 100644
--- a/tools/include/uapi/linux/stddef.h
+++ b/tools/include/uapi/linux/stddef.h
@@ -8,6 +8,13 @@
#define __always_inline __inline__
#endif
+/* Not all C++ standards support type declarations inside an anonymous union */
+#ifndef __cplusplus
+#define __struct_group_tag(TAG) TAG
+#else
+#define __struct_group_tag(TAG)
+#endif
+
/**
* __struct_group() - Create a mirrored named and anonyomous struct
*
@@ -20,14 +27,14 @@
* and size: one anonymous and one named. The former's members can be used
* normally without sub-struct naming, and the latter can be used to
* reason about the start, end, and size of the group of struct members.
- * The named struct can also be explicitly tagged for layer reuse, as well
- * as both having struct attributes appended.
+ * The named struct can also be explicitly tagged for layer reuse (C only),
+ * as well as both having struct attributes appended.
*/
#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \
union { \
struct { MEMBERS } ATTRS; \
- struct TAG { MEMBERS } ATTRS NAME; \
- }
+ struct __struct_group_tag(TAG) { MEMBERS } ATTRS NAME; \
+ } ATTRS
/**
* __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union
diff --git a/tools/include/uapi/linux/usbdevice_fs.h b/tools/include/uapi/linux/usbdevice_fs.h
deleted file mode 100644
index 74a84e02422a..000000000000
--- a/tools/include/uapi/linux/usbdevice_fs.h
+++ /dev/null
@@ -1,231 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
-/*****************************************************************************/
-
-/*
- * usbdevice_fs.h -- USB device file system.
- *
- * Copyright (C) 2000
- * Thomas Sailer (sailer@ife.ee.ethz.ch)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * History:
- * 0.1 04.01.2000 Created
- */
-
-/*****************************************************************************/
-
-#ifndef _UAPI_LINUX_USBDEVICE_FS_H
-#define _UAPI_LINUX_USBDEVICE_FS_H
-
-#include <linux/types.h>
-#include <linux/magic.h>
-
-/* --------------------------------------------------------------------- */
-
-/* usbdevfs ioctl codes */
-
-struct usbdevfs_ctrltransfer {
- __u8 bRequestType;
- __u8 bRequest;
- __u16 wValue;
- __u16 wIndex;
- __u16 wLength;
- __u32 timeout; /* in milliseconds */
- void __user *data;
-};
-
-struct usbdevfs_bulktransfer {
- unsigned int ep;
- unsigned int len;
- unsigned int timeout; /* in milliseconds */
- void __user *data;
-};
-
-struct usbdevfs_setinterface {
- unsigned int interface;
- unsigned int altsetting;
-};
-
-struct usbdevfs_disconnectsignal {
- unsigned int signr;
- void __user *context;
-};
-
-#define USBDEVFS_MAXDRIVERNAME 255
-
-struct usbdevfs_getdriver {
- unsigned int interface;
- char driver[USBDEVFS_MAXDRIVERNAME + 1];
-};
-
-struct usbdevfs_connectinfo {
- unsigned int devnum;
- unsigned char slow;
-};
-
-struct usbdevfs_conninfo_ex {
- __u32 size; /* Size of the structure from the kernel's */
- /* point of view. Can be used by userspace */
- /* to determine how much data can be */
- /* used/trusted. */
- __u32 busnum; /* USB bus number, as enumerated by the */
- /* kernel, the device is connected to. */
- __u32 devnum; /* Device address on the bus. */
- __u32 speed; /* USB_SPEED_* constants from ch9.h */
- __u8 num_ports; /* Number of ports the device is connected */
- /* to on the way to the root hub. It may */
- /* be bigger than size of 'ports' array so */
- /* userspace can detect overflows. */
- __u8 ports[7]; /* List of ports on the way from the root */
- /* hub to the device. Current limit in */
- /* USB specification is 7 tiers (root hub, */
- /* 5 intermediate hubs, device), which */
- /* gives at most 6 port entries. */
-};
-
-#define USBDEVFS_URB_SHORT_NOT_OK 0x01
-#define USBDEVFS_URB_ISO_ASAP 0x02
-#define USBDEVFS_URB_BULK_CONTINUATION 0x04
-#define USBDEVFS_URB_NO_FSBR 0x20 /* Not used */
-#define USBDEVFS_URB_ZERO_PACKET 0x40
-#define USBDEVFS_URB_NO_INTERRUPT 0x80
-
-#define USBDEVFS_URB_TYPE_ISO 0
-#define USBDEVFS_URB_TYPE_INTERRUPT 1
-#define USBDEVFS_URB_TYPE_CONTROL 2
-#define USBDEVFS_URB_TYPE_BULK 3
-
-struct usbdevfs_iso_packet_desc {
- unsigned int length;
- unsigned int actual_length;
- unsigned int status;
-};
-
-struct usbdevfs_urb {
- unsigned char type;
- unsigned char endpoint;
- int status;
- unsigned int flags;
- void __user *buffer;
- int buffer_length;
- int actual_length;
- int start_frame;
- union {
- int number_of_packets; /* Only used for isoc urbs */
- unsigned int stream_id; /* Only used with bulk streams */
- };
- int error_count;
- unsigned int signr; /* signal to be sent on completion,
- or 0 if none should be sent. */
- void __user *usercontext;
- struct usbdevfs_iso_packet_desc iso_frame_desc[];
-};
-
-/* ioctls for talking directly to drivers */
-struct usbdevfs_ioctl {
- int ifno; /* interface 0..N ; negative numbers reserved */
- int ioctl_code; /* MUST encode size + direction of data so the
- * macros in <asm/ioctl.h> give correct values */
- void __user *data; /* param buffer (in, or out) */
-};
-
-/* You can do most things with hubs just through control messages,
- * except find out what device connects to what port. */
-struct usbdevfs_hub_portinfo {
- char nports; /* number of downstream ports in this hub */
- char port [127]; /* e.g. port 3 connects to device 27 */
-};
-
-/* System and bus capability flags */
-#define USBDEVFS_CAP_ZERO_PACKET 0x01
-#define USBDEVFS_CAP_BULK_CONTINUATION 0x02
-#define USBDEVFS_CAP_NO_PACKET_SIZE_LIM 0x04
-#define USBDEVFS_CAP_BULK_SCATTER_GATHER 0x08
-#define USBDEVFS_CAP_REAP_AFTER_DISCONNECT 0x10
-#define USBDEVFS_CAP_MMAP 0x20
-#define USBDEVFS_CAP_DROP_PRIVILEGES 0x40
-#define USBDEVFS_CAP_CONNINFO_EX 0x80
-#define USBDEVFS_CAP_SUSPEND 0x100
-
-/* USBDEVFS_DISCONNECT_CLAIM flags & struct */
-
-/* disconnect-and-claim if the driver matches the driver field */
-#define USBDEVFS_DISCONNECT_CLAIM_IF_DRIVER 0x01
-/* disconnect-and-claim except when the driver matches the driver field */
-#define USBDEVFS_DISCONNECT_CLAIM_EXCEPT_DRIVER 0x02
-
-struct usbdevfs_disconnect_claim {
- unsigned int interface;
- unsigned int flags;
- char driver[USBDEVFS_MAXDRIVERNAME + 1];
-};
-
-struct usbdevfs_streams {
- unsigned int num_streams; /* Not used by USBDEVFS_FREE_STREAMS */
- unsigned int num_eps;
- unsigned char eps[];
-};
-
-/*
- * USB_SPEED_* values returned by USBDEVFS_GET_SPEED are defined in
- * linux/usb/ch9.h
- */
-
-#define USBDEVFS_CONTROL _IOWR('U', 0, struct usbdevfs_ctrltransfer)
-#define USBDEVFS_CONTROL32 _IOWR('U', 0, struct usbdevfs_ctrltransfer32)
-#define USBDEVFS_BULK _IOWR('U', 2, struct usbdevfs_bulktransfer)
-#define USBDEVFS_BULK32 _IOWR('U', 2, struct usbdevfs_bulktransfer32)
-#define USBDEVFS_RESETEP _IOR('U', 3, unsigned int)
-#define USBDEVFS_SETINTERFACE _IOR('U', 4, struct usbdevfs_setinterface)
-#define USBDEVFS_SETCONFIGURATION _IOR('U', 5, unsigned int)
-#define USBDEVFS_GETDRIVER _IOW('U', 8, struct usbdevfs_getdriver)
-#define USBDEVFS_SUBMITURB _IOR('U', 10, struct usbdevfs_urb)
-#define USBDEVFS_SUBMITURB32 _IOR('U', 10, struct usbdevfs_urb32)
-#define USBDEVFS_DISCARDURB _IO('U', 11)
-#define USBDEVFS_REAPURB _IOW('U', 12, void *)
-#define USBDEVFS_REAPURB32 _IOW('U', 12, __u32)
-#define USBDEVFS_REAPURBNDELAY _IOW('U', 13, void *)
-#define USBDEVFS_REAPURBNDELAY32 _IOW('U', 13, __u32)
-#define USBDEVFS_DISCSIGNAL _IOR('U', 14, struct usbdevfs_disconnectsignal)
-#define USBDEVFS_DISCSIGNAL32 _IOR('U', 14, struct usbdevfs_disconnectsignal32)
-#define USBDEVFS_CLAIMINTERFACE _IOR('U', 15, unsigned int)
-#define USBDEVFS_RELEASEINTERFACE _IOR('U', 16, unsigned int)
-#define USBDEVFS_CONNECTINFO _IOW('U', 17, struct usbdevfs_connectinfo)
-#define USBDEVFS_IOCTL _IOWR('U', 18, struct usbdevfs_ioctl)
-#define USBDEVFS_IOCTL32 _IOWR('U', 18, struct usbdevfs_ioctl32)
-#define USBDEVFS_HUB_PORTINFO _IOR('U', 19, struct usbdevfs_hub_portinfo)
-#define USBDEVFS_RESET _IO('U', 20)
-#define USBDEVFS_CLEAR_HALT _IOR('U', 21, unsigned int)
-#define USBDEVFS_DISCONNECT _IO('U', 22)
-#define USBDEVFS_CONNECT _IO('U', 23)
-#define USBDEVFS_CLAIM_PORT _IOR('U', 24, unsigned int)
-#define USBDEVFS_RELEASE_PORT _IOR('U', 25, unsigned int)
-#define USBDEVFS_GET_CAPABILITIES _IOR('U', 26, __u32)
-#define USBDEVFS_DISCONNECT_CLAIM _IOR('U', 27, struct usbdevfs_disconnect_claim)
-#define USBDEVFS_ALLOC_STREAMS _IOR('U', 28, struct usbdevfs_streams)
-#define USBDEVFS_FREE_STREAMS _IOR('U', 29, struct usbdevfs_streams)
-#define USBDEVFS_DROP_PRIVILEGES _IOW('U', 30, __u32)
-#define USBDEVFS_GET_SPEED _IO('U', 31)
-/*
- * Returns struct usbdevfs_conninfo_ex; length is variable to allow
- * extending size of the data returned.
- */
-#define USBDEVFS_CONNINFO_EX(len) _IOC(_IOC_READ, 'U', 32, len)
-#define USBDEVFS_FORBID_SUSPEND _IO('U', 33)
-#define USBDEVFS_ALLOW_SUSPEND _IO('U', 34)
-#define USBDEVFS_WAIT_FOR_RESUME _IO('U', 35)
-
-#endif /* _UAPI_LINUX_USBDEVICE_FS_H */
diff --git a/tools/include/uapi/linux/userfaultfd.h b/tools/include/uapi/linux/userfaultfd.h
new file mode 100644
index 000000000000..4283de22d5b6
--- /dev/null
+++ b/tools/include/uapi/linux/userfaultfd.h
@@ -0,0 +1,386 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * include/linux/userfaultfd.h
+ *
+ * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org>
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ */
+
+#ifndef _LINUX_USERFAULTFD_H
+#define _LINUX_USERFAULTFD_H
+
+#include <linux/types.h>
+
+/* ioctls for /dev/userfaultfd */
+#define USERFAULTFD_IOC 0xAA
+#define USERFAULTFD_IOC_NEW _IO(USERFAULTFD_IOC, 0x00)
+
+/*
+ * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and
+ * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In
+ * userfaultfd.h we assumed the kernel was reading (instead _IOC_READ
+ * means the userland is reading).
+ */
+#define UFFD_API ((__u64)0xAA)
+#define UFFD_API_REGISTER_MODES (UFFDIO_REGISTER_MODE_MISSING | \
+ UFFDIO_REGISTER_MODE_WP | \
+ UFFDIO_REGISTER_MODE_MINOR)
+#define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \
+ UFFD_FEATURE_EVENT_FORK | \
+ UFFD_FEATURE_EVENT_REMAP | \
+ UFFD_FEATURE_EVENT_REMOVE | \
+ UFFD_FEATURE_EVENT_UNMAP | \
+ UFFD_FEATURE_MISSING_HUGETLBFS | \
+ UFFD_FEATURE_MISSING_SHMEM | \
+ UFFD_FEATURE_SIGBUS | \
+ UFFD_FEATURE_THREAD_ID | \
+ UFFD_FEATURE_MINOR_HUGETLBFS | \
+ UFFD_FEATURE_MINOR_SHMEM | \
+ UFFD_FEATURE_EXACT_ADDRESS | \
+ UFFD_FEATURE_WP_HUGETLBFS_SHMEM | \
+ UFFD_FEATURE_WP_UNPOPULATED | \
+ UFFD_FEATURE_POISON | \
+ UFFD_FEATURE_WP_ASYNC | \
+ UFFD_FEATURE_MOVE)
+#define UFFD_API_IOCTLS \
+ ((__u64)1 << _UFFDIO_REGISTER | \
+ (__u64)1 << _UFFDIO_UNREGISTER | \
+ (__u64)1 << _UFFDIO_API)
+#define UFFD_API_RANGE_IOCTLS \
+ ((__u64)1 << _UFFDIO_WAKE | \
+ (__u64)1 << _UFFDIO_COPY | \
+ (__u64)1 << _UFFDIO_ZEROPAGE | \
+ (__u64)1 << _UFFDIO_MOVE | \
+ (__u64)1 << _UFFDIO_WRITEPROTECT | \
+ (__u64)1 << _UFFDIO_CONTINUE | \
+ (__u64)1 << _UFFDIO_POISON)
+#define UFFD_API_RANGE_IOCTLS_BASIC \
+ ((__u64)1 << _UFFDIO_WAKE | \
+ (__u64)1 << _UFFDIO_COPY | \
+ (__u64)1 << _UFFDIO_WRITEPROTECT | \
+ (__u64)1 << _UFFDIO_CONTINUE | \
+ (__u64)1 << _UFFDIO_POISON)
+
+/*
+ * Valid ioctl command number range with this API is from 0x00 to
+ * 0x3F. UFFDIO_API is the fixed number, everything else can be
+ * changed by implementing a different UFFD_API. If sticking to the
+ * same UFFD_API more ioctl can be added and userland will be aware of
+ * which ioctl the running kernel implements through the ioctl command
+ * bitmask written by the UFFDIO_API.
+ */
+#define _UFFDIO_REGISTER (0x00)
+#define _UFFDIO_UNREGISTER (0x01)
+#define _UFFDIO_WAKE (0x02)
+#define _UFFDIO_COPY (0x03)
+#define _UFFDIO_ZEROPAGE (0x04)
+#define _UFFDIO_MOVE (0x05)
+#define _UFFDIO_WRITEPROTECT (0x06)
+#define _UFFDIO_CONTINUE (0x07)
+#define _UFFDIO_POISON (0x08)
+#define _UFFDIO_API (0x3F)
+
+/* userfaultfd ioctl ids */
+#define UFFDIO 0xAA
+#define UFFDIO_API _IOWR(UFFDIO, _UFFDIO_API, \
+ struct uffdio_api)
+#define UFFDIO_REGISTER _IOWR(UFFDIO, _UFFDIO_REGISTER, \
+ struct uffdio_register)
+#define UFFDIO_UNREGISTER _IOR(UFFDIO, _UFFDIO_UNREGISTER, \
+ struct uffdio_range)
+#define UFFDIO_WAKE _IOR(UFFDIO, _UFFDIO_WAKE, \
+ struct uffdio_range)
+#define UFFDIO_COPY _IOWR(UFFDIO, _UFFDIO_COPY, \
+ struct uffdio_copy)
+#define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \
+ struct uffdio_zeropage)
+#define UFFDIO_MOVE _IOWR(UFFDIO, _UFFDIO_MOVE, \
+ struct uffdio_move)
+#define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \
+ struct uffdio_writeprotect)
+#define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \
+ struct uffdio_continue)
+#define UFFDIO_POISON _IOWR(UFFDIO, _UFFDIO_POISON, \
+ struct uffdio_poison)
+
+/* read() structure */
+struct uffd_msg {
+ __u8 event;
+
+ __u8 reserved1;
+ __u16 reserved2;
+ __u32 reserved3;
+
+ union {
+ struct {
+ __u64 flags;
+ __u64 address;
+ union {
+ __u32 ptid;
+ } feat;
+ } pagefault;
+
+ struct {
+ __u32 ufd;
+ } fork;
+
+ struct {
+ __u64 from;
+ __u64 to;
+ __u64 len;
+ } remap;
+
+ struct {
+ __u64 start;
+ __u64 end;
+ } remove;
+
+ struct {
+ /* unused reserved fields */
+ __u64 reserved1;
+ __u64 reserved2;
+ __u64 reserved3;
+ } reserved;
+ } arg;
+} __attribute__((packed));
+
+/*
+ * Start at 0x12 and not at 0 to be more strict against bugs.
+ */
+#define UFFD_EVENT_PAGEFAULT 0x12
+#define UFFD_EVENT_FORK 0x13
+#define UFFD_EVENT_REMAP 0x14
+#define UFFD_EVENT_REMOVE 0x15
+#define UFFD_EVENT_UNMAP 0x16
+
+/* flags for UFFD_EVENT_PAGEFAULT */
+#define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */
+#define UFFD_PAGEFAULT_FLAG_WP (1<<1) /* If reason is VM_UFFD_WP */
+#define UFFD_PAGEFAULT_FLAG_MINOR (1<<2) /* If reason is VM_UFFD_MINOR */
+
+struct uffdio_api {
+ /* userland asks for an API number and the features to enable */
+ __u64 api;
+ /*
+ * Kernel answers below with the all available features for
+ * the API, this notifies userland of which events and/or
+ * which flags for each event are enabled in the current
+ * kernel.
+ *
+ * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE
+ * are to be considered implicitly always enabled in all kernels as
+ * long as the uffdio_api.api requested matches UFFD_API.
+ *
+ * UFFD_FEATURE_MISSING_HUGETLBFS means an UFFDIO_REGISTER
+ * with UFFDIO_REGISTER_MODE_MISSING mode will succeed on
+ * hugetlbfs virtual memory ranges. Adding or not adding
+ * UFFD_FEATURE_MISSING_HUGETLBFS to uffdio_api.features has
+ * no real functional effect after UFFDIO_API returns, but
+ * it's only useful for an initial feature set probe at
+ * UFFDIO_API time. There are two ways to use it:
+ *
+ * 1) by adding UFFD_FEATURE_MISSING_HUGETLBFS to the
+ * uffdio_api.features before calling UFFDIO_API, an error
+ * will be returned by UFFDIO_API on a kernel without
+ * hugetlbfs missing support
+ *
+ * 2) the UFFD_FEATURE_MISSING_HUGETLBFS can not be added in
+ * uffdio_api.features and instead it will be set by the
+ * kernel in the uffdio_api.features if the kernel supports
+ * it, so userland can later check if the feature flag is
+ * present in uffdio_api.features after UFFDIO_API
+ * succeeded.
+ *
+ * UFFD_FEATURE_MISSING_SHMEM works the same as
+ * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem
+ * (i.e. tmpfs and other shmem based APIs).
+ *
+ * UFFD_FEATURE_SIGBUS feature means no page-fault
+ * (UFFD_EVENT_PAGEFAULT) event will be delivered, instead
+ * a SIGBUS signal will be sent to the faulting process.
+ *
+ * UFFD_FEATURE_THREAD_ID pid of the page faulted task_struct will
+ * be returned, if feature is not requested 0 will be returned.
+ *
+ * UFFD_FEATURE_MINOR_HUGETLBFS indicates that minor faults
+ * can be intercepted (via REGISTER_MODE_MINOR) for
+ * hugetlbfs-backed pages.
+ *
+ * UFFD_FEATURE_MINOR_SHMEM indicates the same support as
+ * UFFD_FEATURE_MINOR_HUGETLBFS, but for shmem-backed pages instead.
+ *
+ * UFFD_FEATURE_EXACT_ADDRESS indicates that the exact address of page
+ * faults would be provided and the offset within the page would not be
+ * masked.
+ *
+ * UFFD_FEATURE_WP_HUGETLBFS_SHMEM indicates that userfaultfd
+ * write-protection mode is supported on both shmem and hugetlbfs.
+ *
+ * UFFD_FEATURE_WP_UNPOPULATED indicates that userfaultfd
+ * write-protection mode will always apply to unpopulated pages
+ * (i.e. empty ptes). This will be the default behavior for shmem
+ * & hugetlbfs, so this flag only affects anonymous memory behavior
+ * when userfault write-protection mode is registered.
+ *
+ * UFFD_FEATURE_WP_ASYNC indicates that userfaultfd write-protection
+ * asynchronous mode is supported in which the write fault is
+ * automatically resolved and write-protection is un-set.
+ * It implies UFFD_FEATURE_WP_UNPOPULATED.
+ *
+ * UFFD_FEATURE_MOVE indicates that the kernel supports moving an
+ * existing page contents from userspace.
+ */
+#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
+#define UFFD_FEATURE_EVENT_FORK (1<<1)
+#define UFFD_FEATURE_EVENT_REMAP (1<<2)
+#define UFFD_FEATURE_EVENT_REMOVE (1<<3)
+#define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4)
+#define UFFD_FEATURE_MISSING_SHMEM (1<<5)
+#define UFFD_FEATURE_EVENT_UNMAP (1<<6)
+#define UFFD_FEATURE_SIGBUS (1<<7)
+#define UFFD_FEATURE_THREAD_ID (1<<8)
+#define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9)
+#define UFFD_FEATURE_MINOR_SHMEM (1<<10)
+#define UFFD_FEATURE_EXACT_ADDRESS (1<<11)
+#define UFFD_FEATURE_WP_HUGETLBFS_SHMEM (1<<12)
+#define UFFD_FEATURE_WP_UNPOPULATED (1<<13)
+#define UFFD_FEATURE_POISON (1<<14)
+#define UFFD_FEATURE_WP_ASYNC (1<<15)
+#define UFFD_FEATURE_MOVE (1<<16)
+ __u64 features;
+
+ __u64 ioctls;
+};
+
+struct uffdio_range {
+ __u64 start;
+ __u64 len;
+};
+
+struct uffdio_register {
+ struct uffdio_range range;
+#define UFFDIO_REGISTER_MODE_MISSING ((__u64)1<<0)
+#define UFFDIO_REGISTER_MODE_WP ((__u64)1<<1)
+#define UFFDIO_REGISTER_MODE_MINOR ((__u64)1<<2)
+ __u64 mode;
+
+ /*
+ * kernel answers which ioctl commands are available for the
+ * range, keep at the end as the last 8 bytes aren't read.
+ */
+ __u64 ioctls;
+};
+
+struct uffdio_copy {
+ __u64 dst;
+ __u64 src;
+ __u64 len;
+#define UFFDIO_COPY_MODE_DONTWAKE ((__u64)1<<0)
+ /*
+ * UFFDIO_COPY_MODE_WP will map the page write protected on
+ * the fly. UFFDIO_COPY_MODE_WP is available only if the
+ * write protected ioctl is implemented for the range
+ * according to the uffdio_register.ioctls.
+ */
+#define UFFDIO_COPY_MODE_WP ((__u64)1<<1)
+ __u64 mode;
+
+ /*
+ * "copy" is written by the ioctl and must be at the end: the
+ * copy_from_user will not read the last 8 bytes.
+ */
+ __s64 copy;
+};
+
+struct uffdio_zeropage {
+ struct uffdio_range range;
+#define UFFDIO_ZEROPAGE_MODE_DONTWAKE ((__u64)1<<0)
+ __u64 mode;
+
+ /*
+ * "zeropage" is written by the ioctl and must be at the end:
+ * the copy_from_user will not read the last 8 bytes.
+ */
+ __s64 zeropage;
+};
+
+struct uffdio_writeprotect {
+ struct uffdio_range range;
+/*
+ * UFFDIO_WRITEPROTECT_MODE_WP: set the flag to write protect a range,
+ * unset the flag to undo protection of a range which was previously
+ * write protected.
+ *
+ * UFFDIO_WRITEPROTECT_MODE_DONTWAKE: set the flag to avoid waking up
+ * any wait thread after the operation succeeds.
+ *
+ * NOTE: Write protecting a region (WP=1) is unrelated to page faults,
+ * therefore DONTWAKE flag is meaningless with WP=1. Removing write
+ * protection (WP=0) in response to a page fault wakes the faulting
+ * task unless DONTWAKE is set.
+ */
+#define UFFDIO_WRITEPROTECT_MODE_WP ((__u64)1<<0)
+#define UFFDIO_WRITEPROTECT_MODE_DONTWAKE ((__u64)1<<1)
+ __u64 mode;
+};
+
+struct uffdio_continue {
+ struct uffdio_range range;
+#define UFFDIO_CONTINUE_MODE_DONTWAKE ((__u64)1<<0)
+ /*
+ * UFFDIO_CONTINUE_MODE_WP will map the page write protected on
+ * the fly. UFFDIO_CONTINUE_MODE_WP is available only if the
+ * write protected ioctl is implemented for the range
+ * according to the uffdio_register.ioctls.
+ */
+#define UFFDIO_CONTINUE_MODE_WP ((__u64)1<<1)
+ __u64 mode;
+
+ /*
+ * Fields below here are written by the ioctl and must be at the end:
+ * the copy_from_user will not read past here.
+ */
+ __s64 mapped;
+};
+
+struct uffdio_poison {
+ struct uffdio_range range;
+#define UFFDIO_POISON_MODE_DONTWAKE ((__u64)1<<0)
+ __u64 mode;
+
+ /*
+ * Fields below here are written by the ioctl and must be at the end:
+ * the copy_from_user will not read past here.
+ */
+ __s64 updated;
+};
+
+struct uffdio_move {
+ __u64 dst;
+ __u64 src;
+ __u64 len;
+ /*
+ * Especially if used to atomically remove memory from the
+ * address space the wake on the dst range is not needed.
+ */
+#define UFFDIO_MOVE_MODE_DONTWAKE ((__u64)1<<0)
+#define UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES ((__u64)1<<1)
+ __u64 mode;
+ /*
+ * "move" is written by the ioctl and must be at the end: the
+ * copy_from_user will not read the last 8 bytes.
+ */
+ __s64 move;
+};
+
+/*
+ * Flags for the userfaultfd(2) system call itself.
+ */
+
+/*
+ * Create a userfaultfd that can handle page faults only in user mode.
+ */
+#define UFFD_USER_MODE_ONLY 1
+
+#endif /* _LINUX_USERFAULTFD_H */
diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h
deleted file mode 100644
index 649560c685f1..000000000000
--- a/tools/include/uapi/linux/vhost.h
+++ /dev/null
@@ -1,230 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _LINUX_VHOST_H
-#define _LINUX_VHOST_H
-/* Userspace interface for in-kernel virtio accelerators. */
-
-/* vhost is used to reduce the number of system calls involved in virtio.
- *
- * Existing virtio net code is used in the guest without modification.
- *
- * This header includes interface used by userspace hypervisor for
- * device configuration.
- */
-
-#include <linux/vhost_types.h>
-#include <linux/types.h>
-#include <linux/ioctl.h>
-
-#define VHOST_FILE_UNBIND -1
-
-/* ioctls */
-
-#define VHOST_VIRTIO 0xAF
-
-/* Features bitmask for forward compatibility. Transport bits are used for
- * vhost specific features. */
-#define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64)
-#define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64)
-
-/* Set current process as the (exclusive) owner of this file descriptor. This
- * must be called before any other vhost command. Further calls to
- * VHOST_OWNER_SET fail until VHOST_OWNER_RESET is called. */
-#define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01)
-/* Give up ownership, and reset the device to default values.
- * Allows subsequent call to VHOST_OWNER_SET to succeed. */
-#define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02)
-
-/* Set up/modify memory layout */
-#define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory)
-
-/* Write logging setup. */
-/* Memory writes can optionally be logged by setting bit at an offset
- * (calculated from the physical address) from specified log base.
- * The bit is set using an atomic 32 bit operation. */
-/* Set base address for logging. */
-#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64)
-/* Specify an eventfd file descriptor to signal on log write. */
-#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int)
-/* By default, a device gets one vhost_worker that its virtqueues share. This
- * command allows the owner of the device to create an additional vhost_worker
- * for the device. It can later be bound to 1 or more of its virtqueues using
- * the VHOST_ATTACH_VRING_WORKER command.
- *
- * This must be called after VHOST_SET_OWNER and the caller must be the owner
- * of the device. The new thread will inherit caller's cgroups and namespaces,
- * and will share the caller's memory space. The new thread will also be
- * counted against the caller's RLIMIT_NPROC value.
- *
- * The worker's ID used in other commands will be returned in
- * vhost_worker_state.
- */
-#define VHOST_NEW_WORKER _IOR(VHOST_VIRTIO, 0x8, struct vhost_worker_state)
-/* Free a worker created with VHOST_NEW_WORKER if it's not attached to any
- * virtqueue. If userspace is not able to call this for workers its created,
- * the kernel will free all the device's workers when the device is closed.
- */
-#define VHOST_FREE_WORKER _IOW(VHOST_VIRTIO, 0x9, struct vhost_worker_state)
-
-/* Ring setup. */
-/* Set number of descriptors in ring. This parameter can not
- * be modified while ring is running (bound to a device). */
-#define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state)
-/* Set addresses for the ring. */
-#define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr)
-/* Base value where queue looks for available descriptors */
-#define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
-/* Get accessor: reads index, writes value in num */
-#define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
-
-/* Set the vring byte order in num. Valid values are VHOST_VRING_LITTLE_ENDIAN
- * or VHOST_VRING_BIG_ENDIAN (other values return -EINVAL).
- * The byte order cannot be changed while the device is active: trying to do so
- * returns -EBUSY.
- * This is a legacy only API that is simply ignored when VIRTIO_F_VERSION_1 is
- * set.
- * Not all kernel configurations support this ioctl, but all configurations that
- * support SET also support GET.
- */
-#define VHOST_VRING_LITTLE_ENDIAN 0
-#define VHOST_VRING_BIG_ENDIAN 1
-#define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
-#define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
-/* Attach a vhost_worker created with VHOST_NEW_WORKER to one of the device's
- * virtqueues.
- *
- * This will replace the virtqueue's existing worker. If the replaced worker
- * is no longer attached to any virtqueues, it can be freed with
- * VHOST_FREE_WORKER.
- */
-#define VHOST_ATTACH_VRING_WORKER _IOW(VHOST_VIRTIO, 0x15, \
- struct vhost_vring_worker)
-/* Return the vring worker's ID */
-#define VHOST_GET_VRING_WORKER _IOWR(VHOST_VIRTIO, 0x16, \
- struct vhost_vring_worker)
-
-/* The following ioctls use eventfd file descriptors to signal and poll
- * for events. */
-
-/* Set eventfd to poll for added buffers */
-#define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file)
-/* Set eventfd to signal when buffers have beed used */
-#define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file)
-/* Set eventfd to signal an error */
-#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)
-/* Set busy loop timeout (in us) */
-#define VHOST_SET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x23, \
- struct vhost_vring_state)
-/* Get busy loop timeout (in us) */
-#define VHOST_GET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x24, \
- struct vhost_vring_state)
-
-/* Set or get vhost backend capability */
-
-#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
-#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
-
-/* VHOST_NET specific defines */
-
-/* Attach virtio net ring to a raw socket, or tap device.
- * The socket must be already bound to an ethernet device, this device will be
- * used for transmit. Pass fd -1 to unbind from the socket and the transmit
- * device. This can be used to stop the ring (e.g. for migration). */
-#define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file)
-
-/* VHOST_SCSI specific defines */
-
-#define VHOST_SCSI_SET_ENDPOINT _IOW(VHOST_VIRTIO, 0x40, struct vhost_scsi_target)
-#define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct vhost_scsi_target)
-/* Changing this breaks userspace. */
-#define VHOST_SCSI_GET_ABI_VERSION _IOW(VHOST_VIRTIO, 0x42, int)
-/* Set and get the events missed flag */
-#define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32)
-#define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32)
-
-/* VHOST_VSOCK specific defines */
-
-#define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u64)
-#define VHOST_VSOCK_SET_RUNNING _IOW(VHOST_VIRTIO, 0x61, int)
-
-/* VHOST_VDPA specific defines */
-
-/* Get the device id. The device ids follow the same definition of
- * the device id defined in virtio-spec.
- */
-#define VHOST_VDPA_GET_DEVICE_ID _IOR(VHOST_VIRTIO, 0x70, __u32)
-/* Get and set the status. The status bits follow the same definition
- * of the device status defined in virtio-spec.
- */
-#define VHOST_VDPA_GET_STATUS _IOR(VHOST_VIRTIO, 0x71, __u8)
-#define VHOST_VDPA_SET_STATUS _IOW(VHOST_VIRTIO, 0x72, __u8)
-/* Get and set the device config. The device config follows the same
- * definition of the device config defined in virtio-spec.
- */
-#define VHOST_VDPA_GET_CONFIG _IOR(VHOST_VIRTIO, 0x73, \
- struct vhost_vdpa_config)
-#define VHOST_VDPA_SET_CONFIG _IOW(VHOST_VIRTIO, 0x74, \
- struct vhost_vdpa_config)
-/* Enable/disable the ring. */
-#define VHOST_VDPA_SET_VRING_ENABLE _IOW(VHOST_VIRTIO, 0x75, \
- struct vhost_vring_state)
-/* Get the max ring size. */
-#define VHOST_VDPA_GET_VRING_NUM _IOR(VHOST_VIRTIO, 0x76, __u16)
-
-/* Set event fd for config interrupt*/
-#define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int)
-
-/* Get the valid iova range */
-#define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \
- struct vhost_vdpa_iova_range)
-/* Get the config size */
-#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32)
-
-/* Get the count of all virtqueues */
-#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32)
-
-/* Get the number of virtqueue groups. */
-#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32)
-
-/* Get the number of address spaces. */
-#define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int)
-
-/* Get the group for a virtqueue: read index, write group in num,
- * The virtqueue index is stored in the index field of
- * vhost_vring_state. The group for this specific virtqueue is
- * returned via num field of vhost_vring_state.
- */
-#define VHOST_VDPA_GET_VRING_GROUP _IOWR(VHOST_VIRTIO, 0x7B, \
- struct vhost_vring_state)
-/* Set the ASID for a virtqueue group. The group index is stored in
- * the index field of vhost_vring_state, the ASID associated with this
- * group is stored at num field of vhost_vring_state.
- */
-#define VHOST_VDPA_SET_GROUP_ASID _IOW(VHOST_VIRTIO, 0x7C, \
- struct vhost_vring_state)
-
-/* Suspend a device so it does not process virtqueue requests anymore
- *
- * After the return of ioctl the device must preserve all the necessary state
- * (the virtqueue vring base plus the possible device specific states) that is
- * required for restoring in the future. The device must not change its
- * configuration after that point.
- */
-#define VHOST_VDPA_SUSPEND _IO(VHOST_VIRTIO, 0x7D)
-
-/* Resume a device so it can resume processing virtqueue requests
- *
- * After the return of this ioctl the device will have restored all the
- * necessary states and it is fully operational to continue processing the
- * virtqueue descriptors.
- */
-#define VHOST_VDPA_RESUME _IO(VHOST_VIRTIO, 0x7E)
-
-/* Get the group for the descriptor table including driver & device areas
- * of a virtqueue: read index, write group in num.
- * The virtqueue index is stored in the index field of vhost_vring_state.
- * The group ID of the descriptor table for this specific virtqueue
- * is returned via num field of vhost_vring_state.
- */
-#define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \
- struct vhost_vring_state)
-#endif