summaryrefslogtreecommitdiff
path: root/tools/include/uapi
diff options
context:
space:
mode:
Diffstat (limited to 'tools/include/uapi')
-rw-r--r--tools/include/uapi/README73
-rw-r--r--tools/include/uapi/asm-generic/mman-common.h4
-rw-r--r--tools/include/uapi/asm-generic/mman.h4
-rw-r--r--tools/include/uapi/asm-generic/socket.h23
-rw-r--r--tools/include/uapi/asm-generic/unistd.h25
-rw-r--r--tools/include/uapi/drm/drm.h84
-rw-r--r--tools/include/uapi/drm/i915_drm.h27
-rw-r--r--tools/include/uapi/linux/bits.h11
-rw-r--r--tools/include/uapi/linux/bpf.h223
-rw-r--r--tools/include/uapi/linux/btf.h3
-rw-r--r--tools/include/uapi/linux/const.h17
-rw-r--r--tools/include/uapi/linux/coredump.h104
-rw-r--r--tools/include/uapi/linux/elf.h524
-rw-r--r--tools/include/uapi/linux/fanotify.h274
-rw-r--r--tools/include/uapi/linux/fs.h569
-rw-r--r--tools/include/uapi/linux/fscrypt.h6
-rw-r--r--tools/include/uapi/linux/genetlink.h103
-rw-r--r--tools/include/uapi/linux/if_addr.h79
-rw-r--r--tools/include/uapi/linux/if_link.h556
-rw-r--r--tools/include/uapi/linux/if_xdp.h25
-rw-r--r--tools/include/uapi/linux/in.h6
-rw-r--r--tools/include/uapi/linux/kvm.h79
-rw-r--r--tools/include/uapi/linux/mman.h1
-rw-r--r--tools/include/uapi/linux/mount.h235
-rw-r--r--tools/include/uapi/linux/neighbour.h229
-rw-r--r--tools/include/uapi/linux/netdev.h42
-rw-r--r--tools/include/uapi/linux/netfilter.h80
-rw-r--r--tools/include/uapi/linux/netfilter_arp.h23
-rw-r--r--tools/include/uapi/linux/nsfs.h128
-rw-r--r--tools/include/uapi/linux/perf_event.h687
-rw-r--r--tools/include/uapi/linux/prctl.h377
-rw-r--r--tools/include/uapi/linux/rtnetlink.h848
-rw-r--r--tools/include/uapi/linux/stat.h105
-rw-r--r--tools/include/uapi/linux/stddef.h15
-rw-r--r--tools/include/uapi/linux/types.h3
35 files changed, 5175 insertions, 417 deletions
diff --git a/tools/include/uapi/README b/tools/include/uapi/README
new file mode 100644
index 000000000000..7147b1b2cb28
--- /dev/null
+++ b/tools/include/uapi/README
@@ -0,0 +1,73 @@
+Why we want a copy of kernel headers in tools?
+==============================================
+
+There used to be no copies, with tools/ code using kernel headers
+directly. From time to time tools/perf/ broke due to legitimate kernel
+hacking. At some point Linus complained about such direct usage. Then we
+adopted the current model.
+
+The way these headers are used in perf are not restricted to just
+including them to compile something.
+
+There are sometimes used in scripts that convert defines into string
+tables, etc, so some change may break one of these scripts, or new MSRs
+may use some different #define pattern, etc.
+
+E.g.:
+
+ $ ls -1 tools/perf/trace/beauty/*.sh | head -5
+ tools/perf/trace/beauty/arch_errno_names.sh
+ tools/perf/trace/beauty/drm_ioctl.sh
+ tools/perf/trace/beauty/fadvise.sh
+ tools/perf/trace/beauty/fsconfig.sh
+ tools/perf/trace/beauty/fsmount.sh
+ $
+ $ tools/perf/trace/beauty/fadvise.sh
+ static const char *fadvise_advices[] = {
+ [0] = "NORMAL",
+ [1] = "RANDOM",
+ [2] = "SEQUENTIAL",
+ [3] = "WILLNEED",
+ [4] = "DONTNEED",
+ [5] = "NOREUSE",
+ };
+ $
+
+The tools/perf/check-headers.sh script, part of the tools/ build
+process, points out changes in the original files.
+
+So its important not to touch the copies in tools/ when doing changes in
+the original kernel headers, that will be done later, when
+check-headers.sh inform about the change to the perf tools hackers.
+
+Another explanation from Ingo Molnar:
+It's better than all the alternatives we tried so far:
+
+ - Symbolic links and direct #includes: this was the original approach but
+ was pushed back on from the kernel side, when tooling modified the
+ headers and broke them accidentally for kernel builds.
+
+ - Duplicate self-defined ABI headers like glibc: double the maintenance
+ burden, double the chance for mistakes, plus there's no tech-driven
+ notification mechanism to look at new kernel side changes.
+
+What we are doing now is a third option:
+
+ - A software-enforced copy-on-write mechanism of kernel headers to
+ tooling, driven by non-fatal warnings on the tooling side build when
+ kernel headers get modified:
+
+ Warning: Kernel ABI header differences:
+ diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h
+ diff -u tools/include/uapi/linux/fs.h include/uapi/linux/fs.h
+ diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h
+ ...
+
+ The tooling policy is to always pick up the kernel side headers as-is,
+ and integate them into the tooling build. The warnings above serve as a
+ notification to tooling maintainers that there's changes on the kernel
+ side.
+
+We've been using this for many years now, and it might seem hacky, but
+works surprisingly well.
+
diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h
index 6ce1f1ceb432..ef1c27fa3c57 100644
--- a/tools/include/uapi/asm-generic/mman-common.h
+++ b/tools/include/uapi/asm-generic/mman-common.h
@@ -79,9 +79,13 @@
#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */
+#define MADV_GUARD_INSTALL 102 /* fatal signal on access to range */
+#define MADV_GUARD_REMOVE 103 /* unguard range */
+
/* compatibility flags */
#define MAP_FILE 0
+#define PKEY_UNRESTRICTED 0x0
#define PKEY_DISABLE_ACCESS 0x1
#define PKEY_DISABLE_WRITE 0x2
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
diff --git a/tools/include/uapi/asm-generic/mman.h b/tools/include/uapi/asm-generic/mman.h
index 406f7718f9ad..51d2556af54a 100644
--- a/tools/include/uapi/asm-generic/mman.h
+++ b/tools/include/uapi/asm-generic/mman.h
@@ -19,4 +19,8 @@
#define MCL_FUTURE 2 /* lock all future mappings */
#define MCL_ONFAULT 4 /* lock all pages that are faulted in */
+#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */
+#define SHADOW_STACK_SET_MARKER (1ULL << 1) /* Set up a top of stack marker in the shadow stack */
+
+
#endif /* __ASM_GENERIC_MMAN_H */
diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h
index 54d9c8bf7c55..f333a0ac4ee4 100644
--- a/tools/include/uapi/asm-generic/socket.h
+++ b/tools/include/uapi/asm-generic/socket.h
@@ -119,11 +119,34 @@
#define SO_DETACH_REUSEPORT_BPF 68
+#define SO_PREFER_BUSY_POLL 69
+#define SO_BUSY_POLL_BUDGET 70
+
+#define SO_NETNS_COOKIE 71
+
+#define SO_BUF_LOCK 72
+
+#define SO_RESERVE_MEM 73
+
+#define SO_TXREHASH 74
+
#define SO_RCVMARK 75
#define SO_PASSPIDFD 76
#define SO_PEERPIDFD 77
+#define SO_DEVMEM_LINEAR 78
+#define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR
+#define SO_DEVMEM_DMABUF 79
+#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF
+#define SO_DEVMEM_DONTNEED 80
+
+#define SCM_TS_OPT_ID 81
+
+#define SO_RCVPRIORITY 82
+
+#define SO_PASSRIGHTS 83
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index d983c48a3b6a..04e0077fb4c9 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -737,7 +737,7 @@ __SC_COMP(__NR_pselect6_time64, sys_pselect6, compat_sys_pselect6_time64)
#define __NR_ppoll_time64 414
__SC_COMP(__NR_ppoll_time64, sys_ppoll, compat_sys_ppoll_time64)
#define __NR_io_pgetevents_time64 416
-__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents)
+__SC_COMP(__NR_io_pgetevents_time64, sys_io_pgetevents, compat_sys_io_pgetevents_time64)
#define __NR_recvmmsg_time64 417
__SC_COMP(__NR_recvmmsg_time64, sys_recvmmsg, compat_sys_recvmmsg_time64)
#define __NR_mq_timedsend_time64 418
@@ -776,12 +776,8 @@ __SYSCALL(__NR_fsmount, sys_fsmount)
__SYSCALL(__NR_fspick, sys_fspick)
#define __NR_pidfd_open 434
__SYSCALL(__NR_pidfd_open, sys_pidfd_open)
-
-#ifdef __ARCH_WANT_SYS_CLONE3
#define __NR_clone3 435
__SYSCALL(__NR_clone3, sys_clone3)
-#endif
-
#define __NR_close_range 436
__SYSCALL(__NR_close_range, sys_close_range)
#define __NR_openat2 437
@@ -845,8 +841,25 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules)
#define __NR_mseal 462
__SYSCALL(__NR_mseal, sys_mseal)
+#define __NR_setxattrat 463
+__SYSCALL(__NR_setxattrat, sys_setxattrat)
+#define __NR_getxattrat 464
+__SYSCALL(__NR_getxattrat, sys_getxattrat)
+#define __NR_listxattrat 465
+__SYSCALL(__NR_listxattrat, sys_listxattrat)
+#define __NR_removexattrat 466
+__SYSCALL(__NR_removexattrat, sys_removexattrat)
+#define __NR_open_tree_attr 467
+__SYSCALL(__NR_open_tree_attr, sys_open_tree_attr)
+
+/* fs/inode.c */
+#define __NR_file_getattr 468
+__SYSCALL(__NR_file_getattr, sys_file_getattr)
+#define __NR_file_setattr 469
+__SYSCALL(__NR_file_setattr, sys_file_setattr)
+
#undef __NR_syscalls
-#define __NR_syscalls 463
+#define __NR_syscalls 470
/*
* 32 bit systems traditionally used different
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index 16122819edfe..3cd5cf15e3c9 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -597,35 +597,66 @@ struct drm_set_version {
int drm_dd_minor;
};
-/* DRM_IOCTL_GEM_CLOSE ioctl argument type */
+/**
+ * struct drm_gem_close - Argument for &DRM_IOCTL_GEM_CLOSE ioctl.
+ * @handle: Handle of the object to be closed.
+ * @pad: Padding.
+ *
+ * Releases the handle to an mm object.
+ */
struct drm_gem_close {
- /** Handle of the object to be closed. */
__u32 handle;
__u32 pad;
};
-/* DRM_IOCTL_GEM_FLINK ioctl argument type */
+/**
+ * struct drm_gem_flink - Argument for &DRM_IOCTL_GEM_FLINK ioctl.
+ * @handle: Handle for the object being named.
+ * @name: Returned global name.
+ *
+ * Create a global name for an object, returning the name.
+ *
+ * Note that the name does not hold a reference; when the object
+ * is freed, the name goes away.
+ */
struct drm_gem_flink {
- /** Handle for the object being named */
__u32 handle;
-
- /** Returned global name */
__u32 name;
};
-/* DRM_IOCTL_GEM_OPEN ioctl argument type */
+/**
+ * struct drm_gem_open - Argument for &DRM_IOCTL_GEM_OPEN ioctl.
+ * @name: Name of object being opened.
+ * @handle: Returned handle for the object.
+ * @size: Returned size of the object
+ *
+ * Open an object using the global name, returning a handle and the size.
+ *
+ * This handle (of course) holds a reference to the object, so the object
+ * will not go away until the handle is deleted.
+ */
struct drm_gem_open {
- /** Name of object being opened */
__u32 name;
-
- /** Returned handle for the object */
__u32 handle;
-
- /** Returned size of the object */
__u64 size;
};
/**
+ * struct drm_gem_change_handle - Argument for &DRM_IOCTL_GEM_CHANGE_HANDLE ioctl.
+ * @handle: The handle of a gem object.
+ * @new_handle: An available gem handle.
+ *
+ * This ioctl changes the handle of a GEM object to the specified one.
+ * The new handle must be unused. On success the old handle is closed
+ * and all further IOCTL should refer to the new handle only.
+ * Calls to DRM_IOCTL_PRIME_FD_TO_HANDLE will return the new handle.
+ */
+struct drm_gem_change_handle {
+ __u32 handle;
+ __u32 new_handle;
+};
+
+/**
* DRM_CAP_DUMB_BUFFER
*
* If set to 1, the driver supports creating dumb buffers via the
@@ -905,13 +936,17 @@ struct drm_syncobj_destroy {
};
#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE (1 << 0)
+#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_TIMELINE (1 << 1)
#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE (1 << 0)
+#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_TIMELINE (1 << 1)
struct drm_syncobj_handle {
__u32 handle;
__u32 flags;
__s32 fd;
__u32 pad;
+
+ __u64 point;
};
struct drm_syncobj_transfer {
@@ -1024,6 +1059,13 @@ struct drm_crtc_queue_sequence {
__u64 user_data; /* user data passed to event */
};
+#define DRM_CLIENT_NAME_MAX_LEN 64
+struct drm_set_client_name {
+ __u64 name_len;
+ __u64 name;
+};
+
+
#if defined(__cplusplus)
}
#endif
@@ -1288,6 +1330,24 @@ extern "C" {
*/
#define DRM_IOCTL_MODE_CLOSEFB DRM_IOWR(0xD0, struct drm_mode_closefb)
+/**
+ * DRM_IOCTL_SET_CLIENT_NAME - Attach a name to a drm_file
+ *
+ * Having a name allows for easier tracking and debugging.
+ * The length of the name (without null ending char) must be
+ * <= DRM_CLIENT_NAME_MAX_LEN.
+ * The call will fail if the name contains whitespaces or non-printable chars.
+ */
+#define DRM_IOCTL_SET_CLIENT_NAME DRM_IOWR(0xD1, struct drm_set_client_name)
+
+/**
+ * DRM_IOCTL_GEM_CHANGE_HANDLE - Move an object to a different handle
+ *
+ * Some applications (notably CRIU) need objects to have specific gem handles.
+ * This ioctl changes the object at one gem handle to use a new gem handle.
+ */
+#define DRM_IOCTL_GEM_CHANGE_HANDLE DRM_IOWR(0xD2, struct drm_gem_change_handle)
+
/*
* Device specific ioctls should only be in their respective headers
* The device specific ioctl range is from 0x40 to 0x9f.
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index d4d86e566e07..535cb68fdb5c 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -2163,6 +2163,15 @@ struct drm_i915_gem_context_param {
* supports this per context flag.
*/
#define I915_CONTEXT_PARAM_LOW_LATENCY 0xe
+
+/*
+ * I915_CONTEXT_PARAM_CONTEXT_IMAGE:
+ *
+ * Allows userspace to provide own context images.
+ *
+ * Note that this is a debug API not available on production kernel builds.
+ */
+#define I915_CONTEXT_PARAM_CONTEXT_IMAGE 0xf
/* Must be kept compact -- no holes and well documented */
/** @value: Context parameter value to be set or queried */
@@ -2564,6 +2573,24 @@ struct i915_context_param_engines {
struct i915_engine_class_instance engines[N__]; \
} __attribute__((packed)) name__
+struct i915_gem_context_param_context_image {
+ /** @engine: Engine class & instance to be configured. */
+ struct i915_engine_class_instance engine;
+
+ /** @flags: One of the supported flags or zero. */
+ __u32 flags;
+#define I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX (1u << 0)
+
+ /** @size: Size of the image blob pointed to by @image. */
+ __u32 size;
+
+ /** @mbz: Must be zero. */
+ __u32 mbz;
+
+ /** @image: Userspace memory containing the context image. */
+ __u64 image;
+} __attribute__((packed));
+
/**
* struct drm_i915_gem_context_create_ext_setparam - Context parameter
* to set or query during context creation.
diff --git a/tools/include/uapi/linux/bits.h b/tools/include/uapi/linux/bits.h
index 3c2a101986a3..a04afef9efca 100644
--- a/tools/include/uapi/linux/bits.h
+++ b/tools/include/uapi/linux/bits.h
@@ -4,12 +4,11 @@
#ifndef _UAPI_LINUX_BITS_H
#define _UAPI_LINUX_BITS_H
-#define __GENMASK(h, l) \
- (((~_UL(0)) - (_UL(1) << (l)) + 1) & \
- (~_UL(0) >> (__BITS_PER_LONG - 1 - (h))))
+#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h))))
-#define __GENMASK_ULL(h, l) \
- (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \
- (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h))))
+#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h))))
+
+#define __GENMASK_U128(h, l) \
+ ((_BIT128((h)) << 1) - (_BIT128(l)))
#endif /* _UAPI_LINUX_BITS_H */
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 35bcf52dbc65..be7d8e060e10 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -51,6 +51,9 @@
#define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */
#define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */
+#define BPF_LOAD_ACQ 0x100 /* load-acquire */
+#define BPF_STORE_REL 0x110 /* store-release */
+
enum bpf_cond_pseudo_jmp {
BPF_MAY_GOTO = 0,
};
@@ -447,6 +450,7 @@ union bpf_iter_link_info {
* * **struct bpf_map_info**
* * **struct bpf_btf_info**
* * **struct bpf_link_info**
+ * * **struct bpf_token_info**
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
@@ -903,6 +907,17 @@ union bpf_iter_link_info {
* A new file descriptor (a nonnegative integer), or -1 if an
* error occurred (in which case, *errno* is set appropriately).
*
+ * BPF_PROG_STREAM_READ_BY_FD
+ * Description
+ * Read data of a program's BPF stream. The program is identified
+ * by *prog_fd*, and the stream is identified by the *stream_id*.
+ * The data is copied to a buffer pointed to by *stream_buf*, and
+ * filled less than or equal to *stream_buf_len* bytes.
+ *
+ * Return
+ * Number of bytes read from the stream on success, or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
* NOTES
* eBPF objects (maps and programs) can be shared between processes.
*
@@ -958,6 +973,7 @@ enum bpf_cmd {
BPF_LINK_DETACH,
BPF_PROG_BIND_MAP,
BPF_TOKEN_CREATE,
+ BPF_PROG_STREAM_READ_BY_FD,
__MAX_BPF_CMD,
};
@@ -1010,6 +1026,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_USER_RINGBUF,
BPF_MAP_TYPE_CGRP_STORAGE,
BPF_MAP_TYPE_ARENA,
+ BPF_MAP_TYPE_INSN_ARRAY,
__MAX_BPF_MAP_TYPE
};
@@ -1116,11 +1133,15 @@ enum bpf_attach_type {
BPF_NETKIT_PRIMARY,
BPF_NETKIT_PEER,
BPF_TRACE_KPROBE_SESSION,
+ BPF_TRACE_UPROBE_SESSION,
__MAX_BPF_ATTACH_TYPE
};
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
+/* Add BPF_LINK_TYPE(type, name) in bpf_types.h to keep bpf_link_type_strs[]
+ * in sync with the definitions below.
+ */
enum bpf_link_type {
BPF_LINK_TYPE_UNSPEC = 0,
BPF_LINK_TYPE_RAW_TRACEPOINT = 1,
@@ -1203,6 +1224,7 @@ enum bpf_perf_event_type {
#define BPF_F_BEFORE (1U << 3)
#define BPF_F_AFTER (1U << 4)
#define BPF_F_ID (1U << 5)
+#define BPF_F_PREORDER (1U << 6)
#define BPF_F_LINK BPF_F_LINK /* 1 << 13 */
/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
@@ -1409,6 +1431,9 @@ enum {
/* Do not translate kernel bpf_arena pointers to user pointers */
BPF_F_NO_USER_CONV = (1U << 18),
+
+/* Enable BPF ringbuf overwrite mode */
+ BPF_F_RB_OVERWRITE = (1U << 19),
};
/* Flags for BPF_PROG_QUERY. */
@@ -1455,6 +1480,11 @@ struct bpf_stack_build_id {
#define BPF_OBJ_NAME_LEN 16U
+enum {
+ BPF_STREAM_STDOUT = 1,
+ BPF_STREAM_STDERR = 2,
+};
+
union bpf_attr {
struct { /* anonymous struct used by BPF_MAP_CREATE command */
__u32 map_type; /* one of enum bpf_map_type */
@@ -1496,9 +1526,15 @@ union bpf_attr {
* If provided, map_flags should have BPF_F_TOKEN_FD flag set.
*/
__s32 map_token_fd;
+
+ /* Hash of the program that has exclusive access to the map.
+ */
+ __aligned_u64 excl_prog_hash;
+ /* Size of the passed excl_prog_hash. */
+ __u32 excl_prog_hash_size;
};
- struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+ struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */
__u32 map_fd;
__aligned_u64 key;
union {
@@ -1569,6 +1605,26 @@ union bpf_attr {
* If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
*/
__s32 prog_token_fd;
+ /* The fd_array_cnt can be used to pass the length of the
+ * fd_array array. In this case all the [map] file descriptors
+ * passed in this array will be bound to the program, even if
+ * the maps are not referenced directly. The functionality is
+ * similar to the BPF_PROG_BIND_MAP syscall, but maps can be
+ * used by the verifier during the program load. If provided,
+ * then the fd_array[0,...,fd_array_cnt-1] is expected to be
+ * continuous.
+ */
+ __u32 fd_array_cnt;
+ /* Pointer to a buffer containing the signature of the BPF
+ * program.
+ */
+ __aligned_u64 signature;
+ /* Size of the signature buffer in bytes. */
+ __u32 signature_size;
+ /* ID of the kernel keyring to be used for signature
+ * verification.
+ */
+ __s32 keyring_id;
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -1634,6 +1690,7 @@ union bpf_attr {
};
__u32 next_id;
__u32 open_flags;
+ __s32 fd_by_id_token_fd;
};
struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */
@@ -1775,6 +1832,13 @@ union bpf_attr {
};
__u64 expected_revision;
} netkit;
+ struct {
+ union {
+ __u32 relative_fd;
+ __u32 relative_id;
+ };
+ __u64 expected_revision;
+ } cgroup;
};
} link_create;
@@ -1823,6 +1887,13 @@ union bpf_attr {
__u32 bpffs_fd;
} token_create;
+ struct {
+ __aligned_u64 stream_buf;
+ __u32 stream_buf_len;
+ __u32 stream_id;
+ __u32 prog_fd;
+ } prog_stream_read;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
@@ -1970,15 +2041,21 @@ union bpf_attr {
* program.
* Return
* The SMP id of the processor running the program.
+ * Attributes
+ * __bpf_fastcall
*
* long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
* Description
* Store *len* bytes from address *from* into the packet
- * associated to *skb*, at *offset*. *flags* are a combination of
- * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the
- * checksum for the packet after storing the bytes) and
- * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
- * **->swhash** and *skb*\ **->l4hash** to 0).
+ * associated to *skb*, at *offset*. The *flags* are a combination
+ * of the following values:
+ *
+ * **BPF_F_RECOMPUTE_CSUM**
+ * Automatically update *skb*\ **->csum** after storing the
+ * bytes.
+ * **BPF_F_INVALIDATE_HASH**
+ * Set *skb*\ **->hash**, *skb*\ **->swhash** and *skb*\
+ * **->l4hash** to 0.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
@@ -2030,7 +2107,8 @@ union bpf_attr {
* untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
* for updates resulting in a null checksum the value is set to
* **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
- * the checksum is to be computed against a pseudo-header.
+ * that the modified header field is part of the pseudo-header.
+ * Flag **BPF_F_IPV6** should be set for IPv6 packets.
*
* This helper works in combination with **bpf_csum_diff**\ (),
* which does not update the checksum in-place, but offers more
@@ -2377,7 +2455,7 @@ union bpf_attr {
* into it. An example is available in file
* *samples/bpf/trace_output_user.c* in the Linux kernel source
* tree (the eBPF program counterpart is in
- * *samples/bpf/trace_output_kern.c*).
+ * *samples/bpf/trace_output.bpf.c*).
*
* **bpf_perf_event_output**\ () achieves better performance
* than **bpf_trace_printk**\ () for sharing data with user
@@ -2851,7 +2929,7 @@ union bpf_attr {
* **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**,
* **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**,
* **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**,
- * **TCP_BPF_RTO_MIN**.
+ * **TCP_BPF_RTO_MIN**, **TCP_BPF_SOCK_OPS_CB_FLAGS**.
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
* * **IPPROTO_IPV6**, which supports the following *optname*\ s:
* **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**.
@@ -3101,10 +3179,6 @@ union bpf_attr {
* with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration
* option, and in this case it only works on functions tagged with
* **ALLOW_ERROR_INJECTION** in the kernel code.
- *
- * Also, the helper is only available for the architectures having
- * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing,
- * x86 architecture is the only one to support this feature.
* Return
* 0
*
@@ -4821,7 +4895,7 @@ union bpf_attr {
*
* **-ENOENT** if the bpf_local_storage cannot be found.
*
- * long bpf_d_path(struct path *path, char *buf, u32 sz)
+ * long bpf_d_path(const struct path *path, char *buf, u32 sz)
* Description
* Return full path for given **struct path** object, which
* needs to be the kernel BTF *path* object. The path is
@@ -4951,6 +5025,9 @@ union bpf_attr {
* the netns switch takes place from ingress to ingress without
* going through the CPU's backlog queue.
*
+ * *skb*\ **->mark** and *skb*\ **->tstamp** are not cleared during
+ * the netns switch.
+ *
* The *flags* argument is reserved and must be 0. The helper is
* currently only supported for tc BPF program types at the
* ingress hook and for veth and netkit target device types. The
@@ -5369,7 +5446,7 @@ union bpf_attr {
* Currently, the **flags** must be 0. Currently, nr_loops is
* limited to 1 << 23 (~8 million) loops.
*
- * long (\*callback_fn)(u32 index, void \*ctx);
+ * long (\*callback_fn)(u64 index, void \*ctx);
*
* where **index** is the current index in the loop. The index
* is zero-indexed.
@@ -5519,11 +5596,12 @@ union bpf_attr {
* **-EOPNOTSUPP** if the hash calculation failed or **-EINVAL** if
* invalid arguments are passed.
*
- * void *bpf_kptr_xchg(void *map_value, void *ptr)
+ * void *bpf_kptr_xchg(void *dst, void *ptr)
* Description
- * Exchange kptr at pointer *map_value* with *ptr*, and return the
- * old value. *ptr* can be NULL, otherwise it must be a referenced
- * pointer which will be released when this helper is called.
+ * Exchange kptr at pointer *dst* with *ptr*, and return the old value.
+ * *dst* can be map value or local kptr. *ptr* can be NULL, otherwise
+ * it must be a referenced pointer which will be released when this helper
+ * is called.
* Return
* The old value of kptr (which can be NULL). The returned pointer
* if not NULL, is a reference which must be released using its
@@ -5544,7 +5622,7 @@ union bpf_attr {
* Return
* *sk* if casting is valid, or **NULL** otherwise.
*
- * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr)
+ * long bpf_dynptr_from_mem(void *data, u64 size, u64 flags, struct bpf_dynptr *ptr)
* Description
* Get a dynptr to local memory *data*.
*
@@ -5587,7 +5665,7 @@ union bpf_attr {
* Return
* Nothing. Always succeeds.
*
- * long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags)
+ * long bpf_dynptr_read(void *dst, u64 len, const struct bpf_dynptr *src, u64 offset, u64 flags)
* Description
* Read *len* bytes from *src* into *dst*, starting from *offset*
* into *src*.
@@ -5597,7 +5675,7 @@ union bpf_attr {
* of *src*'s data, -EINVAL if *src* is an invalid dynptr or if
* *flags* is not 0.
*
- * long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
+ * long bpf_dynptr_write(const struct bpf_dynptr *dst, u64 offset, void *src, u64 len, u64 flags)
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
@@ -5618,7 +5696,7 @@ union bpf_attr {
* is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs,
* other errors correspond to errors returned by **bpf_skb_store_bytes**\ ().
*
- * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
+ * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u64 offset, u64 len)
* Description
* Get a pointer to the underlying dynptr data.
*
@@ -6006,7 +6084,10 @@ union bpf_attr {
FN(user_ringbuf_drain, 209, ##ctx) \
FN(cgrp_storage_get, 210, ##ctx) \
FN(cgrp_storage_delete, 211, ##ctx) \
- /* */
+ /* This helper list is effectively frozen. If you are trying to \
+ * add a new helper, you should add a kfunc instead which has \
+ * less stability guarantees. See Documentation/bpf/kfuncs.rst \
+ */
/* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't
* know or care about integer value that is now passed as second argument
@@ -6044,11 +6125,7 @@ enum {
BPF_F_PSEUDO_HDR = (1ULL << 4),
BPF_F_MARK_MANGLED_0 = (1ULL << 5),
BPF_F_MARK_ENFORCE = (1ULL << 6),
-};
-
-/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
-enum {
- BPF_F_INGRESS = (1ULL << 0),
+ BPF_F_IPV6 = (1ULL << 7),
};
/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
@@ -6158,6 +6235,7 @@ enum {
BPF_RB_RING_SIZE = 1,
BPF_RB_CONS_POS = 2,
BPF_RB_PROD_POS = 3,
+ BPF_RB_OVERWRITE_POS = 4,
};
/* BPF ring buffer constants */
@@ -6197,10 +6275,12 @@ enum {
BPF_F_BPRM_SECUREEXEC = (1ULL << 0),
};
-/* Flags for bpf_redirect_map helper */
+/* Flags for bpf_redirect and bpf_redirect_map helpers */
enum {
- BPF_F_BROADCAST = (1ULL << 3),
- BPF_F_EXCLUDE_INGRESS = (1ULL << 4),
+ BPF_F_INGRESS = (1ULL << 0), /* used for skb path */
+ BPF_F_BROADCAST = (1ULL << 3), /* used for XDP path */
+ BPF_F_EXCLUDE_INGRESS = (1ULL << 4), /* used for XDP path */
+#define BPF_F_REDIRECT_FLAGS (BPF_F_INGRESS | BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS)
};
#define __bpf_md_ptr(type, name) \
@@ -6607,6 +6687,8 @@ struct bpf_map_info {
__u32 btf_value_type_id;
__u32 btf_vmlinux_id;
__u64 map_extra;
+ __aligned_u64 hash;
+ __u32 hash_size;
} __attribute__((aligned(8)));
struct bpf_btf_info {
@@ -6626,11 +6708,15 @@ struct bpf_link_info {
struct {
__aligned_u64 tp_name; /* in/out: tp_name buffer ptr */
__u32 tp_name_len; /* in/out: tp_name buffer len */
+ __u32 :32;
+ __u64 cookie;
} raw_tracepoint;
struct {
__u32 attach_type;
__u32 target_obj_id; /* prog_id for PROG_EXT, otherwise btf object id */
__u32 target_btf_id; /* BTF type id inside the object */
+ __u32 :32;
+ __u64 cookie;
} tracing;
struct {
__u64 cgroup_id;
@@ -6702,6 +6788,7 @@ struct bpf_link_info {
__u32 name_len;
__u32 offset; /* offset from file_name */
__u64 cookie;
+ __u64 ref_ctr_offset;
} uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */
struct {
__aligned_u64 func_name; /* in/out */
@@ -6740,6 +6827,13 @@ struct bpf_link_info {
};
} __attribute__((aligned(8)));
+struct bpf_token_info {
+ __u64 allowed_cmds;
+ __u64 allowed_maps;
+ __u64 allowed_progs;
+ __u64 allowed_attachs;
+} __attribute__((aligned(8)));
+
/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
* by user and intended to be used by socket (e.g. to bind to, depends on
* attach type).
@@ -6903,6 +6997,12 @@ enum {
BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F,
};
+enum {
+ SK_BPF_CB_TX_TIMESTAMPING = 1<<0,
+ SK_BPF_CB_MASK = (SK_BPF_CB_TX_TIMESTAMPING - 1) |
+ SK_BPF_CB_TX_TIMESTAMPING
+};
+
/* List of known BPF sock_ops operators.
* New entries can only be added at the end
*/
@@ -7015,6 +7115,29 @@ enum {
* by the kernel or the
* earlier bpf-progs.
*/
+ BPF_SOCK_OPS_TSTAMP_SCHED_CB, /* Called when skb is passing
+ * through dev layer when
+ * SK_BPF_CB_TX_TIMESTAMPING
+ * feature is on.
+ */
+ BPF_SOCK_OPS_TSTAMP_SND_SW_CB, /* Called when skb is about to send
+ * to the nic when SK_BPF_CB_TX_TIMESTAMPING
+ * feature is on.
+ */
+ BPF_SOCK_OPS_TSTAMP_SND_HW_CB, /* Called in hardware phase when
+ * SK_BPF_CB_TX_TIMESTAMPING feature
+ * is on.
+ */
+ BPF_SOCK_OPS_TSTAMP_ACK_CB, /* Called when all the skbs in the
+ * same sendmsg call are acked
+ * when SK_BPF_CB_TX_TIMESTAMPING
+ * feature is on.
+ */
+ BPF_SOCK_OPS_TSTAMP_SENDMSG_CB, /* Called when every sendmsg syscall
+ * is triggered. It's used to correlate
+ * sendmsg timestamp with corresponding
+ * tskey.
+ */
};
/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
@@ -7080,6 +7203,9 @@ enum {
TCP_BPF_SYN = 1005, /* Copy the TCP header */
TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */
TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */
+ TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */
+ SK_BPF_CB_FLAGS = 1009, /* Get or set sock ops flags in socket */
+ SK_BPF_BYPASS_PROT_MEM = 1010, /* Get or Set sk->sk_bypass_prot_mem */
};
enum {
@@ -7316,6 +7442,10 @@ struct bpf_timer {
__u64 __opaque[2];
} __attribute__((aligned(8)));
+struct bpf_task_work {
+ __u64 __opaque;
+} __attribute__((aligned(8)));
+
struct bpf_wq {
__u64 __opaque[2];
} __attribute__((aligned(8)));
@@ -7512,4 +7642,33 @@ struct bpf_iter_num {
__u64 __opaque[1];
} __attribute__((aligned(8)));
+/*
+ * Flags to control BPF kfunc behaviour.
+ * - BPF_F_PAD_ZEROS: Pad destination buffer with zeros. (See the respective
+ * helper documentation for details.)
+ */
+enum bpf_kfunc_flags {
+ BPF_F_PAD_ZEROS = (1ULL << 0),
+};
+
+/*
+ * Values of a BPF_MAP_TYPE_INSN_ARRAY entry must be of this type.
+ *
+ * Before the map is used the orig_off field should point to an
+ * instruction inside the program being loaded. The other fields
+ * must be set to 0.
+ *
+ * After the program is loaded, the xlated_off will be adjusted
+ * by the verifier to point to the index of the original instruction
+ * in the xlated program. If the instruction is deleted, it will
+ * be set to (u32)-1. The jitted_off will be set to the corresponding
+ * offset in the jitted image of the program.
+ */
+struct bpf_insn_array_value {
+ __u32 orig_off;
+ __u32 xlated_off;
+ __u32 jitted_off;
+ __u32 :32;
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index ec1798b6d3ff..266d4ffa6c07 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -36,7 +36,8 @@ struct btf_type {
* bits 24-28: kind (e.g. int, ptr, array...etc)
* bits 29-30: unused
* bit 31: kind_flag, currently used by
- * struct, union, enum, fwd and enum64
+ * struct, union, enum, fwd, enum64,
+ * decl_tag and type_tag
*/
__u32 info;
/* "size" is used by INT, ENUM, STRUCT, UNION, DATASEC and ENUM64.
diff --git a/tools/include/uapi/linux/const.h b/tools/include/uapi/linux/const.h
index a429381e7ca5..b8f629ef135f 100644
--- a/tools/include/uapi/linux/const.h
+++ b/tools/include/uapi/linux/const.h
@@ -28,6 +28,23 @@
#define _BITUL(x) (_UL(1) << (x))
#define _BITULL(x) (_ULL(1) << (x))
+#if !defined(__ASSEMBLY__)
+/*
+ * Missing asm support
+ *
+ * __BIT128() would not work in the asm code, as it shifts an
+ * 'unsigned __int128' data type as direct representation of
+ * 128 bit constants is not supported in the gcc compiler, as
+ * they get silently truncated.
+ *
+ * TODO: Please revisit this implementation when gcc compiler
+ * starts representing 128 bit constants directly like long
+ * and unsigned long etc. Subsequently drop the comment for
+ * GENMASK_U128() which would then start supporting asm code.
+ */
+#define _BIT128(x) ((unsigned __int128)(1) << (x))
+#endif
+
#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1)
#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask))
diff --git a/tools/include/uapi/linux/coredump.h b/tools/include/uapi/linux/coredump.h
new file mode 100644
index 000000000000..dc3789b78af0
--- /dev/null
+++ b/tools/include/uapi/linux/coredump.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#ifndef _UAPI_LINUX_COREDUMP_H
+#define _UAPI_LINUX_COREDUMP_H
+
+#include <linux/types.h>
+
+/**
+ * coredump_{req,ack} flags
+ * @COREDUMP_KERNEL: kernel writes coredump
+ * @COREDUMP_USERSPACE: userspace writes coredump
+ * @COREDUMP_REJECT: don't generate coredump
+ * @COREDUMP_WAIT: wait for coredump server
+ */
+enum {
+ COREDUMP_KERNEL = (1ULL << 0),
+ COREDUMP_USERSPACE = (1ULL << 1),
+ COREDUMP_REJECT = (1ULL << 2),
+ COREDUMP_WAIT = (1ULL << 3),
+};
+
+/**
+ * struct coredump_req - message kernel sends to userspace
+ * @size: size of struct coredump_req
+ * @size_ack: known size of struct coredump_ack on this kernel
+ * @mask: supported features
+ *
+ * When a coredump happens the kernel will connect to the coredump
+ * socket and send a coredump request to the coredump server. The @size
+ * member is set to the size of struct coredump_req and provides a hint
+ * to userspace how much data can be read. Userspace may use MSG_PEEK to
+ * peek the size of struct coredump_req and then choose to consume it in
+ * one go. Userspace may also simply read a COREDUMP_ACK_SIZE_VER0
+ * request. If the size the kernel sends is larger userspace simply
+ * discards any remaining data.
+ *
+ * The coredump_req->mask member is set to the currently know features.
+ * Userspace may only set coredump_ack->mask to the bits raised by the
+ * kernel in coredump_req->mask.
+ *
+ * The coredump_req->size_ack member is set by the kernel to the size of
+ * struct coredump_ack the kernel knows. Userspace may only send up to
+ * coredump_req->size_ack bytes to the kernel and must set
+ * coredump_ack->size accordingly.
+ */
+struct coredump_req {
+ __u32 size;
+ __u32 size_ack;
+ __u64 mask;
+};
+
+enum {
+ COREDUMP_REQ_SIZE_VER0 = 16U, /* size of first published struct */
+};
+
+/**
+ * struct coredump_ack - message userspace sends to kernel
+ * @size: size of the struct
+ * @spare: unused
+ * @mask: features kernel is supposed to use
+ *
+ * The @size member must be set to the size of struct coredump_ack. It
+ * may never exceed what the kernel returned in coredump_req->size_ack
+ * but it may of course be smaller (>= COREDUMP_ACK_SIZE_VER0 and <=
+ * coredump_req->size_ack).
+ *
+ * The @mask member must be set to the features the coredump server
+ * wants the kernel to use. Only bits the kernel returned in
+ * coredump_req->mask may be set.
+ */
+struct coredump_ack {
+ __u32 size;
+ __u32 spare;
+ __u64 mask;
+};
+
+enum {
+ COREDUMP_ACK_SIZE_VER0 = 16U, /* size of first published struct */
+};
+
+/**
+ * enum coredump_mark - Markers for the coredump socket
+ *
+ * The kernel will place a single byte on the coredump socket. The
+ * markers notify userspace whether the coredump ack succeeded or
+ * failed.
+ *
+ * @COREDUMP_MARK_MINSIZE: the provided coredump_ack size was too small
+ * @COREDUMP_MARK_MAXSIZE: the provided coredump_ack size was too big
+ * @COREDUMP_MARK_UNSUPPORTED: the provided coredump_ack mask was invalid
+ * @COREDUMP_MARK_CONFLICTING: the provided coredump_ack mask has conflicting options
+ * @COREDUMP_MARK_REQACK: the coredump request and ack was successful
+ * @__COREDUMP_MARK_MAX: the maximum coredump mark value
+ */
+enum coredump_mark {
+ COREDUMP_MARK_REQACK = 0U,
+ COREDUMP_MARK_MINSIZE = 1U,
+ COREDUMP_MARK_MAXSIZE = 2U,
+ COREDUMP_MARK_UNSUPPORTED = 3U,
+ COREDUMP_MARK_CONFLICTING = 4U,
+ __COREDUMP_MARK_MAX = (1U << 31),
+};
+
+#endif /* _UAPI_LINUX_COREDUMP_H */
diff --git a/tools/include/uapi/linux/elf.h b/tools/include/uapi/linux/elf.h
new file mode 100644
index 000000000000..5834b83d7f9a
--- /dev/null
+++ b/tools/include/uapi/linux/elf.h
@@ -0,0 +1,524 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _LINUX_ELF_H
+#define _LINUX_ELF_H
+
+#include <linux/types.h>
+#include <linux/elf-em.h>
+
+/* 32-bit ELF base types. */
+typedef __u32 Elf32_Addr;
+typedef __u16 Elf32_Half;
+typedef __u32 Elf32_Off;
+typedef __s32 Elf32_Sword;
+typedef __u32 Elf32_Word;
+typedef __u16 Elf32_Versym;
+
+/* 64-bit ELF base types. */
+typedef __u64 Elf64_Addr;
+typedef __u16 Elf64_Half;
+typedef __s16 Elf64_SHalf;
+typedef __u64 Elf64_Off;
+typedef __s32 Elf64_Sword;
+typedef __u32 Elf64_Word;
+typedef __u64 Elf64_Xword;
+typedef __s64 Elf64_Sxword;
+typedef __u16 Elf64_Versym;
+
+/* These constants are for the segment types stored in the image headers */
+#define PT_NULL 0
+#define PT_LOAD 1
+#define PT_DYNAMIC 2
+#define PT_INTERP 3
+#define PT_NOTE 4
+#define PT_SHLIB 5
+#define PT_PHDR 6
+#define PT_TLS 7 /* Thread local storage segment */
+#define PT_LOOS 0x60000000 /* OS-specific */
+#define PT_HIOS 0x6fffffff /* OS-specific */
+#define PT_LOPROC 0x70000000
+#define PT_HIPROC 0x7fffffff
+#define PT_GNU_EH_FRAME (PT_LOOS + 0x474e550)
+#define PT_GNU_STACK (PT_LOOS + 0x474e551)
+#define PT_GNU_RELRO (PT_LOOS + 0x474e552)
+#define PT_GNU_PROPERTY (PT_LOOS + 0x474e553)
+
+
+/* ARM MTE memory tag segment type */
+#define PT_AARCH64_MEMTAG_MTE (PT_LOPROC + 0x2)
+
+/*
+ * Extended Numbering
+ *
+ * If the real number of program header table entries is larger than
+ * or equal to PN_XNUM(0xffff), it is set to sh_info field of the
+ * section header at index 0, and PN_XNUM is set to e_phnum
+ * field. Otherwise, the section header at index 0 is zero
+ * initialized, if it exists.
+ *
+ * Specifications are available in:
+ *
+ * - Oracle: Linker and Libraries.
+ * Part No: 817–1984–19, August 2011.
+ * https://docs.oracle.com/cd/E18752_01/pdf/817-1984.pdf
+ *
+ * - System V ABI AMD64 Architecture Processor Supplement
+ * Draft Version 0.99.4,
+ * January 13, 2010.
+ * http://www.cs.washington.edu/education/courses/cse351/12wi/supp-docs/abi.pdf
+ */
+#define PN_XNUM 0xffff
+
+/* These constants define the different elf file types */
+#define ET_NONE 0
+#define ET_REL 1
+#define ET_EXEC 2
+#define ET_DYN 3
+#define ET_CORE 4
+#define ET_LOPROC 0xff00
+#define ET_HIPROC 0xffff
+
+/* This is the info that is needed to parse the dynamic section of the file */
+#define DT_NULL 0
+#define DT_NEEDED 1
+#define DT_PLTRELSZ 2
+#define DT_PLTGOT 3
+#define DT_HASH 4
+#define DT_STRTAB 5
+#define DT_SYMTAB 6
+#define DT_RELA 7
+#define DT_RELASZ 8
+#define DT_RELAENT 9
+#define DT_STRSZ 10
+#define DT_SYMENT 11
+#define DT_INIT 12
+#define DT_FINI 13
+#define DT_SONAME 14
+#define DT_RPATH 15
+#define DT_SYMBOLIC 16
+#define DT_REL 17
+#define DT_RELSZ 18
+#define DT_RELENT 19
+#define DT_PLTREL 20
+#define DT_DEBUG 21
+#define DT_TEXTREL 22
+#define DT_JMPREL 23
+#define DT_ENCODING 32
+#define OLD_DT_LOOS 0x60000000
+#define DT_LOOS 0x6000000d
+#define DT_HIOS 0x6ffff000
+#define DT_VALRNGLO 0x6ffffd00
+#define DT_VALRNGHI 0x6ffffdff
+#define DT_ADDRRNGLO 0x6ffffe00
+#define DT_GNU_HASH 0x6ffffef5
+#define DT_ADDRRNGHI 0x6ffffeff
+#define DT_VERSYM 0x6ffffff0
+#define DT_RELACOUNT 0x6ffffff9
+#define DT_RELCOUNT 0x6ffffffa
+#define DT_FLAGS_1 0x6ffffffb
+#define DT_VERDEF 0x6ffffffc
+#define DT_VERDEFNUM 0x6ffffffd
+#define DT_VERNEED 0x6ffffffe
+#define DT_VERNEEDNUM 0x6fffffff
+#define OLD_DT_HIOS 0x6fffffff
+#define DT_LOPROC 0x70000000
+#define DT_HIPROC 0x7fffffff
+
+/* This info is needed when parsing the symbol table */
+#define STB_LOCAL 0
+#define STB_GLOBAL 1
+#define STB_WEAK 2
+
+#define STN_UNDEF 0
+
+#define STT_NOTYPE 0
+#define STT_OBJECT 1
+#define STT_FUNC 2
+#define STT_SECTION 3
+#define STT_FILE 4
+#define STT_COMMON 5
+#define STT_TLS 6
+
+#define VER_FLG_BASE 0x1
+#define VER_FLG_WEAK 0x2
+
+#define ELF_ST_BIND(x) ((x) >> 4)
+#define ELF_ST_TYPE(x) ((x) & 0xf)
+#define ELF32_ST_BIND(x) ELF_ST_BIND(x)
+#define ELF32_ST_TYPE(x) ELF_ST_TYPE(x)
+#define ELF64_ST_BIND(x) ELF_ST_BIND(x)
+#define ELF64_ST_TYPE(x) ELF_ST_TYPE(x)
+
+typedef struct {
+ Elf32_Sword d_tag;
+ union {
+ Elf32_Sword d_val;
+ Elf32_Addr d_ptr;
+ } d_un;
+} Elf32_Dyn;
+
+typedef struct {
+ Elf64_Sxword d_tag; /* entry tag value */
+ union {
+ Elf64_Xword d_val;
+ Elf64_Addr d_ptr;
+ } d_un;
+} Elf64_Dyn;
+
+/* The following are used with relocations */
+#define ELF32_R_SYM(x) ((x) >> 8)
+#define ELF32_R_TYPE(x) ((x) & 0xff)
+
+#define ELF64_R_SYM(i) ((i) >> 32)
+#define ELF64_R_TYPE(i) ((i) & 0xffffffff)
+
+typedef struct elf32_rel {
+ Elf32_Addr r_offset;
+ Elf32_Word r_info;
+} Elf32_Rel;
+
+typedef struct elf64_rel {
+ Elf64_Addr r_offset; /* Location at which to apply the action */
+ Elf64_Xword r_info; /* index and type of relocation */
+} Elf64_Rel;
+
+typedef struct elf32_rela {
+ Elf32_Addr r_offset;
+ Elf32_Word r_info;
+ Elf32_Sword r_addend;
+} Elf32_Rela;
+
+typedef struct elf64_rela {
+ Elf64_Addr r_offset; /* Location at which to apply the action */
+ Elf64_Xword r_info; /* index and type of relocation */
+ Elf64_Sxword r_addend; /* Constant addend used to compute value */
+} Elf64_Rela;
+
+typedef struct elf32_sym {
+ Elf32_Word st_name;
+ Elf32_Addr st_value;
+ Elf32_Word st_size;
+ unsigned char st_info;
+ unsigned char st_other;
+ Elf32_Half st_shndx;
+} Elf32_Sym;
+
+typedef struct elf64_sym {
+ Elf64_Word st_name; /* Symbol name, index in string tbl */
+ unsigned char st_info; /* Type and binding attributes */
+ unsigned char st_other; /* No defined meaning, 0 */
+ Elf64_Half st_shndx; /* Associated section index */
+ Elf64_Addr st_value; /* Value of the symbol */
+ Elf64_Xword st_size; /* Associated symbol size */
+} Elf64_Sym;
+
+
+#define EI_NIDENT 16
+
+typedef struct elf32_hdr {
+ unsigned char e_ident[EI_NIDENT];
+ Elf32_Half e_type;
+ Elf32_Half e_machine;
+ Elf32_Word e_version;
+ Elf32_Addr e_entry; /* Entry point */
+ Elf32_Off e_phoff;
+ Elf32_Off e_shoff;
+ Elf32_Word e_flags;
+ Elf32_Half e_ehsize;
+ Elf32_Half e_phentsize;
+ Elf32_Half e_phnum;
+ Elf32_Half e_shentsize;
+ Elf32_Half e_shnum;
+ Elf32_Half e_shstrndx;
+} Elf32_Ehdr;
+
+typedef struct elf64_hdr {
+ unsigned char e_ident[EI_NIDENT]; /* ELF "magic number" */
+ Elf64_Half e_type;
+ Elf64_Half e_machine;
+ Elf64_Word e_version;
+ Elf64_Addr e_entry; /* Entry point virtual address */
+ Elf64_Off e_phoff; /* Program header table file offset */
+ Elf64_Off e_shoff; /* Section header table file offset */
+ Elf64_Word e_flags;
+ Elf64_Half e_ehsize;
+ Elf64_Half e_phentsize;
+ Elf64_Half e_phnum;
+ Elf64_Half e_shentsize;
+ Elf64_Half e_shnum;
+ Elf64_Half e_shstrndx;
+} Elf64_Ehdr;
+
+/* These constants define the permissions on sections in the program
+ header, p_flags. */
+#define PF_R 0x4
+#define PF_W 0x2
+#define PF_X 0x1
+
+typedef struct elf32_phdr {
+ Elf32_Word p_type;
+ Elf32_Off p_offset;
+ Elf32_Addr p_vaddr;
+ Elf32_Addr p_paddr;
+ Elf32_Word p_filesz;
+ Elf32_Word p_memsz;
+ Elf32_Word p_flags;
+ Elf32_Word p_align;
+} Elf32_Phdr;
+
+typedef struct elf64_phdr {
+ Elf64_Word p_type;
+ Elf64_Word p_flags;
+ Elf64_Off p_offset; /* Segment file offset */
+ Elf64_Addr p_vaddr; /* Segment virtual address */
+ Elf64_Addr p_paddr; /* Segment physical address */
+ Elf64_Xword p_filesz; /* Segment size in file */
+ Elf64_Xword p_memsz; /* Segment size in memory */
+ Elf64_Xword p_align; /* Segment alignment, file & memory */
+} Elf64_Phdr;
+
+/* sh_type */
+#define SHT_NULL 0
+#define SHT_PROGBITS 1
+#define SHT_SYMTAB 2
+#define SHT_STRTAB 3
+#define SHT_RELA 4
+#define SHT_HASH 5
+#define SHT_DYNAMIC 6
+#define SHT_NOTE 7
+#define SHT_NOBITS 8
+#define SHT_REL 9
+#define SHT_SHLIB 10
+#define SHT_DYNSYM 11
+#define SHT_NUM 12
+#define SHT_LOPROC 0x70000000
+#define SHT_HIPROC 0x7fffffff
+#define SHT_LOUSER 0x80000000
+#define SHT_HIUSER 0xffffffff
+
+/* sh_flags */
+#define SHF_WRITE 0x1
+#define SHF_ALLOC 0x2
+#define SHF_EXECINSTR 0x4
+#define SHF_RELA_LIVEPATCH 0x00100000
+#define SHF_RO_AFTER_INIT 0x00200000
+#define SHF_MASKPROC 0xf0000000
+
+/* special section indexes */
+#define SHN_UNDEF 0
+#define SHN_LORESERVE 0xff00
+#define SHN_LOPROC 0xff00
+#define SHN_HIPROC 0xff1f
+#define SHN_LIVEPATCH 0xff20
+#define SHN_ABS 0xfff1
+#define SHN_COMMON 0xfff2
+#define SHN_HIRESERVE 0xffff
+
+typedef struct elf32_shdr {
+ Elf32_Word sh_name;
+ Elf32_Word sh_type;
+ Elf32_Word sh_flags;
+ Elf32_Addr sh_addr;
+ Elf32_Off sh_offset;
+ Elf32_Word sh_size;
+ Elf32_Word sh_link;
+ Elf32_Word sh_info;
+ Elf32_Word sh_addralign;
+ Elf32_Word sh_entsize;
+} Elf32_Shdr;
+
+typedef struct elf64_shdr {
+ Elf64_Word sh_name; /* Section name, index in string tbl */
+ Elf64_Word sh_type; /* Type of section */
+ Elf64_Xword sh_flags; /* Miscellaneous section attributes */
+ Elf64_Addr sh_addr; /* Section virtual addr at execution */
+ Elf64_Off sh_offset; /* Section file offset */
+ Elf64_Xword sh_size; /* Size of section in bytes */
+ Elf64_Word sh_link; /* Index of another section */
+ Elf64_Word sh_info; /* Additional section information */
+ Elf64_Xword sh_addralign; /* Section alignment */
+ Elf64_Xword sh_entsize; /* Entry size if section holds table */
+} Elf64_Shdr;
+
+#define EI_MAG0 0 /* e_ident[] indexes */
+#define EI_MAG1 1
+#define EI_MAG2 2
+#define EI_MAG3 3
+#define EI_CLASS 4
+#define EI_DATA 5
+#define EI_VERSION 6
+#define EI_OSABI 7
+#define EI_PAD 8
+
+#define ELFMAG0 0x7f /* EI_MAG */
+#define ELFMAG1 'E'
+#define ELFMAG2 'L'
+#define ELFMAG3 'F'
+#define ELFMAG "\177ELF"
+#define SELFMAG 4
+
+#define ELFCLASSNONE 0 /* EI_CLASS */
+#define ELFCLASS32 1
+#define ELFCLASS64 2
+#define ELFCLASSNUM 3
+
+#define ELFDATANONE 0 /* e_ident[EI_DATA] */
+#define ELFDATA2LSB 1
+#define ELFDATA2MSB 2
+
+#define EV_NONE 0 /* e_version, EI_VERSION */
+#define EV_CURRENT 1
+#define EV_NUM 2
+
+#define ELFOSABI_NONE 0
+#define ELFOSABI_LINUX 3
+
+#ifndef ELF_OSABI
+#define ELF_OSABI ELFOSABI_NONE
+#endif
+
+/*
+ * Notes used in ET_CORE. Architectures export some of the arch register sets
+ * using the corresponding note types via the PTRACE_GETREGSET and
+ * PTRACE_SETREGSET requests.
+ * The note name for these types is "LINUX", except NT_PRFPREG that is named
+ * "CORE".
+ */
+#define NT_PRSTATUS 1
+#define NT_PRFPREG 2
+#define NT_PRPSINFO 3
+#define NT_TASKSTRUCT 4
+#define NT_AUXV 6
+/*
+ * Note to userspace developers: size of NT_SIGINFO note may increase
+ * in the future to accomodate more fields, don't assume it is fixed!
+ */
+#define NT_SIGINFO 0x53494749
+#define NT_FILE 0x46494c45
+#define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */
+#define NT_PPC_VMX 0x100 /* PowerPC Altivec/VMX registers */
+#define NT_PPC_SPE 0x101 /* PowerPC SPE/EVR registers */
+#define NT_PPC_VSX 0x102 /* PowerPC VSX registers */
+#define NT_PPC_TAR 0x103 /* Target Address Register */
+#define NT_PPC_PPR 0x104 /* Program Priority Register */
+#define NT_PPC_DSCR 0x105 /* Data Stream Control Register */
+#define NT_PPC_EBB 0x106 /* Event Based Branch Registers */
+#define NT_PPC_PMU 0x107 /* Performance Monitor Registers */
+#define NT_PPC_TM_CGPR 0x108 /* TM checkpointed GPR Registers */
+#define NT_PPC_TM_CFPR 0x109 /* TM checkpointed FPR Registers */
+#define NT_PPC_TM_CVMX 0x10a /* TM checkpointed VMX Registers */
+#define NT_PPC_TM_CVSX 0x10b /* TM checkpointed VSX Registers */
+#define NT_PPC_TM_SPR 0x10c /* TM Special Purpose Registers */
+#define NT_PPC_TM_CTAR 0x10d /* TM checkpointed Target Address Register */
+#define NT_PPC_TM_CPPR 0x10e /* TM checkpointed Program Priority Register */
+#define NT_PPC_TM_CDSCR 0x10f /* TM checkpointed Data Stream Control Register */
+#define NT_PPC_PKEY 0x110 /* Memory Protection Keys registers */
+#define NT_PPC_DEXCR 0x111 /* PowerPC DEXCR registers */
+#define NT_PPC_HASHKEYR 0x112 /* PowerPC HASHKEYR register */
+#define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */
+#define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */
+#define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */
+/* Old binutils treats 0x203 as a CET state */
+#define NT_X86_SHSTK 0x204 /* x86 SHSTK state */
+#define NT_X86_XSAVE_LAYOUT 0x205 /* XSAVE layout description */
+#define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */
+#define NT_S390_TIMER 0x301 /* s390 timer register */
+#define NT_S390_TODCMP 0x302 /* s390 TOD clock comparator register */
+#define NT_S390_TODPREG 0x303 /* s390 TOD programmable register */
+#define NT_S390_CTRS 0x304 /* s390 control registers */
+#define NT_S390_PREFIX 0x305 /* s390 prefix register */
+#define NT_S390_LAST_BREAK 0x306 /* s390 breaking event address */
+#define NT_S390_SYSTEM_CALL 0x307 /* s390 system call restart data */
+#define NT_S390_TDB 0x308 /* s390 transaction diagnostic block */
+#define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 upper half */
+#define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */
+#define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */
+#define NT_S390_GS_BC 0x30c /* s390 guarded storage broadcast control block */
+#define NT_S390_RI_CB 0x30d /* s390 runtime instrumentation */
+#define NT_S390_PV_CPU_DATA 0x30e /* s390 protvirt cpu dump data */
+#define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */
+#define NT_ARM_TLS 0x401 /* ARM TLS register */
+#define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */
+#define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */
+#define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */
+#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension registers */
+#define NT_ARM_PAC_MASK 0x406 /* ARM pointer authentication code masks */
+#define NT_ARM_PACA_KEYS 0x407 /* ARM pointer authentication address keys */
+#define NT_ARM_PACG_KEYS 0x408 /* ARM pointer authentication generic key */
+#define NT_ARM_TAGGED_ADDR_CTRL 0x409 /* arm64 tagged address control (prctl()) */
+#define NT_ARM_PAC_ENABLED_KEYS 0x40a /* arm64 ptr auth enabled keys (prctl()) */
+#define NT_ARM_SSVE 0x40b /* ARM Streaming SVE registers */
+#define NT_ARM_ZA 0x40c /* ARM SME ZA registers */
+#define NT_ARM_ZT 0x40d /* ARM SME ZT registers */
+#define NT_ARM_FPMR 0x40e /* ARM floating point mode register */
+#define NT_ARM_POE 0x40f /* ARM POE registers */
+#define NT_ARM_GCS 0x410 /* ARM GCS state */
+#define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */
+#define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */
+#define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */
+#define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode */
+#define NT_MIPS_MSA 0x802 /* MIPS SIMD registers */
+#define NT_RISCV_CSR 0x900 /* RISC-V Control and Status Registers */
+#define NT_RISCV_VECTOR 0x901 /* RISC-V vector registers */
+#define NT_RISCV_TAGGED_ADDR_CTRL 0x902 /* RISC-V tagged address control (prctl()) */
+#define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers */
+#define NT_LOONGARCH_CSR 0xa01 /* LoongArch control and status registers */
+#define NT_LOONGARCH_LSX 0xa02 /* LoongArch Loongson SIMD Extension registers */
+#define NT_LOONGARCH_LASX 0xa03 /* LoongArch Loongson Advanced SIMD Extension registers */
+#define NT_LOONGARCH_LBT 0xa04 /* LoongArch Loongson Binary Translation registers */
+#define NT_LOONGARCH_HW_BREAK 0xa05 /* LoongArch hardware breakpoint registers */
+#define NT_LOONGARCH_HW_WATCH 0xa06 /* LoongArch hardware watchpoint registers */
+
+/* Note types with note name "GNU" */
+#define NT_GNU_PROPERTY_TYPE_0 5
+
+/* Note header in a PT_NOTE section */
+typedef struct elf32_note {
+ Elf32_Word n_namesz; /* Name size */
+ Elf32_Word n_descsz; /* Content size */
+ Elf32_Word n_type; /* Content type */
+} Elf32_Nhdr;
+
+/* Note header in a PT_NOTE section */
+typedef struct elf64_note {
+ Elf64_Word n_namesz; /* Name size */
+ Elf64_Word n_descsz; /* Content size */
+ Elf64_Word n_type; /* Content type */
+} Elf64_Nhdr;
+
+/* .note.gnu.property types for EM_AARCH64: */
+#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
+
+/* Bits for GNU_PROPERTY_AARCH64_FEATURE_1_BTI */
+#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0)
+
+typedef struct {
+ Elf32_Half vd_version;
+ Elf32_Half vd_flags;
+ Elf32_Half vd_ndx;
+ Elf32_Half vd_cnt;
+ Elf32_Word vd_hash;
+ Elf32_Word vd_aux;
+ Elf32_Word vd_next;
+} Elf32_Verdef;
+
+typedef struct {
+ Elf64_Half vd_version;
+ Elf64_Half vd_flags;
+ Elf64_Half vd_ndx;
+ Elf64_Half vd_cnt;
+ Elf64_Word vd_hash;
+ Elf64_Word vd_aux;
+ Elf64_Word vd_next;
+} Elf64_Verdef;
+
+typedef struct {
+ Elf32_Word vda_name;
+ Elf32_Word vda_next;
+} Elf32_Verdaux;
+
+typedef struct {
+ Elf64_Word vda_name;
+ Elf64_Word vda_next;
+} Elf64_Verdaux;
+
+#endif /* _LINUX_ELF_H */
diff --git a/tools/include/uapi/linux/fanotify.h b/tools/include/uapi/linux/fanotify.h
new file mode 100644
index 000000000000..e710967c7c26
--- /dev/null
+++ b/tools/include/uapi/linux/fanotify.h
@@ -0,0 +1,274 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_FANOTIFY_H
+#define _UAPI_LINUX_FANOTIFY_H
+
+#include <linux/types.h>
+
+/* the following events that user-space can register for */
+#define FAN_ACCESS 0x00000001 /* File was accessed */
+#define FAN_MODIFY 0x00000002 /* File was modified */
+#define FAN_ATTRIB 0x00000004 /* Metadata changed */
+#define FAN_CLOSE_WRITE 0x00000008 /* Writable file closed */
+#define FAN_CLOSE_NOWRITE 0x00000010 /* Unwritable file closed */
+#define FAN_OPEN 0x00000020 /* File was opened */
+#define FAN_MOVED_FROM 0x00000040 /* File was moved from X */
+#define FAN_MOVED_TO 0x00000080 /* File was moved to Y */
+#define FAN_CREATE 0x00000100 /* Subfile was created */
+#define FAN_DELETE 0x00000200 /* Subfile was deleted */
+#define FAN_DELETE_SELF 0x00000400 /* Self was deleted */
+#define FAN_MOVE_SELF 0x00000800 /* Self was moved */
+#define FAN_OPEN_EXEC 0x00001000 /* File was opened for exec */
+
+#define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */
+#define FAN_FS_ERROR 0x00008000 /* Filesystem error */
+
+#define FAN_OPEN_PERM 0x00010000 /* File open in perm check */
+#define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */
+#define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */
+/* #define FAN_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */
+
+#define FAN_PRE_ACCESS 0x00100000 /* Pre-content access hook */
+#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */
+#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */
+
+#define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */
+
+#define FAN_RENAME 0x10000000 /* File was renamed */
+
+#define FAN_ONDIR 0x40000000 /* Event occurred against dir */
+
+/* helper events */
+#define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */
+#define FAN_MOVE (FAN_MOVED_FROM | FAN_MOVED_TO) /* moves */
+
+/* flags used for fanotify_init() */
+#define FAN_CLOEXEC 0x00000001
+#define FAN_NONBLOCK 0x00000002
+
+/* These are NOT bitwise flags. Both bits are used together. */
+#define FAN_CLASS_NOTIF 0x00000000
+#define FAN_CLASS_CONTENT 0x00000004
+#define FAN_CLASS_PRE_CONTENT 0x00000008
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \
+ FAN_CLASS_PRE_CONTENT)
+
+#define FAN_UNLIMITED_QUEUE 0x00000010
+#define FAN_UNLIMITED_MARKS 0x00000020
+#define FAN_ENABLE_AUDIT 0x00000040
+
+/* Flags to determine fanotify event format */
+#define FAN_REPORT_PIDFD 0x00000080 /* Report pidfd for event->pid */
+#define FAN_REPORT_TID 0x00000100 /* event->pid is thread id */
+#define FAN_REPORT_FID 0x00000200 /* Report unique file id */
+#define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */
+#define FAN_REPORT_NAME 0x00000800 /* Report events with name */
+#define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */
+#define FAN_REPORT_FD_ERROR 0x00002000 /* event->fd can report error */
+#define FAN_REPORT_MNT 0x00004000 /* Report mount events */
+
+/* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */
+#define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME)
+/* Convenience macro - FAN_REPORT_TARGET_FID requires all other FID flags */
+#define FAN_REPORT_DFID_NAME_TARGET (FAN_REPORT_DFID_NAME | \
+ FAN_REPORT_FID | FAN_REPORT_TARGET_FID)
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \
+ FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE |\
+ FAN_UNLIMITED_MARKS)
+
+/* flags used for fanotify_modify_mark() */
+#define FAN_MARK_ADD 0x00000001
+#define FAN_MARK_REMOVE 0x00000002
+#define FAN_MARK_DONT_FOLLOW 0x00000004
+#define FAN_MARK_ONLYDIR 0x00000008
+/* FAN_MARK_MOUNT is 0x00000010 */
+#define FAN_MARK_IGNORED_MASK 0x00000020
+#define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040
+#define FAN_MARK_FLUSH 0x00000080
+/* FAN_MARK_FILESYSTEM is 0x00000100 */
+#define FAN_MARK_EVICTABLE 0x00000200
+/* This bit is mutually exclusive with FAN_MARK_IGNORED_MASK bit */
+#define FAN_MARK_IGNORE 0x00000400
+
+/* These are NOT bitwise flags. Both bits can be used togther. */
+#define FAN_MARK_INODE 0x00000000
+#define FAN_MARK_MOUNT 0x00000010
+#define FAN_MARK_FILESYSTEM 0x00000100
+#define FAN_MARK_MNTNS 0x00000110
+
+/*
+ * Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY
+ * for non-inode mark types.
+ */
+#define FAN_MARK_IGNORE_SURV (FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY)
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\
+ FAN_MARK_REMOVE |\
+ FAN_MARK_DONT_FOLLOW |\
+ FAN_MARK_ONLYDIR |\
+ FAN_MARK_MOUNT |\
+ FAN_MARK_IGNORED_MASK |\
+ FAN_MARK_IGNORED_SURV_MODIFY |\
+ FAN_MARK_FLUSH)
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_EVENTS (FAN_ACCESS |\
+ FAN_MODIFY |\
+ FAN_CLOSE |\
+ FAN_OPEN)
+
+/*
+ * All events which require a permission response from userspace
+ */
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_PERM_EVENTS (FAN_OPEN_PERM |\
+ FAN_ACCESS_PERM)
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_OUTGOING_EVENTS (FAN_ALL_EVENTS |\
+ FAN_ALL_PERM_EVENTS |\
+ FAN_Q_OVERFLOW)
+
+#define FANOTIFY_METADATA_VERSION 3
+
+struct fanotify_event_metadata {
+ __u32 event_len;
+ __u8 vers;
+ __u8 reserved;
+ __u16 metadata_len;
+ __aligned_u64 mask;
+ __s32 fd;
+ __s32 pid;
+};
+
+#define FAN_EVENT_INFO_TYPE_FID 1
+#define FAN_EVENT_INFO_TYPE_DFID_NAME 2
+#define FAN_EVENT_INFO_TYPE_DFID 3
+#define FAN_EVENT_INFO_TYPE_PIDFD 4
+#define FAN_EVENT_INFO_TYPE_ERROR 5
+#define FAN_EVENT_INFO_TYPE_RANGE 6
+#define FAN_EVENT_INFO_TYPE_MNT 7
+
+/* Special info types for FAN_RENAME */
+#define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10
+/* Reserved for FAN_EVENT_INFO_TYPE_OLD_DFID 11 */
+#define FAN_EVENT_INFO_TYPE_NEW_DFID_NAME 12
+/* Reserved for FAN_EVENT_INFO_TYPE_NEW_DFID 13 */
+
+/* Variable length info record following event metadata */
+struct fanotify_event_info_header {
+ __u8 info_type;
+ __u8 pad;
+ __u16 len;
+};
+
+/*
+ * Unique file identifier info record.
+ * This structure is used for records of types FAN_EVENT_INFO_TYPE_FID,
+ * FAN_EVENT_INFO_TYPE_DFID and FAN_EVENT_INFO_TYPE_DFID_NAME.
+ * For FAN_EVENT_INFO_TYPE_DFID_NAME there is additionally a null terminated
+ * name immediately after the file handle.
+ */
+struct fanotify_event_info_fid {
+ struct fanotify_event_info_header hdr;
+ __kernel_fsid_t fsid;
+ /*
+ * Following is an opaque struct file_handle that can be passed as
+ * an argument to open_by_handle_at(2).
+ */
+ unsigned char handle[];
+};
+
+/*
+ * This structure is used for info records of type FAN_EVENT_INFO_TYPE_PIDFD.
+ * It holds a pidfd for the pid that was responsible for generating an event.
+ */
+struct fanotify_event_info_pidfd {
+ struct fanotify_event_info_header hdr;
+ __s32 pidfd;
+};
+
+struct fanotify_event_info_error {
+ struct fanotify_event_info_header hdr;
+ __s32 error;
+ __u32 error_count;
+};
+
+struct fanotify_event_info_range {
+ struct fanotify_event_info_header hdr;
+ __u32 pad;
+ __u64 offset;
+ __u64 count;
+};
+
+struct fanotify_event_info_mnt {
+ struct fanotify_event_info_header hdr;
+ __u64 mnt_id;
+};
+
+/*
+ * User space may need to record additional information about its decision.
+ * The extra information type records what kind of information is included.
+ * The default is none. We also define an extra information buffer whose
+ * size is determined by the extra information type.
+ *
+ * If the information type is Audit Rule, then the information following
+ * is the rule number that triggered the user space decision that
+ * requires auditing.
+ */
+
+#define FAN_RESPONSE_INFO_NONE 0
+#define FAN_RESPONSE_INFO_AUDIT_RULE 1
+
+struct fanotify_response {
+ __s32 fd;
+ __u32 response;
+};
+
+struct fanotify_response_info_header {
+ __u8 type;
+ __u8 pad;
+ __u16 len;
+};
+
+struct fanotify_response_info_audit_rule {
+ struct fanotify_response_info_header hdr;
+ __u32 rule_number;
+ __u32 subj_trust;
+ __u32 obj_trust;
+};
+
+/* Legit userspace responses to a _PERM event */
+#define FAN_ALLOW 0x01
+#define FAN_DENY 0x02
+/* errno other than EPERM can specified in upper byte of deny response */
+#define FAN_ERRNO_BITS 8
+#define FAN_ERRNO_SHIFT (32 - FAN_ERRNO_BITS)
+#define FAN_ERRNO_MASK ((1 << FAN_ERRNO_BITS) - 1)
+#define FAN_DENY_ERRNO(err) \
+ (FAN_DENY | ((((__u32)(err)) & FAN_ERRNO_MASK) << FAN_ERRNO_SHIFT))
+
+#define FAN_AUDIT 0x10 /* Bitmask to create audit record for result */
+#define FAN_INFO 0x20 /* Bitmask to indicate additional information */
+
+/* No fd set in event */
+#define FAN_NOFD -1
+#define FAN_NOPIDFD FAN_NOFD
+#define FAN_EPIDFD -2
+
+/* Helper functions to deal with fanotify_event_metadata buffers */
+#define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata))
+
+#define FAN_EVENT_NEXT(meta, len) ((len) -= (meta)->event_len, \
+ (struct fanotify_event_metadata*)(((char *)(meta)) + \
+ (meta)->event_len))
+
+#define FAN_EVENT_OK(meta, len) ((long)(len) >= (long)FAN_EVENT_METADATA_LEN && \
+ (long)(meta)->event_len >= (long)FAN_EVENT_METADATA_LEN && \
+ (long)(meta)->event_len <= (long)(len))
+
+#endif /* _UAPI_LINUX_FANOTIFY_H */
diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h
new file mode 100644
index 000000000000..24ddf7bc4f25
--- /dev/null
+++ b/tools/include/uapi/linux/fs.h
@@ -0,0 +1,569 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_FS_H
+#define _UAPI_LINUX_FS_H
+
+/*
+ * This file has definitions for some important file table structures
+ * and constants and structures used by various generic file system
+ * ioctl's. Please do not make any changes in this file before
+ * sending patches for review to linux-fsdevel@vger.kernel.org and
+ * linux-api@vger.kernel.org.
+ */
+
+#include <linux/limits.h>
+#include <linux/ioctl.h>
+#include <linux/types.h>
+#ifndef __KERNEL__
+#include <linux/fscrypt.h>
+#endif
+
+/* Use of MS_* flags within the kernel is restricted to core mount(2) code. */
+#if !defined(__KERNEL__)
+#include <linux/mount.h>
+#endif
+
+/*
+ * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
+ * the file limit at runtime and only root can increase the per-process
+ * nr_file rlimit, so it's safe to set up a ridiculously high absolute
+ * upper limit on files-per-process.
+ *
+ * Some programs (notably those using select()) may have to be
+ * recompiled to take full advantage of the new limits..
+ */
+
+/* Fixed constants first: */
+#undef NR_OPEN
+#define INR_OPEN_CUR 1024 /* Initial setting for nfile rlimits */
+#define INR_OPEN_MAX 4096 /* Hard limit for nfile rlimits */
+
+#define BLOCK_SIZE_BITS 10
+#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
+
+/* flags for integrity meta */
+#define IO_INTEGRITY_CHK_GUARD (1U << 0) /* enforce guard check */
+#define IO_INTEGRITY_CHK_REFTAG (1U << 1) /* enforce ref check */
+#define IO_INTEGRITY_CHK_APPTAG (1U << 2) /* enforce app check */
+
+#define IO_INTEGRITY_VALID_FLAGS (IO_INTEGRITY_CHK_GUARD | \
+ IO_INTEGRITY_CHK_REFTAG | \
+ IO_INTEGRITY_CHK_APPTAG)
+
+#define SEEK_SET 0 /* seek relative to beginning of file */
+#define SEEK_CUR 1 /* seek relative to current file position */
+#define SEEK_END 2 /* seek relative to end of file */
+#define SEEK_DATA 3 /* seek to the next data */
+#define SEEK_HOLE 4 /* seek to the next hole */
+#define SEEK_MAX SEEK_HOLE
+
+#define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */
+#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */
+#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */
+
+struct file_clone_range {
+ __s64 src_fd;
+ __u64 src_offset;
+ __u64 src_length;
+ __u64 dest_offset;
+};
+
+struct fstrim_range {
+ __u64 start;
+ __u64 len;
+ __u64 minlen;
+};
+
+/*
+ * We include a length field because some filesystems (vfat) have an identifier
+ * that we do want to expose as a UUID, but doesn't have the standard length.
+ *
+ * We use a fixed size buffer beacuse this interface will, by fiat, never
+ * support "UUIDs" longer than 16 bytes; we don't want to force all downstream
+ * users to have to deal with that.
+ */
+struct fsuuid2 {
+ __u8 len;
+ __u8 uuid[16];
+};
+
+struct fs_sysfs_path {
+ __u8 len;
+ __u8 name[128];
+};
+
+/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
+#define FILE_DEDUPE_RANGE_SAME 0
+#define FILE_DEDUPE_RANGE_DIFFERS 1
+
+/* from struct btrfs_ioctl_file_extent_same_info */
+struct file_dedupe_range_info {
+ __s64 dest_fd; /* in - destination file */
+ __u64 dest_offset; /* in - start of extent in destination */
+ __u64 bytes_deduped; /* out - total # of bytes we were able
+ * to dedupe from this file. */
+ /* status of this dedupe operation:
+ * < 0 for error
+ * == FILE_DEDUPE_RANGE_SAME if dedupe succeeds
+ * == FILE_DEDUPE_RANGE_DIFFERS if data differs
+ */
+ __s32 status; /* out - see above description */
+ __u32 reserved; /* must be zero */
+};
+
+/* from struct btrfs_ioctl_file_extent_same_args */
+struct file_dedupe_range {
+ __u64 src_offset; /* in - start of extent in source */
+ __u64 src_length; /* in - length of extent */
+ __u16 dest_count; /* in - total elements in info array */
+ __u16 reserved1; /* must be zero */
+ __u32 reserved2; /* must be zero */
+ struct file_dedupe_range_info info[];
+};
+
+/* And dynamically-tunable limits and defaults: */
+struct files_stat_struct {
+ unsigned long nr_files; /* read only */
+ unsigned long nr_free_files; /* read only */
+ unsigned long max_files; /* tunable */
+};
+
+struct inodes_stat_t {
+ long nr_inodes;
+ long nr_unused;
+ long dummy[5]; /* padding for sysctl ABI compatibility */
+};
+
+
+#define NR_FILE 8192 /* this can well be larger on a larger system */
+
+/*
+ * Structure for FS_IOC_FSGETXATTR[A] and FS_IOC_FSSETXATTR.
+ */
+struct fsxattr {
+ __u32 fsx_xflags; /* xflags field value (get/set) */
+ __u32 fsx_extsize; /* extsize field value (get/set)*/
+ __u32 fsx_nextents; /* nextents field value (get) */
+ __u32 fsx_projid; /* project identifier (get/set) */
+ __u32 fsx_cowextsize; /* CoW extsize field value (get/set)*/
+ unsigned char fsx_pad[8];
+};
+
+/*
+ * Flags for the fsx_xflags field
+ */
+#define FS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */
+#define FS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */
+#define FS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */
+#define FS_XFLAG_APPEND 0x00000010 /* all writes append */
+#define FS_XFLAG_SYNC 0x00000020 /* all writes synchronous */
+#define FS_XFLAG_NOATIME 0x00000040 /* do not update access time */
+#define FS_XFLAG_NODUMP 0x00000080 /* do not include in backups */
+#define FS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */
+#define FS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */
+#define FS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */
+#define FS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */
+#define FS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
+#define FS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */
+#define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */
+#define FS_XFLAG_DAX 0x00008000 /* use DAX for IO */
+#define FS_XFLAG_COWEXTSIZE 0x00010000 /* CoW extent size allocator hint */
+#define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
+
+/* the read-only stuff doesn't really belong here, but any other place is
+ probably as bad and I don't want to create yet another include file. */
+
+#define BLKROSET _IO(0x12,93) /* set device read-only (0 = read-write) */
+#define BLKROGET _IO(0x12,94) /* get read-only status (0 = read_write) */
+#define BLKRRPART _IO(0x12,95) /* re-read partition table */
+#define BLKGETSIZE _IO(0x12,96) /* return device size /512 (long *arg) */
+#define BLKFLSBUF _IO(0x12,97) /* flush buffer cache */
+#define BLKRASET _IO(0x12,98) /* set read ahead for block device */
+#define BLKRAGET _IO(0x12,99) /* get current read ahead setting */
+#define BLKFRASET _IO(0x12,100)/* set filesystem (mm/filemap.c) read-ahead */
+#define BLKFRAGET _IO(0x12,101)/* get filesystem (mm/filemap.c) read-ahead */
+#define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */
+#define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */
+#define BLKSSZGET _IO(0x12,104)/* get block device sector size */
+#if 0
+#define BLKPG _IO(0x12,105)/* See blkpg.h */
+
+/* Some people are morons. Do not use sizeof! */
+
+#define BLKELVGET _IOR(0x12,106,size_t)/* elevator get */
+#define BLKELVSET _IOW(0x12,107,size_t)/* elevator set */
+/* This was here just to show that the number is taken -
+ probably all these _IO(0x12,*) ioctls should be moved to blkpg.h. */
+#endif
+/* A jump here: 108-111 have been used for various private purposes. */
+#define BLKBSZGET _IOR(0x12,112,size_t)
+#define BLKBSZSET _IOW(0x12,113,size_t)
+#define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */
+#define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup)
+#define BLKTRACESTART _IO(0x12,116)
+#define BLKTRACESTOP _IO(0x12,117)
+#define BLKTRACETEARDOWN _IO(0x12,118)
+#define BLKDISCARD _IO(0x12,119)
+#define BLKIOMIN _IO(0x12,120)
+#define BLKIOOPT _IO(0x12,121)
+#define BLKALIGNOFF _IO(0x12,122)
+#define BLKPBSZGET _IO(0x12,123)
+#define BLKDISCARDZEROES _IO(0x12,124)
+#define BLKSECDISCARD _IO(0x12,125)
+#define BLKROTATIONAL _IO(0x12,126)
+#define BLKZEROOUT _IO(0x12,127)
+#define BLKGETDISKSEQ _IOR(0x12,128,__u64)
+/*
+ * A jump here: 130-136 are reserved for zoned block devices
+ * (see uapi/linux/blkzoned.h)
+ */
+
+#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
+#define FIBMAP _IO(0x00,1) /* bmap access */
+#define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */
+#define FIFREEZE _IOWR('X', 119, int) /* Freeze */
+#define FITHAW _IOWR('X', 120, int) /* Thaw */
+#define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */
+#define FICLONE _IOW(0x94, 9, int)
+#define FICLONERANGE _IOW(0x94, 13, struct file_clone_range)
+#define FIDEDUPERANGE _IOWR(0x94, 54, struct file_dedupe_range)
+
+#define FSLABEL_MAX 256 /* Max chars for the interface; each fs may differ */
+
+#define FS_IOC_GETFLAGS _IOR('f', 1, long)
+#define FS_IOC_SETFLAGS _IOW('f', 2, long)
+#define FS_IOC_GETVERSION _IOR('v', 1, long)
+#define FS_IOC_SETVERSION _IOW('v', 2, long)
+#define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap)
+#define FS_IOC32_GETFLAGS _IOR('f', 1, int)
+#define FS_IOC32_SETFLAGS _IOW('f', 2, int)
+#define FS_IOC32_GETVERSION _IOR('v', 1, int)
+#define FS_IOC32_SETVERSION _IOW('v', 2, int)
+#define FS_IOC_FSGETXATTR _IOR('X', 31, struct fsxattr)
+#define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr)
+#define FS_IOC_GETFSLABEL _IOR(0x94, 49, char[FSLABEL_MAX])
+#define FS_IOC_SETFSLABEL _IOW(0x94, 50, char[FSLABEL_MAX])
+/* Returns the external filesystem UUID, the same one blkid returns */
+#define FS_IOC_GETFSUUID _IOR(0x15, 0, struct fsuuid2)
+/*
+ * Returns the path component under /sys/fs/ that refers to this filesystem;
+ * also /sys/kernel/debug/ for filesystems with debugfs exports
+ */
+#define FS_IOC_GETFSSYSFSPATH _IOR(0x15, 1, struct fs_sysfs_path)
+
+/*
+ * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
+ *
+ * Note: for historical reasons, these flags were originally used and
+ * defined for use by ext2/ext3, and then other file systems started
+ * using these flags so they wouldn't need to write their own version
+ * of chattr/lsattr (which was shipped as part of e2fsprogs). You
+ * should think twice before trying to use these flags in new
+ * contexts, or trying to assign these flags, since they are used both
+ * as the UAPI and the on-disk encoding for ext2/3/4. Also, we are
+ * almost out of 32-bit flags. :-)
+ *
+ * We have recently hoisted FS_IOC_FSGETXATTR / FS_IOC_FSSETXATTR from
+ * XFS to the generic FS level interface. This uses a structure that
+ * has padding and hence has more room to grow, so it may be more
+ * appropriate for many new use cases.
+ *
+ * Please do not change these flags or interfaces before checking with
+ * linux-fsdevel@vger.kernel.org and linux-api@vger.kernel.org.
+ */
+#define FS_SECRM_FL 0x00000001 /* Secure deletion */
+#define FS_UNRM_FL 0x00000002 /* Undelete */
+#define FS_COMPR_FL 0x00000004 /* Compress file */
+#define FS_SYNC_FL 0x00000008 /* Synchronous updates */
+#define FS_IMMUTABLE_FL 0x00000010 /* Immutable file */
+#define FS_APPEND_FL 0x00000020 /* writes to file may only append */
+#define FS_NODUMP_FL 0x00000040 /* do not dump file */
+#define FS_NOATIME_FL 0x00000080 /* do not update atime */
+/* Reserved for compression usage... */
+#define FS_DIRTY_FL 0x00000100
+#define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
+#define FS_NOCOMP_FL 0x00000400 /* Don't compress */
+/* End compression flags --- maybe not all used */
+#define FS_ENCRYPT_FL 0x00000800 /* Encrypted file */
+#define FS_BTREE_FL 0x00001000 /* btree format dir */
+#define FS_INDEX_FL 0x00001000 /* hash-indexed directory */
+#define FS_IMAGIC_FL 0x00002000 /* AFS directory */
+#define FS_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */
+#define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */
+#define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
+#define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
+#define FS_HUGE_FILE_FL 0x00040000 /* Reserved for ext4 */
+#define FS_EXTENT_FL 0x00080000 /* Extents */
+#define FS_VERITY_FL 0x00100000 /* Verity protected inode */
+#define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */
+#define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */
+#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
+#define FS_DAX_FL 0x02000000 /* Inode is DAX */
+#define FS_INLINE_DATA_FL 0x10000000 /* Reserved for ext4 */
+#define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
+#define FS_CASEFOLD_FL 0x40000000 /* Folder is case insensitive */
+#define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */
+
+#define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
+#define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
+
+
+#define SYNC_FILE_RANGE_WAIT_BEFORE 1
+#define SYNC_FILE_RANGE_WRITE 2
+#define SYNC_FILE_RANGE_WAIT_AFTER 4
+#define SYNC_FILE_RANGE_WRITE_AND_WAIT (SYNC_FILE_RANGE_WRITE | \
+ SYNC_FILE_RANGE_WAIT_BEFORE | \
+ SYNC_FILE_RANGE_WAIT_AFTER)
+
+/*
+ * Flags for preadv2/pwritev2:
+ */
+
+typedef int __bitwise __kernel_rwf_t;
+
+/* high priority request, poll if possible */
+#define RWF_HIPRI ((__force __kernel_rwf_t)0x00000001)
+
+/* per-IO O_DSYNC */
+#define RWF_DSYNC ((__force __kernel_rwf_t)0x00000002)
+
+/* per-IO O_SYNC */
+#define RWF_SYNC ((__force __kernel_rwf_t)0x00000004)
+
+/* per-IO, return -EAGAIN if operation would block */
+#define RWF_NOWAIT ((__force __kernel_rwf_t)0x00000008)
+
+/* per-IO O_APPEND */
+#define RWF_APPEND ((__force __kernel_rwf_t)0x00000010)
+
+/* per-IO negation of O_APPEND */
+#define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020)
+
+/* Atomic Write */
+#define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040)
+
+/* buffered IO that drops the cache after reading or writing data */
+#define RWF_DONTCACHE ((__force __kernel_rwf_t)0x00000080)
+
+/* mask of flags supported by the kernel */
+#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
+ RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\
+ RWF_DONTCACHE)
+
+#define PROCFS_IOCTL_MAGIC 'f'
+
+/* Pagemap ioctl */
+#define PAGEMAP_SCAN _IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg)
+
+/* Bitmasks provided in pm_scan_args masks and reported in page_region.categories. */
+#define PAGE_IS_WPALLOWED (1 << 0)
+#define PAGE_IS_WRITTEN (1 << 1)
+#define PAGE_IS_FILE (1 << 2)
+#define PAGE_IS_PRESENT (1 << 3)
+#define PAGE_IS_SWAPPED (1 << 4)
+#define PAGE_IS_PFNZERO (1 << 5)
+#define PAGE_IS_HUGE (1 << 6)
+#define PAGE_IS_SOFT_DIRTY (1 << 7)
+#define PAGE_IS_GUARD (1 << 8)
+
+/*
+ * struct page_region - Page region with flags
+ * @start: Start of the region
+ * @end: End of the region (exclusive)
+ * @categories: PAGE_IS_* category bitmask for the region
+ */
+struct page_region {
+ __u64 start;
+ __u64 end;
+ __u64 categories;
+};
+
+/* Flags for PAGEMAP_SCAN ioctl */
+#define PM_SCAN_WP_MATCHING (1 << 0) /* Write protect the pages matched. */
+#define PM_SCAN_CHECK_WPASYNC (1 << 1) /* Abort the scan when a non-WP-enabled page is found. */
+
+/*
+ * struct pm_scan_arg - Pagemap ioctl argument
+ * @size: Size of the structure
+ * @flags: Flags for the IOCTL
+ * @start: Starting address of the region
+ * @end: Ending address of the region
+ * @walk_end Address where the scan stopped (written by kernel).
+ * walk_end == end (address tags cleared) informs that the scan completed on entire range.
+ * @vec: Address of page_region struct array for output
+ * @vec_len: Length of the page_region struct array
+ * @max_pages: Optional limit for number of returned pages (0 = disabled)
+ * @category_inverted: PAGE_IS_* categories which values match if 0 instead of 1
+ * @category_mask: Skip pages for which any category doesn't match
+ * @category_anyof_mask: Skip pages for which no category matches
+ * @return_mask: PAGE_IS_* categories that are to be reported in `page_region`s returned
+ */
+struct pm_scan_arg {
+ __u64 size;
+ __u64 flags;
+ __u64 start;
+ __u64 end;
+ __u64 walk_end;
+ __u64 vec;
+ __u64 vec_len;
+ __u64 max_pages;
+ __u64 category_inverted;
+ __u64 category_mask;
+ __u64 category_anyof_mask;
+ __u64 return_mask;
+};
+
+/* /proc/<pid>/maps ioctl */
+#define PROCMAP_QUERY _IOWR(PROCFS_IOCTL_MAGIC, 17, struct procmap_query)
+
+enum procmap_query_flags {
+ /*
+ * VMA permission flags.
+ *
+ * Can be used as part of procmap_query.query_flags field to look up
+ * only VMAs satisfying specified subset of permissions. E.g., specifying
+ * PROCMAP_QUERY_VMA_READABLE only will return both readable and read/write VMAs,
+ * while having PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_WRITABLE will only
+ * return read/write VMAs, though both executable/non-executable and
+ * private/shared will be ignored.
+ *
+ * PROCMAP_QUERY_VMA_* flags are also returned in procmap_query.vma_flags
+ * field to specify actual VMA permissions.
+ */
+ PROCMAP_QUERY_VMA_READABLE = 0x01,
+ PROCMAP_QUERY_VMA_WRITABLE = 0x02,
+ PROCMAP_QUERY_VMA_EXECUTABLE = 0x04,
+ PROCMAP_QUERY_VMA_SHARED = 0x08,
+ /*
+ * Query modifier flags.
+ *
+ * By default VMA that covers provided address is returned, or -ENOENT
+ * is returned. With PROCMAP_QUERY_COVERING_OR_NEXT_VMA flag set, closest
+ * VMA with vma_start > addr will be returned if no covering VMA is
+ * found.
+ *
+ * PROCMAP_QUERY_FILE_BACKED_VMA instructs query to consider only VMAs that
+ * have file backing. Can be combined with PROCMAP_QUERY_COVERING_OR_NEXT_VMA
+ * to iterate all VMAs with file backing.
+ */
+ PROCMAP_QUERY_COVERING_OR_NEXT_VMA = 0x10,
+ PROCMAP_QUERY_FILE_BACKED_VMA = 0x20,
+};
+
+/*
+ * Input/output argument structured passed into ioctl() call. It can be used
+ * to query a set of VMAs (Virtual Memory Areas) of a process.
+ *
+ * Each field can be one of three kinds, marked in a short comment to the
+ * right of the field:
+ * - "in", input argument, user has to provide this value, kernel doesn't modify it;
+ * - "out", output argument, kernel sets this field with VMA data;
+ * - "in/out", input and output argument; user provides initial value (used
+ * to specify maximum allowable buffer size), and kernel sets it to actual
+ * amount of data written (or zero, if there is no data).
+ *
+ * If matching VMA is found (according to criterias specified by
+ * query_addr/query_flags, all the out fields are filled out, and ioctl()
+ * returns 0. If there is no matching VMA, -ENOENT will be returned.
+ * In case of any other error, negative error code other than -ENOENT is
+ * returned.
+ *
+ * Most of the data is similar to the one returned as text in /proc/<pid>/maps
+ * file, but procmap_query provides more querying flexibility. There are no
+ * consistency guarantees between subsequent ioctl() calls, but data returned
+ * for matched VMA is self-consistent.
+ */
+struct procmap_query {
+ /* Query struct size, for backwards/forward compatibility */
+ __u64 size;
+ /*
+ * Query flags, a combination of enum procmap_query_flags values.
+ * Defines query filtering and behavior, see enum procmap_query_flags.
+ *
+ * Input argument, provided by user. Kernel doesn't modify it.
+ */
+ __u64 query_flags; /* in */
+ /*
+ * Query address. By default, VMA that covers this address will
+ * be looked up. PROCMAP_QUERY_* flags above modify this default
+ * behavior further.
+ *
+ * Input argument, provided by user. Kernel doesn't modify it.
+ */
+ __u64 query_addr; /* in */
+ /* VMA starting (inclusive) and ending (exclusive) address, if VMA is found. */
+ __u64 vma_start; /* out */
+ __u64 vma_end; /* out */
+ /* VMA permissions flags. A combination of PROCMAP_QUERY_VMA_* flags. */
+ __u64 vma_flags; /* out */
+ /* VMA backing page size granularity. */
+ __u64 vma_page_size; /* out */
+ /*
+ * VMA file offset. If VMA has file backing, this specifies offset
+ * within the file that VMA's start address corresponds to.
+ * Is set to zero if VMA has no backing file.
+ */
+ __u64 vma_offset; /* out */
+ /* Backing file's inode number, or zero, if VMA has no backing file. */
+ __u64 inode; /* out */
+ /* Backing file's device major/minor number, or zero, if VMA has no backing file. */
+ __u32 dev_major; /* out */
+ __u32 dev_minor; /* out */
+ /*
+ * If set to non-zero value, signals the request to return VMA name
+ * (i.e., VMA's backing file's absolute path, with " (deleted)" suffix
+ * appended, if file was unlinked from FS) for matched VMA. VMA name
+ * can also be some special name (e.g., "[heap]", "[stack]") or could
+ * be even user-supplied with prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME).
+ *
+ * Kernel will set this field to zero, if VMA has no associated name.
+ * Otherwise kernel will return actual amount of bytes filled in
+ * user-supplied buffer (see vma_name_addr field below), including the
+ * terminating zero.
+ *
+ * If VMA name is longer that user-supplied maximum buffer size,
+ * -E2BIG error is returned.
+ *
+ * If this field is set to non-zero value, vma_name_addr should point
+ * to valid user space memory buffer of at least vma_name_size bytes.
+ * If set to zero, vma_name_addr should be set to zero as well
+ */
+ __u32 vma_name_size; /* in/out */
+ /*
+ * If set to non-zero value, signals the request to extract and return
+ * VMA's backing file's build ID, if the backing file is an ELF file
+ * and it contains embedded build ID.
+ *
+ * Kernel will set this field to zero, if VMA has no backing file,
+ * backing file is not an ELF file, or ELF file has no build ID
+ * embedded.
+ *
+ * Build ID is a binary value (not a string). Kernel will set
+ * build_id_size field to exact number of bytes used for build ID.
+ * If build ID is requested and present, but needs more bytes than
+ * user-supplied maximum buffer size (see build_id_addr field below),
+ * -E2BIG error will be returned.
+ *
+ * If this field is set to non-zero value, build_id_addr should point
+ * to valid user space memory buffer of at least build_id_size bytes.
+ * If set to zero, build_id_addr should be set to zero as well
+ */
+ __u32 build_id_size; /* in/out */
+ /*
+ * User-supplied address of a buffer of at least vma_name_size bytes
+ * for kernel to fill with matched VMA's name (see vma_name_size field
+ * description above for details).
+ *
+ * Should be set to zero if VMA name should not be returned.
+ */
+ __u64 vma_name_addr; /* in */
+ /*
+ * User-supplied address of a buffer of at least build_id_size bytes
+ * for kernel to fill with matched VMA's ELF build ID, if available
+ * (see build_id_size field description above for details).
+ *
+ * Should be set to zero if build ID should not be returned.
+ */
+ __u64 build_id_addr; /* in */
+};
+
+#endif /* _UAPI_LINUX_FS_H */
diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h
index 7a8f4c290187..3aff99f2696a 100644
--- a/tools/include/uapi/linux/fscrypt.h
+++ b/tools/include/uapi/linux/fscrypt.h
@@ -119,7 +119,7 @@ struct fscrypt_key_specifier {
*/
struct fscrypt_provisioning_key_payload {
__u32 type;
- __u32 __reserved;
+ __u32 flags;
__u8 raw[];
};
@@ -128,7 +128,9 @@ struct fscrypt_add_key_arg {
struct fscrypt_key_specifier key_spec;
__u32 raw_size;
__u32 key_id;
- __u32 __reserved[8];
+#define FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED 0x00000001
+ __u32 flags;
+ __u32 __reserved[7];
__u8 raw[];
};
diff --git a/tools/include/uapi/linux/genetlink.h b/tools/include/uapi/linux/genetlink.h
new file mode 100644
index 000000000000..ddba3ca01e39
--- /dev/null
+++ b/tools/include/uapi/linux/genetlink.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_GENERIC_NETLINK_H
+#define _UAPI__LINUX_GENERIC_NETLINK_H
+
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+#define GENL_NAMSIZ 16 /* length of family name */
+
+#define GENL_MIN_ID NLMSG_MIN_TYPE
+#define GENL_MAX_ID 1023
+
+struct genlmsghdr {
+ __u8 cmd;
+ __u8 version;
+ __u16 reserved;
+};
+
+#define GENL_HDRLEN NLMSG_ALIGN(sizeof(struct genlmsghdr))
+
+#define GENL_ADMIN_PERM 0x01
+#define GENL_CMD_CAP_DO 0x02
+#define GENL_CMD_CAP_DUMP 0x04
+#define GENL_CMD_CAP_HASPOL 0x08
+#define GENL_UNS_ADMIN_PERM 0x10
+
+/*
+ * List of reserved static generic netlink identifiers:
+ */
+#define GENL_ID_CTRL NLMSG_MIN_TYPE
+#define GENL_ID_VFS_DQUOT (NLMSG_MIN_TYPE + 1)
+#define GENL_ID_PMCRAID (NLMSG_MIN_TYPE + 2)
+/* must be last reserved + 1 */
+#define GENL_START_ALLOC (NLMSG_MIN_TYPE + 3)
+
+/**************************************************************************
+ * Controller
+ **************************************************************************/
+
+enum {
+ CTRL_CMD_UNSPEC,
+ CTRL_CMD_NEWFAMILY,
+ CTRL_CMD_DELFAMILY,
+ CTRL_CMD_GETFAMILY,
+ CTRL_CMD_NEWOPS,
+ CTRL_CMD_DELOPS,
+ CTRL_CMD_GETOPS,
+ CTRL_CMD_NEWMCAST_GRP,
+ CTRL_CMD_DELMCAST_GRP,
+ CTRL_CMD_GETMCAST_GRP, /* unused */
+ CTRL_CMD_GETPOLICY,
+ __CTRL_CMD_MAX,
+};
+
+#define CTRL_CMD_MAX (__CTRL_CMD_MAX - 1)
+
+enum {
+ CTRL_ATTR_UNSPEC,
+ CTRL_ATTR_FAMILY_ID,
+ CTRL_ATTR_FAMILY_NAME,
+ CTRL_ATTR_VERSION,
+ CTRL_ATTR_HDRSIZE,
+ CTRL_ATTR_MAXATTR,
+ CTRL_ATTR_OPS,
+ CTRL_ATTR_MCAST_GROUPS,
+ CTRL_ATTR_POLICY,
+ CTRL_ATTR_OP_POLICY,
+ CTRL_ATTR_OP,
+ __CTRL_ATTR_MAX,
+};
+
+#define CTRL_ATTR_MAX (__CTRL_ATTR_MAX - 1)
+
+enum {
+ CTRL_ATTR_OP_UNSPEC,
+ CTRL_ATTR_OP_ID,
+ CTRL_ATTR_OP_FLAGS,
+ __CTRL_ATTR_OP_MAX,
+};
+
+#define CTRL_ATTR_OP_MAX (__CTRL_ATTR_OP_MAX - 1)
+
+enum {
+ CTRL_ATTR_MCAST_GRP_UNSPEC,
+ CTRL_ATTR_MCAST_GRP_NAME,
+ CTRL_ATTR_MCAST_GRP_ID,
+ __CTRL_ATTR_MCAST_GRP_MAX,
+};
+
+#define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1)
+
+enum {
+ CTRL_ATTR_POLICY_UNSPEC,
+ CTRL_ATTR_POLICY_DO,
+ CTRL_ATTR_POLICY_DUMP,
+
+ __CTRL_ATTR_POLICY_DUMP_MAX,
+ CTRL_ATTR_POLICY_DUMP_MAX = __CTRL_ATTR_POLICY_DUMP_MAX - 1
+};
+
+#define CTRL_ATTR_POLICY_MAX (__CTRL_ATTR_POLICY_DUMP_MAX - 1)
+
+#endif /* _UAPI__LINUX_GENERIC_NETLINK_H */
diff --git a/tools/include/uapi/linux/if_addr.h b/tools/include/uapi/linux/if_addr.h
new file mode 100644
index 000000000000..aa7958b4e41d
--- /dev/null
+++ b/tools/include/uapi/linux/if_addr.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_IF_ADDR_H
+#define _UAPI__LINUX_IF_ADDR_H
+
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+struct ifaddrmsg {
+ __u8 ifa_family;
+ __u8 ifa_prefixlen; /* The prefix length */
+ __u8 ifa_flags; /* Flags */
+ __u8 ifa_scope; /* Address scope */
+ __u32 ifa_index; /* Link index */
+};
+
+/*
+ * Important comment:
+ * IFA_ADDRESS is prefix address, rather than local interface address.
+ * It makes no difference for normally configured broadcast interfaces,
+ * but for point-to-point IFA_ADDRESS is DESTINATION address,
+ * local address is supplied in IFA_LOCAL attribute.
+ *
+ * IFA_FLAGS is a u32 attribute that extends the u8 field ifa_flags.
+ * If present, the value from struct ifaddrmsg will be ignored.
+ */
+enum {
+ IFA_UNSPEC,
+ IFA_ADDRESS,
+ IFA_LOCAL,
+ IFA_LABEL,
+ IFA_BROADCAST,
+ IFA_ANYCAST,
+ IFA_CACHEINFO,
+ IFA_MULTICAST,
+ IFA_FLAGS,
+ IFA_RT_PRIORITY, /* u32, priority/metric for prefix route */
+ IFA_TARGET_NETNSID,
+ IFA_PROTO, /* u8, address protocol */
+ __IFA_MAX,
+};
+
+#define IFA_MAX (__IFA_MAX - 1)
+
+/* ifa_flags */
+#define IFA_F_SECONDARY 0x01
+#define IFA_F_TEMPORARY IFA_F_SECONDARY
+
+#define IFA_F_NODAD 0x02
+#define IFA_F_OPTIMISTIC 0x04
+#define IFA_F_DADFAILED 0x08
+#define IFA_F_HOMEADDRESS 0x10
+#define IFA_F_DEPRECATED 0x20
+#define IFA_F_TENTATIVE 0x40
+#define IFA_F_PERMANENT 0x80
+#define IFA_F_MANAGETEMPADDR 0x100
+#define IFA_F_NOPREFIXROUTE 0x200
+#define IFA_F_MCAUTOJOIN 0x400
+#define IFA_F_STABLE_PRIVACY 0x800
+
+struct ifa_cacheinfo {
+ __u32 ifa_prefered;
+ __u32 ifa_valid;
+ __u32 cstamp; /* created timestamp, hundredths of seconds */
+ __u32 tstamp; /* updated timestamp, hundredths of seconds */
+};
+
+/* backwards compatibility for userspace */
+#ifndef __KERNEL__
+#define IFA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifaddrmsg))))
+#define IFA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifaddrmsg))
+#endif
+
+/* ifa_proto */
+#define IFAPROT_UNSPEC 0
+#define IFAPROT_KERNEL_LO 1 /* loopback */
+#define IFAPROT_KERNEL_RA 2 /* set by kernel from router announcement */
+#define IFAPROT_KERNEL_LL 3 /* link-local set by kernel */
+
+#endif
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index f0d71b2a3f1e..7e46ca4cd31b 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -377,6 +377,7 @@ enum {
IFLA_GSO_IPV4_MAX_SIZE,
IFLA_GRO_IPV4_MAX_SIZE,
IFLA_DPLL_PIN,
+ IFLA_MAX_PACING_OFFLOAD_HORIZON,
__IFLA_MAX
};
@@ -461,6 +462,286 @@ enum in6_addr_gen_mode {
/* Bridge section */
+/**
+ * DOC: Bridge enum definition
+ *
+ * Please *note* that the timer values in the following section are expected
+ * in clock_t format, which is seconds multiplied by USER_HZ (generally
+ * defined as 100).
+ *
+ * @IFLA_BR_FORWARD_DELAY
+ * The bridge forwarding delay is the time spent in LISTENING state
+ * (before moving to LEARNING) and in LEARNING state (before moving
+ * to FORWARDING). Only relevant if STP is enabled.
+ *
+ * The valid values are between (2 * USER_HZ) and (30 * USER_HZ).
+ * The default value is (15 * USER_HZ).
+ *
+ * @IFLA_BR_HELLO_TIME
+ * The time between hello packets sent by the bridge, when it is a root
+ * bridge or a designated bridge. Only relevant if STP is enabled.
+ *
+ * The valid values are between (1 * USER_HZ) and (10 * USER_HZ).
+ * The default value is (2 * USER_HZ).
+ *
+ * @IFLA_BR_MAX_AGE
+ * The hello packet timeout is the time until another bridge in the
+ * spanning tree is assumed to be dead, after reception of its last hello
+ * message. Only relevant if STP is enabled.
+ *
+ * The valid values are between (6 * USER_HZ) and (40 * USER_HZ).
+ * The default value is (20 * USER_HZ).
+ *
+ * @IFLA_BR_AGEING_TIME
+ * Configure the bridge's FDB entries aging time. It is the time a MAC
+ * address will be kept in the FDB after a packet has been received from
+ * that address. After this time has passed, entries are cleaned up.
+ * Allow values outside the 802.1 standard specification for special cases:
+ *
+ * * 0 - entry never ages (all permanent)
+ * * 1 - entry disappears (no persistence)
+ *
+ * The default value is (300 * USER_HZ).
+ *
+ * @IFLA_BR_STP_STATE
+ * Turn spanning tree protocol on (*IFLA_BR_STP_STATE* > 0) or off
+ * (*IFLA_BR_STP_STATE* == 0) for this bridge.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_PRIORITY
+ * Set this bridge's spanning tree priority, used during STP root bridge
+ * election.
+ *
+ * The valid values are between 0 and 65535.
+ *
+ * @IFLA_BR_VLAN_FILTERING
+ * Turn VLAN filtering on (*IFLA_BR_VLAN_FILTERING* > 0) or off
+ * (*IFLA_BR_VLAN_FILTERING* == 0). When disabled, the bridge will not
+ * consider the VLAN tag when handling packets.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_VLAN_PROTOCOL
+ * Set the protocol used for VLAN filtering.
+ *
+ * The valid values are 0x8100(802.1Q) or 0x88A8(802.1AD). The default value
+ * is 0x8100(802.1Q).
+ *
+ * @IFLA_BR_GROUP_FWD_MASK
+ * The group forwarding mask. This is the bitmask that is applied to
+ * decide whether to forward incoming frames destined to link-local
+ * addresses (of the form 01:80:C2:00:00:0X).
+ *
+ * The default value is 0, which means the bridge does not forward any
+ * link-local frames coming on this port.
+ *
+ * @IFLA_BR_ROOT_ID
+ * The bridge root id, read only.
+ *
+ * @IFLA_BR_BRIDGE_ID
+ * The bridge id, read only.
+ *
+ * @IFLA_BR_ROOT_PORT
+ * The bridge root port, read only.
+ *
+ * @IFLA_BR_ROOT_PATH_COST
+ * The bridge root path cost, read only.
+ *
+ * @IFLA_BR_TOPOLOGY_CHANGE
+ * The bridge topology change, read only.
+ *
+ * @IFLA_BR_TOPOLOGY_CHANGE_DETECTED
+ * The bridge topology change detected, read only.
+ *
+ * @IFLA_BR_HELLO_TIMER
+ * The bridge hello timer, read only.
+ *
+ * @IFLA_BR_TCN_TIMER
+ * The bridge tcn timer, read only.
+ *
+ * @IFLA_BR_TOPOLOGY_CHANGE_TIMER
+ * The bridge topology change timer, read only.
+ *
+ * @IFLA_BR_GC_TIMER
+ * The bridge gc timer, read only.
+ *
+ * @IFLA_BR_GROUP_ADDR
+ * Set the MAC address of the multicast group this bridge uses for STP.
+ * The address must be a link-local address in standard Ethernet MAC address
+ * format. It is an address of the form 01:80:C2:00:00:0X, with X in [0, 4..f].
+ *
+ * The default value is 0.
+ *
+ * @IFLA_BR_FDB_FLUSH
+ * Flush bridge's fdb dynamic entries.
+ *
+ * @IFLA_BR_MCAST_ROUTER
+ * Set bridge's multicast router if IGMP snooping is enabled.
+ * The valid values are:
+ *
+ * * 0 - disabled.
+ * * 1 - automatic (queried).
+ * * 2 - permanently enabled.
+ *
+ * The default value is 1.
+ *
+ * @IFLA_BR_MCAST_SNOOPING
+ * Turn multicast snooping on (*IFLA_BR_MCAST_SNOOPING* > 0) or off
+ * (*IFLA_BR_MCAST_SNOOPING* == 0).
+ *
+ * The default value is 1.
+ *
+ * @IFLA_BR_MCAST_QUERY_USE_IFADDR
+ * If enabled use the bridge's own IP address as source address for IGMP
+ * queries (*IFLA_BR_MCAST_QUERY_USE_IFADDR* > 0) or the default of 0.0.0.0
+ * (*IFLA_BR_MCAST_QUERY_USE_IFADDR* == 0).
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_MCAST_QUERIER
+ * Enable (*IFLA_BR_MULTICAST_QUERIER* > 0) or disable
+ * (*IFLA_BR_MULTICAST_QUERIER* == 0) IGMP querier, ie sending of multicast
+ * queries by the bridge.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_MCAST_HASH_ELASTICITY
+ * Set multicast database hash elasticity, It is the maximum chain length in
+ * the multicast hash table. This attribute is *deprecated* and the value
+ * is always 16.
+ *
+ * @IFLA_BR_MCAST_HASH_MAX
+ * Set maximum size of the multicast hash table
+ *
+ * The default value is 4096, the value must be a power of 2.
+ *
+ * @IFLA_BR_MCAST_LAST_MEMBER_CNT
+ * The Last Member Query Count is the number of Group-Specific Queries
+ * sent before the router assumes there are no local members. The Last
+ * Member Query Count is also the number of Group-and-Source-Specific
+ * Queries sent before the router assumes there are no listeners for a
+ * particular source.
+ *
+ * The default value is 2.
+ *
+ * @IFLA_BR_MCAST_STARTUP_QUERY_CNT
+ * The Startup Query Count is the number of Queries sent out on startup,
+ * separated by the Startup Query Interval.
+ *
+ * The default value is 2.
+ *
+ * @IFLA_BR_MCAST_LAST_MEMBER_INTVL
+ * The Last Member Query Interval is the Max Response Time inserted into
+ * Group-Specific Queries sent in response to Leave Group messages, and
+ * is also the amount of time between Group-Specific Query messages.
+ *
+ * The default value is (1 * USER_HZ).
+ *
+ * @IFLA_BR_MCAST_MEMBERSHIP_INTVL
+ * The interval after which the bridge will leave a group, if no membership
+ * reports for this group are received.
+ *
+ * The default value is (260 * USER_HZ).
+ *
+ * @IFLA_BR_MCAST_QUERIER_INTVL
+ * The interval between queries sent by other routers. if no queries are
+ * seen after this delay has passed, the bridge will start to send its own
+ * queries (as if *IFLA_BR_MCAST_QUERIER_INTVL* was enabled).
+ *
+ * The default value is (255 * USER_HZ).
+ *
+ * @IFLA_BR_MCAST_QUERY_INTVL
+ * The Query Interval is the interval between General Queries sent by
+ * the Querier.
+ *
+ * The default value is (125 * USER_HZ). The minimum value is (1 * USER_HZ).
+ *
+ * @IFLA_BR_MCAST_QUERY_RESPONSE_INTVL
+ * The Max Response Time used to calculate the Max Resp Code inserted
+ * into the periodic General Queries.
+ *
+ * The default value is (10 * USER_HZ).
+ *
+ * @IFLA_BR_MCAST_STARTUP_QUERY_INTVL
+ * The interval between queries in the startup phase.
+ *
+ * The default value is (125 * USER_HZ) / 4. The minimum value is (1 * USER_HZ).
+ *
+ * @IFLA_BR_NF_CALL_IPTABLES
+ * Enable (*NF_CALL_IPTABLES* > 0) or disable (*NF_CALL_IPTABLES* == 0)
+ * iptables hooks on the bridge.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_NF_CALL_IP6TABLES
+ * Enable (*NF_CALL_IP6TABLES* > 0) or disable (*NF_CALL_IP6TABLES* == 0)
+ * ip6tables hooks on the bridge.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_NF_CALL_ARPTABLES
+ * Enable (*NF_CALL_ARPTABLES* > 0) or disable (*NF_CALL_ARPTABLES* == 0)
+ * arptables hooks on the bridge.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_VLAN_DEFAULT_PVID
+ * VLAN ID applied to untagged and priority-tagged incoming packets.
+ *
+ * The default value is 1. Setting to the special value 0 makes all ports of
+ * this bridge not have a PVID by default, which means that they will
+ * not accept VLAN-untagged traffic.
+ *
+ * @IFLA_BR_PAD
+ * Bridge attribute padding type for netlink message.
+ *
+ * @IFLA_BR_VLAN_STATS_ENABLED
+ * Enable (*IFLA_BR_VLAN_STATS_ENABLED* == 1) or disable
+ * (*IFLA_BR_VLAN_STATS_ENABLED* == 0) per-VLAN stats accounting.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_MCAST_STATS_ENABLED
+ * Enable (*IFLA_BR_MCAST_STATS_ENABLED* > 0) or disable
+ * (*IFLA_BR_MCAST_STATS_ENABLED* == 0) multicast (IGMP/MLD) stats
+ * accounting.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_MCAST_IGMP_VERSION
+ * Set the IGMP version.
+ *
+ * The valid values are 2 and 3. The default value is 2.
+ *
+ * @IFLA_BR_MCAST_MLD_VERSION
+ * Set the MLD version.
+ *
+ * The valid values are 1 and 2. The default value is 1.
+ *
+ * @IFLA_BR_VLAN_STATS_PER_PORT
+ * Enable (*IFLA_BR_VLAN_STATS_PER_PORT* == 1) or disable
+ * (*IFLA_BR_VLAN_STATS_PER_PORT* == 0) per-VLAN per-port stats accounting.
+ * Can be changed only when there are no port VLANs configured.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_MULTI_BOOLOPT
+ * The multi_boolopt is used to control new boolean options to avoid adding
+ * new netlink attributes. You can look at ``enum br_boolopt_id`` for those
+ * options.
+ *
+ * @IFLA_BR_MCAST_QUERIER_STATE
+ * Bridge mcast querier states, read only.
+ *
+ * @IFLA_BR_FDB_N_LEARNED
+ * The number of dynamically learned FDB entries for the current bridge,
+ * read only.
+ *
+ * @IFLA_BR_FDB_MAX_LEARNED
+ * Set the number of max dynamically learned FDB entries for the current
+ * bridge.
+ */
enum {
IFLA_BR_UNSPEC,
IFLA_BR_FORWARD_DELAY,
@@ -510,6 +791,8 @@ enum {
IFLA_BR_VLAN_STATS_PER_PORT,
IFLA_BR_MULTI_BOOLOPT,
IFLA_BR_MCAST_QUERIER_STATE,
+ IFLA_BR_FDB_N_LEARNED,
+ IFLA_BR_FDB_MAX_LEARNED,
__IFLA_BR_MAX,
};
@@ -520,11 +803,252 @@ struct ifla_bridge_id {
__u8 addr[6]; /* ETH_ALEN */
};
+/**
+ * DOC: Bridge mode enum definition
+ *
+ * @BRIDGE_MODE_HAIRPIN
+ * Controls whether traffic may be sent back out of the port on which it
+ * was received. This option is also called reflective relay mode, and is
+ * used to support basic VEPA (Virtual Ethernet Port Aggregator)
+ * capabilities. By default, this flag is turned off and the bridge will
+ * not forward traffic back out of the receiving port.
+ */
enum {
BRIDGE_MODE_UNSPEC,
BRIDGE_MODE_HAIRPIN,
};
+/**
+ * DOC: Bridge port enum definition
+ *
+ * @IFLA_BRPORT_STATE
+ * The operation state of the port. Here are the valid values.
+ *
+ * * 0 - port is in STP *DISABLED* state. Make this port completely
+ * inactive for STP. This is also called BPDU filter and could be used
+ * to disable STP on an untrusted port, like a leaf virtual device.
+ * The traffic forwarding is also stopped on this port.
+ * * 1 - port is in STP *LISTENING* state. Only valid if STP is enabled
+ * on the bridge. In this state the port listens for STP BPDUs and
+ * drops all other traffic frames.
+ * * 2 - port is in STP *LEARNING* state. Only valid if STP is enabled on
+ * the bridge. In this state the port will accept traffic only for the
+ * purpose of updating MAC address tables.
+ * * 3 - port is in STP *FORWARDING* state. Port is fully active.
+ * * 4 - port is in STP *BLOCKING* state. Only valid if STP is enabled on
+ * the bridge. This state is used during the STP election process.
+ * In this state, port will only process STP BPDUs.
+ *
+ * @IFLA_BRPORT_PRIORITY
+ * The STP port priority. The valid values are between 0 and 255.
+ *
+ * @IFLA_BRPORT_COST
+ * The STP path cost of the port. The valid values are between 1 and 65535.
+ *
+ * @IFLA_BRPORT_MODE
+ * Set the bridge port mode. See *BRIDGE_MODE_HAIRPIN* for more details.
+ *
+ * @IFLA_BRPORT_GUARD
+ * Controls whether STP BPDUs will be processed by the bridge port. By
+ * default, the flag is turned off to allow BPDU processing. Turning this
+ * flag on will disable the bridge port if a STP BPDU packet is received.
+ *
+ * If the bridge has Spanning Tree enabled, hostile devices on the network
+ * may send BPDU on a port and cause network failure. Setting *guard on*
+ * will detect and stop this by disabling the port. The port will be
+ * restarted if the link is brought down, or removed and reattached.
+ *
+ * @IFLA_BRPORT_PROTECT
+ * Controls whether a given port is allowed to become a root port or not.
+ * Only used when STP is enabled on the bridge. By default the flag is off.
+ *
+ * This feature is also called root port guard. If BPDU is received from a
+ * leaf (edge) port, it should not be elected as root port. This could
+ * be used if using STP on a bridge and the downstream bridges are not fully
+ * trusted; this prevents a hostile guest from rerouting traffic.
+ *
+ * @IFLA_BRPORT_FAST_LEAVE
+ * This flag allows the bridge to immediately stop multicast traffic
+ * forwarding on a port that receives an IGMP Leave message. It is only used
+ * when IGMP snooping is enabled on the bridge. By default the flag is off.
+ *
+ * @IFLA_BRPORT_LEARNING
+ * Controls whether a given port will learn *source* MAC addresses from
+ * received traffic or not. Also controls whether dynamic FDB entries
+ * (which can also be added by software) will be refreshed by incoming
+ * traffic. By default this flag is on.
+ *
+ * @IFLA_BRPORT_UNICAST_FLOOD
+ * Controls whether unicast traffic for which there is no FDB entry will
+ * be flooded towards this port. By default this flag is on.
+ *
+ * @IFLA_BRPORT_PROXYARP
+ * Enable proxy ARP on this port.
+ *
+ * @IFLA_BRPORT_LEARNING_SYNC
+ * Controls whether a given port will sync MAC addresses learned on device
+ * port to bridge FDB.
+ *
+ * @IFLA_BRPORT_PROXYARP_WIFI
+ * Enable proxy ARP on this port which meets extended requirements by
+ * IEEE 802.11 and Hotspot 2.0 specifications.
+ *
+ * @IFLA_BRPORT_ROOT_ID
+ *
+ * @IFLA_BRPORT_BRIDGE_ID
+ *
+ * @IFLA_BRPORT_DESIGNATED_PORT
+ *
+ * @IFLA_BRPORT_DESIGNATED_COST
+ *
+ * @IFLA_BRPORT_ID
+ *
+ * @IFLA_BRPORT_NO
+ *
+ * @IFLA_BRPORT_TOPOLOGY_CHANGE_ACK
+ *
+ * @IFLA_BRPORT_CONFIG_PENDING
+ *
+ * @IFLA_BRPORT_MESSAGE_AGE_TIMER
+ *
+ * @IFLA_BRPORT_FORWARD_DELAY_TIMER
+ *
+ * @IFLA_BRPORT_HOLD_TIMER
+ *
+ * @IFLA_BRPORT_FLUSH
+ * Flush bridge ports' fdb dynamic entries.
+ *
+ * @IFLA_BRPORT_MULTICAST_ROUTER
+ * Configure the port's multicast router presence. A port with
+ * a multicast router will receive all multicast traffic.
+ * The valid values are:
+ *
+ * * 0 disable multicast routers on this port
+ * * 1 let the system detect the presence of routers (default)
+ * * 2 permanently enable multicast traffic forwarding on this port
+ * * 3 enable multicast routers temporarily on this port, not depending
+ * on incoming queries.
+ *
+ * @IFLA_BRPORT_PAD
+ *
+ * @IFLA_BRPORT_MCAST_FLOOD
+ * Controls whether a given port will flood multicast traffic for which
+ * there is no MDB entry. By default this flag is on.
+ *
+ * @IFLA_BRPORT_MCAST_TO_UCAST
+ * Controls whether a given port will replicate packets using unicast
+ * instead of multicast. By default this flag is off.
+ *
+ * This is done by copying the packet per host and changing the multicast
+ * destination MAC to a unicast one accordingly.
+ *
+ * *mcast_to_unicast* works on top of the multicast snooping feature of the
+ * bridge. Which means unicast copies are only delivered to hosts which
+ * are interested in unicast and signaled this via IGMP/MLD reports previously.
+ *
+ * This feature is intended for interface types which have a more reliable
+ * and/or efficient way to deliver unicast packets than broadcast ones
+ * (e.g. WiFi).
+ *
+ * However, it should only be enabled on interfaces where no IGMPv2/MLDv1
+ * report suppression takes place. IGMP/MLD report suppression issue is
+ * usually overcome by the network daemon (supplicant) enabling AP isolation
+ * and by that separating all STAs.
+ *
+ * Delivery of STA-to-STA IP multicast is made possible again by enabling
+ * and utilizing the bridge hairpin mode, which considers the incoming port
+ * as a potential outgoing port, too (see *BRIDGE_MODE_HAIRPIN* option).
+ * Hairpin mode is performed after multicast snooping, therefore leading
+ * to only deliver reports to STAs running a multicast router.
+ *
+ * @IFLA_BRPORT_VLAN_TUNNEL
+ * Controls whether vlan to tunnel mapping is enabled on the port.
+ * By default this flag is off.
+ *
+ * @IFLA_BRPORT_BCAST_FLOOD
+ * Controls flooding of broadcast traffic on the given port. By default
+ * this flag is on.
+ *
+ * @IFLA_BRPORT_GROUP_FWD_MASK
+ * Set the group forward mask. This is a bitmask that is applied to
+ * decide whether to forward incoming frames destined to link-local
+ * addresses. The addresses of the form are 01:80:C2:00:00:0X (defaults
+ * to 0, which means the bridge does not forward any link-local frames
+ * coming on this port).
+ *
+ * @IFLA_BRPORT_NEIGH_SUPPRESS
+ * Controls whether neighbor discovery (arp and nd) proxy and suppression
+ * is enabled on the port. By default this flag is off.
+ *
+ * @IFLA_BRPORT_ISOLATED
+ * Controls whether a given port will be isolated, which means it will be
+ * able to communicate with non-isolated ports only. By default this
+ * flag is off.
+ *
+ * @IFLA_BRPORT_BACKUP_PORT
+ * Set a backup port. If the port loses carrier all traffic will be
+ * redirected to the configured backup port. Set the value to 0 to disable
+ * it.
+ *
+ * @IFLA_BRPORT_MRP_RING_OPEN
+ *
+ * @IFLA_BRPORT_MRP_IN_OPEN
+ *
+ * @IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT
+ * The number of per-port EHT hosts limit. The default value is 512.
+ * Setting to 0 is not allowed.
+ *
+ * @IFLA_BRPORT_MCAST_EHT_HOSTS_CNT
+ * The current number of tracked hosts, read only.
+ *
+ * @IFLA_BRPORT_LOCKED
+ * Controls whether a port will be locked, meaning that hosts behind the
+ * port will not be able to communicate through the port unless an FDB
+ * entry with the unit's MAC address is in the FDB. The common use case is
+ * that hosts are allowed access through authentication with the IEEE 802.1X
+ * protocol or based on whitelists. By default this flag is off.
+ *
+ * Please note that secure 802.1X deployments should always use the
+ * *BR_BOOLOPT_NO_LL_LEARN* flag, to not permit the bridge to populate its
+ * FDB based on link-local (EAPOL) traffic received on the port.
+ *
+ * @IFLA_BRPORT_MAB
+ * Controls whether a port will use MAC Authentication Bypass (MAB), a
+ * technique through which select MAC addresses may be allowed on a locked
+ * port, without using 802.1X authentication. Packets with an unknown source
+ * MAC address generates a "locked" FDB entry on the incoming bridge port.
+ * The common use case is for user space to react to these bridge FDB
+ * notifications and optionally replace the locked FDB entry with a normal
+ * one, allowing traffic to pass for whitelisted MAC addresses.
+ *
+ * Setting this flag also requires *IFLA_BRPORT_LOCKED* and
+ * *IFLA_BRPORT_LEARNING*. *IFLA_BRPORT_LOCKED* ensures that unauthorized
+ * data packets are dropped, and *IFLA_BRPORT_LEARNING* allows the dynamic
+ * FDB entries installed by user space (as replacements for the locked FDB
+ * entries) to be refreshed and/or aged out.
+ *
+ * @IFLA_BRPORT_MCAST_N_GROUPS
+ *
+ * @IFLA_BRPORT_MCAST_MAX_GROUPS
+ * Sets the maximum number of MDB entries that can be registered for a
+ * given port. Attempts to register more MDB entries at the port than this
+ * limit allows will be rejected, whether they are done through netlink
+ * (e.g. the bridge tool), or IGMP or MLD membership reports. Setting a
+ * limit of 0 disables the limit. The default value is 0.
+ *
+ * @IFLA_BRPORT_NEIGH_VLAN_SUPPRESS
+ * Controls whether neighbor discovery (arp and nd) proxy and suppression is
+ * enabled for a given port. By default this flag is off.
+ *
+ * Note that this option only takes effect when *IFLA_BRPORT_NEIGH_SUPPRESS*
+ * is enabled for a given port.
+ *
+ * @IFLA_BRPORT_BACKUP_NHID
+ * The FDB nexthop object ID to attach to packets being redirected to a
+ * backup port that has VLAN tunnel mapping enabled (via the
+ * *IFLA_BRPORT_VLAN_TUNNEL* option). Setting a value of 0 (default) has
+ * the effect of not attaching any ID.
+ */
enum {
IFLA_BRPORT_UNSPEC,
IFLA_BRPORT_STATE, /* Spanning tree state */
@@ -769,6 +1293,19 @@ enum netkit_mode {
NETKIT_L3,
};
+/* NETKIT_SCRUB_NONE leaves clearing skb->{mark,priority} up to
+ * the BPF program if attached. This also means the latter can
+ * consume the two fields if they were populated earlier.
+ *
+ * NETKIT_SCRUB_DEFAULT zeroes skb->{mark,priority} fields before
+ * invoking the attached BPF program when the peer device resides
+ * in a different network namespace. This is the default behavior.
+ */
+enum netkit_scrub {
+ NETKIT_SCRUB_NONE,
+ NETKIT_SCRUB_DEFAULT,
+};
+
enum {
IFLA_NETKIT_UNSPEC,
IFLA_NETKIT_PEER_INFO,
@@ -776,6 +1313,10 @@ enum {
IFLA_NETKIT_POLICY,
IFLA_NETKIT_PEER_POLICY,
IFLA_NETKIT_MODE,
+ IFLA_NETKIT_SCRUB,
+ IFLA_NETKIT_PEER_SCRUB,
+ IFLA_NETKIT_HEADROOM,
+ IFLA_NETKIT_TAILROOM,
__IFLA_NETKIT_MAX,
};
#define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1)
@@ -854,6 +1395,7 @@ enum {
IFLA_VXLAN_DF,
IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */
IFLA_VXLAN_LOCALBYPASS,
+ IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
@@ -871,6 +1413,13 @@ enum ifla_vxlan_df {
VXLAN_DF_MAX = __VXLAN_DF_END - 1,
};
+enum ifla_vxlan_label_policy {
+ VXLAN_LABEL_FIXED = 0,
+ VXLAN_LABEL_INHERIT = 1,
+ __VXLAN_LABEL_END,
+ VXLAN_LABEL_MAX = __VXLAN_LABEL_END - 1,
+};
+
/* GENEVE section */
enum {
IFLA_GENEVE_UNSPEC,
@@ -935,6 +1484,8 @@ enum {
IFLA_GTP_ROLE,
IFLA_GTP_CREATE_SOCKETS,
IFLA_GTP_RESTART_COUNT,
+ IFLA_GTP_LOCAL,
+ IFLA_GTP_LOCAL6,
__IFLA_GTP_MAX,
};
#define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1)
@@ -1240,6 +1791,7 @@ enum {
IFLA_HSR_PROTOCOL, /* Indicate different protocol than
* HSR. For example PRP.
*/
+ IFLA_HSR_INTERLINK, /* HSR interlink network device */
__IFLA_HSR_MAX,
};
@@ -1417,7 +1969,9 @@ enum {
enum {
IFLA_DSA_UNSPEC,
- IFLA_DSA_MASTER,
+ IFLA_DSA_CONDUIT,
+ /* Deprecated, use IFLA_DSA_CONDUIT instead */
+ IFLA_DSA_MASTER = IFLA_DSA_CONDUIT,
__IFLA_DSA_MAX,
};
diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
index 638c606dfa74..23a062781468 100644
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -7,8 +7,8 @@
* Magnus Karlsson <magnus.karlsson@intel.com>
*/
-#ifndef _LINUX_IF_XDP_H
-#define _LINUX_IF_XDP_H
+#ifndef _UAPI_LINUX_IF_XDP_H
+#define _UAPI_LINUX_IF_XDP_H
#include <linux/types.h>
@@ -41,6 +41,10 @@
*/
#define XDP_UMEM_TX_SW_CSUM (1 << 1)
+/* Request to reserve tx_metadata_len bytes of per-chunk metadata.
+ */
+#define XDP_UMEM_TX_METADATA_LEN (1 << 2)
+
struct sockaddr_xdp {
__u16 sxdp_family;
__u16 sxdp_flags;
@@ -75,6 +79,7 @@ struct xdp_mmap_offsets {
#define XDP_UMEM_COMPLETION_RING 6
#define XDP_STATISTICS 7
#define XDP_OPTIONS 8
+#define XDP_MAX_TX_SKB_BUDGET 9
struct xdp_umem_reg {
__u64 addr; /* Start of packet data area */
@@ -113,16 +118,22 @@ struct xdp_options {
((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1)
/* Request transmit timestamp. Upon completion, put it into tx_timestamp
- * field of union xsk_tx_metadata.
+ * field of struct xsk_tx_metadata.
*/
#define XDP_TXMD_FLAGS_TIMESTAMP (1 << 0)
/* Request transmit checksum offload. Checksum start position and offset
- * are communicated via csum_start and csum_offset fields of union
+ * are communicated via csum_start and csum_offset fields of struct
* xsk_tx_metadata.
*/
#define XDP_TXMD_FLAGS_CHECKSUM (1 << 1)
+/* Request launch time hardware offload. The device will schedule the packet for
+ * transmission at a pre-determined time called launch time. The value of
+ * launch time is communicated via launch_time field of struct xsk_tx_metadata.
+ */
+#define XDP_TXMD_FLAGS_LAUNCH_TIME (1 << 2)
+
/* AF_XDP offloads request. 'request' union member is consumed by the driver
* when the packet is being transmitted. 'completion' union member is
* filled by the driver when the transmit completion arrives.
@@ -138,6 +149,10 @@ struct xsk_tx_metadata {
__u16 csum_start;
/* Offset from csum_start where checksum should be stored. */
__u16 csum_offset;
+
+ /* XDP_TXMD_FLAGS_LAUNCH_TIME */
+ /* Launch time in nanosecond against the PTP HW Clock */
+ __u64 launch_time;
} request;
struct {
@@ -166,4 +181,4 @@ struct xdp_desc {
/* TX packet carries valid metadata. */
#define XDP_TX_METADATA (1 << 1)
-#endif /* _LINUX_IF_XDP_H */
+#endif /* _UAPI_LINUX_IF_XDP_H */
diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
index e682ab628dfa..ced0fc3c3aa5 100644
--- a/tools/include/uapi/linux/in.h
+++ b/tools/include/uapi/linux/in.h
@@ -79,8 +79,12 @@ enum {
#define IPPROTO_MPLS IPPROTO_MPLS
IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */
#define IPPROTO_ETHERNET IPPROTO_ETHERNET
+ IPPROTO_AGGFRAG = 144, /* AGGFRAG in ESP (RFC 9347) */
+#define IPPROTO_AGGFRAG IPPROTO_AGGFRAG
IPPROTO_RAW = 255, /* Raw IP packets */
#define IPPROTO_RAW IPPROTO_RAW
+ IPPROTO_SMC = 256, /* Shared Memory Communications */
+#define IPPROTO_SMC IPPROTO_SMC
IPPROTO_MPTCP = 262, /* Multipath TCP connection */
#define IPPROTO_MPTCP IPPROTO_MPTCP
IPPROTO_MAX
@@ -139,7 +143,7 @@ struct in_addr {
*/
#define IP_PMTUDISC_INTERFACE 4
/* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get
- * fragmented if they exeed the interface mtu
+ * fragmented if they exceed the interface mtu
*/
#define IP_PMTUDISC_OMIT 5
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index d03842abae57..52f6000ab020 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -178,6 +178,7 @@ struct kvm_xen_exit {
#define KVM_EXIT_NOTIFY 37
#define KVM_EXIT_LOONGARCH_IOCSR 38
#define KVM_EXIT_MEMORY_FAULT 39
+#define KVM_EXIT_TDX 40
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -192,11 +193,24 @@ struct kvm_xen_exit {
/* Flags that describe what fields in emulation_failure hold valid data. */
#define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0)
+/*
+ * struct kvm_run can be modified by userspace at any time, so KVM must be
+ * careful to avoid TOCTOU bugs. In order to protect KVM, HINT_UNSAFE_IN_KVM()
+ * renames fields in struct kvm_run from <symbol> to <symbol>__unsafe when
+ * compiled into the kernel, ensuring that any use within KVM is obvious and
+ * gets extra scrutiny.
+ */
+#ifdef __KERNEL__
+#define HINT_UNSAFE_IN_KVM(_symbol) _symbol##__unsafe
+#else
+#define HINT_UNSAFE_IN_KVM(_symbol) _symbol
+#endif
+
/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run {
/* in */
__u8 request_interrupt_window;
- __u8 immediate_exit;
+ __u8 HINT_UNSAFE_IN_KVM(immediate_exit);
__u8 padding1[6];
/* out */
@@ -362,6 +376,7 @@ struct kvm_run {
#define KVM_SYSTEM_EVENT_WAKEUP 4
#define KVM_SYSTEM_EVENT_SUSPEND 5
#define KVM_SYSTEM_EVENT_SEV_TERM 6
+#define KVM_SYSTEM_EVENT_TDX_FATAL 7
__u32 type;
__u32 ndata;
union {
@@ -433,6 +448,31 @@ struct kvm_run {
__u64 gpa;
__u64 size;
} memory_fault;
+ /* KVM_EXIT_TDX */
+ struct {
+ __u64 flags;
+ __u64 nr;
+ union {
+ struct {
+ __u64 ret;
+ __u64 data[5];
+ } unknown;
+ struct {
+ __u64 ret;
+ __u64 gpa;
+ __u64 size;
+ } get_quote;
+ struct {
+ __u64 ret;
+ __u64 leaf;
+ __u64 r11, r12, r13, r14;
+ } get_tdvmcall_info;
+ struct {
+ __u64 ret;
+ __u64 vector;
+ } setup_event_notify;
+ };
+ } tdx;
/* Fix the size of the union. */
char padding[256];
};
@@ -604,10 +644,7 @@ struct kvm_ioeventfd {
#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3)
-#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \
- KVM_X86_DISABLE_EXITS_HLT | \
- KVM_X86_DISABLE_EXITS_PAUSE | \
- KVM_X86_DISABLE_EXITS_CSTATE)
+#define KVM_X86_DISABLE_EXITS_APERFMPERF (1 << 4)
/* for KVM_ENABLE_CAP */
struct kvm_enable_cap {
@@ -917,6 +954,15 @@ struct kvm_enable_cap {
#define KVM_CAP_MEMORY_ATTRIBUTES 233
#define KVM_CAP_GUEST_MEMFD 234
#define KVM_CAP_VM_TYPES 235
+#define KVM_CAP_PRE_FAULT_MEMORY 236
+#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237
+#define KVM_CAP_X86_GUEST_MODE 238
+#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239
+#define KVM_CAP_ARM_EL2 240
+#define KVM_CAP_ARM_EL2_E2H0 241
+#define KVM_CAP_RISCV_MP_STATE_RESET 242
+#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243
+#define KVM_CAP_GUEST_MEMFD_FLAGS 244
struct kvm_irq_routing_irqchip {
__u32 irqchip;
@@ -1054,6 +1100,10 @@ struct kvm_dirty_tlb {
#define KVM_REG_SIZE_SHIFT 52
#define KVM_REG_SIZE_MASK 0x00f0000000000000ULL
+
+#define KVM_REG_SIZE(id) \
+ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
#define KVM_REG_SIZE_U8 0x0000000000000000ULL
#define KVM_REG_SIZE_U16 0x0010000000000000ULL
#define KVM_REG_SIZE_U32 0x0020000000000000ULL
@@ -1142,7 +1192,15 @@ enum kvm_device_type {
#define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME
KVM_DEV_TYPE_RISCV_AIA,
#define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA
+ KVM_DEV_TYPE_LOONGARCH_IPI,
+#define KVM_DEV_TYPE_LOONGARCH_IPI KVM_DEV_TYPE_LOONGARCH_IPI
+ KVM_DEV_TYPE_LOONGARCH_EIOINTC,
+#define KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_EIOINTC
+ KVM_DEV_TYPE_LOONGARCH_PCHPIC,
+#define KVM_DEV_TYPE_LOONGARCH_PCHPIC KVM_DEV_TYPE_LOONGARCH_PCHPIC
+
KVM_DEV_TYPE_MAX,
+
};
struct kvm_vfio_spapr_tce {
@@ -1541,6 +1599,8 @@ struct kvm_memory_attributes {
#define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3)
#define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd)
+#define GUEST_MEMFD_FLAG_MMAP (1ULL << 0)
+#define GUEST_MEMFD_FLAG_INIT_SHARED (1ULL << 1)
struct kvm_create_guest_memfd {
__u64 size;
@@ -1548,4 +1608,13 @@ struct kvm_create_guest_memfd {
__u64 reserved[6];
};
+#define KVM_PRE_FAULT_MEMORY _IOWR(KVMIO, 0xd5, struct kvm_pre_fault_memory)
+
+struct kvm_pre_fault_memory {
+ __u64 gpa;
+ __u64 size;
+ __u64 flags;
+ __u64 padding[5];
+};
+
#endif /* __LINUX_KVM_H */
diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h
index a246e11988d5..e89d00528f2f 100644
--- a/tools/include/uapi/linux/mman.h
+++ b/tools/include/uapi/linux/mman.h
@@ -17,6 +17,7 @@
#define MAP_SHARED 0x01 /* Share changes */
#define MAP_PRIVATE 0x02 /* Changes are private */
#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */
+#define MAP_DROPPABLE 0x08 /* Zero memory under memory pressure. */
/*
* Huge page size encoding when MAP_HUGETLB is specified, and a huge page
diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h
new file mode 100644
index 000000000000..7fa67c2031a5
--- /dev/null
+++ b/tools/include/uapi/linux/mount.h
@@ -0,0 +1,235 @@
+#ifndef _UAPI_LINUX_MOUNT_H
+#define _UAPI_LINUX_MOUNT_H
+
+#include <linux/types.h>
+
+/*
+ * These are the fs-independent mount-flags: up to 32 flags are supported
+ *
+ * Usage of these is restricted within the kernel to core mount(2) code and
+ * callers of sys_mount() only. Filesystems should be using the SB_*
+ * equivalent instead.
+ */
+#define MS_RDONLY 1 /* Mount read-only */
+#define MS_NOSUID 2 /* Ignore suid and sgid bits */
+#define MS_NODEV 4 /* Disallow access to device special files */
+#define MS_NOEXEC 8 /* Disallow program execution */
+#define MS_SYNCHRONOUS 16 /* Writes are synced at once */
+#define MS_REMOUNT 32 /* Alter flags of a mounted FS */
+#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */
+#define MS_DIRSYNC 128 /* Directory modifications are synchronous */
+#define MS_NOSYMFOLLOW 256 /* Do not follow symlinks */
+#define MS_NOATIME 1024 /* Do not update access times. */
+#define MS_NODIRATIME 2048 /* Do not update directory access times */
+#define MS_BIND 4096
+#define MS_MOVE 8192
+#define MS_REC 16384
+#define MS_VERBOSE 32768 /* War is peace. Verbosity is silence.
+ MS_VERBOSE is deprecated. */
+#define MS_SILENT 32768
+#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */
+#define MS_UNBINDABLE (1<<17) /* change to unbindable */
+#define MS_PRIVATE (1<<18) /* change to private */
+#define MS_SLAVE (1<<19) /* change to slave */
+#define MS_SHARED (1<<20) /* change to shared */
+#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */
+#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */
+#define MS_I_VERSION (1<<23) /* Update inode I_version field */
+#define MS_STRICTATIME (1<<24) /* Always perform atime updates */
+#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
+
+/* These sb flags are internal to the kernel */
+#define MS_SUBMOUNT (1<<26)
+#define MS_NOREMOTELOCK (1<<27)
+#define MS_NOSEC (1<<28)
+#define MS_BORN (1<<29)
+#define MS_ACTIVE (1<<30)
+#define MS_NOUSER (1<<31)
+
+/*
+ * Superblock flags that can be altered by MS_REMOUNT
+ */
+#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\
+ MS_LAZYTIME)
+
+/*
+ * Old magic mount flag and mask
+ */
+#define MS_MGC_VAL 0xC0ED0000
+#define MS_MGC_MSK 0xffff0000
+
+/*
+ * open_tree() flags.
+ */
+#define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */
+#define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */
+
+/*
+ * move_mount() flags.
+ */
+#define MOVE_MOUNT_F_SYMLINKS 0x00000001 /* Follow symlinks on from path */
+#define MOVE_MOUNT_F_AUTOMOUNTS 0x00000002 /* Follow automounts on from path */
+#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
+#define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */
+#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */
+#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */
+#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */
+#define MOVE_MOUNT_BENEATH 0x00000200 /* Mount beneath top mount */
+#define MOVE_MOUNT__MASK 0x00000377
+
+/*
+ * fsopen() flags.
+ */
+#define FSOPEN_CLOEXEC 0x00000001
+
+/*
+ * fspick() flags.
+ */
+#define FSPICK_CLOEXEC 0x00000001
+#define FSPICK_SYMLINK_NOFOLLOW 0x00000002
+#define FSPICK_NO_AUTOMOUNT 0x00000004
+#define FSPICK_EMPTY_PATH 0x00000008
+
+/*
+ * The type of fsconfig() call made.
+ */
+enum fsconfig_command {
+ FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */
+ FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */
+ FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */
+ FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */
+ FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */
+ FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */
+ FSCONFIG_CMD_CREATE = 6, /* Create new or reuse existing superblock */
+ FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */
+ FSCONFIG_CMD_CREATE_EXCL = 8, /* Create new superblock, fail if reusing existing superblock */
+};
+
+/*
+ * fsmount() flags.
+ */
+#define FSMOUNT_CLOEXEC 0x00000001
+
+/*
+ * Mount attributes.
+ */
+#define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */
+#define MOUNT_ATTR_NOSUID 0x00000002 /* Ignore suid and sgid bits */
+#define MOUNT_ATTR_NODEV 0x00000004 /* Disallow access to device special files */
+#define MOUNT_ATTR_NOEXEC 0x00000008 /* Disallow program execution */
+#define MOUNT_ATTR__ATIME 0x00000070 /* Setting on how atime should be updated */
+#define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */
+#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */
+#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */
+#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */
+#define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */
+#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000 /* Do not follow symlinks */
+
+/*
+ * mount_setattr()
+ */
+struct mount_attr {
+ __u64 attr_set;
+ __u64 attr_clr;
+ __u64 propagation;
+ __u64 userns_fd;
+};
+
+/* List of all mount_attr versions. */
+#define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */
+
+
+/*
+ * Structure for getting mount/superblock/filesystem info with statmount(2).
+ *
+ * The interface is similar to statx(2): individual fields or groups can be
+ * selected with the @mask argument of statmount(). Kernel will set the @mask
+ * field according to the supported fields.
+ *
+ * If string fields are selected, then the caller needs to pass a buffer that
+ * has space after the fixed part of the structure. Nul terminated strings are
+ * copied there and offsets relative to @str are stored in the relevant fields.
+ * If the buffer is too small, then EOVERFLOW is returned. The actually used
+ * size is returned in @size.
+ */
+struct statmount {
+ __u32 size; /* Total size, including strings */
+ __u32 mnt_opts; /* [str] Options (comma separated, escaped) */
+ __u64 mask; /* What results were written */
+ __u32 sb_dev_major; /* Device ID */
+ __u32 sb_dev_minor;
+ __u64 sb_magic; /* ..._SUPER_MAGIC */
+ __u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */
+ __u32 fs_type; /* [str] Filesystem type */
+ __u64 mnt_id; /* Unique ID of mount */
+ __u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */
+ __u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */
+ __u32 mnt_parent_id_old;
+ __u64 mnt_attr; /* MOUNT_ATTR_... */
+ __u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */
+ __u64 mnt_peer_group; /* ID of shared peer group */
+ __u64 mnt_master; /* Mount receives propagation from this ID */
+ __u64 propagate_from; /* Propagation from in current namespace */
+ __u32 mnt_root; /* [str] Root of mount relative to root of fs */
+ __u32 mnt_point; /* [str] Mountpoint relative to current root */
+ __u64 mnt_ns_id; /* ID of the mount namespace */
+ __u32 fs_subtype; /* [str] Subtype of fs_type (if any) */
+ __u32 sb_source; /* [str] Source string of the mount */
+ __u32 opt_num; /* Number of fs options */
+ __u32 opt_array; /* [str] Array of nul terminated fs options */
+ __u32 opt_sec_num; /* Number of security options */
+ __u32 opt_sec_array; /* [str] Array of nul terminated security options */
+ __u64 supported_mask; /* Mask flags that this kernel supports */
+ __u32 mnt_uidmap_num; /* Number of uid mappings */
+ __u32 mnt_uidmap; /* [str] Array of uid mappings (as seen from callers namespace) */
+ __u32 mnt_gidmap_num; /* Number of gid mappings */
+ __u32 mnt_gidmap; /* [str] Array of gid mappings (as seen from callers namespace) */
+ __u64 __spare2[43];
+ char str[]; /* Variable size part containing strings */
+};
+
+/*
+ * Structure for passing mount ID and miscellaneous parameters to statmount(2)
+ * and listmount(2).
+ *
+ * For statmount(2) @param represents the request mask.
+ * For listmount(2) @param represents the last listed mount id (or zero).
+ */
+struct mnt_id_req {
+ __u32 size;
+ __u32 spare;
+ __u64 mnt_id;
+ __u64 param;
+ __u64 mnt_ns_id;
+};
+
+/* List of all mnt_id_req versions. */
+#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */
+#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */
+
+/*
+ * @mask bits for statmount(2)
+ */
+#define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */
+#define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... */
+#define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */
+#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */
+#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */
+#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */
+#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */
+#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */
+#define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */
+#define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */
+#define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */
+#define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */
+#define STATMOUNT_SUPPORTED_MASK 0x00001000U /* Want/got supported mask flags */
+#define STATMOUNT_MNT_UIDMAP 0x00002000U /* Want/got uidmap... */
+#define STATMOUNT_MNT_GIDMAP 0x00004000U /* Want/got gidmap... */
+
+/*
+ * Special @mnt_id values that can be passed to listmount
+ */
+#define LSMT_ROOT 0xffffffffffffffff /* root mount */
+#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */
+
+#endif /* _UAPI_LINUX_MOUNT_H */
diff --git a/tools/include/uapi/linux/neighbour.h b/tools/include/uapi/linux/neighbour.h
new file mode 100644
index 000000000000..c34a81245f87
--- /dev/null
+++ b/tools/include/uapi/linux/neighbour.h
@@ -0,0 +1,229 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_NEIGHBOUR_H
+#define _UAPI__LINUX_NEIGHBOUR_H
+
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+struct ndmsg {
+ __u8 ndm_family;
+ __u8 ndm_pad1;
+ __u16 ndm_pad2;
+ __s32 ndm_ifindex;
+ __u16 ndm_state;
+ __u8 ndm_flags;
+ __u8 ndm_type;
+};
+
+enum {
+ NDA_UNSPEC,
+ NDA_DST,
+ NDA_LLADDR,
+ NDA_CACHEINFO,
+ NDA_PROBES,
+ NDA_VLAN,
+ NDA_PORT,
+ NDA_VNI,
+ NDA_IFINDEX,
+ NDA_MASTER,
+ NDA_LINK_NETNSID,
+ NDA_SRC_VNI,
+ NDA_PROTOCOL, /* Originator of entry */
+ NDA_NH_ID,
+ NDA_FDB_EXT_ATTRS,
+ NDA_FLAGS_EXT,
+ NDA_NDM_STATE_MASK,
+ NDA_NDM_FLAGS_MASK,
+ __NDA_MAX
+};
+
+#define NDA_MAX (__NDA_MAX - 1)
+
+/*
+ * Neighbor Cache Entry Flags
+ */
+
+#define NTF_USE (1 << 0)
+#define NTF_SELF (1 << 1)
+#define NTF_MASTER (1 << 2)
+#define NTF_PROXY (1 << 3) /* == ATF_PUBL */
+#define NTF_EXT_LEARNED (1 << 4)
+#define NTF_OFFLOADED (1 << 5)
+#define NTF_STICKY (1 << 6)
+#define NTF_ROUTER (1 << 7)
+/* Extended flags under NDA_FLAGS_EXT: */
+#define NTF_EXT_MANAGED (1 << 0)
+#define NTF_EXT_LOCKED (1 << 1)
+#define NTF_EXT_EXT_VALIDATED (1 << 2)
+
+/*
+ * Neighbor Cache Entry States.
+ */
+
+#define NUD_INCOMPLETE 0x01
+#define NUD_REACHABLE 0x02
+#define NUD_STALE 0x04
+#define NUD_DELAY 0x08
+#define NUD_PROBE 0x10
+#define NUD_FAILED 0x20
+
+/* Dummy states */
+#define NUD_NOARP 0x40
+#define NUD_PERMANENT 0x80
+#define NUD_NONE 0x00
+
+/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change and make no
+ * address resolution or NUD.
+ *
+ * NUD_PERMANENT also cannot be deleted by garbage collectors. This holds true
+ * for dynamic entries with NTF_EXT_LEARNED flag as well. However, upon carrier
+ * down event, NUD_PERMANENT entries are not flushed whereas NTF_EXT_LEARNED
+ * flagged entries explicitly are (which is also consistent with the routing
+ * subsystem).
+ *
+ * When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry
+ * states don't make sense and thus are ignored. Such entries don't age and
+ * can roam.
+ *
+ * NTF_EXT_MANAGED flagged neigbor entries are managed by the kernel on behalf
+ * of a user space control plane, and automatically refreshed so that (if
+ * possible) they remain in NUD_REACHABLE state.
+ *
+ * NTF_EXT_LOCKED flagged bridge FDB entries are entries generated by the
+ * bridge in response to a host trying to communicate via a locked bridge port
+ * with MAB enabled. Their purpose is to notify user space that a host requires
+ * authentication.
+ *
+ * NTF_EXT_EXT_VALIDATED flagged neighbor entries were externally validated by
+ * a user space control plane. The kernel will not remove or invalidate them,
+ * but it can probe them and notify user space when they become reachable.
+ */
+
+struct nda_cacheinfo {
+ __u32 ndm_confirmed;
+ __u32 ndm_used;
+ __u32 ndm_updated;
+ __u32 ndm_refcnt;
+};
+
+/*****************************************************************
+ * Neighbour tables specific messages.
+ *
+ * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the
+ * NLM_F_DUMP flag set. Every neighbour table configuration is
+ * spread over multiple messages to avoid running into message
+ * size limits on systems with many interfaces. The first message
+ * in the sequence transports all not device specific data such as
+ * statistics, configuration, and the default parameter set.
+ * This message is followed by 0..n messages carrying device
+ * specific parameter sets.
+ * Although the ordering should be sufficient, NDTA_NAME can be
+ * used to identify sequences. The initial message can be identified
+ * by checking for NDTA_CONFIG. The device specific messages do
+ * not contain this TLV but have NDTPA_IFINDEX set to the
+ * corresponding interface index.
+ *
+ * To change neighbour table attributes, send RTM_SETNEIGHTBL
+ * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3],
+ * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked
+ * otherwise. Device specific parameter sets can be changed by
+ * setting NDTPA_IFINDEX to the interface index of the corresponding
+ * device.
+ ****/
+
+struct ndt_stats {
+ __u64 ndts_allocs;
+ __u64 ndts_destroys;
+ __u64 ndts_hash_grows;
+ __u64 ndts_res_failed;
+ __u64 ndts_lookups;
+ __u64 ndts_hits;
+ __u64 ndts_rcv_probes_mcast;
+ __u64 ndts_rcv_probes_ucast;
+ __u64 ndts_periodic_gc_runs;
+ __u64 ndts_forced_gc_runs;
+ __u64 ndts_table_fulls;
+};
+
+enum {
+ NDTPA_UNSPEC,
+ NDTPA_IFINDEX, /* u32, unchangeable */
+ NDTPA_REFCNT, /* u32, read-only */
+ NDTPA_REACHABLE_TIME, /* u64, read-only, msecs */
+ NDTPA_BASE_REACHABLE_TIME, /* u64, msecs */
+ NDTPA_RETRANS_TIME, /* u64, msecs */
+ NDTPA_GC_STALETIME, /* u64, msecs */
+ NDTPA_DELAY_PROBE_TIME, /* u64, msecs */
+ NDTPA_QUEUE_LEN, /* u32 */
+ NDTPA_APP_PROBES, /* u32 */
+ NDTPA_UCAST_PROBES, /* u32 */
+ NDTPA_MCAST_PROBES, /* u32 */
+ NDTPA_ANYCAST_DELAY, /* u64, msecs */
+ NDTPA_PROXY_DELAY, /* u64, msecs */
+ NDTPA_PROXY_QLEN, /* u32 */
+ NDTPA_LOCKTIME, /* u64, msecs */
+ NDTPA_QUEUE_LENBYTES, /* u32 */
+ NDTPA_MCAST_REPROBES, /* u32 */
+ NDTPA_PAD,
+ NDTPA_INTERVAL_PROBE_TIME_MS, /* u64, msecs */
+ __NDTPA_MAX
+};
+#define NDTPA_MAX (__NDTPA_MAX - 1)
+
+struct ndtmsg {
+ __u8 ndtm_family;
+ __u8 ndtm_pad1;
+ __u16 ndtm_pad2;
+};
+
+struct ndt_config {
+ __u16 ndtc_key_len;
+ __u16 ndtc_entry_size;
+ __u32 ndtc_entries;
+ __u32 ndtc_last_flush; /* delta to now in msecs */
+ __u32 ndtc_last_rand; /* delta to now in msecs */
+ __u32 ndtc_hash_rnd;
+ __u32 ndtc_hash_mask;
+ __u32 ndtc_hash_chain_gc;
+ __u32 ndtc_proxy_qlen;
+};
+
+enum {
+ NDTA_UNSPEC,
+ NDTA_NAME, /* char *, unchangeable */
+ NDTA_THRESH1, /* u32 */
+ NDTA_THRESH2, /* u32 */
+ NDTA_THRESH3, /* u32 */
+ NDTA_CONFIG, /* struct ndt_config, read-only */
+ NDTA_PARMS, /* nested TLV NDTPA_* */
+ NDTA_STATS, /* struct ndt_stats, read-only */
+ NDTA_GC_INTERVAL, /* u64, msecs */
+ NDTA_PAD,
+ __NDTA_MAX
+};
+#define NDTA_MAX (__NDTA_MAX - 1)
+
+ /* FDB activity notification bits used in NFEA_ACTIVITY_NOTIFY:
+ * - FDB_NOTIFY_BIT - notify on activity/expire for any entry
+ * - FDB_NOTIFY_INACTIVE_BIT - mark as inactive to avoid multiple notifications
+ */
+enum {
+ FDB_NOTIFY_BIT = (1 << 0),
+ FDB_NOTIFY_INACTIVE_BIT = (1 << 1)
+};
+
+/* embedded into NDA_FDB_EXT_ATTRS:
+ * [NDA_FDB_EXT_ATTRS] = {
+ * [NFEA_ACTIVITY_NOTIFY]
+ * ...
+ * }
+ */
+enum {
+ NFEA_UNSPEC,
+ NFEA_ACTIVITY_NOTIFY,
+ NFEA_DONT_REFRESH,
+ __NFEA_MAX
+};
+#define NFEA_MAX (__NFEA_MAX - 1)
+
+#endif
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index 43742ac5b00d..e0b579a1df4f 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -2,6 +2,7 @@
/* Do not edit directly, auto-generated from: */
/* Documentation/netlink/specs/netdev.yaml */
/* YNL-GEN uapi header */
+/* To regenerate run: tools/net/ynl/ynl-regen.sh */
#ifndef _UAPI_LINUX_NETDEV_H
#define _UAPI_LINUX_NETDEV_H
@@ -59,10 +60,13 @@ enum netdev_xdp_rx_metadata {
* by the driver.
* @NETDEV_XSK_FLAGS_TX_CHECKSUM: L3 checksum HW offload is supported by the
* driver.
+ * @NETDEV_XSK_FLAGS_TX_LAUNCH_TIME_FIFO: Launch time HW offload is supported
+ * by the driver.
*/
enum netdev_xsk_flags {
NETDEV_XSK_FLAGS_TX_TIMESTAMP = 1,
NETDEV_XSK_FLAGS_TX_CHECKSUM = 2,
+ NETDEV_XSK_FLAGS_TX_LAUNCH_TIME_FIFO = 4,
};
enum netdev_queue_type {
@@ -74,6 +78,12 @@ enum netdev_qstats_scope {
NETDEV_QSTATS_SCOPE_QUEUE = 1,
};
+enum netdev_napi_threaded {
+ NETDEV_NAPI_THREADED_DISABLED,
+ NETDEV_NAPI_THREADED_ENABLED,
+ NETDEV_NAPI_THREADED_BUSY_POLL,
+};
+
enum {
NETDEV_A_DEV_IFINDEX = 1,
NETDEV_A_DEV_PAD,
@@ -87,12 +97,19 @@ enum {
};
enum {
+ __NETDEV_A_IO_URING_PROVIDER_INFO_MAX,
+ NETDEV_A_IO_URING_PROVIDER_INFO_MAX = (__NETDEV_A_IO_URING_PROVIDER_INFO_MAX - 1)
+};
+
+enum {
NETDEV_A_PAGE_POOL_ID = 1,
NETDEV_A_PAGE_POOL_IFINDEX,
NETDEV_A_PAGE_POOL_NAPI_ID,
NETDEV_A_PAGE_POOL_INFLIGHT,
NETDEV_A_PAGE_POOL_INFLIGHT_MEM,
NETDEV_A_PAGE_POOL_DETACH_TIME,
+ NETDEV_A_PAGE_POOL_DMABUF,
+ NETDEV_A_PAGE_POOL_IO_URING,
__NETDEV_A_PAGE_POOL_MAX,
NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1)
@@ -121,16 +138,28 @@ enum {
NETDEV_A_NAPI_ID,
NETDEV_A_NAPI_IRQ,
NETDEV_A_NAPI_PID,
+ NETDEV_A_NAPI_DEFER_HARD_IRQS,
+ NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT,
+ NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT,
+ NETDEV_A_NAPI_THREADED,
__NETDEV_A_NAPI_MAX,
NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1)
};
enum {
+ __NETDEV_A_XSK_INFO_MAX,
+ NETDEV_A_XSK_INFO_MAX = (__NETDEV_A_XSK_INFO_MAX - 1)
+};
+
+enum {
NETDEV_A_QUEUE_ID = 1,
NETDEV_A_QUEUE_IFINDEX,
NETDEV_A_QUEUE_TYPE,
NETDEV_A_QUEUE_NAPI_ID,
+ NETDEV_A_QUEUE_DMABUF,
+ NETDEV_A_QUEUE_IO_URING,
+ NETDEV_A_QUEUE_XSK,
__NETDEV_A_QUEUE_MAX,
NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)
@@ -174,6 +203,16 @@ enum {
};
enum {
+ NETDEV_A_DMABUF_IFINDEX = 1,
+ NETDEV_A_DMABUF_QUEUES,
+ NETDEV_A_DMABUF_FD,
+ NETDEV_A_DMABUF_ID,
+
+ __NETDEV_A_DMABUF_MAX,
+ NETDEV_A_DMABUF_MAX = (__NETDEV_A_DMABUF_MAX - 1)
+};
+
+enum {
NETDEV_CMD_DEV_GET = 1,
NETDEV_CMD_DEV_ADD_NTF,
NETDEV_CMD_DEV_DEL_NTF,
@@ -186,6 +225,9 @@ enum {
NETDEV_CMD_QUEUE_GET,
NETDEV_CMD_NAPI_GET,
NETDEV_CMD_QSTATS_GET,
+ NETDEV_CMD_BIND_RX,
+ NETDEV_CMD_NAPI_SET,
+ NETDEV_CMD_BIND_TX,
__NETDEV_CMD_MAX,
NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
diff --git a/tools/include/uapi/linux/netfilter.h b/tools/include/uapi/linux/netfilter.h
new file mode 100644
index 000000000000..5a79ccb76701
--- /dev/null
+++ b/tools/include/uapi/linux/netfilter.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_NETFILTER_H
+#define _UAPI__LINUX_NETFILTER_H
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+
+/* Responses from hook functions. */
+#define NF_DROP 0
+#define NF_ACCEPT 1
+#define NF_STOLEN 2
+#define NF_QUEUE 3
+#define NF_REPEAT 4
+#define NF_STOP 5 /* Deprecated, for userspace nf_queue compatibility. */
+#define NF_MAX_VERDICT NF_STOP
+
+/* we overload the higher bits for encoding auxiliary data such as the queue
+ * number or errno values. Not nice, but better than additional function
+ * arguments. */
+#define NF_VERDICT_MASK 0x000000ff
+
+/* extra verdict flags have mask 0x0000ff00 */
+#define NF_VERDICT_FLAG_QUEUE_BYPASS 0x00008000
+
+/* queue number (NF_QUEUE) or errno (NF_DROP) */
+#define NF_VERDICT_QMASK 0xffff0000
+#define NF_VERDICT_QBITS 16
+
+#define NF_QUEUE_NR(x) ((((x) << 16) & NF_VERDICT_QMASK) | NF_QUEUE)
+
+#define NF_DROP_ERR(x) (((-x) << 16) | NF_DROP)
+
+/* only for userspace compatibility */
+#ifndef __KERNEL__
+
+/* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */
+#define NF_VERDICT_BITS 16
+#endif
+
+enum nf_inet_hooks {
+ NF_INET_PRE_ROUTING,
+ NF_INET_LOCAL_IN,
+ NF_INET_FORWARD,
+ NF_INET_LOCAL_OUT,
+ NF_INET_POST_ROUTING,
+ NF_INET_NUMHOOKS,
+ NF_INET_INGRESS = NF_INET_NUMHOOKS,
+};
+
+enum nf_dev_hooks {
+ NF_NETDEV_INGRESS,
+ NF_NETDEV_EGRESS,
+ NF_NETDEV_NUMHOOKS
+};
+
+enum {
+ NFPROTO_UNSPEC = 0,
+ NFPROTO_INET = 1,
+ NFPROTO_IPV4 = 2,
+ NFPROTO_ARP = 3,
+ NFPROTO_NETDEV = 5,
+ NFPROTO_BRIDGE = 7,
+ NFPROTO_IPV6 = 10,
+#ifndef __KERNEL__ /* no longer supported by kernel */
+ NFPROTO_DECNET = 12,
+#endif
+ NFPROTO_NUMPROTO,
+};
+
+union nf_inet_addr {
+ __u32 all[4];
+ __be32 ip;
+ __be32 ip6[4];
+ struct in_addr in;
+ struct in6_addr in6;
+};
+
+#endif /* _UAPI__LINUX_NETFILTER_H */
diff --git a/tools/include/uapi/linux/netfilter_arp.h b/tools/include/uapi/linux/netfilter_arp.h
new file mode 100644
index 000000000000..791dfc5ae907
--- /dev/null
+++ b/tools/include/uapi/linux/netfilter_arp.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */
+#ifndef __LINUX_ARP_NETFILTER_H
+#define __LINUX_ARP_NETFILTER_H
+
+/* ARP-specific defines for netfilter.
+ * (C)2002 Rusty Russell IBM -- This code is GPL.
+ */
+
+#include <linux/netfilter.h>
+
+/* There is no PF_ARP. */
+#define NF_ARP 0
+
+/* ARP Hooks */
+#define NF_ARP_IN 0
+#define NF_ARP_OUT 1
+#define NF_ARP_FORWARD 2
+
+#ifndef __KERNEL__
+#define NF_ARP_NUMHOOKS 3
+#endif
+
+#endif /* __LINUX_ARP_NETFILTER_H */
diff --git a/tools/include/uapi/linux/nsfs.h b/tools/include/uapi/linux/nsfs.h
new file mode 100644
index 000000000000..a25e38d1c874
--- /dev/null
+++ b/tools/include/uapi/linux/nsfs.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_NSFS_H
+#define __LINUX_NSFS_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+#define NSIO 0xb7
+
+/* Returns a file descriptor that refers to an owning user namespace */
+#define NS_GET_USERNS _IO(NSIO, 0x1)
+/* Returns a file descriptor that refers to a parent namespace */
+#define NS_GET_PARENT _IO(NSIO, 0x2)
+/* Returns the type of namespace (CLONE_NEW* value) referred to by
+ file descriptor */
+#define NS_GET_NSTYPE _IO(NSIO, 0x3)
+/* Get owner UID (in the caller's user namespace) for a user namespace */
+#define NS_GET_OWNER_UID _IO(NSIO, 0x4)
+/* Translate pid from target pid namespace into the caller's pid namespace. */
+#define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int)
+/* Return thread-group leader id of pid in the callers pid namespace. */
+#define NS_GET_TGID_FROM_PIDNS _IOR(NSIO, 0x7, int)
+/* Translate pid from caller's pid namespace into a target pid namespace. */
+#define NS_GET_PID_IN_PIDNS _IOR(NSIO, 0x8, int)
+/* Return thread-group leader id of pid in the target pid namespace. */
+#define NS_GET_TGID_IN_PIDNS _IOR(NSIO, 0x9, int)
+
+struct mnt_ns_info {
+ __u32 size;
+ __u32 nr_mounts;
+ __u64 mnt_ns_id;
+};
+
+#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */
+
+/* Get information about namespace. */
+#define NS_MNT_GET_INFO _IOR(NSIO, 10, struct mnt_ns_info)
+/* Get next namespace. */
+#define NS_MNT_GET_NEXT _IOR(NSIO, 11, struct mnt_ns_info)
+/* Get previous namespace. */
+#define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info)
+
+/* Retrieve namespace identifiers. */
+#define NS_GET_MNTNS_ID _IOR(NSIO, 5, __u64)
+#define NS_GET_ID _IOR(NSIO, 13, __u64)
+
+enum init_ns_ino {
+ IPC_NS_INIT_INO = 0xEFFFFFFFU,
+ UTS_NS_INIT_INO = 0xEFFFFFFEU,
+ USER_NS_INIT_INO = 0xEFFFFFFDU,
+ PID_NS_INIT_INO = 0xEFFFFFFCU,
+ CGROUP_NS_INIT_INO = 0xEFFFFFFBU,
+ TIME_NS_INIT_INO = 0xEFFFFFFAU,
+ NET_NS_INIT_INO = 0xEFFFFFF9U,
+ MNT_NS_INIT_INO = 0xEFFFFFF8U,
+#ifdef __KERNEL__
+ MNT_NS_ANON_INO = 0xEFFFFFF7U,
+#endif
+};
+
+struct nsfs_file_handle {
+ __u64 ns_id;
+ __u32 ns_type;
+ __u32 ns_inum;
+};
+
+#define NSFS_FILE_HANDLE_SIZE_VER0 16 /* sizeof first published struct */
+#define NSFS_FILE_HANDLE_SIZE_LATEST sizeof(struct nsfs_file_handle) /* sizeof latest published struct */
+
+enum init_ns_id {
+ IPC_NS_INIT_ID = 1ULL,
+ UTS_NS_INIT_ID = 2ULL,
+ USER_NS_INIT_ID = 3ULL,
+ PID_NS_INIT_ID = 4ULL,
+ CGROUP_NS_INIT_ID = 5ULL,
+ TIME_NS_INIT_ID = 6ULL,
+ NET_NS_INIT_ID = 7ULL,
+ MNT_NS_INIT_ID = 8ULL,
+#ifdef __KERNEL__
+ NS_LAST_INIT_ID = MNT_NS_INIT_ID,
+#endif
+};
+
+enum ns_type {
+ TIME_NS = (1ULL << 7), /* CLONE_NEWTIME */
+ MNT_NS = (1ULL << 17), /* CLONE_NEWNS */
+ CGROUP_NS = (1ULL << 25), /* CLONE_NEWCGROUP */
+ UTS_NS = (1ULL << 26), /* CLONE_NEWUTS */
+ IPC_NS = (1ULL << 27), /* CLONE_NEWIPC */
+ USER_NS = (1ULL << 28), /* CLONE_NEWUSER */
+ PID_NS = (1ULL << 29), /* CLONE_NEWPID */
+ NET_NS = (1ULL << 30), /* CLONE_NEWNET */
+};
+
+/**
+ * struct ns_id_req - namespace ID request structure
+ * @size: size of this structure
+ * @spare: reserved for future use
+ * @filter: filter mask
+ * @ns_id: last namespace id
+ * @user_ns_id: owning user namespace ID
+ *
+ * Structure for passing namespace ID and miscellaneous parameters to
+ * statns(2) and listns(2).
+ *
+ * For statns(2) @param represents the request mask.
+ * For listns(2) @param represents the last listed mount id (or zero).
+ */
+struct ns_id_req {
+ __u32 size;
+ __u32 spare;
+ __u64 ns_id;
+ struct /* listns */ {
+ __u32 ns_type;
+ __u32 spare2;
+ __u64 user_ns_id;
+ };
+};
+
+/*
+ * Special @user_ns_id value that can be passed to listns()
+ */
+#define LISTNS_CURRENT_USER 0xffffffffffffffff /* Caller's userns */
+
+/* List of all ns_id_req versions. */
+#define NS_ID_REQ_SIZE_VER0 32 /* sizeof first published struct */
+
+#endif /* __LINUX_NSFS_H */
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 3a64499b0f5d..c44a8fb3e418 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -39,18 +39,21 @@ enum perf_type_id {
/*
* attr.config layout for type PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE
+ *
* PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA
* AA: hardware event ID
* EEEEEEEE: PMU type ID
+ *
* PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB
* BB: hardware cache ID
* CC: hardware cache op ID
* DD: hardware cache op result ID
* EEEEEEEE: PMU type ID
- * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied.
+ *
+ * If the PMU type ID is 0, PERF_TYPE_RAW will be applied.
*/
-#define PERF_PMU_TYPE_SHIFT 32
-#define PERF_HW_EVENT_MASK 0xffffffff
+#define PERF_PMU_TYPE_SHIFT 32
+#define PERF_HW_EVENT_MASK 0xffffffff
/*
* Generalized performance event event_id types, used by the
@@ -112,7 +115,7 @@ enum perf_hw_cache_op_result_id {
/*
* Special "software" events provided by the kernel, even if the hardware
* does not support performance events. These events measure various
- * physical and sw events of the kernel (and allow the profiling of them as
+ * physical and SW events of the kernel (and allow the profiling of them as
* well):
*/
enum perf_sw_ids {
@@ -167,8 +170,9 @@ enum perf_event_sample_format {
};
#define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT)
+
/*
- * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set
+ * Values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set.
*
* If the user does not pass priv level information via branch_sample_type,
* the kernel uses the event's priv level. Branch and event priv levels do
@@ -178,20 +182,20 @@ enum perf_event_sample_format {
* of branches and therefore it supersedes all the other types.
*/
enum perf_branch_sample_type_shift {
- PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */
- PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */
- PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */
-
- PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */
- PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */
- PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */
- PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */
- PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */
- PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */
- PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */
+ PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */
+ PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */
+ PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */
+
+ PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */
+ PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */
+ PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */
+ PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */
+ PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */
+ PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */
+ PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */
PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */
- PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */
+ PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* CALL/RET stack */
PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */
PERF_SAMPLE_BRANCH_CALL_SHIFT = 13, /* direct call */
@@ -210,96 +214,95 @@ enum perf_branch_sample_type_shift {
};
enum perf_branch_sample_type {
- PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT,
- PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT,
- PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT,
+ PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT,
+ PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT,
+ PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT,
- PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT,
- PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT,
- PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT,
- PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT,
- PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT,
- PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT,
- PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT,
- PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT,
+ PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT,
+ PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT,
+ PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT,
- PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
- PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT,
- PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
+ PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT,
+ PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT,
- PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
- PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,
- PERF_SAMPLE_BRANCH_TYPE_SAVE =
- 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,
+ PERF_SAMPLE_BRANCH_TYPE_SAVE = 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,
- PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT,
+ PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT,
- PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
+ PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
- PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT,
+ PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT,
- PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
+ PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
};
/*
- * Common flow change classification
+ * Common control flow change classifications:
*/
enum {
- PERF_BR_UNKNOWN = 0, /* unknown */
- PERF_BR_COND = 1, /* conditional */
- PERF_BR_UNCOND = 2, /* unconditional */
- PERF_BR_IND = 3, /* indirect */
- PERF_BR_CALL = 4, /* function call */
- PERF_BR_IND_CALL = 5, /* indirect function call */
- PERF_BR_RET = 6, /* function return */
- PERF_BR_SYSCALL = 7, /* syscall */
- PERF_BR_SYSRET = 8, /* syscall return */
- PERF_BR_COND_CALL = 9, /* conditional function call */
- PERF_BR_COND_RET = 10, /* conditional function return */
- PERF_BR_ERET = 11, /* exception return */
- PERF_BR_IRQ = 12, /* irq */
- PERF_BR_SERROR = 13, /* system error */
- PERF_BR_NO_TX = 14, /* not in transaction */
- PERF_BR_EXTEND_ABI = 15, /* extend ABI */
+ PERF_BR_UNKNOWN = 0, /* Unknown */
+ PERF_BR_COND = 1, /* Conditional */
+ PERF_BR_UNCOND = 2, /* Unconditional */
+ PERF_BR_IND = 3, /* Indirect */
+ PERF_BR_CALL = 4, /* Function call */
+ PERF_BR_IND_CALL = 5, /* Indirect function call */
+ PERF_BR_RET = 6, /* Function return */
+ PERF_BR_SYSCALL = 7, /* Syscall */
+ PERF_BR_SYSRET = 8, /* Syscall return */
+ PERF_BR_COND_CALL = 9, /* Conditional function call */
+ PERF_BR_COND_RET = 10, /* Conditional function return */
+ PERF_BR_ERET = 11, /* Exception return */
+ PERF_BR_IRQ = 12, /* IRQ */
+ PERF_BR_SERROR = 13, /* System error */
+ PERF_BR_NO_TX = 14, /* Not in transaction */
+ PERF_BR_EXTEND_ABI = 15, /* Extend ABI */
PERF_BR_MAX,
};
/*
- * Common branch speculation outcome classification
+ * Common branch speculation outcome classifications:
*/
enum {
- PERF_BR_SPEC_NA = 0, /* Not available */
- PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */
- PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */
- PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */
+ PERF_BR_SPEC_NA = 0, /* Not available */
+ PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */
+ PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */
+ PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */
PERF_BR_SPEC_MAX,
};
enum {
- PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */
- PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */
- PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */
- PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */
- PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */
- PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */
- PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */
- PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */
+ PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */
+ PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */
+ PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */
+ PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */
+ PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */
+ PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */
+ PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */
+ PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */
PERF_BR_NEW_MAX,
};
enum {
- PERF_BR_PRIV_UNKNOWN = 0,
- PERF_BR_PRIV_USER = 1,
- PERF_BR_PRIV_KERNEL = 2,
- PERF_BR_PRIV_HV = 3,
+ PERF_BR_PRIV_UNKNOWN = 0,
+ PERF_BR_PRIV_USER = 1,
+ PERF_BR_PRIV_KERNEL = 2,
+ PERF_BR_PRIV_HV = 3,
};
-#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1
-#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2
-#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3
-#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4
-#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5
+#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1
+#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2
+#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3
+#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4
+#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5
#define PERF_SAMPLE_BRANCH_PLM_ALL \
(PERF_SAMPLE_BRANCH_USER|\
@@ -310,9 +313,9 @@ enum {
* Values to determine ABI of the registers dump.
*/
enum perf_sample_regs_abi {
- PERF_SAMPLE_REGS_ABI_NONE = 0,
- PERF_SAMPLE_REGS_ABI_32 = 1,
- PERF_SAMPLE_REGS_ABI_64 = 2,
+ PERF_SAMPLE_REGS_ABI_NONE = 0,
+ PERF_SAMPLE_REGS_ABI_32 = 1,
+ PERF_SAMPLE_REGS_ABI_64 = 2,
};
/*
@@ -320,21 +323,21 @@ enum perf_sample_regs_abi {
* abort events. Multiple bits can be set.
*/
enum {
- PERF_TXN_ELISION = (1 << 0), /* From elision */
- PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */
- PERF_TXN_SYNC = (1 << 2), /* Instruction is related */
- PERF_TXN_ASYNC = (1 << 3), /* Instruction not related */
- PERF_TXN_RETRY = (1 << 4), /* Retry possible */
- PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */
- PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */
- PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */
+ PERF_TXN_ELISION = (1 << 0), /* From elision */
+ PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */
+ PERF_TXN_SYNC = (1 << 2), /* Instruction is related */
+ PERF_TXN_ASYNC = (1 << 3), /* Instruction is not related */
+ PERF_TXN_RETRY = (1 << 4), /* Retry possible */
+ PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */
+ PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */
+ PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */
- PERF_TXN_MAX = (1 << 8), /* non-ABI */
+ PERF_TXN_MAX = (1 << 8), /* non-ABI */
- /* bits 32..63 are reserved for the abort code */
+ /* Bits 32..63 are reserved for the abort code */
- PERF_TXN_ABORT_MASK = (0xffffffffULL << 32),
- PERF_TXN_ABORT_SHIFT = 32,
+ PERF_TXN_ABORT_MASK = (0xffffffffULL << 32),
+ PERF_TXN_ABORT_SHIFT = 32,
};
/*
@@ -369,22 +372,23 @@ enum perf_event_read_format {
PERF_FORMAT_MAX = 1U << 5, /* non-ABI */
};
-#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
-#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */
-#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */
-#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */
- /* add: sample_stack_user */
-#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */
-#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */
-#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */
-#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */
-#define PERF_ATTR_SIZE_VER8 136 /* add: config3 */
+#define PERF_ATTR_SIZE_VER0 64 /* Size of first published 'struct perf_event_attr' */
+#define PERF_ATTR_SIZE_VER1 72 /* Add: config2 */
+#define PERF_ATTR_SIZE_VER2 80 /* Add: branch_sample_type */
+#define PERF_ATTR_SIZE_VER3 96 /* Add: sample_regs_user */
+ /* Add: sample_stack_user */
+#define PERF_ATTR_SIZE_VER4 104 /* Add: sample_regs_intr */
+#define PERF_ATTR_SIZE_VER5 112 /* Add: aux_watermark */
+#define PERF_ATTR_SIZE_VER6 120 /* Add: aux_sample_size */
+#define PERF_ATTR_SIZE_VER7 128 /* Add: sig_data */
+#define PERF_ATTR_SIZE_VER8 136 /* Add: config3 */
+#define PERF_ATTR_SIZE_VER9 144 /* add: config4 */
/*
- * Hardware event_id to monitor via a performance monitoring event:
- *
- * @sample_max_stack: Max number of frame pointers in a callchain,
- * should be < /proc/sys/kernel/perf_event_max_stack
+ * 'struct perf_event_attr' contains various attributes that define
+ * a performance event - most of them hardware related configuration
+ * details, but also a lot of behavioral switches and values implemented
+ * by the kernel.
*/
struct perf_event_attr {
@@ -394,7 +398,7 @@ struct perf_event_attr {
__u32 type;
/*
- * Size of the attr structure, for fwd/bwd compat.
+ * Size of the attr structure, for forward/backwards compatibility.
*/
__u32 size;
@@ -449,21 +453,23 @@ struct perf_event_attr {
comm_exec : 1, /* flag comm events that are due to an exec */
use_clockid : 1, /* use @clockid for time fields */
context_switch : 1, /* context switch data */
- write_backward : 1, /* Write ring buffer from end to beginning */
+ write_backward : 1, /* write ring buffer from end to beginning */
namespaces : 1, /* include namespaces data */
ksymbol : 1, /* include ksymbol events */
- bpf_event : 1, /* include bpf events */
+ bpf_event : 1, /* include BPF events */
aux_output : 1, /* generate AUX records instead of events */
cgroup : 1, /* include cgroup events */
text_poke : 1, /* include text poke events */
- build_id : 1, /* use build id in mmap2 events */
+ build_id : 1, /* use build ID in mmap2 events */
inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */
remove_on_exec : 1, /* event is removed from task on exec */
sigtrap : 1, /* send synchronous SIGTRAP on event */
- __reserved_1 : 26;
+ defer_callchain: 1, /* request PERF_RECORD_CALLCHAIN_DEFERRED records */
+ defer_output : 1, /* output PERF_RECORD_CALLCHAIN_DEFERRED records */
+ __reserved_1 : 24;
union {
- __u32 wakeup_events; /* wakeup every n events */
+ __u32 wakeup_events; /* wake up every n events */
__u32 wakeup_watermark; /* bytes before wakeup */
};
@@ -472,13 +478,13 @@ struct perf_event_attr {
__u64 bp_addr;
__u64 kprobe_func; /* for perf_kprobe */
__u64 uprobe_path; /* for perf_uprobe */
- __u64 config1; /* extension of config */
+ __u64 config1; /* extension of config */
};
union {
__u64 bp_len;
- __u64 kprobe_addr; /* when kprobe_func == NULL */
+ __u64 kprobe_addr; /* when kprobe_func == NULL */
__u64 probe_offset; /* for perf_[k,u]probe */
- __u64 config2; /* extension of config1 */
+ __u64 config2; /* extension of config1 */
};
__u64 branch_sample_type; /* enum perf_branch_sample_type */
@@ -508,10 +514,28 @@ struct perf_event_attr {
* Wakeup watermark for AUX area
*/
__u32 aux_watermark;
+
+ /*
+ * Max number of frame pointers in a callchain, should be
+ * lower than /proc/sys/kernel/perf_event_max_stack.
+ *
+ * Max number of entries of branch stack should be lower
+ * than the hardware limit.
+ */
__u16 sample_max_stack;
+
__u16 __reserved_2;
__u32 aux_sample_size;
- __u32 __reserved_3;
+
+ union {
+ __u32 aux_action;
+ struct {
+ __u32 aux_start_paused : 1, /* start AUX area tracing paused */
+ aux_pause : 1, /* on overflow, pause AUX area tracing */
+ aux_resume : 1, /* on overflow, resume AUX area tracing */
+ __reserved_3 : 29;
+ };
+ };
/*
* User provided data if sigtrap=1, passed back to user via
@@ -522,11 +546,12 @@ struct perf_event_attr {
__u64 sig_data;
__u64 config3; /* extension of config2 */
+ __u64 config4; /* extension of config3 */
};
/*
* Structure used by below PERF_EVENT_IOC_QUERY_BPF command
- * to query bpf programs attached to the same perf tracepoint
+ * to query BPF programs attached to the same perf tracepoint
* as the given perf event.
*/
struct perf_event_query_bpf {
@@ -548,21 +573,21 @@ struct perf_event_query_bpf {
/*
* Ioctls that can be done on a perf event fd:
*/
-#define PERF_EVENT_IOC_ENABLE _IO ('$', 0)
-#define PERF_EVENT_IOC_DISABLE _IO ('$', 1)
-#define PERF_EVENT_IOC_REFRESH _IO ('$', 2)
-#define PERF_EVENT_IOC_RESET _IO ('$', 3)
-#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64)
-#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5)
-#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *)
-#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *)
-#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
-#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32)
+#define PERF_EVENT_IOC_ENABLE _IO ('$', 0)
+#define PERF_EVENT_IOC_DISABLE _IO ('$', 1)
+#define PERF_EVENT_IOC_REFRESH _IO ('$', 2)
+#define PERF_EVENT_IOC_RESET _IO ('$', 3)
+#define PERF_EVENT_IOC_PERIOD _IOW ('$', 4, __u64)
+#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5)
+#define PERF_EVENT_IOC_SET_FILTER _IOW ('$', 6, char *)
+#define PERF_EVENT_IOC_ID _IOR ('$', 7, __u64 *)
+#define PERF_EVENT_IOC_SET_BPF _IOW ('$', 8, __u32)
+#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW ('$', 9, __u32)
#define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *)
-#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW('$', 11, struct perf_event_attr *)
+#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW ('$', 11, struct perf_event_attr *)
enum perf_event_ioc_flags {
- PERF_IOC_FLAG_GROUP = 1U << 0,
+ PERF_IOC_FLAG_GROUP = 1U << 0,
};
/*
@@ -573,7 +598,7 @@ struct perf_event_mmap_page {
__u32 compat_version; /* lowest version this is compat with */
/*
- * Bits needed to read the hw events in user-space.
+ * Bits needed to read the HW events in user-space.
*
* u32 seq, time_mult, time_shift, index, width;
* u64 count, enabled, running;
@@ -611,7 +636,7 @@ struct perf_event_mmap_page {
__u32 index; /* hardware event identifier */
__s64 offset; /* add to hardware event value */
__u64 time_enabled; /* time event active */
- __u64 time_running; /* time event on cpu */
+ __u64 time_running; /* time event on CPU */
union {
__u64 capabilities;
struct {
@@ -639,7 +664,7 @@ struct perf_event_mmap_page {
/*
* If cap_usr_time the below fields can be used to compute the time
- * delta since time_enabled (in ns) using rdtsc or similar.
+ * delta since time_enabled (in ns) using RDTSC or similar.
*
* u64 quot, rem;
* u64 delta;
@@ -712,7 +737,7 @@ struct perf_event_mmap_page {
* after reading this value.
*
* When the mapping is PROT_WRITE the @data_tail value should be
- * written by userspace to reflect the last read data, after issueing
+ * written by user-space to reflect the last read data, after issuing
* an smp_mb() to separate the data read from the ->data_tail store.
* In this case the kernel will not over-write unread data.
*
@@ -728,7 +753,7 @@ struct perf_event_mmap_page {
/*
* AUX area is defined by aux_{offset,size} fields that should be set
- * by the userspace, so that
+ * by the user-space, so that
*
* aux_offset >= data_offset + data_size
*
@@ -802,7 +827,7 @@ struct perf_event_mmap_page {
* Indicates that thread was preempted in TASK_RUNNING state.
*
* PERF_RECORD_MISC_MMAP_BUILD_ID:
- * Indicates that mmap2 event carries build id data.
+ * Indicates that mmap2 event carries build ID data.
*/
#define PERF_RECORD_MISC_EXACT_IP (1 << 14)
#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14)
@@ -813,26 +838,26 @@ struct perf_event_mmap_page {
#define PERF_RECORD_MISC_EXT_RESERVED (1 << 15)
struct perf_event_header {
- __u32 type;
- __u16 misc;
- __u16 size;
+ __u32 type;
+ __u16 misc;
+ __u16 size;
};
struct perf_ns_link_info {
- __u64 dev;
- __u64 ino;
+ __u64 dev;
+ __u64 ino;
};
enum {
- NET_NS_INDEX = 0,
- UTS_NS_INDEX = 1,
- IPC_NS_INDEX = 2,
- PID_NS_INDEX = 3,
- USER_NS_INDEX = 4,
- MNT_NS_INDEX = 5,
- CGROUP_NS_INDEX = 6,
-
- NR_NAMESPACES, /* number of available namespaces */
+ NET_NS_INDEX = 0,
+ UTS_NS_INDEX = 1,
+ IPC_NS_INDEX = 2,
+ PID_NS_INDEX = 3,
+ USER_NS_INDEX = 4,
+ MNT_NS_INDEX = 5,
+ CGROUP_NS_INDEX = 6,
+
+ NR_NAMESPACES, /* number of available namespaces */
};
enum perf_event_type {
@@ -848,11 +873,11 @@ enum perf_event_type {
* optional fields being ignored.
*
* struct sample_id {
- * { u32 pid, tid; } && PERF_SAMPLE_TID
- * { u64 time; } && PERF_SAMPLE_TIME
- * { u64 id; } && PERF_SAMPLE_ID
- * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
- * { u32 cpu, res; } && PERF_SAMPLE_CPU
+ * { u32 pid, tid; } && PERF_SAMPLE_TID
+ * { u64 time; } && PERF_SAMPLE_TIME
+ * { u64 id; } && PERF_SAMPLE_ID
+ * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
+ * { u32 cpu, res; } && PERF_SAMPLE_CPU
* { u64 id; } && PERF_SAMPLE_IDENTIFIER
* } && perf_event_attr::sample_id_all
*
@@ -863,7 +888,7 @@ enum perf_event_type {
/*
* The MMAP events record the PROT_EXEC mappings so that we can
- * correlate userspace IPs to code. They have the following structure:
+ * correlate user-space IPs to code. They have the following structure:
*
* struct {
* struct perf_event_header header;
@@ -873,7 +898,7 @@ enum perf_event_type {
* u64 len;
* u64 pgoff;
* char filename[];
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_MMAP = 1,
@@ -883,7 +908,7 @@ enum perf_event_type {
* struct perf_event_header header;
* u64 id;
* u64 lost;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_LOST = 2,
@@ -894,7 +919,7 @@ enum perf_event_type {
*
* u32 pid, tid;
* char comm[];
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_COMM = 3,
@@ -905,7 +930,7 @@ enum perf_event_type {
* u32 pid, ppid;
* u32 tid, ptid;
* u64 time;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_EXIT = 4,
@@ -916,7 +941,7 @@ enum perf_event_type {
* u64 time;
* u64 id;
* u64 stream_id;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_THROTTLE = 5,
@@ -928,7 +953,7 @@ enum perf_event_type {
* u32 pid, ppid;
* u32 tid, ptid;
* u64 time;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_FORK = 7,
@@ -939,7 +964,7 @@ enum perf_event_type {
* u32 pid, tid;
*
* struct read_format values;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_READ = 8,
@@ -994,12 +1019,12 @@ enum perf_event_type {
* { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS
* } && PERF_SAMPLE_BRANCH_STACK
*
- * { u64 abi; # enum perf_sample_regs_abi
- * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
+ * { u64 abi; # enum perf_sample_regs_abi
+ * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
*
- * { u64 size;
- * char data[size];
- * u64 dyn_size; } && PERF_SAMPLE_STACK_USER
+ * { u64 size;
+ * char data[size];
+ * u64 dyn_size; } && PERF_SAMPLE_STACK_USER
*
* { union perf_sample_weight
* {
@@ -1024,10 +1049,11 @@ enum perf_event_type {
* { u64 abi; # enum perf_sample_regs_abi
* u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
* { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR
- * { u64 size;
- * char data[size]; } && PERF_SAMPLE_AUX
+ * { u64 cgroup;} && PERF_SAMPLE_CGROUP
* { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE
* { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE
+ * { u64 size;
+ * char data[size]; } && PERF_SAMPLE_AUX
* };
*/
PERF_RECORD_SAMPLE = 9,
@@ -1059,7 +1085,7 @@ enum perf_event_type {
* };
* u32 prot, flags;
* char filename[];
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_MMAP2 = 10,
@@ -1068,12 +1094,12 @@ enum perf_event_type {
* Records that new data landed in the AUX buffer part.
*
* struct {
- * struct perf_event_header header;
+ * struct perf_event_header header;
*
- * u64 aux_offset;
- * u64 aux_size;
+ * u64 aux_offset;
+ * u64 aux_size;
* u64 flags;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_AUX = 11,
@@ -1156,7 +1182,7 @@ enum perf_event_type {
PERF_RECORD_KSYMBOL = 17,
/*
- * Record bpf events:
+ * Record BPF events:
* enum perf_bpf_event_type {
* PERF_BPF_EVENT_UNKNOWN = 0,
* PERF_BPF_EVENT_PROG_LOAD = 1,
@@ -1217,6 +1243,22 @@ enum perf_event_type {
*/
PERF_RECORD_AUX_OUTPUT_HW_ID = 21,
+ /*
+ * This user callchain capture was deferred until shortly before
+ * returning to user space. Previous samples would have kernel
+ * callchains only and they need to be stitched with this to make full
+ * callchains.
+ *
+ * struct {
+ * struct perf_event_header header;
+ * u64 cookie;
+ * u64 nr;
+ * u64 ips[nr];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_CALLCHAIN_DEFERRED = 22,
+
PERF_RECORD_MAX, /* non-ABI */
};
@@ -1234,179 +1276,182 @@ enum perf_record_ksymbol_type {
#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0)
enum perf_bpf_event_type {
- PERF_BPF_EVENT_UNKNOWN = 0,
- PERF_BPF_EVENT_PROG_LOAD = 1,
- PERF_BPF_EVENT_PROG_UNLOAD = 2,
- PERF_BPF_EVENT_MAX, /* non-ABI */
+ PERF_BPF_EVENT_UNKNOWN = 0,
+ PERF_BPF_EVENT_PROG_LOAD = 1,
+ PERF_BPF_EVENT_PROG_UNLOAD = 2,
+ PERF_BPF_EVENT_MAX, /* non-ABI */
};
-#define PERF_MAX_STACK_DEPTH 127
-#define PERF_MAX_CONTEXTS_PER_STACK 8
+#define PERF_MAX_STACK_DEPTH 127
+#define PERF_MAX_CONTEXTS_PER_STACK 8
enum perf_callchain_context {
- PERF_CONTEXT_HV = (__u64)-32,
- PERF_CONTEXT_KERNEL = (__u64)-128,
- PERF_CONTEXT_USER = (__u64)-512,
+ PERF_CONTEXT_HV = (__u64)-32,
+ PERF_CONTEXT_KERNEL = (__u64)-128,
+ PERF_CONTEXT_USER = (__u64)-512,
+ PERF_CONTEXT_USER_DEFERRED = (__u64)-640,
- PERF_CONTEXT_GUEST = (__u64)-2048,
- PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
- PERF_CONTEXT_GUEST_USER = (__u64)-2560,
+ PERF_CONTEXT_GUEST = (__u64)-2048,
+ PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
+ PERF_CONTEXT_GUEST_USER = (__u64)-2560,
- PERF_CONTEXT_MAX = (__u64)-4095,
+ PERF_CONTEXT_MAX = (__u64)-4095,
};
/**
* PERF_RECORD_AUX::flags bits
*/
-#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */
-#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */
-#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */
-#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */
+#define PERF_AUX_FLAG_TRUNCATED 0x0001 /* Record was truncated to fit */
+#define PERF_AUX_FLAG_OVERWRITE 0x0002 /* Snapshot from overwrite mode */
+#define PERF_AUX_FLAG_PARTIAL 0x0004 /* Record contains gaps */
+#define PERF_AUX_FLAG_COLLISION 0x0008 /* Sample collided with another */
#define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */
/* CoreSight PMU AUX buffer formats */
-#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */
-#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */
+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */
+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */
-#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
-#define PERF_FLAG_FD_OUTPUT (1UL << 1)
-#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
-#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
+#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
+#define PERF_FLAG_FD_OUTPUT (1UL << 1)
+#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup ID, per-CPU mode only */
+#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
#if defined(__LITTLE_ENDIAN_BITFIELD)
union perf_mem_data_src {
__u64 val;
struct {
- __u64 mem_op:5, /* type of opcode */
- mem_lvl:14, /* memory hierarchy level */
- mem_snoop:5, /* snoop mode */
- mem_lock:2, /* lock instr */
- mem_dtlb:7, /* tlb access */
- mem_lvl_num:4, /* memory hierarchy level number */
- mem_remote:1, /* remote */
- mem_snoopx:2, /* snoop mode, ext */
- mem_blk:3, /* access blocked */
- mem_hops:3, /* hop level */
- mem_rsvd:18;
+ __u64 mem_op : 5, /* Type of opcode */
+ mem_lvl : 14, /* Memory hierarchy level */
+ mem_snoop : 5, /* Snoop mode */
+ mem_lock : 2, /* Lock instr */
+ mem_dtlb : 7, /* TLB access */
+ mem_lvl_num : 4, /* Memory hierarchy level number */
+ mem_remote : 1, /* Remote */
+ mem_snoopx : 2, /* Snoop mode, ext */
+ mem_blk : 3, /* Access blocked */
+ mem_hops : 3, /* Hop level */
+ mem_rsvd : 18;
};
};
#elif defined(__BIG_ENDIAN_BITFIELD)
union perf_mem_data_src {
__u64 val;
struct {
- __u64 mem_rsvd:18,
- mem_hops:3, /* hop level */
- mem_blk:3, /* access blocked */
- mem_snoopx:2, /* snoop mode, ext */
- mem_remote:1, /* remote */
- mem_lvl_num:4, /* memory hierarchy level number */
- mem_dtlb:7, /* tlb access */
- mem_lock:2, /* lock instr */
- mem_snoop:5, /* snoop mode */
- mem_lvl:14, /* memory hierarchy level */
- mem_op:5; /* type of opcode */
+ __u64 mem_rsvd : 18,
+ mem_hops : 3, /* Hop level */
+ mem_blk : 3, /* Access blocked */
+ mem_snoopx : 2, /* Snoop mode, ext */
+ mem_remote : 1, /* Remote */
+ mem_lvl_num : 4, /* Memory hierarchy level number */
+ mem_dtlb : 7, /* TLB access */
+ mem_lock : 2, /* Lock instr */
+ mem_snoop : 5, /* Snoop mode */
+ mem_lvl : 14, /* Memory hierarchy level */
+ mem_op : 5; /* Type of opcode */
};
};
#else
-#error "Unknown endianness"
+# error "Unknown endianness"
#endif
-/* type of opcode (load/store/prefetch,code) */
-#define PERF_MEM_OP_NA 0x01 /* not available */
-#define PERF_MEM_OP_LOAD 0x02 /* load instruction */
-#define PERF_MEM_OP_STORE 0x04 /* store instruction */
-#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */
-#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */
-#define PERF_MEM_OP_SHIFT 0
+/* Type of memory opcode: */
+#define PERF_MEM_OP_NA 0x0001 /* Not available */
+#define PERF_MEM_OP_LOAD 0x0002 /* Load instruction */
+#define PERF_MEM_OP_STORE 0x0004 /* Store instruction */
+#define PERF_MEM_OP_PFETCH 0x0008 /* Prefetch */
+#define PERF_MEM_OP_EXEC 0x0010 /* Code (execution) */
+#define PERF_MEM_OP_SHIFT 0
/*
- * PERF_MEM_LVL_* namespace being depricated to some extent in the
+ * The PERF_MEM_LVL_* namespace is being deprecated to some extent in
* favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields.
- * Supporting this namespace inorder to not break defined ABIs.
+ * We support this namespace in order to not break defined ABIs.
*
- * memory hierarchy (memory level, hit or miss)
+ * Memory hierarchy (memory level, hit or miss)
*/
-#define PERF_MEM_LVL_NA 0x01 /* not available */
-#define PERF_MEM_LVL_HIT 0x02 /* hit level */
-#define PERF_MEM_LVL_MISS 0x04 /* miss level */
-#define PERF_MEM_LVL_L1 0x08 /* L1 */
-#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */
-#define PERF_MEM_LVL_L2 0x20 /* L2 */
-#define PERF_MEM_LVL_L3 0x40 /* L3 */
-#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */
-#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */
-#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */
-#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */
-#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */
-#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */
-#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
-#define PERF_MEM_LVL_SHIFT 5
-
-#define PERF_MEM_REMOTE_REMOTE 0x01 /* Remote */
-#define PERF_MEM_REMOTE_SHIFT 37
-
-#define PERF_MEM_LVLNUM_L1 0x01 /* L1 */
-#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */
-#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */
-#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */
-/* 5-0x7 available */
-#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */
-#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */
-#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */
-#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
-#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */
-#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */
-#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */
-#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */
-
-#define PERF_MEM_LVLNUM_SHIFT 33
-
-/* snoop mode */
-#define PERF_MEM_SNOOP_NA 0x01 /* not available */
-#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */
-#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */
-#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */
-#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */
-#define PERF_MEM_SNOOP_SHIFT 19
-
-#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */
-#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */
-#define PERF_MEM_SNOOPX_SHIFT 38
-
-/* locked instruction */
-#define PERF_MEM_LOCK_NA 0x01 /* not available */
-#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */
-#define PERF_MEM_LOCK_SHIFT 24
+#define PERF_MEM_LVL_NA 0x0001 /* Not available */
+#define PERF_MEM_LVL_HIT 0x0002 /* Hit level */
+#define PERF_MEM_LVL_MISS 0x0004 /* Miss level */
+#define PERF_MEM_LVL_L1 0x0008 /* L1 */
+#define PERF_MEM_LVL_LFB 0x0010 /* Line Fill Buffer */
+#define PERF_MEM_LVL_L2 0x0020 /* L2 */
+#define PERF_MEM_LVL_L3 0x0040 /* L3 */
+#define PERF_MEM_LVL_LOC_RAM 0x0080 /* Local DRAM */
+#define PERF_MEM_LVL_REM_RAM1 0x0100 /* Remote DRAM (1 hop) */
+#define PERF_MEM_LVL_REM_RAM2 0x0200 /* Remote DRAM (2 hops) */
+#define PERF_MEM_LVL_REM_CCE1 0x0400 /* Remote Cache (1 hop) */
+#define PERF_MEM_LVL_REM_CCE2 0x0800 /* Remote Cache (2 hops) */
+#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */
+#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
+#define PERF_MEM_LVL_SHIFT 5
+
+#define PERF_MEM_REMOTE_REMOTE 0x0001 /* Remote */
+#define PERF_MEM_REMOTE_SHIFT 37
+
+#define PERF_MEM_LVLNUM_L1 0x0001 /* L1 */
+#define PERF_MEM_LVLNUM_L2 0x0002 /* L2 */
+#define PERF_MEM_LVLNUM_L3 0x0003 /* L3 */
+#define PERF_MEM_LVLNUM_L4 0x0004 /* L4 */
+#define PERF_MEM_LVLNUM_L2_MHB 0x0005 /* L2 Miss Handling Buffer */
+#define PERF_MEM_LVLNUM_MSC 0x0006 /* Memory-side Cache */
+/* 0x007 available */
+#define PERF_MEM_LVLNUM_UNC 0x0008 /* Uncached */
+#define PERF_MEM_LVLNUM_CXL 0x0009 /* CXL */
+#define PERF_MEM_LVLNUM_IO 0x000a /* I/O */
+#define PERF_MEM_LVLNUM_ANY_CACHE 0x000b /* Any cache */
+#define PERF_MEM_LVLNUM_LFB 0x000c /* LFB / L1 Miss Handling Buffer */
+#define PERF_MEM_LVLNUM_RAM 0x000d /* RAM */
+#define PERF_MEM_LVLNUM_PMEM 0x000e /* PMEM */
+#define PERF_MEM_LVLNUM_NA 0x000f /* N/A */
+
+#define PERF_MEM_LVLNUM_SHIFT 33
+
+/* Snoop mode */
+#define PERF_MEM_SNOOP_NA 0x0001 /* Not available */
+#define PERF_MEM_SNOOP_NONE 0x0002 /* No snoop */
+#define PERF_MEM_SNOOP_HIT 0x0004 /* Snoop hit */
+#define PERF_MEM_SNOOP_MISS 0x0008 /* Snoop miss */
+#define PERF_MEM_SNOOP_HITM 0x0010 /* Snoop hit modified */
+#define PERF_MEM_SNOOP_SHIFT 19
+
+#define PERF_MEM_SNOOPX_FWD 0x0001 /* Forward */
+#define PERF_MEM_SNOOPX_PEER 0x0002 /* Transfer from peer */
+#define PERF_MEM_SNOOPX_SHIFT 38
+
+/* Locked instruction */
+#define PERF_MEM_LOCK_NA 0x0001 /* Not available */
+#define PERF_MEM_LOCK_LOCKED 0x0002 /* Locked transaction */
+#define PERF_MEM_LOCK_SHIFT 24
/* TLB access */
-#define PERF_MEM_TLB_NA 0x01 /* not available */
-#define PERF_MEM_TLB_HIT 0x02 /* hit level */
-#define PERF_MEM_TLB_MISS 0x04 /* miss level */
-#define PERF_MEM_TLB_L1 0x08 /* L1 */
-#define PERF_MEM_TLB_L2 0x10 /* L2 */
-#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/
-#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */
-#define PERF_MEM_TLB_SHIFT 26
+#define PERF_MEM_TLB_NA 0x0001 /* Not available */
+#define PERF_MEM_TLB_HIT 0x0002 /* Hit level */
+#define PERF_MEM_TLB_MISS 0x0004 /* Miss level */
+#define PERF_MEM_TLB_L1 0x0008 /* L1 */
+#define PERF_MEM_TLB_L2 0x0010 /* L2 */
+#define PERF_MEM_TLB_WK 0x0020 /* Hardware Walker*/
+#define PERF_MEM_TLB_OS 0x0040 /* OS fault handler */
+#define PERF_MEM_TLB_SHIFT 26
/* Access blocked */
-#define PERF_MEM_BLK_NA 0x01 /* not available */
-#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */
-#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */
-#define PERF_MEM_BLK_SHIFT 40
-
-/* hop level */
-#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */
-#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */
-#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */
-#define PERF_MEM_HOPS_3 0x04 /* remote board */
+#define PERF_MEM_BLK_NA 0x0001 /* Not available */
+#define PERF_MEM_BLK_DATA 0x0002 /* Data could not be forwarded */
+#define PERF_MEM_BLK_ADDR 0x0004 /* Address conflict */
+#define PERF_MEM_BLK_SHIFT 40
+
+/* Hop level */
+#define PERF_MEM_HOPS_0 0x0001 /* Remote core, same node */
+#define PERF_MEM_HOPS_1 0x0002 /* Remote node, same socket */
+#define PERF_MEM_HOPS_2 0x0003 /* Remote socket, same board */
+#define PERF_MEM_HOPS_3 0x0004 /* Remote board */
/* 5-7 available */
-#define PERF_MEM_HOPS_SHIFT 43
+#define PERF_MEM_HOPS_SHIFT 43
#define PERF_MEM_S(a, s) \
(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
/*
- * single taken branch record layout:
+ * Layout of single taken branch records:
*
* from: source instruction (may not always be a branch insn)
* to: branch target
@@ -1425,37 +1470,37 @@ union perf_mem_data_src {
struct perf_branch_entry {
__u64 from;
__u64 to;
- __u64 mispred:1, /* target mispredicted */
- predicted:1,/* target predicted */
- in_tx:1, /* in transaction */
- abort:1, /* transaction abort */
- cycles:16, /* cycle count to last branch */
- type:4, /* branch type */
- spec:2, /* branch speculation info */
- new_type:4, /* additional branch type */
- priv:3, /* privilege level */
- reserved:31;
+ __u64 mispred : 1, /* target mispredicted */
+ predicted : 1, /* target predicted */
+ in_tx : 1, /* in transaction */
+ abort : 1, /* transaction abort */
+ cycles : 16, /* cycle count to last branch */
+ type : 4, /* branch type */
+ spec : 2, /* branch speculation info */
+ new_type : 4, /* additional branch type */
+ priv : 3, /* privilege level */
+ reserved : 31;
};
/* Size of used info bits in struct perf_branch_entry */
#define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33
union perf_sample_weight {
- __u64 full;
+ __u64 full;
#if defined(__LITTLE_ENDIAN_BITFIELD)
struct {
- __u32 var1_dw;
- __u16 var2_w;
- __u16 var3_w;
+ __u32 var1_dw;
+ __u16 var2_w;
+ __u16 var3_w;
};
#elif defined(__BIG_ENDIAN_BITFIELD)
struct {
- __u16 var3_w;
- __u16 var2_w;
- __u32 var1_dw;
+ __u16 var3_w;
+ __u16 var2_w;
+ __u32 var1_dw;
};
#else
-#error "Unknown endianness"
+# error "Unknown endianness"
#endif
};
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
new file mode 100644
index 000000000000..475fc8ca4403
--- /dev/null
+++ b/tools/include/uapi/linux/prctl.h
@@ -0,0 +1,377 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _LINUX_PRCTL_H
+#define _LINUX_PRCTL_H
+
+#include <linux/types.h>
+
+/* Values to pass as first argument to prctl() */
+
+#define PR_SET_PDEATHSIG 1 /* Second arg is a signal */
+#define PR_GET_PDEATHSIG 2 /* Second arg is a ptr to return the signal */
+
+/* Get/set current->mm->dumpable */
+#define PR_GET_DUMPABLE 3
+#define PR_SET_DUMPABLE 4
+
+/* Get/set unaligned access control bits (if meaningful) */
+#define PR_GET_UNALIGN 5
+#define PR_SET_UNALIGN 6
+# define PR_UNALIGN_NOPRINT 1 /* silently fix up unaligned user accesses */
+# define PR_UNALIGN_SIGBUS 2 /* generate SIGBUS on unaligned user access */
+
+/* Get/set whether or not to drop capabilities on setuid() away from
+ * uid 0 (as per security/commoncap.c) */
+#define PR_GET_KEEPCAPS 7
+#define PR_SET_KEEPCAPS 8
+
+/* Get/set floating-point emulation control bits (if meaningful) */
+#define PR_GET_FPEMU 9
+#define PR_SET_FPEMU 10
+# define PR_FPEMU_NOPRINT 1 /* silently emulate fp operations accesses */
+# define PR_FPEMU_SIGFPE 2 /* don't emulate fp operations, send SIGFPE instead */
+
+/* Get/set floating-point exception mode (if meaningful) */
+#define PR_GET_FPEXC 11
+#define PR_SET_FPEXC 12
+# define PR_FP_EXC_SW_ENABLE 0x80 /* Use FPEXC for FP exception enables */
+# define PR_FP_EXC_DIV 0x010000 /* floating point divide by zero */
+# define PR_FP_EXC_OVF 0x020000 /* floating point overflow */
+# define PR_FP_EXC_UND 0x040000 /* floating point underflow */
+# define PR_FP_EXC_RES 0x080000 /* floating point inexact result */
+# define PR_FP_EXC_INV 0x100000 /* floating point invalid operation */
+# define PR_FP_EXC_DISABLED 0 /* FP exceptions disabled */
+# define PR_FP_EXC_NONRECOV 1 /* async non-recoverable exc. mode */
+# define PR_FP_EXC_ASYNC 2 /* async recoverable exception mode */
+# define PR_FP_EXC_PRECISE 3 /* precise exception mode */
+
+/* Get/set whether we use statistical process timing or accurate timestamp
+ * based process timing */
+#define PR_GET_TIMING 13
+#define PR_SET_TIMING 14
+# define PR_TIMING_STATISTICAL 0 /* Normal, traditional,
+ statistical process timing */
+# define PR_TIMING_TIMESTAMP 1 /* Accurate timestamp based
+ process timing */
+
+#define PR_SET_NAME 15 /* Set process name */
+#define PR_GET_NAME 16 /* Get process name */
+
+/* Get/set process endian */
+#define PR_GET_ENDIAN 19
+#define PR_SET_ENDIAN 20
+# define PR_ENDIAN_BIG 0
+# define PR_ENDIAN_LITTLE 1 /* True little endian mode */
+# define PR_ENDIAN_PPC_LITTLE 2 /* "PowerPC" pseudo little endian */
+
+/* Get/set process seccomp mode */
+#define PR_GET_SECCOMP 21
+#define PR_SET_SECCOMP 22
+
+/* Get/set the capability bounding set (as per security/commoncap.c) */
+#define PR_CAPBSET_READ 23
+#define PR_CAPBSET_DROP 24
+
+/* Get/set the process' ability to use the timestamp counter instruction */
+#define PR_GET_TSC 25
+#define PR_SET_TSC 26
+# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */
+# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */
+
+/* Get/set securebits (as per security/commoncap.c) */
+#define PR_GET_SECUREBITS 27
+#define PR_SET_SECUREBITS 28
+
+/*
+ * Get/set the timerslack as used by poll/select/nanosleep
+ * A value of 0 means "use default"
+ */
+#define PR_SET_TIMERSLACK 29
+#define PR_GET_TIMERSLACK 30
+
+#define PR_TASK_PERF_EVENTS_DISABLE 31
+#define PR_TASK_PERF_EVENTS_ENABLE 32
+
+/*
+ * Set early/late kill mode for hwpoison memory corruption.
+ * This influences when the process gets killed on a memory corruption.
+ */
+#define PR_MCE_KILL 33
+# define PR_MCE_KILL_CLEAR 0
+# define PR_MCE_KILL_SET 1
+
+# define PR_MCE_KILL_LATE 0
+# define PR_MCE_KILL_EARLY 1
+# define PR_MCE_KILL_DEFAULT 2
+
+#define PR_MCE_KILL_GET 34
+
+/*
+ * Tune up process memory map specifics.
+ */
+#define PR_SET_MM 35
+# define PR_SET_MM_START_CODE 1
+# define PR_SET_MM_END_CODE 2
+# define PR_SET_MM_START_DATA 3
+# define PR_SET_MM_END_DATA 4
+# define PR_SET_MM_START_STACK 5
+# define PR_SET_MM_START_BRK 6
+# define PR_SET_MM_BRK 7
+# define PR_SET_MM_ARG_START 8
+# define PR_SET_MM_ARG_END 9
+# define PR_SET_MM_ENV_START 10
+# define PR_SET_MM_ENV_END 11
+# define PR_SET_MM_AUXV 12
+# define PR_SET_MM_EXE_FILE 13
+# define PR_SET_MM_MAP 14
+# define PR_SET_MM_MAP_SIZE 15
+
+/*
+ * This structure provides new memory descriptor
+ * map which mostly modifies /proc/pid/stat[m]
+ * output for a task. This mostly done in a
+ * sake of checkpoint/restore functionality.
+ */
+struct prctl_mm_map {
+ __u64 start_code; /* code section bounds */
+ __u64 end_code;
+ __u64 start_data; /* data section bounds */
+ __u64 end_data;
+ __u64 start_brk; /* heap for brk() syscall */
+ __u64 brk;
+ __u64 start_stack; /* stack starts at */
+ __u64 arg_start; /* command line arguments bounds */
+ __u64 arg_end;
+ __u64 env_start; /* environment variables bounds */
+ __u64 env_end;
+ __u64 *auxv; /* auxiliary vector */
+ __u32 auxv_size; /* vector size */
+ __u32 exe_fd; /* /proc/$pid/exe link file */
+};
+
+/*
+ * Set specific pid that is allowed to ptrace the current task.
+ * A value of 0 mean "no process".
+ */
+#define PR_SET_PTRACER 0x59616d61
+# define PR_SET_PTRACER_ANY ((unsigned long)-1)
+
+#define PR_SET_CHILD_SUBREAPER 36
+#define PR_GET_CHILD_SUBREAPER 37
+
+/*
+ * If no_new_privs is set, then operations that grant new privileges (i.e.
+ * execve) will either fail or not grant them. This affects suid/sgid,
+ * file capabilities, and LSMs.
+ *
+ * Operations that merely manipulate or drop existing privileges (setresuid,
+ * capset, etc.) will still work. Drop those privileges if you want them gone.
+ *
+ * Changing LSM security domain is considered a new privilege. So, for example,
+ * asking selinux for a specific new context (e.g. with runcon) will result
+ * in execve returning -EPERM.
+ *
+ * See Documentation/userspace-api/no_new_privs.rst for more details.
+ */
+#define PR_SET_NO_NEW_PRIVS 38
+#define PR_GET_NO_NEW_PRIVS 39
+
+#define PR_GET_TID_ADDRESS 40
+
+#define PR_SET_THP_DISABLE 41
+#define PR_GET_THP_DISABLE 42
+
+/*
+ * No longer implemented, but left here to ensure the numbers stay reserved:
+ */
+#define PR_MPX_ENABLE_MANAGEMENT 43
+#define PR_MPX_DISABLE_MANAGEMENT 44
+
+#define PR_SET_FP_MODE 45
+#define PR_GET_FP_MODE 46
+# define PR_FP_MODE_FR (1 << 0) /* 64b FP registers */
+# define PR_FP_MODE_FRE (1 << 1) /* 32b compatibility */
+
+/* Control the ambient capability set */
+#define PR_CAP_AMBIENT 47
+# define PR_CAP_AMBIENT_IS_SET 1
+# define PR_CAP_AMBIENT_RAISE 2
+# define PR_CAP_AMBIENT_LOWER 3
+# define PR_CAP_AMBIENT_CLEAR_ALL 4
+
+/* arm64 Scalable Vector Extension controls */
+/* Flag values must be kept in sync with ptrace NT_ARM_SVE interface */
+#define PR_SVE_SET_VL 50 /* set task vector length */
+# define PR_SVE_SET_VL_ONEXEC (1 << 18) /* defer effect until exec */
+#define PR_SVE_GET_VL 51 /* get task vector length */
+/* Bits common to PR_SVE_SET_VL and PR_SVE_GET_VL */
+# define PR_SVE_VL_LEN_MASK 0xffff
+# define PR_SVE_VL_INHERIT (1 << 17) /* inherit across exec */
+
+/* Per task speculation control */
+#define PR_GET_SPECULATION_CTRL 52
+#define PR_SET_SPECULATION_CTRL 53
+/* Speculation control variants */
+# define PR_SPEC_STORE_BYPASS 0
+# define PR_SPEC_INDIRECT_BRANCH 1
+# define PR_SPEC_L1D_FLUSH 2
+/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
+# define PR_SPEC_NOT_AFFECTED 0
+# define PR_SPEC_PRCTL (1UL << 0)
+# define PR_SPEC_ENABLE (1UL << 1)
+# define PR_SPEC_DISABLE (1UL << 2)
+# define PR_SPEC_FORCE_DISABLE (1UL << 3)
+# define PR_SPEC_DISABLE_NOEXEC (1UL << 4)
+
+/* Reset arm64 pointer authentication keys */
+#define PR_PAC_RESET_KEYS 54
+# define PR_PAC_APIAKEY (1UL << 0)
+# define PR_PAC_APIBKEY (1UL << 1)
+# define PR_PAC_APDAKEY (1UL << 2)
+# define PR_PAC_APDBKEY (1UL << 3)
+# define PR_PAC_APGAKEY (1UL << 4)
+
+/* Tagged user address controls for arm64 and RISC-V */
+#define PR_SET_TAGGED_ADDR_CTRL 55
+#define PR_GET_TAGGED_ADDR_CTRL 56
+# define PR_TAGGED_ADDR_ENABLE (1UL << 0)
+/* MTE tag check fault modes */
+# define PR_MTE_TCF_NONE 0UL
+# define PR_MTE_TCF_SYNC (1UL << 1)
+# define PR_MTE_TCF_ASYNC (1UL << 2)
+# define PR_MTE_TCF_MASK (PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC)
+/* MTE tag inclusion mask */
+# define PR_MTE_TAG_SHIFT 3
+# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT)
+/* Unused; kept only for source compatibility */
+# define PR_MTE_TCF_SHIFT 1
+/* RISC-V pointer masking tag length */
+# define PR_PMLEN_SHIFT 24
+# define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT)
+
+/* Control reclaim behavior when allocating memory */
+#define PR_SET_IO_FLUSHER 57
+#define PR_GET_IO_FLUSHER 58
+
+/* Dispatch syscalls to a userspace handler */
+#define PR_SET_SYSCALL_USER_DISPATCH 59
+# define PR_SYS_DISPATCH_OFF 0
+/* Enable dispatch except for the specified range */
+# define PR_SYS_DISPATCH_EXCLUSIVE_ON 1
+/* Enable dispatch for the specified range */
+# define PR_SYS_DISPATCH_INCLUSIVE_ON 2
+/* Legacy name for backwards compatibility */
+# define PR_SYS_DISPATCH_ON PR_SYS_DISPATCH_EXCLUSIVE_ON
+/* The control values for the user space selector when dispatch is enabled */
+# define SYSCALL_DISPATCH_FILTER_ALLOW 0
+# define SYSCALL_DISPATCH_FILTER_BLOCK 1
+
+/* Set/get enabled arm64 pointer authentication keys */
+#define PR_PAC_SET_ENABLED_KEYS 60
+#define PR_PAC_GET_ENABLED_KEYS 61
+
+/* Request the scheduler to share a core */
+#define PR_SCHED_CORE 62
+# define PR_SCHED_CORE_GET 0
+# define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */
+# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */
+# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */
+# define PR_SCHED_CORE_MAX 4
+# define PR_SCHED_CORE_SCOPE_THREAD 0
+# define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1
+# define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2
+
+/* arm64 Scalable Matrix Extension controls */
+/* Flag values must be in sync with SVE versions */
+#define PR_SME_SET_VL 63 /* set task vector length */
+# define PR_SME_SET_VL_ONEXEC (1 << 18) /* defer effect until exec */
+#define PR_SME_GET_VL 64 /* get task vector length */
+/* Bits common to PR_SME_SET_VL and PR_SME_GET_VL */
+# define PR_SME_VL_LEN_MASK 0xffff
+# define PR_SME_VL_INHERIT (1 << 17) /* inherit across exec */
+
+/* Memory deny write / execute */
+#define PR_SET_MDWE 65
+# define PR_MDWE_REFUSE_EXEC_GAIN (1UL << 0)
+# define PR_MDWE_NO_INHERIT (1UL << 1)
+
+#define PR_GET_MDWE 66
+
+#define PR_SET_VMA 0x53564d41
+# define PR_SET_VMA_ANON_NAME 0
+
+#define PR_GET_AUXV 0x41555856
+
+#define PR_SET_MEMORY_MERGE 67
+#define PR_GET_MEMORY_MERGE 68
+
+#define PR_RISCV_V_SET_CONTROL 69
+#define PR_RISCV_V_GET_CONTROL 70
+# define PR_RISCV_V_VSTATE_CTRL_DEFAULT 0
+# define PR_RISCV_V_VSTATE_CTRL_OFF 1
+# define PR_RISCV_V_VSTATE_CTRL_ON 2
+# define PR_RISCV_V_VSTATE_CTRL_INHERIT (1 << 4)
+# define PR_RISCV_V_VSTATE_CTRL_CUR_MASK 0x3
+# define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK 0xc
+# define PR_RISCV_V_VSTATE_CTRL_MASK 0x1f
+
+#define PR_RISCV_SET_ICACHE_FLUSH_CTX 71
+# define PR_RISCV_CTX_SW_FENCEI_ON 0
+# define PR_RISCV_CTX_SW_FENCEI_OFF 1
+# define PR_RISCV_SCOPE_PER_PROCESS 0
+# define PR_RISCV_SCOPE_PER_THREAD 1
+
+/* PowerPC Dynamic Execution Control Register (DEXCR) controls */
+#define PR_PPC_GET_DEXCR 72
+#define PR_PPC_SET_DEXCR 73
+/* DEXCR aspect to act on */
+# define PR_PPC_DEXCR_SBHE 0 /* Speculative branch hint enable */
+# define PR_PPC_DEXCR_IBRTPD 1 /* Indirect branch recurrent target prediction disable */
+# define PR_PPC_DEXCR_SRAPD 2 /* Subroutine return address prediction disable */
+# define PR_PPC_DEXCR_NPHIE 3 /* Non-privileged hash instruction enable */
+/* Action to apply / return */
+# define PR_PPC_DEXCR_CTRL_EDITABLE 0x1 /* Aspect can be modified with PR_PPC_SET_DEXCR */
+# define PR_PPC_DEXCR_CTRL_SET 0x2 /* Set the aspect for this process */
+# define PR_PPC_DEXCR_CTRL_CLEAR 0x4 /* Clear the aspect for this process */
+# define PR_PPC_DEXCR_CTRL_SET_ONEXEC 0x8 /* Set the aspect on exec */
+# define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on exec */
+# define PR_PPC_DEXCR_CTRL_MASK 0x1f
+
+/*
+ * Get the current shadow stack configuration for the current thread,
+ * this will be the value configured via PR_SET_SHADOW_STACK_STATUS.
+ */
+#define PR_GET_SHADOW_STACK_STATUS 74
+
+/*
+ * Set the current shadow stack configuration. Enabling the shadow
+ * stack will cause a shadow stack to be allocated for the thread.
+ */
+#define PR_SET_SHADOW_STACK_STATUS 75
+# define PR_SHADOW_STACK_ENABLE (1UL << 0)
+# define PR_SHADOW_STACK_WRITE (1UL << 1)
+# define PR_SHADOW_STACK_PUSH (1UL << 2)
+
+/*
+ * Prevent further changes to the specified shadow stack
+ * configuration. All bits may be locked via this call, including
+ * undefined bits.
+ */
+#define PR_LOCK_SHADOW_STACK_STATUS 76
+
+/*
+ * Controls the mode of timer_create() for CRIU restore operations.
+ * Enabling this allows CRIU to restore timers with explicit IDs.
+ *
+ * Don't use for normal operations as the result might be undefined.
+ */
+#define PR_TIMER_CREATE_RESTORE_IDS 77
+# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0
+# define PR_TIMER_CREATE_RESTORE_IDS_ON 1
+# define PR_TIMER_CREATE_RESTORE_IDS_GET 2
+
+/* FUTEX hash management */
+#define PR_FUTEX_HASH 78
+# define PR_FUTEX_HASH_SET_SLOTS 1
+# define PR_FUTEX_HASH_GET_SLOTS 2
+
+#endif /* _LINUX_PRCTL_H */
diff --git a/tools/include/uapi/linux/rtnetlink.h b/tools/include/uapi/linux/rtnetlink.h
new file mode 100644
index 000000000000..dab9493c791b
--- /dev/null
+++ b/tools/include/uapi/linux/rtnetlink.h
@@ -0,0 +1,848 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_RTNETLINK_H
+#define _UAPI__LINUX_RTNETLINK_H
+
+#include <linux/types.h>
+#include <linux/netlink.h>
+#include <linux/if_link.h>
+#include <linux/if_addr.h>
+#include <linux/neighbour.h>
+
+/* rtnetlink families. Values up to 127 are reserved for real address
+ * families, values above 128 may be used arbitrarily.
+ */
+#define RTNL_FAMILY_IPMR 128
+#define RTNL_FAMILY_IP6MR 129
+#define RTNL_FAMILY_MAX 129
+
+/****
+ * Routing/neighbour discovery messages.
+ ****/
+
+/* Types of messages */
+
+enum {
+ RTM_BASE = 16,
+#define RTM_BASE RTM_BASE
+
+ RTM_NEWLINK = 16,
+#define RTM_NEWLINK RTM_NEWLINK
+ RTM_DELLINK,
+#define RTM_DELLINK RTM_DELLINK
+ RTM_GETLINK,
+#define RTM_GETLINK RTM_GETLINK
+ RTM_SETLINK,
+#define RTM_SETLINK RTM_SETLINK
+
+ RTM_NEWADDR = 20,
+#define RTM_NEWADDR RTM_NEWADDR
+ RTM_DELADDR,
+#define RTM_DELADDR RTM_DELADDR
+ RTM_GETADDR,
+#define RTM_GETADDR RTM_GETADDR
+
+ RTM_NEWROUTE = 24,
+#define RTM_NEWROUTE RTM_NEWROUTE
+ RTM_DELROUTE,
+#define RTM_DELROUTE RTM_DELROUTE
+ RTM_GETROUTE,
+#define RTM_GETROUTE RTM_GETROUTE
+
+ RTM_NEWNEIGH = 28,
+#define RTM_NEWNEIGH RTM_NEWNEIGH
+ RTM_DELNEIGH,
+#define RTM_DELNEIGH RTM_DELNEIGH
+ RTM_GETNEIGH,
+#define RTM_GETNEIGH RTM_GETNEIGH
+
+ RTM_NEWRULE = 32,
+#define RTM_NEWRULE RTM_NEWRULE
+ RTM_DELRULE,
+#define RTM_DELRULE RTM_DELRULE
+ RTM_GETRULE,
+#define RTM_GETRULE RTM_GETRULE
+
+ RTM_NEWQDISC = 36,
+#define RTM_NEWQDISC RTM_NEWQDISC
+ RTM_DELQDISC,
+#define RTM_DELQDISC RTM_DELQDISC
+ RTM_GETQDISC,
+#define RTM_GETQDISC RTM_GETQDISC
+
+ RTM_NEWTCLASS = 40,
+#define RTM_NEWTCLASS RTM_NEWTCLASS
+ RTM_DELTCLASS,
+#define RTM_DELTCLASS RTM_DELTCLASS
+ RTM_GETTCLASS,
+#define RTM_GETTCLASS RTM_GETTCLASS
+
+ RTM_NEWTFILTER = 44,
+#define RTM_NEWTFILTER RTM_NEWTFILTER
+ RTM_DELTFILTER,
+#define RTM_DELTFILTER RTM_DELTFILTER
+ RTM_GETTFILTER,
+#define RTM_GETTFILTER RTM_GETTFILTER
+
+ RTM_NEWACTION = 48,
+#define RTM_NEWACTION RTM_NEWACTION
+ RTM_DELACTION,
+#define RTM_DELACTION RTM_DELACTION
+ RTM_GETACTION,
+#define RTM_GETACTION RTM_GETACTION
+
+ RTM_NEWPREFIX = 52,
+#define RTM_NEWPREFIX RTM_NEWPREFIX
+
+ RTM_NEWMULTICAST = 56,
+#define RTM_NEWMULTICAST RTM_NEWMULTICAST
+ RTM_DELMULTICAST,
+#define RTM_DELMULTICAST RTM_DELMULTICAST
+ RTM_GETMULTICAST,
+#define RTM_GETMULTICAST RTM_GETMULTICAST
+
+ RTM_NEWANYCAST = 60,
+#define RTM_NEWANYCAST RTM_NEWANYCAST
+ RTM_DELANYCAST,
+#define RTM_DELANYCAST RTM_DELANYCAST
+ RTM_GETANYCAST,
+#define RTM_GETANYCAST RTM_GETANYCAST
+
+ RTM_NEWNEIGHTBL = 64,
+#define RTM_NEWNEIGHTBL RTM_NEWNEIGHTBL
+ RTM_GETNEIGHTBL = 66,
+#define RTM_GETNEIGHTBL RTM_GETNEIGHTBL
+ RTM_SETNEIGHTBL,
+#define RTM_SETNEIGHTBL RTM_SETNEIGHTBL
+
+ RTM_NEWNDUSEROPT = 68,
+#define RTM_NEWNDUSEROPT RTM_NEWNDUSEROPT
+
+ RTM_NEWADDRLABEL = 72,
+#define RTM_NEWADDRLABEL RTM_NEWADDRLABEL
+ RTM_DELADDRLABEL,
+#define RTM_DELADDRLABEL RTM_DELADDRLABEL
+ RTM_GETADDRLABEL,
+#define RTM_GETADDRLABEL RTM_GETADDRLABEL
+
+ RTM_GETDCB = 78,
+#define RTM_GETDCB RTM_GETDCB
+ RTM_SETDCB,
+#define RTM_SETDCB RTM_SETDCB
+
+ RTM_NEWNETCONF = 80,
+#define RTM_NEWNETCONF RTM_NEWNETCONF
+ RTM_DELNETCONF,
+#define RTM_DELNETCONF RTM_DELNETCONF
+ RTM_GETNETCONF = 82,
+#define RTM_GETNETCONF RTM_GETNETCONF
+
+ RTM_NEWMDB = 84,
+#define RTM_NEWMDB RTM_NEWMDB
+ RTM_DELMDB = 85,
+#define RTM_DELMDB RTM_DELMDB
+ RTM_GETMDB = 86,
+#define RTM_GETMDB RTM_GETMDB
+
+ RTM_NEWNSID = 88,
+#define RTM_NEWNSID RTM_NEWNSID
+ RTM_DELNSID = 89,
+#define RTM_DELNSID RTM_DELNSID
+ RTM_GETNSID = 90,
+#define RTM_GETNSID RTM_GETNSID
+
+ RTM_NEWSTATS = 92,
+#define RTM_NEWSTATS RTM_NEWSTATS
+ RTM_GETSTATS = 94,
+#define RTM_GETSTATS RTM_GETSTATS
+ RTM_SETSTATS,
+#define RTM_SETSTATS RTM_SETSTATS
+
+ RTM_NEWCACHEREPORT = 96,
+#define RTM_NEWCACHEREPORT RTM_NEWCACHEREPORT
+
+ RTM_NEWCHAIN = 100,
+#define RTM_NEWCHAIN RTM_NEWCHAIN
+ RTM_DELCHAIN,
+#define RTM_DELCHAIN RTM_DELCHAIN
+ RTM_GETCHAIN,
+#define RTM_GETCHAIN RTM_GETCHAIN
+
+ RTM_NEWNEXTHOP = 104,
+#define RTM_NEWNEXTHOP RTM_NEWNEXTHOP
+ RTM_DELNEXTHOP,
+#define RTM_DELNEXTHOP RTM_DELNEXTHOP
+ RTM_GETNEXTHOP,
+#define RTM_GETNEXTHOP RTM_GETNEXTHOP
+
+ RTM_NEWLINKPROP = 108,
+#define RTM_NEWLINKPROP RTM_NEWLINKPROP
+ RTM_DELLINKPROP,
+#define RTM_DELLINKPROP RTM_DELLINKPROP
+ RTM_GETLINKPROP,
+#define RTM_GETLINKPROP RTM_GETLINKPROP
+
+ RTM_NEWVLAN = 112,
+#define RTM_NEWVLAN RTM_NEWVLAN
+ RTM_DELVLAN,
+#define RTM_DELVLAN RTM_DELVLAN
+ RTM_GETVLAN,
+#define RTM_GETVLAN RTM_GETVLAN
+
+ RTM_NEWNEXTHOPBUCKET = 116,
+#define RTM_NEWNEXTHOPBUCKET RTM_NEWNEXTHOPBUCKET
+ RTM_DELNEXTHOPBUCKET,
+#define RTM_DELNEXTHOPBUCKET RTM_DELNEXTHOPBUCKET
+ RTM_GETNEXTHOPBUCKET,
+#define RTM_GETNEXTHOPBUCKET RTM_GETNEXTHOPBUCKET
+
+ RTM_NEWTUNNEL = 120,
+#define RTM_NEWTUNNEL RTM_NEWTUNNEL
+ RTM_DELTUNNEL,
+#define RTM_DELTUNNEL RTM_DELTUNNEL
+ RTM_GETTUNNEL,
+#define RTM_GETTUNNEL RTM_GETTUNNEL
+
+ __RTM_MAX,
+#define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1)
+};
+
+#define RTM_NR_MSGTYPES (RTM_MAX + 1 - RTM_BASE)
+#define RTM_NR_FAMILIES (RTM_NR_MSGTYPES >> 2)
+#define RTM_FAM(cmd) (((cmd) - RTM_BASE) >> 2)
+
+/*
+ Generic structure for encapsulation of optional route information.
+ It is reminiscent of sockaddr, but with sa_family replaced
+ with attribute type.
+ */
+
+struct rtattr {
+ unsigned short rta_len;
+ unsigned short rta_type;
+};
+
+/* Macros to handle rtattributes */
+
+#define RTA_ALIGNTO 4U
+#define RTA_ALIGN(len) ( ((len)+RTA_ALIGNTO-1) & ~(RTA_ALIGNTO-1) )
+#define RTA_OK(rta,len) ((len) >= (int)sizeof(struct rtattr) && \
+ (rta)->rta_len >= sizeof(struct rtattr) && \
+ (rta)->rta_len <= (len))
+#define RTA_NEXT(rta,attrlen) ((attrlen) -= RTA_ALIGN((rta)->rta_len), \
+ (struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len)))
+#define RTA_LENGTH(len) (RTA_ALIGN(sizeof(struct rtattr)) + (len))
+#define RTA_SPACE(len) RTA_ALIGN(RTA_LENGTH(len))
+#define RTA_DATA(rta) ((void*)(((char*)(rta)) + RTA_LENGTH(0)))
+#define RTA_PAYLOAD(rta) ((int)((rta)->rta_len) - RTA_LENGTH(0))
+
+
+
+
+/******************************************************************************
+ * Definitions used in routing table administration.
+ ****/
+
+struct rtmsg {
+ unsigned char rtm_family;
+ unsigned char rtm_dst_len;
+ unsigned char rtm_src_len;
+ unsigned char rtm_tos;
+
+ unsigned char rtm_table; /* Routing table id */
+ unsigned char rtm_protocol; /* Routing protocol; see below */
+ unsigned char rtm_scope; /* See below */
+ unsigned char rtm_type; /* See below */
+
+ unsigned rtm_flags;
+};
+
+/* rtm_type */
+
+enum {
+ RTN_UNSPEC,
+ RTN_UNICAST, /* Gateway or direct route */
+ RTN_LOCAL, /* Accept locally */
+ RTN_BROADCAST, /* Accept locally as broadcast,
+ send as broadcast */
+ RTN_ANYCAST, /* Accept locally as broadcast,
+ but send as unicast */
+ RTN_MULTICAST, /* Multicast route */
+ RTN_BLACKHOLE, /* Drop */
+ RTN_UNREACHABLE, /* Destination is unreachable */
+ RTN_PROHIBIT, /* Administratively prohibited */
+ RTN_THROW, /* Not in this table */
+ RTN_NAT, /* Translate this address */
+ RTN_XRESOLVE, /* Use external resolver */
+ __RTN_MAX
+};
+
+#define RTN_MAX (__RTN_MAX - 1)
+
+
+/* rtm_protocol */
+
+#define RTPROT_UNSPEC 0
+#define RTPROT_REDIRECT 1 /* Route installed by ICMP redirects;
+ not used by current IPv4 */
+#define RTPROT_KERNEL 2 /* Route installed by kernel */
+#define RTPROT_BOOT 3 /* Route installed during boot */
+#define RTPROT_STATIC 4 /* Route installed by administrator */
+
+/* Values of protocol >= RTPROT_STATIC are not interpreted by kernel;
+ they are just passed from user and back as is.
+ It will be used by hypothetical multiple routing daemons.
+ Note that protocol values should be standardized in order to
+ avoid conflicts.
+ */
+
+#define RTPROT_GATED 8 /* Apparently, GateD */
+#define RTPROT_RA 9 /* RDISC/ND router advertisements */
+#define RTPROT_MRT 10 /* Merit MRT */
+#define RTPROT_ZEBRA 11 /* Zebra */
+#define RTPROT_BIRD 12 /* BIRD */
+#define RTPROT_DNROUTED 13 /* DECnet routing daemon */
+#define RTPROT_XORP 14 /* XORP */
+#define RTPROT_NTK 15 /* Netsukuku */
+#define RTPROT_DHCP 16 /* DHCP client */
+#define RTPROT_MROUTED 17 /* Multicast daemon */
+#define RTPROT_KEEPALIVED 18 /* Keepalived daemon */
+#define RTPROT_BABEL 42 /* Babel daemon */
+#define RTPROT_OVN 84 /* OVN daemon */
+#define RTPROT_OPENR 99 /* Open Routing (Open/R) Routes */
+#define RTPROT_BGP 186 /* BGP Routes */
+#define RTPROT_ISIS 187 /* ISIS Routes */
+#define RTPROT_OSPF 188 /* OSPF Routes */
+#define RTPROT_RIP 189 /* RIP Routes */
+#define RTPROT_EIGRP 192 /* EIGRP Routes */
+
+/* rtm_scope
+
+ Really it is not scope, but sort of distance to the destination.
+ NOWHERE are reserved for not existing destinations, HOST is our
+ local addresses, LINK are destinations, located on directly attached
+ link and UNIVERSE is everywhere in the Universe.
+
+ Intermediate values are also possible f.e. interior routes
+ could be assigned a value between UNIVERSE and LINK.
+*/
+
+enum rt_scope_t {
+ RT_SCOPE_UNIVERSE=0,
+/* User defined values */
+ RT_SCOPE_SITE=200,
+ RT_SCOPE_LINK=253,
+ RT_SCOPE_HOST=254,
+ RT_SCOPE_NOWHERE=255
+};
+
+/* rtm_flags */
+
+#define RTM_F_NOTIFY 0x100 /* Notify user of route change */
+#define RTM_F_CLONED 0x200 /* This route is cloned */
+#define RTM_F_EQUALIZE 0x400 /* Multipath equalizer: NI */
+#define RTM_F_PREFIX 0x800 /* Prefix addresses */
+#define RTM_F_LOOKUP_TABLE 0x1000 /* set rtm_table to FIB lookup result */
+#define RTM_F_FIB_MATCH 0x2000 /* return full fib lookup match */
+#define RTM_F_OFFLOAD 0x4000 /* route is offloaded */
+#define RTM_F_TRAP 0x8000 /* route is trapping packets */
+#define RTM_F_OFFLOAD_FAILED 0x20000000 /* route offload failed, this value
+ * is chosen to avoid conflicts with
+ * other flags defined in
+ * include/uapi/linux/ipv6_route.h
+ */
+
+/* Reserved table identifiers */
+
+enum rt_class_t {
+ RT_TABLE_UNSPEC=0,
+/* User defined values */
+ RT_TABLE_COMPAT=252,
+ RT_TABLE_DEFAULT=253,
+ RT_TABLE_MAIN=254,
+ RT_TABLE_LOCAL=255,
+ RT_TABLE_MAX=0xFFFFFFFF
+};
+
+
+/* Routing message attributes */
+
+enum rtattr_type_t {
+ RTA_UNSPEC,
+ RTA_DST,
+ RTA_SRC,
+ RTA_IIF,
+ RTA_OIF,
+ RTA_GATEWAY,
+ RTA_PRIORITY,
+ RTA_PREFSRC,
+ RTA_METRICS,
+ RTA_MULTIPATH,
+ RTA_PROTOINFO, /* no longer used */
+ RTA_FLOW,
+ RTA_CACHEINFO,
+ RTA_SESSION, /* no longer used */
+ RTA_MP_ALGO, /* no longer used */
+ RTA_TABLE,
+ RTA_MARK,
+ RTA_MFC_STATS,
+ RTA_VIA,
+ RTA_NEWDST,
+ RTA_PREF,
+ RTA_ENCAP_TYPE,
+ RTA_ENCAP,
+ RTA_EXPIRES,
+ RTA_PAD,
+ RTA_UID,
+ RTA_TTL_PROPAGATE,
+ RTA_IP_PROTO,
+ RTA_SPORT,
+ RTA_DPORT,
+ RTA_NH_ID,
+ RTA_FLOWLABEL,
+ __RTA_MAX
+};
+
+#define RTA_MAX (__RTA_MAX - 1)
+
+#define RTM_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct rtmsg))))
+#define RTM_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct rtmsg))
+
+/* RTM_MULTIPATH --- array of struct rtnexthop.
+ *
+ * "struct rtnexthop" describes all necessary nexthop information,
+ * i.e. parameters of path to a destination via this nexthop.
+ *
+ * At the moment it is impossible to set different prefsrc, mtu, window
+ * and rtt for different paths from multipath.
+ */
+
+struct rtnexthop {
+ unsigned short rtnh_len;
+ unsigned char rtnh_flags;
+ unsigned char rtnh_hops;
+ int rtnh_ifindex;
+};
+
+/* rtnh_flags */
+
+#define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */
+#define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */
+#define RTNH_F_ONLINK 4 /* Gateway is forced on link */
+#define RTNH_F_OFFLOAD 8 /* Nexthop is offloaded */
+#define RTNH_F_LINKDOWN 16 /* carrier-down on nexthop */
+#define RTNH_F_UNRESOLVED 32 /* The entry is unresolved (ipmr) */
+#define RTNH_F_TRAP 64 /* Nexthop is trapping packets */
+
+#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | \
+ RTNH_F_OFFLOAD | RTNH_F_TRAP)
+
+/* Macros to handle hexthops */
+
+#define RTNH_ALIGNTO 4
+#define RTNH_ALIGN(len) ( ((len)+RTNH_ALIGNTO-1) & ~(RTNH_ALIGNTO-1) )
+#define RTNH_OK(rtnh,len) ((rtnh)->rtnh_len >= sizeof(struct rtnexthop) && \
+ ((int)(rtnh)->rtnh_len) <= (len))
+#define RTNH_NEXT(rtnh) ((struct rtnexthop*)(((char*)(rtnh)) + RTNH_ALIGN((rtnh)->rtnh_len)))
+#define RTNH_LENGTH(len) (RTNH_ALIGN(sizeof(struct rtnexthop)) + (len))
+#define RTNH_SPACE(len) RTNH_ALIGN(RTNH_LENGTH(len))
+#define RTNH_DATA(rtnh) ((struct rtattr*)(((char*)(rtnh)) + RTNH_LENGTH(0)))
+
+/* RTA_VIA */
+struct rtvia {
+ __kernel_sa_family_t rtvia_family;
+ __u8 rtvia_addr[];
+};
+
+/* RTM_CACHEINFO */
+
+struct rta_cacheinfo {
+ __u32 rta_clntref;
+ __u32 rta_lastuse;
+ __s32 rta_expires;
+ __u32 rta_error;
+ __u32 rta_used;
+
+#define RTNETLINK_HAVE_PEERINFO 1
+ __u32 rta_id;
+ __u32 rta_ts;
+ __u32 rta_tsage;
+};
+
+/* RTM_METRICS --- array of struct rtattr with types of RTAX_* */
+
+enum {
+ RTAX_UNSPEC,
+#define RTAX_UNSPEC RTAX_UNSPEC
+ RTAX_LOCK,
+#define RTAX_LOCK RTAX_LOCK
+ RTAX_MTU,
+#define RTAX_MTU RTAX_MTU
+ RTAX_WINDOW,
+#define RTAX_WINDOW RTAX_WINDOW
+ RTAX_RTT,
+#define RTAX_RTT RTAX_RTT
+ RTAX_RTTVAR,
+#define RTAX_RTTVAR RTAX_RTTVAR
+ RTAX_SSTHRESH,
+#define RTAX_SSTHRESH RTAX_SSTHRESH
+ RTAX_CWND,
+#define RTAX_CWND RTAX_CWND
+ RTAX_ADVMSS,
+#define RTAX_ADVMSS RTAX_ADVMSS
+ RTAX_REORDERING,
+#define RTAX_REORDERING RTAX_REORDERING
+ RTAX_HOPLIMIT,
+#define RTAX_HOPLIMIT RTAX_HOPLIMIT
+ RTAX_INITCWND,
+#define RTAX_INITCWND RTAX_INITCWND
+ RTAX_FEATURES,
+#define RTAX_FEATURES RTAX_FEATURES
+ RTAX_RTO_MIN,
+#define RTAX_RTO_MIN RTAX_RTO_MIN
+ RTAX_INITRWND,
+#define RTAX_INITRWND RTAX_INITRWND
+ RTAX_QUICKACK,
+#define RTAX_QUICKACK RTAX_QUICKACK
+ RTAX_CC_ALGO,
+#define RTAX_CC_ALGO RTAX_CC_ALGO
+ RTAX_FASTOPEN_NO_COOKIE,
+#define RTAX_FASTOPEN_NO_COOKIE RTAX_FASTOPEN_NO_COOKIE
+ __RTAX_MAX
+};
+
+#define RTAX_MAX (__RTAX_MAX - 1)
+
+#define RTAX_FEATURE_ECN (1 << 0)
+#define RTAX_FEATURE_SACK (1 << 1) /* unused */
+#define RTAX_FEATURE_TIMESTAMP (1 << 2) /* unused */
+#define RTAX_FEATURE_ALLFRAG (1 << 3) /* unused */
+#define RTAX_FEATURE_TCP_USEC_TS (1 << 4)
+
+#define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | \
+ RTAX_FEATURE_SACK | \
+ RTAX_FEATURE_TIMESTAMP | \
+ RTAX_FEATURE_ALLFRAG | \
+ RTAX_FEATURE_TCP_USEC_TS)
+
+struct rta_session {
+ __u8 proto;
+ __u8 pad1;
+ __u16 pad2;
+
+ union {
+ struct {
+ __u16 sport;
+ __u16 dport;
+ } ports;
+
+ struct {
+ __u8 type;
+ __u8 code;
+ __u16 ident;
+ } icmpt;
+
+ __u32 spi;
+ } u;
+};
+
+struct rta_mfc_stats {
+ __u64 mfcs_packets;
+ __u64 mfcs_bytes;
+ __u64 mfcs_wrong_if;
+};
+
+/****
+ * General form of address family dependent message.
+ ****/
+
+struct rtgenmsg {
+ unsigned char rtgen_family;
+};
+
+/*****************************************************************
+ * Link layer specific messages.
+ ****/
+
+/* struct ifinfomsg
+ * passes link level specific information, not dependent
+ * on network protocol.
+ */
+
+struct ifinfomsg {
+ unsigned char ifi_family;
+ unsigned char __ifi_pad;
+ unsigned short ifi_type; /* ARPHRD_* */
+ int ifi_index; /* Link index */
+ unsigned ifi_flags; /* IFF_* flags */
+ unsigned ifi_change; /* IFF_* change mask */
+};
+
+/********************************************************************
+ * prefix information
+ ****/
+
+struct prefixmsg {
+ unsigned char prefix_family;
+ unsigned char prefix_pad1;
+ unsigned short prefix_pad2;
+ int prefix_ifindex;
+ unsigned char prefix_type;
+ unsigned char prefix_len;
+ unsigned char prefix_flags;
+ unsigned char prefix_pad3;
+};
+
+enum
+{
+ PREFIX_UNSPEC,
+ PREFIX_ADDRESS,
+ PREFIX_CACHEINFO,
+ __PREFIX_MAX
+};
+
+#define PREFIX_MAX (__PREFIX_MAX - 1)
+
+struct prefix_cacheinfo {
+ __u32 preferred_time;
+ __u32 valid_time;
+};
+
+
+/*****************************************************************
+ * Traffic control messages.
+ ****/
+
+struct tcmsg {
+ unsigned char tcm_family;
+ unsigned char tcm__pad1;
+ unsigned short tcm__pad2;
+ int tcm_ifindex;
+ __u32 tcm_handle;
+ __u32 tcm_parent;
+/* tcm_block_index is used instead of tcm_parent
+ * in case tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK
+ */
+#define tcm_block_index tcm_parent
+ __u32 tcm_info;
+};
+
+/* For manipulation of filters in shared block, tcm_ifindex is set to
+ * TCM_IFINDEX_MAGIC_BLOCK, and tcm_parent is aliased to tcm_block_index
+ * which is the block index.
+ */
+#define TCM_IFINDEX_MAGIC_BLOCK (0xFFFFFFFFU)
+
+enum {
+ TCA_UNSPEC,
+ TCA_KIND,
+ TCA_OPTIONS,
+ TCA_STATS,
+ TCA_XSTATS,
+ TCA_RATE,
+ TCA_FCNT,
+ TCA_STATS2,
+ TCA_STAB,
+ TCA_PAD,
+ TCA_DUMP_INVISIBLE,
+ TCA_CHAIN,
+ TCA_HW_OFFLOAD,
+ TCA_INGRESS_BLOCK,
+ TCA_EGRESS_BLOCK,
+ TCA_DUMP_FLAGS,
+ TCA_EXT_WARN_MSG,
+ __TCA_MAX
+};
+
+#define TCA_MAX (__TCA_MAX - 1)
+
+#define TCA_DUMP_FLAGS_TERSE (1 << 0) /* Means that in dump user gets only basic
+ * data necessary to identify the objects
+ * (handle, cookie, etc.) and stats.
+ */
+
+#define TCA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg))))
+#define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg))
+
+/********************************************************************
+ * Neighbor Discovery userland options
+ ****/
+
+struct nduseroptmsg {
+ unsigned char nduseropt_family;
+ unsigned char nduseropt_pad1;
+ unsigned short nduseropt_opts_len; /* Total length of options */
+ int nduseropt_ifindex;
+ __u8 nduseropt_icmp_type;
+ __u8 nduseropt_icmp_code;
+ unsigned short nduseropt_pad2;
+ unsigned int nduseropt_pad3;
+ /* Followed by one or more ND options */
+};
+
+enum {
+ NDUSEROPT_UNSPEC,
+ NDUSEROPT_SRCADDR,
+ __NDUSEROPT_MAX
+};
+
+#define NDUSEROPT_MAX (__NDUSEROPT_MAX - 1)
+
+#ifndef __KERNEL__
+/* RTnetlink multicast groups - backwards compatibility for userspace */
+#define RTMGRP_LINK 1
+#define RTMGRP_NOTIFY 2
+#define RTMGRP_NEIGH 4
+#define RTMGRP_TC 8
+
+#define RTMGRP_IPV4_IFADDR 0x10
+#define RTMGRP_IPV4_MROUTE 0x20
+#define RTMGRP_IPV4_ROUTE 0x40
+#define RTMGRP_IPV4_RULE 0x80
+
+#define RTMGRP_IPV6_IFADDR 0x100
+#define RTMGRP_IPV6_MROUTE 0x200
+#define RTMGRP_IPV6_ROUTE 0x400
+#define RTMGRP_IPV6_IFINFO 0x800
+
+#define RTMGRP_DECnet_IFADDR 0x1000
+#define RTMGRP_DECnet_ROUTE 0x4000
+
+#define RTMGRP_IPV6_PREFIX 0x20000
+#endif
+
+/* RTnetlink multicast groups */
+enum rtnetlink_groups {
+ RTNLGRP_NONE,
+#define RTNLGRP_NONE RTNLGRP_NONE
+ RTNLGRP_LINK,
+#define RTNLGRP_LINK RTNLGRP_LINK
+ RTNLGRP_NOTIFY,
+#define RTNLGRP_NOTIFY RTNLGRP_NOTIFY
+ RTNLGRP_NEIGH,
+#define RTNLGRP_NEIGH RTNLGRP_NEIGH
+ RTNLGRP_TC,
+#define RTNLGRP_TC RTNLGRP_TC
+ RTNLGRP_IPV4_IFADDR,
+#define RTNLGRP_IPV4_IFADDR RTNLGRP_IPV4_IFADDR
+ RTNLGRP_IPV4_MROUTE,
+#define RTNLGRP_IPV4_MROUTE RTNLGRP_IPV4_MROUTE
+ RTNLGRP_IPV4_ROUTE,
+#define RTNLGRP_IPV4_ROUTE RTNLGRP_IPV4_ROUTE
+ RTNLGRP_IPV4_RULE,
+#define RTNLGRP_IPV4_RULE RTNLGRP_IPV4_RULE
+ RTNLGRP_IPV6_IFADDR,
+#define RTNLGRP_IPV6_IFADDR RTNLGRP_IPV6_IFADDR
+ RTNLGRP_IPV6_MROUTE,
+#define RTNLGRP_IPV6_MROUTE RTNLGRP_IPV6_MROUTE
+ RTNLGRP_IPV6_ROUTE,
+#define RTNLGRP_IPV6_ROUTE RTNLGRP_IPV6_ROUTE
+ RTNLGRP_IPV6_IFINFO,
+#define RTNLGRP_IPV6_IFINFO RTNLGRP_IPV6_IFINFO
+ RTNLGRP_DECnet_IFADDR,
+#define RTNLGRP_DECnet_IFADDR RTNLGRP_DECnet_IFADDR
+ RTNLGRP_NOP2,
+ RTNLGRP_DECnet_ROUTE,
+#define RTNLGRP_DECnet_ROUTE RTNLGRP_DECnet_ROUTE
+ RTNLGRP_DECnet_RULE,
+#define RTNLGRP_DECnet_RULE RTNLGRP_DECnet_RULE
+ RTNLGRP_NOP4,
+ RTNLGRP_IPV6_PREFIX,
+#define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX
+ RTNLGRP_IPV6_RULE,
+#define RTNLGRP_IPV6_RULE RTNLGRP_IPV6_RULE
+ RTNLGRP_ND_USEROPT,
+#define RTNLGRP_ND_USEROPT RTNLGRP_ND_USEROPT
+ RTNLGRP_PHONET_IFADDR,
+#define RTNLGRP_PHONET_IFADDR RTNLGRP_PHONET_IFADDR
+ RTNLGRP_PHONET_ROUTE,
+#define RTNLGRP_PHONET_ROUTE RTNLGRP_PHONET_ROUTE
+ RTNLGRP_DCB,
+#define RTNLGRP_DCB RTNLGRP_DCB
+ RTNLGRP_IPV4_NETCONF,
+#define RTNLGRP_IPV4_NETCONF RTNLGRP_IPV4_NETCONF
+ RTNLGRP_IPV6_NETCONF,
+#define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF
+ RTNLGRP_MDB,
+#define RTNLGRP_MDB RTNLGRP_MDB
+ RTNLGRP_MPLS_ROUTE,
+#define RTNLGRP_MPLS_ROUTE RTNLGRP_MPLS_ROUTE
+ RTNLGRP_NSID,
+#define RTNLGRP_NSID RTNLGRP_NSID
+ RTNLGRP_MPLS_NETCONF,
+#define RTNLGRP_MPLS_NETCONF RTNLGRP_MPLS_NETCONF
+ RTNLGRP_IPV4_MROUTE_R,
+#define RTNLGRP_IPV4_MROUTE_R RTNLGRP_IPV4_MROUTE_R
+ RTNLGRP_IPV6_MROUTE_R,
+#define RTNLGRP_IPV6_MROUTE_R RTNLGRP_IPV6_MROUTE_R
+ RTNLGRP_NEXTHOP,
+#define RTNLGRP_NEXTHOP RTNLGRP_NEXTHOP
+ RTNLGRP_BRVLAN,
+#define RTNLGRP_BRVLAN RTNLGRP_BRVLAN
+ RTNLGRP_MCTP_IFADDR,
+#define RTNLGRP_MCTP_IFADDR RTNLGRP_MCTP_IFADDR
+ RTNLGRP_TUNNEL,
+#define RTNLGRP_TUNNEL RTNLGRP_TUNNEL
+ RTNLGRP_STATS,
+#define RTNLGRP_STATS RTNLGRP_STATS
+ RTNLGRP_IPV4_MCADDR,
+#define RTNLGRP_IPV4_MCADDR RTNLGRP_IPV4_MCADDR
+ RTNLGRP_IPV6_MCADDR,
+#define RTNLGRP_IPV6_MCADDR RTNLGRP_IPV6_MCADDR
+ RTNLGRP_IPV6_ACADDR,
+#define RTNLGRP_IPV6_ACADDR RTNLGRP_IPV6_ACADDR
+ __RTNLGRP_MAX
+};
+#define RTNLGRP_MAX (__RTNLGRP_MAX - 1)
+
+/* TC action piece */
+struct tcamsg {
+ unsigned char tca_family;
+ unsigned char tca__pad1;
+ unsigned short tca__pad2;
+};
+
+enum {
+ TCA_ROOT_UNSPEC,
+ TCA_ROOT_TAB,
+#define TCA_ACT_TAB TCA_ROOT_TAB
+#define TCAA_MAX TCA_ROOT_TAB
+ TCA_ROOT_FLAGS,
+ TCA_ROOT_COUNT,
+ TCA_ROOT_TIME_DELTA, /* in msecs */
+ TCA_ROOT_EXT_WARN_MSG,
+ __TCA_ROOT_MAX,
+#define TCA_ROOT_MAX (__TCA_ROOT_MAX - 1)
+};
+
+#define TA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
+#define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
+/* tcamsg flags stored in attribute TCA_ROOT_FLAGS
+ *
+ * TCA_ACT_FLAG_LARGE_DUMP_ON user->kernel to request for larger than
+ * TCA_ACT_MAX_PRIO actions in a dump. All dump responses will contain the
+ * number of actions being dumped stored in for user app's consumption in
+ * TCA_ROOT_COUNT
+ *
+ * TCA_ACT_FLAG_TERSE_DUMP user->kernel to request terse (brief) dump that only
+ * includes essential action info (kind, index, etc.)
+ *
+ */
+#define TCA_FLAG_LARGE_DUMP_ON (1 << 0)
+#define TCA_ACT_FLAG_LARGE_DUMP_ON TCA_FLAG_LARGE_DUMP_ON
+#define TCA_ACT_FLAG_TERSE_DUMP (1 << 1)
+
+/* New extended info filters for IFLA_EXT_MASK */
+#define RTEXT_FILTER_VF (1 << 0)
+#define RTEXT_FILTER_BRVLAN (1 << 1)
+#define RTEXT_FILTER_BRVLAN_COMPRESSED (1 << 2)
+#define RTEXT_FILTER_SKIP_STATS (1 << 3)
+#define RTEXT_FILTER_MRP (1 << 4)
+#define RTEXT_FILTER_CFM_CONFIG (1 << 5)
+#define RTEXT_FILTER_CFM_STATUS (1 << 6)
+#define RTEXT_FILTER_MST (1 << 7)
+
+/* End of information exported to user level */
+
+
+
+#endif /* _UAPI__LINUX_RTNETLINK_H */
diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h
index 67626d535316..1686861aae20 100644
--- a/tools/include/uapi/linux/stat.h
+++ b/tools/include/uapi/linux/stat.h
@@ -98,37 +98,97 @@ struct statx_timestamp {
*/
struct statx {
/* 0x00 */
- __u32 stx_mask; /* What results were written [uncond] */
- __u32 stx_blksize; /* Preferred general I/O size [uncond] */
- __u64 stx_attributes; /* Flags conveying information about the file [uncond] */
+ /* What results were written [uncond] */
+ __u32 stx_mask;
+
+ /* Preferred general I/O size [uncond] */
+ __u32 stx_blksize;
+
+ /* Flags conveying information about the file [uncond] */
+ __u64 stx_attributes;
+
/* 0x10 */
- __u32 stx_nlink; /* Number of hard links */
- __u32 stx_uid; /* User ID of owner */
- __u32 stx_gid; /* Group ID of owner */
- __u16 stx_mode; /* File mode */
+ /* Number of hard links */
+ __u32 stx_nlink;
+
+ /* User ID of owner */
+ __u32 stx_uid;
+
+ /* Group ID of owner */
+ __u32 stx_gid;
+
+ /* File mode */
+ __u16 stx_mode;
__u16 __spare0[1];
+
/* 0x20 */
- __u64 stx_ino; /* Inode number */
- __u64 stx_size; /* File size */
- __u64 stx_blocks; /* Number of 512-byte blocks allocated */
- __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */
+ /* Inode number */
+ __u64 stx_ino;
+
+ /* File size */
+ __u64 stx_size;
+
+ /* Number of 512-byte blocks allocated */
+ __u64 stx_blocks;
+
+ /* Mask to show what's supported in stx_attributes */
+ __u64 stx_attributes_mask;
+
/* 0x40 */
- struct statx_timestamp stx_atime; /* Last access time */
- struct statx_timestamp stx_btime; /* File creation time */
- struct statx_timestamp stx_ctime; /* Last attribute change time */
- struct statx_timestamp stx_mtime; /* Last data modification time */
+ /* Last access time */
+ struct statx_timestamp stx_atime;
+
+ /* File creation time */
+ struct statx_timestamp stx_btime;
+
+ /* Last attribute change time */
+ struct statx_timestamp stx_ctime;
+
+ /* Last data modification time */
+ struct statx_timestamp stx_mtime;
+
/* 0x80 */
- __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */
+ /* Device ID of special file [if bdev/cdev] */
+ __u32 stx_rdev_major;
__u32 stx_rdev_minor;
- __u32 stx_dev_major; /* ID of device containing file [uncond] */
+
+ /* ID of device containing file [uncond] */
+ __u32 stx_dev_major;
__u32 stx_dev_minor;
+
/* 0x90 */
__u64 stx_mnt_id;
- __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */
- __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */
- __u64 stx_subvol; /* Subvolume identifier */
+
+ /* Memory buffer alignment for direct I/O */
+ __u32 stx_dio_mem_align;
+
+ /* File offset alignment for direct I/O */
+ __u32 stx_dio_offset_align;
+
/* 0xa0 */
- __u64 __spare3[11]; /* Spare space for future expansion */
+ /* Subvolume identifier */
+ __u64 stx_subvol;
+
+ /* Min atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_min;
+
+ /* Max atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_max;
+
+ /* 0xb0 */
+ /* Max atomic write segment count */
+ __u32 stx_atomic_write_segments_max;
+
+ /* File offset alignment for direct I/O reads */
+ __u32 stx_dio_read_offset_align;
+
+ /* Optimised max atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_max_opt;
+ __u32 __spare2[1];
+
+ /* 0xc0 */
+ __u64 __spare3[8]; /* Spare space for future expansion */
+
/* 0x100 */
};
@@ -157,6 +217,8 @@ struct statx {
#define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */
#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */
#define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */
+#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */
+#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
@@ -192,6 +254,7 @@ struct statx {
#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */
#define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */
#define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */
+#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */
#endif /* _UAPI_LINUX_STAT_H */
diff --git a/tools/include/uapi/linux/stddef.h b/tools/include/uapi/linux/stddef.h
index bb6ea517efb5..c53cde425406 100644
--- a/tools/include/uapi/linux/stddef.h
+++ b/tools/include/uapi/linux/stddef.h
@@ -8,6 +8,13 @@
#define __always_inline __inline__
#endif
+/* Not all C++ standards support type declarations inside an anonymous union */
+#ifndef __cplusplus
+#define __struct_group_tag(TAG) TAG
+#else
+#define __struct_group_tag(TAG)
+#endif
+
/**
* __struct_group() - Create a mirrored named and anonyomous struct
*
@@ -20,14 +27,14 @@
* and size: one anonymous and one named. The former's members can be used
* normally without sub-struct naming, and the latter can be used to
* reason about the start, end, and size of the group of struct members.
- * The named struct can also be explicitly tagged for layer reuse, as well
- * as both having struct attributes appended.
+ * The named struct can also be explicitly tagged for layer reuse (C only),
+ * as well as both having struct attributes appended.
*/
#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \
union { \
struct { MEMBERS } ATTRS; \
- struct TAG { MEMBERS } ATTRS NAME; \
- }
+ struct __struct_group_tag(TAG) { MEMBERS } ATTRS NAME; \
+ } ATTRS
/**
* __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union
diff --git a/tools/include/uapi/linux/types.h b/tools/include/uapi/linux/types.h
index 91fa51a9c31d..85aa327245c6 100644
--- a/tools/include/uapi/linux/types.h
+++ b/tools/include/uapi/linux/types.h
@@ -4,6 +4,8 @@
#include <asm-generic/int-ll64.h>
+#ifndef __ASSEMBLER__
+
/* copied from linux:include/uapi/linux/types.h */
#define __bitwise
typedef __u16 __bitwise __le16;
@@ -20,4 +22,5 @@ typedef __u32 __bitwise __wsum;
#define __aligned_be64 __be64 __attribute__((aligned(8)))
#define __aligned_le64 __le64 __attribute__((aligned(8)))
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI_LINUX_TYPES_H */