summaryrefslogtreecommitdiff
path: root/tools/include
diff options
context:
space:
mode:
Diffstat (limited to 'tools/include')
-rw-r--r--tools/include/asm-generic/bitops/__ffs.h4
-rw-r--r--tools/include/asm-generic/bitops/__fls.h10
-rw-r--r--tools/include/asm-generic/bitops/fls.h8
-rw-r--r--tools/include/asm-generic/bitops/fls64.h4
-rw-r--r--tools/include/asm-generic/io.h482
-rw-r--r--tools/include/asm-generic/unaligned.h23
-rw-r--r--tools/include/asm/alternative.h13
-rw-r--r--tools/include/asm/barrier.h2
-rw-r--r--tools/include/asm/io.h11
-rw-r--r--tools/include/asm/rwonce.h0
-rw-r--r--tools/include/asm/timex.h13
-rw-r--r--tools/include/generated/asm-offsets.h0
-rw-r--r--tools/include/generated/asm/cpucap-defs.h0
-rw-r--r--tools/include/generated/asm/sysreg-defs.h0
-rw-r--r--tools/include/io_uring/mini_liburing.h282
-rw-r--r--tools/include/linux/align.h12
-rw-r--r--tools/include/linux/args.h28
-rw-r--r--tools/include/linux/atomic.h22
-rw-r--r--tools/include/linux/bitmap.h48
-rw-r--r--tools/include/linux/bitops.h15
-rw-r--r--tools/include/linux/bits.h85
-rw-r--r--tools/include/linux/btf_ids.h13
-rw-r--r--tools/include/linux/build_bug.h10
-rw-r--r--tools/include/linux/cfi_types.h68
-rw-r--r--tools/include/linux/compiler-gcc.h8
-rw-r--r--tools/include/linux/compiler.h80
-rw-r--r--tools/include/linux/compiler_types.h4
-rw-r--r--tools/include/linux/const.h8
-rw-r--r--tools/include/linux/container_of.h18
-rw-r--r--tools/include/linux/coresight-pmu.h43
-rw-r--r--tools/include/linux/err.h2
-rw-r--r--tools/include/linux/filter.h28
-rw-r--r--tools/include/linux/gfp_types.h393
-rw-r--r--tools/include/linux/init.h43
-rw-r--r--tools/include/linux/interval_tree_generic.h10
-rw-r--r--tools/include/linux/io.h4
-rw-r--r--tools/include/linux/kallsyms.h4
-rw-r--r--tools/include/linux/kasan-tags.h15
-rw-r--r--tools/include/linux/kernel.h15
-rw-r--r--tools/include/linux/linkage.h8
-rw-r--r--tools/include/linux/livepatch_external.h76
-rw-r--r--tools/include/linux/math64.h5
-rw-r--r--tools/include/linux/mm.h21
-rw-r--r--tools/include/linux/moduleparam.h7
-rw-r--r--tools/include/linux/numa.h5
-rw-r--r--tools/include/linux/objtool.h197
-rw-r--r--tools/include/linux/objtool_types.h72
-rw-r--r--tools/include/linux/panic.h19
l---------tools/include/linux/pci_ids.h1
-rw-r--r--tools/include/linux/pfn.h1
-rw-r--r--tools/include/linux/poison.h7
-rw-r--r--tools/include/linux/prandom.h51
-rw-r--r--tools/include/linux/rbtree_augmented.h4
-rw-r--r--tools/include/linux/refcount.h5
-rw-r--r--tools/include/linux/ring_buffer.h2
-rw-r--r--tools/include/linux/rwsem.h44
-rw-r--r--tools/include/linux/seq_file.h2
-rw-r--r--tools/include/linux/slab.h167
-rw-r--r--tools/include/linux/spinlock.h1
-rw-r--r--tools/include/linux/static_call_types.h4
-rw-r--r--tools/include/linux/string.h19
-rw-r--r--tools/include/linux/types.h7
-rw-r--r--tools/include/linux/unaligned.h148
-rw-r--r--tools/include/nolibc/.gitignore1
-rw-r--r--tools/include/nolibc/Makefile82
-rw-r--r--tools/include/nolibc/arch-arm.h193
-rw-r--r--tools/include/nolibc/arch-arm64.h (renamed from tools/include/nolibc/arch-aarch64.h)106
-rw-r--r--tools/include/nolibc/arch-i386.h219
-rw-r--r--tools/include/nolibc/arch-loongarch.h157
-rw-r--r--tools/include/nolibc/arch-m68k.h143
-rw-r--r--tools/include/nolibc/arch-mips.h259
-rw-r--r--tools/include/nolibc/arch-powerpc.h221
-rw-r--r--tools/include/nolibc/arch-riscv.h94
-rw-r--r--tools/include/nolibc/arch-s390.h189
-rw-r--r--tools/include/nolibc/arch-sh.h164
-rw-r--r--tools/include/nolibc/arch-sparc.h209
-rw-r--r--tools/include/nolibc/arch-x86.h393
-rw-r--r--tools/include/nolibc/arch-x86_64.h215
-rw-r--r--tools/include/nolibc/arch.h33
-rw-r--r--tools/include/nolibc/compiler.h50
-rw-r--r--tools/include/nolibc/crt.h94
-rw-r--r--tools/include/nolibc/ctype.h6
-rw-r--r--tools/include/nolibc/dirent.h100
-rw-r--r--tools/include/nolibc/elf.h15
-rw-r--r--tools/include/nolibc/errno.h12
-rw-r--r--tools/include/nolibc/fcntl.h69
-rw-r--r--tools/include/nolibc/getopt.h101
-rw-r--r--tools/include/nolibc/inttypes.h3
-rw-r--r--tools/include/nolibc/limits.h7
-rw-r--r--tools/include/nolibc/math.h31
-rw-r--r--tools/include/nolibc/nolibc.h45
-rw-r--r--tools/include/nolibc/poll.h53
-rw-r--r--tools/include/nolibc/sched.h50
-rw-r--r--tools/include/nolibc/signal.h7
-rw-r--r--tools/include/nolibc/stackprotector.h55
-rw-r--r--tools/include/nolibc/std.h27
-rw-r--r--tools/include/nolibc/stdarg.h16
-rw-r--r--tools/include/nolibc/stdbool.h16
-rw-r--r--tools/include/nolibc/stddef.h24
-rw-r--r--tools/include/nolibc/stdint.h132
-rw-r--r--tools/include/nolibc/stdio.h413
-rw-r--r--tools/include/nolibc/stdlib.h169
-rw-r--r--tools/include/nolibc/string.h146
-rw-r--r--tools/include/nolibc/sys.h795
-rw-r--r--tools/include/nolibc/sys/auxv.h44
-rw-r--r--tools/include/nolibc/sys/ioctl.h29
-rw-r--r--tools/include/nolibc/sys/mman.h77
-rw-r--r--tools/include/nolibc/sys/mount.h37
-rw-r--r--tools/include/nolibc/sys/prctl.h36
-rw-r--r--tools/include/nolibc/sys/random.h34
-rw-r--r--tools/include/nolibc/sys/reboot.h34
-rw-r--r--tools/include/nolibc/sys/resource.h53
-rw-r--r--tools/include/nolibc/sys/select.h103
-rw-r--r--tools/include/nolibc/sys/stat.h94
-rw-r--r--tools/include/nolibc/sys/syscall.h19
-rw-r--r--tools/include/nolibc/sys/sysmacros.h20
-rw-r--r--tools/include/nolibc/sys/time.h49
-rw-r--r--tools/include/nolibc/sys/timerfd.h83
-rw-r--r--tools/include/nolibc/sys/types.h7
-rw-r--r--tools/include/nolibc/sys/uio.h49
-rw-r--r--tools/include/nolibc/sys/utsname.h42
-rw-r--r--tools/include/nolibc/sys/wait.h105
-rw-r--r--tools/include/nolibc/time.h212
-rw-r--r--tools/include/nolibc/types.h158
-rw-r--r--tools/include/nolibc/unistd.h45
-rw-r--r--tools/include/perf/arm_pmuv3.h317
-rw-r--r--tools/include/uapi/README73
-rw-r--r--tools/include/uapi/asm-generic/bitsperlong.h18
-rw-r--r--tools/include/uapi/asm-generic/fcntl.h222
-rw-r--r--tools/include/uapi/asm-generic/mman-common.h4
-rw-r--r--tools/include/uapi/asm-generic/mman.h4
-rw-r--r--tools/include/uapi/asm-generic/socket.h26
-rw-r--r--tools/include/uapi/asm-generic/unistd.h180
-rw-r--r--tools/include/uapi/asm/bitsperlong.h6
-rw-r--r--tools/include/uapi/asm/bpf_perf_event.h2
-rw-r--r--tools/include/uapi/drm/drm.h317
-rw-r--r--tools/include/uapi/drm/i915_drm.h214
-rw-r--r--tools/include/uapi/linux/bits.h14
-rw-r--r--tools/include/uapi/linux/bpf.h817
-rw-r--r--tools/include/uapi/linux/btf.h3
-rw-r--r--tools/include/uapi/linux/const.h19
-rw-r--r--tools/include/uapi/linux/coredump.h104
-rw-r--r--tools/include/uapi/linux/elf.h524
-rw-r--r--tools/include/uapi/linux/ethtool.h104
-rw-r--r--tools/include/uapi/linux/fanotify.h274
-rw-r--r--tools/include/uapi/linux/fcntl.h114
-rw-r--r--tools/include/uapi/linux/fs.h267
-rw-r--r--tools/include/uapi/linux/fscrypt.h9
-rw-r--r--tools/include/uapi/linux/genetlink.h103
-rw-r--r--tools/include/uapi/linux/hw_breakpoint.h10
-rw-r--r--tools/include/uapi/linux/if_addr.h79
-rw-r--r--tools/include/uapi/linux/if_link.h697
-rw-r--r--tools/include/uapi/linux/if_xdp.h81
-rw-r--r--tools/include/uapi/linux/in.h8
-rw-r--r--tools/include/uapi/linux/io_uring.h757
-rw-r--r--tools/include/uapi/linux/kvm.h937
-rw-r--r--tools/include/uapi/linux/memfd.h39
-rw-r--r--tools/include/uapi/linux/mman.h15
-rw-r--r--tools/include/uapi/linux/mount.h100
-rw-r--r--tools/include/uapi/linux/neighbour.h229
-rw-r--r--tools/include/uapi/linux/netdev.h239
-rw-r--r--tools/include/uapi/linux/netfilter.h80
-rw-r--r--tools/include/uapi/linux/netfilter_arp.h23
-rw-r--r--tools/include/uapi/linux/nsfs.h128
-rw-r--r--tools/include/uapi/linux/openat2.h43
-rw-r--r--tools/include/uapi/linux/perf_event.h698
-rw-r--r--tools/include/uapi/linux/pkt_cls.h47
-rw-r--r--tools/include/uapi/linux/pkt_sched.h109
-rw-r--r--tools/include/uapi/linux/prctl.h94
-rw-r--r--tools/include/uapi/linux/rtnetlink.h848
-rw-r--r--tools/include/uapi/linux/sched.h148
-rw-r--r--tools/include/uapi/linux/seccomp.h157
-rw-r--r--tools/include/uapi/linux/stat.h106
-rw-r--r--tools/include/uapi/linux/stddef.h15
-rw-r--r--tools/include/uapi/linux/types.h3
-rw-r--r--tools/include/uapi/linux/usbdevice_fs.h231
-rw-r--r--tools/include/uapi/linux/userfaultfd.h386
-rw-r--r--tools/include/uapi/linux/vhost.h183
-rw-r--r--tools/include/uapi/sound/asound.h1166
-rw-r--r--tools/include/vdso/bits.h1
-rw-r--r--tools/include/vdso/unaligned.h15
181 files changed, 14798 insertions, 5754 deletions
diff --git a/tools/include/asm-generic/bitops/__ffs.h b/tools/include/asm-generic/bitops/__ffs.h
index 9d1310519497..2d94c1e9b2f3 100644
--- a/tools/include/asm-generic/bitops/__ffs.h
+++ b/tools/include/asm-generic/bitops/__ffs.h
@@ -11,9 +11,9 @@
*
* Undefined if no bit exists, so code should check against 0 first.
*/
-static __always_inline unsigned long __ffs(unsigned long word)
+static __always_inline unsigned int __ffs(unsigned long word)
{
- int num = 0;
+ unsigned int num = 0;
#if __BITS_PER_LONG == 64
if ((word & 0xffffffff) == 0) {
diff --git a/tools/include/asm-generic/bitops/__fls.h b/tools/include/asm-generic/bitops/__fls.h
index 03f721a8a2b1..35f33780ca6c 100644
--- a/tools/include/asm-generic/bitops/__fls.h
+++ b/tools/include/asm-generic/bitops/__fls.h
@@ -5,14 +5,14 @@
#include <asm/types.h>
/**
- * __fls - find last (most-significant) set bit in a long word
+ * generic___fls - find last (most-significant) set bit in a long word
* @word: the word to search
*
* Undefined if no set bit exists, so code should check against 0 first.
*/
-static __always_inline unsigned long __fls(unsigned long word)
+static __always_inline __attribute_const__ unsigned int generic___fls(unsigned long word)
{
- int num = BITS_PER_LONG - 1;
+ unsigned int num = BITS_PER_LONG - 1;
#if BITS_PER_LONG == 64
if (!(word & (~0ul << 32))) {
@@ -41,4 +41,8 @@ static __always_inline unsigned long __fls(unsigned long word)
return num;
}
+#ifndef __HAVE_ARCH___FLS
+#define __fls(word) generic___fls(word)
+#endif
+
#endif /* _ASM_GENERIC_BITOPS___FLS_H_ */
diff --git a/tools/include/asm-generic/bitops/fls.h b/tools/include/asm-generic/bitops/fls.h
index b168bb10e1be..8eed3437edb9 100644
--- a/tools/include/asm-generic/bitops/fls.h
+++ b/tools/include/asm-generic/bitops/fls.h
@@ -3,14 +3,14 @@
#define _ASM_GENERIC_BITOPS_FLS_H_
/**
- * fls - find last (most-significant) bit set
+ * generic_fls - find last (most-significant) bit set
* @x: the word to search
*
* This is defined the same way as ffs.
* Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
*/
-static __always_inline int fls(unsigned int x)
+static __always_inline __attribute_const__ int generic_fls(unsigned int x)
{
int r = 32;
@@ -39,4 +39,8 @@ static __always_inline int fls(unsigned int x)
return r;
}
+#ifndef __HAVE_ARCH_FLS
+#define fls(x) generic_fls(x)
+#endif
+
#endif /* _ASM_GENERIC_BITOPS_FLS_H_ */
diff --git a/tools/include/asm-generic/bitops/fls64.h b/tools/include/asm-generic/bitops/fls64.h
index 866f2b2304ff..b5f58dd261a3 100644
--- a/tools/include/asm-generic/bitops/fls64.h
+++ b/tools/include/asm-generic/bitops/fls64.h
@@ -16,7 +16,7 @@
* at position 64.
*/
#if BITS_PER_LONG == 32
-static __always_inline int fls64(__u64 x)
+static __always_inline __attribute_const__ int fls64(__u64 x)
{
__u32 h = x >> 32;
if (h)
@@ -24,7 +24,7 @@ static __always_inline int fls64(__u64 x)
return fls(x);
}
#elif BITS_PER_LONG == 64
-static __always_inline int fls64(__u64 x)
+static __always_inline __attribute_const__ int fls64(__u64 x)
{
if (x == 0)
return 0;
diff --git a/tools/include/asm-generic/io.h b/tools/include/asm-generic/io.h
new file mode 100644
index 000000000000..e5a0b07ad452
--- /dev/null
+++ b/tools/include/asm-generic/io.h
@@ -0,0 +1,482 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_ASM_GENERIC_IO_H
+#define _TOOLS_ASM_GENERIC_IO_H
+
+#include <asm/barrier.h>
+#include <asm/byteorder.h>
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#ifndef mmiowb_set_pending
+#define mmiowb_set_pending() do { } while (0)
+#endif
+
+#ifndef __io_br
+#define __io_br() barrier()
+#endif
+
+/* prevent prefetching of coherent DMA data ahead of a dma-complete */
+#ifndef __io_ar
+#ifdef rmb
+#define __io_ar(v) rmb()
+#else
+#define __io_ar(v) barrier()
+#endif
+#endif
+
+/* flush writes to coherent DMA data before possibly triggering a DMA read */
+#ifndef __io_bw
+#ifdef wmb
+#define __io_bw() wmb()
+#else
+#define __io_bw() barrier()
+#endif
+#endif
+
+/* serialize device access against a spin_unlock, usually handled there. */
+#ifndef __io_aw
+#define __io_aw() mmiowb_set_pending()
+#endif
+
+#ifndef __io_pbw
+#define __io_pbw() __io_bw()
+#endif
+
+#ifndef __io_paw
+#define __io_paw() __io_aw()
+#endif
+
+#ifndef __io_pbr
+#define __io_pbr() __io_br()
+#endif
+
+#ifndef __io_par
+#define __io_par(v) __io_ar(v)
+#endif
+
+#ifndef _THIS_IP_
+#define _THIS_IP_ 0
+#endif
+
+static inline void log_write_mmio(u64 val, u8 width, volatile void __iomem *addr,
+ unsigned long caller_addr, unsigned long caller_addr0) {}
+static inline void log_post_write_mmio(u64 val, u8 width, volatile void __iomem *addr,
+ unsigned long caller_addr, unsigned long caller_addr0) {}
+static inline void log_read_mmio(u8 width, const volatile void __iomem *addr,
+ unsigned long caller_addr, unsigned long caller_addr0) {}
+static inline void log_post_read_mmio(u64 val, u8 width, const volatile void __iomem *addr,
+ unsigned long caller_addr, unsigned long caller_addr0) {}
+
+/*
+ * __raw_{read,write}{b,w,l,q}() access memory in native endianness.
+ *
+ * On some architectures memory mapped IO needs to be accessed differently.
+ * On the simple architectures, we just read/write the memory location
+ * directly.
+ */
+
+#ifndef __raw_readb
+#define __raw_readb __raw_readb
+static inline u8 __raw_readb(const volatile void __iomem *addr)
+{
+ return *(const volatile u8 __force *)addr;
+}
+#endif
+
+#ifndef __raw_readw
+#define __raw_readw __raw_readw
+static inline u16 __raw_readw(const volatile void __iomem *addr)
+{
+ return *(const volatile u16 __force *)addr;
+}
+#endif
+
+#ifndef __raw_readl
+#define __raw_readl __raw_readl
+static inline u32 __raw_readl(const volatile void __iomem *addr)
+{
+ return *(const volatile u32 __force *)addr;
+}
+#endif
+
+#ifndef __raw_readq
+#define __raw_readq __raw_readq
+static inline u64 __raw_readq(const volatile void __iomem *addr)
+{
+ return *(const volatile u64 __force *)addr;
+}
+#endif
+
+#ifndef __raw_writeb
+#define __raw_writeb __raw_writeb
+static inline void __raw_writeb(u8 value, volatile void __iomem *addr)
+{
+ *(volatile u8 __force *)addr = value;
+}
+#endif
+
+#ifndef __raw_writew
+#define __raw_writew __raw_writew
+static inline void __raw_writew(u16 value, volatile void __iomem *addr)
+{
+ *(volatile u16 __force *)addr = value;
+}
+#endif
+
+#ifndef __raw_writel
+#define __raw_writel __raw_writel
+static inline void __raw_writel(u32 value, volatile void __iomem *addr)
+{
+ *(volatile u32 __force *)addr = value;
+}
+#endif
+
+#ifndef __raw_writeq
+#define __raw_writeq __raw_writeq
+static inline void __raw_writeq(u64 value, volatile void __iomem *addr)
+{
+ *(volatile u64 __force *)addr = value;
+}
+#endif
+
+/*
+ * {read,write}{b,w,l,q}() access little endian memory and return result in
+ * native endianness.
+ */
+
+#ifndef readb
+#define readb readb
+static inline u8 readb(const volatile void __iomem *addr)
+{
+ u8 val;
+
+ log_read_mmio(8, addr, _THIS_IP_, _RET_IP_);
+ __io_br();
+ val = __raw_readb(addr);
+ __io_ar(val);
+ log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_);
+ return val;
+}
+#endif
+
+#ifndef readw
+#define readw readw
+static inline u16 readw(const volatile void __iomem *addr)
+{
+ u16 val;
+
+ log_read_mmio(16, addr, _THIS_IP_, _RET_IP_);
+ __io_br();
+ val = __le16_to_cpu((__le16 __force)__raw_readw(addr));
+ __io_ar(val);
+ log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_);
+ return val;
+}
+#endif
+
+#ifndef readl
+#define readl readl
+static inline u32 readl(const volatile void __iomem *addr)
+{
+ u32 val;
+
+ log_read_mmio(32, addr, _THIS_IP_, _RET_IP_);
+ __io_br();
+ val = __le32_to_cpu((__le32 __force)__raw_readl(addr));
+ __io_ar(val);
+ log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_);
+ return val;
+}
+#endif
+
+#ifndef readq
+#define readq readq
+static inline u64 readq(const volatile void __iomem *addr)
+{
+ u64 val;
+
+ log_read_mmio(64, addr, _THIS_IP_, _RET_IP_);
+ __io_br();
+ val = __le64_to_cpu((__le64 __force)__raw_readq(addr));
+ __io_ar(val);
+ log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_);
+ return val;
+}
+#endif
+
+#ifndef writeb
+#define writeb writeb
+static inline void writeb(u8 value, volatile void __iomem *addr)
+{
+ log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_);
+ __io_bw();
+ __raw_writeb(value, addr);
+ __io_aw();
+ log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_);
+}
+#endif
+
+#ifndef writew
+#define writew writew
+static inline void writew(u16 value, volatile void __iomem *addr)
+{
+ log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
+ __io_bw();
+ __raw_writew((u16 __force)cpu_to_le16(value), addr);
+ __io_aw();
+ log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
+}
+#endif
+
+#ifndef writel
+#define writel writel
+static inline void writel(u32 value, volatile void __iomem *addr)
+{
+ log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
+ __io_bw();
+ __raw_writel((u32 __force)__cpu_to_le32(value), addr);
+ __io_aw();
+ log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
+}
+#endif
+
+#ifndef writeq
+#define writeq writeq
+static inline void writeq(u64 value, volatile void __iomem *addr)
+{
+ log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
+ __io_bw();
+ __raw_writeq((u64 __force)__cpu_to_le64(value), addr);
+ __io_aw();
+ log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
+}
+#endif
+
+/*
+ * {read,write}{b,w,l,q}_relaxed() are like the regular version, but
+ * are not guaranteed to provide ordering against spinlocks or memory
+ * accesses.
+ */
+#ifndef readb_relaxed
+#define readb_relaxed readb_relaxed
+static inline u8 readb_relaxed(const volatile void __iomem *addr)
+{
+ u8 val;
+
+ log_read_mmio(8, addr, _THIS_IP_, _RET_IP_);
+ val = __raw_readb(addr);
+ log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_);
+ return val;
+}
+#endif
+
+#ifndef readw_relaxed
+#define readw_relaxed readw_relaxed
+static inline u16 readw_relaxed(const volatile void __iomem *addr)
+{
+ u16 val;
+
+ log_read_mmio(16, addr, _THIS_IP_, _RET_IP_);
+ val = __le16_to_cpu((__le16 __force)__raw_readw(addr));
+ log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_);
+ return val;
+}
+#endif
+
+#ifndef readl_relaxed
+#define readl_relaxed readl_relaxed
+static inline u32 readl_relaxed(const volatile void __iomem *addr)
+{
+ u32 val;
+
+ log_read_mmio(32, addr, _THIS_IP_, _RET_IP_);
+ val = __le32_to_cpu((__le32 __force)__raw_readl(addr));
+ log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_);
+ return val;
+}
+#endif
+
+#if defined(readq) && !defined(readq_relaxed)
+#define readq_relaxed readq_relaxed
+static inline u64 readq_relaxed(const volatile void __iomem *addr)
+{
+ u64 val;
+
+ log_read_mmio(64, addr, _THIS_IP_, _RET_IP_);
+ val = __le64_to_cpu((__le64 __force)__raw_readq(addr));
+ log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_);
+ return val;
+}
+#endif
+
+#ifndef writeb_relaxed
+#define writeb_relaxed writeb_relaxed
+static inline void writeb_relaxed(u8 value, volatile void __iomem *addr)
+{
+ log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_);
+ __raw_writeb(value, addr);
+ log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_);
+}
+#endif
+
+#ifndef writew_relaxed
+#define writew_relaxed writew_relaxed
+static inline void writew_relaxed(u16 value, volatile void __iomem *addr)
+{
+ log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
+ __raw_writew((u16 __force)cpu_to_le16(value), addr);
+ log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
+}
+#endif
+
+#ifndef writel_relaxed
+#define writel_relaxed writel_relaxed
+static inline void writel_relaxed(u32 value, volatile void __iomem *addr)
+{
+ log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
+ __raw_writel((u32 __force)__cpu_to_le32(value), addr);
+ log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
+}
+#endif
+
+#if defined(writeq) && !defined(writeq_relaxed)
+#define writeq_relaxed writeq_relaxed
+static inline void writeq_relaxed(u64 value, volatile void __iomem *addr)
+{
+ log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
+ __raw_writeq((u64 __force)__cpu_to_le64(value), addr);
+ log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
+}
+#endif
+
+/*
+ * {read,write}s{b,w,l,q}() repeatedly access the same memory address in
+ * native endianness in 8-, 16-, 32- or 64-bit chunks (@count times).
+ */
+#ifndef readsb
+#define readsb readsb
+static inline void readsb(const volatile void __iomem *addr, void *buffer,
+ unsigned int count)
+{
+ if (count) {
+ u8 *buf = buffer;
+
+ do {
+ u8 x = __raw_readb(addr);
+ *buf++ = x;
+ } while (--count);
+ }
+}
+#endif
+
+#ifndef readsw
+#define readsw readsw
+static inline void readsw(const volatile void __iomem *addr, void *buffer,
+ unsigned int count)
+{
+ if (count) {
+ u16 *buf = buffer;
+
+ do {
+ u16 x = __raw_readw(addr);
+ *buf++ = x;
+ } while (--count);
+ }
+}
+#endif
+
+#ifndef readsl
+#define readsl readsl
+static inline void readsl(const volatile void __iomem *addr, void *buffer,
+ unsigned int count)
+{
+ if (count) {
+ u32 *buf = buffer;
+
+ do {
+ u32 x = __raw_readl(addr);
+ *buf++ = x;
+ } while (--count);
+ }
+}
+#endif
+
+#ifndef readsq
+#define readsq readsq
+static inline void readsq(const volatile void __iomem *addr, void *buffer,
+ unsigned int count)
+{
+ if (count) {
+ u64 *buf = buffer;
+
+ do {
+ u64 x = __raw_readq(addr);
+ *buf++ = x;
+ } while (--count);
+ }
+}
+#endif
+
+#ifndef writesb
+#define writesb writesb
+static inline void writesb(volatile void __iomem *addr, const void *buffer,
+ unsigned int count)
+{
+ if (count) {
+ const u8 *buf = buffer;
+
+ do {
+ __raw_writeb(*buf++, addr);
+ } while (--count);
+ }
+}
+#endif
+
+#ifndef writesw
+#define writesw writesw
+static inline void writesw(volatile void __iomem *addr, const void *buffer,
+ unsigned int count)
+{
+ if (count) {
+ const u16 *buf = buffer;
+
+ do {
+ __raw_writew(*buf++, addr);
+ } while (--count);
+ }
+}
+#endif
+
+#ifndef writesl
+#define writesl writesl
+static inline void writesl(volatile void __iomem *addr, const void *buffer,
+ unsigned int count)
+{
+ if (count) {
+ const u32 *buf = buffer;
+
+ do {
+ __raw_writel(*buf++, addr);
+ } while (--count);
+ }
+}
+#endif
+
+#ifndef writesq
+#define writesq writesq
+static inline void writesq(volatile void __iomem *addr, const void *buffer,
+ unsigned int count)
+{
+ if (count) {
+ const u64 *buf = buffer;
+
+ do {
+ __raw_writeq(*buf++, addr);
+ } while (--count);
+ }
+}
+#endif
+
+#endif /* _TOOLS_ASM_GENERIC_IO_H */
diff --git a/tools/include/asm-generic/unaligned.h b/tools/include/asm-generic/unaligned.h
deleted file mode 100644
index 47387c607035..000000000000
--- a/tools/include/asm-generic/unaligned.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copied from the kernel sources to tools/perf/:
- */
-
-#ifndef __TOOLS_LINUX_ASM_GENERIC_UNALIGNED_H
-#define __TOOLS_LINUX_ASM_GENERIC_UNALIGNED_H
-
-#define __get_unaligned_t(type, ptr) ({ \
- const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \
- __pptr->x; \
-})
-
-#define __put_unaligned_t(type, val, ptr) do { \
- struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \
- __pptr->x = (val); \
-} while (0)
-
-#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr))
-#define put_unaligned(val, ptr) __put_unaligned_t(typeof(*(ptr)), (val), (ptr))
-
-#endif /* __TOOLS_LINUX_ASM_GENERIC_UNALIGNED_H */
-
diff --git a/tools/include/asm/alternative.h b/tools/include/asm/alternative.h
index b54bd860dff6..8e548ac8f740 100644
--- a/tools/include/asm/alternative.h
+++ b/tools/include/asm/alternative.h
@@ -2,9 +2,18 @@
#ifndef _TOOLS_ASM_ALTERNATIVE_ASM_H
#define _TOOLS_ASM_ALTERNATIVE_ASM_H
+#if defined(__s390x__)
+#ifdef __ASSEMBLY__
+.macro ALTERNATIVE oldinstr, newinstr, feature
+ \oldinstr
+.endm
+#endif
+#else
+
/* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */
-#define altinstruction_entry #
-#define ALTERNATIVE_2 #
+#define ALTERNATIVE #
+
+#endif
#endif
diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h
index 8d378c57cb01..0c21678ac5e6 100644
--- a/tools/include/asm/barrier.h
+++ b/tools/include/asm/barrier.h
@@ -8,6 +8,8 @@
#include "../../arch/arm64/include/asm/barrier.h"
#elif defined(__powerpc__)
#include "../../arch/powerpc/include/asm/barrier.h"
+#elif defined(__riscv)
+#include "../../arch/riscv/include/asm/barrier.h"
#elif defined(__s390__)
#include "../../arch/s390/include/asm/barrier.h"
#elif defined(__sh__)
diff --git a/tools/include/asm/io.h b/tools/include/asm/io.h
new file mode 100644
index 000000000000..eed5066f25c4
--- /dev/null
+++ b/tools/include/asm/io.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_ASM_IO_H
+#define _TOOLS_ASM_IO_H
+
+#if defined(__i386__) || defined(__x86_64__)
+#include "../../arch/x86/include/asm/io.h"
+#else
+#include <asm-generic/io.h>
+#endif
+
+#endif /* _TOOLS_ASM_IO_H */
diff --git a/tools/include/asm/rwonce.h b/tools/include/asm/rwonce.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/include/asm/rwonce.h
diff --git a/tools/include/asm/timex.h b/tools/include/asm/timex.h
new file mode 100644
index 000000000000..5adfe3c6d326
--- /dev/null
+++ b/tools/include/asm/timex.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TOOLS_LINUX_ASM_TIMEX_H
+#define __TOOLS_LINUX_ASM_TIMEX_H
+
+#include <time.h>
+
+#define cycles_t clock_t
+
+static inline cycles_t get_cycles(void)
+{
+ return clock();
+}
+#endif // __TOOLS_LINUX_ASM_TIMEX_H
diff --git a/tools/include/generated/asm-offsets.h b/tools/include/generated/asm-offsets.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/include/generated/asm-offsets.h
diff --git a/tools/include/generated/asm/cpucap-defs.h b/tools/include/generated/asm/cpucap-defs.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/include/generated/asm/cpucap-defs.h
diff --git a/tools/include/generated/asm/sysreg-defs.h b/tools/include/generated/asm/sysreg-defs.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/include/generated/asm/sysreg-defs.h
diff --git a/tools/include/io_uring/mini_liburing.h b/tools/include/io_uring/mini_liburing.h
new file mode 100644
index 000000000000..9ccb16074eb5
--- /dev/null
+++ b/tools/include/io_uring/mini_liburing.h
@@ -0,0 +1,282 @@
+/* SPDX-License-Identifier: MIT */
+
+#include <linux/io_uring.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+struct io_sq_ring {
+ unsigned int *head;
+ unsigned int *tail;
+ unsigned int *ring_mask;
+ unsigned int *ring_entries;
+ unsigned int *flags;
+ unsigned int *array;
+};
+
+struct io_cq_ring {
+ unsigned int *head;
+ unsigned int *tail;
+ unsigned int *ring_mask;
+ unsigned int *ring_entries;
+ struct io_uring_cqe *cqes;
+};
+
+struct io_uring_sq {
+ unsigned int *khead;
+ unsigned int *ktail;
+ unsigned int *kring_mask;
+ unsigned int *kring_entries;
+ unsigned int *kflags;
+ unsigned int *kdropped;
+ unsigned int *array;
+ struct io_uring_sqe *sqes;
+
+ unsigned int sqe_head;
+ unsigned int sqe_tail;
+
+ size_t ring_sz;
+};
+
+struct io_uring_cq {
+ unsigned int *khead;
+ unsigned int *ktail;
+ unsigned int *kring_mask;
+ unsigned int *kring_entries;
+ unsigned int *koverflow;
+ struct io_uring_cqe *cqes;
+
+ size_t ring_sz;
+};
+
+struct io_uring {
+ struct io_uring_sq sq;
+ struct io_uring_cq cq;
+ int ring_fd;
+};
+
+#if defined(__x86_64) || defined(__i386__)
+#define read_barrier() __asm__ __volatile__("":::"memory")
+#define write_barrier() __asm__ __volatile__("":::"memory")
+#else
+#define read_barrier() __sync_synchronize()
+#define write_barrier() __sync_synchronize()
+#endif
+
+static inline int io_uring_mmap(int fd, struct io_uring_params *p,
+ struct io_uring_sq *sq, struct io_uring_cq *cq)
+{
+ size_t size;
+ void *ptr;
+ int ret;
+
+ sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned int);
+ ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
+ if (ptr == MAP_FAILED)
+ return -errno;
+ sq->khead = ptr + p->sq_off.head;
+ sq->ktail = ptr + p->sq_off.tail;
+ sq->kring_mask = ptr + p->sq_off.ring_mask;
+ sq->kring_entries = ptr + p->sq_off.ring_entries;
+ sq->kflags = ptr + p->sq_off.flags;
+ sq->kdropped = ptr + p->sq_off.dropped;
+ sq->array = ptr + p->sq_off.array;
+
+ size = p->sq_entries * sizeof(struct io_uring_sqe);
+ sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES);
+ if (sq->sqes == MAP_FAILED) {
+ ret = -errno;
+err:
+ munmap(sq->khead, sq->ring_sz);
+ return ret;
+ }
+
+ cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe);
+ ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
+ if (ptr == MAP_FAILED) {
+ ret = -errno;
+ munmap(sq->sqes, p->sq_entries * sizeof(struct io_uring_sqe));
+ goto err;
+ }
+ cq->khead = ptr + p->cq_off.head;
+ cq->ktail = ptr + p->cq_off.tail;
+ cq->kring_mask = ptr + p->cq_off.ring_mask;
+ cq->kring_entries = ptr + p->cq_off.ring_entries;
+ cq->koverflow = ptr + p->cq_off.overflow;
+ cq->cqes = ptr + p->cq_off.cqes;
+ return 0;
+}
+
+static inline int io_uring_setup(unsigned int entries,
+ struct io_uring_params *p)
+{
+ return syscall(__NR_io_uring_setup, entries, p);
+}
+
+static inline int io_uring_enter(int fd, unsigned int to_submit,
+ unsigned int min_complete,
+ unsigned int flags, sigset_t *sig)
+{
+ return syscall(__NR_io_uring_enter, fd, to_submit, min_complete,
+ flags, sig, _NSIG / 8);
+}
+
+static inline int io_uring_queue_init(unsigned int entries,
+ struct io_uring *ring,
+ unsigned int flags)
+{
+ struct io_uring_params p;
+ int fd, ret;
+
+ memset(ring, 0, sizeof(*ring));
+ memset(&p, 0, sizeof(p));
+ p.flags = flags;
+
+ fd = io_uring_setup(entries, &p);
+ if (fd < 0)
+ return fd;
+ ret = io_uring_mmap(fd, &p, &ring->sq, &ring->cq);
+ if (!ret)
+ ring->ring_fd = fd;
+ else
+ close(fd);
+ return ret;
+}
+
+/* Get a sqe */
+static inline struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
+{
+ struct io_uring_sq *sq = &ring->sq;
+
+ if (sq->sqe_tail + 1 - sq->sqe_head > *sq->kring_entries)
+ return NULL;
+ return &sq->sqes[sq->sqe_tail++ & *sq->kring_mask];
+}
+
+static inline int io_uring_wait_cqe(struct io_uring *ring,
+ struct io_uring_cqe **cqe_ptr)
+{
+ struct io_uring_cq *cq = &ring->cq;
+ const unsigned int mask = *cq->kring_mask;
+ unsigned int head = *cq->khead;
+ int ret;
+
+ *cqe_ptr = NULL;
+ do {
+ read_barrier();
+ if (head != *cq->ktail) {
+ *cqe_ptr = &cq->cqes[head & mask];
+ break;
+ }
+ ret = io_uring_enter(ring->ring_fd, 0, 1,
+ IORING_ENTER_GETEVENTS, NULL);
+ if (ret < 0)
+ return -errno;
+ } while (1);
+
+ return 0;
+}
+
+static inline int io_uring_submit(struct io_uring *ring)
+{
+ struct io_uring_sq *sq = &ring->sq;
+ const unsigned int mask = *sq->kring_mask;
+ unsigned int ktail, submitted, to_submit;
+ int ret;
+
+ read_barrier();
+ if (*sq->khead != *sq->ktail) {
+ submitted = *sq->kring_entries;
+ goto submit;
+ }
+ if (sq->sqe_head == sq->sqe_tail)
+ return 0;
+
+ ktail = *sq->ktail;
+ to_submit = sq->sqe_tail - sq->sqe_head;
+ for (submitted = 0; submitted < to_submit; submitted++) {
+ read_barrier();
+ sq->array[ktail++ & mask] = sq->sqe_head++ & mask;
+ }
+ if (!submitted)
+ return 0;
+
+ if (*sq->ktail != ktail) {
+ write_barrier();
+ *sq->ktail = ktail;
+ write_barrier();
+ }
+submit:
+ ret = io_uring_enter(ring->ring_fd, submitted, 0,
+ IORING_ENTER_GETEVENTS, NULL);
+ return ret < 0 ? -errno : ret;
+}
+
+static inline void io_uring_queue_exit(struct io_uring *ring)
+{
+ struct io_uring_sq *sq = &ring->sq;
+
+ munmap(sq->sqes, *sq->kring_entries * sizeof(struct io_uring_sqe));
+ munmap(sq->khead, sq->ring_sz);
+ close(ring->ring_fd);
+}
+
+/* Prepare and send the SQE */
+static inline void io_uring_prep_cmd(struct io_uring_sqe *sqe, int op,
+ int sockfd,
+ int level, int optname,
+ const void *optval,
+ int optlen)
+{
+ memset(sqe, 0, sizeof(*sqe));
+ sqe->opcode = (__u8)IORING_OP_URING_CMD;
+ sqe->fd = sockfd;
+ sqe->cmd_op = op;
+
+ sqe->level = level;
+ sqe->optname = optname;
+ sqe->optval = (unsigned long long)optval;
+ sqe->optlen = optlen;
+}
+
+static inline int io_uring_register_buffers(struct io_uring *ring,
+ const struct iovec *iovecs,
+ unsigned int nr_iovecs)
+{
+ int ret;
+
+ ret = syscall(__NR_io_uring_register, ring->ring_fd,
+ IORING_REGISTER_BUFFERS, iovecs, nr_iovecs);
+ return (ret < 0) ? -errno : ret;
+}
+
+static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd,
+ const void *buf, size_t len, int flags)
+{
+ memset(sqe, 0, sizeof(*sqe));
+ sqe->opcode = (__u8)IORING_OP_SEND;
+ sqe->fd = sockfd;
+ sqe->addr = (unsigned long)buf;
+ sqe->len = len;
+ sqe->msg_flags = (__u32)flags;
+}
+
+static inline void io_uring_prep_sendzc(struct io_uring_sqe *sqe, int sockfd,
+ const void *buf, size_t len, int flags,
+ unsigned int zc_flags)
+{
+ io_uring_prep_send(sqe, sockfd, buf, len, flags);
+ sqe->opcode = (__u8)IORING_OP_SEND_ZC;
+ sqe->ioprio = zc_flags;
+}
+
+static inline void io_uring_cqe_seen(struct io_uring *ring)
+{
+ *(&ring->cq)->khead += 1;
+ write_barrier();
+}
diff --git a/tools/include/linux/align.h b/tools/include/linux/align.h
new file mode 100644
index 000000000000..14e34ace80dd
--- /dev/null
+++ b/tools/include/linux/align.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _TOOLS_LINUX_ALIGN_H
+#define _TOOLS_LINUX_ALIGN_H
+
+#include <uapi/linux/const.h>
+
+#define ALIGN(x, a) __ALIGN_KERNEL((x), (a))
+#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a))
+#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0)
+
+#endif /* _TOOLS_LINUX_ALIGN_H */
diff --git a/tools/include/linux/args.h b/tools/include/linux/args.h
new file mode 100644
index 000000000000..2e8e65d975c7
--- /dev/null
+++ b/tools/include/linux/args.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_ARGS_H
+#define _LINUX_ARGS_H
+
+/*
+ * How do these macros work?
+ *
+ * In __COUNT_ARGS() _0 to _12 are just placeholders from the start
+ * in order to make sure _n is positioned over the correct number
+ * from 12 to 0 (depending on X, which is a variadic argument list).
+ * They serve no purpose other than occupying a position. Since each
+ * macro parameter must have a distinct identifier, those identifiers
+ * are as good as any.
+ *
+ * In COUNT_ARGS() we use actual integers, so __COUNT_ARGS() returns
+ * that as _n.
+ */
+
+/* This counts to 15. Any more, it will return 16th argument. */
+#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _n, X...) _n
+#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+
+/* Concatenate two parameters, but allow them to be expanded beforehand. */
+#define __CONCAT(a, b) a ## b
+#define CONCATENATE(a, b) __CONCAT(a, b)
+
+#endif /* _LINUX_ARGS_H */
diff --git a/tools/include/linux/atomic.h b/tools/include/linux/atomic.h
index 01907b33537e..50c66ba9ada5 100644
--- a/tools/include/linux/atomic.h
+++ b/tools/include/linux/atomic.h
@@ -12,4 +12,26 @@ void atomic_long_set(atomic_long_t *v, long i);
#define atomic_cmpxchg_release atomic_cmpxchg
#endif /* atomic_cmpxchg_relaxed */
+static inline bool atomic_try_cmpxchg(atomic_t *ptr, int *oldp, int new)
+{
+ int ret, old = *oldp;
+
+ ret = atomic_cmpxchg(ptr, old, new);
+ if (ret != old)
+ *oldp = ret;
+ return ret == old;
+}
+
+static inline bool atomic_inc_unless_negative(atomic_t *v)
+{
+ int c = atomic_read(v);
+
+ do {
+ if (unlikely(c < 0))
+ return false;
+ } while (!atomic_try_cmpxchg(v, &c, c + 1));
+
+ return true;
+}
+
#endif /* __TOOLS_LINUX_ATOMIC_H */
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index f3566ea0f932..0d992245c600 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -3,6 +3,8 @@
#define _TOOLS_LINUX_BITMAP_H
#include <string.h>
+#include <asm-generic/bitsperlong.h>
+#include <linux/align.h>
#include <linux/bitops.h>
#include <linux/find.h>
#include <stdlib.h>
@@ -18,20 +20,22 @@ bool __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int bits);
bool __bitmap_equal(const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int bits);
-void bitmap_clear(unsigned long *map, unsigned int start, int len);
+void __bitmap_set(unsigned long *map, unsigned int start, int len);
+void __bitmap_clear(unsigned long *map, unsigned int start, int len);
bool __bitmap_intersects(const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int bits);
#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
#define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1)))
+#define bitmap_size(nbits) (ALIGN(nbits, BITS_PER_LONG) / BITS_PER_BYTE)
+
static inline void bitmap_zero(unsigned long *dst, unsigned int nbits)
{
if (small_const_nbits(nbits))
*dst = 0UL;
else {
- int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
- memset(dst, 0, len);
+ memset(dst, 0, bitmap_size(nbits));
}
}
@@ -77,13 +81,18 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
__bitmap_or(dst, src1, src2, nbits);
}
+static inline unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags __maybe_unused)
+{
+ return malloc(bitmap_size(nbits));
+}
+
/**
* bitmap_zalloc - Allocate bitmap
* @nbits: Number of bits
*/
static inline unsigned long *bitmap_zalloc(int nbits)
{
- return calloc(1, BITS_TO_LONGS(nbits) * sizeof(unsigned long));
+ return calloc(1, bitmap_size(nbits));
}
/*
@@ -126,7 +135,6 @@ static inline bool bitmap_and(unsigned long *dst, const unsigned long *src1,
#define BITMAP_MEM_ALIGNMENT (8 * sizeof(unsigned long))
#endif
#define BITMAP_MEM_MASK (BITMAP_MEM_ALIGNMENT - 1)
-#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0)
static inline bool bitmap_equal(const unsigned long *src1,
const unsigned long *src2, unsigned int nbits)
@@ -149,4 +157,34 @@ static inline bool bitmap_intersects(const unsigned long *src1,
return __bitmap_intersects(src1, src2, nbits);
}
+static inline void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits)
+{
+ if (__builtin_constant_p(nbits) && nbits == 1)
+ __set_bit(start, map);
+ else if (small_const_nbits(start + nbits))
+ *map |= GENMASK(start + nbits - 1, start);
+ else if (__builtin_constant_p(start & BITMAP_MEM_MASK) &&
+ IS_ALIGNED(start, BITMAP_MEM_ALIGNMENT) &&
+ __builtin_constant_p(nbits & BITMAP_MEM_MASK) &&
+ IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT))
+ memset((char *)map + start / 8, 0xff, nbits / 8);
+ else
+ __bitmap_set(map, start, nbits);
+}
+
+static inline void bitmap_clear(unsigned long *map, unsigned int start,
+ unsigned int nbits)
+{
+ if (__builtin_constant_p(nbits) && nbits == 1)
+ __clear_bit(start, map);
+ else if (small_const_nbits(start + nbits))
+ *map &= ~GENMASK(start + nbits - 1, start);
+ else if (__builtin_constant_p(start & BITMAP_MEM_MASK) &&
+ IS_ALIGNED(start, BITMAP_MEM_ALIGNMENT) &&
+ __builtin_constant_p(nbits & BITMAP_MEM_MASK) &&
+ IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT))
+ memset((char *)map + start / 8, 0, nbits / 8);
+ else
+ __bitmap_clear(map, start, nbits);
+}
#endif /* _TOOLS_LINUX_BITMAP_H */
diff --git a/tools/include/linux/bitops.h b/tools/include/linux/bitops.h
index f18683b95ea6..b4e4cd071f8c 100644
--- a/tools/include/linux/bitops.h
+++ b/tools/include/linux/bitops.h
@@ -20,6 +20,8 @@
#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(u32))
#define BITS_TO_BYTES(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(char))
+#define BYTES_TO_BITS(nb) ((nb) * BITS_PER_BYTE)
+
extern unsigned int __sw_hweight8(unsigned int w);
extern unsigned int __sw_hweight16(unsigned int w);
extern unsigned int __sw_hweight32(unsigned int w);
@@ -70,7 +72,7 @@ static inline unsigned long hweight_long(unsigned long w)
return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
}
-static inline unsigned fls_long(unsigned long l)
+static inline unsigned int fls_long(unsigned long l)
{
if (sizeof(l) == 4)
return fls(l);
@@ -87,4 +89,15 @@ static inline __u32 rol32(__u32 word, unsigned int shift)
return (word << shift) | (word >> ((-shift) & 31));
}
+/**
+ * sign_extend64 - sign extend a 64-bit value using specified bit as sign-bit
+ * @value: value to sign extend
+ * @index: 0 based bit index (0<=index<64) to sign bit
+ */
+static __always_inline __s64 sign_extend64(__u64 value, int index)
+{
+ __u8 shift = 63 - index;
+ return (__s64)(value << shift) >> shift;
+}
+
#endif
diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h
index 87d112650dfb..a40cc861b3a7 100644
--- a/tools/include/linux/bits.h
+++ b/tools/include/linux/bits.h
@@ -2,16 +2,15 @@
#ifndef __LINUX_BITS_H
#define __LINUX_BITS_H
-#include <linux/const.h>
#include <vdso/bits.h>
-#include <asm/bitsperlong.h>
+#include <uapi/linux/bits.h>
-#define BIT_ULL(nr) (ULL(1) << (nr))
#define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG))
#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
#define BIT_ULL_MASK(nr) (ULL(1) << ((nr) % BITS_PER_LONG_LONG))
#define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG)
#define BITS_PER_BYTE 8
+#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
/*
* Create a contiguous bitmask starting at bit position @l and ending at
@@ -19,28 +18,72 @@
* GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000.
*/
#if !defined(__ASSEMBLY__)
+
+/*
+ * Missing asm support
+ *
+ * GENMASK_U*() and BIT_U*() depend on BITS_PER_TYPE() which relies on sizeof(),
+ * something not available in asm. Nevertheless, fixed width integers is a C
+ * concept. Assembly code can rely on the long and long long versions instead.
+ */
+
#include <linux/build_bug.h>
-#define GENMASK_INPUT_CHECK(h, l) \
- (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \
- __is_constexpr((l) > (h)), (l) > (h), 0)))
-#else
+#include <linux/compiler.h>
+#include <linux/overflow.h>
+
+#define GENMASK_INPUT_CHECK(h, l) BUILD_BUG_ON_ZERO(const_true((l) > (h)))
+
+/*
+ * Generate a mask for the specified type @t. Additional checks are made to
+ * guarantee the value returned fits in that type, relying on
+ * -Wshift-count-overflow compiler check to detect incompatible arguments.
+ * For example, all these create build errors or warnings:
+ *
+ * - GENMASK(15, 20): wrong argument order
+ * - GENMASK(72, 15): doesn't fit unsigned long
+ * - GENMASK_U32(33, 15): doesn't fit in a u32
+ */
+#define GENMASK_TYPE(t, h, l) \
+ ((t)(GENMASK_INPUT_CHECK(h, l) + \
+ (type_max(t) << (l) & \
+ type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h)))))
+
+#define GENMASK(h, l) GENMASK_TYPE(unsigned long, h, l)
+#define GENMASK_ULL(h, l) GENMASK_TYPE(unsigned long long, h, l)
+
+#define GENMASK_U8(h, l) GENMASK_TYPE(u8, h, l)
+#define GENMASK_U16(h, l) GENMASK_TYPE(u16, h, l)
+#define GENMASK_U32(h, l) GENMASK_TYPE(u32, h, l)
+#define GENMASK_U64(h, l) GENMASK_TYPE(u64, h, l)
+#define GENMASK_U128(h, l) GENMASK_TYPE(u128, h, l)
+
+/*
+ * Fixed-type variants of BIT(), with additional checks like GENMASK_TYPE(). The
+ * following examples generate compiler warnings due to -Wshift-count-overflow:
+ *
+ * - BIT_U8(8)
+ * - BIT_U32(-1)
+ * - BIT_U32(40)
+ */
+#define BIT_INPUT_CHECK(type, nr) \
+ BUILD_BUG_ON_ZERO(const_true((nr) >= BITS_PER_TYPE(type)))
+
+#define BIT_TYPE(type, nr) ((type)(BIT_INPUT_CHECK(type, nr) + BIT_ULL(nr)))
+
+#define BIT_U8(nr) BIT_TYPE(u8, nr)
+#define BIT_U16(nr) BIT_TYPE(u16, nr)
+#define BIT_U32(nr) BIT_TYPE(u32, nr)
+#define BIT_U64(nr) BIT_TYPE(u64, nr)
+
+#else /* defined(__ASSEMBLY__) */
+
/*
* BUILD_BUG_ON_ZERO is not available in h files included from asm files,
* disable the input check if that is the case.
*/
-#define GENMASK_INPUT_CHECK(h, l) 0
-#endif
-
-#define __GENMASK(h, l) \
- (((~UL(0)) - (UL(1) << (l)) + 1) & \
- (~UL(0) >> (BITS_PER_LONG - 1 - (h))))
-#define GENMASK(h, l) \
- (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
-
-#define __GENMASK_ULL(h, l) \
- (((~ULL(0)) - (ULL(1) << (l)) + 1) & \
- (~ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h))))
-#define GENMASK_ULL(h, l) \
- (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l))
+#define GENMASK(h, l) __GENMASK(h, l)
+#define GENMASK_ULL(h, l) __GENMASK_ULL(h, l)
+
+#endif /* !defined(__ASSEMBLY__) */
#endif /* __LINUX_BITS_H */
diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h
index 71e54b1e3796..72ea363d434d 100644
--- a/tools/include/linux/btf_ids.h
+++ b/tools/include/linux/btf_ids.h
@@ -3,11 +3,22 @@
#ifndef _LINUX_BTF_IDS_H
#define _LINUX_BTF_IDS_H
+#include <linux/types.h> /* for u32 */
+
struct btf_id_set {
u32 cnt;
u32 ids[];
};
+struct btf_id_set8 {
+ u32 cnt;
+ u32 flags;
+ struct {
+ u32 id;
+ u32 flags;
+ } pairs[];
+};
+
#ifdef CONFIG_DEBUG_INFO_BTF
#include <linux/compiler.h> /* for __PASTE */
@@ -38,7 +49,7 @@ asm( \
____BTF_ID(symbol)
#define __ID(prefix) \
- __PASTE(prefix, __COUNTER__)
+ __PASTE(__PASTE(prefix, __COUNTER__), __LINE__)
/*
* The BTF_ID defines unique symbol for each ID pointing
diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h
index b4898ff085de..ab2aa97bd8ce 100644
--- a/tools/include/linux/build_bug.h
+++ b/tools/include/linux/build_bug.h
@@ -4,17 +4,17 @@
#include <linux/compiler.h>
-#ifdef __CHECKER__
-#define BUILD_BUG_ON_ZERO(e) (0)
-#else /* __CHECKER__ */
/*
* Force a compilation error if condition is true, but also produce a
* result (of value 0 and type int), so the expression can be used
* e.g. in a structure initializer (or where-ever else comma expressions
* aren't permitted).
+ *
+ * Take an error message as an optional second argument. If omitted,
+ * default to the stringification of the tested expression.
*/
-#define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))
-#endif /* __CHECKER__ */
+#define BUILD_BUG_ON_ZERO(e, ...) \
+ __BUILD_BUG_ON_ZERO_MSG(e, ##__VA_ARGS__, #e " is true")
/* Force a compilation error if a constant expression is not a power of 2 */
#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \
diff --git a/tools/include/linux/cfi_types.h b/tools/include/linux/cfi_types.h
new file mode 100644
index 000000000000..a86af9bc8bdc
--- /dev/null
+++ b/tools/include/linux/cfi_types.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Clang Control Flow Integrity (CFI) type definitions.
+ */
+#ifndef _LINUX_CFI_TYPES_H
+#define _LINUX_CFI_TYPES_H
+
+#ifdef __ASSEMBLY__
+#include <linux/linkage.h>
+
+#ifdef CONFIG_CFI
+/*
+ * Use the __kcfi_typeid_<function> type identifier symbol to
+ * annotate indirectly called assembly functions. The compiler emits
+ * these symbols for all address-taken function declarations in C
+ * code.
+ */
+#ifndef __CFI_TYPE
+#define __CFI_TYPE(name) \
+ .4byte __kcfi_typeid_##name
+#endif
+
+#define SYM_TYPED_ENTRY(name, linkage, align...) \
+ linkage(name) ASM_NL \
+ align ASM_NL \
+ __CFI_TYPE(name) ASM_NL \
+ name:
+
+#define SYM_TYPED_START(name, linkage, align...) \
+ SYM_TYPED_ENTRY(name, linkage, align)
+
+#else /* CONFIG_CFI */
+
+#define SYM_TYPED_START(name, linkage, align...) \
+ SYM_START(name, linkage, align)
+
+#endif /* CONFIG_CFI */
+
+#ifndef SYM_TYPED_FUNC_START
+#define SYM_TYPED_FUNC_START(name) \
+ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
+#endif
+
+#else /* __ASSEMBLY__ */
+
+#ifdef CONFIG_CFI
+#define DEFINE_CFI_TYPE(name, func) \
+ /* \
+ * Force a reference to the function so the compiler generates \
+ * __kcfi_typeid_<func>. \
+ */ \
+ __ADDRESSABLE(func); \
+ /* u32 name __ro_after_init = __kcfi_typeid_<func> */ \
+ extern u32 name; \
+ asm ( \
+ " .pushsection .data..ro_after_init,\"aw\",\%progbits \n" \
+ " .type " #name ",\%object \n" \
+ " .globl " #name " \n" \
+ " .p2align 2, 0x0 \n" \
+ #name ": \n" \
+ " .4byte __kcfi_typeid_" #func " \n" \
+ " .size " #name ", 4 \n" \
+ " .popsection \n" \
+ );
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif /* _LINUX_CFI_TYPES_H */
diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h
index 8816f06fc6c7..e20f98e14e81 100644
--- a/tools/include/linux/compiler-gcc.h
+++ b/tools/include/linux/compiler-gcc.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _TOOLS_LINUX_COMPILER_H_
-#error "Please don't include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead."
+#error "Please do not include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead."
#endif
/*
@@ -12,8 +12,10 @@
+ __GNUC_PATCHLEVEL__)
#endif
-#if GCC_VERSION >= 70000 && !defined(__CHECKER__)
-# define __fallthrough __attribute__ ((fallthrough))
+#if __has_attribute(__fallthrough__)
+# define fallthrough __attribute__((__fallthrough__))
+#else
+# define fallthrough do {} while (0) /* fallthrough */
#endif
#if __has_attribute(__error__)
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index 90ba44a99199..f40bd2b04c29 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -2,6 +2,8 @@
#ifndef _TOOLS_LINUX_COMPILER_H_
#define _TOOLS_LINUX_COMPILER_H_
+#ifndef __ASSEMBLY__
+
#include <linux/compiler_types.h>
#ifndef __compiletime_error
@@ -42,15 +44,65 @@
# define __always_inline inline __attribute__((always_inline))
#endif
+#ifndef __always_unused
+#define __always_unused __attribute__((__unused__))
+#endif
+
+#ifndef __noreturn
+#define __noreturn __attribute__((__noreturn__))
+#endif
+
+#ifndef unreachable
+#define unreachable() __builtin_unreachable()
+#endif
+
#ifndef noinline
#define noinline
#endif
+#ifndef __nocf_check
+#define __nocf_check __attribute__((nocf_check))
+#endif
+
+#ifndef __naked
+#define __naked __attribute__((__naked__))
+#endif
+
/* Are two types/vars the same type (ignoring qualifiers)? */
#ifndef __same_type
# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
#endif
+/*
+ * This returns a constant expression while determining if an argument is
+ * a constant expression, most importantly without evaluating the argument.
+ * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de>
+ */
+#define __is_constexpr(x) \
+ (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
+
+/*
+ * Similar to statically_true() but produces a constant expression
+ *
+ * To be used in conjunction with macros, such as BUILD_BUG_ON_ZERO(),
+ * which require their input to be a constant expression and for which
+ * statically_true() would otherwise fail.
+ *
+ * This is a trade-off: const_true() requires all its operands to be
+ * compile time constants. Else, it would always returns false even on
+ * the most trivial cases like:
+ *
+ * true || non_const_var
+ *
+ * On the opposite, statically_true() is able to fold more complex
+ * tautologies and will return true on expressions such as:
+ *
+ * !(non_const_var * 8 % 4)
+ *
+ * For the general case, statically_true() is better.
+ */
+#define const_true(x) __builtin_choose_expr(__is_constexpr(x), x, false)
+
#ifdef __ANDROID__
/*
* FIXME: Big hammer to get rid of tons of:
@@ -86,6 +138,10 @@
# define __force
#endif
+#ifndef __iomem
+# define __iomem
+#endif
+
#ifndef __weak
# define __weak __attribute__((weak))
#endif
@@ -98,10 +154,6 @@
# define unlikely(x) __builtin_expect(!!(x), 0)
#endif
-#ifndef __init
-# define __init
-#endif
-
#include <linux/types.h>
/*
@@ -186,12 +238,24 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
})
-#ifndef __fallthrough
-# define __fallthrough
-#endif
-
/* Indirect macros required for expanded argument pasting, eg. __LINE__. */
#define ___PASTE(a, b) a##b
#define __PASTE(a, b) ___PASTE(a, b)
+#ifndef OPTIMIZER_HIDE_VAR
+/* Make the optimizer believe the variable can be manipulated arbitrarily. */
+#define OPTIMIZER_HIDE_VAR(var) \
+ __asm__ ("" : "=r" (var) : "0" (var))
+#endif
+
+#ifndef __BUILD_BUG_ON_ZERO_MSG
+#if defined(__clang__)
+#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) ((int)(sizeof(struct { int:(-!!(e)); })))
+#else
+#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) ((int)sizeof(struct {_Static_assert(!(e), msg);}))
+#endif
+#endif
+
+#endif /* __ASSEMBLY__ */
+
#endif /* _TOOLS_LINUX_COMPILER_H */
diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h
index 1bdd834bdd57..d09f9dc172a4 100644
--- a/tools/include/linux/compiler_types.h
+++ b/tools/include/linux/compiler_types.h
@@ -36,8 +36,8 @@
#include <linux/compiler-gcc.h>
#endif
-#ifndef asm_volatile_goto
-#define asm_volatile_goto(x...) asm goto(x)
+#ifndef asm_goto_output
+#define asm_goto_output(x...) asm goto(x)
#endif
#endif /* __LINUX_COMPILER_TYPES_H */
diff --git a/tools/include/linux/const.h b/tools/include/linux/const.h
index 435ddd72d2c4..81b8aae5a855 100644
--- a/tools/include/linux/const.h
+++ b/tools/include/linux/const.h
@@ -3,12 +3,4 @@
#include <vdso/const.h>
-/*
- * This returns a constant expression while determining if an argument is
- * a constant expression, most importantly without evaluating the argument.
- * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de>
- */
-#define __is_constexpr(x) \
- (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
-
#endif /* _LINUX_CONST_H */
diff --git a/tools/include/linux/container_of.h b/tools/include/linux/container_of.h
new file mode 100644
index 000000000000..c879e14c3dd6
--- /dev/null
+++ b/tools/include/linux/container_of.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_CONTAINER_OF_H
+#define _TOOLS_LINUX_CONTAINER_OF_H
+
+#ifndef container_of
+/**
+ * container_of - cast a member of a structure out to the containing structure
+ * @ptr: the pointer to the member.
+ * @type: the type of the container struct this is embedded in.
+ * @member: the name of the member within the struct.
+ *
+ */
+#define container_of(ptr, type, member) ({ \
+ const typeof(((type *)0)->member) * __mptr = (ptr); \
+ (type *)((char *)__mptr - offsetof(type, member)); })
+#endif
+
+#endif /* _TOOLS_LINUX_CONTAINER_OF_H */
diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h
index 6c2fd6cc5a98..89b0ac0014b0 100644
--- a/tools/include/linux/coresight-pmu.h
+++ b/tools/include/linux/coresight-pmu.h
@@ -7,8 +7,19 @@
#ifndef _LINUX_CORESIGHT_PMU_H
#define _LINUX_CORESIGHT_PMU_H
+#include <linux/bits.h>
+
#define CORESIGHT_ETM_PMU_NAME "cs_etm"
-#define CORESIGHT_ETM_PMU_SEED 0x10
+
+/*
+ * The legacy Trace ID system based on fixed calculation from the cpu
+ * number. This has been replaced by drivers using a dynamic allocation
+ * system - but need to retain the legacy algorithm for backward comparibility
+ * in certain situations:-
+ * a) new perf running on older systems that generate the legacy mapping
+ * b) older tools that may not update at the same time as the kernel.
+ */
+#define CORESIGHT_LEGACY_CPU_TRACE_ID(cpu) (0x10 + (cpu * 2))
/*
* Below are the definition of bit offsets for perf option, and works as
@@ -34,15 +45,25 @@
#define ETM4_CFG_BIT_RETSTK 12
#define ETM4_CFG_BIT_VMID_OPT 15
-static inline int coresight_get_trace_id(int cpu)
-{
- /*
- * A trace ID of value 0 is invalid, so let's start at some
- * random value that fits in 7 bits and go from there. Since
- * the common convention is to have data trace IDs be I(N) + 1,
- * set instruction trace IDs as a function of the CPU number.
- */
- return (CORESIGHT_ETM_PMU_SEED + (cpu * 2));
-}
+/*
+ * Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload.
+ * Used to associate a CPU with the CoreSight Trace ID.
+ * [07:00] - Trace ID - uses 8 bits to make value easy to read in file.
+ * [39:08] - Sink ID - as reported in /sys/bus/event_source/devices/cs_etm/sinks/
+ * Added in minor version 1.
+ * [55:40] - Unused (SBZ)
+ * [59:56] - Minor Version - previously existing fields are compatible with
+ * all minor versions.
+ * [63:60] - Major Version - previously existing fields mean different things
+ * in new major versions.
+ */
+#define CS_AUX_HW_ID_TRACE_ID_MASK GENMASK_ULL(7, 0)
+#define CS_AUX_HW_ID_SINK_ID_MASK GENMASK_ULL(39, 8)
+
+#define CS_AUX_HW_ID_MINOR_VERSION_MASK GENMASK_ULL(59, 56)
+#define CS_AUX_HW_ID_MAJOR_VERSION_MASK GENMASK_ULL(63, 60)
+
+#define CS_AUX_HW_ID_MAJOR_VERSION 0
+#define CS_AUX_HW_ID_MINOR_VERSION 1
#endif
diff --git a/tools/include/linux/err.h b/tools/include/linux/err.h
index 25f2bb3a991d..332b983ead1e 100644
--- a/tools/include/linux/err.h
+++ b/tools/include/linux/err.h
@@ -20,7 +20,7 @@
* Userspace note:
* The same principle works for userspace, because 'error' pointers
* fall down to the unused hole far from user space, as described
- * in Documentation/x86/x86_64/mm.rst for x86_64 arch:
+ * in Documentation/arch/x86/x86_64/mm.rst for x86_64 arch:
*
* 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm hole caused by [48:63] sign extension
* ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h
index 736bdeccdfe4..bcc6df79301a 100644
--- a/tools/include/linux/filter.h
+++ b/tools/include/linux/filter.h
@@ -111,6 +111,24 @@
.off = 0, \
.imm = IMM })
+/* Short form of movsx, dst_reg = (s8,s16,s32)src_reg */
+
+#define BPF_MOVSX64_REG(DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+#define BPF_MOVSX32_REG(DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
/* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */
#define BPF_MOV64_RAW(TYPE, DST, SRC, IMM) \
@@ -255,6 +273,16 @@
.off = OFF, \
.imm = 0 })
+/* Unconditional jumps, gotol pc + imm32 */
+
+#define BPF_JMP32_A(IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP32 | BPF_JA, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
/* Function call */
#define BPF_EMIT_CALL(FUNC) \
diff --git a/tools/include/linux/gfp_types.h b/tools/include/linux/gfp_types.h
index 5f9f1ed190a0..65db9349f905 100644
--- a/tools/include/linux/gfp_types.h
+++ b/tools/include/linux/gfp_types.h
@@ -1 +1,392 @@
-#include "../../../include/linux/gfp_types.h"
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_GFP_TYPES_H
+#define __LINUX_GFP_TYPES_H
+
+#include <linux/bits.h>
+
+/* The typedef is in types.h but we want the documentation here */
+#if 0
+/**
+ * typedef gfp_t - Memory allocation flags.
+ *
+ * GFP flags are commonly used throughout Linux to indicate how memory
+ * should be allocated. The GFP acronym stands for get_free_pages(),
+ * the underlying memory allocation function. Not every GFP flag is
+ * supported by every function which may allocate memory. Most users
+ * will want to use a plain ``GFP_KERNEL``.
+ */
+typedef unsigned int __bitwise gfp_t;
+#endif
+
+/*
+ * In case of changes, please don't forget to update
+ * include/trace/events/mmflags.h and tools/perf/builtin-kmem.c
+ */
+
+enum {
+ ___GFP_DMA_BIT,
+ ___GFP_HIGHMEM_BIT,
+ ___GFP_DMA32_BIT,
+ ___GFP_MOVABLE_BIT,
+ ___GFP_RECLAIMABLE_BIT,
+ ___GFP_HIGH_BIT,
+ ___GFP_IO_BIT,
+ ___GFP_FS_BIT,
+ ___GFP_ZERO_BIT,
+ ___GFP_UNUSED_BIT, /* 0x200u unused */
+ ___GFP_DIRECT_RECLAIM_BIT,
+ ___GFP_KSWAPD_RECLAIM_BIT,
+ ___GFP_WRITE_BIT,
+ ___GFP_NOWARN_BIT,
+ ___GFP_RETRY_MAYFAIL_BIT,
+ ___GFP_NOFAIL_BIT,
+ ___GFP_NORETRY_BIT,
+ ___GFP_MEMALLOC_BIT,
+ ___GFP_COMP_BIT,
+ ___GFP_NOMEMALLOC_BIT,
+ ___GFP_HARDWALL_BIT,
+ ___GFP_THISNODE_BIT,
+ ___GFP_ACCOUNT_BIT,
+ ___GFP_ZEROTAGS_BIT,
+#ifdef CONFIG_KASAN_HW_TAGS
+ ___GFP_SKIP_ZERO_BIT,
+ ___GFP_SKIP_KASAN_BIT,
+#endif
+#ifdef CONFIG_LOCKDEP
+ ___GFP_NOLOCKDEP_BIT,
+#endif
+#ifdef CONFIG_SLAB_OBJ_EXT
+ ___GFP_NO_OBJ_EXT_BIT,
+#endif
+ ___GFP_LAST_BIT
+};
+
+/* Plain integer GFP bitmasks. Do not use this directly. */
+#define ___GFP_DMA BIT(___GFP_DMA_BIT)
+#define ___GFP_HIGHMEM BIT(___GFP_HIGHMEM_BIT)
+#define ___GFP_DMA32 BIT(___GFP_DMA32_BIT)
+#define ___GFP_MOVABLE BIT(___GFP_MOVABLE_BIT)
+#define ___GFP_RECLAIMABLE BIT(___GFP_RECLAIMABLE_BIT)
+#define ___GFP_HIGH BIT(___GFP_HIGH_BIT)
+#define ___GFP_IO BIT(___GFP_IO_BIT)
+#define ___GFP_FS BIT(___GFP_FS_BIT)
+#define ___GFP_ZERO BIT(___GFP_ZERO_BIT)
+/* 0x200u unused */
+#define ___GFP_DIRECT_RECLAIM BIT(___GFP_DIRECT_RECLAIM_BIT)
+#define ___GFP_KSWAPD_RECLAIM BIT(___GFP_KSWAPD_RECLAIM_BIT)
+#define ___GFP_WRITE BIT(___GFP_WRITE_BIT)
+#define ___GFP_NOWARN BIT(___GFP_NOWARN_BIT)
+#define ___GFP_RETRY_MAYFAIL BIT(___GFP_RETRY_MAYFAIL_BIT)
+#define ___GFP_NOFAIL BIT(___GFP_NOFAIL_BIT)
+#define ___GFP_NORETRY BIT(___GFP_NORETRY_BIT)
+#define ___GFP_MEMALLOC BIT(___GFP_MEMALLOC_BIT)
+#define ___GFP_COMP BIT(___GFP_COMP_BIT)
+#define ___GFP_NOMEMALLOC BIT(___GFP_NOMEMALLOC_BIT)
+#define ___GFP_HARDWALL BIT(___GFP_HARDWALL_BIT)
+#define ___GFP_THISNODE BIT(___GFP_THISNODE_BIT)
+#define ___GFP_ACCOUNT BIT(___GFP_ACCOUNT_BIT)
+#define ___GFP_ZEROTAGS BIT(___GFP_ZEROTAGS_BIT)
+#ifdef CONFIG_KASAN_HW_TAGS
+#define ___GFP_SKIP_ZERO BIT(___GFP_SKIP_ZERO_BIT)
+#define ___GFP_SKIP_KASAN BIT(___GFP_SKIP_KASAN_BIT)
+#else
+#define ___GFP_SKIP_ZERO 0
+#define ___GFP_SKIP_KASAN 0
+#endif
+#ifdef CONFIG_LOCKDEP
+#define ___GFP_NOLOCKDEP BIT(___GFP_NOLOCKDEP_BIT)
+#else
+#define ___GFP_NOLOCKDEP 0
+#endif
+#ifdef CONFIG_SLAB_OBJ_EXT
+#define ___GFP_NO_OBJ_EXT BIT(___GFP_NO_OBJ_EXT_BIT)
+#else
+#define ___GFP_NO_OBJ_EXT 0
+#endif
+
+/*
+ * Physical address zone modifiers (see linux/mmzone.h - low four bits)
+ *
+ * Do not put any conditional on these. If necessary modify the definitions
+ * without the underscores and use them consistently. The definitions here may
+ * be used in bit comparisons.
+ */
+#define __GFP_DMA ((__force gfp_t)___GFP_DMA)
+#define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM)
+#define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32)
+#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */
+#define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
+
+/**
+ * DOC: Page mobility and placement hints
+ *
+ * Page mobility and placement hints
+ * ---------------------------------
+ *
+ * These flags provide hints about how mobile the page is. Pages with similar
+ * mobility are placed within the same pageblocks to minimise problems due
+ * to external fragmentation.
+ *
+ * %__GFP_MOVABLE (also a zone modifier) indicates that the page can be
+ * moved by page migration during memory compaction or can be reclaimed.
+ *
+ * %__GFP_RECLAIMABLE is used for slab allocations that specify
+ * SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers.
+ *
+ * %__GFP_WRITE indicates the caller intends to dirty the page. Where possible,
+ * these pages will be spread between local zones to avoid all the dirty
+ * pages being in one zone (fair zone allocation policy).
+ *
+ * %__GFP_HARDWALL enforces the cpuset memory allocation policy.
+ *
+ * %__GFP_THISNODE forces the allocation to be satisfied from the requested
+ * node with no fallbacks or placement policy enforcements.
+ *
+ * %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg.
+ *
+ * %__GFP_NO_OBJ_EXT causes slab allocation to have no object extension.
+ */
+#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE)
+#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE)
+#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL)
+#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE)
+#define __GFP_ACCOUNT ((__force gfp_t)___GFP_ACCOUNT)
+#define __GFP_NO_OBJ_EXT ((__force gfp_t)___GFP_NO_OBJ_EXT)
+
+/**
+ * DOC: Watermark modifiers
+ *
+ * Watermark modifiers -- controls access to emergency reserves
+ * ------------------------------------------------------------
+ *
+ * %__GFP_HIGH indicates that the caller is high-priority and that granting
+ * the request is necessary before the system can make forward progress.
+ * For example creating an IO context to clean pages and requests
+ * from atomic context.
+ *
+ * %__GFP_MEMALLOC allows access to all memory. This should only be used when
+ * the caller guarantees the allocation will allow more memory to be freed
+ * very shortly e.g. process exiting or swapping. Users either should
+ * be the MM or co-ordinating closely with the VM (e.g. swap over NFS).
+ * Users of this flag have to be extremely careful to not deplete the reserve
+ * completely and implement a throttling mechanism which controls the
+ * consumption of the reserve based on the amount of freed memory.
+ * Usage of a pre-allocated pool (e.g. mempool) should be always considered
+ * before using this flag.
+ *
+ * %__GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves.
+ * This takes precedence over the %__GFP_MEMALLOC flag if both are set.
+ */
+#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH)
+#define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC)
+#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC)
+
+/**
+ * DOC: Reclaim modifiers
+ *
+ * Reclaim modifiers
+ * -----------------
+ * Please note that all the following flags are only applicable to sleepable
+ * allocations (e.g. %GFP_NOWAIT and %GFP_ATOMIC will ignore them).
+ *
+ * %__GFP_IO can start physical IO.
+ *
+ * %__GFP_FS can call down to the low-level FS. Clearing the flag avoids the
+ * allocator recursing into the filesystem which might already be holding
+ * locks.
+ *
+ * %__GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim.
+ * This flag can be cleared to avoid unnecessary delays when a fallback
+ * option is available.
+ *
+ * %__GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when
+ * the low watermark is reached and have it reclaim pages until the high
+ * watermark is reached. A caller may wish to clear this flag when fallback
+ * options are available and the reclaim is likely to disrupt the system. The
+ * canonical example is THP allocation where a fallback is cheap but
+ * reclaim/compaction may cause indirect stalls.
+ *
+ * %__GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim.
+ *
+ * The default allocator behavior depends on the request size. We have a concept
+ * of so-called costly allocations (with order > %PAGE_ALLOC_COSTLY_ORDER).
+ * !costly allocations are too essential to fail so they are implicitly
+ * non-failing by default (with some exceptions like OOM victims might fail so
+ * the caller still has to check for failures) while costly requests try to be
+ * not disruptive and back off even without invoking the OOM killer.
+ * The following three modifiers might be used to override some of these
+ * implicit rules. Please note that all of them must be used along with
+ * %__GFP_DIRECT_RECLAIM flag.
+ *
+ * %__GFP_NORETRY: The VM implementation will try only very lightweight
+ * memory direct reclaim to get some memory under memory pressure (thus
+ * it can sleep). It will avoid disruptive actions like OOM killer. The
+ * caller must handle the failure which is quite likely to happen under
+ * heavy memory pressure. The flag is suitable when failure can easily be
+ * handled at small cost, such as reduced throughput.
+ *
+ * %__GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim
+ * procedures that have previously failed if there is some indication
+ * that progress has been made elsewhere. It can wait for other
+ * tasks to attempt high-level approaches to freeing memory such as
+ * compaction (which removes fragmentation) and page-out.
+ * There is still a definite limit to the number of retries, but it is
+ * a larger limit than with %__GFP_NORETRY.
+ * Allocations with this flag may fail, but only when there is
+ * genuinely little unused memory. While these allocations do not
+ * directly trigger the OOM killer, their failure indicates that
+ * the system is likely to need to use the OOM killer soon. The
+ * caller must handle failure, but can reasonably do so by failing
+ * a higher-level request, or completing it only in a much less
+ * efficient manner.
+ * If the allocation does fail, and the caller is in a position to
+ * free some non-essential memory, doing so could benefit the system
+ * as a whole.
+ *
+ * %__GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
+ * cannot handle allocation failures. The allocation could block
+ * indefinitely but will never return with failure. Testing for
+ * failure is pointless.
+ * It _must_ be blockable and used together with __GFP_DIRECT_RECLAIM.
+ * It should _never_ be used in non-sleepable contexts.
+ * New users should be evaluated carefully (and the flag should be
+ * used only when there is no reasonable failure policy) but it is
+ * definitely preferable to use the flag rather than opencode endless
+ * loop around allocator.
+ * Allocating pages from the buddy with __GFP_NOFAIL and order > 1 is
+ * not supported. Please consider using kvmalloc() instead.
+ */
+#define __GFP_IO ((__force gfp_t)___GFP_IO)
+#define __GFP_FS ((__force gfp_t)___GFP_FS)
+#define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */
+#define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */
+#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM))
+#define __GFP_RETRY_MAYFAIL ((__force gfp_t)___GFP_RETRY_MAYFAIL)
+#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL)
+#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY)
+
+/**
+ * DOC: Action modifiers
+ *
+ * Action modifiers
+ * ----------------
+ *
+ * %__GFP_NOWARN suppresses allocation failure reports.
+ *
+ * %__GFP_COMP address compound page metadata.
+ *
+ * %__GFP_ZERO returns a zeroed page on success.
+ *
+ * %__GFP_ZEROTAGS zeroes memory tags at allocation time if the memory itself
+ * is being zeroed (either via __GFP_ZERO or via init_on_alloc, provided that
+ * __GFP_SKIP_ZERO is not set). This flag is intended for optimization: setting
+ * memory tags at the same time as zeroing memory has minimal additional
+ * performance impact.
+ *
+ * %__GFP_SKIP_KASAN makes KASAN skip unpoisoning on page allocation.
+ * Used for userspace and vmalloc pages; the latter are unpoisoned by
+ * kasan_unpoison_vmalloc instead. For userspace pages, results in
+ * poisoning being skipped as well, see should_skip_kasan_poison for
+ * details. Only effective in HW_TAGS mode.
+ */
+#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN)
+#define __GFP_COMP ((__force gfp_t)___GFP_COMP)
+#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO)
+#define __GFP_ZEROTAGS ((__force gfp_t)___GFP_ZEROTAGS)
+#define __GFP_SKIP_ZERO ((__force gfp_t)___GFP_SKIP_ZERO)
+#define __GFP_SKIP_KASAN ((__force gfp_t)___GFP_SKIP_KASAN)
+
+/* Disable lockdep for GFP context tracking */
+#define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
+
+/* Room for N __GFP_FOO bits */
+#define __GFP_BITS_SHIFT ___GFP_LAST_BIT
+#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
+
+/**
+ * DOC: Useful GFP flag combinations
+ *
+ * Useful GFP flag combinations
+ * ----------------------------
+ *
+ * Useful GFP flag combinations that are commonly used. It is recommended
+ * that subsystems start with one of these combinations and then set/clear
+ * %__GFP_FOO flags as necessary.
+ *
+ * %GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower
+ * watermark is applied to allow access to "atomic reserves".
+ * The current implementation doesn't support NMI and few other strict
+ * non-preemptive contexts (e.g. raw_spin_lock). The same applies to %GFP_NOWAIT.
+ *
+ * %GFP_KERNEL is typical for kernel-internal allocations. The caller requires
+ * %ZONE_NORMAL or a lower zone for direct access but can direct reclaim.
+ *
+ * %GFP_KERNEL_ACCOUNT is the same as GFP_KERNEL, except the allocation is
+ * accounted to kmemcg.
+ *
+ * %GFP_NOWAIT is for kernel allocations that should not stall for direct
+ * reclaim, start physical IO or use any filesystem callback. It is very
+ * likely to fail to allocate memory, even for very small allocations.
+ *
+ * %GFP_NOIO will use direct reclaim to discard clean pages or slab pages
+ * that do not require the starting of any physical IO.
+ * Please try to avoid using this flag directly and instead use
+ * memalloc_noio_{save,restore} to mark the whole scope which cannot
+ * perform any IO with a short explanation why. All allocation requests
+ * will inherit GFP_NOIO implicitly.
+ *
+ * %GFP_NOFS will use direct reclaim but will not use any filesystem interfaces.
+ * Please try to avoid using this flag directly and instead use
+ * memalloc_nofs_{save,restore} to mark the whole scope which cannot/shouldn't
+ * recurse into the FS layer with a short explanation why. All allocation
+ * requests will inherit GFP_NOFS implicitly.
+ *
+ * %GFP_USER is for userspace allocations that also need to be directly
+ * accessibly by the kernel or hardware. It is typically used by hardware
+ * for buffers that are mapped to userspace (e.g. graphics) that hardware
+ * still must DMA to. cpuset limits are enforced for these allocations.
+ *
+ * %GFP_DMA exists for historical reasons and should be avoided where possible.
+ * The flags indicates that the caller requires that the lowest zone be
+ * used (%ZONE_DMA or 16M on x86-64). Ideally, this would be removed but
+ * it would require careful auditing as some users really require it and
+ * others use the flag to avoid lowmem reserves in %ZONE_DMA and treat the
+ * lowest zone as a type of emergency reserve.
+ *
+ * %GFP_DMA32 is similar to %GFP_DMA except that the caller requires a 32-bit
+ * address. Note that kmalloc(..., GFP_DMA32) does not return DMA32 memory
+ * because the DMA32 kmalloc cache array is not implemented.
+ * (Reason: there is no such user in kernel).
+ *
+ * %GFP_HIGHUSER is for userspace allocations that may be mapped to userspace,
+ * do not need to be directly accessible by the kernel but that cannot
+ * move once in use. An example may be a hardware allocation that maps
+ * data directly into userspace but has no addressing limitations.
+ *
+ * %GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not
+ * need direct access to but can use kmap() when access is required. They
+ * are expected to be movable via page reclaim or page migration. Typically,
+ * pages on the LRU would also be allocated with %GFP_HIGHUSER_MOVABLE.
+ *
+ * %GFP_TRANSHUGE and %GFP_TRANSHUGE_LIGHT are used for THP allocations. They
+ * are compound allocations that will generally fail quickly if memory is not
+ * available and will not wake kswapd/kcompactd on failure. The _LIGHT
+ * version does not attempt reclaim/compaction at all and is by default used
+ * in page fault path, while the non-light is used by khugepaged.
+ */
+#define GFP_ATOMIC (__GFP_HIGH|__GFP_KSWAPD_RECLAIM)
+#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS)
+#define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT)
+#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM | __GFP_NOWARN)
+#define GFP_NOIO (__GFP_RECLAIM)
+#define GFP_NOFS (__GFP_RECLAIM | __GFP_IO)
+#define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
+#define GFP_DMA __GFP_DMA
+#define GFP_DMA32 __GFP_DMA32
+#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM)
+#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE | __GFP_SKIP_KASAN)
+#define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
+ __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
+#define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
+
+#endif /* __LINUX_GFP_TYPES_H */
diff --git a/tools/include/linux/init.h b/tools/include/linux/init.h
new file mode 100644
index 000000000000..51b5cde28639
--- /dev/null
+++ b/tools/include/linux/init.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_INIT_H_
+#define _TOOLS_LINUX_INIT_H_
+
+#include <linux/compiler.h>
+
+#ifndef __init
+# define __init
+#endif
+
+#ifndef __exit
+# define __exit
+#endif
+
+#define __section(section) __attribute__((__section__(section)))
+
+#define __initconst
+#define __meminit
+#define __meminitdata
+#define __refdata
+#define __initdata
+
+struct obs_kernel_param {
+ const char *str;
+ int (*setup_func)(char *st);
+ int early;
+};
+
+#define __setup_param(str, unique_id, fn, early) \
+ static const char __setup_str_##unique_id[] __initconst \
+ __aligned(1) = str; \
+ static struct obs_kernel_param __setup_##unique_id \
+ __used __section(".init.setup") \
+ __aligned(__alignof__(struct obs_kernel_param)) = \
+ { __setup_str_##unique_id, fn, early }
+
+#define __setup(str, fn) \
+ __setup_param(str, fn, fn, 0)
+
+#define early_param(str, fn) \
+ __setup_param(str, fn, fn, 1)
+
+#endif /* _TOOLS_LINUX_INIT_H_ */
diff --git a/tools/include/linux/interval_tree_generic.h b/tools/include/linux/interval_tree_generic.h
index aaa8a0767aa3..c5a2fed49eb0 100644
--- a/tools/include/linux/interval_tree_generic.h
+++ b/tools/include/linux/interval_tree_generic.h
@@ -77,7 +77,7 @@ ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node, \
* Cond2: start <= ITLAST(node) \
*/ \
\
-static ITSTRUCT * \
+ITSTATIC ITSTRUCT * \
ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \
{ \
while (true) { \
@@ -104,12 +104,8 @@ ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \
if (ITSTART(node) <= last) { /* Cond1 */ \
if (start <= ITLAST(node)) /* Cond2 */ \
return node; /* node is leftmost match */ \
- if (node->ITRB.rb_right) { \
- node = rb_entry(node->ITRB.rb_right, \
- ITSTRUCT, ITRB); \
- if (start <= node->ITSUBTREE) \
- continue; \
- } \
+ node = rb_entry(node->ITRB.rb_right, ITSTRUCT, ITRB); \
+ continue; \
} \
return NULL; /* No match */ \
} \
diff --git a/tools/include/linux/io.h b/tools/include/linux/io.h
index e129871fe661..4b94b84160b8 100644
--- a/tools/include/linux/io.h
+++ b/tools/include/linux/io.h
@@ -2,4 +2,6 @@
#ifndef _TOOLS_IO_H
#define _TOOLS_IO_H
-#endif
+#include <asm/io.h>
+
+#endif /* _TOOLS_IO_H */
diff --git a/tools/include/linux/kallsyms.h b/tools/include/linux/kallsyms.h
index 5a37ccbec54f..f61a01dd7eb7 100644
--- a/tools/include/linux/kallsyms.h
+++ b/tools/include/linux/kallsyms.h
@@ -18,6 +18,7 @@ static inline const char *kallsyms_lookup(unsigned long addr,
return NULL;
}
+#ifdef HAVE_BACKTRACE_SUPPORT
#include <execinfo.h>
#include <stdlib.h>
static inline void print_ip_sym(const char *loglvl, unsigned long ip)
@@ -30,5 +31,8 @@ static inline void print_ip_sym(const char *loglvl, unsigned long ip)
free(name);
}
+#else
+static inline void print_ip_sym(const char *loglvl, unsigned long ip) {}
+#endif
#endif
diff --git a/tools/include/linux/kasan-tags.h b/tools/include/linux/kasan-tags.h
new file mode 100644
index 000000000000..4f85f562512c
--- /dev/null
+++ b/tools/include/linux/kasan-tags.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_KASAN_TAGS_H
+#define _LINUX_KASAN_TAGS_H
+
+#define KASAN_TAG_KERNEL 0xFF /* native kernel pointers tag */
+#define KASAN_TAG_INVALID 0xFE /* inaccessible memory tag */
+#define KASAN_TAG_MAX 0xFD /* maximum value for random tags */
+
+#ifdef CONFIG_KASAN_HW_TAGS
+#define KASAN_TAG_MIN 0xF0 /* minimum value for random tags */
+#else
+#define KASAN_TAG_MIN 0x00 /* minimum value for random tags */
+#endif
+
+#endif /* LINUX_KASAN_TAGS_H */
diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
index 4b0673bf52c2..c8c18d3908a9 100644
--- a/tools/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -8,8 +8,10 @@
#include <linux/build_bug.h>
#include <linux/compiler.h>
#include <linux/math.h>
+#include <linux/panic.h>
#include <endian.h>
#include <byteswap.h>
+#include <linux/container_of.h>
#ifndef UINT_MAX
#define UINT_MAX (~0U)
@@ -24,19 +26,6 @@
#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
#endif
-#ifndef container_of
-/**
- * container_of - cast a member of a structure out to the containing structure
- * @ptr: the pointer to the member.
- * @type: the type of the container struct this is embedded in.
- * @member: the name of the member within the struct.
- *
- */
-#define container_of(ptr, type, member) ({ \
- const typeof(((type *)0)->member) * __mptr = (ptr); \
- (type *)((char *)__mptr - offsetof(type, member)); })
-#endif
-
#ifndef max
#define max(x, y) ({ \
typeof(x) _max1 = (x); \
diff --git a/tools/include/linux/linkage.h b/tools/include/linux/linkage.h
index bc763d500262..7baaa5898ca2 100644
--- a/tools/include/linux/linkage.h
+++ b/tools/include/linux/linkage.h
@@ -1,4 +1,12 @@
#ifndef _TOOLS_INCLUDE_LINUX_LINKAGE_H
#define _TOOLS_INCLUDE_LINUX_LINKAGE_H
+#include <linux/export.h>
+
+#define SYM_FUNC_START(x) .globl x; x:
+#define SYM_FUNC_END(x)
+#define SYM_DATA_START(x) .globl x; x:
+#define SYM_DATA_START_LOCAL(x) x:
+#define SYM_DATA_END(x)
+
#endif /* _TOOLS_INCLUDE_LINUX_LINKAGE_H */
diff --git a/tools/include/linux/livepatch_external.h b/tools/include/linux/livepatch_external.h
new file mode 100644
index 000000000000..138af19b0f5c
--- /dev/null
+++ b/tools/include/linux/livepatch_external.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * External livepatch interfaces for patch creation tooling
+ */
+
+#ifndef _LINUX_LIVEPATCH_EXTERNAL_H_
+#define _LINUX_LIVEPATCH_EXTERNAL_H_
+
+#include <linux/types.h>
+
+#define KLP_RELOC_SEC_PREFIX ".klp.rela."
+#define KLP_SYM_PREFIX ".klp.sym."
+
+#define __KLP_PRE_PATCH_PREFIX __klp_pre_patch_callback_
+#define __KLP_POST_PATCH_PREFIX __klp_post_patch_callback_
+#define __KLP_PRE_UNPATCH_PREFIX __klp_pre_unpatch_callback_
+#define __KLP_POST_UNPATCH_PREFIX __klp_post_unpatch_callback_
+
+#define KLP_PRE_PATCH_PREFIX __stringify(__KLP_PRE_PATCH_PREFIX)
+#define KLP_POST_PATCH_PREFIX __stringify(__KLP_POST_PATCH_PREFIX)
+#define KLP_PRE_UNPATCH_PREFIX __stringify(__KLP_PRE_UNPATCH_PREFIX)
+#define KLP_POST_UNPATCH_PREFIX __stringify(__KLP_POST_UNPATCH_PREFIX)
+
+struct klp_object;
+
+typedef int (*klp_pre_patch_t)(struct klp_object *obj);
+typedef void (*klp_post_patch_t)(struct klp_object *obj);
+typedef void (*klp_pre_unpatch_t)(struct klp_object *obj);
+typedef void (*klp_post_unpatch_t)(struct klp_object *obj);
+
+/**
+ * struct klp_callbacks - pre/post live-(un)patch callback structure
+ * @pre_patch: executed before code patching
+ * @post_patch: executed after code patching
+ * @pre_unpatch: executed before code unpatching
+ * @post_unpatch: executed after code unpatching
+ * @post_unpatch_enabled: flag indicating if post-unpatch callback
+ * should run
+ *
+ * All callbacks are optional. Only the pre-patch callback, if provided,
+ * will be unconditionally executed. If the parent klp_object fails to
+ * patch for any reason, including a non-zero error status returned from
+ * the pre-patch callback, no further callbacks will be executed.
+ */
+struct klp_callbacks {
+ klp_pre_patch_t pre_patch;
+ klp_post_patch_t post_patch;
+ klp_pre_unpatch_t pre_unpatch;
+ klp_post_unpatch_t post_unpatch;
+ bool post_unpatch_enabled;
+};
+
+/*
+ * 'struct klp_{func,object}_ext' are compact "external" representations of
+ * 'struct klp_{func,object}'. They are used by objtool for livepatch
+ * generation. The structs are then read by the livepatch module and converted
+ * to the real structs before calling klp_enable_patch().
+ *
+ * TODO make these the official API for klp_enable_patch(). That should
+ * simplify livepatch's interface as well as its data structure lifetime
+ * management.
+ */
+struct klp_func_ext {
+ const char *old_name;
+ void *new_func;
+ unsigned long sympos;
+};
+
+struct klp_object_ext {
+ const char *name;
+ struct klp_func_ext *funcs;
+ struct klp_callbacks callbacks;
+ unsigned int nr_funcs;
+};
+
+#endif /* _LINUX_LIVEPATCH_EXTERNAL_H_ */
diff --git a/tools/include/linux/math64.h b/tools/include/linux/math64.h
index 4ad45d5943dc..8a67d478bf19 100644
--- a/tools/include/linux/math64.h
+++ b/tools/include/linux/math64.h
@@ -72,4 +72,9 @@ static inline u64 mul_u64_u64_div64(u64 a, u64 b, u64 c)
}
#endif
+static inline u64 div_u64(u64 dividend, u32 divisor)
+{
+ return dividend / divisor;
+}
+
#endif /* _LINUX_MATH64_H */
diff --git a/tools/include/linux/mm.h b/tools/include/linux/mm.h
index a03d9bba5151..677c37e4a18c 100644
--- a/tools/include/linux/mm.h
+++ b/tools/include/linux/mm.h
@@ -2,8 +2,8 @@
#ifndef _TOOLS_LINUX_MM_H
#define _TOOLS_LINUX_MM_H
+#include <linux/align.h>
#include <linux/mmzone.h>
-#include <uapi/linux/const.h>
#define PAGE_SHIFT 12
#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
@@ -11,12 +11,8 @@
#define PHYS_ADDR_MAX (~(phys_addr_t)0)
-#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
-#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask))
-#define ALIGN(x, a) __ALIGN_KERNEL((x), (a))
-#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a))
-
#define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
+#define PAGE_ALIGN_DOWN(addr) ALIGN_DOWN(addr, PAGE_SIZE)
#define __va(x) ((void *)((unsigned long)(x)))
#define __pa(x) ((unsigned long)(x))
@@ -29,7 +25,13 @@ static inline void *phys_to_virt(unsigned long address)
return __va(address);
}
-void reserve_bootmem_region(phys_addr_t start, phys_addr_t end);
+#define virt_to_phys virt_to_phys
+static inline phys_addr_t virt_to_phys(volatile void *address)
+{
+ return (phys_addr_t)address;
+}
+
+void reserve_bootmem_region(phys_addr_t start, phys_addr_t end, int nid);
static inline void totalram_pages_inc(void)
{
@@ -39,4 +41,9 @@ static inline void totalram_pages_add(long count)
{
}
+static inline int early_pfn_to_nid(unsigned long pfn)
+{
+ return 0;
+}
+
#endif
diff --git a/tools/include/linux/moduleparam.h b/tools/include/linux/moduleparam.h
new file mode 100644
index 000000000000..4c4d05bef0cb
--- /dev/null
+++ b/tools/include/linux/moduleparam.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_MODULE_PARAMS_H
+#define _TOOLS_LINUX_MODULE_PARAMS_H
+
+#define MODULE_PARM_DESC(parm, desc)
+
+#endif // _TOOLS_LINUX_MODULE_PARAMS_H
diff --git a/tools/include/linux/numa.h b/tools/include/linux/numa.h
index 110b0e5d0fb0..c8b9369335e0 100644
--- a/tools/include/linux/numa.h
+++ b/tools/include/linux/numa.h
@@ -13,4 +13,9 @@
#define NUMA_NO_NODE (-1)
+static inline bool numa_valid_node(int nid)
+{
+ return nid >= 0 && nid < MAX_NUMNODES;
+}
+
#endif /* _LINUX_NUMA_H */
diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h
deleted file mode 100644
index 62c54ffbeeaa..000000000000
--- a/tools/include/linux/objtool.h
+++ /dev/null
@@ -1,197 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_OBJTOOL_H
-#define _LINUX_OBJTOOL_H
-
-#ifndef __ASSEMBLY__
-
-#include <linux/types.h>
-
-/*
- * This struct is used by asm and inline asm code to manually annotate the
- * location of registers on the stack.
- */
-struct unwind_hint {
- u32 ip;
- s16 sp_offset;
- u8 sp_reg;
- u8 type;
- u8 end;
-};
-#endif
-
-/*
- * UNWIND_HINT_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP
- * (the caller's SP right before it made the call). Used for all callable
- * functions, i.e. all C code and all callable asm functions.
- *
- * UNWIND_HINT_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset
- * points to a fully populated pt_regs from a syscall, interrupt, or exception.
- *
- * UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that
- * sp_reg+sp_offset points to the iret return frame.
- *
- * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
- * Useful for code which doesn't have an ELF function annotation.
- *
- * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc.
- */
-#define UNWIND_HINT_TYPE_CALL 0
-#define UNWIND_HINT_TYPE_REGS 1
-#define UNWIND_HINT_TYPE_REGS_PARTIAL 2
-#define UNWIND_HINT_TYPE_FUNC 3
-#define UNWIND_HINT_TYPE_ENTRY 4
-#define UNWIND_HINT_TYPE_SAVE 5
-#define UNWIND_HINT_TYPE_RESTORE 6
-
-#ifdef CONFIG_OBJTOOL
-
-#include <asm/asm.h>
-
-#ifndef __ASSEMBLY__
-
-#define UNWIND_HINT(sp_reg, sp_offset, type, end) \
- "987: \n\t" \
- ".pushsection .discard.unwind_hints\n\t" \
- /* struct unwind_hint */ \
- ".long 987b - .\n\t" \
- ".short " __stringify(sp_offset) "\n\t" \
- ".byte " __stringify(sp_reg) "\n\t" \
- ".byte " __stringify(type) "\n\t" \
- ".byte " __stringify(end) "\n\t" \
- ".balign 4 \n\t" \
- ".popsection\n\t"
-
-/*
- * This macro marks the given function's stack frame as "non-standard", which
- * tells objtool to ignore the function when doing stack metadata validation.
- * It should only be used in special cases where you're 100% sure it won't
- * affect the reliability of frame pointers and kernel stack traces.
- *
- * For more information, see tools/objtool/Documentation/objtool.txt.
- */
-#define STACK_FRAME_NON_STANDARD(func) \
- static void __used __section(".discard.func_stack_frame_non_standard") \
- *__func_stack_frame_non_standard_##func = func
-
-/*
- * STACK_FRAME_NON_STANDARD_FP() is a frame-pointer-specific function ignore
- * for the case where a function is intentionally missing frame pointer setup,
- * but otherwise needs objtool/ORC coverage when frame pointers are disabled.
- */
-#ifdef CONFIG_FRAME_POINTER
-#define STACK_FRAME_NON_STANDARD_FP(func) STACK_FRAME_NON_STANDARD(func)
-#else
-#define STACK_FRAME_NON_STANDARD_FP(func)
-#endif
-
-#define ANNOTATE_NOENDBR \
- "986: \n\t" \
- ".pushsection .discard.noendbr\n\t" \
- _ASM_PTR " 986b\n\t" \
- ".popsection\n\t"
-
-#define ASM_REACHABLE \
- "998:\n\t" \
- ".pushsection .discard.reachable\n\t" \
- ".long 998b - .\n\t" \
- ".popsection\n\t"
-
-#else /* __ASSEMBLY__ */
-
-/*
- * This macro indicates that the following intra-function call is valid.
- * Any non-annotated intra-function call will cause objtool to issue a warning.
- */
-#define ANNOTATE_INTRA_FUNCTION_CALL \
- 999: \
- .pushsection .discard.intra_function_calls; \
- .long 999b; \
- .popsection;
-
-/*
- * In asm, there are two kinds of code: normal C-type callable functions and
- * the rest. The normal callable functions can be called by other code, and
- * don't do anything unusual with the stack. Such normal callable functions
- * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this
- * category. In this case, no special debugging annotations are needed because
- * objtool can automatically generate the ORC data for the ORC unwinder to read
- * at runtime.
- *
- * Anything which doesn't fall into the above category, such as syscall and
- * interrupt handlers, tends to not be called directly by other functions, and
- * often does unusual non-C-function-type things with the stack pointer. Such
- * code needs to be annotated such that objtool can understand it. The
- * following CFI hint macros are for this type of code.
- *
- * These macros provide hints to objtool about the state of the stack at each
- * instruction. Objtool starts from the hints and follows the code flow,
- * making automatic CFI adjustments when it sees pushes and pops, filling out
- * the debuginfo as necessary. It will also warn if it sees any
- * inconsistencies.
- */
-.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
-.Lunwind_hint_ip_\@:
- .pushsection .discard.unwind_hints
- /* struct unwind_hint */
- .long .Lunwind_hint_ip_\@ - .
- .short \sp_offset
- .byte \sp_reg
- .byte \type
- .byte \end
- .balign 4
- .popsection
-.endm
-
-.macro STACK_FRAME_NON_STANDARD func:req
- .pushsection .discard.func_stack_frame_non_standard, "aw"
- _ASM_PTR \func
- .popsection
-.endm
-
-.macro STACK_FRAME_NON_STANDARD_FP func:req
-#ifdef CONFIG_FRAME_POINTER
- STACK_FRAME_NON_STANDARD \func
-#endif
-.endm
-
-.macro ANNOTATE_NOENDBR
-.Lhere_\@:
- .pushsection .discard.noendbr
- .quad .Lhere_\@
- .popsection
-.endm
-
-.macro REACHABLE
-.Lhere_\@:
- .pushsection .discard.reachable
- .long .Lhere_\@ - .
- .popsection
-.endm
-
-#endif /* __ASSEMBLY__ */
-
-#else /* !CONFIG_OBJTOOL */
-
-#ifndef __ASSEMBLY__
-
-#define UNWIND_HINT(sp_reg, sp_offset, type, end) \
- "\n\t"
-#define STACK_FRAME_NON_STANDARD(func)
-#define STACK_FRAME_NON_STANDARD_FP(func)
-#define ANNOTATE_NOENDBR
-#define ASM_REACHABLE
-#else
-#define ANNOTATE_INTRA_FUNCTION_CALL
-.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
-.endm
-.macro STACK_FRAME_NON_STANDARD func:req
-.endm
-.macro ANNOTATE_NOENDBR
-.endm
-.macro REACHABLE
-.endm
-#endif
-
-#endif /* CONFIG_OBJTOOL */
-
-#endif /* _LINUX_OBJTOOL_H */
diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h
new file mode 100644
index 000000000000..c6def4049b1a
--- /dev/null
+++ b/tools/include/linux/objtool_types.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_OBJTOOL_TYPES_H
+#define _LINUX_OBJTOOL_TYPES_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+/*
+ * This struct is used by asm and inline asm code to manually annotate the
+ * location of registers on the stack.
+ */
+struct unwind_hint {
+ u32 ip;
+ s16 sp_offset;
+ u8 sp_reg;
+ u8 type;
+ u8 signal;
+};
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * UNWIND_HINT_TYPE_UNDEFINED: A blind spot in ORC coverage which can result in
+ * a truncated and unreliable stack unwind.
+ *
+ * UNWIND_HINT_TYPE_END_OF_STACK: The end of the kernel stack unwind before
+ * hitting user entry, boot code, or fork entry (when there are no pt_regs
+ * available).
+ *
+ * UNWIND_HINT_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP
+ * (the caller's SP right before it made the call). Used for all callable
+ * functions, i.e. all C code and all callable asm functions.
+ *
+ * UNWIND_HINT_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset
+ * points to a fully populated pt_regs from a syscall, interrupt, or exception.
+ *
+ * UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that
+ * sp_reg+sp_offset points to the iret return frame.
+ *
+ * UNWIND_HINT_TYPE_FUNC: Generate the unwind metadata of a callable function.
+ * Useful for code which doesn't have an ELF function annotation.
+ *
+ * UNWIND_HINT_TYPE_{SAVE,RESTORE}: Save the unwind metadata at a certain
+ * location so that it can be restored later.
+ */
+#define UNWIND_HINT_TYPE_UNDEFINED 0
+#define UNWIND_HINT_TYPE_END_OF_STACK 1
+#define UNWIND_HINT_TYPE_CALL 2
+#define UNWIND_HINT_TYPE_REGS 3
+#define UNWIND_HINT_TYPE_REGS_PARTIAL 4
+/* The below hint types don't have corresponding ORC types */
+#define UNWIND_HINT_TYPE_FUNC 5
+#define UNWIND_HINT_TYPE_SAVE 6
+#define UNWIND_HINT_TYPE_RESTORE 7
+
+/*
+ * Annotate types
+ */
+#define ANNOTYPE_NOENDBR 1
+#define ANNOTYPE_RETPOLINE_SAFE 2
+#define ANNOTYPE_INSTR_BEGIN 3
+#define ANNOTYPE_INSTR_END 4
+#define ANNOTYPE_UNRET_BEGIN 5
+#define ANNOTYPE_IGNORE_ALTS 6
+#define ANNOTYPE_INTRA_FUNCTION_CALL 7
+#define ANNOTYPE_REACHABLE 8
+#define ANNOTYPE_NOCFI 9
+
+#define ANNOTYPE_DATA_SPECIAL 1
+
+#endif /* _LINUX_OBJTOOL_TYPES_H */
diff --git a/tools/include/linux/panic.h b/tools/include/linux/panic.h
new file mode 100644
index 000000000000..9c8f17a41ce8
--- /dev/null
+++ b/tools/include/linux/panic.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_PANIC_H
+#define _TOOLS_LINUX_PANIC_H
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+static inline void panic(const char *fmt, ...)
+{
+ va_list argp;
+
+ va_start(argp, fmt);
+ vfprintf(stderr, fmt, argp);
+ va_end(argp);
+ exit(-1);
+}
+
+#endif
diff --git a/tools/include/linux/pci_ids.h b/tools/include/linux/pci_ids.h
new file mode 120000
index 000000000000..1c9e88f41261
--- /dev/null
+++ b/tools/include/linux/pci_ids.h
@@ -0,0 +1 @@
+../../../include/linux/pci_ids.h \ No newline at end of file
diff --git a/tools/include/linux/pfn.h b/tools/include/linux/pfn.h
index 7512a58189eb..f77a30d70152 100644
--- a/tools/include/linux/pfn.h
+++ b/tools/include/linux/pfn.h
@@ -7,4 +7,5 @@
#define PFN_UP(x) (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
#define PFN_PHYS(x) ((phys_addr_t)(x) << PAGE_SHIFT)
+#define PHYS_PFN(x) ((unsigned long)((x) >> PAGE_SHIFT))
#endif
diff --git a/tools/include/linux/poison.h b/tools/include/linux/poison.h
index 2e6338ac5eed..e530e54046c9 100644
--- a/tools/include/linux/poison.h
+++ b/tools/include/linux/poison.h
@@ -47,11 +47,8 @@
* Magic nums for obj red zoning.
* Placed in the first word before and the first word after an obj.
*/
-#define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */
-#define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */
-
-#define SLUB_RED_INACTIVE 0xbb
-#define SLUB_RED_ACTIVE 0xcc
+#define SLUB_RED_INACTIVE 0xbb /* when obj is inactive */
+#define SLUB_RED_ACTIVE 0xcc /* when obj is active */
/* ...and for poisoning */
#define POISON_INUSE 0x5a /* for use-uninitialised poisoning */
diff --git a/tools/include/linux/prandom.h b/tools/include/linux/prandom.h
new file mode 100644
index 000000000000..b745041ccd6a
--- /dev/null
+++ b/tools/include/linux/prandom.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TOOLS_LINUX_PRANDOM_H
+#define __TOOLS_LINUX_PRANDOM_H
+
+#include <linux/types.h>
+
+struct rnd_state {
+ __u32 s1, s2, s3, s4;
+};
+
+/*
+ * Handle minimum values for seeds
+ */
+static inline u32 __seed(u32 x, u32 m)
+{
+ return (x < m) ? x + m : x;
+}
+
+/**
+ * prandom_seed_state - set seed for prandom_u32_state().
+ * @state: pointer to state structure to receive the seed.
+ * @seed: arbitrary 64-bit value to use as a seed.
+ */
+static inline void prandom_seed_state(struct rnd_state *state, u64 seed)
+{
+ u32 i = ((seed >> 32) ^ (seed << 10) ^ seed) & 0xffffffffUL;
+
+ state->s1 = __seed(i, 2U);
+ state->s2 = __seed(i, 8U);
+ state->s3 = __seed(i, 16U);
+ state->s4 = __seed(i, 128U);
+}
+
+/**
+ * prandom_u32_state - seeded pseudo-random number generator.
+ * @state: pointer to state structure holding seeded state.
+ *
+ * This is used for pseudo-randomness with no outside seeding.
+ * For more random results, use get_random_u32().
+ */
+static inline u32 prandom_u32_state(struct rnd_state *state)
+{
+#define TAUSWORTHE(s, a, b, c, d) (((s & c) << d) ^ (((s << a) ^ s) >> b))
+ state->s1 = TAUSWORTHE(state->s1, 6U, 13U, 4294967294U, 18U);
+ state->s2 = TAUSWORTHE(state->s2, 2U, 27U, 4294967288U, 2U);
+ state->s3 = TAUSWORTHE(state->s3, 13U, 21U, 4294967280U, 7U);
+ state->s4 = TAUSWORTHE(state->s4, 3U, 12U, 4294967168U, 13U);
+
+ return (state->s1 ^ state->s2 ^ state->s3 ^ state->s4);
+}
+#endif // __TOOLS_LINUX_PRANDOM_H
diff --git a/tools/include/linux/rbtree_augmented.h b/tools/include/linux/rbtree_augmented.h
index 570bb9794421..95483c7d81df 100644
--- a/tools/include/linux/rbtree_augmented.h
+++ b/tools/include/linux/rbtree_augmented.h
@@ -158,13 +158,13 @@ RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME, \
static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
{
- rb->__rb_parent_color = rb_color(rb) | (unsigned long)p;
+ rb->__rb_parent_color = rb_color(rb) + (unsigned long)p;
}
static inline void rb_set_parent_color(struct rb_node *rb,
struct rb_node *p, int color)
{
- rb->__rb_parent_color = (unsigned long)p | color;
+ rb->__rb_parent_color = (unsigned long)p + color;
}
static inline void
diff --git a/tools/include/linux/refcount.h b/tools/include/linux/refcount.h
index 36cb29bc57c2..1f30956e070d 100644
--- a/tools/include/linux/refcount.h
+++ b/tools/include/linux/refcount.h
@@ -60,6 +60,11 @@ static inline void refcount_set(refcount_t *r, unsigned int n)
atomic_set(&r->refs, n);
}
+static inline void refcount_set_release(refcount_t *r, unsigned int n)
+{
+ atomic_set(&r->refs, n);
+}
+
static inline unsigned int refcount_read(const refcount_t *r)
{
return atomic_read(&r->refs);
diff --git a/tools/include/linux/ring_buffer.h b/tools/include/linux/ring_buffer.h
index 6c02617377c2..a74c397359c7 100644
--- a/tools/include/linux/ring_buffer.h
+++ b/tools/include/linux/ring_buffer.h
@@ -55,7 +55,7 @@ static inline u64 ring_buffer_read_head(struct perf_event_mmap_page *base)
* READ_ONCE() + smp_mb() pair.
*/
#if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \
- defined(__ia64__) || defined(__sparc__) && defined(__arch64__)
+ defined(__ia64__) || defined(__sparc__) && defined(__arch64__) || defined(__riscv)
return smp_load_acquire(&base->data_head);
#else
u64 head = READ_ONCE(base->data_head);
diff --git a/tools/include/linux/rwsem.h b/tools/include/linux/rwsem.h
new file mode 100644
index 000000000000..f8bffd4a987c
--- /dev/null
+++ b/tools/include/linux/rwsem.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#ifndef _TOOLS__RWSEM_H
+#define _TOOLS__RWSEM_H
+
+#include <pthread.h>
+
+struct rw_semaphore {
+ pthread_rwlock_t lock;
+};
+
+static inline int init_rwsem(struct rw_semaphore *sem)
+{
+ return pthread_rwlock_init(&sem->lock, NULL);
+}
+
+static inline int exit_rwsem(struct rw_semaphore *sem)
+{
+ return pthread_rwlock_destroy(&sem->lock);
+}
+
+static inline int down_read(struct rw_semaphore *sem)
+{
+ return pthread_rwlock_rdlock(&sem->lock);
+}
+
+static inline int up_read(struct rw_semaphore *sem)
+{
+ return pthread_rwlock_unlock(&sem->lock);
+}
+
+static inline int down_write(struct rw_semaphore *sem)
+{
+ return pthread_rwlock_wrlock(&sem->lock);
+}
+
+static inline int up_write(struct rw_semaphore *sem)
+{
+ return pthread_rwlock_unlock(&sem->lock);
+}
+
+#define down_read_nested(sem, subclass) down_read(sem)
+#define down_write_nested(sem, subclass) down_write(sem)
+
+#endif /* _TOOLS_RWSEM_H */
diff --git a/tools/include/linux/seq_file.h b/tools/include/linux/seq_file.h
index 102fd9217f1f..f6bc226af0c1 100644
--- a/tools/include/linux/seq_file.h
+++ b/tools/include/linux/seq_file.h
@@ -1,4 +1,6 @@
#ifndef _TOOLS_INCLUDE_LINUX_SEQ_FILE_H
#define _TOOLS_INCLUDE_LINUX_SEQ_FILE_H
+struct seq_file;
+
#endif /* _TOOLS_INCLUDE_LINUX_SEQ_FILE_H */
diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h
index 311759ea25e9..94937a699402 100644
--- a/tools/include/linux/slab.h
+++ b/tools/include/linux/slab.h
@@ -4,25 +4,137 @@
#include <linux/types.h>
#include <linux/gfp.h>
+#include <pthread.h>
-#define SLAB_PANIC 2
#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */
#define kzalloc_node(size, flags, node) kmalloc(size, flags)
+enum _slab_flag_bits {
+ _SLAB_KMALLOC,
+ _SLAB_HWCACHE_ALIGN,
+ _SLAB_PANIC,
+ _SLAB_TYPESAFE_BY_RCU,
+ _SLAB_ACCOUNT,
+ _SLAB_FLAGS_LAST_BIT
+};
+
+#define __SLAB_FLAG_BIT(nr) ((unsigned int __force)(1U << (nr)))
+#define __SLAB_FLAG_UNUSED ((unsigned int __force)(0U))
+
+#define SLAB_HWCACHE_ALIGN __SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN)
+#define SLAB_PANIC __SLAB_FLAG_BIT(_SLAB_PANIC)
+#define SLAB_TYPESAFE_BY_RCU __SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU)
+#ifdef CONFIG_MEMCG
+# define SLAB_ACCOUNT __SLAB_FLAG_BIT(_SLAB_ACCOUNT)
+#else
+# define SLAB_ACCOUNT __SLAB_FLAG_UNUSED
+#endif
void *kmalloc(size_t size, gfp_t gfp);
void kfree(void *p);
+void *kmalloc_array(size_t n, size_t size, gfp_t gfp);
bool slab_is_available(void);
enum slab_state {
DOWN,
PARTIAL,
- PARTIAL_NODE,
UP,
FULL
};
+struct kmem_cache {
+ pthread_mutex_t lock;
+ unsigned int size;
+ unsigned int align;
+ unsigned int sheaf_capacity;
+ int nr_objs;
+ void *objs;
+ void (*ctor)(void *);
+ bool non_kernel_enabled;
+ unsigned int non_kernel;
+ unsigned long nr_allocated;
+ unsigned long nr_tallocated;
+ bool exec_callback;
+ void (*callback)(void *);
+ void *private;
+};
+
+struct kmem_cache_args {
+ /**
+ * @align: The required alignment for the objects.
+ *
+ * %0 means no specific alignment is requested.
+ */
+ unsigned int align;
+ /**
+ * @sheaf_capacity: The maximum size of the sheaf.
+ */
+ unsigned int sheaf_capacity;
+ /**
+ * @useroffset: Usercopy region offset.
+ *
+ * %0 is a valid offset, when @usersize is non-%0
+ */
+ unsigned int useroffset;
+ /**
+ * @usersize: Usercopy region size.
+ *
+ * %0 means no usercopy region is specified.
+ */
+ unsigned int usersize;
+ /**
+ * @freeptr_offset: Custom offset for the free pointer
+ * in &SLAB_TYPESAFE_BY_RCU caches
+ *
+ * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer
+ * outside of the object. This might cause the object to grow in size.
+ * Cache creators that have a reason to avoid this can specify a custom
+ * free pointer offset in their struct where the free pointer will be
+ * placed.
+ *
+ * Note that placing the free pointer inside the object requires the
+ * caller to ensure that no fields are invalidated that are required to
+ * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for
+ * details).
+ *
+ * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset
+ * is specified, %use_freeptr_offset must be set %true.
+ *
+ * Note that @ctor currently isn't supported with custom free pointers
+ * as a @ctor requires an external free pointer.
+ */
+ unsigned int freeptr_offset;
+ /**
+ * @use_freeptr_offset: Whether a @freeptr_offset is used.
+ */
+ bool use_freeptr_offset;
+ /**
+ * @ctor: A constructor for the objects.
+ *
+ * The constructor is invoked for each object in a newly allocated slab
+ * page. It is the cache user's responsibility to free object in the
+ * same state as after calling the constructor, or deal appropriately
+ * with any differences between a freshly constructed and a reallocated
+ * object.
+ *
+ * %NULL means no constructor.
+ */
+ void (*ctor)(void *);
+};
+
+struct slab_sheaf {
+ union {
+ struct list_head barn_list;
+ /* only used for prefilled sheafs */
+ unsigned int capacity;
+ };
+ struct kmem_cache *cache;
+ unsigned int size;
+ int node; /* only used for rcu_sheaf */
+ void *objects[];
+};
+
static inline void *kzalloc(size_t size, gfp_t gfp)
{
return kmalloc(size, gfp | __GFP_ZERO);
@@ -37,12 +149,57 @@ static inline void *kmem_cache_alloc(struct kmem_cache *cachep, int flags)
}
void kmem_cache_free(struct kmem_cache *cachep, void *objp);
-struct kmem_cache *kmem_cache_create(const char *name, unsigned int size,
- unsigned int align, unsigned int flags,
- void (*ctor)(void *));
+
+struct kmem_cache *
+__kmem_cache_create_args(const char *name, unsigned int size,
+ struct kmem_cache_args *args, unsigned int flags);
+
+/* If NULL is passed for @args, use this variant with default arguments. */
+static inline struct kmem_cache *
+__kmem_cache_default_args(const char *name, unsigned int size,
+ struct kmem_cache_args *args, unsigned int flags)
+{
+ struct kmem_cache_args kmem_default_args = {};
+
+ return __kmem_cache_create_args(name, size, &kmem_default_args, flags);
+}
+
+static inline struct kmem_cache *
+__kmem_cache_create(const char *name, unsigned int size, unsigned int align,
+ unsigned int flags, void (*ctor)(void *))
+{
+ struct kmem_cache_args kmem_args = {
+ .align = align,
+ .ctor = ctor,
+ };
+
+ return __kmem_cache_create_args(name, size, &kmem_args, flags);
+}
+
+#define kmem_cache_create(__name, __object_size, __args, ...) \
+ _Generic((__args), \
+ struct kmem_cache_args *: __kmem_cache_create_args, \
+ void *: __kmem_cache_default_args, \
+ default: __kmem_cache_create)(__name, __object_size, __args, __VA_ARGS__)
void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list);
int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size,
void **list);
+struct slab_sheaf *
+kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size);
+
+void *
+kmem_cache_alloc_from_sheaf(struct kmem_cache *s, gfp_t gfp,
+ struct slab_sheaf *sheaf);
+
+void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp,
+ struct slab_sheaf *sheaf);
+int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp,
+ struct slab_sheaf **sheafp, unsigned int size);
+
+static inline unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf)
+{
+ return sheaf->size;
+}
#endif /* _TOOLS_SLAB_H */
diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h
index 622266b197d0..a6cdf25b6b9d 100644
--- a/tools/include/linux/spinlock.h
+++ b/tools/include/linux/spinlock.h
@@ -11,6 +11,7 @@
#define spin_lock_init(x) pthread_mutex_init(x, NULL)
#define spin_lock(x) pthread_mutex_lock(x)
+#define spin_lock_nested(x, subclass) pthread_mutex_lock(x)
#define spin_unlock(x) pthread_mutex_unlock(x)
#define spin_lock_bh(x) pthread_mutex_lock(x)
#define spin_unlock_bh(x) pthread_mutex_unlock(x)
diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h
index 5a00b8b2cf9f..cfb6ddeb292b 100644
--- a/tools/include/linux/static_call_types.h
+++ b/tools/include/linux/static_call_types.h
@@ -25,6 +25,8 @@
#define STATIC_CALL_SITE_INIT 2UL /* init section */
#define STATIC_CALL_SITE_FLAGS 3UL
+#ifndef __ASSEMBLY__
+
/*
* The static call site table needs to be created by external tooling (objtool
* or a compiler plugin).
@@ -100,4 +102,6 @@ struct static_call_key {
#endif /* CONFIG_HAVE_STATIC_CALL */
+#endif /* __ASSEMBLY__ */
+
#endif /* _STATIC_CALL_TYPES_H */
diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h
index db5c99318c79..51ad3cf4fa82 100644
--- a/tools/include/linux/string.h
+++ b/tools/include/linux/string.h
@@ -12,6 +12,8 @@ void argv_free(char **argv);
int strtobool(const char *s, bool *res);
+#define strscpy strcpy
+
/*
* glibc based builds needs the extern while uClibc doesn't.
* However uClibc headers also define __GLIBC__ hence the hack below
@@ -42,9 +44,26 @@ static inline bool strstarts(const char *str, const char *prefix)
return strncmp(str, prefix, strlen(prefix)) == 0;
}
+/*
+ * Checks if a string ends with another.
+ */
+static inline bool str_ends_with(const char *str, const char *substr)
+{
+ size_t len = strlen(str);
+ size_t sublen = strlen(substr);
+
+ if (sublen > len)
+ return false;
+
+ return !strcmp(str + len - sublen, substr);
+}
+
extern char * __must_check skip_spaces(const char *);
extern char *strim(char *);
+extern void remove_spaces(char *s);
+
extern void *memchr_inv(const void *start, int c, size_t bytes);
+extern unsigned long long memparse(const char *ptr, char **retptr);
#endif /* _TOOLS_LINUX_STRING_H_ */
diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h
index 051fdeaf2670..4928e33d44ac 100644
--- a/tools/include/linux/types.h
+++ b/tools/include/linux/types.h
@@ -42,6 +42,8 @@ typedef __s16 s16;
typedef __u8 u8;
typedef __s8 s8;
+typedef unsigned long long ullong;
+
#ifdef __CHECKER__
#define __bitwise __attribute__((bitwise))
#else
@@ -49,7 +51,12 @@ typedef __s8 s8;
#endif
#define __force
+/* This is defined in linux/compiler_types.h and is left for backward
+ * compatibility.
+ */
+#ifndef __user
#define __user
+#endif
#define __must_check
#define __cold
diff --git a/tools/include/linux/unaligned.h b/tools/include/linux/unaligned.h
new file mode 100644
index 000000000000..395a4464fe73
--- /dev/null
+++ b/tools/include/linux/unaligned.h
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_UNALIGNED_H
+#define __LINUX_UNALIGNED_H
+
+/*
+ * This is the most generic implementation of unaligned accesses
+ * and should work almost anywhere.
+ */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpacked"
+#pragma GCC diagnostic ignored "-Wattributes"
+#include <vdso/unaligned.h>
+
+#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr))
+#define put_unaligned(val, ptr) __put_unaligned_t(typeof(*(ptr)), (val), (ptr))
+
+static inline u16 get_unaligned_le16(const void *p)
+{
+ return le16_to_cpu(__get_unaligned_t(__le16, p));
+}
+
+static inline u32 get_unaligned_le32(const void *p)
+{
+ return le32_to_cpu(__get_unaligned_t(__le32, p));
+}
+
+static inline u64 get_unaligned_le64(const void *p)
+{
+ return le64_to_cpu(__get_unaligned_t(__le64, p));
+}
+
+static inline void put_unaligned_le16(u16 val, void *p)
+{
+ __put_unaligned_t(__le16, cpu_to_le16(val), p);
+}
+
+static inline void put_unaligned_le32(u32 val, void *p)
+{
+ __put_unaligned_t(__le32, cpu_to_le32(val), p);
+}
+
+static inline void put_unaligned_le64(u64 val, void *p)
+{
+ __put_unaligned_t(__le64, cpu_to_le64(val), p);
+}
+
+static inline u16 get_unaligned_be16(const void *p)
+{
+ return be16_to_cpu(__get_unaligned_t(__be16, p));
+}
+
+static inline u32 get_unaligned_be32(const void *p)
+{
+ return be32_to_cpu(__get_unaligned_t(__be32, p));
+}
+
+static inline u64 get_unaligned_be64(const void *p)
+{
+ return be64_to_cpu(__get_unaligned_t(__be64, p));
+}
+
+static inline void put_unaligned_be16(u16 val, void *p)
+{
+ __put_unaligned_t(__be16, cpu_to_be16(val), p);
+}
+
+static inline void put_unaligned_be32(u32 val, void *p)
+{
+ __put_unaligned_t(__be32, cpu_to_be32(val), p);
+}
+
+static inline void put_unaligned_be64(u64 val, void *p)
+{
+ __put_unaligned_t(__be64, cpu_to_be64(val), p);
+}
+
+static inline u32 __get_unaligned_be24(const u8 *p)
+{
+ return p[0] << 16 | p[1] << 8 | p[2];
+}
+
+static inline u32 get_unaligned_be24(const void *p)
+{
+ return __get_unaligned_be24(p);
+}
+
+static inline u32 __get_unaligned_le24(const u8 *p)
+{
+ return p[0] | p[1] << 8 | p[2] << 16;
+}
+
+static inline u32 get_unaligned_le24(const void *p)
+{
+ return __get_unaligned_le24(p);
+}
+
+static inline void __put_unaligned_be24(const u32 val, u8 *p)
+{
+ *p++ = (val >> 16) & 0xff;
+ *p++ = (val >> 8) & 0xff;
+ *p++ = val & 0xff;
+}
+
+static inline void put_unaligned_be24(const u32 val, void *p)
+{
+ __put_unaligned_be24(val, p);
+}
+
+static inline void __put_unaligned_le24(const u32 val, u8 *p)
+{
+ *p++ = val & 0xff;
+ *p++ = (val >> 8) & 0xff;
+ *p++ = (val >> 16) & 0xff;
+}
+
+static inline void put_unaligned_le24(const u32 val, void *p)
+{
+ __put_unaligned_le24(val, p);
+}
+
+static inline void __put_unaligned_be48(const u64 val, u8 *p)
+{
+ *p++ = (val >> 40) & 0xff;
+ *p++ = (val >> 32) & 0xff;
+ *p++ = (val >> 24) & 0xff;
+ *p++ = (val >> 16) & 0xff;
+ *p++ = (val >> 8) & 0xff;
+ *p++ = val & 0xff;
+}
+
+static inline void put_unaligned_be48(const u64 val, void *p)
+{
+ __put_unaligned_be48(val, p);
+}
+
+static inline u64 __get_unaligned_be48(const u8 *p)
+{
+ return (u64)p[0] << 40 | (u64)p[1] << 32 | (u64)p[2] << 24 |
+ p[3] << 16 | p[4] << 8 | p[5];
+}
+
+static inline u64 get_unaligned_be48(const void *p)
+{
+ return __get_unaligned_be48(p);
+}
+#pragma GCC diagnostic pop
+
+#endif /* __LINUX_UNALIGNED_H */
diff --git a/tools/include/nolibc/.gitignore b/tools/include/nolibc/.gitignore
new file mode 100644
index 000000000000..dea22eaaed2b
--- /dev/null
+++ b/tools/include/nolibc/.gitignore
@@ -0,0 +1 @@
+sysroot
diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile
index cfd06764b5ae..8118e22844f1 100644
--- a/tools/include/nolibc/Makefile
+++ b/tools/include/nolibc/Makefile
@@ -23,10 +23,55 @@ else
Q=@
endif
-nolibc_arch := $(patsubst arm64,aarch64,$(ARCH))
-arch_file := arch-$(nolibc_arch).h
-all_files := ctype.h errno.h nolibc.h signal.h std.h stdio.h stdlib.h string.h \
- sys.h time.h types.h unistd.h
+arch_files := arch.h $(wildcard arch-*.h)
+all_files := \
+ compiler.h \
+ crt.h \
+ ctype.h \
+ dirent.h \
+ elf.h \
+ errno.h \
+ fcntl.h \
+ getopt.h \
+ inttypes.h \
+ limits.h \
+ math.h \
+ nolibc.h \
+ poll.h \
+ sched.h \
+ signal.h \
+ stackprotector.h \
+ std.h \
+ stdarg.h \
+ stdbool.h \
+ stddef.h \
+ stdint.h \
+ stdlib.h \
+ string.h \
+ sys.h \
+ sys/auxv.h \
+ sys/ioctl.h \
+ sys/mman.h \
+ sys/mount.h \
+ sys/prctl.h \
+ sys/random.h \
+ sys/reboot.h \
+ sys/resource.h \
+ sys/select.h \
+ sys/stat.h \
+ sys/syscall.h \
+ sys/sysmacros.h \
+ sys/time.h \
+ sys/timerfd.h \
+ sys/types.h \
+ sys/uio.h \
+ sys/utsname.h \
+ sys/wait.h \
+ time.h \
+ types.h \
+ unistd.h \
+ stdio.h \
+
# install all headers needed to support a bare-metal compiler
all: headers
@@ -37,7 +82,7 @@ help:
@echo "Supported targets under nolibc:"
@echo " all call \"headers\""
@echo " clean clean the sysroot"
- @echo " headers prepare a sysroot in tools/include/nolibc/sysroot"
+ @echo " headers prepare a multi-arch sysroot in \$${OUTPUT}sysroot"
@echo " headers_standalone like \"headers\", and also install kernel headers"
@echo " help this help"
@echo ""
@@ -48,28 +93,21 @@ help:
@echo " OUTPUT = $(OUTPUT)"
@echo ""
-# Note: when ARCH is "x86" we concatenate both x86_64 and i386
+# installs headers for all archs at once.
headers:
- $(Q)mkdir -p $(OUTPUT)sysroot
- $(Q)mkdir -p $(OUTPUT)sysroot/include
- $(Q)cp $(all_files) $(OUTPUT)sysroot/include/
- $(Q)if [ "$(ARCH)" = "x86" ]; then \
- sed -e \
- 's,^#ifndef _NOLIBC_ARCH_X86_64_H,#if !defined(_NOLIBC_ARCH_X86_64_H) \&\& defined(__x86_64__),' \
- arch-x86_64.h; \
- sed -e \
- 's,^#ifndef _NOLIBC_ARCH_I386_H,#if !defined(_NOLIBC_ARCH_I386_H) \&\& !defined(__x86_64__),' \
- arch-i386.h; \
- elif [ -e "$(arch_file)" ]; then \
- cat $(arch_file); \
- else \
- echo "Fatal: architecture $(ARCH) not yet supported by nolibc." >&2; \
- exit 1; \
- fi > $(OUTPUT)sysroot/include/arch.h
+ $(Q)mkdir -p "$(OUTPUT)sysroot"
+ $(Q)mkdir -p "$(OUTPUT)sysroot/include"
+ $(Q)cp --parents $(arch_files) $(all_files) "$(OUTPUT)sysroot/include/"
headers_standalone: headers
$(Q)$(MAKE) -C $(srctree) headers
$(Q)$(MAKE) -C $(srctree) headers_install INSTALL_HDR_PATH=$(OUTPUT)sysroot
+headers_check: headers_standalone
+ $(Q)for header in $(filter-out crt.h std.h,$(all_files)); do \
+ $(CC) $(CLANG_CROSS_FLAGS) -Wall -Werror -nostdinc -fsyntax-only -x c /dev/null \
+ -I$(or $(objtree),$(srctree))/usr/include -include $$header -include $$header || exit 1; \
+ done
+
clean:
$(call QUIET_CLEAN, nolibc) rm -rf "$(OUTPUT)sysroot"
diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h
index f31be8e967d6..251c42579028 100644
--- a/tools/include/nolibc/arch-arm.h
+++ b/tools/include/nolibc/arch-arm.h
@@ -7,54 +7,8 @@
#ifndef _NOLIBC_ARCH_ARM_H
#define _NOLIBC_ARCH_ARM_H
-/* O_* macros for fcntl/open are architecture-specific */
-#define O_RDONLY 0
-#define O_WRONLY 1
-#define O_RDWR 2
-#define O_CREAT 0x40
-#define O_EXCL 0x80
-#define O_NOCTTY 0x100
-#define O_TRUNC 0x200
-#define O_APPEND 0x400
-#define O_NONBLOCK 0x800
-#define O_DIRECTORY 0x4000
-
-/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
- * exactly 56 bytes (stops before the unused array). In big endian, the format
- * differs as devices are returned as short only.
- */
-struct sys_stat_struct {
-#if defined(__ARMEB__)
- unsigned short st_dev;
- unsigned short __pad1;
-#else
- unsigned long st_dev;
-#endif
- unsigned long st_ino;
- unsigned short st_mode;
- unsigned short st_nlink;
- unsigned short st_uid;
- unsigned short st_gid;
-
-#if defined(__ARMEB__)
- unsigned short st_rdev;
- unsigned short __pad2;
-#else
- unsigned long st_rdev;
-#endif
- unsigned long st_size;
- unsigned long st_blksize;
- unsigned long st_blocks;
-
- unsigned long st_atime;
- unsigned long st_atime_nsec;
- unsigned long st_mtime;
- unsigned long st_mtime_nsec;
-
- unsigned long st_ctime;
- unsigned long st_ctime_nsec;
- unsigned long __unused[2];
-};
+#include "compiler.h"
+#include "crt.h"
/* Syscalls for ARM in ARM or Thumb modes :
* - registers are 32-bit
@@ -70,20 +24,44 @@ struct sys_stat_struct {
* don't have to experience issues with register constraints.
* - the syscall number is always specified last in order to allow to force
* some registers before (gcc refuses a %-register at the last position).
+ * - in thumb mode without -fomit-frame-pointer, r7 is also used to store the
+ * frame pointer, and we cannot directly assign it as a register variable,
+ * nor can we clobber it. Instead we assign the r6 register and swap it
+ * with r7 before calling svc, and r6 is marked as clobbered.
+ * We're just using any regular register which we assign to r7 after saving
+ * it.
*
* Also, ARM supports the old_select syscall if newselect is not available
*/
#define __ARCH_WANT_SYS_OLD_SELECT
+#if (defined(__THUMBEB__) || defined(__THUMBEL__)) && \
+ !defined(NOLIBC_OMIT_FRAME_POINTER)
+/* swap r6,r7 needed in Thumb mode since we can't use nor clobber r7 */
+#define _NOLIBC_SYSCALL_REG "r6"
+#define _NOLIBC_THUMB_SET_R7 "eor r7, r6\neor r6, r7\neor r7, r6\n"
+#define _NOLIBC_THUMB_RESTORE_R7 "mov r7, r6\n"
+
+#else /* we're in ARM mode */
+/* in Arm mode we can directly use r7 */
+#define _NOLIBC_SYSCALL_REG "r7"
+#define _NOLIBC_THUMB_SET_R7 ""
+#define _NOLIBC_THUMB_RESTORE_R7 ""
+
+#endif /* end THUMB */
+
#define my_syscall0(num) \
({ \
- register long _num __asm__ ("r7") = (num); \
+ register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \
register long _arg1 __asm__ ("r0"); \
- \
- __asm__ volatile ( \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_THUMB_SET_R7 \
"svc #0\n" \
- : "=r"(_arg1) \
- : "r"(_num) \
+ _NOLIBC_THUMB_RESTORE_R7 \
+ : "=r"(_arg1), "=r"(_num) \
+ : "r"(_arg1), \
+ "r"(_num) \
: "memory", "cc", "lr" \
); \
_arg1; \
@@ -91,12 +69,14 @@ struct sys_stat_struct {
#define my_syscall1(num, arg1) \
({ \
- register long _num __asm__ ("r7") = (num); \
+ register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \
register long _arg1 __asm__ ("r0") = (long)(arg1); \
- \
- __asm__ volatile ( \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_THUMB_SET_R7 \
"svc #0\n" \
- : "=r"(_arg1) \
+ _NOLIBC_THUMB_RESTORE_R7 \
+ : "=r"(_arg1), "=r" (_num) \
: "r"(_arg1), \
"r"(_num) \
: "memory", "cc", "lr" \
@@ -106,13 +86,15 @@ struct sys_stat_struct {
#define my_syscall2(num, arg1, arg2) \
({ \
- register long _num __asm__ ("r7") = (num); \
+ register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \
register long _arg1 __asm__ ("r0") = (long)(arg1); \
register long _arg2 __asm__ ("r1") = (long)(arg2); \
- \
- __asm__ volatile ( \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_THUMB_SET_R7 \
"svc #0\n" \
- : "=r"(_arg1) \
+ _NOLIBC_THUMB_RESTORE_R7 \
+ : "=r"(_arg1), "=r" (_num) \
: "r"(_arg1), "r"(_arg2), \
"r"(_num) \
: "memory", "cc", "lr" \
@@ -122,14 +104,16 @@ struct sys_stat_struct {
#define my_syscall3(num, arg1, arg2, arg3) \
({ \
- register long _num __asm__ ("r7") = (num); \
+ register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \
register long _arg1 __asm__ ("r0") = (long)(arg1); \
register long _arg2 __asm__ ("r1") = (long)(arg2); \
register long _arg3 __asm__ ("r2") = (long)(arg3); \
- \
- __asm__ volatile ( \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_THUMB_SET_R7 \
"svc #0\n" \
- : "=r"(_arg1) \
+ _NOLIBC_THUMB_RESTORE_R7 \
+ : "=r"(_arg1), "=r" (_num) \
: "r"(_arg1), "r"(_arg2), "r"(_arg3), \
"r"(_num) \
: "memory", "cc", "lr" \
@@ -139,15 +123,17 @@ struct sys_stat_struct {
#define my_syscall4(num, arg1, arg2, arg3, arg4) \
({ \
- register long _num __asm__ ("r7") = (num); \
+ register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \
register long _arg1 __asm__ ("r0") = (long)(arg1); \
register long _arg2 __asm__ ("r1") = (long)(arg2); \
register long _arg3 __asm__ ("r2") = (long)(arg3); \
register long _arg4 __asm__ ("r3") = (long)(arg4); \
- \
- __asm__ volatile ( \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_THUMB_SET_R7 \
"svc #0\n" \
- : "=r"(_arg1) \
+ _NOLIBC_THUMB_RESTORE_R7 \
+ : "=r"(_arg1), "=r" (_num) \
: "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
"r"(_num) \
: "memory", "cc", "lr" \
@@ -157,16 +143,18 @@ struct sys_stat_struct {
#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
({ \
- register long _num __asm__ ("r7") = (num); \
+ register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \
register long _arg1 __asm__ ("r0") = (long)(arg1); \
register long _arg2 __asm__ ("r1") = (long)(arg2); \
register long _arg3 __asm__ ("r2") = (long)(arg3); \
register long _arg4 __asm__ ("r3") = (long)(arg4); \
register long _arg5 __asm__ ("r4") = (long)(arg5); \
- \
- __asm__ volatile ( \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_THUMB_SET_R7 \
"svc #0\n" \
- : "=r" (_arg1) \
+ _NOLIBC_THUMB_RESTORE_R7 \
+ : "=r"(_arg1), "=r" (_num) \
: "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
"r"(_num) \
: "memory", "cc", "lr" \
@@ -174,31 +162,38 @@ struct sys_stat_struct {
_arg1; \
})
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ register long _num __asm__(_NOLIBC_SYSCALL_REG) = (num); \
+ register long _arg1 __asm__ ("r0") = (long)(arg1); \
+ register long _arg2 __asm__ ("r1") = (long)(arg2); \
+ register long _arg3 __asm__ ("r2") = (long)(arg3); \
+ register long _arg4 __asm__ ("r3") = (long)(arg4); \
+ register long _arg5 __asm__ ("r4") = (long)(arg5); \
+ register long _arg6 __asm__ ("r5") = (long)(arg6); \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_THUMB_SET_R7 \
+ "svc #0\n" \
+ _NOLIBC_THUMB_RESTORE_R7 \
+ : "=r"(_arg1), "=r" (_num) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "r"(_arg6), "r"(_num) \
+ : "memory", "cc", "lr" \
+ ); \
+ _arg1; \
+})
+
+#ifndef NOLIBC_NO_RUNTIME
/* startup code */
-__asm__ (".section .text\n"
- ".weak _start\n"
- "_start:\n"
-#if defined(__THUMBEB__) || defined(__THUMBEL__)
- /* We enter here in 32-bit mode but if some previous functions were in
- * 16-bit mode, the assembler cannot know, so we need to tell it we're in
- * 32-bit now, then switch to 16-bit (is there a better way to do it than
- * adding 1 by hand ?) and tell the asm we're now in 16-bit mode so that
- * it generates correct instructions. Note that we do not support thumb1.
- */
- ".code 32\n"
- "add r0, pc, #1\n"
- "bx r0\n"
- ".code 16\n"
-#endif
- "pop {%r0}\n" // argc was in the stack
- "mov %r1, %sp\n" // argv = sp
- "add %r2, %r1, %r0, lsl #2\n" // envp = argv + 4*argc ...
- "add %r2, %r2, $4\n" // ... + 4
- "and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the
- "mov %sp, %r3\n" // callee, an bl doesn't push (lr=pc)
- "bl main\n" // main() returns the status code, we'll exit with it.
- "movs r7, $1\n" // NR_exit == 1
- "svc $0x00\n"
- "");
+void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
+{
+ __asm__ volatile (
+ "mov r0, sp\n" /* save stack pointer to %r0, as arg1 of _start_c */
+ "bl _start_c\n" /* transfer to c runtime */
+ );
+ __nolibc_entrypoint_epilogue();
+}
+#endif /* NOLIBC_NO_RUNTIME */
-#endif // _NOLIBC_ARCH_ARM_H
+#endif /* _NOLIBC_ARCH_ARM_H */
diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-arm64.h
index f68baf8f395f..080a55a7144e 100644
--- a/tools/include/nolibc/arch-aarch64.h
+++ b/tools/include/nolibc/arch-arm64.h
@@ -1,53 +1,16 @@
/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
/*
- * AARCH64 specific definitions for NOLIBC
+ * ARM64 specific definitions for NOLIBC
* Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
*/
-#ifndef _NOLIBC_ARCH_AARCH64_H
-#define _NOLIBC_ARCH_AARCH64_H
+#ifndef _NOLIBC_ARCH_ARM64_H
+#define _NOLIBC_ARCH_ARM64_H
-/* O_* macros for fcntl/open are architecture-specific */
-#define O_RDONLY 0
-#define O_WRONLY 1
-#define O_RDWR 2
-#define O_CREAT 0x40
-#define O_EXCL 0x80
-#define O_NOCTTY 0x100
-#define O_TRUNC 0x200
-#define O_APPEND 0x400
-#define O_NONBLOCK 0x800
-#define O_DIRECTORY 0x4000
+#include "compiler.h"
+#include "crt.h"
-/* The struct returned by the newfstatat() syscall. Differs slightly from the
- * x86_64's stat one by field ordering, so be careful.
- */
-struct sys_stat_struct {
- unsigned long st_dev;
- unsigned long st_ino;
- unsigned int st_mode;
- unsigned int st_nlink;
- unsigned int st_uid;
- unsigned int st_gid;
-
- unsigned long st_rdev;
- unsigned long __pad1;
- long st_size;
- int st_blksize;
- int __pad2;
-
- long st_blocks;
- long st_atime;
- unsigned long st_atime_nsec;
- long st_mtime;
-
- unsigned long st_mtime_nsec;
- long st_ctime;
- unsigned long st_ctime_nsec;
- unsigned int __unused[2];
-};
-
-/* Syscalls for AARCH64 :
+/* Syscalls for ARM64 :
* - registers are 64-bit
* - stack is 16-byte aligned
* - syscall number is passed in x8
@@ -57,17 +20,14 @@ struct sys_stat_struct {
* - the arguments are cast to long and assigned into the target registers
* which are then simply passed as registers to the asm code, so that we
* don't have to experience issues with register constraints.
- *
- * On aarch64, select() is not implemented so we have to use pselect6().
*/
-#define __ARCH_WANT_SYS_PSELECT6
#define my_syscall0(num) \
({ \
register long _num __asm__ ("x8") = (num); \
register long _arg1 __asm__ ("x0"); \
- \
- __asm__ volatile ( \
+ \
+ __asm__ volatile ( \
"svc #0\n" \
: "=r"(_arg1) \
: "r"(_num) \
@@ -80,8 +40,8 @@ struct sys_stat_struct {
({ \
register long _num __asm__ ("x8") = (num); \
register long _arg1 __asm__ ("x0") = (long)(arg1); \
- \
- __asm__ volatile ( \
+ \
+ __asm__ volatile ( \
"svc #0\n" \
: "=r"(_arg1) \
: "r"(_arg1), \
@@ -96,8 +56,8 @@ struct sys_stat_struct {
register long _num __asm__ ("x8") = (num); \
register long _arg1 __asm__ ("x0") = (long)(arg1); \
register long _arg2 __asm__ ("x1") = (long)(arg2); \
- \
- __asm__ volatile ( \
+ \
+ __asm__ volatile ( \
"svc #0\n" \
: "=r"(_arg1) \
: "r"(_arg1), "r"(_arg2), \
@@ -113,8 +73,8 @@ struct sys_stat_struct {
register long _arg1 __asm__ ("x0") = (long)(arg1); \
register long _arg2 __asm__ ("x1") = (long)(arg2); \
register long _arg3 __asm__ ("x2") = (long)(arg3); \
- \
- __asm__ volatile ( \
+ \
+ __asm__ volatile ( \
"svc #0\n" \
: "=r"(_arg1) \
: "r"(_arg1), "r"(_arg2), "r"(_arg3), \
@@ -131,8 +91,8 @@ struct sys_stat_struct {
register long _arg2 __asm__ ("x1") = (long)(arg2); \
register long _arg3 __asm__ ("x2") = (long)(arg3); \
register long _arg4 __asm__ ("x3") = (long)(arg4); \
- \
- __asm__ volatile ( \
+ \
+ __asm__ volatile ( \
"svc #0\n" \
: "=r"(_arg1) \
: "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
@@ -150,8 +110,8 @@ struct sys_stat_struct {
register long _arg3 __asm__ ("x2") = (long)(arg3); \
register long _arg4 __asm__ ("x3") = (long)(arg4); \
register long _arg5 __asm__ ("x4") = (long)(arg5); \
- \
- __asm__ volatile ( \
+ \
+ __asm__ volatile ( \
"svc #0\n" \
: "=r" (_arg1) \
: "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
@@ -170,8 +130,8 @@ struct sys_stat_struct {
register long _arg4 __asm__ ("x3") = (long)(arg4); \
register long _arg5 __asm__ ("x4") = (long)(arg5); \
register long _arg6 __asm__ ("x5") = (long)(arg6); \
- \
- __asm__ volatile ( \
+ \
+ __asm__ volatile ( \
"svc #0\n" \
: "=r" (_arg1) \
: "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
@@ -181,19 +141,15 @@ struct sys_stat_struct {
_arg1; \
})
+#ifndef NOLIBC_NO_RUNTIME
/* startup code */
-__asm__ (".section .text\n"
- ".weak _start\n"
- "_start:\n"
- "ldr x0, [sp]\n" // argc (x0) was in the stack
- "add x1, sp, 8\n" // argv (x1) = sp
- "lsl x2, x0, 3\n" // envp (x2) = 8*argc ...
- "add x2, x2, 8\n" // + 8 (skip null)
- "add x2, x2, x1\n" // + argv
- "and sp, x1, -16\n" // sp must be 16-byte aligned in the callee
- "bl main\n" // main() returns the status code, we'll exit with it.
- "mov x8, 93\n" // NR_exit == 93
- "svc #0\n"
- "");
-
-#endif // _NOLIBC_ARCH_AARCH64_H
+void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
+{
+ __asm__ volatile (
+ "mov x0, sp\n" /* save stack pointer to x0, as arg1 of _start_c */
+ "bl _start_c\n" /* transfer to c runtime */
+ );
+ __nolibc_entrypoint_epilogue();
+}
+#endif /* NOLIBC_NO_RUNTIME */
+#endif /* _NOLIBC_ARCH_ARM64_H */
diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h
deleted file mode 100644
index d7e7212346e2..000000000000
--- a/tools/include/nolibc/arch-i386.h
+++ /dev/null
@@ -1,219 +0,0 @@
-/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
-/*
- * i386 specific definitions for NOLIBC
- * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
- */
-
-#ifndef _NOLIBC_ARCH_I386_H
-#define _NOLIBC_ARCH_I386_H
-
-/* O_* macros for fcntl/open are architecture-specific */
-#define O_RDONLY 0
-#define O_WRONLY 1
-#define O_RDWR 2
-#define O_CREAT 0x40
-#define O_EXCL 0x80
-#define O_NOCTTY 0x100
-#define O_TRUNC 0x200
-#define O_APPEND 0x400
-#define O_NONBLOCK 0x800
-#define O_DIRECTORY 0x10000
-
-/* The struct returned by the stat() syscall, 32-bit only, the syscall returns
- * exactly 56 bytes (stops before the unused array).
- */
-struct sys_stat_struct {
- unsigned long st_dev;
- unsigned long st_ino;
- unsigned short st_mode;
- unsigned short st_nlink;
- unsigned short st_uid;
- unsigned short st_gid;
-
- unsigned long st_rdev;
- unsigned long st_size;
- unsigned long st_blksize;
- unsigned long st_blocks;
-
- unsigned long st_atime;
- unsigned long st_atime_nsec;
- unsigned long st_mtime;
- unsigned long st_mtime_nsec;
-
- unsigned long st_ctime;
- unsigned long st_ctime_nsec;
- unsigned long __unused[2];
-};
-
-/* Syscalls for i386 :
- * - mostly similar to x86_64
- * - registers are 32-bit
- * - syscall number is passed in eax
- * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively
- * - all registers are preserved (except eax of course)
- * - the system call is performed by calling int $0x80
- * - syscall return comes in eax
- * - the arguments are cast to long and assigned into the target registers
- * which are then simply passed as registers to the asm code, so that we
- * don't have to experience issues with register constraints.
- * - the syscall number is always specified last in order to allow to force
- * some registers before (gcc refuses a %-register at the last position).
- *
- * Also, i386 supports the old_select syscall if newselect is not available
- */
-#define __ARCH_WANT_SYS_OLD_SELECT
-
-#define my_syscall0(num) \
-({ \
- long _ret; \
- register long _num __asm__ ("eax") = (num); \
- \
- __asm__ volatile ( \
- "int $0x80\n" \
- : "=a" (_ret) \
- : "0"(_num) \
- : "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall1(num, arg1) \
-({ \
- long _ret; \
- register long _num __asm__ ("eax") = (num); \
- register long _arg1 __asm__ ("ebx") = (long)(arg1); \
- \
- __asm__ volatile ( \
- "int $0x80\n" \
- : "=a" (_ret) \
- : "r"(_arg1), \
- "0"(_num) \
- : "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall2(num, arg1, arg2) \
-({ \
- long _ret; \
- register long _num __asm__ ("eax") = (num); \
- register long _arg1 __asm__ ("ebx") = (long)(arg1); \
- register long _arg2 __asm__ ("ecx") = (long)(arg2); \
- \
- __asm__ volatile ( \
- "int $0x80\n" \
- : "=a" (_ret) \
- : "r"(_arg1), "r"(_arg2), \
- "0"(_num) \
- : "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3) \
-({ \
- long _ret; \
- register long _num __asm__ ("eax") = (num); \
- register long _arg1 __asm__ ("ebx") = (long)(arg1); \
- register long _arg2 __asm__ ("ecx") = (long)(arg2); \
- register long _arg3 __asm__ ("edx") = (long)(arg3); \
- \
- __asm__ volatile ( \
- "int $0x80\n" \
- : "=a" (_ret) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
- "0"(_num) \
- : "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4) \
-({ \
- long _ret; \
- register long _num __asm__ ("eax") = (num); \
- register long _arg1 __asm__ ("ebx") = (long)(arg1); \
- register long _arg2 __asm__ ("ecx") = (long)(arg2); \
- register long _arg3 __asm__ ("edx") = (long)(arg3); \
- register long _arg4 __asm__ ("esi") = (long)(arg4); \
- \
- __asm__ volatile ( \
- "int $0x80\n" \
- : "=a" (_ret) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
- "0"(_num) \
- : "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
-({ \
- long _ret; \
- register long _num __asm__ ("eax") = (num); \
- register long _arg1 __asm__ ("ebx") = (long)(arg1); \
- register long _arg2 __asm__ ("ecx") = (long)(arg2); \
- register long _arg3 __asm__ ("edx") = (long)(arg3); \
- register long _arg4 __asm__ ("esi") = (long)(arg4); \
- register long _arg5 __asm__ ("edi") = (long)(arg5); \
- \
- __asm__ volatile ( \
- "int $0x80\n" \
- : "=a" (_ret) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
- "0"(_num) \
- : "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
-({ \
- long _eax = (long)(num); \
- long _arg6 = (long)(arg6); /* Always in memory */ \
- __asm__ volatile ( \
- "pushl %[_arg6]\n\t" \
- "pushl %%ebp\n\t" \
- "movl 4(%%esp),%%ebp\n\t" \
- "int $0x80\n\t" \
- "popl %%ebp\n\t" \
- "addl $4,%%esp\n\t" \
- : "+a"(_eax) /* %eax */ \
- : "b"(arg1), /* %ebx */ \
- "c"(arg2), /* %ecx */ \
- "d"(arg3), /* %edx */ \
- "S"(arg4), /* %esi */ \
- "D"(arg5), /* %edi */ \
- [_arg6]"m"(_arg6) /* memory */ \
- : "memory", "cc" \
- ); \
- _eax; \
-})
-
-/* startup code */
-/*
- * i386 System V ABI mandates:
- * 1) last pushed argument must be 16-byte aligned.
- * 2) The deepest stack frame should be set to zero
- *
- */
-__asm__ (".section .text\n"
- ".weak _start\n"
- "_start:\n"
- "pop %eax\n" // argc (first arg, %eax)
- "mov %esp, %ebx\n" // argv[] (second arg, %ebx)
- "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx)
- "xor %ebp, %ebp\n" // zero the stack frame
- "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned before
- "sub $4, %esp\n" // the call instruction (args are aligned)
- "push %ecx\n" // push all registers on the stack so that we
- "push %ebx\n" // support both regparm and plain stack modes
- "push %eax\n"
- "call main\n" // main() returns the status code in %eax
- "mov %eax, %ebx\n" // retrieve exit code (32-bit int)
- "movl $1, %eax\n" // NR_exit == 1
- "int $0x80\n" // exit now
- "hlt\n" // ensure it does not
- "");
-
-#endif // _NOLIBC_ARCH_I386_H
diff --git a/tools/include/nolibc/arch-loongarch.h b/tools/include/nolibc/arch-loongarch.h
new file mode 100644
index 000000000000..c894176c3f89
--- /dev/null
+++ b/tools/include/nolibc/arch-loongarch.h
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * LoongArch specific definitions for NOLIBC
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+#ifndef _NOLIBC_ARCH_LOONGARCH_H
+#define _NOLIBC_ARCH_LOONGARCH_H
+
+#include "compiler.h"
+#include "crt.h"
+
+/* Syscalls for LoongArch :
+ * - stack is 16-byte aligned
+ * - syscall number is passed in a7
+ * - arguments are in a0, a1, a2, a3, a4, a5
+ * - the system call is performed by calling "syscall 0"
+ * - syscall return comes in a0
+ * - the arguments are cast to long and assigned into the target
+ * registers which are then simply passed as registers to the asm code,
+ * so that we don't have to experience issues with register constraints.
+ */
+
+#define _NOLIBC_SYSCALL_CLOBBERLIST \
+ "memory", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8"
+
+#define my_syscall0(num) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0"); \
+ \
+ __asm__ volatile ( \
+ "syscall 0\n" \
+ : "=r"(_arg1) \
+ : "r"(_num) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ \
+ __asm__ volatile ( \
+ "syscall 0\n" \
+ : "+r"(_arg1) \
+ : "r"(_num) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ \
+ __asm__ volatile ( \
+ "syscall 0\n" \
+ : "+r"(_arg1) \
+ : "r"(_arg2), \
+ "r"(_num) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg3 __asm__ ("a2") = (long)(arg3); \
+ \
+ __asm__ volatile ( \
+ "syscall 0\n" \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), \
+ "r"(_num) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg3 __asm__ ("a2") = (long)(arg3); \
+ register long _arg4 __asm__ ("a3") = (long)(arg4); \
+ \
+ __asm__ volatile ( \
+ "syscall 0\n" \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), "r"(_arg4), \
+ "r"(_num) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg3 __asm__ ("a2") = (long)(arg3); \
+ register long _arg4 __asm__ ("a3") = (long)(arg4); \
+ register long _arg5 __asm__ ("a4") = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ "syscall 0\n" \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "r"(_num) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ register long _num __asm__ ("a7") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg3 __asm__ ("a2") = (long)(arg3); \
+ register long _arg4 __asm__ ("a3") = (long)(arg4); \
+ register long _arg5 __asm__ ("a4") = (long)(arg5); \
+ register long _arg6 __asm__ ("a5") = (long)(arg6); \
+ \
+ __asm__ volatile ( \
+ "syscall 0\n" \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \
+ "r"(_num) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _arg1; \
+})
+
+#ifndef NOLIBC_NO_RUNTIME
+/* startup code */
+void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
+{
+ __asm__ volatile (
+ "move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */
+ "bl _start_c\n" /* transfer to c runtime */
+ );
+ __nolibc_entrypoint_epilogue();
+}
+#endif /* NOLIBC_NO_RUNTIME */
+
+#endif /* _NOLIBC_ARCH_LOONGARCH_H */
diff --git a/tools/include/nolibc/arch-m68k.h b/tools/include/nolibc/arch-m68k.h
new file mode 100644
index 000000000000..2a4fbada5e79
--- /dev/null
+++ b/tools/include/nolibc/arch-m68k.h
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * m68k specific definitions for NOLIBC
+ * Copyright (C) 2025 Daniel Palmer<daniel@thingy.jp>
+ *
+ * Roughly based on one or more of the other arch files.
+ *
+ */
+
+#ifndef _NOLIBC_ARCH_M68K_H
+#define _NOLIBC_ARCH_M68K_H
+
+#include "compiler.h"
+#include "crt.h"
+
+#define _NOLIBC_SYSCALL_CLOBBERLIST "memory"
+
+#define my_syscall0(num) \
+({ \
+ register long _num __asm__ ("d0") = (num); \
+ \
+ __asm__ volatile ( \
+ "trap #0\n" \
+ : "+r"(_num) \
+ : "r"(_num) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _num; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ register long _num __asm__ ("d0") = (num); \
+ register long _arg1 __asm__ ("d1") = (long)(arg1); \
+ \
+ __asm__ volatile ( \
+ "trap #0\n" \
+ : "+r"(_num) \
+ : "r"(_arg1) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _num; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ register long _num __asm__ ("d0") = (num); \
+ register long _arg1 __asm__ ("d1") = (long)(arg1); \
+ register long _arg2 __asm__ ("d2") = (long)(arg2); \
+ \
+ __asm__ volatile ( \
+ "trap #0\n" \
+ : "+r"(_num) \
+ : "r"(_arg1), "r"(_arg2) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _num; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ register long _num __asm__ ("d0") = (num); \
+ register long _arg1 __asm__ ("d1") = (long)(arg1); \
+ register long _arg2 __asm__ ("d2") = (long)(arg2); \
+ register long _arg3 __asm__ ("d3") = (long)(arg3); \
+ \
+ __asm__ volatile ( \
+ "trap #0\n" \
+ : "+r"(_num) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _num; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ register long _num __asm__ ("d0") = (num); \
+ register long _arg1 __asm__ ("d1") = (long)(arg1); \
+ register long _arg2 __asm__ ("d2") = (long)(arg2); \
+ register long _arg3 __asm__ ("d3") = (long)(arg3); \
+ register long _arg4 __asm__ ("d4") = (long)(arg4); \
+ \
+ __asm__ volatile ( \
+ "trap #0\n" \
+ : "+r" (_num) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _num; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ register long _num __asm__ ("d0") = (num); \
+ register long _arg1 __asm__ ("d1") = (long)(arg1); \
+ register long _arg2 __asm__ ("d2") = (long)(arg2); \
+ register long _arg3 __asm__ ("d3") = (long)(arg3); \
+ register long _arg4 __asm__ ("d4") = (long)(arg4); \
+ register long _arg5 __asm__ ("d5") = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ "trap #0\n" \
+ : "+r" (_num) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _num; \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ register long _num __asm__ ("d0") = (num); \
+ register long _arg1 __asm__ ("d1") = (long)(arg1); \
+ register long _arg2 __asm__ ("d2") = (long)(arg2); \
+ register long _arg3 __asm__ ("d3") = (long)(arg3); \
+ register long _arg4 __asm__ ("d4") = (long)(arg4); \
+ register long _arg5 __asm__ ("d5") = (long)(arg5); \
+ register long _arg6 __asm__ ("a0") = (long)(arg6); \
+ \
+ __asm__ volatile ( \
+ "trap #0\n" \
+ : "+r" (_num) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "r"(_arg6) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _num; \
+})
+
+#ifndef NOLIBC_NO_RUNTIME
+void _start(void);
+void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
+{
+ __asm__ volatile (
+ "movel %sp, %sp@-\n"
+ "jsr _start_c\n"
+ );
+ __nolibc_entrypoint_epilogue();
+}
+#endif /* NOLIBC_NO_RUNTIME */
+
+#endif /* _NOLIBC_ARCH_M68K_H */
diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
index 7380093ba9e7..a72506ceec6b 100644
--- a/tools/include/nolibc/arch-mips.h
+++ b/tools/include/nolibc/arch-mips.h
@@ -7,45 +7,12 @@
#ifndef _NOLIBC_ARCH_MIPS_H
#define _NOLIBC_ARCH_MIPS_H
-/* O_* macros for fcntl/open are architecture-specific */
-#define O_RDONLY 0
-#define O_WRONLY 1
-#define O_RDWR 2
-#define O_APPEND 0x0008
-#define O_NONBLOCK 0x0080
-#define O_CREAT 0x0100
-#define O_TRUNC 0x0200
-#define O_EXCL 0x0400
-#define O_NOCTTY 0x0800
-#define O_DIRECTORY 0x10000
-
-/* The struct returned by the stat() syscall. 88 bytes are returned by the
- * syscall.
- */
-struct sys_stat_struct {
- unsigned int st_dev;
- long st_pad1[3];
- unsigned long st_ino;
- unsigned int st_mode;
- unsigned int st_nlink;
- unsigned int st_uid;
- unsigned int st_gid;
- unsigned int st_rdev;
- long st_pad2[2];
- long st_size;
- long st_pad3;
-
- long st_atime;
- long st_atime_nsec;
- long st_mtime;
- long st_mtime_nsec;
-
- long st_ctime;
- long st_ctime_nsec;
- long st_blksize;
- long st_blocks;
- long st_pad4[14];
-};
+#include "compiler.h"
+#include "crt.h"
+
+#if !defined(_ABIO32) && !defined(_ABIN32) && !defined(_ABI64)
+#error Unsupported MIPS ABI
+#endif
/* Syscalls for MIPS ABI O32 :
* - WARNING! there's always a delayed slot!
@@ -65,21 +32,45 @@ struct sys_stat_struct {
* - the arguments are cast to long and assigned into the target registers
* which are then simply passed as registers to the asm code, so that we
* don't have to experience issues with register constraints.
+ *
+ * Syscalls for MIPS ABI N32, same as ABI O32 with the following differences :
+ * - arguments are in a0, a1, a2, a3, t0, t1, t2, t3.
+ * t0..t3 are also known as a4..a7.
+ * - stack is 16-byte aligned
*/
+#if defined(_ABIO32)
+
+#define _NOLIBC_SYSCALL_CLOBBERLIST \
+ "memory", "cc", "at", "v1", "hi", "lo", \
+ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"
+#define _NOLIBC_SYSCALL_STACK_RESERVE "addiu $sp, $sp, -32\n"
+#define _NOLIBC_SYSCALL_STACK_UNRESERVE "addiu $sp, $sp, 32\n"
+
+#else /* _ABIN32 || _ABI64 */
+
+/* binutils, GCC and clang disagree about register aliases, use numbers instead. */
+#define _NOLIBC_SYSCALL_CLOBBERLIST \
+ "memory", "cc", "at", "v1", \
+ "10", "11", "12", "13", "14", "15", "24", "25"
+
+#define _NOLIBC_SYSCALL_STACK_RESERVE
+#define _NOLIBC_SYSCALL_STACK_UNRESERVE
+
+#endif /* _ABIO32 */
+
#define my_syscall0(num) \
({ \
register long _num __asm__ ("v0") = (num); \
register long _arg4 __asm__ ("a3"); \
- \
- __asm__ volatile ( \
- "addiu $sp, $sp, -32\n" \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL_STACK_RESERVE \
"syscall\n" \
- "addiu $sp, $sp, 32\n" \
+ _NOLIBC_SYSCALL_STACK_UNRESERVE \
: "=r"(_num), "=r"(_arg4) \
: "r"(_num) \
- : "memory", "cc", "at", "v1", "hi", "lo", \
- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
); \
_arg4 ? -_num : _num; \
})
@@ -89,16 +80,15 @@ struct sys_stat_struct {
register long _num __asm__ ("v0") = (num); \
register long _arg1 __asm__ ("a0") = (long)(arg1); \
register long _arg4 __asm__ ("a3"); \
- \
- __asm__ volatile ( \
- "addiu $sp, $sp, -32\n" \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL_STACK_RESERVE \
"syscall\n" \
- "addiu $sp, $sp, 32\n" \
+ _NOLIBC_SYSCALL_STACK_UNRESERVE \
: "=r"(_num), "=r"(_arg4) \
: "0"(_num), \
"r"(_arg1) \
- : "memory", "cc", "at", "v1", "hi", "lo", \
- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
); \
_arg4 ? -_num : _num; \
})
@@ -109,16 +99,15 @@ struct sys_stat_struct {
register long _arg1 __asm__ ("a0") = (long)(arg1); \
register long _arg2 __asm__ ("a1") = (long)(arg2); \
register long _arg4 __asm__ ("a3"); \
- \
- __asm__ volatile ( \
- "addiu $sp, $sp, -32\n" \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL_STACK_RESERVE \
"syscall\n" \
- "addiu $sp, $sp, 32\n" \
+ _NOLIBC_SYSCALL_STACK_UNRESERVE \
: "=r"(_num), "=r"(_arg4) \
: "0"(_num), \
"r"(_arg1), "r"(_arg2) \
- : "memory", "cc", "at", "v1", "hi", "lo", \
- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
); \
_arg4 ? -_num : _num; \
})
@@ -130,16 +119,15 @@ struct sys_stat_struct {
register long _arg2 __asm__ ("a1") = (long)(arg2); \
register long _arg3 __asm__ ("a2") = (long)(arg3); \
register long _arg4 __asm__ ("a3"); \
- \
- __asm__ volatile ( \
- "addiu $sp, $sp, -32\n" \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL_STACK_RESERVE \
"syscall\n" \
- "addiu $sp, $sp, 32\n" \
+ _NOLIBC_SYSCALL_STACK_UNRESERVE \
: "=r"(_num), "=r"(_arg4) \
: "0"(_num), \
"r"(_arg1), "r"(_arg2), "r"(_arg3) \
- : "memory", "cc", "at", "v1", "hi", "lo", \
- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
); \
_arg4 ? -_num : _num; \
})
@@ -151,20 +139,21 @@ struct sys_stat_struct {
register long _arg2 __asm__ ("a1") = (long)(arg2); \
register long _arg3 __asm__ ("a2") = (long)(arg3); \
register long _arg4 __asm__ ("a3") = (long)(arg4); \
- \
- __asm__ volatile ( \
- "addiu $sp, $sp, -32\n" \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL_STACK_RESERVE \
"syscall\n" \
- "addiu $sp, $sp, 32\n" \
+ _NOLIBC_SYSCALL_STACK_UNRESERVE \
: "=r" (_num), "=r"(_arg4) \
: "0"(_num), \
"r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \
- : "memory", "cc", "at", "v1", "hi", "lo", \
- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
); \
_arg4 ? -_num : _num; \
})
+#if defined(_ABIO32)
+
#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
({ \
register long _num __asm__ ("v0") = (num); \
@@ -173,45 +162,111 @@ struct sys_stat_struct {
register long _arg3 __asm__ ("a2") = (long)(arg3); \
register long _arg4 __asm__ ("a3") = (long)(arg4); \
register long _arg5 = (long)(arg5); \
- \
- __asm__ volatile ( \
- "addiu $sp, $sp, -32\n" \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL_STACK_RESERVE \
+ "sw %7, 16($sp)\n" \
+ "syscall\n" \
+ _NOLIBC_SYSCALL_STACK_UNRESERVE \
+ : "=r" (_num), "=r"(_arg4) \
+ : "0"(_num), \
+ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _arg4 ? -_num : _num; \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ register long _num __asm__ ("v0") = (num); \
+ register long _arg1 __asm__ ("a0") = (long)(arg1); \
+ register long _arg2 __asm__ ("a1") = (long)(arg2); \
+ register long _arg3 __asm__ ("a2") = (long)(arg3); \
+ register long _arg4 __asm__ ("a3") = (long)(arg4); \
+ register long _arg5 = (long)(arg5); \
+ register long _arg6 = (long)(arg6); \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL_STACK_RESERVE \
"sw %7, 16($sp)\n" \
- "syscall\n " \
- "addiu $sp, $sp, 32\n" \
+ "sw %8, 20($sp)\n" \
+ "syscall\n" \
+ _NOLIBC_SYSCALL_STACK_UNRESERVE \
+ : "=r" (_num), "=r"(_arg4) \
+ : "0"(_num), \
+ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "r"(_arg6) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _arg4 ? -_num : _num; \
+})
+
+#else /* _ABIN32 || _ABI64 */
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ register long _num __asm__ ("v0") = (num); \
+ register long _arg1 __asm__ ("$4") = (long)(arg1); \
+ register long _arg2 __asm__ ("$5") = (long)(arg2); \
+ register long _arg3 __asm__ ("$6") = (long)(arg3); \
+ register long _arg4 __asm__ ("$7") = (long)(arg4); \
+ register long _arg5 __asm__ ("$8") = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ "syscall\n" \
: "=r" (_num), "=r"(_arg4) \
: "0"(_num), \
"r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \
- : "memory", "cc", "at", "v1", "hi", "lo", \
- "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _arg4 ? -_num : _num; \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ register long _num __asm__ ("v0") = (num); \
+ register long _arg1 __asm__ ("$4") = (long)(arg1); \
+ register long _arg2 __asm__ ("$5") = (long)(arg2); \
+ register long _arg3 __asm__ ("$6") = (long)(arg3); \
+ register long _arg4 __asm__ ("$7") = (long)(arg4); \
+ register long _arg5 __asm__ ("$8") = (long)(arg5); \
+ register long _arg6 __asm__ ("$9") = (long)(arg6); \
+ \
+ __asm__ volatile ( \
+ "syscall\n" \
+ : "=r" (_num), "=r"(_arg4) \
+ : "0"(_num), \
+ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "r"(_arg6) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
); \
_arg4 ? -_num : _num; \
})
+#endif /* _ABIO32 */
+
+#ifndef NOLIBC_NO_RUNTIME
/* startup code, note that it's called __start on MIPS */
-__asm__ (".section .text\n"
- ".weak __start\n"
- ".set nomips16\n"
- ".set push\n"
- ".set noreorder\n"
- ".option pic0\n"
- ".ent __start\n"
- "__start:\n"
- "lw $a0,($sp)\n" // argc was in the stack
- "addiu $a1, $sp, 4\n" // argv = sp + 4
- "sll $a2, $a0, 2\n" // a2 = argc * 4
- "add $a2, $a2, $a1\n" // envp = argv + 4*argc ...
- "addiu $a2, $a2, 4\n" // ... + 4
- "li $t0, -8\n"
- "and $sp, $sp, $t0\n" // sp must be 8-byte aligned
- "addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there!
- "jal main\n" // main() returns the status code, we'll exit with it.
- "nop\n" // delayed slot
- "move $a0, $v0\n" // retrieve 32-bit exit code from v0
- "li $v0, 4001\n" // NR_exit == 4001
- "syscall\n"
- ".end __start\n"
- ".set pop\n"
- "");
-
-#endif // _NOLIBC_ARCH_MIPS_H
+void __start(void);
+void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __start(void)
+{
+ __asm__ volatile (
+ "move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */
+#if defined(_ABIO32)
+ "addiu $sp, $sp, -16\n" /* the callee expects to save a0..a3 there */
+#endif /* _ABIO32 */
+ "lui $t9, %hi(_start_c)\n" /* ABI requires current function address in $t9 */
+ "ori $t9, %lo(_start_c)\n"
+#if defined(_ABI64)
+ "lui $t0, %highest(_start_c)\n"
+ "ori $t0, %higher(_start_c)\n"
+ "dsll $t0, 0x20\n"
+ "or $t9, $t0\n"
+#endif /* _ABI64 */
+ "jalr $t9\n" /* transfer to c runtime */
+ );
+ __nolibc_entrypoint_epilogue();
+}
+#endif /* NOLIBC_NO_RUNTIME */
+
+#endif /* _NOLIBC_ARCH_MIPS_H */
diff --git a/tools/include/nolibc/arch-powerpc.h b/tools/include/nolibc/arch-powerpc.h
new file mode 100644
index 000000000000..e0c7e0b81f7c
--- /dev/null
+++ b/tools/include/nolibc/arch-powerpc.h
@@ -0,0 +1,221 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * PowerPC specific definitions for NOLIBC
+ * Copyright (C) 2023 Zhangjin Wu <falcon@tinylab.org>
+ */
+
+#ifndef _NOLIBC_ARCH_POWERPC_H
+#define _NOLIBC_ARCH_POWERPC_H
+
+#include "compiler.h"
+#include "crt.h"
+
+/* Syscalls for PowerPC :
+ * - stack is 16-byte aligned
+ * - syscall number is passed in r0
+ * - arguments are in r3, r4, r5, r6, r7, r8, r9
+ * - the system call is performed by calling "sc"
+ * - syscall return comes in r3, and the summary overflow bit is checked
+ * to know if an error occurred, in which case errno is in r3.
+ * - the arguments are cast to long and assigned into the target
+ * registers which are then simply passed as registers to the asm code,
+ * so that we don't have to experience issues with register constraints.
+ */
+
+#define _NOLIBC_SYSCALL_CLOBBERLIST \
+ "memory", "cr0", "r12", "r11", "r10", "r9"
+
+#define my_syscall0(num) \
+({ \
+ register long _ret __asm__ ("r3"); \
+ register long _num __asm__ ("r0") = (num); \
+ \
+ __asm__ volatile ( \
+ " sc\n" \
+ " bns+ 1f\n" \
+ " neg %0, %0\n" \
+ "1:\n" \
+ : "=r"(_ret), "+r"(_num) \
+ : \
+ : _NOLIBC_SYSCALL_CLOBBERLIST, "r8", "r7", "r6", "r5", "r4" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ register long _ret __asm__ ("r3"); \
+ register long _num __asm__ ("r0") = (num); \
+ register long _arg1 __asm__ ("r3") = (long)(arg1); \
+ \
+ __asm__ volatile ( \
+ " sc\n" \
+ " bns+ 1f\n" \
+ " neg %0, %0\n" \
+ "1:\n" \
+ : "=r"(_ret), "+r"(_num) \
+ : "0"(_arg1) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST, "r8", "r7", "r6", "r5", "r4" \
+ ); \
+ _ret; \
+})
+
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ register long _ret __asm__ ("r3"); \
+ register long _num __asm__ ("r0") = (num); \
+ register long _arg1 __asm__ ("r3") = (long)(arg1); \
+ register long _arg2 __asm__ ("r4") = (long)(arg2); \
+ \
+ __asm__ volatile ( \
+ " sc\n" \
+ " bns+ 1f\n" \
+ " neg %0, %0\n" \
+ "1:\n" \
+ : "=r"(_ret), "+r"(_num), "+r"(_arg2) \
+ : "0"(_arg1) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST, "r8", "r7", "r6", "r5" \
+ ); \
+ _ret; \
+})
+
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ register long _ret __asm__ ("r3"); \
+ register long _num __asm__ ("r0") = (num); \
+ register long _arg1 __asm__ ("r3") = (long)(arg1); \
+ register long _arg2 __asm__ ("r4") = (long)(arg2); \
+ register long _arg3 __asm__ ("r5") = (long)(arg3); \
+ \
+ __asm__ volatile ( \
+ " sc\n" \
+ " bns+ 1f\n" \
+ " neg %0, %0\n" \
+ "1:\n" \
+ : "=r"(_ret), "+r"(_num), "+r"(_arg2), "+r"(_arg3) \
+ : "0"(_arg1) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST, "r8", "r7", "r6" \
+ ); \
+ _ret; \
+})
+
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ register long _ret __asm__ ("r3"); \
+ register long _num __asm__ ("r0") = (num); \
+ register long _arg1 __asm__ ("r3") = (long)(arg1); \
+ register long _arg2 __asm__ ("r4") = (long)(arg2); \
+ register long _arg3 __asm__ ("r5") = (long)(arg3); \
+ register long _arg4 __asm__ ("r6") = (long)(arg4); \
+ \
+ __asm__ volatile ( \
+ " sc\n" \
+ " bns+ 1f\n" \
+ " neg %0, %0\n" \
+ "1:\n" \
+ : "=r"(_ret), "+r"(_num), "+r"(_arg2), "+r"(_arg3), \
+ "+r"(_arg4) \
+ : "0"(_arg1) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST, "r8", "r7" \
+ ); \
+ _ret; \
+})
+
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ register long _ret __asm__ ("r3"); \
+ register long _num __asm__ ("r0") = (num); \
+ register long _arg1 __asm__ ("r3") = (long)(arg1); \
+ register long _arg2 __asm__ ("r4") = (long)(arg2); \
+ register long _arg3 __asm__ ("r5") = (long)(arg3); \
+ register long _arg4 __asm__ ("r6") = (long)(arg4); \
+ register long _arg5 __asm__ ("r7") = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ " sc\n" \
+ " bns+ 1f\n" \
+ " neg %0, %0\n" \
+ "1:\n" \
+ : "=r"(_ret), "+r"(_num), "+r"(_arg2), "+r"(_arg3), \
+ "+r"(_arg4), "+r"(_arg5) \
+ : "0"(_arg1) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST, "r8" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ register long _ret __asm__ ("r3"); \
+ register long _num __asm__ ("r0") = (num); \
+ register long _arg1 __asm__ ("r3") = (long)(arg1); \
+ register long _arg2 __asm__ ("r4") = (long)(arg2); \
+ register long _arg3 __asm__ ("r5") = (long)(arg3); \
+ register long _arg4 __asm__ ("r6") = (long)(arg4); \
+ register long _arg5 __asm__ ("r7") = (long)(arg5); \
+ register long _arg6 __asm__ ("r8") = (long)(arg6); \
+ \
+ __asm__ volatile ( \
+ " sc\n" \
+ " bns+ 1f\n" \
+ " neg %0, %0\n" \
+ "1:\n" \
+ : "=r"(_ret), "+r"(_num), "+r"(_arg2), "+r"(_arg3), \
+ "+r"(_arg4), "+r"(_arg5), "+r"(_arg6) \
+ : "0"(_arg1) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _ret; \
+})
+
+#if !defined(__powerpc64__) && !defined(__clang__)
+/* FIXME: For 32-bit PowerPC, with newer gcc compilers (e.g. gcc 13.1.0),
+ * "omit-frame-pointer" fails with __attribute__((no_stack_protector)) but
+ * works with __attribute__((__optimize__("-fno-stack-protector")))
+ */
+#ifdef __no_stack_protector
+#undef __no_stack_protector
+#define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector")))
+#endif
+#endif /* !__powerpc64__ */
+
+#ifndef NOLIBC_NO_RUNTIME
+/* startup code */
+void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
+{
+#ifdef __powerpc64__
+#if _CALL_ELF == 2
+ /* with -mabi=elfv2, save TOC/GOT pointer to r2
+ * r12 is global entry pointer, we use it to compute TOC from r12
+ * https://www.llvm.org/devmtg/2014-04/PDFs/Talks/Euro-LLVM-2014-Weigand.pdf
+ * https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.pdf
+ */
+ __asm__ volatile (
+ "addis 2, 12, .TOC. - _start@ha\n"
+ "addi 2, 2, .TOC. - _start@l\n"
+ );
+#endif /* _CALL_ELF == 2 */
+
+ __asm__ volatile (
+ "mr 3, 1\n" /* save stack pointer to r3, as arg1 of _start_c */
+ "li 0, 0\n" /* zero the frame pointer */
+ "stdu 1, -32(1)\n" /* the initial stack frame */
+ "bl _start_c\n" /* transfer to c runtime */
+ );
+#else
+ __asm__ volatile (
+ "mr 3, 1\n" /* save stack pointer to r3, as arg1 of _start_c */
+ "li 0, 0\n" /* zero the frame pointer */
+ "stwu 1, -16(1)\n" /* the initial stack frame */
+ "bl _start_c\n" /* transfer to c runtime */
+ );
+#endif
+ __nolibc_entrypoint_epilogue();
+}
+#endif /* NOLIBC_NO_RUNTIME */
+
+#endif /* _NOLIBC_ARCH_POWERPC_H */
diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h
index a3bdd9803f8c..1c00cacf57e1 100644
--- a/tools/include/nolibc/arch-riscv.h
+++ b/tools/include/nolibc/arch-riscv.h
@@ -7,48 +7,8 @@
#ifndef _NOLIBC_ARCH_RISCV_H
#define _NOLIBC_ARCH_RISCV_H
-/* O_* macros for fcntl/open are architecture-specific */
-#define O_RDONLY 0
-#define O_WRONLY 1
-#define O_RDWR 2
-#define O_CREAT 0x40
-#define O_EXCL 0x80
-#define O_NOCTTY 0x100
-#define O_TRUNC 0x200
-#define O_APPEND 0x400
-#define O_NONBLOCK 0x800
-#define O_DIRECTORY 0x10000
-
-struct sys_stat_struct {
- unsigned long st_dev; /* Device. */
- unsigned long st_ino; /* File serial number. */
- unsigned int st_mode; /* File mode. */
- unsigned int st_nlink; /* Link count. */
- unsigned int st_uid; /* User ID of the file's owner. */
- unsigned int st_gid; /* Group ID of the file's group. */
- unsigned long st_rdev; /* Device number, if device. */
- unsigned long __pad1;
- long st_size; /* Size of file, in bytes. */
- int st_blksize; /* Optimal block size for I/O. */
- int __pad2;
- long st_blocks; /* Number 512-byte blocks allocated. */
- long st_atime; /* Time of last access. */
- unsigned long st_atime_nsec;
- long st_mtime; /* Time of last modification. */
- unsigned long st_mtime_nsec;
- long st_ctime; /* Time of last status change. */
- unsigned long st_ctime_nsec;
- unsigned int __unused4;
- unsigned int __unused5;
-};
-
-#if __riscv_xlen == 64
-#define PTRLOG "3"
-#define SZREG "8"
-#elif __riscv_xlen == 32
-#define PTRLOG "2"
-#define SZREG "4"
-#endif
+#include "compiler.h"
+#include "crt.h"
/* Syscalls for RISCV :
* - stack is 16-byte aligned
@@ -59,17 +19,14 @@ struct sys_stat_struct {
* - the arguments are cast to long and assigned into the target
* registers which are then simply passed as registers to the asm code,
* so that we don't have to experience issues with register constraints.
- *
- * On riscv, select() is not implemented so we have to use pselect6().
*/
-#define __ARCH_WANT_SYS_PSELECT6
#define my_syscall0(num) \
({ \
register long _num __asm__ ("a7") = (num); \
register long _arg1 __asm__ ("a0"); \
\
- __asm__ volatile ( \
+ __asm__ volatile ( \
"ecall\n\t" \
: "=r"(_arg1) \
: "r"(_num) \
@@ -83,7 +40,7 @@ struct sys_stat_struct {
register long _num __asm__ ("a7") = (num); \
register long _arg1 __asm__ ("a0") = (long)(arg1); \
\
- __asm__ volatile ( \
+ __asm__ volatile ( \
"ecall\n" \
: "+r"(_arg1) \
: "r"(_num) \
@@ -98,7 +55,7 @@ struct sys_stat_struct {
register long _arg1 __asm__ ("a0") = (long)(arg1); \
register long _arg2 __asm__ ("a1") = (long)(arg2); \
\
- __asm__ volatile ( \
+ __asm__ volatile ( \
"ecall\n" \
: "+r"(_arg1) \
: "r"(_arg2), \
@@ -115,7 +72,7 @@ struct sys_stat_struct {
register long _arg2 __asm__ ("a1") = (long)(arg2); \
register long _arg3 __asm__ ("a2") = (long)(arg3); \
\
- __asm__ volatile ( \
+ __asm__ volatile ( \
"ecall\n\t" \
: "+r"(_arg1) \
: "r"(_arg2), "r"(_arg3), \
@@ -133,7 +90,7 @@ struct sys_stat_struct {
register long _arg3 __asm__ ("a2") = (long)(arg3); \
register long _arg4 __asm__ ("a3") = (long)(arg4); \
\
- __asm__ volatile ( \
+ __asm__ volatile ( \
"ecall\n" \
: "+r"(_arg1) \
: "r"(_arg2), "r"(_arg3), "r"(_arg4), \
@@ -152,7 +109,7 @@ struct sys_stat_struct {
register long _arg4 __asm__ ("a3") = (long)(arg4); \
register long _arg5 __asm__ ("a4") = (long)(arg5); \
\
- __asm__ volatile ( \
+ __asm__ volatile ( \
"ecall\n" \
: "+r"(_arg1) \
: "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
@@ -172,7 +129,7 @@ struct sys_stat_struct {
register long _arg5 __asm__ ("a4") = (long)(arg5); \
register long _arg6 __asm__ ("a5") = (long)(arg6); \
\
- __asm__ volatile ( \
+ __asm__ volatile ( \
"ecall\n" \
: "+r"(_arg1) \
: "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \
@@ -182,23 +139,20 @@ struct sys_stat_struct {
_arg1; \
})
+#ifndef NOLIBC_NO_RUNTIME
/* startup code */
-__asm__ (".section .text\n"
- ".weak _start\n"
- "_start:\n"
- ".option push\n"
- ".option norelax\n"
- "lla gp, __global_pointer$\n"
- ".option pop\n"
- "lw a0, 0(sp)\n" // argc (a0) was in the stack
- "add a1, sp, "SZREG"\n" // argv (a1) = sp
- "slli a2, a0, "PTRLOG"\n" // envp (a2) = SZREG*argc ...
- "add a2, a2, "SZREG"\n" // + SZREG (skip null)
- "add a2,a2,a1\n" // + argv
- "andi sp,a1,-16\n" // sp must be 16-byte aligned
- "call main\n" // main() returns the status code, we'll exit with it.
- "li a7, 93\n" // NR_exit == 93
- "ecall\n"
- "");
+void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
+{
+ __asm__ volatile (
+ ".option push\n"
+ ".option norelax\n"
+ "lla gp, __global_pointer$\n"
+ ".option pop\n"
+ "mv a0, sp\n" /* save stack pointer to a0, as arg1 of _start_c */
+ "call _start_c\n" /* transfer to c runtime */
+ );
+ __nolibc_entrypoint_epilogue();
+}
+#endif /* NOLIBC_NO_RUNTIME */
-#endif // _NOLIBC_ARCH_RISCV_H
+#endif /* _NOLIBC_ARCH_RISCV_H */
diff --git a/tools/include/nolibc/arch-s390.h b/tools/include/nolibc/arch-s390.h
new file mode 100644
index 000000000000..74125a254ce3
--- /dev/null
+++ b/tools/include/nolibc/arch-s390.h
@@ -0,0 +1,189 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * s390 specific definitions for NOLIBC
+ */
+
+#ifndef _NOLIBC_ARCH_S390_H
+#define _NOLIBC_ARCH_S390_H
+#include <linux/signal.h>
+#include <linux/unistd.h>
+
+#include "compiler.h"
+#include "crt.h"
+#include "std.h"
+
+/* Syscalls for s390:
+ * - registers are 64-bit
+ * - syscall number is passed in r1
+ * - arguments are in r2-r7
+ * - the system call is performed by calling the svc instruction
+ * - syscall return value is in r2
+ * - r1 and r2 are clobbered, others are preserved.
+ *
+ * Link s390 ABI: https://github.com/IBM/s390x-abi
+ *
+ */
+
+#define my_syscall0(num) \
+({ \
+ register long _num __asm__ ("1") = (num); \
+ register long _rc __asm__ ("2"); \
+ \
+ __asm__ volatile ( \
+ "svc 0\n" \
+ : "=d"(_rc) \
+ : "d"(_num) \
+ : "memory", "cc" \
+ ); \
+ _rc; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ register long _num __asm__ ("1") = (num); \
+ register long _arg1 __asm__ ("2") = (long)(arg1); \
+ \
+ __asm__ volatile ( \
+ "svc 0\n" \
+ : "+d"(_arg1) \
+ : "d"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ register long _num __asm__ ("1") = (num); \
+ register long _arg1 __asm__ ("2") = (long)(arg1); \
+ register long _arg2 __asm__ ("3") = (long)(arg2); \
+ \
+ __asm__ volatile ( \
+ "svc 0\n" \
+ : "+d"(_arg1) \
+ : "d"(_arg2), "d"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ register long _num __asm__ ("1") = (num); \
+ register long _arg1 __asm__ ("2") = (long)(arg1); \
+ register long _arg2 __asm__ ("3") = (long)(arg2); \
+ register long _arg3 __asm__ ("4") = (long)(arg3); \
+ \
+ __asm__ volatile ( \
+ "svc 0\n" \
+ : "+d"(_arg1) \
+ : "d"(_arg2), "d"(_arg3), "d"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ register long _num __asm__ ("1") = (num); \
+ register long _arg1 __asm__ ("2") = (long)(arg1); \
+ register long _arg2 __asm__ ("3") = (long)(arg2); \
+ register long _arg3 __asm__ ("4") = (long)(arg3); \
+ register long _arg4 __asm__ ("5") = (long)(arg4); \
+ \
+ __asm__ volatile ( \
+ "svc 0\n" \
+ : "+d"(_arg1) \
+ : "d"(_arg2), "d"(_arg3), "d"(_arg4), "d"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ register long _num __asm__ ("1") = (num); \
+ register long _arg1 __asm__ ("2") = (long)(arg1); \
+ register long _arg2 __asm__ ("3") = (long)(arg2); \
+ register long _arg3 __asm__ ("4") = (long)(arg3); \
+ register long _arg4 __asm__ ("5") = (long)(arg4); \
+ register long _arg5 __asm__ ("6") = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ "svc 0\n" \
+ : "+d"(_arg1) \
+ : "d"(_arg2), "d"(_arg3), "d"(_arg4), "d"(_arg5), \
+ "d"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ register long _num __asm__ ("1") = (num); \
+ register long _arg1 __asm__ ("2") = (long)(arg1); \
+ register long _arg2 __asm__ ("3") = (long)(arg2); \
+ register long _arg3 __asm__ ("4") = (long)(arg3); \
+ register long _arg4 __asm__ ("5") = (long)(arg4); \
+ register long _arg5 __asm__ ("6") = (long)(arg5); \
+ register long _arg6 __asm__ ("7") = (long)(arg6); \
+ \
+ __asm__ volatile ( \
+ "svc 0\n" \
+ : "+d"(_arg1) \
+ : "d"(_arg2), "d"(_arg3), "d"(_arg4), "d"(_arg5), \
+ "d"(_arg6), "d"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#ifndef NOLIBC_NO_RUNTIME
+/* startup code */
+void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
+{
+ __asm__ volatile (
+ "lgr %r2, %r15\n" /* save stack pointer to %r2, as arg1 of _start_c */
+ "aghi %r15, -160\n" /* allocate new stackframe */
+ "xc 0(8,%r15), 0(%r15)\n" /* clear backchain */
+ "brasl %r14, _start_c\n" /* transfer to c runtime */
+ );
+ __nolibc_entrypoint_epilogue();
+}
+#endif /* NOLIBC_NO_RUNTIME */
+
+struct s390_mmap_arg_struct {
+ unsigned long addr;
+ unsigned long len;
+ unsigned long prot;
+ unsigned long flags;
+ unsigned long fd;
+ unsigned long offset;
+};
+
+static __attribute__((unused))
+void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd,
+ off_t offset)
+{
+ struct s390_mmap_arg_struct args = {
+ .addr = (unsigned long)addr,
+ .len = (unsigned long)length,
+ .prot = prot,
+ .flags = flags,
+ .fd = fd,
+ .offset = (unsigned long)offset
+ };
+
+ return (void *)my_syscall1(__NR_mmap, &args);
+}
+#define sys_mmap sys_mmap
+
+static __attribute__((unused))
+pid_t sys_fork(void)
+{
+ return my_syscall5(__NR_clone, 0, SIGCHLD, 0, 0, 0);
+}
+#define sys_fork sys_fork
+
+#endif /* _NOLIBC_ARCH_S390_H */
diff --git a/tools/include/nolibc/arch-sh.h b/tools/include/nolibc/arch-sh.h
new file mode 100644
index 000000000000..7a421197d104
--- /dev/null
+++ b/tools/include/nolibc/arch-sh.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * SuperH specific definitions for NOLIBC
+ * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net>
+ */
+
+#ifndef _NOLIBC_ARCH_SH_H
+#define _NOLIBC_ARCH_SH_H
+
+#include "compiler.h"
+#include "crt.h"
+
+/*
+ * Syscalls for SuperH:
+ * - registers are 32bit wide
+ * - syscall number is passed in r3
+ * - arguments are in r4, r5, r6, r7, r0, r1, r2
+ * - the system call is performed by calling trapa #31
+ * - syscall return value is in r0
+ */
+
+#define my_syscall0(num) \
+({ \
+ register long _num __asm__ ("r3") = (num); \
+ register long _ret __asm__ ("r0"); \
+ \
+ __asm__ volatile ( \
+ "trapa #31" \
+ : "=r"(_ret) \
+ : "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ register long _num __asm__ ("r3") = (num); \
+ register long _ret __asm__ ("r0"); \
+ register long _arg1 __asm__ ("r4") = (long)(arg1); \
+ \
+ __asm__ volatile ( \
+ "trapa #31" \
+ : "=r"(_ret) \
+ : "r"(_num), "r"(_arg1) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ register long _num __asm__ ("r3") = (num); \
+ register long _ret __asm__ ("r0"); \
+ register long _arg1 __asm__ ("r4") = (long)(arg1); \
+ register long _arg2 __asm__ ("r5") = (long)(arg2); \
+ \
+ __asm__ volatile ( \
+ "trapa #31" \
+ : "=r"(_ret) \
+ : "r"(_num), "r"(_arg1), "r"(_arg2) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ register long _num __asm__ ("r3") = (num); \
+ register long _ret __asm__ ("r0"); \
+ register long _arg1 __asm__ ("r4") = (long)(arg1); \
+ register long _arg2 __asm__ ("r5") = (long)(arg2); \
+ register long _arg3 __asm__ ("r6") = (long)(arg3); \
+ \
+ __asm__ volatile ( \
+ "trapa #31" \
+ : "=r"(_ret) \
+ : "r"(_num), "r"(_arg1), "r"(_arg2), "r"(_arg3) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ register long _num __asm__ ("r3") = (num); \
+ register long _ret __asm__ ("r0"); \
+ register long _arg1 __asm__ ("r4") = (long)(arg1); \
+ register long _arg2 __asm__ ("r5") = (long)(arg2); \
+ register long _arg3 __asm__ ("r6") = (long)(arg3); \
+ register long _arg4 __asm__ ("r7") = (long)(arg4); \
+ \
+ __asm__ volatile ( \
+ "trapa #31" \
+ : "=r"(_ret) \
+ : "r"(_num), "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ register long _num __asm__ ("r3") = (num); \
+ register long _ret __asm__ ("r0"); \
+ register long _arg1 __asm__ ("r4") = (long)(arg1); \
+ register long _arg2 __asm__ ("r5") = (long)(arg2); \
+ register long _arg3 __asm__ ("r6") = (long)(arg3); \
+ register long _arg4 __asm__ ("r7") = (long)(arg4); \
+ register long _arg5 __asm__ ("r0") = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ "trapa #31" \
+ : "=r"(_ret) \
+ : "r"(_num), "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
+ "r"(_arg5) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ register long _num __asm__ ("r3") = (num); \
+ register long _ret __asm__ ("r0"); \
+ register long _arg1 __asm__ ("r4") = (long)(arg1); \
+ register long _arg2 __asm__ ("r5") = (long)(arg2); \
+ register long _arg3 __asm__ ("r6") = (long)(arg3); \
+ register long _arg4 __asm__ ("r7") = (long)(arg4); \
+ register long _arg5 __asm__ ("r0") = (long)(arg5); \
+ register long _arg6 __asm__ ("r1") = (long)(arg6); \
+ \
+ __asm__ volatile ( \
+ "trapa #31" \
+ : "=r"(_ret) \
+ : "r"(_num), "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
+ "r"(_arg5), "r"(_arg6) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#ifndef NOLIBC_NO_RUNTIME
+/* startup code */
+void _start_wrapper(void);
+void __attribute__((weak,noreturn)) __nolibc_entrypoint __no_stack_protector _start_wrapper(void)
+{
+ __asm__ volatile (
+ ".global _start\n" /* The C function will have a prologue, */
+ ".type _start, @function\n" /* corrupting "sp" */
+ ".weak _start\n"
+ "_start:\n"
+
+ "mov sp, r4\n" /* save argc pointer to r4, as arg1 of _start_c */
+ "bsr _start_c\n" /* transfer to c runtime */
+ "nop\n" /* delay slot */
+
+ ".size _start, .-_start\n"
+ );
+ __nolibc_entrypoint_epilogue();
+}
+#endif /* NOLIBC_NO_RUNTIME */
+
+#endif /* _NOLIBC_ARCH_SH_H */
diff --git a/tools/include/nolibc/arch-sparc.h b/tools/include/nolibc/arch-sparc.h
new file mode 100644
index 000000000000..2ebb5686e105
--- /dev/null
+++ b/tools/include/nolibc/arch-sparc.h
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * SPARC (32bit and 64bit) specific definitions for NOLIBC
+ * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net>
+ */
+
+#ifndef _NOLIBC_ARCH_SPARC_H
+#define _NOLIBC_ARCH_SPARC_H
+
+#include <linux/unistd.h>
+
+#include "compiler.h"
+#include "crt.h"
+
+/*
+ * Syscalls for SPARC:
+ * - registers are native word size
+ * - syscall number is passed in g1
+ * - arguments are in o0-o5
+ * - the system call is performed by calling a trap instruction
+ * - syscall return value is in o0
+ * - syscall error flag is in the carry bit of the processor status register
+ */
+
+#ifdef __arch64__
+
+#define _NOLIBC_SYSCALL "t 0x6d\n" \
+ "bcs,a %%xcc, 1f\n" \
+ "sub %%g0, %%o0, %%o0\n" \
+ "1:\n"
+
+#else
+
+#define _NOLIBC_SYSCALL "t 0x10\n" \
+ "bcs,a 1f\n" \
+ "sub %%g0, %%o0, %%o0\n" \
+ "1:\n"
+
+#endif /* __arch64__ */
+
+#define my_syscall0(num) \
+({ \
+ register long _num __asm__ ("g1") = (num); \
+ register long _arg1 __asm__ ("o0"); \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL \
+ : "+r"(_arg1) \
+ : "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ register long _num __asm__ ("g1") = (num); \
+ register long _arg1 __asm__ ("o0") = (long)(arg1); \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL \
+ : "+r"(_arg1) \
+ : "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ register long _num __asm__ ("g1") = (num); \
+ register long _arg1 __asm__ ("o0") = (long)(arg1); \
+ register long _arg2 __asm__ ("o1") = (long)(arg2); \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ register long _num __asm__ ("g1") = (num); \
+ register long _arg1 __asm__ ("o0") = (long)(arg1); \
+ register long _arg2 __asm__ ("o1") = (long)(arg2); \
+ register long _arg3 __asm__ ("o2") = (long)(arg3); \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ register long _num __asm__ ("g1") = (num); \
+ register long _arg1 __asm__ ("o0") = (long)(arg1); \
+ register long _arg2 __asm__ ("o1") = (long)(arg2); \
+ register long _arg3 __asm__ ("o2") = (long)(arg3); \
+ register long _arg4 __asm__ ("o3") = (long)(arg4); \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ register long _num __asm__ ("g1") = (num); \
+ register long _arg1 __asm__ ("o0") = (long)(arg1); \
+ register long _arg2 __asm__ ("o1") = (long)(arg2); \
+ register long _arg3 __asm__ ("o2") = (long)(arg3); \
+ register long _arg4 __asm__ ("o3") = (long)(arg4); \
+ register long _arg5 __asm__ ("o4") = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ register long _num __asm__ ("g1") = (num); \
+ register long _arg1 __asm__ ("o0") = (long)(arg1); \
+ register long _arg2 __asm__ ("o1") = (long)(arg2); \
+ register long _arg3 __asm__ ("o2") = (long)(arg3); \
+ register long _arg4 __asm__ ("o3") = (long)(arg4); \
+ register long _arg5 __asm__ ("o4") = (long)(arg5); \
+ register long _arg6 __asm__ ("o5") = (long)(arg6); \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#ifndef NOLIBC_NO_RUNTIME
+/* startup code */
+void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
+{
+ __asm__ volatile (
+ /*
+ * Save argc pointer to o0, as arg1 of _start_c.
+ * Account for the window save area, which is 16 registers wide.
+ */
+#ifdef __arch64__
+ "add %sp, 128 + 2047, %o0\n" /* on sparc64 / v9 the stack is offset by 2047 */
+#else
+ "add %sp, 64, %o0\n"
+#endif
+ "b,a _start_c\n" /* transfer to c runtime */
+ );
+ __nolibc_entrypoint_epilogue();
+}
+#endif /* NOLIBC_NO_RUNTIME */
+
+static pid_t getpid(void);
+
+static __attribute__((unused))
+pid_t sys_fork(void)
+{
+ pid_t parent, ret;
+
+ parent = getpid();
+ ret = my_syscall0(__NR_fork);
+
+ /* The syscall returns the parent pid in the child instead of 0 */
+ if (ret == parent)
+ return 0;
+ else
+ return ret;
+}
+#define sys_fork sys_fork
+
+static __attribute__((unused))
+pid_t sys_vfork(void)
+{
+ pid_t parent, ret;
+
+ parent = getpid();
+ ret = my_syscall0(__NR_vfork);
+
+ /* The syscall returns the parent pid in the child instead of 0 */
+ if (ret == parent)
+ return 0;
+ else
+ return ret;
+}
+#define sys_vfork sys_vfork
+
+#endif /* _NOLIBC_ARCH_SPARC_H */
diff --git a/tools/include/nolibc/arch-x86.h b/tools/include/nolibc/arch-x86.h
new file mode 100644
index 000000000000..f6c43ac5377b
--- /dev/null
+++ b/tools/include/nolibc/arch-x86.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * x86 specific definitions for NOLIBC (both 32- and 64-bit)
+ * Copyright (C) 2017-2025 Willy Tarreau <w@1wt.eu>
+ */
+
+#ifndef _NOLIBC_ARCH_X86_H
+#define _NOLIBC_ARCH_X86_H
+
+#include "compiler.h"
+#include "crt.h"
+
+#if !defined(__x86_64__)
+
+/* Syscalls for i386 :
+ * - mostly similar to x86_64
+ * - registers are 32-bit
+ * - syscall number is passed in eax
+ * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively
+ * - all registers are preserved (except eax of course)
+ * - the system call is performed by calling int $0x80
+ * - syscall return comes in eax
+ * - the arguments are cast to long and assigned into the target registers
+ * which are then simply passed as registers to the asm code, so that we
+ * don't have to experience issues with register constraints.
+ * - the syscall number is always specified last in order to allow to force
+ * some registers before (gcc refuses a %-register at the last position).
+ *
+ * Also, i386 supports the old_select syscall if newselect is not available
+ */
+#define __ARCH_WANT_SYS_OLD_SELECT
+
+#define my_syscall0(num) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("eax") = (num); \
+ \
+ __asm__ volatile ( \
+ "int $0x80\n" \
+ : "=a" (_ret) \
+ : "0"(_num) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("eax") = (num); \
+ register long _arg1 __asm__ ("ebx") = (long)(arg1); \
+ \
+ __asm__ volatile ( \
+ "int $0x80\n" \
+ : "=a" (_ret) \
+ : "r"(_arg1), \
+ "0"(_num) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("eax") = (num); \
+ register long _arg1 __asm__ ("ebx") = (long)(arg1); \
+ register long _arg2 __asm__ ("ecx") = (long)(arg2); \
+ \
+ __asm__ volatile ( \
+ "int $0x80\n" \
+ : "=a" (_ret) \
+ : "r"(_arg1), "r"(_arg2), \
+ "0"(_num) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("eax") = (num); \
+ register long _arg1 __asm__ ("ebx") = (long)(arg1); \
+ register long _arg2 __asm__ ("ecx") = (long)(arg2); \
+ register long _arg3 __asm__ ("edx") = (long)(arg3); \
+ \
+ __asm__ volatile ( \
+ "int $0x80\n" \
+ : "=a" (_ret) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
+ "0"(_num) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("eax") = (num); \
+ register long _arg1 __asm__ ("ebx") = (long)(arg1); \
+ register long _arg2 __asm__ ("ecx") = (long)(arg2); \
+ register long _arg3 __asm__ ("edx") = (long)(arg3); \
+ register long _arg4 __asm__ ("esi") = (long)(arg4); \
+ \
+ __asm__ volatile ( \
+ "int $0x80\n" \
+ : "=a" (_ret) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
+ "0"(_num) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("eax") = (num); \
+ register long _arg1 __asm__ ("ebx") = (long)(arg1); \
+ register long _arg2 __asm__ ("ecx") = (long)(arg2); \
+ register long _arg3 __asm__ ("edx") = (long)(arg3); \
+ register long _arg4 __asm__ ("esi") = (long)(arg4); \
+ register long _arg5 __asm__ ("edi") = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ "int $0x80\n" \
+ : "=a" (_ret) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "0"(_num) \
+ : "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ long _eax = (long)(num); \
+ long _arg6 = (long)(arg6); /* Always in memory */ \
+ __asm__ volatile ( \
+ "pushl %[_arg6]\n\t" \
+ "pushl %%ebp\n\t" \
+ "movl 4(%%esp),%%ebp\n\t" \
+ "int $0x80\n\t" \
+ "popl %%ebp\n\t" \
+ "addl $4,%%esp\n\t" \
+ : "+a"(_eax) /* %eax */ \
+ : "b"(arg1), /* %ebx */ \
+ "c"(arg2), /* %ecx */ \
+ "d"(arg3), /* %edx */ \
+ "S"(arg4), /* %esi */ \
+ "D"(arg5), /* %edi */ \
+ [_arg6]"m"(_arg6) /* memory */ \
+ : "memory", "cc" \
+ ); \
+ _eax; \
+})
+
+#ifndef NOLIBC_NO_RUNTIME
+/* startup code */
+/*
+ * i386 System V ABI mandates:
+ * 1) last pushed argument must be 16-byte aligned.
+ * 2) The deepest stack frame should be set to zero
+ *
+ */
+void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
+{
+ __asm__ volatile (
+ "xor %ebp, %ebp\n" /* zero the stack frame */
+ "mov %esp, %eax\n" /* save stack pointer to %eax, as arg1 of _start_c */
+ "sub $12, %esp\n" /* sub 12 to keep it aligned after the push %eax */
+ "push %eax\n" /* push arg1 on stack to support plain stack modes too */
+ "call _start_c\n" /* transfer to c runtime */
+ "hlt\n" /* ensure it does not return */
+ );
+ __nolibc_entrypoint_epilogue();
+}
+#endif /* NOLIBC_NO_RUNTIME */
+
+#else /* !defined(__x86_64__) */
+
+/* Syscalls for x86_64 :
+ * - registers are 64-bit
+ * - syscall number is passed in rax
+ * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively
+ * - the system call is performed by calling the syscall instruction
+ * - syscall return comes in rax
+ * - rcx and r11 are clobbered, others are preserved.
+ * - the arguments are cast to long and assigned into the target registers
+ * which are then simply passed as registers to the asm code, so that we
+ * don't have to experience issues with register constraints.
+ * - the syscall number is always specified last in order to allow to force
+ * some registers before (gcc refuses a %-register at the last position).
+ * - see also x86-64 ABI section A.2 AMD64 Linux Kernel Conventions, A.2.1
+ * Calling Conventions.
+ *
+ * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/home
+ *
+ */
+
+#define my_syscall0(num) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("rax") = (num); \
+ \
+ __asm__ volatile ( \
+ "syscall\n" \
+ : "=a"(_ret) \
+ : "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("rax") = (num); \
+ register long _arg1 __asm__ ("rdi") = (long)(arg1); \
+ \
+ __asm__ volatile ( \
+ "syscall\n" \
+ : "=a"(_ret) \
+ : "r"(_arg1), \
+ "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("rax") = (num); \
+ register long _arg1 __asm__ ("rdi") = (long)(arg1); \
+ register long _arg2 __asm__ ("rsi") = (long)(arg2); \
+ \
+ __asm__ volatile ( \
+ "syscall\n" \
+ : "=a"(_ret) \
+ : "r"(_arg1), "r"(_arg2), \
+ "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("rax") = (num); \
+ register long _arg1 __asm__ ("rdi") = (long)(arg1); \
+ register long _arg2 __asm__ ("rsi") = (long)(arg2); \
+ register long _arg3 __asm__ ("rdx") = (long)(arg3); \
+ \
+ __asm__ volatile ( \
+ "syscall\n" \
+ : "=a"(_ret) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
+ "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("rax") = (num); \
+ register long _arg1 __asm__ ("rdi") = (long)(arg1); \
+ register long _arg2 __asm__ ("rsi") = (long)(arg2); \
+ register long _arg3 __asm__ ("rdx") = (long)(arg3); \
+ register long _arg4 __asm__ ("r10") = (long)(arg4); \
+ \
+ __asm__ volatile ( \
+ "syscall\n" \
+ : "=a"(_ret) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
+ "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("rax") = (num); \
+ register long _arg1 __asm__ ("rdi") = (long)(arg1); \
+ register long _arg2 __asm__ ("rsi") = (long)(arg2); \
+ register long _arg3 __asm__ ("rdx") = (long)(arg3); \
+ register long _arg4 __asm__ ("r10") = (long)(arg4); \
+ register long _arg5 __asm__ ("r8") = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ "syscall\n" \
+ : "=a"(_ret) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ long _ret; \
+ register long _num __asm__ ("rax") = (num); \
+ register long _arg1 __asm__ ("rdi") = (long)(arg1); \
+ register long _arg2 __asm__ ("rsi") = (long)(arg2); \
+ register long _arg3 __asm__ ("rdx") = (long)(arg3); \
+ register long _arg4 __asm__ ("r10") = (long)(arg4); \
+ register long _arg5 __asm__ ("r8") = (long)(arg5); \
+ register long _arg6 __asm__ ("r9") = (long)(arg6); \
+ \
+ __asm__ volatile ( \
+ "syscall\n" \
+ : "=a"(_ret) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "r"(_arg6), "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#ifndef NOLIBC_NO_RUNTIME
+/* startup code */
+/*
+ * x86-64 System V ABI mandates:
+ * 1) %rsp must be 16-byte aligned right before the function call.
+ * 2) The deepest stack frame should be zero (the %rbp).
+ *
+ */
+void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
+{
+ __asm__ volatile (
+ "xor %ebp, %ebp\n" /* zero the stack frame */
+ "mov %rsp, %rdi\n" /* save stack pointer to %rdi, as arg1 of _start_c */
+ "call _start_c\n" /* transfer to c runtime */
+ "hlt\n" /* ensure it does not return */
+ );
+ __nolibc_entrypoint_epilogue();
+}
+#endif /* NOLIBC_NO_RUNTIME */
+
+#define NOLIBC_ARCH_HAS_MEMMOVE
+void *memmove(void *dst, const void *src, size_t len);
+
+#define NOLIBC_ARCH_HAS_MEMCPY
+void *memcpy(void *dst, const void *src, size_t len);
+
+#define NOLIBC_ARCH_HAS_MEMSET
+void *memset(void *dst, int c, size_t len);
+
+__asm__ (
+".pushsection .text.nolibc_memmove_memcpy\n"
+".weak memmove\n"
+".weak memcpy\n"
+"memmove:\n"
+"memcpy:\n"
+ "movq %rdx, %rcx\n\t"
+ "movq %rdi, %rax\n\t"
+ "movq %rdi, %rdx\n\t"
+ "subq %rsi, %rdx\n\t"
+ "cmpq %rcx, %rdx\n\t"
+ "jb 1f\n\t"
+ "rep movsb\n\t"
+ "retq\n"
+"1:" /* backward copy */
+ "leaq -1(%rdi, %rcx, 1), %rdi\n\t"
+ "leaq -1(%rsi, %rcx, 1), %rsi\n\t"
+ "std\n\t"
+ "rep movsb\n\t"
+ "cld\n\t"
+ "retq\n"
+".popsection\n"
+
+".pushsection .text.nolibc_memset\n"
+".weak memset\n"
+"memset:\n"
+ "xchgl %eax, %esi\n\t"
+ "movq %rdx, %rcx\n\t"
+ "pushq %rdi\n\t"
+ "rep stosb\n\t"
+ "popq %rax\n\t"
+ "retq\n"
+".popsection\n"
+);
+
+#endif /* !defined(__x86_64__) */
+#endif /* _NOLIBC_ARCH_X86_H */
diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h
deleted file mode 100644
index 0e1e9eb8545d..000000000000
--- a/tools/include/nolibc/arch-x86_64.h
+++ /dev/null
@@ -1,215 +0,0 @@
-/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
-/*
- * x86_64 specific definitions for NOLIBC
- * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
- */
-
-#ifndef _NOLIBC_ARCH_X86_64_H
-#define _NOLIBC_ARCH_X86_64_H
-
-/* O_* macros for fcntl/open are architecture-specific */
-#define O_RDONLY 0
-#define O_WRONLY 1
-#define O_RDWR 2
-#define O_CREAT 0x40
-#define O_EXCL 0x80
-#define O_NOCTTY 0x100
-#define O_TRUNC 0x200
-#define O_APPEND 0x400
-#define O_NONBLOCK 0x800
-#define O_DIRECTORY 0x10000
-
-/* The struct returned by the stat() syscall, equivalent to stat64(). The
- * syscall returns 116 bytes and stops in the middle of __unused.
- */
-struct sys_stat_struct {
- unsigned long st_dev;
- unsigned long st_ino;
- unsigned long st_nlink;
- unsigned int st_mode;
- unsigned int st_uid;
-
- unsigned int st_gid;
- unsigned int __pad0;
- unsigned long st_rdev;
- long st_size;
- long st_blksize;
-
- long st_blocks;
- unsigned long st_atime;
- unsigned long st_atime_nsec;
- unsigned long st_mtime;
-
- unsigned long st_mtime_nsec;
- unsigned long st_ctime;
- unsigned long st_ctime_nsec;
- long __unused[3];
-};
-
-/* Syscalls for x86_64 :
- * - registers are 64-bit
- * - syscall number is passed in rax
- * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively
- * - the system call is performed by calling the syscall instruction
- * - syscall return comes in rax
- * - rcx and r11 are clobbered, others are preserved.
- * - the arguments are cast to long and assigned into the target registers
- * which are then simply passed as registers to the asm code, so that we
- * don't have to experience issues with register constraints.
- * - the syscall number is always specified last in order to allow to force
- * some registers before (gcc refuses a %-register at the last position).
- * - see also x86-64 ABI section A.2 AMD64 Linux Kernel Conventions, A.2.1
- * Calling Conventions.
- *
- * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/home
- *
- */
-
-#define my_syscall0(num) \
-({ \
- long _ret; \
- register long _num __asm__ ("rax") = (num); \
- \
- __asm__ volatile ( \
- "syscall\n" \
- : "=a"(_ret) \
- : "0"(_num) \
- : "rcx", "r11", "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall1(num, arg1) \
-({ \
- long _ret; \
- register long _num __asm__ ("rax") = (num); \
- register long _arg1 __asm__ ("rdi") = (long)(arg1); \
- \
- __asm__ volatile ( \
- "syscall\n" \
- : "=a"(_ret) \
- : "r"(_arg1), \
- "0"(_num) \
- : "rcx", "r11", "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall2(num, arg1, arg2) \
-({ \
- long _ret; \
- register long _num __asm__ ("rax") = (num); \
- register long _arg1 __asm__ ("rdi") = (long)(arg1); \
- register long _arg2 __asm__ ("rsi") = (long)(arg2); \
- \
- __asm__ volatile ( \
- "syscall\n" \
- : "=a"(_ret) \
- : "r"(_arg1), "r"(_arg2), \
- "0"(_num) \
- : "rcx", "r11", "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall3(num, arg1, arg2, arg3) \
-({ \
- long _ret; \
- register long _num __asm__ ("rax") = (num); \
- register long _arg1 __asm__ ("rdi") = (long)(arg1); \
- register long _arg2 __asm__ ("rsi") = (long)(arg2); \
- register long _arg3 __asm__ ("rdx") = (long)(arg3); \
- \
- __asm__ volatile ( \
- "syscall\n" \
- : "=a"(_ret) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), \
- "0"(_num) \
- : "rcx", "r11", "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall4(num, arg1, arg2, arg3, arg4) \
-({ \
- long _ret; \
- register long _num __asm__ ("rax") = (num); \
- register long _arg1 __asm__ ("rdi") = (long)(arg1); \
- register long _arg2 __asm__ ("rsi") = (long)(arg2); \
- register long _arg3 __asm__ ("rdx") = (long)(arg3); \
- register long _arg4 __asm__ ("r10") = (long)(arg4); \
- \
- __asm__ volatile ( \
- "syscall\n" \
- : "=a"(_ret) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \
- "0"(_num) \
- : "rcx", "r11", "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
-({ \
- long _ret; \
- register long _num __asm__ ("rax") = (num); \
- register long _arg1 __asm__ ("rdi") = (long)(arg1); \
- register long _arg2 __asm__ ("rsi") = (long)(arg2); \
- register long _arg3 __asm__ ("rdx") = (long)(arg3); \
- register long _arg4 __asm__ ("r10") = (long)(arg4); \
- register long _arg5 __asm__ ("r8") = (long)(arg5); \
- \
- __asm__ volatile ( \
- "syscall\n" \
- : "=a"(_ret) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
- "0"(_num) \
- : "rcx", "r11", "memory", "cc" \
- ); \
- _ret; \
-})
-
-#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
-({ \
- long _ret; \
- register long _num __asm__ ("rax") = (num); \
- register long _arg1 __asm__ ("rdi") = (long)(arg1); \
- register long _arg2 __asm__ ("rsi") = (long)(arg2); \
- register long _arg3 __asm__ ("rdx") = (long)(arg3); \
- register long _arg4 __asm__ ("r10") = (long)(arg4); \
- register long _arg5 __asm__ ("r8") = (long)(arg5); \
- register long _arg6 __asm__ ("r9") = (long)(arg6); \
- \
- __asm__ volatile ( \
- "syscall\n" \
- : "=a"(_ret) \
- : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
- "r"(_arg6), "0"(_num) \
- : "rcx", "r11", "memory", "cc" \
- ); \
- _ret; \
-})
-
-/* startup code */
-/*
- * x86-64 System V ABI mandates:
- * 1) %rsp must be 16-byte aligned right before the function call.
- * 2) The deepest stack frame should be zero (the %rbp).
- *
- */
-__asm__ (".section .text\n"
- ".weak _start\n"
- "_start:\n"
- "pop %rdi\n" // argc (first arg, %rdi)
- "mov %rsp, %rsi\n" // argv[] (second arg, %rsi)
- "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx)
- "xor %ebp, %ebp\n" // zero the stack frame
- "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned before call
- "call main\n" // main() returns the status code, we'll exit with it.
- "mov %eax, %edi\n" // retrieve exit code (32 bit)
- "mov $60, %eax\n" // NR_exit == 60
- "syscall\n" // really exit
- "hlt\n" // ensure it does not return
- "");
-
-#endif // _NOLIBC_ARCH_X86_64_H
diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h
index 4c6992321b0d..a3adaf433f2c 100644
--- a/tools/include/nolibc/arch.h
+++ b/tools/include/nolibc/arch.h
@@ -3,30 +3,33 @@
* Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
*/
-/* Below comes the architecture-specific code. For each architecture, we have
- * the syscall declarations and the _start code definition. This is the only
- * global part. On all architectures the kernel puts everything in the stack
- * before jumping to _start just above us, without any return address (_start
- * is not a function but an entry pint). So at the stack pointer we find argc.
- * Then argv[] begins, and ends at the first NULL. Then we have envp which
- * starts and ends with a NULL as well. So envp=argv+argc+1.
- */
-
#ifndef _NOLIBC_ARCH_H
#define _NOLIBC_ARCH_H
-#if defined(__x86_64__)
-#include "arch-x86_64.h"
-#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__)
-#include "arch-i386.h"
+#if defined(__x86_64__) || defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__)
+#include "arch-x86.h"
#elif defined(__ARM_EABI__)
#include "arch-arm.h"
#elif defined(__aarch64__)
-#include "arch-aarch64.h"
-#elif defined(__mips__) && defined(_ABIO32)
+#include "arch-arm64.h"
+#elif defined(__mips__)
#include "arch-mips.h"
+#elif defined(__powerpc__)
+#include "arch-powerpc.h"
#elif defined(__riscv)
#include "arch-riscv.h"
+#elif defined(__s390x__)
+#include "arch-s390.h"
+#elif defined(__loongarch__)
+#include "arch-loongarch.h"
+#elif defined(__sparc__)
+#include "arch-sparc.h"
+#elif defined(__m68k__)
+#include "arch-m68k.h"
+#elif defined(__sh__)
+#include "arch-sh.h"
+#else
+#error Unsupported Architecture
#endif
#endif /* _NOLIBC_ARCH_H */
diff --git a/tools/include/nolibc/compiler.h b/tools/include/nolibc/compiler.h
new file mode 100644
index 000000000000..87090bbc53e0
--- /dev/null
+++ b/tools/include/nolibc/compiler.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * NOLIBC compiler support header
+ * Copyright (C) 2023 Thomas Weißschuh <linux@weissschuh.net>
+ */
+#ifndef _NOLIBC_COMPILER_H
+#define _NOLIBC_COMPILER_H
+
+#if defined(__has_attribute)
+# define __nolibc_has_attribute(attr) __has_attribute(attr)
+#else
+# define __nolibc_has_attribute(attr) 0
+#endif
+
+#if defined(__has_feature)
+# define __nolibc_has_feature(feature) __has_feature(feature)
+#else
+# define __nolibc_has_feature(feature) 0
+#endif
+
+#define __nolibc_aligned(alignment) __attribute__((aligned(alignment)))
+#define __nolibc_aligned_as(type) __nolibc_aligned(__alignof__(type))
+
+#if __nolibc_has_attribute(naked)
+# define __nolibc_entrypoint __attribute__((naked))
+# define __nolibc_entrypoint_epilogue()
+#else
+# define __nolibc_entrypoint __attribute__((optimize("Os", "omit-frame-pointer")))
+# define __nolibc_entrypoint_epilogue() __builtin_unreachable()
+#endif /* __nolibc_has_attribute(naked) */
+
+#if defined(__SSP__) || defined(__SSP_STRONG__) || defined(__SSP_ALL__) || defined(__SSP_EXPLICIT__)
+
+#define _NOLIBC_STACKPROTECTOR
+
+#endif /* defined(__SSP__) ... */
+
+#if __nolibc_has_attribute(no_stack_protector)
+# define __no_stack_protector __attribute__((no_stack_protector))
+#else
+# define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector")))
+#endif /* __nolibc_has_attribute(no_stack_protector) */
+
+#if __nolibc_has_attribute(__fallthrough__)
+# define __nolibc_fallthrough do { } while (0); __attribute__((__fallthrough__))
+#else
+# define __nolibc_fallthrough do { } while (0)
+#endif /* __nolibc_has_attribute(fallthrough) */
+
+#endif /* _NOLIBC_COMPILER_H */
diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h
new file mode 100644
index 000000000000..d9262998dae9
--- /dev/null
+++ b/tools/include/nolibc/crt.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * C Run Time support for NOLIBC
+ * Copyright (C) 2023 Zhangjin Wu <falcon@tinylab.org>
+ */
+
+#ifndef _NOLIBC_CRT_H
+#define _NOLIBC_CRT_H
+
+#ifndef NOLIBC_NO_RUNTIME
+
+#include "compiler.h"
+
+char **environ __attribute__((weak));
+const unsigned long *_auxv __attribute__((weak));
+
+void _start(void);
+static void __stack_chk_init(void);
+static void exit(int);
+
+extern void (*const __preinit_array_start[])(int, char **, char**) __attribute__((weak));
+extern void (*const __preinit_array_end[])(int, char **, char**) __attribute__((weak));
+
+extern void (*const __init_array_start[])(int, char **, char**) __attribute__((weak));
+extern void (*const __init_array_end[])(int, char **, char**) __attribute__((weak));
+
+extern void (*const __fini_array_start[])(void) __attribute__((weak));
+extern void (*const __fini_array_end[])(void) __attribute__((weak));
+
+void _start_c(long *sp);
+__attribute__((weak,used))
+#if __nolibc_has_feature(undefined_behavior_sanitizer)
+ __attribute__((no_sanitize("function")))
+#endif
+void _start_c(long *sp)
+{
+ long argc;
+ char **argv;
+ char **envp;
+ int exitcode;
+ void (* const *ctor_func)(int, char **, char **);
+ void (* const *dtor_func)(void);
+ const unsigned long *auxv;
+ /* silence potential warning: conflicting types for 'main' */
+ int _nolibc_main(int, char **, char **) __asm__ ("main");
+
+ /* initialize stack protector */
+ __stack_chk_init();
+
+ /*
+ * sp : argc <-- argument count, required by main()
+ * argv: argv[0] <-- argument vector, required by main()
+ * argv[1]
+ * ...
+ * argv[argc-1]
+ * null
+ * environ: environ[0] <-- environment variables, required by main() and getenv()
+ * environ[1]
+ * ...
+ * null
+ * _auxv: _auxv[0] <-- auxiliary vector, required by getauxval()
+ * _auxv[1]
+ * ...
+ * null
+ */
+
+ /* assign argc and argv */
+ argc = *sp;
+ argv = (void *)(sp + 1);
+
+ /* find environ */
+ environ = envp = argv + argc + 1;
+
+ /* find _auxv */
+ for (auxv = (void *)envp; *auxv++;)
+ ;
+ _auxv = auxv;
+
+ for (ctor_func = __preinit_array_start; ctor_func < __preinit_array_end; ctor_func++)
+ (*ctor_func)(argc, argv, envp);
+ for (ctor_func = __init_array_start; ctor_func < __init_array_end; ctor_func++)
+ (*ctor_func)(argc, argv, envp);
+
+ /* go to application */
+ exitcode = _nolibc_main(argc, argv, envp);
+
+ for (dtor_func = __fini_array_end; dtor_func > __fini_array_start;)
+ (*--dtor_func)();
+
+ exit(exitcode);
+}
+
+#endif /* NOLIBC_NO_RUNTIME */
+#endif /* _NOLIBC_CRT_H */
diff --git a/tools/include/nolibc/ctype.h b/tools/include/nolibc/ctype.h
index 6f90706d0644..470fdf34394a 100644
--- a/tools/include/nolibc/ctype.h
+++ b/tools/include/nolibc/ctype.h
@@ -4,6 +4,9 @@
* Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_CTYPE_H
#define _NOLIBC_CTYPE_H
@@ -96,7 +99,4 @@ int ispunct(int c)
return isgraph(c) && !isalnum(c);
}
-/* make sure to include all global symbols */
-#include "nolibc.h"
-
#endif /* _NOLIBC_CTYPE_H */
diff --git a/tools/include/nolibc/dirent.h b/tools/include/nolibc/dirent.h
new file mode 100644
index 000000000000..61a122a60327
--- /dev/null
+++ b/tools/include/nolibc/dirent.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Directory access for NOLIBC
+ * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net>
+ */
+
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
+#ifndef _NOLIBC_DIRENT_H
+#define _NOLIBC_DIRENT_H
+
+#include "compiler.h"
+#include "stdint.h"
+#include "types.h"
+#include "fcntl.h"
+
+#include <linux/limits.h>
+
+struct dirent {
+ ino_t d_ino;
+ char d_name[NAME_MAX + 1];
+};
+
+/* See comment of FILE in stdio.h */
+typedef struct {
+ char dummy[1];
+} DIR;
+
+static __attribute__((unused))
+DIR *fdopendir(int fd)
+{
+ if (fd < 0) {
+ SET_ERRNO(EBADF);
+ return NULL;
+ }
+ return (DIR *)(intptr_t)~fd;
+}
+
+static __attribute__((unused))
+DIR *opendir(const char *name)
+{
+ int fd;
+
+ fd = open(name, O_RDONLY);
+ if (fd == -1)
+ return NULL;
+ return fdopendir(fd);
+}
+
+static __attribute__((unused))
+int closedir(DIR *dirp)
+{
+ intptr_t i = (intptr_t)dirp;
+
+ if (i >= 0) {
+ SET_ERRNO(EBADF);
+ return -1;
+ }
+ return close(~i);
+}
+
+static __attribute__((unused))
+int readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result)
+{
+ char buf[sizeof(struct linux_dirent64) + NAME_MAX + 1] __nolibc_aligned_as(struct linux_dirent64);
+ struct linux_dirent64 *ldir = (void *)buf;
+ intptr_t i = (intptr_t)dirp;
+ int fd, ret;
+
+ if (i >= 0)
+ return EBADF;
+
+ fd = ~i;
+
+ ret = sys_getdents64(fd, ldir, sizeof(buf));
+ if (ret < 0)
+ return -ret;
+ if (ret == 0) {
+ *result = NULL;
+ return 0;
+ }
+
+ /*
+ * getdents64() returns as many entries as fit the buffer.
+ * readdir() can only return one entry at a time.
+ * Make sure the non-returned ones are not skipped.
+ */
+ ret = sys_lseek(fd, ldir->d_off, SEEK_SET);
+ if (ret < 0)
+ return -ret;
+
+ entry->d_ino = ldir->d_ino;
+ /* the destination should always be big enough */
+ strlcpy(entry->d_name, ldir->d_name, sizeof(entry->d_name));
+ *result = entry;
+ return 0;
+}
+
+#endif /* _NOLIBC_DIRENT_H */
diff --git a/tools/include/nolibc/elf.h b/tools/include/nolibc/elf.h
new file mode 100644
index 000000000000..3e2c5228bf3d
--- /dev/null
+++ b/tools/include/nolibc/elf.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Shim elf.h header for NOLIBC.
+ * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de>
+ */
+
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
+#ifndef _NOLIBC_SYS_ELF_H
+#define _NOLIBC_SYS_ELF_H
+
+#include <linux/elf.h>
+
+#endif /* _NOLIBC_SYS_ELF_H */
diff --git a/tools/include/nolibc/errno.h b/tools/include/nolibc/errno.h
index 9dc4919c769b..08a33c40ec0c 100644
--- a/tools/include/nolibc/errno.h
+++ b/tools/include/nolibc/errno.h
@@ -4,16 +4,17 @@
* Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_ERRNO_H
#define _NOLIBC_ERRNO_H
-#include <asm/errno.h>
-
-/* this way it will be removed if unused */
-static int errno;
+#include <linux/errno.h>
#ifndef NOLIBC_IGNORE_ERRNO
#define SET_ERRNO(v) do { errno = (v); } while (0)
+int errno __attribute__((weak));
#else
#define SET_ERRNO(v) do { } while (0)
#endif
@@ -24,7 +25,4 @@ static int errno;
*/
#define MAX_ERRNO 4095
-/* make sure to include all global symbols */
-#include "nolibc.h"
-
#endif /* _NOLIBC_ERRNO_H */
diff --git a/tools/include/nolibc/fcntl.h b/tools/include/nolibc/fcntl.h
new file mode 100644
index 000000000000..bff2e542f20f
--- /dev/null
+++ b/tools/include/nolibc/fcntl.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * fcntl definition for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
+#ifndef _NOLIBC_FCNTL_H
+#define _NOLIBC_FCNTL_H
+
+#include "arch.h"
+#include "types.h"
+#include "sys.h"
+
+/*
+ * int openat(int dirfd, const char *path, int flags[, mode_t mode]);
+ */
+
+static __attribute__((unused))
+int sys_openat(int dirfd, const char *path, int flags, mode_t mode)
+{
+ return my_syscall4(__NR_openat, dirfd, path, flags, mode);
+}
+
+static __attribute__((unused))
+int openat(int dirfd, const char *path, int flags, ...)
+{
+ mode_t mode = 0;
+
+ if (flags & O_CREAT) {
+ va_list args;
+
+ va_start(args, flags);
+ mode = va_arg(args, mode_t);
+ va_end(args);
+ }
+
+ return __sysret(sys_openat(dirfd, path, flags, mode));
+}
+
+/*
+ * int open(const char *path, int flags[, mode_t mode]);
+ */
+
+static __attribute__((unused))
+int sys_open(const char *path, int flags, mode_t mode)
+{
+ return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode);
+}
+
+static __attribute__((unused))
+int open(const char *path, int flags, ...)
+{
+ mode_t mode = 0;
+
+ if (flags & O_CREAT) {
+ va_list args;
+
+ va_start(args, flags);
+ mode = va_arg(args, mode_t);
+ va_end(args);
+ }
+
+ return __sysret(sys_open(path, flags, mode));
+}
+
+#endif /* _NOLIBC_FCNTL_H */
diff --git a/tools/include/nolibc/getopt.h b/tools/include/nolibc/getopt.h
new file mode 100644
index 000000000000..87565e3b6a33
--- /dev/null
+++ b/tools/include/nolibc/getopt.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * getopt function definitions for NOLIBC, adapted from musl libc
+ * Copyright (C) 2005-2020 Rich Felker, et al.
+ * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net>
+ */
+
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
+#ifndef _NOLIBC_GETOPT_H
+#define _NOLIBC_GETOPT_H
+
+struct FILE;
+static struct FILE *const stderr;
+static int fprintf(struct FILE *stream, const char *fmt, ...);
+
+__attribute__((weak,unused,section(".data.nolibc_getopt")))
+char *optarg;
+
+__attribute__((weak,unused,section(".data.nolibc_getopt")))
+int optind = 1, opterr = 1, optopt;
+
+static __attribute__((unused))
+int getopt(int argc, char * const argv[], const char *optstring)
+{
+ static int __optpos;
+ int i;
+ char c, d;
+ char *optchar;
+
+ if (!optind) {
+ __optpos = 0;
+ optind = 1;
+ }
+
+ if (optind >= argc || !argv[optind])
+ return -1;
+
+ if (argv[optind][0] != '-') {
+ if (optstring[0] == '-') {
+ optarg = argv[optind++];
+ return 1;
+ }
+ return -1;
+ }
+
+ if (!argv[optind][1])
+ return -1;
+
+ if (argv[optind][1] == '-' && !argv[optind][2])
+ return optind++, -1;
+
+ if (!__optpos)
+ __optpos++;
+ c = argv[optind][__optpos];
+ optchar = argv[optind] + __optpos;
+ __optpos++;
+
+ if (!argv[optind][__optpos]) {
+ optind++;
+ __optpos = 0;
+ }
+
+ if (optstring[0] == '-' || optstring[0] == '+')
+ optstring++;
+
+ i = 0;
+ d = 0;
+ do {
+ d = optstring[i++];
+ } while (d && d != c);
+
+ if (d != c || c == ':') {
+ optopt = c;
+ if (optstring[0] != ':' && opterr)
+ fprintf(stderr, "%s: unrecognized option: %c\n", argv[0], *optchar);
+ return '?';
+ }
+ if (optstring[i] == ':') {
+ optarg = NULL;
+ if (optstring[i + 1] != ':' || __optpos) {
+ optarg = argv[optind++];
+ if (__optpos)
+ optarg += __optpos;
+ __optpos = 0;
+ }
+ if (optind > argc) {
+ optopt = c;
+ if (optstring[0] == ':')
+ return ':';
+ if (opterr)
+ fprintf(stderr, "%s: option requires argument: %c\n",
+ argv[0], *optchar);
+ return '?';
+ }
+ }
+ return c;
+}
+
+#endif /* _NOLIBC_GETOPT_H */
diff --git a/tools/include/nolibc/inttypes.h b/tools/include/nolibc/inttypes.h
new file mode 100644
index 000000000000..1977bd74bfeb
--- /dev/null
+++ b/tools/include/nolibc/inttypes.h
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+
+#include "nolibc.h"
diff --git a/tools/include/nolibc/limits.h b/tools/include/nolibc/limits.h
new file mode 100644
index 000000000000..306d4141f4d2
--- /dev/null
+++ b/tools/include/nolibc/limits.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Shim limits.h header for NOLIBC.
+ * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de>
+ */
+
+#include "nolibc.h"
diff --git a/tools/include/nolibc/math.h b/tools/include/nolibc/math.h
new file mode 100644
index 000000000000..9df823ddd412
--- /dev/null
+++ b/tools/include/nolibc/math.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * math definitions for NOLIBC
+ * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de>
+ */
+
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
+#ifndef _NOLIBC_SYS_MATH_H
+#define _NOLIBC_SYS_MATH_H
+
+static __inline__
+double fabs(double x)
+{
+ return x >= 0 ? x : -x;
+}
+
+static __inline__
+float fabsf(float x)
+{
+ return x >= 0 ? x : -x;
+}
+
+static __inline__
+long double fabsl(long double x)
+{
+ return x >= 0 ? x : -x;
+}
+
+#endif /* _NOLIBC_SYS_MATH_H */
diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index b2bc48d3cfe4..272dfc961158 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -13,11 +13,10 @@
* Syscalls are split into 3 levels:
* - The lower level is the arch-specific syscall() definition, consisting in
* assembly code in compound expressions. These are called my_syscall0() to
- * my_syscall6() depending on the number of arguments. The MIPS
- * implementation is limited to 5 arguments. All input arguments are cast
- * to a long stored in a register. These expressions always return the
- * syscall's return value as a signed long value which is often either a
- * pointer or the negated errno value.
+ * my_syscall6() depending on the number of arguments. All input arguments
+ * are castto a long stored in a register. These expressions always return
+ * the syscall's return value as a signed long value which is often either
+ * a pointer or the negated errno value.
*
* - The second level is mostly architecture-independent. It is made of
* static functions called sys_<name>() which rely on my_syscallN()
@@ -32,8 +31,7 @@
* - The third level is the libc call definition. It exposes the lower raw
* sys_<name>() calls in a way that looks like what a libc usually does,
* takes care of specific input values, and of setting errno upon error.
- * There can be minor variations compared to standard libc calls. For
- * example the open() call always takes 3 args here.
+ * There can be minor variations compared to standard libc calls.
*
* The errno variable is declared static and unused. This way it can be
* optimized away if not used. However this means that a program made of
@@ -75,10 +73,11 @@
* -I../nolibc -o hello hello.c -lgcc
*
* The available standard (but limited) include files are:
- * ctype.h, errno.h, signal.h, stdio.h, stdlib.h, string.h, time.h
+ * ctype.h, errno.h, signal.h, stdarg.h, stdbool.h stdio.h, stdlib.h,
+ * string.h, time.h
*
* In addition, the following ones are expected to be provided by the compiler:
- * float.h, stdarg.h, stddef.h
+ * float.h, stddef.h
*
* The following ones which are part to the C standard are not provided:
* assert.h, locale.h, math.h, setjmp.h, limits.h
@@ -97,13 +96,39 @@
#include "arch.h"
#include "types.h"
#include "sys.h"
+#include "sys/auxv.h"
+#include "sys/ioctl.h"
+#include "sys/mman.h"
+#include "sys/mount.h"
+#include "sys/prctl.h"
+#include "sys/random.h"
+#include "sys/reboot.h"
+#include "sys/resource.h"
+#include "sys/select.h"
+#include "sys/stat.h"
+#include "sys/syscall.h"
+#include "sys/sysmacros.h"
+#include "sys/time.h"
+#include "sys/timerfd.h"
+#include "sys/uio.h"
+#include "sys/utsname.h"
+#include "sys/wait.h"
#include "ctype.h"
+#include "elf.h"
+#include "sched.h"
#include "signal.h"
+#include "unistd.h"
+#include "stdbool.h"
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#include "time.h"
-#include "unistd.h"
+#include "stackprotector.h"
+#include "dirent.h"
+#include "fcntl.h"
+#include "getopt.h"
+#include "poll.h"
+#include "math.h"
/* Used by programs to avoid std includes */
#define NOLIBC
diff --git a/tools/include/nolibc/poll.h b/tools/include/nolibc/poll.h
new file mode 100644
index 000000000000..0d053f93ea99
--- /dev/null
+++ b/tools/include/nolibc/poll.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * poll definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
+#ifndef _NOLIBC_POLL_H
+#define _NOLIBC_POLL_H
+
+#include "arch.h"
+#include "sys.h"
+
+#include <linux/poll.h>
+#include <linux/time.h>
+
+/*
+ * int poll(struct pollfd *fds, int nfds, int timeout);
+ */
+
+static __attribute__((unused))
+int sys_poll(struct pollfd *fds, int nfds, int timeout)
+{
+#if defined(__NR_ppoll)
+ struct timespec t;
+
+ if (timeout >= 0) {
+ t.tv_sec = timeout / 1000;
+ t.tv_nsec = (timeout % 1000) * 1000000;
+ }
+ return my_syscall5(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0);
+#elif defined(__NR_ppoll_time64)
+ struct __kernel_timespec t;
+
+ if (timeout >= 0) {
+ t.tv_sec = timeout / 1000;
+ t.tv_nsec = (timeout % 1000) * 1000000;
+ }
+ return my_syscall5(__NR_ppoll_time64, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0);
+#else
+ return my_syscall3(__NR_poll, fds, nfds, timeout);
+#endif
+}
+
+static __attribute__((unused))
+int poll(struct pollfd *fds, int nfds, int timeout)
+{
+ return __sysret(sys_poll(fds, nfds, timeout));
+}
+
+#endif /* _NOLIBC_POLL_H */
diff --git a/tools/include/nolibc/sched.h b/tools/include/nolibc/sched.h
new file mode 100644
index 000000000000..32221562c166
--- /dev/null
+++ b/tools/include/nolibc/sched.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * sched function definitions for NOLIBC
+ * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net>
+ */
+
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
+#ifndef _NOLIBC_SCHED_H
+#define _NOLIBC_SCHED_H
+
+#include "sys.h"
+
+#include <linux/sched.h>
+
+/*
+ * int setns(int fd, int nstype);
+ */
+
+static __attribute__((unused))
+int sys_setns(int fd, int nstype)
+{
+ return my_syscall2(__NR_setns, fd, nstype);
+}
+
+static __attribute__((unused))
+int setns(int fd, int nstype)
+{
+ return __sysret(sys_setns(fd, nstype));
+}
+
+
+/*
+ * int unshare(int flags);
+ */
+
+static __attribute__((unused))
+int sys_unshare(int flags)
+{
+ return my_syscall1(__NR_unshare, flags);
+}
+
+static __attribute__((unused))
+int unshare(int flags)
+{
+ return __sysret(sys_unshare(flags));
+}
+
+#endif /* _NOLIBC_SCHED_H */
diff --git a/tools/include/nolibc/signal.h b/tools/include/nolibc/signal.h
index 137552216e46..ac13e53ac31d 100644
--- a/tools/include/nolibc/signal.h
+++ b/tools/include/nolibc/signal.h
@@ -4,6 +4,9 @@
* Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_SIGNAL_H
#define _NOLIBC_SIGNAL_H
@@ -13,13 +16,11 @@
#include "sys.h"
/* This one is not marked static as it's needed by libgcc for divide by zero */
+int raise(int signal);
__attribute__((weak,unused,section(".text.nolibc_raise")))
int raise(int signal)
{
return sys_kill(sys_getpid(), signal);
}
-/* make sure to include all global symbols */
-#include "nolibc.h"
-
#endif /* _NOLIBC_SIGNAL_H */
diff --git a/tools/include/nolibc/stackprotector.h b/tools/include/nolibc/stackprotector.h
new file mode 100644
index 000000000000..7123aa056cb0
--- /dev/null
+++ b/tools/include/nolibc/stackprotector.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Stack protector support for NOLIBC
+ * Copyright (C) 2023 Thomas Weißschuh <linux@weissschuh.net>
+ */
+
+#ifndef _NOLIBC_STACKPROTECTOR_H
+#define _NOLIBC_STACKPROTECTOR_H
+
+#include "compiler.h"
+
+#ifndef NOLIBC_NO_RUNTIME
+#if defined(_NOLIBC_STACKPROTECTOR)
+
+#include "sys.h"
+#include "stdlib.h"
+
+/* The functions in this header are using raw syscall macros to avoid
+ * triggering stack protector errors themselves
+ */
+
+void __stack_chk_fail(void);
+__attribute__((weak,used,noreturn,section(".text.nolibc_stack_chk")))
+void __stack_chk_fail(void)
+{
+ pid_t pid;
+ my_syscall3(__NR_write, STDERR_FILENO, "!!Stack smashing detected!!\n", 28);
+ pid = my_syscall0(__NR_getpid);
+ my_syscall2(__NR_kill, pid, SIGABRT);
+ for (;;);
+}
+
+void __stack_chk_fail_local(void);
+__attribute__((weak,noreturn,section(".text.nolibc_stack_chk")))
+void __stack_chk_fail_local(void)
+{
+ __stack_chk_fail();
+}
+
+__attribute__((weak,used,section(".data.nolibc_stack_chk")))
+uintptr_t __stack_chk_guard;
+
+static __no_stack_protector void __stack_chk_init(void)
+{
+ my_syscall3(__NR_getrandom, &__stack_chk_guard, sizeof(__stack_chk_guard), 0);
+ /* a bit more randomness in case getrandom() fails, ensure the guard is never 0 */
+ if (__stack_chk_guard != (uintptr_t) &__stack_chk_guard)
+ __stack_chk_guard ^= (uintptr_t) &__stack_chk_guard;
+}
+#else /* !defined(_NOLIBC_STACKPROTECTOR) */
+static void __stack_chk_init(void) {}
+#endif /* defined(_NOLIBC_STACKPROTECTOR) */
+#endif /* NOLIBC_NO_RUNTIME */
+
+#endif /* _NOLIBC_STACKPROTECTOR_H */
diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h
index 1747ae125392..392f4dd94158 100644
--- a/tools/include/nolibc/std.h
+++ b/tools/include/nolibc/std.h
@@ -13,37 +13,22 @@
* syscall-specific stuff, as this file is expected to be included very early.
*/
-/* note: may already be defined */
-#ifndef NULL
-#define NULL ((void *)0)
-#endif
+#include "stdint.h"
+#include "stddef.h"
-/* stdint types */
-typedef unsigned char uint8_t;
-typedef signed char int8_t;
-typedef unsigned short uint16_t;
-typedef signed short int16_t;
-typedef unsigned int uint32_t;
-typedef signed int int32_t;
-typedef unsigned long long uint64_t;
-typedef signed long long int64_t;
-typedef unsigned long size_t;
-typedef signed long ssize_t;
-typedef unsigned long uintptr_t;
-typedef signed long intptr_t;
-typedef signed long ptrdiff_t;
+#include <linux/types.h>
/* those are commonly provided by sys/types.h */
typedef unsigned int dev_t;
-typedef unsigned long ino_t;
+typedef uint64_t ino_t;
typedef unsigned int mode_t;
typedef signed int pid_t;
typedef unsigned int uid_t;
typedef unsigned int gid_t;
typedef unsigned long nlink_t;
-typedef signed long off_t;
+typedef int64_t off_t;
typedef signed long blksize_t;
typedef signed long blkcnt_t;
-typedef signed long time_t;
+typedef __kernel_time_t time_t;
#endif /* _NOLIBC_STD_H */
diff --git a/tools/include/nolibc/stdarg.h b/tools/include/nolibc/stdarg.h
new file mode 100644
index 000000000000..c628b5783da6
--- /dev/null
+++ b/tools/include/nolibc/stdarg.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Variadic argument support for NOLIBC
+ * Copyright (C) 2005-2020 Rich Felker, et al.
+ */
+
+#ifndef _NOLIBC_STDARG_H
+#define _NOLIBC_STDARG_H
+
+typedef __builtin_va_list va_list;
+#define va_start(v, l) __builtin_va_start(v, l)
+#define va_end(v) __builtin_va_end(v)
+#define va_arg(v, l) __builtin_va_arg(v, l)
+#define va_copy(d, s) __builtin_va_copy(d, s)
+
+#endif /* _NOLIBC_STDARG_H */
diff --git a/tools/include/nolibc/stdbool.h b/tools/include/nolibc/stdbool.h
new file mode 100644
index 000000000000..60feece22f17
--- /dev/null
+++ b/tools/include/nolibc/stdbool.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Boolean types support for NOLIBC
+ * Copyright (C) 2024 Thomas Weißschuh <linux@weissschuh.net>
+ */
+
+#ifndef _NOLIBC_STDBOOL_H
+#define _NOLIBC_STDBOOL_H
+
+#define bool _Bool
+#define true 1
+#define false 0
+
+#define __bool_true_false_are_defined 1
+
+#endif /* _NOLIBC_STDBOOL_H */
diff --git a/tools/include/nolibc/stddef.h b/tools/include/nolibc/stddef.h
new file mode 100644
index 000000000000..ecbd13eab1f5
--- /dev/null
+++ b/tools/include/nolibc/stddef.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Stddef definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
+#ifndef _NOLIBC_STDDEF_H
+#define _NOLIBC_STDDEF_H
+
+#include "stdint.h"
+
+/* note: may already be defined */
+#ifndef NULL
+#define NULL ((void *)0)
+#endif
+
+#ifndef offsetof
+#define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD)
+#endif
+
+#endif /* _NOLIBC_STDDEF_H */
diff --git a/tools/include/nolibc/stdint.h b/tools/include/nolibc/stdint.h
new file mode 100644
index 000000000000..b052ad6303c3
--- /dev/null
+++ b/tools/include/nolibc/stdint.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Standard definitions and types for NOLIBC
+ * Copyright (C) 2023 Vincent Dagonneau <v@vda.io>
+ */
+
+#ifndef _NOLIBC_STDINT_H
+#define _NOLIBC_STDINT_H
+
+typedef unsigned char uint8_t;
+typedef signed char int8_t;
+typedef unsigned short uint16_t;
+typedef signed short int16_t;
+typedef unsigned int uint32_t;
+typedef signed int int32_t;
+typedef unsigned long long uint64_t;
+typedef signed long long int64_t;
+typedef __SIZE_TYPE__ size_t;
+typedef signed long ssize_t;
+typedef unsigned long uintptr_t;
+typedef signed long intptr_t;
+typedef signed long ptrdiff_t;
+
+typedef int8_t int_least8_t;
+typedef uint8_t uint_least8_t;
+typedef int16_t int_least16_t;
+typedef uint16_t uint_least16_t;
+typedef int32_t int_least32_t;
+typedef uint32_t uint_least32_t;
+typedef int64_t int_least64_t;
+typedef uint64_t uint_least64_t;
+
+typedef int8_t int_fast8_t;
+typedef uint8_t uint_fast8_t;
+typedef ssize_t int_fast16_t;
+typedef size_t uint_fast16_t;
+typedef ssize_t int_fast32_t;
+typedef size_t uint_fast32_t;
+typedef int64_t int_fast64_t;
+typedef uint64_t uint_fast64_t;
+
+typedef __INTMAX_TYPE__ intmax_t;
+typedef __UINTMAX_TYPE__ uintmax_t;
+
+/* limits of integral types */
+
+#define INT8_MIN (-128)
+#define INT16_MIN (-32767-1)
+#define INT32_MIN (-2147483647-1)
+#define INT64_MIN (-9223372036854775807LL-1)
+
+#define INT8_MAX (127)
+#define INT16_MAX (32767)
+#define INT32_MAX (2147483647)
+#define INT64_MAX (9223372036854775807LL)
+
+#define UINT8_MAX (255)
+#define UINT16_MAX (65535)
+#define UINT32_MAX (4294967295U)
+#define UINT64_MAX (18446744073709551615ULL)
+
+#define INT_LEAST8_MIN INT8_MIN
+#define INT_LEAST16_MIN INT16_MIN
+#define INT_LEAST32_MIN INT32_MIN
+#define INT_LEAST64_MIN INT64_MIN
+
+#define INT_LEAST8_MAX INT8_MAX
+#define INT_LEAST16_MAX INT16_MAX
+#define INT_LEAST32_MAX INT32_MAX
+#define INT_LEAST64_MAX INT64_MAX
+
+#define UINT_LEAST8_MAX UINT8_MAX
+#define UINT_LEAST16_MAX UINT16_MAX
+#define UINT_LEAST32_MAX UINT32_MAX
+#define UINT_LEAST64_MAX UINT64_MAX
+
+#define SIZE_MAX ((size_t)(__LONG_MAX__) * 2 + 1)
+#define INTPTR_MIN (-__LONG_MAX__ - 1)
+#define INTPTR_MAX __LONG_MAX__
+#define PTRDIFF_MIN INTPTR_MIN
+#define PTRDIFF_MAX INTPTR_MAX
+#define UINTPTR_MAX SIZE_MAX
+
+#define INT_FAST8_MIN INT8_MIN
+#define INT_FAST16_MIN INTPTR_MIN
+#define INT_FAST32_MIN INTPTR_MIN
+#define INT_FAST64_MIN INT64_MIN
+
+#define INT_FAST8_MAX INT8_MAX
+#define INT_FAST16_MAX INTPTR_MAX
+#define INT_FAST32_MAX INTPTR_MAX
+#define INT_FAST64_MAX INT64_MAX
+
+#define UINT_FAST8_MAX UINT8_MAX
+#define UINT_FAST16_MAX SIZE_MAX
+#define UINT_FAST32_MAX SIZE_MAX
+#define UINT_FAST64_MAX UINT64_MAX
+
+#define INTMAX_MIN INT64_MIN
+#define INTMAX_MAX INT64_MAX
+#define UINTMAX_MAX UINT64_MAX
+
+#ifndef INT_MIN
+#define INT_MIN (-__INT_MAX__ - 1)
+#endif
+#ifndef INT_MAX
+#define INT_MAX __INT_MAX__
+#endif
+
+#ifndef LONG_MIN
+#define LONG_MIN (-__LONG_MAX__ - 1)
+#endif
+#ifndef LONG_MAX
+#define LONG_MAX __LONG_MAX__
+#endif
+
+#ifndef ULONG_MAX
+#define ULONG_MAX ((unsigned long)(__LONG_MAX__) * 2 + 1)
+#endif
+
+#ifndef LLONG_MIN
+#define LLONG_MIN (-__LONG_LONG_MAX__ - 1)
+#endif
+#ifndef LLONG_MAX
+#define LLONG_MAX __LONG_LONG_MAX__
+#endif
+
+#ifndef ULLONG_MAX
+#define ULLONG_MAX ((unsigned long long)(__LONG_LONG_MAX__) * 2 + 1)
+#endif
+
+#endif /* _NOLIBC_STDINT_H */
diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h
index 96ac8afc5aee..1f16dab2ac88 100644
--- a/tools/include/nolibc/stdio.h
+++ b/tools/include/nolibc/stdio.h
@@ -4,34 +4,129 @@
* Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_STDIO_H
#define _NOLIBC_STDIO_H
-#include <stdarg.h>
-
#include "std.h"
#include "arch.h"
#include "errno.h"
+#include "fcntl.h"
#include "types.h"
#include "sys.h"
+#include "stdarg.h"
#include "stdlib.h"
#include "string.h"
+#include "compiler.h"
+
+static const char *strerror(int errnum);
#ifndef EOF
#define EOF (-1)
#endif
-/* just define FILE as a non-empty type */
+/* Buffering mode used by setvbuf. */
+#define _IOFBF 0 /* Fully buffered. */
+#define _IOLBF 1 /* Line buffered. */
+#define _IONBF 2 /* No buffering. */
+
+/* just define FILE as a non-empty type. The value of the pointer gives
+ * the FD: FILE=~fd for fd>=0 or NULL for fd<0. This way positive FILE
+ * are immediately identified as abnormal entries (i.e. possible copies
+ * of valid pointers to something else).
+ */
typedef struct FILE {
char dummy[1];
} FILE;
-/* We define the 3 common stdio files as constant invalid pointers that
- * are easily recognized.
- */
-static __attribute__((unused)) FILE* const stdin = (FILE*)-3;
-static __attribute__((unused)) FILE* const stdout = (FILE*)-2;
-static __attribute__((unused)) FILE* const stderr = (FILE*)-1;
+static __attribute__((unused)) FILE* const stdin = (FILE*)(intptr_t)~STDIN_FILENO;
+static __attribute__((unused)) FILE* const stdout = (FILE*)(intptr_t)~STDOUT_FILENO;
+static __attribute__((unused)) FILE* const stderr = (FILE*)(intptr_t)~STDERR_FILENO;
+
+/* provides a FILE* equivalent of fd. The mode is ignored. */
+static __attribute__((unused))
+FILE *fdopen(int fd, const char *mode __attribute__((unused)))
+{
+ if (fd < 0) {
+ SET_ERRNO(EBADF);
+ return NULL;
+ }
+ return (FILE*)(intptr_t)~fd;
+}
+
+static __attribute__((unused))
+FILE *fopen(const char *pathname, const char *mode)
+{
+ int flags, fd;
+
+ switch (*mode) {
+ case 'r':
+ flags = O_RDONLY;
+ break;
+ case 'w':
+ flags = O_WRONLY | O_CREAT | O_TRUNC;
+ break;
+ case 'a':
+ flags = O_WRONLY | O_CREAT | O_APPEND;
+ break;
+ default:
+ SET_ERRNO(EINVAL); return NULL;
+ }
+
+ if (mode[1] == '+')
+ flags = (flags & ~(O_RDONLY | O_WRONLY)) | O_RDWR;
+
+ fd = open(pathname, flags, 0666);
+ return fdopen(fd, mode);
+}
+
+/* provides the fd of stream. */
+static __attribute__((unused))
+int fileno(FILE *stream)
+{
+ intptr_t i = (intptr_t)stream;
+
+ if (i >= 0) {
+ SET_ERRNO(EBADF);
+ return -1;
+ }
+ return ~i;
+}
+
+/* flush a stream. */
+static __attribute__((unused))
+int fflush(FILE *stream)
+{
+ intptr_t i = (intptr_t)stream;
+
+ /* NULL is valid here. */
+ if (i > 0) {
+ SET_ERRNO(EBADF);
+ return -1;
+ }
+
+ /* Don't do anything, nolibc does not support buffering. */
+ return 0;
+}
+
+/* flush a stream. */
+static __attribute__((unused))
+int fclose(FILE *stream)
+{
+ intptr_t i = (intptr_t)stream;
+
+ if (i >= 0) {
+ SET_ERRNO(EBADF);
+ return -1;
+ }
+
+ if (close(~i))
+ return EOF;
+
+ return 0;
+}
/* getc(), fgetc(), getchar() */
@@ -41,14 +136,8 @@ static __attribute__((unused))
int fgetc(FILE* stream)
{
unsigned char ch;
- int fd;
-
- if (stream < stdin || stream > stderr)
- return EOF;
-
- fd = 3 + (long)stream;
- if (read(fd, &ch, 1) <= 0)
+ if (read(fileno(stream), &ch, 1) <= 0)
return EOF;
return ch;
}
@@ -68,14 +157,8 @@ static __attribute__((unused))
int fputc(int c, FILE* stream)
{
unsigned char ch = c;
- int fd;
- if (stream < stdin || stream > stderr)
- return EOF;
-
- fd = 3 + (long)stream;
-
- if (write(fd, &ch, 1) <= 0)
+ if (write(fileno(stream), &ch, 1) <= 0)
return EOF;
return ch;
}
@@ -96,12 +179,7 @@ static __attribute__((unused))
int _fwrite(const void *buf, size_t size, FILE *stream)
{
ssize_t ret;
- int fd;
-
- if (stream < stdin || stream > stderr)
- return EOF;
-
- fd = 3 + (long)stream;
+ int fd = fileno(stream);
while (size) {
ret = write(fd, buf, size);
@@ -162,28 +240,40 @@ char *fgets(char *s, int size, FILE *stream)
}
-/* minimal vfprintf(). It supports the following formats:
+/* minimal printf(). It supports the following formats:
* - %[l*]{d,u,c,x,p}
* - %s
* - unknown modifiers are ignored.
*/
-static __attribute__((unused))
-int vfprintf(FILE *stream, const char *fmt, va_list args)
+typedef int (*__nolibc_printf_cb)(intptr_t state, const char *buf, size_t size);
+
+static __attribute__((unused, format(printf, 4, 0)))
+int __nolibc_printf(__nolibc_printf_cb cb, intptr_t state, size_t n, const char *fmt, va_list args)
{
char escape, lpref, c;
unsigned long long v;
- unsigned int written;
- size_t len, ofs;
+ unsigned int written, width;
+ size_t len, ofs, w;
char tmpbuf[21];
const char *outstr;
written = ofs = escape = lpref = 0;
while (1) {
c = fmt[ofs++];
+ width = 0;
if (escape) {
/* we're in an escape sequence, ofs == 1 */
escape = 0;
+
+ /* width */
+ while (c >= '0' && c <= '9') {
+ width *= 10;
+ width += c - '0';
+
+ c = fmt[ofs++];
+ }
+
if (c == 'c' || c == 'd' || c == 'u' || c == 'x' || c == 'p') {
char *out = tmpbuf;
@@ -219,7 +309,7 @@ int vfprintf(FILE *stream, const char *fmt, va_list args)
case 'p':
*(out++) = '0';
*(out++) = 'x';
- /* fall through */
+ __nolibc_fallthrough;
default: /* 'x' and 'p' above */
u64toh_r(v, out);
break;
@@ -231,6 +321,13 @@ int vfprintf(FILE *stream, const char *fmt, va_list args)
if (!outstr)
outstr="(null)";
}
+ else if (c == 'm') {
+#ifdef NOLIBC_IGNORE_ERRNO
+ outstr = "unknown error";
+#else
+ outstr = strerror(errno);
+#endif /* NOLIBC_IGNORE_ERRNO */
+ }
else if (c == '%') {
/* queue it verbatim */
continue;
@@ -240,6 +337,8 @@ int vfprintf(FILE *stream, const char *fmt, va_list args)
if (c == 'l') {
/* long format prefix, maintain the escape */
lpref++;
+ } else if (c == 'j') {
+ lpref = 2;
}
escape = 1;
goto do_escape;
@@ -256,8 +355,17 @@ int vfprintf(FILE *stream, const char *fmt, va_list args)
outstr = fmt;
len = ofs - 1;
flush_str:
- if (_fwrite(outstr, len, stream) != 0)
- break;
+ if (n) {
+ w = len < n ? len : n;
+ n -= w;
+ while (width-- > w) {
+ if (cb(state, " ", 1) != 0)
+ return -1;
+ written += 1;
+ }
+ if (cb(state, outstr, w) != 0)
+ return -1;
+ }
written += len;
do_escape:
@@ -273,6 +381,23 @@ int vfprintf(FILE *stream, const char *fmt, va_list args)
return written;
}
+static int __nolibc_fprintf_cb(intptr_t state, const char *buf, size_t size)
+{
+ return _fwrite(buf, size, (FILE *)state);
+}
+
+static __attribute__((unused, format(printf, 2, 0)))
+int vfprintf(FILE *stream, const char *fmt, va_list args)
+{
+ return __nolibc_printf(__nolibc_fprintf_cb, (intptr_t)stream, SIZE_MAX, fmt, args);
+}
+
+static __attribute__((unused, format(printf, 1, 0)))
+int vprintf(const char *fmt, va_list args)
+{
+ return vfprintf(stdout, fmt, args);
+}
+
static __attribute__((unused, format(printf, 2, 3)))
int fprintf(FILE *stream, const char *fmt, ...)
{
@@ -297,13 +422,223 @@ int printf(const char *fmt, ...)
return ret;
}
+static __attribute__((unused, format(printf, 2, 0)))
+int vdprintf(int fd, const char *fmt, va_list args)
+{
+ FILE *stream;
+
+ stream = fdopen(fd, NULL);
+ if (!stream)
+ return -1;
+ /* Technically 'stream' is leaked, but as it's only a wrapper around 'fd' that is fine */
+ return vfprintf(stream, fmt, args);
+}
+
+static __attribute__((unused, format(printf, 2, 3)))
+int dprintf(int fd, const char *fmt, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, fmt);
+ ret = vdprintf(fd, fmt, args);
+ va_end(args);
+
+ return ret;
+}
+
+static int __nolibc_sprintf_cb(intptr_t _state, const char *buf, size_t size)
+{
+ char **state = (char **)_state;
+
+ memcpy(*state, buf, size);
+ *state += size;
+ return 0;
+}
+
+static __attribute__((unused, format(printf, 3, 0)))
+int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
+{
+ char *state = buf;
+ int ret;
+
+ ret = __nolibc_printf(__nolibc_sprintf_cb, (intptr_t)&state, size, fmt, args);
+ if (ret < 0)
+ return ret;
+ buf[(size_t)ret < size ? (size_t)ret : size - 1] = '\0';
+ return ret;
+}
+
+static __attribute__((unused, format(printf, 3, 4)))
+int snprintf(char *buf, size_t size, const char *fmt, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, fmt);
+ ret = vsnprintf(buf, size, fmt, args);
+ va_end(args);
+
+ return ret;
+}
+
+static __attribute__((unused, format(printf, 2, 0)))
+int vsprintf(char *buf, const char *fmt, va_list args)
+{
+ return vsnprintf(buf, SIZE_MAX, fmt, args);
+}
+
+static __attribute__((unused, format(printf, 2, 3)))
+int sprintf(char *buf, const char *fmt, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, fmt);
+ ret = vsprintf(buf, fmt, args);
+ va_end(args);
+
+ return ret;
+}
+
+static __attribute__((unused))
+int vsscanf(const char *str, const char *format, va_list args)
+{
+ uintmax_t uval;
+ intmax_t ival;
+ int base;
+ char *endptr;
+ int matches;
+ int lpref;
+
+ matches = 0;
+
+ while (1) {
+ if (*format == '%') {
+ /* start of pattern */
+ lpref = 0;
+ format++;
+
+ if (*format == 'l') {
+ /* same as in printf() */
+ lpref = 1;
+ format++;
+ if (*format == 'l') {
+ lpref = 2;
+ format++;
+ }
+ }
+
+ if (*format == '%') {
+ /* literal % */
+ if ('%' != *str)
+ goto done;
+ str++;
+ format++;
+ continue;
+ } else if (*format == 'd') {
+ ival = strtoll(str, &endptr, 10);
+ if (lpref == 0)
+ *va_arg(args, int *) = ival;
+ else if (lpref == 1)
+ *va_arg(args, long *) = ival;
+ else if (lpref == 2)
+ *va_arg(args, long long *) = ival;
+ } else if (*format == 'u' || *format == 'x' || *format == 'X') {
+ base = *format == 'u' ? 10 : 16;
+ uval = strtoull(str, &endptr, base);
+ if (lpref == 0)
+ *va_arg(args, unsigned int *) = uval;
+ else if (lpref == 1)
+ *va_arg(args, unsigned long *) = uval;
+ else if (lpref == 2)
+ *va_arg(args, unsigned long long *) = uval;
+ } else if (*format == 'p') {
+ *va_arg(args, void **) = (void *)strtoul(str, &endptr, 16);
+ } else {
+ SET_ERRNO(EILSEQ);
+ goto done;
+ }
+
+ format++;
+ str = endptr;
+ matches++;
+
+ } else if (*format == '\0') {
+ goto done;
+ } else if (isspace(*format)) {
+ /* skip spaces in format and str */
+ while (isspace(*format))
+ format++;
+ while (isspace(*str))
+ str++;
+ } else if (*format == *str) {
+ /* literal match */
+ format++;
+ str++;
+ } else {
+ if (!matches)
+ matches = EOF;
+ goto done;
+ }
+ }
+
+done:
+ return matches;
+}
+
+static __attribute__((unused, format(scanf, 2, 3)))
+int sscanf(const char *str, const char *format, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, format);
+ ret = vsscanf(str, format, args);
+ va_end(args);
+ return ret;
+}
+
static __attribute__((unused))
void perror(const char *msg)
{
+#ifdef NOLIBC_IGNORE_ERRNO
+ fprintf(stderr, "%s%sunknown error\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : "");
+#else
fprintf(stderr, "%s%serrno=%d\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : "", errno);
+#endif
}
-/* make sure to include all global symbols */
-#include "nolibc.h"
+static __attribute__((unused))
+int setvbuf(FILE *stream __attribute__((unused)),
+ char *buf __attribute__((unused)),
+ int mode,
+ size_t size __attribute__((unused)))
+{
+ /*
+ * nolibc does not support buffering so this is a nop. Just check mode
+ * is valid as required by the spec.
+ */
+ switch (mode) {
+ case _IOFBF:
+ case _IOLBF:
+ case _IONBF:
+ break;
+ default:
+ return EOF;
+ }
+
+ return 0;
+}
+
+static __attribute__((unused))
+const char *strerror(int errno)
+{
+ static char buf[18] = "errno=";
+
+ i64toa_r(errno, &buf[6]);
+
+ return buf;
+}
#endif /* _NOLIBC_STDIO_H */
diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h
index a24000d1e822..f184e108ed0a 100644
--- a/tools/include/nolibc/stdlib.h
+++ b/tools/include/nolibc/stdlib.h
@@ -4,6 +4,9 @@
* Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_STDLIB_H
#define _NOLIBC_STDLIB_H
@@ -12,6 +15,7 @@
#include "types.h"
#include "sys.h"
#include "string.h"
+#include <linux/auxvec.h>
struct nolibc_heap {
size_t len;
@@ -28,7 +32,26 @@ static __attribute__((unused)) char itoa_buffer[21];
* As much as possible, please keep functions alphabetically sorted.
*/
+static __inline__
+int abs(int j)
+{
+ return j >= 0 ? j : -j;
+}
+
+static __inline__
+long labs(long j)
+{
+ return j >= 0 ? j : -j;
+}
+
+static __inline__
+long long llabs(long long j)
+{
+ return j >= 0 ? j : -j;
+}
+
/* must be exported, as it's used by libgcc for various divide functions */
+void abort(void);
__attribute__((weak,unused,noreturn,section(".text.nolibc_abort")))
void abort(void)
{
@@ -77,16 +100,16 @@ void free(void *ptr)
munmap(heap, heap->len);
}
+#ifndef NOLIBC_NO_RUNTIME
/* getenv() tries to find the environment variable named <name> in the
* environment array pointed to by global variable "environ" which must be
* declared as a char **, and must be terminated by a NULL (it is recommended
* to set this variable to the "envp" argument of main()). If the requested
* environment variable exists its value is returned otherwise NULL is
- * returned. getenv() is forcefully inlined so that the reference to "environ"
- * will be dropped if unused, even at -O0.
+ * returned.
*/
static __attribute__((unused))
-char *_getenv(const char *name, char **environ)
+char *getenv(const char *name)
{
int idx, i;
@@ -100,13 +123,7 @@ char *_getenv(const char *name, char **environ)
}
return NULL;
}
-
-static inline __attribute__((unused,always_inline))
-char *getenv(const char *name)
-{
- extern char **environ;
- return _getenv(name, environ);
-}
+#endif /* NOLIBC_NO_RUNTIME */
static __attribute__((unused))
void *malloc(size_t len)
@@ -166,7 +183,7 @@ void *realloc(void *old_ptr, size_t new_size)
if (__builtin_expect(!ret, 0))
return NULL;
- memcpy(ret, heap->user_p, heap->len);
+ memcpy(ret, heap->user_p, user_p_len);
munmap(heap, heap->len);
return ret;
}
@@ -204,7 +221,7 @@ int utoh_r(unsigned long in, char *buffer)
/* converts unsigned long <in> to an hex string using the static itoa_buffer
* and returns the pointer to that string.
*/
-static inline __attribute__((unused))
+static __inline__ __attribute__((unused))
char *utoh(unsigned long in)
{
utoh_r(in, itoa_buffer);
@@ -255,7 +272,7 @@ int itoa_r(long in, char *buffer)
int len = 0;
if (in < 0) {
- in = -in;
+ in = -(unsigned long)in;
*(ptr++) = '-';
len++;
}
@@ -266,7 +283,7 @@ int itoa_r(long in, char *buffer)
/* for historical compatibility, same as above but returns the pointer to the
* buffer.
*/
-static inline __attribute__((unused))
+static __inline__ __attribute__((unused))
char *ltoa_r(long in, char *buffer)
{
itoa_r(in, buffer);
@@ -276,7 +293,7 @@ char *ltoa_r(long in, char *buffer)
/* converts long integer <in> to a string using the static itoa_buffer and
* returns the pointer to that string.
*/
-static inline __attribute__((unused))
+static __inline__ __attribute__((unused))
char *itoa(long in)
{
itoa_r(in, itoa_buffer);
@@ -286,7 +303,7 @@ char *itoa(long in)
/* converts long integer <in> to a string using the static itoa_buffer and
* returns the pointer to that string. Same as above, for compatibility.
*/
-static inline __attribute__((unused))
+static __inline__ __attribute__((unused))
char *ltoa(long in)
{
itoa_r(in, itoa_buffer);
@@ -296,7 +313,7 @@ char *ltoa(long in)
/* converts unsigned long integer <in> to a string using the static itoa_buffer
* and returns the pointer to that string.
*/
-static inline __attribute__((unused))
+static __inline__ __attribute__((unused))
char *utoa(unsigned long in)
{
utoa_r(in, itoa_buffer);
@@ -340,7 +357,7 @@ int u64toh_r(uint64_t in, char *buffer)
/* converts uint64_t <in> to an hex string using the static itoa_buffer and
* returns the pointer to that string.
*/
-static inline __attribute__((unused))
+static __inline__ __attribute__((unused))
char *u64toh(uint64_t in)
{
u64toh_r(in, itoa_buffer);
@@ -391,7 +408,7 @@ int i64toa_r(int64_t in, char *buffer)
int len = 0;
if (in < 0) {
- in = -in;
+ in = -(uint64_t)in;
*(ptr++) = '-';
len++;
}
@@ -402,7 +419,7 @@ int i64toa_r(int64_t in, char *buffer)
/* converts int64_t <in> to a string using the static itoa_buffer and returns
* the pointer to that string.
*/
-static inline __attribute__((unused))
+static __inline__ __attribute__((unused))
char *i64toa(int64_t in)
{
i64toa_r(in, itoa_buffer);
@@ -412,14 +429,120 @@ char *i64toa(int64_t in)
/* converts uint64_t <in> to a string using the static itoa_buffer and returns
* the pointer to that string.
*/
-static inline __attribute__((unused))
+static __inline__ __attribute__((unused))
char *u64toa(uint64_t in)
{
u64toa_r(in, itoa_buffer);
return itoa_buffer;
}
-/* make sure to include all global symbols */
-#include "nolibc.h"
+static __attribute__((unused))
+uintmax_t __strtox(const char *nptr, char **endptr, int base, intmax_t lower_limit, uintmax_t upper_limit)
+{
+ const char signed_ = lower_limit != 0;
+ unsigned char neg = 0, overflow = 0;
+ uintmax_t val = 0, limit, old_val;
+ char c;
+
+ if (base < 0 || base > 36) {
+ SET_ERRNO(EINVAL);
+ goto out;
+ }
+
+ while (isspace(*nptr))
+ nptr++;
+
+ if (*nptr == '+') {
+ nptr++;
+ } else if (*nptr == '-') {
+ neg = 1;
+ nptr++;
+ }
+
+ if (signed_ && neg)
+ limit = -(uintmax_t)lower_limit;
+ else
+ limit = upper_limit;
+
+ if ((base == 0 || base == 16) &&
+ (strncmp(nptr, "0x", 2) == 0 || strncmp(nptr, "0X", 2) == 0)) {
+ base = 16;
+ nptr += 2;
+ } else if (base == 0 && strncmp(nptr, "0", 1) == 0) {
+ base = 8;
+ nptr += 1;
+ } else if (base == 0) {
+ base = 10;
+ }
+
+ while (*nptr) {
+ c = *nptr;
+
+ if (c >= '0' && c <= '9')
+ c -= '0';
+ else if (c >= 'a' && c <= 'z')
+ c = c - 'a' + 10;
+ else if (c >= 'A' && c <= 'Z')
+ c = c - 'A' + 10;
+ else
+ goto out;
+
+ if (c >= base)
+ goto out;
+
+ nptr++;
+ old_val = val;
+ val *= base;
+ val += c;
+
+ if (val > limit || val < old_val)
+ overflow = 1;
+ }
+
+out:
+ if (overflow) {
+ SET_ERRNO(ERANGE);
+ val = limit;
+ }
+ if (endptr)
+ *endptr = (char *)nptr;
+ return neg ? -val : val;
+}
+
+static __attribute__((unused))
+long strtol(const char *nptr, char **endptr, int base)
+{
+ return __strtox(nptr, endptr, base, LONG_MIN, LONG_MAX);
+}
+
+static __attribute__((unused))
+unsigned long strtoul(const char *nptr, char **endptr, int base)
+{
+ return __strtox(nptr, endptr, base, 0, ULONG_MAX);
+}
+
+static __attribute__((unused))
+long long strtoll(const char *nptr, char **endptr, int base)
+{
+ return __strtox(nptr, endptr, base, LLONG_MIN, LLONG_MAX);
+}
+
+static __attribute__((unused))
+unsigned long long strtoull(const char *nptr, char **endptr, int base)
+{
+ return __strtox(nptr, endptr, base, 0, ULLONG_MAX);
+}
+
+static __attribute__((unused))
+intmax_t strtoimax(const char *nptr, char **endptr, int base)
+{
+ return __strtox(nptr, endptr, base, INTMAX_MIN, INTMAX_MAX);
+}
+
+static __attribute__((unused))
+uintmax_t strtoumax(const char *nptr, char **endptr, int base)
+{
+ return __strtox(nptr, endptr, base, 0, UINTMAX_MAX);
+}
#endif /* _NOLIBC_STDLIB_H */
diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h
index fffdaf6ff467..4000926f44ac 100644
--- a/tools/include/nolibc/string.h
+++ b/tools/include/nolibc/string.h
@@ -4,9 +4,13 @@
* Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_STRING_H
#define _NOLIBC_STRING_H
+#include "arch.h"
#include "std.h"
static void *malloc(size_t len);
@@ -27,31 +31,11 @@ int memcmp(const void *s1, const void *s2, size_t n)
return c1;
}
-static __attribute__((unused))
-void *_nolibc_memcpy_up(void *dst, const void *src, size_t len)
-{
- size_t pos = 0;
-
- while (pos < len) {
- ((char *)dst)[pos] = ((const char *)src)[pos];
- pos++;
- }
- return dst;
-}
-
-static __attribute__((unused))
-void *_nolibc_memcpy_down(void *dst, const void *src, size_t len)
-{
- while (len) {
- len--;
- ((char *)dst)[len] = ((const char *)src)[len];
- }
- return dst;
-}
-
+#ifndef NOLIBC_ARCH_HAS_MEMMOVE
/* might be ignored by the compiler without -ffreestanding, then found as
* missing.
*/
+void *memmove(void *dst, const void *src, size_t len);
__attribute__((weak,unused,section(".text.nolibc_memmove")))
void *memmove(void *dst, const void *src, size_t len)
{
@@ -72,17 +56,29 @@ void *memmove(void *dst, const void *src, size_t len)
}
return dst;
}
+#endif /* #ifndef NOLIBC_ARCH_HAS_MEMMOVE */
+#ifndef NOLIBC_ARCH_HAS_MEMCPY
/* must be exported, as it's used by libgcc on ARM */
+void *memcpy(void *dst, const void *src, size_t len);
__attribute__((weak,unused,section(".text.nolibc_memcpy")))
void *memcpy(void *dst, const void *src, size_t len)
{
- return _nolibc_memcpy_up(dst, src, len);
+ size_t pos = 0;
+
+ while (pos < len) {
+ ((char *)dst)[pos] = ((const char *)src)[pos];
+ pos++;
+ }
+ return dst;
}
+#endif /* #ifndef NOLIBC_ARCH_HAS_MEMCPY */
+#ifndef NOLIBC_ARCH_HAS_MEMSET
/* might be ignored by the compiler without -ffreestanding, then found as
* missing.
*/
+void *memset(void *dst, int b, size_t len);
__attribute__((weak,unused,section(".text.nolibc_memset")))
void *memset(void *dst, int b, size_t len)
{
@@ -90,11 +86,27 @@ void *memset(void *dst, int b, size_t len)
while (len--) {
/* prevent gcc from recognizing memset() here */
- asm volatile("");
+ __asm__ volatile("");
*(p++) = b;
}
return dst;
}
+#endif /* #ifndef NOLIBC_ARCH_HAS_MEMSET */
+
+#ifndef NOLIBC_ARCH_HAS_MEMCHR
+static __attribute__((unused))
+void *memchr(const void *s, int c, size_t len)
+{
+ char *p = (char *)s;
+
+ while (len--) {
+ if (*p == (char)c)
+ return p;
+ p++;
+ }
+ return NULL;
+}
+#endif /* #ifndef NOLIBC_ARCH_HAS_MEMCHR */
static __attribute__((unused))
char *strchr(const char *s, int c)
@@ -133,13 +145,14 @@ char *strcpy(char *dst, const char *src)
* thus itself, hence the asm() statement below that's meant to disable this
* confusing practice.
*/
-static __attribute__((unused))
+size_t strlen(const char *str);
+__attribute__((weak,unused,section(".text.nolibc_strlen")))
size_t strlen(const char *str)
{
size_t len;
for (len = 0; str[len]; len++)
- asm("");
+ __asm__("");
return len;
}
@@ -197,22 +210,26 @@ char *strndup(const char *str, size_t maxlen)
static __attribute__((unused))
size_t strlcat(char *dst, const char *src, size_t size)
{
- size_t len;
- char c;
-
- for (len = 0; dst[len]; len++)
- ;
-
- for (;;) {
- c = *src;
- if (len < size)
- dst[len] = c;
- if (!c)
+ size_t len = strnlen(dst, size);
+
+ /*
+ * We want len < size-1. But as size is unsigned and can wrap
+ * around, we use len + 1 instead.
+ */
+ while (len + 1 < size) {
+ dst[len] = *src;
+ if (*src == '\0')
break;
len++;
src++;
}
+ if (len < size)
+ dst[len] = '\0';
+
+ while (*src++)
+ len++;
+
return len;
}
@@ -220,16 +237,18 @@ static __attribute__((unused))
size_t strlcpy(char *dst, const char *src, size_t size)
{
size_t len;
- char c;
- for (len = 0;;) {
- c = src[len];
- if (len < size)
- dst[len] = c;
- if (!c)
- break;
- len++;
+ for (len = 0; len < size; len++) {
+ dst[len] = src[len];
+ if (!dst[len])
+ return len;
}
+ if (size)
+ dst[size-1] = '\0';
+
+ while (src[len])
+ len++;
+
return len;
}
@@ -288,7 +307,40 @@ char *strrchr(const char *s, int c)
return (char *)ret;
}
-/* make sure to include all global symbols */
-#include "nolibc.h"
+static __attribute__((unused))
+char *strstr(const char *haystack, const char *needle)
+{
+ size_t len_haystack, len_needle;
+
+ len_needle = strlen(needle);
+ if (!len_needle)
+ return NULL;
+
+ len_haystack = strlen(haystack);
+ while (len_haystack >= len_needle) {
+ if (!memcmp(haystack, needle, len_needle))
+ return (char *)haystack;
+ haystack++;
+ len_haystack--;
+ }
+
+ return NULL;
+}
+
+static __attribute__((unused))
+int tolower(int c)
+{
+ if (c >= 'A' && c <= 'Z')
+ return c - 'A' + 'a';
+ return c;
+}
+
+static __attribute__((unused))
+int toupper(int c)
+{
+ if (c >= 'a' && c <= 'z')
+ return c - 'a' + 'A';
+ return c;
+}
#endif /* _NOLIBC_STRING_H */
diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index 78473d34e27c..847af1ccbdc9 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -4,26 +4,58 @@
* Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_SYS_H
#define _NOLIBC_SYS_H
-#include <stdarg.h>
#include "std.h"
/* system includes */
-#include <asm/unistd.h>
-#include <asm/signal.h> // for SIGCHLD
-#include <asm/ioctls.h>
-#include <asm/mman.h>
+#include <linux/unistd.h>
+#include <linux/signal.h> /* for SIGCHLD */
+#include <linux/termios.h>
+#include <linux/mman.h>
#include <linux/fs.h>
#include <linux/loop.h>
#include <linux/time.h>
+#include <linux/auxvec.h>
+#include <linux/fcntl.h> /* for O_* and AT_* */
+#include <linux/sched.h> /* for clone_args */
+#include <linux/stat.h> /* for statx() */
-#include "arch.h"
#include "errno.h"
+#include "stdarg.h"
#include "types.h"
+/* Syscall return helper: takes the syscall value in argument and checks for an
+ * error in it. This may only be used with signed returns (int or long), but
+ * not with pointers. An error is any value < 0. When an error is encountered,
+ * -ret is set into errno and -1 is returned. Otherwise the returned value is
+ * passed as-is with its type preserved.
+ */
+
+#define __sysret(arg) \
+({ \
+ __typeof__(arg) __sysret_arg = (arg); \
+ (__sysret_arg < 0) /* error ? */ \
+ ? (({ SET_ERRNO(-__sysret_arg); }), -1) /* ret -1 with errno = -arg */ \
+ : __sysret_arg; /* return original value */ \
+})
+
+/* Syscall ENOSYS helper: Avoids unused-parameter warnings and provides a
+ * debugging hook.
+ */
+
+static __inline__ int __nolibc_enosys(const char *syscall, ...)
+{
+ (void)syscall;
+ return -ENOSYS;
+}
+
+
/* Functions in this file only describe syscalls. They're declared static so
* that the compiler usually decides to inline them while still being allowed
* to pass a pointer to one of their instances. Each syscall exists in two
@@ -73,10 +105,10 @@ int brk(void *addr)
static __attribute__((unused))
void *sbrk(intptr_t inc)
{
- void *ret;
-
/* first call to find current end */
- if ((ret = sys_brk(0)) && (sys_brk(ret + inc) == ret + inc))
+ void *ret = sys_brk(NULL);
+
+ if (ret && sys_brk(ret + inc) == ret + inc)
return ret + inc;
SET_ERRNO(ENOMEM);
@@ -86,6 +118,7 @@ void *sbrk(intptr_t inc)
/*
* int chdir(const char *path);
+ * int fchdir(int fildes);
*/
static __attribute__((unused))
@@ -97,13 +130,19 @@ int sys_chdir(const char *path)
static __attribute__((unused))
int chdir(const char *path)
{
- int ret = sys_chdir(path);
+ return __sysret(sys_chdir(path));
+}
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+static __attribute__((unused))
+int sys_fchdir(int fildes)
+{
+ return my_syscall1(__NR_fchdir, fildes);
+}
+
+static __attribute__((unused))
+int fchdir(int fildes)
+{
+ return __sysret(sys_fchdir(fildes));
}
@@ -114,25 +153,17 @@ int chdir(const char *path)
static __attribute__((unused))
int sys_chmod(const char *path, mode_t mode)
{
-#ifdef __NR_fchmodat
+#if defined(__NR_fchmodat)
return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0);
-#elif defined(__NR_chmod)
- return my_syscall2(__NR_chmod, path, mode);
#else
-#error Neither __NR_fchmodat nor __NR_chmod defined, cannot implement sys_chmod()
+ return my_syscall2(__NR_chmod, path, mode);
#endif
}
static __attribute__((unused))
int chmod(const char *path, mode_t mode)
{
- int ret = sys_chmod(path, mode);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return __sysret(sys_chmod(path, mode));
}
@@ -143,25 +174,17 @@ int chmod(const char *path, mode_t mode)
static __attribute__((unused))
int sys_chown(const char *path, uid_t owner, gid_t group)
{
-#ifdef __NR_fchownat
+#if defined(__NR_fchownat)
return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0);
-#elif defined(__NR_chown)
- return my_syscall3(__NR_chown, path, owner, group);
#else
-#error Neither __NR_fchownat nor __NR_chown defined, cannot implement sys_chown()
+ return my_syscall3(__NR_chown, path, owner, group);
#endif
}
static __attribute__((unused))
int chown(const char *path, uid_t owner, gid_t group)
{
- int ret = sys_chown(path, owner, group);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return __sysret(sys_chown(path, owner, group));
}
@@ -178,13 +201,7 @@ int sys_chroot(const char *path)
static __attribute__((unused))
int chroot(const char *path)
{
- int ret = sys_chroot(path);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return __sysret(sys_chroot(path));
}
@@ -201,13 +218,7 @@ int sys_close(int fd)
static __attribute__((unused))
int close(int fd)
{
- int ret = sys_close(fd);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return __sysret(sys_close(fd));
}
@@ -224,13 +235,7 @@ int sys_dup(int fd)
static __attribute__((unused))
int dup(int fd)
{
- int ret = sys_dup(fd);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return __sysret(sys_dup(fd));
}
@@ -241,25 +246,30 @@ int dup(int fd)
static __attribute__((unused))
int sys_dup2(int old, int new)
{
-#ifdef __NR_dup3
+#if defined(__NR_dup3)
+ int ret, nr_fcntl;
+
+#ifdef __NR_fcntl64
+ nr_fcntl = __NR_fcntl64;
+#else
+ nr_fcntl = __NR_fcntl;
+#endif
+
+ if (old == new) {
+ ret = my_syscall2(nr_fcntl, old, F_GETFD);
+ return ret < 0 ? ret : old;
+ }
+
return my_syscall3(__NR_dup3, old, new, 0);
-#elif defined(__NR_dup2)
- return my_syscall2(__NR_dup2, old, new);
#else
-#error Neither __NR_dup3 nor __NR_dup2 defined, cannot implement sys_dup2()
+ return my_syscall2(__NR_dup2, old, new);
#endif
}
static __attribute__((unused))
int dup2(int old, int new)
{
- int ret = sys_dup2(old, new);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return __sysret(sys_dup2(old, new));
}
@@ -267,7 +277,7 @@ int dup2(int old, int new)
* int dup3(int old, int new, int flags);
*/
-#ifdef __NR_dup3
+#if defined(__NR_dup3)
static __attribute__((unused))
int sys_dup3(int old, int new, int flags)
{
@@ -277,13 +287,7 @@ int sys_dup3(int old, int new, int flags)
static __attribute__((unused))
int dup3(int old, int new, int flags)
{
- int ret = sys_dup3(old, new, flags);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return __sysret(sys_dup3(old, new, flags));
}
#endif
@@ -301,13 +305,7 @@ int sys_execve(const char *filename, char *const argv[], char *const envp[])
static __attribute__((unused))
int execve(const char *filename, char *const argv[], char *const envp[])
{
- int ret = sys_execve(filename, argv, envp);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return __sysret(sys_execve(filename, argv, envp));
}
@@ -319,48 +317,74 @@ static __attribute__((noreturn,unused))
void sys_exit(int status)
{
my_syscall1(__NR_exit, status & 255);
- while(1); // shut the "noreturn" warnings.
+ while(1); /* shut the "noreturn" warnings. */
}
static __attribute__((noreturn,unused))
-void exit(int status)
+void _exit(int status)
{
sys_exit(status);
}
+static __attribute__((noreturn,unused))
+void exit(int status)
+{
+ _exit(status);
+}
+
/*
* pid_t fork(void);
*/
+#ifndef sys_fork
static __attribute__((unused))
pid_t sys_fork(void)
{
-#ifdef __NR_clone
+#if defined(__NR_clone)
/* note: some archs only have clone() and not fork(). Different archs
* have a different API, but most archs have the flags on first arg and
* will not use the rest with no other flag.
*/
return my_syscall5(__NR_clone, SIGCHLD, 0, 0, 0, 0);
-#elif defined(__NR_fork)
- return my_syscall0(__NR_fork);
#else
-#error Neither __NR_clone nor __NR_fork defined, cannot implement sys_fork()
+ return my_syscall0(__NR_fork);
#endif
}
+#endif
static __attribute__((unused))
pid_t fork(void)
{
- pid_t ret = sys_fork();
+ return __sysret(sys_fork());
+}
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+#ifndef sys_vfork
+static __attribute__((unused))
+pid_t sys_vfork(void)
+{
+#if defined(__NR_vfork)
+ return my_syscall0(__NR_vfork);
+#else
+ /*
+ * clone() could be used but has different argument orders per
+ * architecture.
+ */
+ struct clone_args args = {
+ .flags = CLONE_VM | CLONE_VFORK,
+ .exit_signal = SIGCHLD,
+ };
+
+ return my_syscall2(__NR_clone3, &args, sizeof(args));
+#endif
}
+#endif
+static __attribute__((unused))
+pid_t vfork(void)
+{
+ return __sysret(sys_vfork());
+}
/*
* int fsync(int fd);
@@ -375,13 +399,7 @@ int sys_fsync(int fd)
static __attribute__((unused))
int fsync(int fd)
{
- int ret = sys_fsync(fd);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return __sysret(sys_fsync(fd));
}
@@ -398,13 +416,28 @@ int sys_getdents64(int fd, struct linux_dirent64 *dirp, int count)
static __attribute__((unused))
int getdents64(int fd, struct linux_dirent64 *dirp, int count)
{
- int ret = sys_getdents64(fd, dirp, count);
+ return __sysret(sys_getdents64(fd, dirp, count));
+}
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+
+/*
+ * uid_t geteuid(void);
+ */
+
+static __attribute__((unused))
+uid_t sys_geteuid(void)
+{
+#if defined(__NR_geteuid32)
+ return my_syscall0(__NR_geteuid32);
+#else
+ return my_syscall0(__NR_geteuid);
+#endif
+}
+
+static __attribute__((unused))
+uid_t geteuid(void)
+{
+ return sys_geteuid();
}
@@ -421,13 +454,7 @@ pid_t sys_getpgid(pid_t pid)
static __attribute__((unused))
pid_t getpgid(pid_t pid)
{
- pid_t ret = sys_getpgid(pid);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return __sysret(sys_getpgid(pid));
}
@@ -498,52 +525,41 @@ pid_t gettid(void)
return sys_gettid();
}
+#ifndef NOLIBC_NO_RUNTIME
+static unsigned long getauxval(unsigned long key);
/*
- * int gettimeofday(struct timeval *tv, struct timezone *tz);
+ * int getpagesize(void);
*/
static __attribute__((unused))
-int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
+int getpagesize(void)
{
- return my_syscall2(__NR_gettimeofday, tv, tz);
+ return __sysret((int)getauxval(AT_PAGESZ) ?: -ENOENT);
}
-
-static __attribute__((unused))
-int gettimeofday(struct timeval *tv, struct timezone *tz)
-{
- int ret = sys_gettimeofday(tv, tz);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
+#endif /* NOLIBC_NO_RUNTIME */
/*
- * int ioctl(int fd, unsigned long req, void *value);
+ * uid_t getuid(void);
*/
static __attribute__((unused))
-int sys_ioctl(int fd, unsigned long req, void *value)
+uid_t sys_getuid(void)
{
- return my_syscall3(__NR_ioctl, fd, req, value);
+#if defined(__NR_getuid32)
+ return my_syscall0(__NR_getuid32);
+#else
+ return my_syscall0(__NR_getuid);
+#endif
}
static __attribute__((unused))
-int ioctl(int fd, unsigned long req, void *value)
+uid_t getuid(void)
{
- int ret = sys_ioctl(fd, req, value);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return sys_getuid();
}
+
/*
* int kill(pid_t pid, int signal);
*/
@@ -557,13 +573,7 @@ int sys_kill(pid_t pid, int signal)
static __attribute__((unused))
int kill(pid_t pid, int signal)
{
- int ret = sys_kill(pid, signal);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return __sysret(sys_kill(pid, signal));
}
@@ -574,25 +584,17 @@ int kill(pid_t pid, int signal)
static __attribute__((unused))
int sys_link(const char *old, const char *new)
{
-#ifdef __NR_linkat
+#if defined(__NR_linkat)
return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0);
-#elif defined(__NR_link)
- return my_syscall2(__NR_link, old, new);
#else
-#error Neither __NR_linkat nor __NR_link defined, cannot implement sys_link()
+ return my_syscall2(__NR_link, old, new);
#endif
}
static __attribute__((unused))
int link(const char *old, const char *new)
{
- int ret = sys_link(old, new);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return __sysret(sys_link(old, new));
}
@@ -603,19 +605,27 @@ int link(const char *old, const char *new)
static __attribute__((unused))
off_t sys_lseek(int fd, off_t offset, int whence)
{
+#if defined(__NR_llseek)
+ __kernel_loff_t loff = 0;
+ off_t result;
+ int ret;
+
+ ret = my_syscall5(__NR_llseek, fd, offset >> 32, (uint32_t)offset, &loff, whence);
+ if (ret < 0)
+ result = ret;
+ else
+ result = loff;
+
+ return result;
+#else
return my_syscall3(__NR_lseek, fd, offset, whence);
+#endif
}
static __attribute__((unused))
off_t lseek(int fd, off_t offset, int whence)
{
- off_t ret = sys_lseek(fd, offset, whence);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return __sysret(sys_lseek(fd, offset, whence));
}
@@ -626,182 +636,82 @@ off_t lseek(int fd, off_t offset, int whence)
static __attribute__((unused))
int sys_mkdir(const char *path, mode_t mode)
{
-#ifdef __NR_mkdirat
+#if defined(__NR_mkdirat)
return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode);
-#elif defined(__NR_mkdir)
- return my_syscall2(__NR_mkdir, path, mode);
#else
-#error Neither __NR_mkdirat nor __NR_mkdir defined, cannot implement sys_mkdir()
+ return my_syscall2(__NR_mkdir, path, mode);
#endif
}
static __attribute__((unused))
int mkdir(const char *path, mode_t mode)
{
- int ret = sys_mkdir(path, mode);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return __sysret(sys_mkdir(path, mode));
}
-
/*
- * int mknod(const char *path, mode_t mode, dev_t dev);
+ * int rmdir(const char *path);
*/
static __attribute__((unused))
-long sys_mknod(const char *path, mode_t mode, dev_t dev)
+int sys_rmdir(const char *path)
{
-#ifdef __NR_mknodat
- return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev);
-#elif defined(__NR_mknod)
- return my_syscall3(__NR_mknod, path, mode, dev);
+#if defined(__NR_rmdir)
+ return my_syscall1(__NR_rmdir, path);
#else
-#error Neither __NR_mknodat nor __NR_mknod defined, cannot implement sys_mknod()
+ return my_syscall3(__NR_unlinkat, AT_FDCWD, path, AT_REMOVEDIR);
#endif
}
static __attribute__((unused))
-int mknod(const char *path, mode_t mode, dev_t dev)
+int rmdir(const char *path)
{
- int ret = sys_mknod(path, mode, dev);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return __sysret(sys_rmdir(path));
}
-#ifndef MAP_SHARED
-#define MAP_SHARED 0x01 /* Share changes */
-#define MAP_PRIVATE 0x02 /* Changes are private */
-#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */
-#endif
-#ifndef MAP_FAILED
-#define MAP_FAILED ((void *)-1)
-#endif
+/*
+ * int mknod(const char *path, mode_t mode, dev_t dev);
+ */
static __attribute__((unused))
-void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd,
- off_t offset)
+long sys_mknod(const char *path, mode_t mode, dev_t dev)
{
-#ifndef my_syscall6
- /* Function not implemented. */
- return (void *)-ENOSYS;
-#else
-
- int n;
-
-#if defined(__NR_mmap2)
- n = __NR_mmap2;
- offset >>= 12;
+#if defined(__NR_mknodat)
+ return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev);
#else
- n = __NR_mmap;
-#endif
-
- return (void *)my_syscall6(n, addr, length, prot, flags, fd, offset);
+ return my_syscall3(__NR_mknod, path, mode, dev);
#endif
}
static __attribute__((unused))
-void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset)
-{
- void *ret = sys_mmap(addr, length, prot, flags, fd, offset);
-
- if ((unsigned long)ret >= -4095UL) {
- SET_ERRNO(-(long)ret);
- ret = MAP_FAILED;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int sys_munmap(void *addr, size_t length)
+int mknod(const char *path, mode_t mode, dev_t dev)
{
- return my_syscall2(__NR_munmap, addr, length);
+ return __sysret(sys_mknod(path, mode, dev));
}
-static __attribute__((unused))
-int munmap(void *addr, size_t length)
-{
- int ret = sys_munmap(addr, length);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
/*
- * int mount(const char *source, const char *target,
- * const char *fstype, unsigned long flags,
- * const void *data);
+ * int pipe2(int pipefd[2], int flags);
+ * int pipe(int pipefd[2]);
*/
-static __attribute__((unused))
-int sys_mount(const char *src, const char *tgt, const char *fst,
- unsigned long flags, const void *data)
-{
- return my_syscall5(__NR_mount, src, tgt, fst, flags, data);
-}
static __attribute__((unused))
-int mount(const char *src, const char *tgt,
- const char *fst, unsigned long flags,
- const void *data)
+int sys_pipe2(int pipefd[2], int flags)
{
- int ret = sys_mount(src, tgt, fst, flags, data);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return my_syscall2(__NR_pipe2, pipefd, flags);
}
-
-/*
- * int open(const char *path, int flags[, mode_t mode]);
- */
-
static __attribute__((unused))
-int sys_open(const char *path, int flags, mode_t mode)
+int pipe2(int pipefd[2], int flags)
{
-#ifdef __NR_openat
- return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode);
-#elif defined(__NR_open)
- return my_syscall3(__NR_open, path, flags, mode);
-#else
-#error Neither __NR_openat nor __NR_open defined, cannot implement sys_open()
-#endif
+ return __sysret(sys_pipe2(pipefd, flags));
}
static __attribute__((unused))
-int open(const char *path, int flags, ...)
+int pipe(int pipefd[2])
{
- mode_t mode = 0;
- int ret;
-
- if (flags & O_CREAT) {
- va_list args;
-
- va_start(args, flags);
- mode = va_arg(args, mode_t);
- va_end(args);
- }
-
- ret = sys_open(path, flags, mode);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return pipe2(pipefd, 0);
}
@@ -818,48 +728,7 @@ int sys_pivot_root(const char *new, const char *old)
static __attribute__((unused))
int pivot_root(const char *new, const char *old)
{
- int ret = sys_pivot_root(new, old);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-
-/*
- * int poll(struct pollfd *fds, int nfds, int timeout);
- */
-
-static __attribute__((unused))
-int sys_poll(struct pollfd *fds, int nfds, int timeout)
-{
-#if defined(__NR_ppoll)
- struct timespec t;
-
- if (timeout >= 0) {
- t.tv_sec = timeout / 1000;
- t.tv_nsec = (timeout % 1000) * 1000000;
- }
- return my_syscall4(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL);
-#elif defined(__NR_poll)
- return my_syscall3(__NR_poll, fds, nfds, timeout);
-#else
-#error Neither __NR_ppoll nor __NR_poll defined, cannot implement sys_poll()
-#endif
-}
-
-static __attribute__((unused))
-int poll(struct pollfd *fds, int nfds, int timeout)
-{
- int ret = sys_poll(fds, nfds, timeout);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return __sysret(sys_pivot_root(new, old));
}
@@ -876,37 +745,7 @@ ssize_t sys_read(int fd, void *buf, size_t count)
static __attribute__((unused))
ssize_t read(int fd, void *buf, size_t count)
{
- ssize_t ret = sys_read(fd, buf, count);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-
-/*
- * int reboot(int cmd);
- * <cmd> is among LINUX_REBOOT_CMD_*
- */
-
-static __attribute__((unused))
-ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg)
-{
- return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg);
-}
-
-static __attribute__((unused))
-int reboot(int cmd)
-{
- int ret = sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return __sysret(sys_read(fd, buf, count));
}
@@ -923,82 +762,34 @@ int sys_sched_yield(void)
static __attribute__((unused))
int sched_yield(void)
{
- int ret = sys_sched_yield();
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return __sysret(sys_sched_yield());
}
/*
- * int select(int nfds, fd_set *read_fds, fd_set *write_fds,
- * fd_set *except_fds, struct timeval *timeout);
+ * int setpgid(pid_t pid, pid_t pgid);
*/
static __attribute__((unused))
-int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
+int sys_setpgid(pid_t pid, pid_t pgid)
{
-#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect)
- struct sel_arg_struct {
- unsigned long n;
- fd_set *r, *w, *e;
- struct timeval *t;
- } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout };
- return my_syscall1(__NR_select, &arg);
-#elif defined(__ARCH_WANT_SYS_PSELECT6) && defined(__NR_pselect6)
- struct timespec t;
-
- if (timeout) {
- t.tv_sec = timeout->tv_sec;
- t.tv_nsec = timeout->tv_usec * 1000;
- }
- return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
-#elif defined(__NR__newselect) || defined(__NR_select)
-#ifndef __NR__newselect
-#define __NR__newselect __NR_select
-#endif
- return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout);
-#else
-#error None of __NR_select, __NR_pselect6, nor __NR__newselect defined, cannot implement sys_select()
-#endif
+ return my_syscall2(__NR_setpgid, pid, pgid);
}
static __attribute__((unused))
-int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
+int setpgid(pid_t pid, pid_t pgid)
{
- int ret = sys_select(nfds, rfds, wfds, efds, timeout);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return __sysret(sys_setpgid(pid, pgid));
}
-
/*
- * int setpgid(pid_t pid, pid_t pgid);
+ * pid_t setpgrp(void)
*/
static __attribute__((unused))
-int sys_setpgid(pid_t pid, pid_t pgid)
+pid_t setpgrp(void)
{
- return my_syscall2(__NR_setpgid, pid, pgid);
-}
-
-static __attribute__((unused))
-int setpgid(pid_t pid, pid_t pgid)
-{
- int ret = sys_setpgid(pid, pgid);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return setpgid(0, 0);
}
@@ -1015,61 +806,7 @@ pid_t sys_setsid(void)
static __attribute__((unused))
pid_t setsid(void)
{
- pid_t ret = sys_setsid();
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-
-/*
- * int stat(const char *path, struct stat *buf);
- * Warning: the struct stat's layout is arch-dependent.
- */
-
-static __attribute__((unused))
-int sys_stat(const char *path, struct stat *buf)
-{
- struct sys_stat_struct stat;
- long ret;
-
-#ifdef __NR_newfstatat
- /* only solution for arm64 */
- ret = my_syscall4(__NR_newfstatat, AT_FDCWD, path, &stat, 0);
-#elif defined(__NR_stat)
- ret = my_syscall2(__NR_stat, path, &stat);
-#else
-#error Neither __NR_newfstatat nor __NR_stat defined, cannot implement sys_stat()
-#endif
- buf->st_dev = stat.st_dev;
- buf->st_ino = stat.st_ino;
- buf->st_mode = stat.st_mode;
- buf->st_nlink = stat.st_nlink;
- buf->st_uid = stat.st_uid;
- buf->st_gid = stat.st_gid;
- buf->st_rdev = stat.st_rdev;
- buf->st_size = stat.st_size;
- buf->st_blksize = stat.st_blksize;
- buf->st_blocks = stat.st_blocks;
- buf->st_atime = stat.st_atime;
- buf->st_mtime = stat.st_mtime;
- buf->st_ctime = stat.st_ctime;
- return ret;
-}
-
-static __attribute__((unused))
-int stat(const char *path, struct stat *buf)
-{
- int ret = sys_stat(path, buf);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return __sysret(sys_setsid());
}
@@ -1080,25 +817,17 @@ int stat(const char *path, struct stat *buf)
static __attribute__((unused))
int sys_symlink(const char *old, const char *new)
{
-#ifdef __NR_symlinkat
+#if defined(__NR_symlinkat)
return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new);
-#elif defined(__NR_symlink)
- return my_syscall2(__NR_symlink, old, new);
#else
-#error Neither __NR_symlinkat nor __NR_symlink defined, cannot implement sys_symlink()
+ return my_syscall2(__NR_symlink, old, new);
#endif
}
static __attribute__((unused))
int symlink(const char *old, const char *new)
{
- int ret = sys_symlink(old, new);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return __sysret(sys_symlink(old, new));
}
@@ -1132,13 +861,7 @@ int sys_umount2(const char *path, int flags)
static __attribute__((unused))
int umount2(const char *path, int flags)
{
- int ret = sys_umount2(path, flags);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return __sysret(sys_umount2(path, flags));
}
@@ -1149,101 +872,51 @@ int umount2(const char *path, int flags)
static __attribute__((unused))
int sys_unlink(const char *path)
{
-#ifdef __NR_unlinkat
+#if defined(__NR_unlinkat)
return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0);
-#elif defined(__NR_unlink)
- return my_syscall1(__NR_unlink, path);
#else
-#error Neither __NR_unlinkat nor __NR_unlink defined, cannot implement sys_unlink()
+ return my_syscall1(__NR_unlink, path);
#endif
}
static __attribute__((unused))
int unlink(const char *path)
{
- int ret = sys_unlink(path);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return __sysret(sys_unlink(path));
}
/*
- * pid_t wait(int *status);
- * pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage);
- * pid_t waitpid(pid_t pid, int *status, int options);
+ * ssize_t write(int fd, const void *buf, size_t count);
*/
static __attribute__((unused))
-pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage)
-{
- return my_syscall4(__NR_wait4, pid, status, options, rusage);
-}
-
-static __attribute__((unused))
-pid_t wait(int *status)
-{
- pid_t ret = sys_wait4(-1, status, 0, NULL);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
-}
-
-static __attribute__((unused))
-pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage)
+ssize_t sys_write(int fd, const void *buf, size_t count)
{
- pid_t ret = sys_wait4(pid, status, options, rusage);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return my_syscall3(__NR_write, fd, buf, count);
}
-
static __attribute__((unused))
-pid_t waitpid(pid_t pid, int *status, int options)
+ssize_t write(int fd, const void *buf, size_t count)
{
- pid_t ret = sys_wait4(pid, status, options, NULL);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return __sysret(sys_write(fd, buf, count));
}
/*
- * ssize_t write(int fd, const void *buf, size_t count);
+ * int memfd_create(const char *name, unsigned int flags);
*/
static __attribute__((unused))
-ssize_t sys_write(int fd, const void *buf, size_t count)
+int sys_memfd_create(const char *name, unsigned int flags)
{
- return my_syscall3(__NR_write, fd, buf, count);
+ return my_syscall2(__NR_memfd_create, name, flags);
}
static __attribute__((unused))
-ssize_t write(int fd, const void *buf, size_t count)
+int memfd_create(const char *name, unsigned int flags)
{
- ssize_t ret = sys_write(fd, buf, count);
-
- if (ret < 0) {
- SET_ERRNO(-ret);
- ret = -1;
- }
- return ret;
+ return __sysret(sys_memfd_create(name, flags));
}
-/* make sure to include all global symbols */
-#include "nolibc.h"
-
#endif /* _NOLIBC_SYS_H */
diff --git a/tools/include/nolibc/sys/auxv.h b/tools/include/nolibc/sys/auxv.h
new file mode 100644
index 000000000000..0e98325e7347
--- /dev/null
+++ b/tools/include/nolibc/sys/auxv.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * auxv definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_AUXV_H
+#define _NOLIBC_SYS_AUXV_H
+
+#ifndef NOLIBC_NO_RUNTIME
+
+#include "../crt.h"
+
+static __attribute__((unused))
+unsigned long getauxval(unsigned long type)
+{
+ const unsigned long *auxv = _auxv;
+ unsigned long ret;
+
+ if (!auxv)
+ return 0;
+
+ while (1) {
+ if (!auxv[0] && !auxv[1]) {
+ ret = 0;
+ break;
+ }
+
+ if (auxv[0] == type) {
+ ret = auxv[1];
+ break;
+ }
+
+ auxv += 2;
+ }
+
+ return ret;
+}
+
+#endif /* NOLIBC_NO_RUNTIME */
+#endif /* _NOLIBC_SYS_AUXV_H */
diff --git a/tools/include/nolibc/sys/ioctl.h b/tools/include/nolibc/sys/ioctl.h
new file mode 100644
index 000000000000..fc880687e02a
--- /dev/null
+++ b/tools/include/nolibc/sys/ioctl.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Ioctl definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_IOCTL_H
+#define _NOLIBC_SYS_IOCTL_H
+
+#include "../sys.h"
+
+#include <linux/ioctl.h>
+
+/*
+ * int ioctl(int fd, unsigned long cmd, ... arg);
+ */
+
+static __attribute__((unused))
+long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+ return my_syscall3(__NR_ioctl, fd, cmd, arg);
+}
+
+#define ioctl(fd, cmd, arg) __sysret(sys_ioctl(fd, cmd, (unsigned long)(arg)))
+
+#endif /* _NOLIBC_SYS_IOCTL_H */
diff --git a/tools/include/nolibc/sys/mman.h b/tools/include/nolibc/sys/mman.h
new file mode 100644
index 000000000000..77084ac3405a
--- /dev/null
+++ b/tools/include/nolibc/sys/mman.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * mm definition for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_MMAN_H
+#define _NOLIBC_SYS_MMAN_H
+
+#include "../arch.h"
+#include "../sys.h"
+
+#ifndef sys_mmap
+static __attribute__((unused))
+void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd,
+ off_t offset)
+{
+ int n;
+
+#if defined(__NR_mmap2)
+ n = __NR_mmap2;
+ offset >>= 12;
+#else
+ n = __NR_mmap;
+#endif
+
+ return (void *)my_syscall6(n, addr, length, prot, flags, fd, offset);
+}
+#endif
+
+static __attribute__((unused))
+void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset)
+{
+ void *ret = sys_mmap(addr, length, prot, flags, fd, offset);
+
+ if ((unsigned long)ret >= -4095UL) {
+ SET_ERRNO(-(long)ret);
+ ret = MAP_FAILED;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+void *sys_mremap(void *old_address, size_t old_size, size_t new_size, int flags, void *new_address)
+{
+ return (void *)my_syscall5(__NR_mremap, old_address, old_size,
+ new_size, flags, new_address);
+}
+
+static __attribute__((unused))
+void *mremap(void *old_address, size_t old_size, size_t new_size, int flags, void *new_address)
+{
+ void *ret = sys_mremap(old_address, old_size, new_size, flags, new_address);
+
+ if ((unsigned long)ret >= -4095UL) {
+ SET_ERRNO(-(long)ret);
+ ret = MAP_FAILED;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int sys_munmap(void *addr, size_t length)
+{
+ return my_syscall2(__NR_munmap, addr, length);
+}
+
+static __attribute__((unused))
+int munmap(void *addr, size_t length)
+{
+ return __sysret(sys_munmap(addr, length));
+}
+
+#endif /* _NOLIBC_SYS_MMAN_H */
diff --git a/tools/include/nolibc/sys/mount.h b/tools/include/nolibc/sys/mount.h
new file mode 100644
index 000000000000..e39ec02ea24c
--- /dev/null
+++ b/tools/include/nolibc/sys/mount.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Mount definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_MOUNT_H
+#define _NOLIBC_SYS_MOUNT_H
+
+#include "../sys.h"
+
+#include <linux/mount.h>
+
+/*
+ * int mount(const char *source, const char *target,
+ * const char *fstype, unsigned long flags,
+ * const void *data);
+ */
+static __attribute__((unused))
+int sys_mount(const char *src, const char *tgt, const char *fst,
+ unsigned long flags, const void *data)
+{
+ return my_syscall5(__NR_mount, src, tgt, fst, flags, data);
+}
+
+static __attribute__((unused))
+int mount(const char *src, const char *tgt,
+ const char *fst, unsigned long flags,
+ const void *data)
+{
+ return __sysret(sys_mount(src, tgt, fst, flags, data));
+}
+
+#endif /* _NOLIBC_SYS_MOUNT_H */
diff --git a/tools/include/nolibc/sys/prctl.h b/tools/include/nolibc/sys/prctl.h
new file mode 100644
index 000000000000..0205907b6ac8
--- /dev/null
+++ b/tools/include/nolibc/sys/prctl.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Prctl definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_PRCTL_H
+#define _NOLIBC_SYS_PRCTL_H
+
+#include "../sys.h"
+
+#include <linux/prctl.h>
+
+/*
+ * int prctl(int option, unsigned long arg2, unsigned long arg3,
+ * unsigned long arg4, unsigned long arg5);
+ */
+
+static __attribute__((unused))
+int sys_prctl(int option, unsigned long arg2, unsigned long arg3,
+ unsigned long arg4, unsigned long arg5)
+{
+ return my_syscall5(__NR_prctl, option, arg2, arg3, arg4, arg5);
+}
+
+static __attribute__((unused))
+int prctl(int option, unsigned long arg2, unsigned long arg3,
+ unsigned long arg4, unsigned long arg5)
+{
+ return __sysret(sys_prctl(option, arg2, arg3, arg4, arg5));
+}
+
+#endif /* _NOLIBC_SYS_PRCTL_H */
diff --git a/tools/include/nolibc/sys/random.h b/tools/include/nolibc/sys/random.h
new file mode 100644
index 000000000000..cd5d25c571a8
--- /dev/null
+++ b/tools/include/nolibc/sys/random.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * random definitions for NOLIBC
+ * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_RANDOM_H
+#define _NOLIBC_SYS_RANDOM_H
+
+#include "../arch.h"
+#include "../sys.h"
+
+#include <linux/random.h>
+
+/*
+ * ssize_t getrandom(void *buf, size_t buflen, unsigned int flags);
+ */
+
+static __attribute__((unused))
+ssize_t sys_getrandom(void *buf, size_t buflen, unsigned int flags)
+{
+ return my_syscall3(__NR_getrandom, buf, buflen, flags);
+}
+
+static __attribute__((unused))
+ssize_t getrandom(void *buf, size_t buflen, unsigned int flags)
+{
+ return __sysret(sys_getrandom(buf, buflen, flags));
+}
+
+#endif /* _NOLIBC_SYS_RANDOM_H */
diff --git a/tools/include/nolibc/sys/reboot.h b/tools/include/nolibc/sys/reboot.h
new file mode 100644
index 000000000000..38274c64a722
--- /dev/null
+++ b/tools/include/nolibc/sys/reboot.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Reboot definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_REBOOT_H
+#define _NOLIBC_SYS_REBOOT_H
+
+#include "../sys.h"
+
+#include <linux/reboot.h>
+
+/*
+ * int reboot(int cmd);
+ * <cmd> is among LINUX_REBOOT_CMD_*
+ */
+
+static __attribute__((unused))
+ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg)
+{
+ return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg);
+}
+
+static __attribute__((unused))
+int reboot(int cmd)
+{
+ return __sysret(sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, NULL));
+}
+
+#endif /* _NOLIBC_SYS_REBOOT_H */
diff --git a/tools/include/nolibc/sys/resource.h b/tools/include/nolibc/sys/resource.h
new file mode 100644
index 000000000000..b990f914dc56
--- /dev/null
+++ b/tools/include/nolibc/sys/resource.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Resource definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_RESOURCE_H
+#define _NOLIBC_SYS_RESOURCE_H
+
+#include "../sys.h"
+
+#include <linux/resource.h>
+
+/*
+ * int getrlimit(int resource, struct rlimit *rlim);
+ * int setrlimit(int resource, const struct rlimit *rlim);
+ */
+
+static __attribute__((unused))
+int sys_prlimit64(pid_t pid, int resource,
+ const struct rlimit64 *new_limit, struct rlimit64 *old_limit)
+{
+ return my_syscall4(__NR_prlimit64, pid, resource, new_limit, old_limit);
+}
+
+static __attribute__((unused))
+int getrlimit(int resource, struct rlimit *rlim)
+{
+ struct rlimit64 rlim64;
+ int ret;
+
+ ret = __sysret(sys_prlimit64(0, resource, NULL, &rlim64));
+ rlim->rlim_cur = rlim64.rlim_cur;
+ rlim->rlim_max = rlim64.rlim_max;
+
+ return ret;
+}
+
+static __attribute__((unused))
+int setrlimit(int resource, const struct rlimit *rlim)
+{
+ struct rlimit64 rlim64 = {
+ .rlim_cur = rlim->rlim_cur,
+ .rlim_max = rlim->rlim_max,
+ };
+
+ return __sysret(sys_prlimit64(0, resource, &rlim64, NULL));
+}
+
+#endif /* _NOLIBC_SYS_RESOURCE_H */
diff --git a/tools/include/nolibc/sys/select.h b/tools/include/nolibc/sys/select.h
new file mode 100644
index 000000000000..2a5619c01277
--- /dev/null
+++ b/tools/include/nolibc/sys/select.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_SELECT_H
+#define _NOLIBC_SYS_SELECT_H
+
+#include <linux/time.h>
+#include <linux/unistd.h>
+
+/* commonly an fd_set represents 256 FDs */
+#ifndef FD_SETSIZE
+#define FD_SETSIZE 256
+#endif
+
+#define FD_SETIDXMASK (8 * sizeof(unsigned long))
+#define FD_SETBITMASK (8 * sizeof(unsigned long)-1)
+
+/* for select() */
+typedef struct {
+ unsigned long fds[(FD_SETSIZE + FD_SETBITMASK) / FD_SETIDXMASK];
+} fd_set;
+
+#define FD_CLR(fd, set) do { \
+ fd_set *__set = (set); \
+ int __fd = (fd); \
+ if (__fd >= 0) \
+ __set->fds[__fd / FD_SETIDXMASK] &= \
+ ~(1U << (__fd & FD_SETBITMASK)); \
+ } while (0)
+
+#define FD_SET(fd, set) do { \
+ fd_set *__set = (set); \
+ int __fd = (fd); \
+ if (__fd >= 0) \
+ __set->fds[__fd / FD_SETIDXMASK] |= \
+ 1 << (__fd & FD_SETBITMASK); \
+ } while (0)
+
+#define FD_ISSET(fd, set) ({ \
+ fd_set *__set = (set); \
+ int __fd = (fd); \
+ int __r = 0; \
+ if (__fd >= 0) \
+ __r = !!(__set->fds[__fd / FD_SETIDXMASK] & \
+1U << (__fd & FD_SETBITMASK)); \
+ __r; \
+ })
+
+#define FD_ZERO(set) do { \
+ fd_set *__set = (set); \
+ int __idx; \
+ int __size = (FD_SETSIZE+FD_SETBITMASK) / FD_SETIDXMASK;\
+ for (__idx = 0; __idx < __size; __idx++) \
+ __set->fds[__idx] = 0; \
+ } while (0)
+
+/*
+ * int select(int nfds, fd_set *read_fds, fd_set *write_fds,
+ * fd_set *except_fds, struct timeval *timeout);
+ */
+
+static __attribute__((unused))
+int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
+{
+#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect)
+ struct sel_arg_struct {
+ unsigned long n;
+ fd_set *r, *w, *e;
+ struct timeval *t;
+ } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout };
+ return my_syscall1(__NR_select, &arg);
+#elif defined(__NR__newselect)
+ return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout);
+#elif defined(__NR_select)
+ return my_syscall5(__NR_select, nfds, rfds, wfds, efds, timeout);
+#elif defined(__NR_pselect6)
+ struct timespec t;
+
+ if (timeout) {
+ t.tv_sec = timeout->tv_sec;
+ t.tv_nsec = timeout->tv_usec * 1000;
+ }
+ return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
+#else
+ struct __kernel_timespec t;
+
+ if (timeout) {
+ t.tv_sec = timeout->tv_sec;
+ t.tv_nsec = timeout->tv_usec * 1000;
+ }
+ return my_syscall6(__NR_pselect6_time64, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
+#endif
+}
+
+static __attribute__((unused))
+int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
+{
+ return __sysret(sys_select(nfds, rfds, wfds, efds, timeout));
+}
+
+
+#endif /* _NOLIBC_SYS_SELECT_H */
diff --git a/tools/include/nolibc/sys/stat.h b/tools/include/nolibc/sys/stat.h
new file mode 100644
index 000000000000..8b4d80e3ea03
--- /dev/null
+++ b/tools/include/nolibc/sys/stat.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * stat definition for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_STAT_H
+#define _NOLIBC_SYS_STAT_H
+
+#include "../arch.h"
+#include "../types.h"
+#include "../sys.h"
+
+/*
+ * int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf);
+ * int stat(const char *path, struct stat *buf);
+ * int fstatat(int fd, const char *path, struct stat *buf, int flag);
+ * int fstat(int fildes, struct stat *buf);
+ * int lstat(const char *path, struct stat *buf);
+ */
+
+static __attribute__((unused))
+int sys_statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf)
+{
+#ifdef __NR_statx
+ return my_syscall5(__NR_statx, fd, path, flags, mask, buf);
+#else
+ return __nolibc_enosys(__func__, fd, path, flags, mask, buf);
+#endif
+}
+
+static __attribute__((unused))
+int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf)
+{
+ return __sysret(sys_statx(fd, path, flags, mask, buf));
+}
+
+
+static __attribute__((unused))
+int fstatat(int fd, const char *path, struct stat *buf, int flag)
+{
+ struct statx statx;
+ long ret;
+
+ ret = __sysret(sys_statx(fd, path, flag | AT_NO_AUTOMOUNT, STATX_BASIC_STATS, &statx));
+ if (ret == -1)
+ return ret;
+
+ buf->st_dev = ((statx.stx_dev_minor & 0xff)
+ | (statx.stx_dev_major << 8)
+ | ((statx.stx_dev_minor & ~0xff) << 12));
+ buf->st_ino = statx.stx_ino;
+ buf->st_mode = statx.stx_mode;
+ buf->st_nlink = statx.stx_nlink;
+ buf->st_uid = statx.stx_uid;
+ buf->st_gid = statx.stx_gid;
+ buf->st_rdev = ((statx.stx_rdev_minor & 0xff)
+ | (statx.stx_rdev_major << 8)
+ | ((statx.stx_rdev_minor & ~0xff) << 12));
+ buf->st_size = statx.stx_size;
+ buf->st_blksize = statx.stx_blksize;
+ buf->st_blocks = statx.stx_blocks;
+ buf->st_atim.tv_sec = statx.stx_atime.tv_sec;
+ buf->st_atim.tv_nsec = statx.stx_atime.tv_nsec;
+ buf->st_mtim.tv_sec = statx.stx_mtime.tv_sec;
+ buf->st_mtim.tv_nsec = statx.stx_mtime.tv_nsec;
+ buf->st_ctim.tv_sec = statx.stx_ctime.tv_sec;
+ buf->st_ctim.tv_nsec = statx.stx_ctime.tv_nsec;
+
+ return 0;
+}
+
+static __attribute__((unused))
+int stat(const char *path, struct stat *buf)
+{
+ return fstatat(AT_FDCWD, path, buf, 0);
+}
+
+static __attribute__((unused))
+int fstat(int fildes, struct stat *buf)
+{
+ return fstatat(fildes, "", buf, AT_EMPTY_PATH);
+}
+
+static __attribute__((unused))
+int lstat(const char *path, struct stat *buf)
+{
+ return fstatat(AT_FDCWD, path, buf, AT_SYMLINK_NOFOLLOW);
+}
+
+#endif /* _NOLIBC_SYS_STAT_H */
diff --git a/tools/include/nolibc/sys/syscall.h b/tools/include/nolibc/sys/syscall.h
new file mode 100644
index 000000000000..4bf97f1386a0
--- /dev/null
+++ b/tools/include/nolibc/sys/syscall.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * syscall() definition for NOLIBC
+ * Copyright (C) 2024 Thomas Weißschuh <linux@weissschuh.net>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_SYSCALL_H
+#define _NOLIBC_SYS_SYSCALL_H
+
+#define __syscall_narg(_0, _1, _2, _3, _4, _5, _6, N, ...) N
+#define _syscall_narg(...) __syscall_narg(__VA_ARGS__, 6, 5, 4, 3, 2, 1, 0)
+#define _syscall(N, ...) __sysret(my_syscall##N(__VA_ARGS__))
+#define _syscall_n(N, ...) _syscall(N, __VA_ARGS__)
+#define syscall(...) _syscall_n(_syscall_narg(__VA_ARGS__), ##__VA_ARGS__)
+
+#endif /* _NOLIBC_SYS_SYSCALL_H */
diff --git a/tools/include/nolibc/sys/sysmacros.h b/tools/include/nolibc/sys/sysmacros.h
new file mode 100644
index 000000000000..37c33f030f02
--- /dev/null
+++ b/tools/include/nolibc/sys/sysmacros.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Sysmacro definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_SYSMACROS_H
+#define _NOLIBC_SYS_SYSMACROS_H
+
+#include "../std.h"
+
+/* WARNING, it only deals with the 4096 first majors and 256 first minors */
+#define makedev(major, minor) ((dev_t)((((major) & 0xfff) << 8) | ((minor) & 0xff)))
+#define major(dev) ((unsigned int)(((dev) >> 8) & 0xfff))
+#define minor(dev) ((unsigned int)((dev) & 0xff))
+
+#endif /* _NOLIBC_SYS_SYSMACROS_H */
diff --git a/tools/include/nolibc/sys/time.h b/tools/include/nolibc/sys/time.h
new file mode 100644
index 000000000000..33782a19aae9
--- /dev/null
+++ b/tools/include/nolibc/sys/time.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * time definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_TIME_H
+#define _NOLIBC_SYS_TIME_H
+
+#include "../arch.h"
+#include "../sys.h"
+
+static int sys_clock_gettime(clockid_t clockid, struct timespec *tp);
+
+/*
+ * int gettimeofday(struct timeval *tv, struct timezone *tz);
+ */
+
+static __attribute__((unused))
+int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+#ifdef __NR_gettimeofday
+ return my_syscall2(__NR_gettimeofday, tv, tz);
+#else
+ (void) tz; /* Non-NULL tz is undefined behaviour */
+
+ struct timespec tp;
+ int ret;
+
+ ret = sys_clock_gettime(CLOCK_REALTIME, &tp);
+ if (!ret && tv) {
+ tv->tv_sec = tp.tv_sec;
+ tv->tv_usec = tp.tv_nsec / 1000;
+ }
+
+ return ret;
+#endif
+}
+
+static __attribute__((unused))
+int gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+ return __sysret(sys_gettimeofday(tv, tz));
+}
+
+#endif /* _NOLIBC_SYS_TIME_H */
diff --git a/tools/include/nolibc/sys/timerfd.h b/tools/include/nolibc/sys/timerfd.h
new file mode 100644
index 000000000000..5dd61030c991
--- /dev/null
+++ b/tools/include/nolibc/sys/timerfd.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * timerfd definitions for NOLIBC
+ * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_TIMERFD_H
+#define _NOLIBC_SYS_TIMERFD_H
+
+#include "../sys.h"
+#include "../time.h"
+
+#include <linux/timerfd.h>
+
+
+static __attribute__((unused))
+int sys_timerfd_create(int clockid, int flags)
+{
+ return my_syscall2(__NR_timerfd_create, clockid, flags);
+}
+
+static __attribute__((unused))
+int timerfd_create(int clockid, int flags)
+{
+ return __sysret(sys_timerfd_create(clockid, flags));
+}
+
+
+static __attribute__((unused))
+int sys_timerfd_gettime(int fd, struct itimerspec *curr_value)
+{
+#if defined(__NR_timerfd_gettime)
+ return my_syscall2(__NR_timerfd_gettime, fd, curr_value);
+#else
+ struct __kernel_itimerspec kcurr_value;
+ int ret;
+
+ ret = my_syscall2(__NR_timerfd_gettime64, fd, &kcurr_value);
+ __nolibc_timespec_kernel_to_user(&kcurr_value.it_interval, &curr_value->it_interval);
+ __nolibc_timespec_kernel_to_user(&kcurr_value.it_value, &curr_value->it_value);
+ return ret;
+#endif
+}
+
+static __attribute__((unused))
+int timerfd_gettime(int fd, struct itimerspec *curr_value)
+{
+ return __sysret(sys_timerfd_gettime(fd, curr_value));
+}
+
+
+static __attribute__((unused))
+int sys_timerfd_settime(int fd, int flags,
+ const struct itimerspec *new_value, struct itimerspec *old_value)
+{
+#if defined(__NR_timerfd_settime)
+ return my_syscall4(__NR_timerfd_settime, fd, flags, new_value, old_value);
+#else
+ struct __kernel_itimerspec knew_value, kold_value;
+ int ret;
+
+ __nolibc_timespec_user_to_kernel(&new_value->it_value, &knew_value.it_value);
+ __nolibc_timespec_user_to_kernel(&new_value->it_interval, &knew_value.it_interval);
+ ret = my_syscall4(__NR_timerfd_settime64, fd, flags, &knew_value, &kold_value);
+ if (old_value) {
+ __nolibc_timespec_kernel_to_user(&kold_value.it_interval, &old_value->it_interval);
+ __nolibc_timespec_kernel_to_user(&kold_value.it_value, &old_value->it_value);
+ }
+ return ret;
+#endif
+}
+
+static __attribute__((unused))
+int timerfd_settime(int fd, int flags,
+ const struct itimerspec *new_value, struct itimerspec *old_value)
+{
+ return __sysret(sys_timerfd_settime(fd, flags, new_value, old_value));
+}
+
+#endif /* _NOLIBC_SYS_TIMERFD_H */
diff --git a/tools/include/nolibc/sys/types.h b/tools/include/nolibc/sys/types.h
new file mode 100644
index 000000000000..8a264a13275c
--- /dev/null
+++ b/tools/include/nolibc/sys/types.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * sys/types.h shim for NOLIBC
+ * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de>
+ */
+
+#include "../types.h"
diff --git a/tools/include/nolibc/sys/uio.h b/tools/include/nolibc/sys/uio.h
new file mode 100644
index 000000000000..7ad42b927d2f
--- /dev/null
+++ b/tools/include/nolibc/sys/uio.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * uio for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ * Copyright (C) 2025 Intel Corporation
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_UIO_H
+#define _NOLIBC_SYS_UIO_H
+
+#include "../sys.h"
+#include <linux/uio.h>
+
+
+/*
+ * ssize_t readv(int fd, const struct iovec *iovec, int count);
+ */
+static __attribute__((unused))
+ssize_t sys_readv(int fd, const struct iovec *iovec, int count)
+{
+ return my_syscall3(__NR_readv, fd, iovec, count);
+}
+
+static __attribute__((unused))
+ssize_t readv(int fd, const struct iovec *iovec, int count)
+{
+ return __sysret(sys_readv(fd, iovec, count));
+}
+
+/*
+ * ssize_t writev(int fd, const struct iovec *iovec, int count);
+ */
+static __attribute__((unused))
+ssize_t sys_writev(int fd, const struct iovec *iovec, int count)
+{
+ return my_syscall3(__NR_writev, fd, iovec, count);
+}
+
+static __attribute__((unused))
+ssize_t writev(int fd, const struct iovec *iovec, int count)
+{
+ return __sysret(sys_writev(fd, iovec, count));
+}
+
+
+#endif /* _NOLIBC_SYS_UIO_H */
diff --git a/tools/include/nolibc/sys/utsname.h b/tools/include/nolibc/sys/utsname.h
new file mode 100644
index 000000000000..01023e1bb439
--- /dev/null
+++ b/tools/include/nolibc/sys/utsname.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Utsname definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_UTSNAME_H
+#define _NOLIBC_SYS_UTSNAME_H
+
+#include "../sys.h"
+
+#include <linux/utsname.h>
+
+/*
+ * int uname(struct utsname *buf);
+ */
+
+struct utsname {
+ char sysname[65];
+ char nodename[65];
+ char release[65];
+ char version[65];
+ char machine[65];
+ char domainname[65];
+};
+
+static __attribute__((unused))
+int sys_uname(struct utsname *buf)
+{
+ return my_syscall1(__NR_uname, buf);
+}
+
+static __attribute__((unused))
+int uname(struct utsname *buf)
+{
+ return __sysret(sys_uname(buf));
+}
+
+#endif /* _NOLIBC_SYS_UTSNAME_H */
diff --git a/tools/include/nolibc/sys/wait.h b/tools/include/nolibc/sys/wait.h
new file mode 100644
index 000000000000..9d9319ba92cb
--- /dev/null
+++ b/tools/include/nolibc/sys/wait.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * wait definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_WAIT_H
+#define _NOLIBC_SYS_WAIT_H
+
+#include "../arch.h"
+#include "../std.h"
+#include "../types.h"
+
+/*
+ * pid_t wait(int *status);
+ * pid_t waitpid(pid_t pid, int *status, int options);
+ * int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options);
+ */
+
+static __attribute__((unused))
+int sys_waitid(int which, pid_t pid, siginfo_t *infop, int options, struct rusage *rusage)
+{
+ return my_syscall5(__NR_waitid, which, pid, infop, options, rusage);
+}
+
+static __attribute__((unused))
+int waitid(int which, pid_t pid, siginfo_t *infop, int options)
+{
+ return __sysret(sys_waitid(which, pid, infop, options, NULL));
+}
+
+
+static __attribute__((unused))
+pid_t waitpid(pid_t pid, int *status, int options)
+{
+ int idtype, ret;
+ siginfo_t info;
+ pid_t id;
+
+ if (pid == INT_MIN) {
+ SET_ERRNO(ESRCH);
+ return -1;
+ } else if (pid < -1) {
+ idtype = P_PGID;
+ id = -pid;
+ } else if (pid == -1) {
+ idtype = P_ALL;
+ id = 0;
+ } else if (pid == 0) {
+ idtype = P_PGID;
+ id = 0;
+ } else {
+ idtype = P_PID;
+ id = pid;
+ }
+
+ options |= WEXITED;
+
+ ret = waitid(idtype, id, &info, options);
+ if (ret)
+ return -1;
+
+ switch (info.si_code) {
+ case 0:
+ if (status)
+ *status = 0;
+ break;
+ case CLD_EXITED:
+ if (status)
+ *status = (info.si_status & 0xff) << 8;
+ break;
+ case CLD_KILLED:
+ if (status)
+ *status = info.si_status & 0x7f;
+ break;
+ case CLD_DUMPED:
+ if (status)
+ *status = (info.si_status & 0x7f) | 0x80;
+ break;
+ case CLD_STOPPED:
+ case CLD_TRAPPED:
+ if (status)
+ *status = (info.si_status << 8) + 0x7f;
+ break;
+ case CLD_CONTINUED:
+ if (status)
+ *status = 0xffff;
+ break;
+ default:
+ return -1;
+ }
+
+ return info.si_pid;
+}
+
+static __attribute__((unused))
+pid_t wait(int *status)
+{
+ return waitpid(-1, status, 0);
+}
+
+#endif /* _NOLIBC_SYS_WAIT_H */
diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h
index 84655361b9ad..48e78f8becf9 100644
--- a/tools/include/nolibc/time.h
+++ b/tools/include/nolibc/time.h
@@ -4,6 +4,9 @@
* Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_TIME_H
#define _NOLIBC_TIME_H
@@ -12,6 +15,133 @@
#include "types.h"
#include "sys.h"
+#include <linux/signal.h>
+#include <linux/time.h>
+
+static __inline__
+void __nolibc_timespec_user_to_kernel(const struct timespec *ts, struct __kernel_timespec *kts)
+{
+ kts->tv_sec = ts->tv_sec;
+ kts->tv_nsec = ts->tv_nsec;
+}
+
+static __inline__
+void __nolibc_timespec_kernel_to_user(const struct __kernel_timespec *kts, struct timespec *ts)
+{
+ ts->tv_sec = kts->tv_sec;
+ ts->tv_nsec = kts->tv_nsec;
+}
+
+/*
+ * int clock_getres(clockid_t clockid, struct timespec *res);
+ * int clock_gettime(clockid_t clockid, struct timespec *tp);
+ * int clock_settime(clockid_t clockid, const struct timespec *tp);
+ * int clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp,
+ * struct timespec *rmtp)
+ */
+
+static __attribute__((unused))
+int sys_clock_getres(clockid_t clockid, struct timespec *res)
+{
+#if defined(__NR_clock_getres)
+ return my_syscall2(__NR_clock_getres, clockid, res);
+#else
+ struct __kernel_timespec kres;
+ int ret;
+
+ ret = my_syscall2(__NR_clock_getres_time64, clockid, &kres);
+ if (res)
+ __nolibc_timespec_kernel_to_user(&kres, res);
+ return ret;
+#endif
+}
+
+static __attribute__((unused))
+int clock_getres(clockid_t clockid, struct timespec *res)
+{
+ return __sysret(sys_clock_getres(clockid, res));
+}
+
+static __attribute__((unused))
+int sys_clock_gettime(clockid_t clockid, struct timespec *tp)
+{
+#if defined(__NR_clock_gettime)
+ return my_syscall2(__NR_clock_gettime, clockid, tp);
+#else
+ struct __kernel_timespec ktp;
+ int ret;
+
+ ret = my_syscall2(__NR_clock_gettime64, clockid, &ktp);
+ if (tp)
+ __nolibc_timespec_kernel_to_user(&ktp, tp);
+ return ret;
+#endif
+}
+
+static __attribute__((unused))
+int clock_gettime(clockid_t clockid, struct timespec *tp)
+{
+ return __sysret(sys_clock_gettime(clockid, tp));
+}
+
+static __attribute__((unused))
+int sys_clock_settime(clockid_t clockid, struct timespec *tp)
+{
+#if defined(__NR_clock_settime)
+ return my_syscall2(__NR_clock_settime, clockid, tp);
+#else
+ struct __kernel_timespec ktp;
+
+ __nolibc_timespec_user_to_kernel(tp, &ktp);
+ return my_syscall2(__NR_clock_settime64, clockid, &ktp);
+#endif
+}
+
+static __attribute__((unused))
+int clock_settime(clockid_t clockid, struct timespec *tp)
+{
+ return __sysret(sys_clock_settime(clockid, tp));
+}
+
+static __attribute__((unused))
+int sys_clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp,
+ struct timespec *rmtp)
+{
+#if defined(__NR_clock_nanosleep)
+ return my_syscall4(__NR_clock_nanosleep, clockid, flags, rqtp, rmtp);
+#else
+ struct __kernel_timespec krqtp, krmtp;
+ int ret;
+
+ __nolibc_timespec_user_to_kernel(rqtp, &krqtp);
+ ret = my_syscall4(__NR_clock_nanosleep_time64, clockid, flags, &krqtp, &krmtp);
+ if (rmtp)
+ __nolibc_timespec_kernel_to_user(&krmtp, rmtp);
+ return ret;
+#endif
+}
+
+static __attribute__((unused))
+int clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp,
+ struct timespec *rmtp)
+{
+ /* Directly return a positive error number */
+ return -sys_clock_nanosleep(clockid, flags, rqtp, rmtp);
+}
+
+static __inline__
+double difftime(time_t time1, time_t time2)
+{
+ return time1 - time2;
+}
+
+static __inline__
+int nanosleep(const struct timespec *rqtp, struct timespec *rmtp)
+{
+ return __sysret(sys_clock_nanosleep(CLOCK_REALTIME, 0, rqtp, rmtp));
+}
+
+
static __attribute__((unused))
time_t time(time_t *tptr)
{
@@ -25,7 +155,85 @@ time_t time(time_t *tptr)
return tv.tv_sec;
}
-/* make sure to include all global symbols */
-#include "nolibc.h"
+
+/*
+ * int timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid);
+ * int timer_gettime(timer_t timerid, struct itimerspec *curr_value);
+ * int timer_settime(timer_t timerid, int flags, const struct itimerspec *new_value, struct itimerspec *old_value);
+ */
+
+static __attribute__((unused))
+int sys_timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid)
+{
+ return my_syscall3(__NR_timer_create, clockid, evp, timerid);
+}
+
+static __attribute__((unused))
+int timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid)
+{
+ return __sysret(sys_timer_create(clockid, evp, timerid));
+}
+
+static __attribute__((unused))
+int sys_timer_delete(timer_t timerid)
+{
+ return my_syscall1(__NR_timer_delete, timerid);
+}
+
+static __attribute__((unused))
+int timer_delete(timer_t timerid)
+{
+ return __sysret(sys_timer_delete(timerid));
+}
+
+static __attribute__((unused))
+int sys_timer_gettime(timer_t timerid, struct itimerspec *curr_value)
+{
+#if defined(__NR_timer_gettime)
+ return my_syscall2(__NR_timer_gettime, timerid, curr_value);
+#else
+ struct __kernel_itimerspec kcurr_value;
+ int ret;
+
+ ret = my_syscall2(__NR_timer_gettime64, timerid, &kcurr_value);
+ __nolibc_timespec_kernel_to_user(&kcurr_value.it_interval, &curr_value->it_interval);
+ __nolibc_timespec_kernel_to_user(&kcurr_value.it_value, &curr_value->it_value);
+ return ret;
+#endif
+}
+
+static __attribute__((unused))
+int timer_gettime(timer_t timerid, struct itimerspec *curr_value)
+{
+ return __sysret(sys_timer_gettime(timerid, curr_value));
+}
+
+static __attribute__((unused))
+int sys_timer_settime(timer_t timerid, int flags,
+ const struct itimerspec *new_value, struct itimerspec *old_value)
+{
+#if defined(__NR_timer_settime)
+ return my_syscall4(__NR_timer_settime, timerid, flags, new_value, old_value);
+#else
+ struct __kernel_itimerspec knew_value, kold_value;
+ int ret;
+
+ __nolibc_timespec_user_to_kernel(&new_value->it_value, &knew_value.it_value);
+ __nolibc_timespec_user_to_kernel(&new_value->it_interval, &knew_value.it_interval);
+ ret = my_syscall4(__NR_timer_settime64, timerid, flags, &knew_value, &kold_value);
+ if (old_value) {
+ __nolibc_timespec_kernel_to_user(&kold_value.it_interval, &old_value->it_interval);
+ __nolibc_timespec_kernel_to_user(&kold_value.it_value, &old_value->it_value);
+ }
+ return ret;
+#endif
+}
+
+static __attribute__((unused))
+int timer_settime(timer_t timerid, int flags,
+ const struct itimerspec *new_value, struct itimerspec *old_value)
+{
+ return __sysret(sys_timer_settime(timerid, flags, new_value, old_value));
+}
#endif /* _NOLIBC_TIME_H */
diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h
index fbbc0e68c001..470a5f77bc0f 100644
--- a/tools/include/nolibc/types.h
+++ b/tools/include/nolibc/types.h
@@ -4,19 +4,29 @@
* Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_TYPES_H
#define _NOLIBC_TYPES_H
#include "std.h"
+#include <linux/mman.h>
+#include <linux/stat.h>
#include <linux/time.h>
+#include <linux/wait.h>
/* Only the generic macros and types may be defined here. The arch-specific
- * ones such as the O_RDONLY and related macros used by fcntl() and open(), or
- * the layout of sys_stat_struct must not be defined here.
+ * ones such as the O_RDONLY and related macros used by fcntl() and open()
+ * must not be defined here.
*/
-/* stat flags (WARNING, octal here) */
+/* stat flags (WARNING, octal here). We need to check for an existing
+ * definition because linux/stat.h may omit to define those if it finds
+ * that any glibc header was already included.
+ */
+#if !defined(S_IFMT)
#define S_IFDIR 0040000
#define S_IFCHR 0020000
#define S_IFBLK 0060000
@@ -34,6 +44,22 @@
#define S_ISLNK(mode) (((mode) & S_IFMT) == S_IFLNK)
#define S_ISSOCK(mode) (((mode) & S_IFMT) == S_IFSOCK)
+#define S_IRWXU 00700
+#define S_IRUSR 00400
+#define S_IWUSR 00200
+#define S_IXUSR 00100
+
+#define S_IRWXG 00070
+#define S_IRGRP 00040
+#define S_IWGRP 00020
+#define S_IXGRP 00010
+
+#define S_IRWXO 00007
+#define S_IROTH 00004
+#define S_IWOTH 00002
+#define S_IXOTH 00001
+#endif
+
/* dirent types */
#define DT_UNKNOWN 0x0
#define DT_FIFO 0x1
@@ -44,11 +70,6 @@
#define DT_LNK 0xa
#define DT_SOCK 0xc
-/* commonly an fd_set represents 256 FDs */
-#ifndef FD_SETSIZE
-#define FD_SETSIZE 256
-#endif
-
/* PATH_MAX and MAXPATHLEN are often used and found with plenty of different
* values.
*/
@@ -60,9 +81,9 @@
#define MAXPATHLEN (PATH_MAX)
#endif
-/* Special FD used by all the *at functions */
-#ifndef AT_FDCWD
-#define AT_FDCWD (-100)
+/* flags for mmap */
+#ifndef MAP_FAILED
+#define MAP_FAILED ((void *)-1)
#endif
/* whence values for lseek() */
@@ -70,81 +91,25 @@
#define SEEK_CUR 1
#define SEEK_END 2
-/* cmd for reboot() */
-#define LINUX_REBOOT_MAGIC1 0xfee1dead
-#define LINUX_REBOOT_MAGIC2 0x28121969
-#define LINUX_REBOOT_CMD_HALT 0xcdef0123
-#define LINUX_REBOOT_CMD_POWER_OFF 0x4321fedc
-#define LINUX_REBOOT_CMD_RESTART 0x01234567
-#define LINUX_REBOOT_CMD_SW_SUSPEND 0xd000fce2
+/* flags for reboot */
+#define RB_AUTOBOOT LINUX_REBOOT_CMD_RESTART
+#define RB_HALT_SYSTEM LINUX_REBOOT_CMD_HALT
+#define RB_ENABLE_CAD LINUX_REBOOT_CMD_CAD_ON
+#define RB_DISABLE_CAD LINUX_REBOOT_CMD_CAD_OFF
+#define RB_POWER_OFF LINUX_REBOOT_CMD_POWER_OFF
+#define RB_SW_SUSPEND LINUX_REBOOT_CMD_SW_SUSPEND
+#define RB_KEXEC LINUX_REBOOT_CMD_KEXEC
/* Macros used on waitpid()'s return status */
#define WEXITSTATUS(status) (((status) & 0xff00) >> 8)
#define WIFEXITED(status) (((status) & 0x7f) == 0)
-
-/* waitpid() flags */
-#define WNOHANG 1
+#define WTERMSIG(status) ((status) & 0x7f)
+#define WIFSIGNALED(status) ((status) - 1 < 0xff)
/* standard exit() codes */
#define EXIT_SUCCESS 0
#define EXIT_FAILURE 1
-#define FD_SETIDXMASK (8 * sizeof(unsigned long))
-#define FD_SETBITMASK (8 * sizeof(unsigned long)-1)
-
-/* for select() */
-typedef struct {
- unsigned long fds[(FD_SETSIZE + FD_SETBITMASK) / FD_SETIDXMASK];
-} fd_set;
-
-#define FD_CLR(fd, set) do { \
- fd_set *__set = (set); \
- int __fd = (fd); \
- if (__fd >= 0) \
- __set->fds[__fd / FD_SETIDXMASK] &= \
- ~(1U << (__fd & FX_SETBITMASK)); \
- } while (0)
-
-#define FD_SET(fd, set) do { \
- fd_set *__set = (set); \
- int __fd = (fd); \
- if (__fd >= 0) \
- __set->fds[__fd / FD_SETIDXMASK] |= \
- 1 << (__fd & FD_SETBITMASK); \
- } while (0)
-
-#define FD_ISSET(fd, set) ({ \
- fd_set *__set = (set); \
- int __fd = (fd); \
- int __r = 0; \
- if (__fd >= 0) \
- __r = !!(__set->fds[__fd / FD_SETIDXMASK] & \
-1U << (__fd & FD_SET_BITMASK)); \
- __r; \
- })
-
-#define FD_ZERO(set) do { \
- fd_set *__set = (set); \
- int __idx; \
- int __size = (FD_SETSIZE+FD_SETBITMASK) / FD_SETIDXMASK;\
- for (__idx = 0; __idx < __size; __idx++) \
- __set->fds[__idx] = 0; \
- } while (0)
-
-/* for poll() */
-#define POLLIN 0x0001
-#define POLLPRI 0x0002
-#define POLLOUT 0x0004
-#define POLLERR 0x0008
-#define POLLHUP 0x0010
-#define POLLNVAL 0x0020
-
-struct pollfd {
- int fd;
- short int events;
- short int revents;
-};
-
/* for getdents64() */
struct linux_dirent64 {
uint64_t d_ino;
@@ -154,26 +119,6 @@ struct linux_dirent64 {
char d_name[];
};
-/* needed by wait4() */
-struct rusage {
- struct timeval ru_utime;
- struct timeval ru_stime;
- long ru_maxrss;
- long ru_ixrss;
- long ru_idrss;
- long ru_isrss;
- long ru_minflt;
- long ru_majflt;
- long ru_nswap;
- long ru_inblock;
- long ru_oublock;
- long ru_msgsnd;
- long ru_msgrcv;
- long ru_nsignals;
- long ru_nvcsw;
- long ru_nivcsw;
-};
-
/* The format of the struct as returned by the libc to the application, which
* significantly differs from the format returned by the stat() syscall flavours.
*/
@@ -188,19 +133,13 @@ struct stat {
off_t st_size; /* total size, in bytes */
blksize_t st_blksize; /* blocksize for file system I/O */
blkcnt_t st_blocks; /* number of 512B blocks allocated */
- time_t st_atime; /* time of last access */
- time_t st_mtime; /* time of last modification */
- time_t st_ctime; /* time of last status change */
+ union { time_t st_atime; struct timespec st_atim; }; /* time of last access */
+ union { time_t st_mtime; struct timespec st_mtim; }; /* time of last modification */
+ union { time_t st_ctime; struct timespec st_ctim; }; /* time of last status change */
};
-/* WARNING, it only deals with the 4096 first majors and 256 first minors */
-#define makedev(major, minor) ((dev_t)((((major) & 0xfff) << 8) | ((minor) & 0xff)))
-#define major(dev) ((unsigned int)(((dev) >> 8) & 0xfff))
-#define minor(dev) ((unsigned int)(((dev) & 0xff))
-
-#ifndef offsetof
-#define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD)
-#endif
+typedef __kernel_clockid_t clockid_t;
+typedef int timer_t;
#ifndef container_of
#define container_of(PTR, TYPE, FIELD) ({ \
@@ -209,7 +148,4 @@ struct stat {
})
#endif
-/* make sure to include all global symbols */
-#include "nolibc.h"
-
#endif /* _NOLIBC_TYPES_H */
diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h
index 1cfcd52106a4..bb5e80f3f05d 100644
--- a/tools/include/nolibc/unistd.h
+++ b/tools/include/nolibc/unistd.h
@@ -4,6 +4,9 @@
* Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_UNISTD_H
#define _NOLIBC_UNISTD_H
@@ -13,12 +16,45 @@
#include "sys.h"
+#define STDIN_FILENO 0
+#define STDOUT_FILENO 1
+#define STDERR_FILENO 2
+
+#define F_OK 0
+#define X_OK 1
+#define W_OK 2
+#define R_OK 4
+
+/*
+ * int access(const char *path, int amode);
+ * int faccessat(int fd, const char *path, int amode, int flag);
+ */
+
+static __attribute__((unused))
+int sys_faccessat(int fd, const char *path, int amode, int flag)
+{
+ return my_syscall4(__NR_faccessat, fd, path, amode, flag);
+}
+
+static __attribute__((unused))
+int faccessat(int fd, const char *path, int amode, int flag)
+{
+ return __sysret(sys_faccessat(fd, path, amode, flag));
+}
+
+static __attribute__((unused))
+int access(const char *path, int amode)
+{
+ return faccessat(AT_FDCWD, path, amode, 0);
+}
+
+
static __attribute__((unused))
int msleep(unsigned int msecs)
{
struct timeval my_timeval = { msecs / 1000, (msecs % 1000) * 1000 };
- if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
+ if (sys_select(0, NULL, NULL, NULL, &my_timeval) < 0)
return (my_timeval.tv_sec * 1000) +
(my_timeval.tv_usec / 1000) +
!!(my_timeval.tv_usec % 1000);
@@ -31,7 +67,7 @@ unsigned int sleep(unsigned int seconds)
{
struct timeval my_timeval = { seconds, 0 };
- if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
+ if (sys_select(0, NULL, NULL, NULL, &my_timeval) < 0)
return my_timeval.tv_sec + !!my_timeval.tv_usec;
else
return 0;
@@ -42,7 +78,7 @@ int usleep(unsigned int usecs)
{
struct timeval my_timeval = { usecs / 1000000, usecs % 1000000 };
- return sys_select(0, 0, 0, 0, &my_timeval);
+ return sys_select(0, NULL, NULL, NULL, &my_timeval);
}
static __attribute__((unused))
@@ -51,7 +87,4 @@ int tcsetpgrp(int fd, pid_t pid)
return ioctl(fd, TIOCSPGRP, &pid);
}
-/* make sure to include all global symbols */
-#include "nolibc.h"
-
#endif /* _NOLIBC_UNISTD_H */
diff --git a/tools/include/perf/arm_pmuv3.h b/tools/include/perf/arm_pmuv3.h
new file mode 100644
index 000000000000..1e397d55384e
--- /dev/null
+++ b/tools/include/perf/arm_pmuv3.h
@@ -0,0 +1,317 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ */
+
+#ifndef __PERF_ARM_PMUV3_H
+#define __PERF_ARM_PMUV3_H
+
+#include <assert.h>
+#include <asm/bug.h>
+
+#define ARMV8_PMU_MAX_COUNTERS 32
+#define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1)
+
+/*
+ * Common architectural and microarchitectural event numbers.
+ */
+#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x0000
+#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL 0x0001
+#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x0002
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x0003
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x0004
+#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x0005
+#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x0006
+#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x0007
+#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x0008
+#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x0009
+#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x000A
+#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED 0x000B
+#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED 0x000C
+#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED 0x000D
+#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED 0x000E
+#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x000F
+#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x0010
+#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x0011
+#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x0012
+#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x0013
+#define ARMV8_PMUV3_PERFCTR_L1I_CACHE 0x0014
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB 0x0015
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE 0x0016
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL 0x0017
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB 0x0018
+#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x0019
+#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR 0x001A
+#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x001B
+#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x001C
+#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x001D
+#define ARMV8_PMUV3_PERFCTR_CHAIN 0x001E
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x001F
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x0020
+#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x0021
+#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED 0x0022
+#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND 0x0023
+#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND 0x0024
+#define ARMV8_PMUV3_PERFCTR_L1D_TLB 0x0025
+#define ARMV8_PMUV3_PERFCTR_L1I_TLB 0x0026
+#define ARMV8_PMUV3_PERFCTR_L2I_CACHE 0x0027
+#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL 0x0028
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE 0x0029
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL 0x002A
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x002B
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x002C
+#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x002D
+#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x002E
+#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x002F
+#define ARMV8_PMUV3_PERFCTR_L2I_TLB 0x0030
+#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS 0x0031
+#define ARMV8_PMUV3_PERFCTR_LL_CACHE 0x0032
+#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS 0x0033
+#define ARMV8_PMUV3_PERFCTR_DTLB_WALK 0x0034
+#define ARMV8_PMUV3_PERFCTR_ITLB_WALK 0x0035
+#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD 0x0036
+#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD 0x0037
+#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD 0x0038
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD 0x0039
+#define ARMV8_PMUV3_PERFCTR_OP_RETIRED 0x003A
+#define ARMV8_PMUV3_PERFCTR_OP_SPEC 0x003B
+#define ARMV8_PMUV3_PERFCTR_STALL 0x003C
+#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND 0x003D
+#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND 0x003E
+#define ARMV8_PMUV3_PERFCTR_STALL_SLOT 0x003F
+
+/* Statistical profiling extension microarchitectural events */
+#define ARMV8_SPE_PERFCTR_SAMPLE_POP 0x4000
+#define ARMV8_SPE_PERFCTR_SAMPLE_FEED 0x4001
+#define ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE 0x4002
+#define ARMV8_SPE_PERFCTR_SAMPLE_COLLISION 0x4003
+
+/* AMUv1 architecture events */
+#define ARMV8_AMU_PERFCTR_CNT_CYCLES 0x4004
+#define ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM 0x4005
+
+/* long-latency read miss events */
+#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS 0x4006
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD 0x4009
+#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS 0x400A
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD 0x400B
+
+/* Trace buffer events */
+#define ARMV8_PMUV3_PERFCTR_TRB_WRAP 0x400C
+#define ARMV8_PMUV3_PERFCTR_TRB_TRIG 0x400E
+
+/* Trace unit events */
+#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT0 0x4010
+#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT1 0x4011
+#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT2 0x4012
+#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT3 0x4013
+#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4 0x4018
+#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5 0x4019
+#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6 0x401A
+#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7 0x401B
+
+/* additional latency from alignment events */
+#define ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT 0x4020
+#define ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT 0x4021
+#define ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT 0x4022
+
+/* Armv8.5 Memory Tagging Extension events */
+#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED 0x4024
+#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD 0x4025
+#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR 0x4026
+
+/* ARMv8 recommended implementation defined event types */
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x0040
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x0041
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD 0x0042
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x0043
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER 0x0044
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER 0x0045
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM 0x0046
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN 0x0047
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL 0x0048
+
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x004C
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x004D
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x004E
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x004F
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD 0x0050
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR 0x0051
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD 0x0052
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR 0x0053
+
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM 0x0056
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN 0x0057
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL 0x0058
+
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD 0x005C
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR 0x005D
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD 0x005E
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR 0x005F
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x0060
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x0061
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED 0x0062
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED 0x0063
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL 0x0064
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH 0x0065
+#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD 0x0066
+#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR 0x0067
+#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC 0x0068
+#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC 0x0069
+#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC 0x006A
+
+#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC 0x006C
+#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC 0x006D
+#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC 0x006E
+#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC 0x006F
+#define ARMV8_IMPDEF_PERFCTR_LD_SPEC 0x0070
+#define ARMV8_IMPDEF_PERFCTR_ST_SPEC 0x0071
+#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC 0x0072
+#define ARMV8_IMPDEF_PERFCTR_DP_SPEC 0x0073
+#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC 0x0074
+#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC 0x0075
+#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC 0x0076
+#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC 0x0077
+#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC 0x0078
+#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC 0x0079
+#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC 0x007A
+
+#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC 0x007C
+#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC 0x007D
+#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC 0x007E
+
+#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF 0x0081
+#define ARMV8_IMPDEF_PERFCTR_EXC_SVC 0x0082
+#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT 0x0083
+#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT 0x0084
+
+#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ 0x0086
+#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ 0x0087
+#define ARMV8_IMPDEF_PERFCTR_EXC_SMC 0x0088
+
+#define ARMV8_IMPDEF_PERFCTR_EXC_HVC 0x008A
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT 0x008B
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT 0x008C
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER 0x008D
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ 0x008E
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ 0x008F
+#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC 0x0090
+#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC 0x0091
+
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD 0x00A0
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR 0x00A1
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD 0x00A2
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR 0x00A3
+
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM 0x00A6
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN 0x00A7
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL 0x00A8
+
+/*
+ * Per-CPU PMCR: config reg
+ */
+#define ARMV8_PMU_PMCR_E (1 << 0) /* Enable all counters */
+#define ARMV8_PMU_PMCR_P (1 << 1) /* Reset all counters */
+#define ARMV8_PMU_PMCR_C (1 << 2) /* Cycle counter reset */
+#define ARMV8_PMU_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */
+#define ARMV8_PMU_PMCR_X (1 << 4) /* Export to ETM */
+#define ARMV8_PMU_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/
+#define ARMV8_PMU_PMCR_LC (1 << 6) /* Overflow on 64 bit cycle counter */
+#define ARMV8_PMU_PMCR_LP (1 << 7) /* Long event counter enable */
+#define ARMV8_PMU_PMCR_N GENMASK(15, 11) /* Number of counters supported */
+/* Mask for writable bits */
+#define ARMV8_PMU_PMCR_MASK (ARMV8_PMU_PMCR_E | ARMV8_PMU_PMCR_P | \
+ ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_D | \
+ ARMV8_PMU_PMCR_X | ARMV8_PMU_PMCR_DP | \
+ ARMV8_PMU_PMCR_LC | ARMV8_PMU_PMCR_LP)
+
+/*
+ * PMOVSR: counters overflow flag status reg
+ */
+#define ARMV8_PMU_OVSR_P GENMASK(30, 0)
+#define ARMV8_PMU_OVSR_C BIT(31)
+/* Mask for writable bits is both P and C fields */
+#define ARMV8_PMU_OVERFLOWED_MASK (ARMV8_PMU_OVSR_P | ARMV8_PMU_OVSR_C)
+
+/*
+ * PMXEVTYPER: Event selection reg
+ */
+#define ARMV8_PMU_EVTYPE_EVENT GENMASK(15, 0) /* Mask for EVENT bits */
+#define ARMV8_PMU_EVTYPE_TH GENMASK(43, 32)
+#define ARMV8_PMU_EVTYPE_TC GENMASK(63, 61)
+
+/*
+ * Event filters for PMUv3
+ */
+#define ARMV8_PMU_EXCLUDE_EL1 (1U << 31)
+#define ARMV8_PMU_EXCLUDE_EL0 (1U << 30)
+#define ARMV8_PMU_EXCLUDE_NS_EL1 (1U << 29)
+#define ARMV8_PMU_EXCLUDE_NS_EL0 (1U << 28)
+#define ARMV8_PMU_INCLUDE_EL2 (1U << 27)
+#define ARMV8_PMU_EXCLUDE_EL3 (1U << 26)
+
+/*
+ * PMUSERENR: user enable reg
+ */
+#define ARMV8_PMU_USERENR_EN (1 << 0) /* PMU regs can be accessed at EL0 */
+#define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */
+#define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */
+#define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */
+/* Mask for writable bits */
+#define ARMV8_PMU_USERENR_MASK (ARMV8_PMU_USERENR_EN | ARMV8_PMU_USERENR_SW | \
+ ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_ER)
+
+/* PMMIR_EL1.SLOTS mask */
+#define ARMV8_PMU_SLOTS GENMASK(7, 0)
+#define ARMV8_PMU_BUS_SLOTS GENMASK(15, 8)
+#define ARMV8_PMU_BUS_WIDTH GENMASK(19, 16)
+#define ARMV8_PMU_THWIDTH GENMASK(23, 20)
+
+/*
+ * This code is really good
+ */
+
+#define PMEVN_CASE(n, case_macro) \
+ case n: case_macro(n); break
+
+#define PMEVN_SWITCH(x, case_macro) \
+ do { \
+ switch (x) { \
+ PMEVN_CASE(0, case_macro); \
+ PMEVN_CASE(1, case_macro); \
+ PMEVN_CASE(2, case_macro); \
+ PMEVN_CASE(3, case_macro); \
+ PMEVN_CASE(4, case_macro); \
+ PMEVN_CASE(5, case_macro); \
+ PMEVN_CASE(6, case_macro); \
+ PMEVN_CASE(7, case_macro); \
+ PMEVN_CASE(8, case_macro); \
+ PMEVN_CASE(9, case_macro); \
+ PMEVN_CASE(10, case_macro); \
+ PMEVN_CASE(11, case_macro); \
+ PMEVN_CASE(12, case_macro); \
+ PMEVN_CASE(13, case_macro); \
+ PMEVN_CASE(14, case_macro); \
+ PMEVN_CASE(15, case_macro); \
+ PMEVN_CASE(16, case_macro); \
+ PMEVN_CASE(17, case_macro); \
+ PMEVN_CASE(18, case_macro); \
+ PMEVN_CASE(19, case_macro); \
+ PMEVN_CASE(20, case_macro); \
+ PMEVN_CASE(21, case_macro); \
+ PMEVN_CASE(22, case_macro); \
+ PMEVN_CASE(23, case_macro); \
+ PMEVN_CASE(24, case_macro); \
+ PMEVN_CASE(25, case_macro); \
+ PMEVN_CASE(26, case_macro); \
+ PMEVN_CASE(27, case_macro); \
+ PMEVN_CASE(28, case_macro); \
+ PMEVN_CASE(29, case_macro); \
+ PMEVN_CASE(30, case_macro); \
+ default: \
+ WARN(1, "Invalid PMEV* index\n"); \
+ assert(0); \
+ } \
+ } while (0)
+
+#endif
diff --git a/tools/include/uapi/README b/tools/include/uapi/README
new file mode 100644
index 000000000000..7147b1b2cb28
--- /dev/null
+++ b/tools/include/uapi/README
@@ -0,0 +1,73 @@
+Why we want a copy of kernel headers in tools?
+==============================================
+
+There used to be no copies, with tools/ code using kernel headers
+directly. From time to time tools/perf/ broke due to legitimate kernel
+hacking. At some point Linus complained about such direct usage. Then we
+adopted the current model.
+
+The way these headers are used in perf are not restricted to just
+including them to compile something.
+
+There are sometimes used in scripts that convert defines into string
+tables, etc, so some change may break one of these scripts, or new MSRs
+may use some different #define pattern, etc.
+
+E.g.:
+
+ $ ls -1 tools/perf/trace/beauty/*.sh | head -5
+ tools/perf/trace/beauty/arch_errno_names.sh
+ tools/perf/trace/beauty/drm_ioctl.sh
+ tools/perf/trace/beauty/fadvise.sh
+ tools/perf/trace/beauty/fsconfig.sh
+ tools/perf/trace/beauty/fsmount.sh
+ $
+ $ tools/perf/trace/beauty/fadvise.sh
+ static const char *fadvise_advices[] = {
+ [0] = "NORMAL",
+ [1] = "RANDOM",
+ [2] = "SEQUENTIAL",
+ [3] = "WILLNEED",
+ [4] = "DONTNEED",
+ [5] = "NOREUSE",
+ };
+ $
+
+The tools/perf/check-headers.sh script, part of the tools/ build
+process, points out changes in the original files.
+
+So its important not to touch the copies in tools/ when doing changes in
+the original kernel headers, that will be done later, when
+check-headers.sh inform about the change to the perf tools hackers.
+
+Another explanation from Ingo Molnar:
+It's better than all the alternatives we tried so far:
+
+ - Symbolic links and direct #includes: this was the original approach but
+ was pushed back on from the kernel side, when tooling modified the
+ headers and broke them accidentally for kernel builds.
+
+ - Duplicate self-defined ABI headers like glibc: double the maintenance
+ burden, double the chance for mistakes, plus there's no tech-driven
+ notification mechanism to look at new kernel side changes.
+
+What we are doing now is a third option:
+
+ - A software-enforced copy-on-write mechanism of kernel headers to
+ tooling, driven by non-fatal warnings on the tooling side build when
+ kernel headers get modified:
+
+ Warning: Kernel ABI header differences:
+ diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h
+ diff -u tools/include/uapi/linux/fs.h include/uapi/linux/fs.h
+ diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h
+ ...
+
+ The tooling policy is to always pick up the kernel side headers as-is,
+ and integate them into the tooling build. The warnings above serve as a
+ notification to tooling maintainers that there's changes on the kernel
+ side.
+
+We've been using this for many years now, and it might seem hacky, but
+works surprisingly well.
+
diff --git a/tools/include/uapi/asm-generic/bitsperlong.h b/tools/include/uapi/asm-generic/bitsperlong.h
index 23e6c416b85f..fadb3f857f28 100644
--- a/tools/include/uapi/asm-generic/bitsperlong.h
+++ b/tools/include/uapi/asm-generic/bitsperlong.h
@@ -1,6 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _UAPI__ASM_GENERIC_BITS_PER_LONG
#define _UAPI__ASM_GENERIC_BITS_PER_LONG
+#ifndef __BITS_PER_LONG
+/*
+ * In order to keep safe and avoid regression, only unify uapi
+ * bitsperlong.h for some archs which are using newer toolchains
+ * that have the definitions of __CHAR_BIT__ and __SIZEOF_LONG__.
+ * See the following link for more info:
+ * https://lore.kernel.org/linux-arch/b9624545-2c80-49a1-ac3c-39264a591f7b@app.fastmail.com/
+ */
+#if defined(__CHAR_BIT__) && defined(__SIZEOF_LONG__)
+#define __BITS_PER_LONG (__CHAR_BIT__ * __SIZEOF_LONG__)
+#else
/*
* There seems to be no way of detecting this automatically from user
* space, so 64 bit architectures should override this in their
@@ -8,8 +20,12 @@
* both 32 and 64 bit user space must not rely on CONFIG_64BIT
* to decide it, but rather check a compiler provided macro.
*/
-#ifndef __BITS_PER_LONG
#define __BITS_PER_LONG 32
#endif
+#endif
+
+#ifndef __BITS_PER_LONG_LONG
+#define __BITS_PER_LONG_LONG 64
+#endif
#endif /* _UAPI__ASM_GENERIC_BITS_PER_LONG */
diff --git a/tools/include/uapi/asm-generic/fcntl.h b/tools/include/uapi/asm-generic/fcntl.h
deleted file mode 100644
index b02c8e0f4057..000000000000
--- a/tools/include/uapi/asm-generic/fcntl.h
+++ /dev/null
@@ -1,222 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _ASM_GENERIC_FCNTL_H
-#define _ASM_GENERIC_FCNTL_H
-
-#include <linux/types.h>
-
-/*
- * FMODE_EXEC is 0x20
- * FMODE_NONOTIFY is 0x4000000
- * These cannot be used by userspace O_* until internal and external open
- * flags are split.
- * -Eric Paris
- */
-
-/*
- * When introducing new O_* bits, please check its uniqueness in fcntl_init().
- */
-
-#define O_ACCMODE 00000003
-#define O_RDONLY 00000000
-#define O_WRONLY 00000001
-#define O_RDWR 00000002
-#ifndef O_CREAT
-#define O_CREAT 00000100 /* not fcntl */
-#endif
-#ifndef O_EXCL
-#define O_EXCL 00000200 /* not fcntl */
-#endif
-#ifndef O_NOCTTY
-#define O_NOCTTY 00000400 /* not fcntl */
-#endif
-#ifndef O_TRUNC
-#define O_TRUNC 00001000 /* not fcntl */
-#endif
-#ifndef O_APPEND
-#define O_APPEND 00002000
-#endif
-#ifndef O_NONBLOCK
-#define O_NONBLOCK 00004000
-#endif
-#ifndef O_DSYNC
-#define O_DSYNC 00010000 /* used to be O_SYNC, see below */
-#endif
-#ifndef FASYNC
-#define FASYNC 00020000 /* fcntl, for BSD compatibility */
-#endif
-#ifndef O_DIRECT
-#define O_DIRECT 00040000 /* direct disk access hint */
-#endif
-#ifndef O_LARGEFILE
-#define O_LARGEFILE 00100000
-#endif
-#ifndef O_DIRECTORY
-#define O_DIRECTORY 00200000 /* must be a directory */
-#endif
-#ifndef O_NOFOLLOW
-#define O_NOFOLLOW 00400000 /* don't follow links */
-#endif
-#ifndef O_NOATIME
-#define O_NOATIME 01000000
-#endif
-#ifndef O_CLOEXEC
-#define O_CLOEXEC 02000000 /* set close_on_exec */
-#endif
-
-/*
- * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using
- * the O_SYNC flag. We continue to use the existing numerical value
- * for O_DSYNC semantics now, but using the correct symbolic name for it.
- * This new value is used to request true Posix O_SYNC semantics. It is
- * defined in this strange way to make sure applications compiled against
- * new headers get at least O_DSYNC semantics on older kernels.
- *
- * This has the nice side-effect that we can simply test for O_DSYNC
- * wherever we do not care if O_DSYNC or O_SYNC is used.
- *
- * Note: __O_SYNC must never be used directly.
- */
-#ifndef O_SYNC
-#define __O_SYNC 04000000
-#define O_SYNC (__O_SYNC|O_DSYNC)
-#endif
-
-#ifndef O_PATH
-#define O_PATH 010000000
-#endif
-
-#ifndef __O_TMPFILE
-#define __O_TMPFILE 020000000
-#endif
-
-/* a horrid kludge trying to make sure that this will fail on old kernels */
-#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
-#define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT)
-
-#ifndef O_NDELAY
-#define O_NDELAY O_NONBLOCK
-#endif
-
-#define F_DUPFD 0 /* dup */
-#define F_GETFD 1 /* get close_on_exec */
-#define F_SETFD 2 /* set/clear close_on_exec */
-#define F_GETFL 3 /* get file->f_flags */
-#define F_SETFL 4 /* set file->f_flags */
-#ifndef F_GETLK
-#define F_GETLK 5
-#define F_SETLK 6
-#define F_SETLKW 7
-#endif
-#ifndef F_SETOWN
-#define F_SETOWN 8 /* for sockets. */
-#define F_GETOWN 9 /* for sockets. */
-#endif
-#ifndef F_SETSIG
-#define F_SETSIG 10 /* for sockets. */
-#define F_GETSIG 11 /* for sockets. */
-#endif
-
-#if __BITS_PER_LONG == 32 || defined(__KERNEL__)
-#ifndef F_GETLK64
-#define F_GETLK64 12 /* using 'struct flock64' */
-#define F_SETLK64 13
-#define F_SETLKW64 14
-#endif
-#endif /* __BITS_PER_LONG == 32 || defined(__KERNEL__) */
-
-#ifndef F_SETOWN_EX
-#define F_SETOWN_EX 15
-#define F_GETOWN_EX 16
-#endif
-
-#ifndef F_GETOWNER_UIDS
-#define F_GETOWNER_UIDS 17
-#endif
-
-/*
- * Open File Description Locks
- *
- * Usually record locks held by a process are released on *any* close and are
- * not inherited across a fork().
- *
- * These cmd values will set locks that conflict with process-associated
- * record locks, but are "owned" by the open file description, not the
- * process. This means that they are inherited across fork() like BSD (flock)
- * locks, and they are only released automatically when the last reference to
- * the open file against which they were acquired is put.
- */
-#define F_OFD_GETLK 36
-#define F_OFD_SETLK 37
-#define F_OFD_SETLKW 38
-
-#define F_OWNER_TID 0
-#define F_OWNER_PID 1
-#define F_OWNER_PGRP 2
-
-struct f_owner_ex {
- int type;
- __kernel_pid_t pid;
-};
-
-/* for F_[GET|SET]FL */
-#define FD_CLOEXEC 1 /* actually anything with low bit set goes */
-
-/* for posix fcntl() and lockf() */
-#ifndef F_RDLCK
-#define F_RDLCK 0
-#define F_WRLCK 1
-#define F_UNLCK 2
-#endif
-
-/* for old implementation of bsd flock () */
-#ifndef F_EXLCK
-#define F_EXLCK 4 /* or 3 */
-#define F_SHLCK 8 /* or 4 */
-#endif
-
-/* operations for bsd flock(), also used by the kernel implementation */
-#define LOCK_SH 1 /* shared lock */
-#define LOCK_EX 2 /* exclusive lock */
-#define LOCK_NB 4 /* or'd with one of the above to prevent
- blocking */
-#define LOCK_UN 8 /* remove lock */
-
-/*
- * LOCK_MAND support has been removed from the kernel. We leave the symbols
- * here to not break legacy builds, but these should not be used in new code.
- */
-#define LOCK_MAND 32 /* This is a mandatory flock ... */
-#define LOCK_READ 64 /* which allows concurrent read operations */
-#define LOCK_WRITE 128 /* which allows concurrent write operations */
-#define LOCK_RW 192 /* which allows concurrent read & write ops */
-
-#define F_LINUX_SPECIFIC_BASE 1024
-
-#ifndef HAVE_ARCH_STRUCT_FLOCK
-struct flock {
- short l_type;
- short l_whence;
- __kernel_off_t l_start;
- __kernel_off_t l_len;
- __kernel_pid_t l_pid;
-#ifdef __ARCH_FLOCK_EXTRA_SYSID
- __ARCH_FLOCK_EXTRA_SYSID
-#endif
-#ifdef __ARCH_FLOCK_PAD
- __ARCH_FLOCK_PAD
-#endif
-};
-
-struct flock64 {
- short l_type;
- short l_whence;
- __kernel_loff_t l_start;
- __kernel_loff_t l_len;
- __kernel_pid_t l_pid;
-#ifdef __ARCH_FLOCK64_PAD
- __ARCH_FLOCK64_PAD
-#endif
-};
-#endif /* HAVE_ARCH_STRUCT_FLOCK */
-
-#endif /* _ASM_GENERIC_FCNTL_H */
diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h
index 6ce1f1ceb432..ef1c27fa3c57 100644
--- a/tools/include/uapi/asm-generic/mman-common.h
+++ b/tools/include/uapi/asm-generic/mman-common.h
@@ -79,9 +79,13 @@
#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */
+#define MADV_GUARD_INSTALL 102 /* fatal signal on access to range */
+#define MADV_GUARD_REMOVE 103 /* unguard range */
+
/* compatibility flags */
#define MAP_FILE 0
+#define PKEY_UNRESTRICTED 0x0
#define PKEY_DISABLE_ACCESS 0x1
#define PKEY_DISABLE_WRITE 0x2
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
diff --git a/tools/include/uapi/asm-generic/mman.h b/tools/include/uapi/asm-generic/mman.h
index 406f7718f9ad..51d2556af54a 100644
--- a/tools/include/uapi/asm-generic/mman.h
+++ b/tools/include/uapi/asm-generic/mman.h
@@ -19,4 +19,8 @@
#define MCL_FUTURE 2 /* lock all future mappings */
#define MCL_ONFAULT 4 /* lock all pages that are faulted in */
+#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */
+#define SHADOW_STACK_SET_MARKER (1ULL << 1) /* Set up a top of stack marker in the shadow stack */
+
+
#endif /* __ASM_GENERIC_MMAN_H */
diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h
index 8756df13be50..f333a0ac4ee4 100644
--- a/tools/include/uapi/asm-generic/socket.h
+++ b/tools/include/uapi/asm-generic/socket.h
@@ -119,8 +119,34 @@
#define SO_DETACH_REUSEPORT_BPF 68
+#define SO_PREFER_BUSY_POLL 69
+#define SO_BUSY_POLL_BUDGET 70
+
+#define SO_NETNS_COOKIE 71
+
+#define SO_BUF_LOCK 72
+
+#define SO_RESERVE_MEM 73
+
+#define SO_TXREHASH 74
+
#define SO_RCVMARK 75
+#define SO_PASSPIDFD 76
+#define SO_PEERPIDFD 77
+
+#define SO_DEVMEM_LINEAR 78
+#define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR
+#define SO_DEVMEM_DMABUF 79
+#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF
+#define SO_DEVMEM_DONTNEED 80
+
+#define SCM_TS_OPT_ID 81
+
+#define SO_RCVPRIORITY 82
+
+#define SO_PASSRIGHTS 83
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 45fa180cc56a..04e0077fb4c9 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -38,12 +38,12 @@ __SYSCALL(__NR_io_destroy, sys_io_destroy)
__SC_COMP(__NR_io_submit, sys_io_submit, compat_sys_io_submit)
#define __NR_io_cancel 3
__SYSCALL(__NR_io_cancel, sys_io_cancel)
+
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_io_getevents 4
__SC_3264(__NR_io_getevents, sys_io_getevents_time32, sys_io_getevents)
#endif
-/* fs/xattr.c */
#define __NR_setxattr 5
__SYSCALL(__NR_setxattr, sys_setxattr)
#define __NR_lsetxattr 6
@@ -68,58 +68,38 @@ __SYSCALL(__NR_removexattr, sys_removexattr)
__SYSCALL(__NR_lremovexattr, sys_lremovexattr)
#define __NR_fremovexattr 16
__SYSCALL(__NR_fremovexattr, sys_fremovexattr)
-
-/* fs/dcache.c */
#define __NR_getcwd 17
__SYSCALL(__NR_getcwd, sys_getcwd)
-
-/* fs/cookies.c */
#define __NR_lookup_dcookie 18
-__SC_COMP(__NR_lookup_dcookie, sys_lookup_dcookie, compat_sys_lookup_dcookie)
-
-/* fs/eventfd.c */
+__SYSCALL(__NR_lookup_dcookie, sys_ni_syscall)
#define __NR_eventfd2 19
__SYSCALL(__NR_eventfd2, sys_eventfd2)
-
-/* fs/eventpoll.c */
#define __NR_epoll_create1 20
__SYSCALL(__NR_epoll_create1, sys_epoll_create1)
#define __NR_epoll_ctl 21
__SYSCALL(__NR_epoll_ctl, sys_epoll_ctl)
#define __NR_epoll_pwait 22
__SC_COMP(__NR_epoll_pwait, sys_epoll_pwait, compat_sys_epoll_pwait)
-
-/* fs/fcntl.c */
#define __NR_dup 23
__SYSCALL(__NR_dup, sys_dup)
#define __NR_dup3 24
__SYSCALL(__NR_dup3, sys_dup3)
#define __NR3264_fcntl 25
__SC_COMP_3264(__NR3264_fcntl, sys_fcntl64, sys_fcntl, compat_sys_fcntl64)
-
-/* fs/inotify_user.c */
#define __NR_inotify_init1 26
__SYSCALL(__NR_inotify_init1, sys_inotify_init1)
#define __NR_inotify_add_watch 27
__SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch)
#define __NR_inotify_rm_watch 28
__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch)
-
-/* fs/ioctl.c */
#define __NR_ioctl 29
__SC_COMP(__NR_ioctl, sys_ioctl, compat_sys_ioctl)
-
-/* fs/ioprio.c */
#define __NR_ioprio_set 30
__SYSCALL(__NR_ioprio_set, sys_ioprio_set)
#define __NR_ioprio_get 31
__SYSCALL(__NR_ioprio_get, sys_ioprio_get)
-
-/* fs/locks.c */
#define __NR_flock 32
__SYSCALL(__NR_flock, sys_flock)
-
-/* fs/namei.c */
#define __NR_mknodat 33
__SYSCALL(__NR_mknodat, sys_mknodat)
#define __NR_mkdirat 34
@@ -130,25 +110,21 @@ __SYSCALL(__NR_unlinkat, sys_unlinkat)
__SYSCALL(__NR_symlinkat, sys_symlinkat)
#define __NR_linkat 37
__SYSCALL(__NR_linkat, sys_linkat)
+
#ifdef __ARCH_WANT_RENAMEAT
/* renameat is superseded with flags by renameat2 */
#define __NR_renameat 38
__SYSCALL(__NR_renameat, sys_renameat)
#endif /* __ARCH_WANT_RENAMEAT */
-/* fs/namespace.c */
#define __NR_umount2 39
__SYSCALL(__NR_umount2, sys_umount)
#define __NR_mount 40
__SYSCALL(__NR_mount, sys_mount)
#define __NR_pivot_root 41
__SYSCALL(__NR_pivot_root, sys_pivot_root)
-
-/* fs/nfsctl.c */
#define __NR_nfsservctl 42
__SYSCALL(__NR_nfsservctl, sys_ni_syscall)
-
-/* fs/open.c */
#define __NR3264_statfs 43
__SC_COMP_3264(__NR3264_statfs, sys_statfs64, sys_statfs, \
compat_sys_statfs64)
@@ -161,7 +137,6 @@ __SC_COMP_3264(__NR3264_truncate, sys_truncate64, sys_truncate, \
#define __NR3264_ftruncate 46
__SC_COMP_3264(__NR3264_ftruncate, sys_ftruncate64, sys_ftruncate, \
compat_sys_ftruncate64)
-
#define __NR_fallocate 47
__SC_COMP(__NR_fallocate, sys_fallocate, compat_sys_fallocate)
#define __NR_faccessat 48
@@ -186,20 +161,12 @@ __SYSCALL(__NR_openat, sys_openat)
__SYSCALL(__NR_close, sys_close)
#define __NR_vhangup 58
__SYSCALL(__NR_vhangup, sys_vhangup)
-
-/* fs/pipe.c */
#define __NR_pipe2 59
__SYSCALL(__NR_pipe2, sys_pipe2)
-
-/* fs/quota.c */
#define __NR_quotactl 60
__SYSCALL(__NR_quotactl, sys_quotactl)
-
-/* fs/readdir.c */
#define __NR_getdents64 61
__SYSCALL(__NR_getdents64, sys_getdents64)
-
-/* fs/read_write.c */
#define __NR3264_lseek 62
__SC_3264(__NR3264_lseek, sys_llseek, sys_lseek)
#define __NR_read 63
@@ -218,12 +185,9 @@ __SC_COMP(__NR_pwrite64, sys_pwrite64, compat_sys_pwrite64)
__SC_COMP(__NR_preadv, sys_preadv, compat_sys_preadv)
#define __NR_pwritev 70
__SC_COMP(__NR_pwritev, sys_pwritev, compat_sys_pwritev)
-
-/* fs/sendfile.c */
#define __NR3264_sendfile 71
__SYSCALL(__NR3264_sendfile, sys_sendfile64)
-/* fs/select.c */
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_pselect6 72
__SC_COMP_3264(__NR_pselect6, sys_pselect6_time32, sys_pselect6, compat_sys_pselect6_time32)
@@ -231,21 +195,17 @@ __SC_COMP_3264(__NR_pselect6, sys_pselect6_time32, sys_pselect6, compat_sys_psel
__SC_COMP_3264(__NR_ppoll, sys_ppoll_time32, sys_ppoll, compat_sys_ppoll_time32)
#endif
-/* fs/signalfd.c */
#define __NR_signalfd4 74
__SC_COMP(__NR_signalfd4, sys_signalfd4, compat_sys_signalfd4)
-
-/* fs/splice.c */
#define __NR_vmsplice 75
__SYSCALL(__NR_vmsplice, sys_vmsplice)
#define __NR_splice 76
__SYSCALL(__NR_splice, sys_splice)
#define __NR_tee 77
__SYSCALL(__NR_tee, sys_tee)
-
-/* fs/stat.c */
#define __NR_readlinkat 78
__SYSCALL(__NR_readlinkat, sys_readlinkat)
+
#if defined(__ARCH_WANT_NEW_STAT) || defined(__ARCH_WANT_STAT64)
#define __NR3264_fstatat 79
__SC_3264(__NR3264_fstatat, sys_fstatat64, sys_newfstatat)
@@ -253,13 +213,13 @@ __SC_3264(__NR3264_fstatat, sys_fstatat64, sys_newfstatat)
__SC_3264(__NR3264_fstat, sys_fstat64, sys_newfstat)
#endif
-/* fs/sync.c */
#define __NR_sync 81
__SYSCALL(__NR_sync, sys_sync)
#define __NR_fsync 82
__SYSCALL(__NR_fsync, sys_fsync)
#define __NR_fdatasync 83
__SYSCALL(__NR_fdatasync, sys_fdatasync)
+
#ifdef __ARCH_WANT_SYNC_FILE_RANGE2
#define __NR_sync_file_range2 84
__SC_COMP(__NR_sync_file_range2, sys_sync_file_range2, \
@@ -270,9 +230,9 @@ __SC_COMP(__NR_sync_file_range, sys_sync_file_range, \
compat_sys_sync_file_range)
#endif
-/* fs/timerfd.c */
#define __NR_timerfd_create 85
__SYSCALL(__NR_timerfd_create, sys_timerfd_create)
+
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_timerfd_settime 86
__SC_3264(__NR_timerfd_settime, sys_timerfd_settime32, \
@@ -282,45 +242,35 @@ __SC_3264(__NR_timerfd_gettime, sys_timerfd_gettime32, \
sys_timerfd_gettime)
#endif
-/* fs/utimes.c */
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_utimensat 88
__SC_3264(__NR_utimensat, sys_utimensat_time32, sys_utimensat)
#endif
-/* kernel/acct.c */
#define __NR_acct 89
__SYSCALL(__NR_acct, sys_acct)
-
-/* kernel/capability.c */
#define __NR_capget 90
__SYSCALL(__NR_capget, sys_capget)
#define __NR_capset 91
__SYSCALL(__NR_capset, sys_capset)
-
-/* kernel/exec_domain.c */
#define __NR_personality 92
__SYSCALL(__NR_personality, sys_personality)
-
-/* kernel/exit.c */
#define __NR_exit 93
__SYSCALL(__NR_exit, sys_exit)
#define __NR_exit_group 94
__SYSCALL(__NR_exit_group, sys_exit_group)
#define __NR_waitid 95
__SC_COMP(__NR_waitid, sys_waitid, compat_sys_waitid)
-
-/* kernel/fork.c */
#define __NR_set_tid_address 96
__SYSCALL(__NR_set_tid_address, sys_set_tid_address)
#define __NR_unshare 97
__SYSCALL(__NR_unshare, sys_unshare)
-/* kernel/futex.c */
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_futex 98
__SC_3264(__NR_futex, sys_futex_time32, sys_futex)
#endif
+
#define __NR_set_robust_list 99
__SC_COMP(__NR_set_robust_list, sys_set_robust_list, \
compat_sys_set_robust_list)
@@ -328,43 +278,40 @@ __SC_COMP(__NR_set_robust_list, sys_set_robust_list, \
__SC_COMP(__NR_get_robust_list, sys_get_robust_list, \
compat_sys_get_robust_list)
-/* kernel/hrtimer.c */
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_nanosleep 101
__SC_3264(__NR_nanosleep, sys_nanosleep_time32, sys_nanosleep)
#endif
-/* kernel/itimer.c */
#define __NR_getitimer 102
__SC_COMP(__NR_getitimer, sys_getitimer, compat_sys_getitimer)
#define __NR_setitimer 103
__SC_COMP(__NR_setitimer, sys_setitimer, compat_sys_setitimer)
-
-/* kernel/kexec.c */
#define __NR_kexec_load 104
__SC_COMP(__NR_kexec_load, sys_kexec_load, compat_sys_kexec_load)
-
-/* kernel/module.c */
#define __NR_init_module 105
__SYSCALL(__NR_init_module, sys_init_module)
#define __NR_delete_module 106
__SYSCALL(__NR_delete_module, sys_delete_module)
-
-/* kernel/posix-timers.c */
#define __NR_timer_create 107
__SC_COMP(__NR_timer_create, sys_timer_create, compat_sys_timer_create)
+
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_timer_gettime 108
__SC_3264(__NR_timer_gettime, sys_timer_gettime32, sys_timer_gettime)
#endif
+
#define __NR_timer_getoverrun 109
__SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun)
+
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_timer_settime 110
__SC_3264(__NR_timer_settime, sys_timer_settime32, sys_timer_settime)
#endif
+
#define __NR_timer_delete 111
__SYSCALL(__NR_timer_delete, sys_timer_delete)
+
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_clock_settime 112
__SC_3264(__NR_clock_settime, sys_clock_settime32, sys_clock_settime)
@@ -377,15 +324,10 @@ __SC_3264(__NR_clock_nanosleep, sys_clock_nanosleep_time32, \
sys_clock_nanosleep)
#endif
-/* kernel/printk.c */
#define __NR_syslog 116
__SYSCALL(__NR_syslog, sys_syslog)
-
-/* kernel/ptrace.c */
#define __NR_ptrace 117
__SC_COMP(__NR_ptrace, sys_ptrace, compat_sys_ptrace)
-
-/* kernel/sched/core.c */
#define __NR_sched_setparam 118
__SYSCALL(__NR_sched_setparam, sys_sched_setparam)
#define __NR_sched_setscheduler 119
@@ -406,13 +348,13 @@ __SYSCALL(__NR_sched_yield, sys_sched_yield)
__SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max)
#define __NR_sched_get_priority_min 126
__SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min)
+
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_sched_rr_get_interval 127
__SC_3264(__NR_sched_rr_get_interval, sys_sched_rr_get_interval_time32, \
sys_sched_rr_get_interval)
#endif
-/* kernel/signal.c */
#define __NR_restart_syscall 128
__SYSCALL(__NR_restart_syscall, sys_restart_syscall)
#define __NR_kill 129
@@ -431,18 +373,18 @@ __SC_COMP(__NR_rt_sigaction, sys_rt_sigaction, compat_sys_rt_sigaction)
__SC_COMP(__NR_rt_sigprocmask, sys_rt_sigprocmask, compat_sys_rt_sigprocmask)
#define __NR_rt_sigpending 136
__SC_COMP(__NR_rt_sigpending, sys_rt_sigpending, compat_sys_rt_sigpending)
+
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_rt_sigtimedwait 137
__SC_COMP_3264(__NR_rt_sigtimedwait, sys_rt_sigtimedwait_time32, \
sys_rt_sigtimedwait, compat_sys_rt_sigtimedwait_time32)
#endif
+
#define __NR_rt_sigqueueinfo 138
__SC_COMP(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo, \
compat_sys_rt_sigqueueinfo)
#define __NR_rt_sigreturn 139
__SC_COMP(__NR_rt_sigreturn, sys_rt_sigreturn, compat_sys_rt_sigreturn)
-
-/* kernel/sys.c */
#define __NR_setpriority 140
__SYSCALL(__NR_setpriority, sys_setpriority)
#define __NR_getpriority 141
@@ -507,7 +449,6 @@ __SYSCALL(__NR_prctl, sys_prctl)
#define __NR_getcpu 168
__SYSCALL(__NR_getcpu, sys_getcpu)
-/* kernel/time.c */
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_gettimeofday 169
__SC_COMP(__NR_gettimeofday, sys_gettimeofday, compat_sys_gettimeofday)
@@ -517,7 +458,6 @@ __SC_COMP(__NR_settimeofday, sys_settimeofday, compat_sys_settimeofday)
__SC_3264(__NR_adjtimex, sys_adjtimex_time32, sys_adjtimex)
#endif
-/* kernel/sys.c */
#define __NR_getpid 172
__SYSCALL(__NR_getpid, sys_getpid)
#define __NR_getppid 173
@@ -534,12 +474,11 @@ __SYSCALL(__NR_getegid, sys_getegid)
__SYSCALL(__NR_gettid, sys_gettid)
#define __NR_sysinfo 179
__SC_COMP(__NR_sysinfo, sys_sysinfo, compat_sys_sysinfo)
-
-/* ipc/mqueue.c */
#define __NR_mq_open 180
__SC_COMP(__NR_mq_open, sys_mq_open, compat_sys_mq_open)
#define __NR_mq_unlink 181
__SYSCALL(__NR_mq_unlink, sys_mq_unlink)
+
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_mq_timedsend 182
__SC_3264(__NR_mq_timedsend, sys_mq_timedsend_time32, sys_mq_timedsend)
@@ -547,12 +486,11 @@ __SC_3264(__NR_mq_timedsend, sys_mq_timedsend_time32, sys_mq_timedsend)
__SC_3264(__NR_mq_timedreceive, sys_mq_timedreceive_time32, \
sys_mq_timedreceive)
#endif
+
#define __NR_mq_notify 184
__SC_COMP(__NR_mq_notify, sys_mq_notify, compat_sys_mq_notify)
#define __NR_mq_getsetattr 185
__SC_COMP(__NR_mq_getsetattr, sys_mq_getsetattr, compat_sys_mq_getsetattr)
-
-/* ipc/msg.c */
#define __NR_msgget 186
__SYSCALL(__NR_msgget, sys_msgget)
#define __NR_msgctl 187
@@ -561,20 +499,18 @@ __SC_COMP(__NR_msgctl, sys_msgctl, compat_sys_msgctl)
__SC_COMP(__NR_msgrcv, sys_msgrcv, compat_sys_msgrcv)
#define __NR_msgsnd 189
__SC_COMP(__NR_msgsnd, sys_msgsnd, compat_sys_msgsnd)
-
-/* ipc/sem.c */
#define __NR_semget 190
__SYSCALL(__NR_semget, sys_semget)
#define __NR_semctl 191
__SC_COMP(__NR_semctl, sys_semctl, compat_sys_semctl)
+
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_semtimedop 192
__SC_3264(__NR_semtimedop, sys_semtimedop_time32, sys_semtimedop)
#endif
+
#define __NR_semop 193
__SYSCALL(__NR_semop, sys_semop)
-
-/* ipc/shm.c */
#define __NR_shmget 194
__SYSCALL(__NR_shmget, sys_shmget)
#define __NR_shmctl 195
@@ -583,8 +519,6 @@ __SC_COMP(__NR_shmctl, sys_shmctl, compat_sys_shmctl)
__SC_COMP(__NR_shmat, sys_shmat, compat_sys_shmat)
#define __NR_shmdt 197
__SYSCALL(__NR_shmdt, sys_shmdt)
-
-/* net/socket.c */
#define __NR_socket 198
__SYSCALL(__NR_socket, sys_socket)
#define __NR_socketpair 199
@@ -615,40 +549,30 @@ __SYSCALL(__NR_shutdown, sys_shutdown)
__SC_COMP(__NR_sendmsg, sys_sendmsg, compat_sys_sendmsg)
#define __NR_recvmsg 212
__SC_COMP(__NR_recvmsg, sys_recvmsg, compat_sys_recvmsg)
-
-/* mm/filemap.c */
#define __NR_readahead 213
__SC_COMP(__NR_readahead, sys_readahead, compat_sys_readahead)
-
-/* mm/nommu.c, also with MMU */
#define __NR_brk 214
__SYSCALL(__NR_brk, sys_brk)
#define __NR_munmap 215
__SYSCALL(__NR_munmap, sys_munmap)
#define __NR_mremap 216
__SYSCALL(__NR_mremap, sys_mremap)
-
-/* security/keys/keyctl.c */
#define __NR_add_key 217
__SYSCALL(__NR_add_key, sys_add_key)
#define __NR_request_key 218
__SYSCALL(__NR_request_key, sys_request_key)
#define __NR_keyctl 219
__SC_COMP(__NR_keyctl, sys_keyctl, compat_sys_keyctl)
-
-/* arch/example/kernel/sys_example.c */
#define __NR_clone 220
__SYSCALL(__NR_clone, sys_clone)
#define __NR_execve 221
__SC_COMP(__NR_execve, sys_execve, compat_sys_execve)
-
#define __NR3264_mmap 222
__SC_3264(__NR3264_mmap, sys_mmap2, sys_mmap)
-/* mm/fadvise.c */
#define __NR3264_fadvise64 223
__SC_COMP(__NR3264_fadvise64, sys_fadvise64_64, compat_sys_fadvise64_64)
-/* mm/, CONFIG_MMU only */
+/* CONFIG_MMU only */
#ifndef __ARCH_NOMMU
#define __NR_swapon 224
__SYSCALL(__NR_swapon, sys_swapon)
@@ -691,6 +615,7 @@ __SC_COMP(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo, \
__SYSCALL(__NR_perf_event_open, sys_perf_event_open)
#define __NR_accept4 242
__SYSCALL(__NR_accept4, sys_accept4)
+
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_recvmmsg 243
__SC_COMP_3264(__NR_recvmmsg, sys_recvmmsg_time32, sys_recvmmsg, compat_sys_recvmmsg_time32)
@@ -706,6 +631,7 @@ __SC_COMP_3264(__NR_recvmmsg, sys_recvmmsg_time32, sys_recvmmsg, compat_sys_recv
#define __NR_wait4 260
__SC_COMP(__NR_wait4, sys_wait4, compat_sys_wait4)
#endif
+
#define __NR_prlimit64 261
__SYSCALL(__NR_prlimit64, sys_prlimit64)
#define __NR_fanotify_init 262
@@ -716,10 +642,12 @@ __SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
#define __NR_open_by_handle_at 265
__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
+
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_clock_adjtime 266
__SC_3264(__NR_clock_adjtime, sys_clock_adjtime32, sys_clock_adjtime)
#endif
+
#define __NR_syncfs 267
__SYSCALL(__NR_syncfs, sys_syncfs)
#define __NR_setns 268
@@ -770,15 +698,19 @@ __SYSCALL(__NR_pkey_alloc, sys_pkey_alloc)
__SYSCALL(__NR_pkey_free, sys_pkey_free)
#define __NR_statx 291
__SYSCALL(__NR_statx, sys_statx)
+
#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
#define __NR_io_pgetevents 292
__SC_COMP_3264(__NR_io_pgetevents, sys_io_pgetevents_time32, sys_io_pgetevents, compat_sys_io_pgetevents)
#endif
+
#define __NR_rseq 293
__SYSCALL(__NR_rseq, sys_rseq)
#define __NR_kexec_file_load 294
__SYSCALL(__NR_kexec_file_load, sys_kexec_file_load)
+
/* 295 through 402 are unassigned to sync up with generic numbers, don't use */
+
#if defined(__SYSCALL_COMPAT) || __BITS_PER_LONG == 32
#define __NR_clock_gettime64 403
__SYSCALL(__NR_clock_gettime64, sys_clock_gettime)
@@ -805,7 +737,7 @@ __SC_COMP(__NR_pselect6_time64, sys_pselect6, compat_sys_pselect6_time64)
#define __NR_ppoll_time64 414
__SC_COMP(__NR_ppoll_time64, sys_ppoll, compat_sys_ppoll_time64)
#define __NR_io_pgetevents_time64 416
-__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents)
+__SC_COMP(__NR_io_pgetevents_time64, sys_io_pgetevents, compat_sys_io_pgetevents_time64)
#define __NR_recvmmsg_time64 417
__SC_COMP(__NR_recvmmsg_time64, sys_recvmmsg, compat_sys_recvmmsg_time64)
#define __NR_mq_timedsend_time64 418
@@ -844,13 +776,10 @@ __SYSCALL(__NR_fsmount, sys_fsmount)
__SYSCALL(__NR_fspick, sys_fspick)
#define __NR_pidfd_open 434
__SYSCALL(__NR_pidfd_open, sys_pidfd_open)
-#ifdef __ARCH_WANT_SYS_CLONE3
#define __NR_clone3 435
__SYSCALL(__NR_clone3, sys_clone3)
-#endif
#define __NR_close_range 436
__SYSCALL(__NR_close_range, sys_close_range)
-
#define __NR_openat2 437
__SYSCALL(__NR_openat2, sys_openat2)
#define __NR_pidfd_getfd 438
@@ -865,7 +794,6 @@ __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
__SYSCALL(__NR_mount_setattr, sys_mount_setattr)
#define __NR_quotactl_fd 443
__SYSCALL(__NR_quotactl_fd, sys_quotactl_fd)
-
#define __NR_landlock_create_ruleset 444
__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
#define __NR_landlock_add_rule 445
@@ -877,17 +805,61 @@ __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)
#define __NR_memfd_secret 447
__SYSCALL(__NR_memfd_secret, sys_memfd_secret)
#endif
+
#define __NR_process_mrelease 448
__SYSCALL(__NR_process_mrelease, sys_process_mrelease)
-
#define __NR_futex_waitv 449
__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
-
#define __NR_set_mempolicy_home_node 450
__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
+#define __NR_cachestat 451
+__SYSCALL(__NR_cachestat, sys_cachestat)
+#define __NR_fchmodat2 452
+__SYSCALL(__NR_fchmodat2, sys_fchmodat2)
+#define __NR_map_shadow_stack 453
+__SYSCALL(__NR_map_shadow_stack, sys_map_shadow_stack)
+#define __NR_futex_wake 454
+__SYSCALL(__NR_futex_wake, sys_futex_wake)
+#define __NR_futex_wait 455
+__SYSCALL(__NR_futex_wait, sys_futex_wait)
+#define __NR_futex_requeue 456
+__SYSCALL(__NR_futex_requeue, sys_futex_requeue)
+
+#define __NR_statmount 457
+__SYSCALL(__NR_statmount, sys_statmount)
+
+#define __NR_listmount 458
+__SYSCALL(__NR_listmount, sys_listmount)
+
+#define __NR_lsm_get_self_attr 459
+__SYSCALL(__NR_lsm_get_self_attr, sys_lsm_get_self_attr)
+#define __NR_lsm_set_self_attr 460
+__SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr)
+#define __NR_lsm_list_modules 461
+__SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules)
+
+#define __NR_mseal 462
+__SYSCALL(__NR_mseal, sys_mseal)
+
+#define __NR_setxattrat 463
+__SYSCALL(__NR_setxattrat, sys_setxattrat)
+#define __NR_getxattrat 464
+__SYSCALL(__NR_getxattrat, sys_getxattrat)
+#define __NR_listxattrat 465
+__SYSCALL(__NR_listxattrat, sys_listxattrat)
+#define __NR_removexattrat 466
+__SYSCALL(__NR_removexattrat, sys_removexattrat)
+#define __NR_open_tree_attr 467
+__SYSCALL(__NR_open_tree_attr, sys_open_tree_attr)
+
+/* fs/inode.c */
+#define __NR_file_getattr 468
+__SYSCALL(__NR_file_getattr, sys_file_getattr)
+#define __NR_file_setattr 469
+__SYSCALL(__NR_file_setattr, sys_file_setattr)
#undef __NR_syscalls
-#define __NR_syscalls 451
+#define __NR_syscalls 470
/*
* 32 bit systems traditionally used different
diff --git a/tools/include/uapi/asm/bitsperlong.h b/tools/include/uapi/asm/bitsperlong.h
index da5206517158..c65267afc341 100644
--- a/tools/include/uapi/asm/bitsperlong.h
+++ b/tools/include/uapi/asm/bitsperlong.h
@@ -1,8 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
#if defined(__i386__) || defined(__x86_64__)
#include "../../../arch/x86/include/uapi/asm/bitsperlong.h"
-#elif defined(__aarch64__)
-#include "../../../arch/arm64/include/uapi/asm/bitsperlong.h"
#elif defined(__powerpc__)
#include "../../../arch/powerpc/include/uapi/asm/bitsperlong.h"
#elif defined(__s390__)
@@ -13,12 +11,8 @@
#include "../../../arch/mips/include/uapi/asm/bitsperlong.h"
#elif defined(__ia64__)
#include "../../../arch/ia64/include/uapi/asm/bitsperlong.h"
-#elif defined(__riscv)
-#include "../../../arch/riscv/include/uapi/asm/bitsperlong.h"
#elif defined(__alpha__)
#include "../../../arch/alpha/include/uapi/asm/bitsperlong.h"
-#elif defined(__loongarch__)
-#include "../../../arch/loongarch/include/uapi/asm/bitsperlong.h"
#else
#include <asm-generic/bitsperlong.h>
#endif
diff --git a/tools/include/uapi/asm/bpf_perf_event.h b/tools/include/uapi/asm/bpf_perf_event.h
index d7dfeab0d71a..ff52668abf8c 100644
--- a/tools/include/uapi/asm/bpf_perf_event.h
+++ b/tools/include/uapi/asm/bpf_perf_event.h
@@ -6,6 +6,8 @@
#include "../../arch/s390/include/uapi/asm/bpf_perf_event.h"
#elif defined(__riscv)
#include "../../arch/riscv/include/uapi/asm/bpf_perf_event.h"
+#elif defined(__loongarch__)
+#include "../../arch/loongarch/include/uapi/asm/bpf_perf_event.h"
#else
#include <uapi/asm-generic/bpf_perf_event.h>
#endif
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index 642808520d92..3cd5cf15e3c9 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -597,35 +597,66 @@ struct drm_set_version {
int drm_dd_minor;
};
-/* DRM_IOCTL_GEM_CLOSE ioctl argument type */
+/**
+ * struct drm_gem_close - Argument for &DRM_IOCTL_GEM_CLOSE ioctl.
+ * @handle: Handle of the object to be closed.
+ * @pad: Padding.
+ *
+ * Releases the handle to an mm object.
+ */
struct drm_gem_close {
- /** Handle of the object to be closed. */
__u32 handle;
__u32 pad;
};
-/* DRM_IOCTL_GEM_FLINK ioctl argument type */
+/**
+ * struct drm_gem_flink - Argument for &DRM_IOCTL_GEM_FLINK ioctl.
+ * @handle: Handle for the object being named.
+ * @name: Returned global name.
+ *
+ * Create a global name for an object, returning the name.
+ *
+ * Note that the name does not hold a reference; when the object
+ * is freed, the name goes away.
+ */
struct drm_gem_flink {
- /** Handle for the object being named */
__u32 handle;
-
- /** Returned global name */
__u32 name;
};
-/* DRM_IOCTL_GEM_OPEN ioctl argument type */
+/**
+ * struct drm_gem_open - Argument for &DRM_IOCTL_GEM_OPEN ioctl.
+ * @name: Name of object being opened.
+ * @handle: Returned handle for the object.
+ * @size: Returned size of the object
+ *
+ * Open an object using the global name, returning a handle and the size.
+ *
+ * This handle (of course) holds a reference to the object, so the object
+ * will not go away until the handle is deleted.
+ */
struct drm_gem_open {
- /** Name of object being opened */
__u32 name;
-
- /** Returned handle for the object */
__u32 handle;
-
- /** Returned size of the object */
__u64 size;
};
/**
+ * struct drm_gem_change_handle - Argument for &DRM_IOCTL_GEM_CHANGE_HANDLE ioctl.
+ * @handle: The handle of a gem object.
+ * @new_handle: An available gem handle.
+ *
+ * This ioctl changes the handle of a GEM object to the specified one.
+ * The new handle must be unused. On success the old handle is closed
+ * and all further IOCTL should refer to the new handle only.
+ * Calls to DRM_IOCTL_PRIME_FD_TO_HANDLE will return the new handle.
+ */
+struct drm_gem_change_handle {
+ __u32 handle;
+ __u32 new_handle;
+};
+
+/**
* DRM_CAP_DUMB_BUFFER
*
* If set to 1, the driver supports creating dumb buffers via the
@@ -673,8 +704,11 @@ struct drm_gem_open {
* Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT
* and &DRM_PRIME_CAP_EXPORT.
*
- * PRIME buffers are exposed as dma-buf file descriptors. See
- * Documentation/gpu/drm-mm.rst, section "PRIME Buffer Sharing".
+ * Starting from kernel version 6.6, both &DRM_PRIME_CAP_IMPORT and
+ * &DRM_PRIME_CAP_EXPORT are always advertised.
+ *
+ * PRIME buffers are exposed as dma-buf file descriptors.
+ * See :ref:`prime_buffer_sharing`.
*/
#define DRM_CAP_PRIME 0x5
/**
@@ -682,6 +716,8 @@ struct drm_gem_open {
*
* If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME
* buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl.
+ *
+ * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME.
*/
#define DRM_PRIME_CAP_IMPORT 0x1
/**
@@ -689,6 +725,8 @@ struct drm_gem_open {
*
* If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME
* buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl.
+ *
+ * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME.
*/
#define DRM_PRIME_CAP_EXPORT 0x2
/**
@@ -706,7 +744,8 @@ struct drm_gem_open {
/**
* DRM_CAP_ASYNC_PAGE_FLIP
*
- * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC.
+ * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for legacy
+ * page-flips.
*/
#define DRM_CAP_ASYNC_PAGE_FLIP 0x7
/**
@@ -756,17 +795,23 @@ struct drm_gem_open {
/**
* DRM_CAP_SYNCOBJ
*
- * If set to 1, the driver supports sync objects. See
- * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects".
+ * If set to 1, the driver supports sync objects. See :ref:`drm_sync_objects`.
*/
#define DRM_CAP_SYNCOBJ 0x13
/**
* DRM_CAP_SYNCOBJ_TIMELINE
*
* If set to 1, the driver supports timeline operations on sync objects. See
- * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects".
+ * :ref:`drm_sync_objects`.
*/
#define DRM_CAP_SYNCOBJ_TIMELINE 0x14
+/**
+ * DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP
+ *
+ * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for atomic
+ * commits.
+ */
+#define DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP 0x15
/* DRM_IOCTL_GET_CAP ioctl argument type */
struct drm_get_cap {
@@ -836,6 +881,31 @@ struct drm_get_cap {
*/
#define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5
+/**
+ * DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT
+ *
+ * Drivers for para-virtualized hardware (e.g. vmwgfx, qxl, virtio and
+ * virtualbox) have additional restrictions for cursor planes (thus
+ * making cursor planes on those drivers not truly universal,) e.g.
+ * they need cursor planes to act like one would expect from a mouse
+ * cursor and have correctly set hotspot properties.
+ * If this client cap is not set the DRM core will hide cursor plane on
+ * those virtualized drivers because not setting it implies that the
+ * client is not capable of dealing with those extra restictions.
+ * Clients which do set cursor hotspot and treat the cursor plane
+ * like a mouse cursor should set this property.
+ * The client must enable &DRM_CLIENT_CAP_ATOMIC first.
+ *
+ * Setting this property on drivers which do not special case
+ * cursor planes (i.e. non-virtualized drivers) will return
+ * EOPNOTSUPP, which can be used by userspace to gauge
+ * requirements of the hardware/drivers they're running on.
+ *
+ * This capability is always supported for atomic-capable virtualized
+ * drivers starting from kernel version 6.6.
+ */
+#define DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT 6
+
/* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */
struct drm_set_client_cap {
__u64 capability;
@@ -866,13 +936,17 @@ struct drm_syncobj_destroy {
};
#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE (1 << 0)
+#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_TIMELINE (1 << 1)
#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE (1 << 0)
+#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_TIMELINE (1 << 1)
struct drm_syncobj_handle {
__u32 handle;
__u32 flags;
__s32 fd;
__u32 pad;
+
+ __u64 point;
};
struct drm_syncobj_transfer {
@@ -887,6 +961,7 @@ struct drm_syncobj_transfer {
#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2) /* wait for time point to become available */
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE (1 << 3) /* set fence deadline to deadline_nsec */
struct drm_syncobj_wait {
__u64 handles;
/* absolute timeout */
@@ -895,6 +970,14 @@ struct drm_syncobj_wait {
__u32 flags;
__u32 first_signaled; /* only valid when not waiting all */
__u32 pad;
+ /**
+ * @deadline_nsec - fence deadline hint
+ *
+ * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing
+ * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is
+ * set.
+ */
+ __u64 deadline_nsec;
};
struct drm_syncobj_timeline_wait {
@@ -907,6 +990,35 @@ struct drm_syncobj_timeline_wait {
__u32 flags;
__u32 first_signaled; /* only valid when not waiting all */
__u32 pad;
+ /**
+ * @deadline_nsec - fence deadline hint
+ *
+ * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing
+ * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is
+ * set.
+ */
+ __u64 deadline_nsec;
+};
+
+/**
+ * struct drm_syncobj_eventfd
+ * @handle: syncobj handle.
+ * @flags: Zero to wait for the point to be signalled, or
+ * &DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE to wait for a fence to be
+ * available for the point.
+ * @point: syncobj timeline point (set to zero for binary syncobjs).
+ * @fd: Existing eventfd to sent events to.
+ * @pad: Must be zero.
+ *
+ * Register an eventfd to be signalled by a syncobj. The eventfd counter will
+ * be incremented by one.
+ */
+struct drm_syncobj_eventfd {
+ __u32 handle;
+ __u32 flags;
+ __u64 point;
+ __s32 fd;
+ __u32 pad;
};
@@ -947,6 +1059,13 @@ struct drm_crtc_queue_sequence {
__u64 user_data; /* user data passed to event */
};
+#define DRM_CLIENT_NAME_MAX_LEN 64
+struct drm_set_client_name {
+ __u64 name_len;
+ __u64 name;
+};
+
+
#if defined(__cplusplus)
}
#endif
@@ -972,6 +1091,19 @@ extern "C" {
#define DRM_IOCTL_GET_STATS DRM_IOR( 0x06, struct drm_stats)
#define DRM_IOCTL_SET_VERSION DRM_IOWR(0x07, struct drm_set_version)
#define DRM_IOCTL_MODESET_CTL DRM_IOW(0x08, struct drm_modeset_ctl)
+/**
+ * DRM_IOCTL_GEM_CLOSE - Close a GEM handle.
+ *
+ * GEM handles are not reference-counted by the kernel. User-space is
+ * responsible for managing their lifetime. For example, if user-space imports
+ * the same memory object twice on the same DRM file description, the same GEM
+ * handle is returned by both imports, and user-space needs to ensure
+ * &DRM_IOCTL_GEM_CLOSE is performed once only. The same situation can happen
+ * when a memory object is allocated, then exported and imported again on the
+ * same DRM file description. The &DRM_IOCTL_MODE_GETFB2 IOCTL is an exception
+ * and always returns fresh new GEM handles even if an existing GEM handle
+ * already refers to the same memory object before the IOCTL is performed.
+ */
#define DRM_IOCTL_GEM_CLOSE DRM_IOW (0x09, struct drm_gem_close)
#define DRM_IOCTL_GEM_FLINK DRM_IOWR(0x0a, struct drm_gem_flink)
#define DRM_IOCTL_GEM_OPEN DRM_IOWR(0x0b, struct drm_gem_open)
@@ -1012,7 +1144,37 @@ extern "C" {
#define DRM_IOCTL_UNLOCK DRM_IOW( 0x2b, struct drm_lock)
#define DRM_IOCTL_FINISH DRM_IOW( 0x2c, struct drm_lock)
+/**
+ * DRM_IOCTL_PRIME_HANDLE_TO_FD - Convert a GEM handle to a DMA-BUF FD.
+ *
+ * User-space sets &drm_prime_handle.handle with the GEM handle to export and
+ * &drm_prime_handle.flags, and gets back a DMA-BUF file descriptor in
+ * &drm_prime_handle.fd.
+ *
+ * The export can fail for any driver-specific reason, e.g. because export is
+ * not supported for this specific GEM handle (but might be for others).
+ *
+ * Support for exporting DMA-BUFs is advertised via &DRM_PRIME_CAP_EXPORT.
+ */
#define DRM_IOCTL_PRIME_HANDLE_TO_FD DRM_IOWR(0x2d, struct drm_prime_handle)
+/**
+ * DRM_IOCTL_PRIME_FD_TO_HANDLE - Convert a DMA-BUF FD to a GEM handle.
+ *
+ * User-space sets &drm_prime_handle.fd with a DMA-BUF file descriptor to
+ * import, and gets back a GEM handle in &drm_prime_handle.handle.
+ * &drm_prime_handle.flags is unused.
+ *
+ * If an existing GEM handle refers to the memory object backing the DMA-BUF,
+ * that GEM handle is returned. Therefore user-space which needs to handle
+ * arbitrary DMA-BUFs must have a user-space lookup data structure to manually
+ * reference-count duplicated GEM handles. For more information see
+ * &DRM_IOCTL_GEM_CLOSE.
+ *
+ * The import can fail for any driver-specific reason, e.g. because import is
+ * only supported for DMA-BUFs allocated on this DRM device.
+ *
+ * Support for importing DMA-BUFs is advertised via &DRM_PRIME_CAP_IMPORT.
+ */
#define DRM_IOCTL_PRIME_FD_TO_HANDLE DRM_IOWR(0x2e, struct drm_prime_handle)
#define DRM_IOCTL_AGP_ACQUIRE DRM_IO( 0x30)
@@ -1064,6 +1226,26 @@ extern "C" {
#define DRM_IOCTL_MODE_PAGE_FLIP DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip)
#define DRM_IOCTL_MODE_DIRTYFB DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd)
+/**
+ * DRM_IOCTL_MODE_CREATE_DUMB - Create a new dumb buffer object.
+ *
+ * KMS dumb buffers provide a very primitive way to allocate a buffer object
+ * suitable for scanout and map it for software rendering. KMS dumb buffers are
+ * not suitable for hardware-accelerated rendering nor video decoding. KMS dumb
+ * buffers are not suitable to be displayed on any other device than the KMS
+ * device where they were allocated from. Also see
+ * :ref:`kms_dumb_buffer_objects`.
+ *
+ * The IOCTL argument is a struct drm_mode_create_dumb.
+ *
+ * User-space is expected to create a KMS dumb buffer via this IOCTL, then add
+ * it as a KMS framebuffer via &DRM_IOCTL_MODE_ADDFB and map it via
+ * &DRM_IOCTL_MODE_MAP_DUMB.
+ *
+ * &DRM_CAP_DUMB_BUFFER indicates whether this IOCTL is supported.
+ * &DRM_CAP_DUMB_PREFERRED_DEPTH and &DRM_CAP_DUMB_PREFER_SHADOW indicate
+ * driver preferences for dumb buffers.
+ */
#define DRM_IOCTL_MODE_CREATE_DUMB DRM_IOWR(0xB2, struct drm_mode_create_dumb)
#define DRM_IOCTL_MODE_MAP_DUMB DRM_IOWR(0xB3, struct drm_mode_map_dumb)
#define DRM_IOCTL_MODE_DESTROY_DUMB DRM_IOWR(0xB4, struct drm_mode_destroy_dumb)
@@ -1104,8 +1286,13 @@ extern "C" {
* struct as the output.
*
* If the client is DRM master or has &CAP_SYS_ADMIN, &drm_mode_fb_cmd2.handles
- * will be filled with GEM buffer handles. Planes are valid until one has a
- * zero handle -- this can be used to compute the number of planes.
+ * will be filled with GEM buffer handles. Fresh new GEM handles are always
+ * returned, even if another GEM handle referring to the same memory object
+ * already exists on the DRM file description. The caller is responsible for
+ * removing the new handles, e.g. via the &DRM_IOCTL_GEM_CLOSE IOCTL. The same
+ * new handle will be returned for multiple planes in case they use the same
+ * memory object. Planes are valid until one has a zero handle -- this can be
+ * used to compute the number of planes.
*
* Otherwise, &drm_mode_fb_cmd2.handles will be zeroed and planes are valid
* until one has a zero &drm_mode_fb_cmd2.pitches.
@@ -1113,9 +1300,54 @@ extern "C" {
* If the framebuffer has a format modifier, &DRM_MODE_FB_MODIFIERS will be set
* in &drm_mode_fb_cmd2.flags and &drm_mode_fb_cmd2.modifier will contain the
* modifier. Otherwise, user-space must ignore &drm_mode_fb_cmd2.modifier.
+ *
+ * To obtain DMA-BUF FDs for each plane without leaking GEM handles, user-space
+ * can export each handle via &DRM_IOCTL_PRIME_HANDLE_TO_FD, then immediately
+ * close each unique handle via &DRM_IOCTL_GEM_CLOSE, making sure to not
+ * double-close handles which are specified multiple times in the array.
*/
#define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2)
+#define DRM_IOCTL_SYNCOBJ_EVENTFD DRM_IOWR(0xCF, struct drm_syncobj_eventfd)
+
+/**
+ * DRM_IOCTL_MODE_CLOSEFB - Close a framebuffer.
+ *
+ * This closes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL
+ * argument is a framebuffer object ID.
+ *
+ * This IOCTL is similar to &DRM_IOCTL_MODE_RMFB, except it doesn't disable
+ * planes and CRTCs. As long as the framebuffer is used by a plane, it's kept
+ * alive. When the plane no longer uses the framebuffer (because the
+ * framebuffer is replaced with another one, or the plane is disabled), the
+ * framebuffer is cleaned up.
+ *
+ * This is useful to implement flicker-free transitions between two processes.
+ *
+ * Depending on the threat model, user-space may want to ensure that the
+ * framebuffer doesn't expose any sensitive user information: closed
+ * framebuffers attached to a plane can be read back by the next DRM master.
+ */
+#define DRM_IOCTL_MODE_CLOSEFB DRM_IOWR(0xD0, struct drm_mode_closefb)
+
+/**
+ * DRM_IOCTL_SET_CLIENT_NAME - Attach a name to a drm_file
+ *
+ * Having a name allows for easier tracking and debugging.
+ * The length of the name (without null ending char) must be
+ * <= DRM_CLIENT_NAME_MAX_LEN.
+ * The call will fail if the name contains whitespaces or non-printable chars.
+ */
+#define DRM_IOCTL_SET_CLIENT_NAME DRM_IOWR(0xD1, struct drm_set_client_name)
+
+/**
+ * DRM_IOCTL_GEM_CHANGE_HANDLE - Move an object to a different handle
+ *
+ * Some applications (notably CRIU) need objects to have specific gem handles.
+ * This ioctl changes the object at one gem handle to use a new gem handle.
+ */
+#define DRM_IOCTL_GEM_CHANGE_HANDLE DRM_IOWR(0xD2, struct drm_gem_change_handle)
+
/*
* Device specific ioctls should only be in their respective headers
* The device specific ioctl range is from 0x40 to 0x9f.
@@ -1127,25 +1359,50 @@ extern "C" {
#define DRM_COMMAND_BASE 0x40
#define DRM_COMMAND_END 0xA0
-/*
- * Header for events written back to userspace on the drm fd. The
- * type defines the type of event, the length specifies the total
- * length of the event (including the header), and user_data is
- * typically a 64 bit value passed with the ioctl that triggered the
- * event. A read on the drm fd will always only return complete
- * events, that is, if for example the read buffer is 100 bytes, and
- * there are two 64 byte events pending, only one will be returned.
+/**
+ * struct drm_event - Header for DRM events
+ * @type: event type.
+ * @length: total number of payload bytes (including header).
*
- * Event types 0 - 0x7fffffff are generic drm events, 0x80000000 and
- * up are chipset specific.
+ * This struct is a header for events written back to user-space on the DRM FD.
+ * A read on the DRM FD will always only return complete events: e.g. if the
+ * read buffer is 100 bytes large and there are two 64 byte events pending,
+ * only one will be returned.
+ *
+ * Event types 0 - 0x7fffffff are generic DRM events, 0x80000000 and
+ * up are chipset specific. Generic DRM events include &DRM_EVENT_VBLANK,
+ * &DRM_EVENT_FLIP_COMPLETE and &DRM_EVENT_CRTC_SEQUENCE.
*/
struct drm_event {
__u32 type;
__u32 length;
};
+/**
+ * DRM_EVENT_VBLANK - vertical blanking event
+ *
+ * This event is sent in response to &DRM_IOCTL_WAIT_VBLANK with the
+ * &_DRM_VBLANK_EVENT flag set.
+ *
+ * The event payload is a struct drm_event_vblank.
+ */
#define DRM_EVENT_VBLANK 0x01
+/**
+ * DRM_EVENT_FLIP_COMPLETE - page-flip completion event
+ *
+ * This event is sent in response to an atomic commit or legacy page-flip with
+ * the &DRM_MODE_PAGE_FLIP_EVENT flag set.
+ *
+ * The event payload is a struct drm_event_vblank.
+ */
#define DRM_EVENT_FLIP_COMPLETE 0x02
+/**
+ * DRM_EVENT_CRTC_SEQUENCE - CRTC sequence event
+ *
+ * This event is sent in response to &DRM_IOCTL_CRTC_QUEUE_SEQUENCE.
+ *
+ * The event payload is a struct drm_event_crtc_sequence.
+ */
#define DRM_EVENT_CRTC_SEQUENCE 0x03
struct drm_event_vblank {
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 8df261c5ab9b..535cb68fdb5c 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -38,13 +38,13 @@ extern "C" {
*/
/**
- * DOC: uevents generated by i915 on it's device node
+ * DOC: uevents generated by i915 on its device node
*
* I915_L3_PARITY_UEVENT - Generated when the driver receives a parity mismatch
- * event from the gpu l3 cache. Additional information supplied is ROW,
+ * event from the GPU L3 cache. Additional information supplied is ROW,
* BANK, SUBBANK, SLICE of the affected cacheline. Userspace should keep
- * track of these events and if a specific cache-line seems to have a
- * persistent error remap it with the l3 remapping tool supplied in
+ * track of these events, and if a specific cache-line seems to have a
+ * persistent error, remap it with the L3 remapping tool supplied in
* intel-gpu-tools. The value supplied with the event is always 1.
*
* I915_ERROR_UEVENT - Generated upon error detection, currently only via
@@ -280,7 +280,16 @@ enum drm_i915_pmu_engine_sample {
#define I915_PMU_ENGINE_SEMA(class, instance) \
__I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
-#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
+/*
+ * Top 4 bits of every non-engine counter are GT id.
+ */
+#define __I915_PMU_GT_SHIFT (60)
+
+#define ___I915_PMU_OTHER(gt, x) \
+ (((__u64)__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) | \
+ ((__u64)(gt) << __I915_PMU_GT_SHIFT))
+
+#define __I915_PMU_OTHER(x) ___I915_PMU_OTHER(0, x)
#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0)
#define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1)
@@ -290,6 +299,12 @@ enum drm_i915_pmu_engine_sample {
#define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY
+#define __I915_PMU_ACTUAL_FREQUENCY(gt) ___I915_PMU_OTHER(gt, 0)
+#define __I915_PMU_REQUESTED_FREQUENCY(gt) ___I915_PMU_OTHER(gt, 1)
+#define __I915_PMU_INTERRUPTS(gt) ___I915_PMU_OTHER(gt, 2)
+#define __I915_PMU_RC6_RESIDENCY(gt) ___I915_PMU_OTHER(gt, 3)
+#define __I915_PMU_SOFTWARE_GT_AWAKE_TIME(gt) ___I915_PMU_OTHER(gt, 4)
+
/* Each region is a minimum of 16k, and there are at most 255 of them.
*/
#define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use
@@ -659,7 +674,8 @@ typedef struct drm_i915_irq_wait {
* If the IOCTL is successful, the returned parameter will be set to one of the
* following values:
* * 0 if HuC firmware load is not complete,
- * * 1 if HuC firmware is authenticated and running.
+ * * 1 if HuC firmware is loaded and fully authenticated,
+ * * 2 if HuC firmware is loaded and authenticated for clear media only
*/
#define I915_PARAM_HUC_STATUS 42
@@ -677,7 +693,7 @@ typedef struct drm_i915_irq_wait {
#define I915_PARAM_HAS_EXEC_FENCE 44
/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture
- * user specified bufffers for post-mortem debugging of GPU hangs. See
+ * user-specified buffers for post-mortem debugging of GPU hangs. See
* EXEC_OBJECT_CAPTURE.
*/
#define I915_PARAM_HAS_EXEC_CAPTURE 45
@@ -771,6 +787,31 @@ typedef struct drm_i915_irq_wait {
*/
#define I915_PARAM_OA_TIMESTAMP_FREQUENCY 57
+/*
+ * Query the status of PXP support in i915.
+ *
+ * The query can fail in the following scenarios with the listed error codes:
+ * -ENODEV = PXP support is not available on the GPU device or in the
+ * kernel due to missing component drivers or kernel configs.
+ *
+ * If the IOCTL is successful, the returned parameter will be set to one of
+ * the following values:
+ * 1 = PXP feature is supported and is ready for use.
+ * 2 = PXP feature is supported but should be ready soon (pending
+ * initialization of non-i915 system dependencies).
+ *
+ * NOTE: When param is supported (positive return values), user space should
+ * still refer to the GEM PXP context-creation UAPI header specs to be
+ * aware of possible failure due to system state machine at the time.
+ */
+#define I915_PARAM_PXP_STATUS 58
+
+/*
+ * Query if kernel allows marking a context to send a Freq hint to SLPC. This
+ * will enable use of the strategies allowed by the SLPC algorithm.
+ */
+#define I915_PARAM_HAS_CONTEXT_FREQ_HINT 59
+
/* Must be kept compact -- no holes and well documented */
/**
@@ -1571,7 +1612,7 @@ struct drm_i915_gem_busy {
* is accurate.
*
* The returned dword is split into two fields to indicate both
- * the engine classess on which the object is being read, and the
+ * the engine classes on which the object is being read, and the
* engine class on which it is currently being written (if any).
*
* The low word (bits 0:15) indicate if the object is being written
@@ -1780,7 +1821,7 @@ struct drm_i915_gem_madvise {
__u32 handle;
/* Advice: either the buffer will be needed again in the near future,
- * or wont be and could be discarded under memory pressure.
+ * or won't be and could be discarded under memory pressure.
*/
__u32 madv;
@@ -2096,8 +2137,41 @@ struct drm_i915_gem_context_param {
*
* -ENODEV: feature not available
* -EPERM: trying to mark a recoverable or not bannable context as protected
+ * -ENXIO: A dependency such as a component driver or firmware is not yet
+ * loaded so user space may need to attempt again. Depending on the
+ * device, this error may be reported if protected context creation is
+ * attempted very early after kernel start because the internal timeout
+ * waiting for such dependencies is not guaranteed to be larger than
+ * required (numbers differ depending on system and kernel config):
+ * - ADL/RPL: dependencies may take up to 3 seconds from kernel start
+ * while context creation internal timeout is 250 milisecs
+ * - MTL: dependencies may take up to 8 seconds from kernel start
+ * while context creation internal timeout is 250 milisecs
+ * NOTE: such dependencies happen once, so a subsequent call to create a
+ * protected context after a prior successful call will not experience
+ * such timeouts and will not return -ENXIO (unless the driver is reloaded,
+ * or, depending on the device, resumes from a suspended state).
+ * -EIO: The firmware did not succeed in creating the protected context.
*/
#define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd
+
+/*
+ * I915_CONTEXT_PARAM_LOW_LATENCY:
+ *
+ * Mark this context as a low latency workload which requires aggressive GT
+ * frequency scaling. Use I915_PARAM_HAS_CONTEXT_FREQ_HINT to check if the kernel
+ * supports this per context flag.
+ */
+#define I915_CONTEXT_PARAM_LOW_LATENCY 0xe
+
+/*
+ * I915_CONTEXT_PARAM_CONTEXT_IMAGE:
+ *
+ * Allows userspace to provide own context images.
+ *
+ * Note that this is a debug API not available on production kernel builds.
+ */
+#define I915_CONTEXT_PARAM_CONTEXT_IMAGE 0xf
/* Must be kept compact -- no holes and well documented */
/** @value: Context parameter value to be set or queried */
@@ -2491,7 +2565,7 @@ struct i915_context_param_engines {
#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
#define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */
#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */
- struct i915_engine_class_instance engines[0];
+ struct i915_engine_class_instance engines[];
} __attribute__((packed));
#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \
@@ -2499,6 +2573,24 @@ struct i915_context_param_engines {
struct i915_engine_class_instance engines[N__]; \
} __attribute__((packed)) name__
+struct i915_gem_context_param_context_image {
+ /** @engine: Engine class & instance to be configured. */
+ struct i915_engine_class_instance engine;
+
+ /** @flags: One of the supported flags or zero. */
+ __u32 flags;
+#define I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX (1u << 0)
+
+ /** @size: Size of the image blob pointed to by @image. */
+ __u32 size;
+
+ /** @mbz: Must be zero. */
+ __u32 mbz;
+
+ /** @image: Userspace memory containing the context image. */
+ __u64 image;
+} __attribute__((packed));
+
/**
* struct drm_i915_gem_context_create_ext_setparam - Context parameter
* to set or query during context creation.
@@ -2573,19 +2665,29 @@ struct drm_i915_reg_read {
*
*/
+/*
+ * struct drm_i915_reset_stats - Return global reset and other context stats
+ *
+ * Driver keeps few stats for each contexts and also global reset count.
+ * This struct can be used to query those stats.
+ */
struct drm_i915_reset_stats {
+ /** @ctx_id: ID of the requested context */
__u32 ctx_id;
+
+ /** @flags: MBZ */
__u32 flags;
- /* All resets since boot/module reload, for all contexts */
+ /** @reset_count: All resets since boot/module reload, for all contexts */
__u32 reset_count;
- /* Number of batches lost when active in GPU, for this context */
+ /** @batch_active: Number of batches lost when active in GPU, for this context */
__u32 batch_active;
- /* Number of batches lost pending for execution, for this context */
+ /** @batch_pending: Number of batches lost pending for execution, for this context */
__u32 batch_pending;
+ /** @pad: MBZ */
__u32 pad;
};
@@ -2676,6 +2778,10 @@ enum drm_i915_oa_format {
I915_OAR_FORMAT_A32u40_A4u32_B8_C8,
I915_OA_FORMAT_A24u40_A14u32_B8_C8,
+ /* MTL OAM */
+ I915_OAM_FORMAT_MPEC8u64_B8_C8,
+ I915_OAM_FORMAT_MPEC8u32_B8_C8,
+
I915_OA_FORMAT_MAX /* non-ABI */
};
@@ -2758,6 +2864,25 @@ enum drm_i915_perf_property_id {
*/
DRM_I915_PERF_PROP_POLL_OA_PERIOD,
+ /**
+ * Multiple engines may be mapped to the same OA unit. The OA unit is
+ * identified by class:instance of any engine mapped to it.
+ *
+ * This parameter specifies the engine class and must be passed along
+ * with DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE.
+ *
+ * This property is available in perf revision 6.
+ */
+ DRM_I915_PERF_PROP_OA_ENGINE_CLASS,
+
+ /**
+ * This parameter specifies the engine instance and must be passed along
+ * with DRM_I915_PERF_PROP_OA_ENGINE_CLASS.
+ *
+ * This property is available in perf revision 6.
+ */
+ DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE,
+
DRM_I915_PERF_PROP_MAX /* non-ABI */
};
@@ -2940,6 +3065,7 @@ struct drm_i915_query_item {
* - %DRM_I915_QUERY_MEMORY_REGIONS (see struct drm_i915_query_memory_regions)
* - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob uAPI`)
* - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct drm_i915_query_topology_info)
+ * - %DRM_I915_QUERY_GUC_SUBMISSION_VERSION (see struct drm_i915_query_guc_submission_version)
*/
__u64 query_id;
#define DRM_I915_QUERY_TOPOLOGY_INFO 1
@@ -2948,6 +3074,7 @@ struct drm_i915_query_item {
#define DRM_I915_QUERY_MEMORY_REGIONS 4
#define DRM_I915_QUERY_HWCONFIG_BLOB 5
#define DRM_I915_QUERY_GEOMETRY_SUBSLICES 6
+#define DRM_I915_QUERY_GUC_SUBMISSION_VERSION 7
/* Must be kept compact -- no holes and well documented */
/**
@@ -3173,7 +3300,7 @@ struct drm_i915_query_topology_info {
* // enough to hold our array of engines. The kernel will fill out the
* // item.length for us, which is the number of bytes we need.
* //
- * // Alternatively a large buffer can be allocated straight away enabling
+ * // Alternatively a large buffer can be allocated straightaway enabling
* // querying in one pass, in which case item.length should contain the
* // length of the provided buffer.
* err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
@@ -3183,7 +3310,7 @@ struct drm_i915_query_topology_info {
* // Now that we allocated the required number of bytes, we call the ioctl
* // again, this time with the data_ptr pointing to our newly allocated
* // blob, which the kernel can then populate with info on all engines.
- * item.data_ptr = (uintptr_t)&info,
+ * item.data_ptr = (uintptr_t)&info;
*
* err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
* if (err) ...
@@ -3213,7 +3340,7 @@ struct drm_i915_query_topology_info {
/**
* struct drm_i915_engine_info
*
- * Describes one engine and it's capabilities as known to the driver.
+ * Describes one engine and its capabilities as known to the driver.
*/
struct drm_i915_engine_info {
/** @engine: Engine class and instance. */
@@ -3494,6 +3621,20 @@ struct drm_i915_query_memory_regions {
};
/**
+ * struct drm_i915_query_guc_submission_version - query GuC submission interface version
+ */
+struct drm_i915_query_guc_submission_version {
+ /** @branch: Firmware branch version. */
+ __u32 branch;
+ /** @major: Firmware major version. */
+ __u32 major;
+ /** @minor: Firmware minor version. */
+ __u32 minor;
+ /** @patch: Firmware patch version. */
+ __u32 patch;
+};
+
+/**
* DOC: GuC HWCONFIG blob uAPI
*
* The GuC produces a blob with information about the current device.
@@ -3607,9 +3748,13 @@ struct drm_i915_gem_create_ext {
*
* For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
* struct drm_i915_gem_create_ext_protected_content.
+ *
+ * For I915_GEM_CREATE_EXT_SET_PAT usage see
+ * struct drm_i915_gem_create_ext_set_pat.
*/
#define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
#define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
+#define I915_GEM_CREATE_EXT_SET_PAT 2
__u64 extensions;
};
@@ -3724,6 +3869,43 @@ struct drm_i915_gem_create_ext_protected_content {
__u32 flags;
};
+/**
+ * struct drm_i915_gem_create_ext_set_pat - The
+ * I915_GEM_CREATE_EXT_SET_PAT extension.
+ *
+ * If this extension is provided, the specified caching policy (PAT index) is
+ * applied to the buffer object.
+ *
+ * Below is an example on how to create an object with specific caching policy:
+ *
+ * .. code-block:: C
+ *
+ * struct drm_i915_gem_create_ext_set_pat set_pat_ext = {
+ * .base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
+ * .pat_index = 0,
+ * };
+ * struct drm_i915_gem_create_ext create_ext = {
+ * .size = PAGE_SIZE,
+ * .extensions = (uintptr_t)&set_pat_ext,
+ * };
+ *
+ * int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
+ * if (err) ...
+ */
+struct drm_i915_gem_create_ext_set_pat {
+ /** @base: Extension link. See struct i915_user_extension. */
+ struct i915_user_extension base;
+ /**
+ * @pat_index: PAT index to be set
+ * PAT index is a bit field in Page Table Entry to control caching
+ * behaviors for GPU accesses. The definition of PAT index is
+ * platform dependent and can be found in hardware specifications,
+ */
+ __u32 pat_index;
+ /** @rsvd: reserved for future use */
+ __u32 rsvd;
+};
+
/* ID of the protected content session managed by i915 when PXP is active */
#define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf
diff --git a/tools/include/uapi/linux/bits.h b/tools/include/uapi/linux/bits.h
new file mode 100644
index 000000000000..a04afef9efca
--- /dev/null
+++ b/tools/include/uapi/linux/bits.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* bits.h: Macros for dealing with bitmasks. */
+
+#ifndef _UAPI_LINUX_BITS_H
+#define _UAPI_LINUX_BITS_H
+
+#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h))))
+
+#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h))))
+
+#define __GENMASK_U128(h, l) \
+ ((_BIT128((h)) << 1) - (_BIT128(l)))
+
+#endif /* _UAPI_LINUX_BITS_H */
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 464ca3f01fe7..be7d8e060e10 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -19,6 +19,7 @@
/* ld/ldx fields */
#define BPF_DW 0x18 /* double word (64-bit) */
+#define BPF_MEMSX 0x80 /* load with sign extension */
#define BPF_ATOMIC 0xc0 /* atomic memory ops - op type in immediate */
#define BPF_XADD 0xc0 /* exclusive add - legacy name */
@@ -41,6 +42,7 @@
#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */
#define BPF_JSLT 0xc0 /* SLT is signed, '<' */
#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */
+#define BPF_JCOND 0xe0 /* conditional pseudo jumps: may_goto, goto_or_nop */
#define BPF_CALL 0x80 /* function call */
#define BPF_EXIT 0x90 /* function return */
@@ -49,6 +51,13 @@
#define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */
#define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */
+#define BPF_LOAD_ACQ 0x100 /* load-acquire */
+#define BPF_STORE_REL 0x110 /* store-release */
+
+enum bpf_cond_pseudo_jmp {
+ BPF_MAY_GOTO = 0,
+};
+
/* Register numbers */
enum {
BPF_REG_0 = 0,
@@ -76,12 +85,29 @@ struct bpf_insn {
__s32 imm; /* signed immediate constant */
};
-/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */
+/* Deprecated: use struct bpf_lpm_trie_key_u8 (when the "data" member is needed for
+ * byte access) or struct bpf_lpm_trie_key_hdr (when using an alternative type for
+ * the trailing flexible array member) instead.
+ */
struct bpf_lpm_trie_key {
__u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */
__u8 data[0]; /* Arbitrary size */
};
+/* Header for bpf_lpm_trie_key structs */
+struct bpf_lpm_trie_key_hdr {
+ __u32 prefixlen;
+};
+
+/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry, with trailing byte array. */
+struct bpf_lpm_trie_key_u8 {
+ union {
+ struct bpf_lpm_trie_key_hdr hdr;
+ __u32 prefixlen;
+ };
+ __u8 data[]; /* Arbitrary size */
+};
+
struct bpf_cgroup_storage_key {
__u64 cgroup_inode_id; /* cgroup inode id */
__u32 attach_type; /* program attach type (enum bpf_attach_type) */
@@ -424,6 +450,7 @@ union bpf_iter_link_info {
* * **struct bpf_map_info**
* * **struct bpf_btf_info**
* * **struct bpf_link_info**
+ * * **struct bpf_token_info**
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
@@ -616,7 +643,11 @@ union bpf_iter_link_info {
* to NULL to begin the batched operation. After each subsequent
* **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant
* *out_batch* as the *in_batch* for the next operation to
- * continue iteration from the current point.
+ * continue iteration from the current point. Both *in_batch* and
+ * *out_batch* must point to memory large enough to hold a key,
+ * except for maps of type **BPF_MAP_TYPE_{HASH, PERCPU_HASH,
+ * LRU_HASH, LRU_PERCPU_HASH}**, for which batch parameters
+ * must be at least 4 bytes wide regardless of key size.
*
* The *keys* and *values* are output parameters which must point
* to memory large enough to hold *count* items based on the key
@@ -846,6 +877,47 @@ union bpf_iter_link_info {
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
+ * BPF_TOKEN_CREATE
+ * Description
+ * Create BPF token with embedded information about what
+ * BPF-related functionality it allows:
+ * - a set of allowed bpf() syscall commands;
+ * - a set of allowed BPF map types to be created with
+ * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed;
+ * - a set of allowed BPF program types and BPF program attach
+ * types to be loaded with BPF_PROG_LOAD command, if
+ * BPF_PROG_LOAD itself is allowed.
+ *
+ * BPF token is created (derived) from an instance of BPF FS,
+ * assuming it has necessary delegation mount options specified.
+ * This BPF token can be passed as an extra parameter to various
+ * bpf() syscall commands to grant BPF subsystem functionality to
+ * unprivileged processes.
+ *
+ * When created, BPF token is "associated" with the owning
+ * user namespace of BPF FS instance (super block) that it was
+ * derived from, and subsequent BPF operations performed with
+ * BPF token would be performing capabilities checks (i.e.,
+ * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within
+ * that user namespace. Without BPF token, such capabilities
+ * have to be granted in init user namespace, making bpf()
+ * syscall incompatible with user namespace, for the most part.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_PROG_STREAM_READ_BY_FD
+ * Description
+ * Read data of a program's BPF stream. The program is identified
+ * by *prog_fd*, and the stream is identified by the *stream_id*.
+ * The data is copied to a buffer pointed to by *stream_buf*, and
+ * filled less than or equal to *stream_buf_len* bytes.
+ *
+ * Return
+ * Number of bytes read from the stream on success, or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
* NOTES
* eBPF objects (maps and programs) can be shared between processes.
*
@@ -900,6 +972,9 @@ enum bpf_cmd {
BPF_ITER_CREATE,
BPF_LINK_DETACH,
BPF_PROG_BIND_MAP,
+ BPF_TOKEN_CREATE,
+ BPF_PROG_STREAM_READ_BY_FD,
+ __MAX_BPF_CMD,
};
enum bpf_map_type {
@@ -931,7 +1006,14 @@ enum bpf_map_type {
*/
BPF_MAP_TYPE_CGROUP_STORAGE = BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED,
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
- BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+ BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED,
+ /* BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE is available to bpf programs
+ * attaching to a cgroup. The new mechanism (BPF_MAP_TYPE_CGRP_STORAGE +
+ * local percpu kptr) supports all BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE
+ * functionality and more. So mark * BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE
+ * deprecated.
+ */
+ BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED,
BPF_MAP_TYPE_QUEUE,
BPF_MAP_TYPE_STACK,
BPF_MAP_TYPE_SK_STORAGE,
@@ -943,6 +1025,9 @@ enum bpf_map_type {
BPF_MAP_TYPE_BLOOM_FILTER,
BPF_MAP_TYPE_USER_RINGBUF,
BPF_MAP_TYPE_CGRP_STORAGE,
+ BPF_MAP_TYPE_ARENA,
+ BPF_MAP_TYPE_INSN_ARRAY,
+ __MAX_BPF_MAP_TYPE
};
/* Note that tracing related programs such as
@@ -986,6 +1071,8 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LSM,
BPF_PROG_TYPE_SK_LOOKUP,
BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
+ BPF_PROG_TYPE_NETFILTER,
+ __MAX_BPF_PROG_TYPE
};
enum bpf_attach_type {
@@ -1033,11 +1120,28 @@ enum bpf_attach_type {
BPF_PERF_EVENT,
BPF_TRACE_KPROBE_MULTI,
BPF_LSM_CGROUP,
+ BPF_STRUCT_OPS,
+ BPF_NETFILTER,
+ BPF_TCX_INGRESS,
+ BPF_TCX_EGRESS,
+ BPF_TRACE_UPROBE_MULTI,
+ BPF_CGROUP_UNIX_CONNECT,
+ BPF_CGROUP_UNIX_SENDMSG,
+ BPF_CGROUP_UNIX_RECVMSG,
+ BPF_CGROUP_UNIX_GETPEERNAME,
+ BPF_CGROUP_UNIX_GETSOCKNAME,
+ BPF_NETKIT_PRIMARY,
+ BPF_NETKIT_PEER,
+ BPF_TRACE_KPROBE_SESSION,
+ BPF_TRACE_UPROBE_SESSION,
__MAX_BPF_ATTACH_TYPE
};
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
+/* Add BPF_LINK_TYPE(type, name) in bpf_types.h to keep bpf_link_type_strs[]
+ * in sync with the definitions below.
+ */
enum bpf_link_type {
BPF_LINK_TYPE_UNSPEC = 0,
BPF_LINK_TYPE_RAW_TRACEPOINT = 1,
@@ -1049,8 +1153,24 @@ enum bpf_link_type {
BPF_LINK_TYPE_PERF_EVENT = 7,
BPF_LINK_TYPE_KPROBE_MULTI = 8,
BPF_LINK_TYPE_STRUCT_OPS = 9,
+ BPF_LINK_TYPE_NETFILTER = 10,
+ BPF_LINK_TYPE_TCX = 11,
+ BPF_LINK_TYPE_UPROBE_MULTI = 12,
+ BPF_LINK_TYPE_NETKIT = 13,
+ BPF_LINK_TYPE_SOCKMAP = 14,
+ __MAX_BPF_LINK_TYPE,
+};
+
+#define MAX_BPF_LINK_TYPE __MAX_BPF_LINK_TYPE
- MAX_BPF_LINK_TYPE,
+enum bpf_perf_event_type {
+ BPF_PERF_EVENT_UNSPEC = 0,
+ BPF_PERF_EVENT_UPROBE = 1,
+ BPF_PERF_EVENT_URETPROBE = 2,
+ BPF_PERF_EVENT_KPROBE = 3,
+ BPF_PERF_EVENT_KRETPROBE = 4,
+ BPF_PERF_EVENT_TRACEPOINT = 5,
+ BPF_PERF_EVENT_EVENT = 6,
};
/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
@@ -1099,7 +1219,13 @@ enum bpf_link_type {
*/
#define BPF_F_ALLOW_OVERRIDE (1U << 0)
#define BPF_F_ALLOW_MULTI (1U << 1)
+/* Generic attachment flags. */
#define BPF_F_REPLACE (1U << 2)
+#define BPF_F_BEFORE (1U << 3)
+#define BPF_F_AFTER (1U << 4)
+#define BPF_F_ID (1U << 5)
+#define BPF_F_PREORDER (1U << 6)
+#define BPF_F_LINK BPF_F_LINK /* 1 << 13 */
/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
* verifier will perform strict alignment checking as if the kernel
@@ -1108,7 +1234,7 @@ enum bpf_link_type {
*/
#define BPF_F_STRICT_ALIGNMENT (1U << 0)
-/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the
+/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROG_LOAD command, the
* verifier will allow any alignment whatsoever. On platforms
* with strict alignment requirements for loads ands stores (such
* as sparc and mips) the verifier validates that all loads and
@@ -1156,10 +1282,32 @@ enum bpf_link_type {
*/
#define BPF_F_XDP_HAS_FRAGS (1U << 5)
+/* If BPF_F_XDP_DEV_BOUND_ONLY is used in BPF_PROG_LOAD command, the loaded
+ * program becomes device-bound but can access XDP metadata.
+ */
+#define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6)
+
+/* The verifier internal test flag. Behavior is undefined */
+#define BPF_F_TEST_REG_INVARIANTS (1U << 7)
+
/* link_create.kprobe_multi.flags used in LINK_CREATE command for
* BPF_TRACE_KPROBE_MULTI attach type to create return probe.
*/
-#define BPF_F_KPROBE_MULTI_RETURN (1U << 0)
+enum {
+ BPF_F_KPROBE_MULTI_RETURN = (1U << 0)
+};
+
+/* link_create.uprobe_multi.flags used in LINK_CREATE command for
+ * BPF_TRACE_UPROBE_MULTI attach type to create return probe.
+ */
+enum {
+ BPF_F_UPROBE_MULTI_RETURN = (1U << 0)
+};
+
+/* link_create.netfilter.flags used in LINK_CREATE command for
+ * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation.
+ */
+#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0)
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
* the following extensions:
@@ -1215,6 +1363,10 @@ enum bpf_link_type {
*/
#define BPF_PSEUDO_KFUNC_CALL 2
+enum bpf_addr_space_cast {
+ BPF_ADDR_SPACE_CAST = 1,
+};
+
/* flags for BPF_MAP_UPDATE_ELEM command */
enum {
BPF_ANY = 0, /* create new element or update existing */
@@ -1261,6 +1413,27 @@ enum {
/* Create a map that is suitable to be an inner map with dynamic max entries */
BPF_F_INNER_MAP = (1U << 12),
+
+/* Create a map that will be registered/unregesitered by the backed bpf_link */
+ BPF_F_LINK = (1U << 13),
+
+/* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */
+ BPF_F_PATH_FD = (1U << 14),
+
+/* Flag for value_type_btf_obj_fd, the fd is available */
+ BPF_F_VTYPE_BTF_OBJ_FD = (1U << 15),
+
+/* BPF token FD is passed in a corresponding command's token_fd field */
+ BPF_F_TOKEN_FD = (1U << 16),
+
+/* When user space page faults in bpf_arena send SIGSEGV instead of inserting new page */
+ BPF_F_SEGV_ON_FAULT = (1U << 17),
+
+/* Do not translate kernel bpf_arena pointers to user pointers */
+ BPF_F_NO_USER_CONV = (1U << 18),
+
+/* Enable BPF ringbuf overwrite mode */
+ BPF_F_RB_OVERWRITE = (1U << 19),
};
/* Flags for BPF_PROG_QUERY. */
@@ -1277,6 +1450,8 @@ enum {
#define BPF_F_TEST_RUN_ON_CPU (1U << 0)
/* If set, XDP frames will be transmitted after processing */
#define BPF_F_TEST_XDP_LIVE_FRAMES (1U << 1)
+/* If set, apply CHECKSUM_COMPLETE to skb and validate the checksum */
+#define BPF_F_TEST_SKB_CHECKSUM_COMPLETE (1U << 2)
/* type for BPF_ENABLE_STATS */
enum bpf_stats_type {
@@ -1305,6 +1480,11 @@ struct bpf_stack_build_id {
#define BPF_OBJ_NAME_LEN 16U
+enum {
+ BPF_STREAM_STDOUT = 1,
+ BPF_STREAM_STDERR = 2,
+};
+
union bpf_attr {
struct { /* anonymous struct used by BPF_MAP_CREATE command */
__u32 map_type; /* one of enum bpf_map_type */
@@ -1332,11 +1512,29 @@ union bpf_attr {
* BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the
* number of hash functions (if 0, the bloom filter will default
* to using 5 hash functions).
+ *
+ * BPF_MAP_TYPE_ARENA - contains the address where user space
+ * is going to mmap() the arena. It has to be page aligned.
*/
__u64 map_extra;
+
+ __s32 value_type_btf_obj_fd; /* fd pointing to a BTF
+ * type data for
+ * btf_vmlinux_value_type_id.
+ */
+ /* BPF token FD to use with BPF_MAP_CREATE operation.
+ * If provided, map_flags should have BPF_F_TOKEN_FD flag set.
+ */
+ __s32 map_token_fd;
+
+ /* Hash of the program that has exclusive access to the map.
+ */
+ __aligned_u64 excl_prog_hash;
+ /* Size of the passed excl_prog_hash. */
+ __u32 excl_prog_hash_size;
};
- struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+ struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */
__u32 map_fd;
__aligned_u64 key;
union {
@@ -1398,23 +1596,64 @@ union bpf_attr {
__aligned_u64 fd_array; /* array of FDs */
__aligned_u64 core_relos;
__u32 core_relo_rec_size; /* sizeof(struct bpf_core_relo) */
+ /* output: actual total log contents size (including termintaing zero).
+ * It could be both larger than original log_size (if log was
+ * truncated), or smaller (if log buffer wasn't filled completely).
+ */
+ __u32 log_true_size;
+ /* BPF token FD to use with BPF_PROG_LOAD operation.
+ * If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
+ */
+ __s32 prog_token_fd;
+ /* The fd_array_cnt can be used to pass the length of the
+ * fd_array array. In this case all the [map] file descriptors
+ * passed in this array will be bound to the program, even if
+ * the maps are not referenced directly. The functionality is
+ * similar to the BPF_PROG_BIND_MAP syscall, but maps can be
+ * used by the verifier during the program load. If provided,
+ * then the fd_array[0,...,fd_array_cnt-1] is expected to be
+ * continuous.
+ */
+ __u32 fd_array_cnt;
+ /* Pointer to a buffer containing the signature of the BPF
+ * program.
+ */
+ __aligned_u64 signature;
+ /* Size of the signature buffer in bytes. */
+ __u32 signature_size;
+ /* ID of the kernel keyring to be used for signature
+ * verification.
+ */
+ __s32 keyring_id;
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
__aligned_u64 pathname;
__u32 bpf_fd;
__u32 file_flags;
+ /* Same as dirfd in openat() syscall; see openat(2)
+ * manpage for details of path FD and pathname semantics;
+ * path_fd should accompanied by BPF_F_PATH_FD flag set in
+ * file_flags field, otherwise it should be set to zero;
+ * if BPF_F_PATH_FD flag is not set, AT_FDCWD is assumed.
+ */
+ __s32 path_fd;
};
struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
- __u32 target_fd; /* container object to attach to */
- __u32 attach_bpf_fd; /* eBPF program to attach */
+ union {
+ __u32 target_fd; /* target object to attach to or ... */
+ __u32 target_ifindex; /* target ifindex */
+ };
+ __u32 attach_bpf_fd;
__u32 attach_type;
__u32 attach_flags;
- __u32 replace_bpf_fd; /* previously attached eBPF
- * program to replace if
- * BPF_F_REPLACE is used
- */
+ __u32 replace_bpf_fd;
+ union {
+ __u32 relative_fd;
+ __u32 relative_id;
+ };
+ __u64 expected_revision;
};
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
@@ -1451,6 +1690,7 @@ union bpf_attr {
};
__u32 next_id;
__u32 open_flags;
+ __s32 fd_by_id_token_fd;
};
struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */
@@ -1460,21 +1700,33 @@ union bpf_attr {
} info;
struct { /* anonymous struct used by BPF_PROG_QUERY command */
- __u32 target_fd; /* container object to query */
+ union {
+ __u32 target_fd; /* target object to query or ... */
+ __u32 target_ifindex; /* target ifindex */
+ };
__u32 attach_type;
__u32 query_flags;
__u32 attach_flags;
__aligned_u64 prog_ids;
- __u32 prog_cnt;
+ union {
+ __u32 prog_cnt;
+ __u32 count;
+ };
+ __u32 :32;
/* output: per-program attach_flags.
* not allowed to be set during effective query.
*/
__aligned_u64 prog_attach_flags;
+ __aligned_u64 link_ids;
+ __aligned_u64 link_attach_flags;
+ __u64 revision;
} query;
struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
- __u64 name;
- __u32 prog_fd;
+ __u64 name;
+ __u32 prog_fd;
+ __u32 :32;
+ __aligned_u64 cookie;
} raw_tracepoint;
struct { /* anonymous struct for BPF_BTF_LOAD */
@@ -1483,6 +1735,16 @@ union bpf_attr {
__u32 btf_size;
__u32 btf_log_size;
__u32 btf_log_level;
+ /* output: actual total log contents size (including termintaing zero).
+ * It could be both larger than original log_size (if log was
+ * truncated), or smaller (if log buffer wasn't filled completely).
+ */
+ __u32 btf_log_true_size;
+ __u32 btf_flags;
+ /* BPF token FD to use with BPF_BTF_LOAD operation.
+ * If provided, btf_flags should have BPF_F_TOKEN_FD flag set.
+ */
+ __s32 btf_token_fd;
};
struct {
@@ -1502,15 +1764,18 @@ union bpf_attr {
} task_fd_query;
struct { /* struct used by BPF_LINK_CREATE command */
- __u32 prog_fd; /* eBPF program to attach */
union {
- __u32 target_fd; /* object to attach to */
- __u32 target_ifindex; /* target ifindex */
+ __u32 prog_fd; /* eBPF program to attach */
+ __u32 map_fd; /* struct_ops to attach */
+ };
+ union {
+ __u32 target_fd; /* target object to attach to or ... */
+ __u32 target_ifindex; /* target ifindex */
};
__u32 attach_type; /* attach type */
__u32 flags; /* extra flags */
union {
- __u32 target_btf_id; /* btf_id of target to attach to */
+ __u32 target_btf_id; /* btf_id of target to attach to */
struct {
__aligned_u64 iter_info; /* extra bpf_iter_link_info */
__u32 iter_info_len; /* iter_info length */
@@ -1538,17 +1803,64 @@ union bpf_attr {
*/
__u64 cookie;
} tracing;
+ struct {
+ __u32 pf;
+ __u32 hooknum;
+ __s32 priority;
+ __u32 flags;
+ } netfilter;
+ struct {
+ union {
+ __u32 relative_fd;
+ __u32 relative_id;
+ };
+ __u64 expected_revision;
+ } tcx;
+ struct {
+ __aligned_u64 path;
+ __aligned_u64 offsets;
+ __aligned_u64 ref_ctr_offsets;
+ __aligned_u64 cookies;
+ __u32 cnt;
+ __u32 flags;
+ __u32 pid;
+ } uprobe_multi;
+ struct {
+ union {
+ __u32 relative_fd;
+ __u32 relative_id;
+ };
+ __u64 expected_revision;
+ } netkit;
+ struct {
+ union {
+ __u32 relative_fd;
+ __u32 relative_id;
+ };
+ __u64 expected_revision;
+ } cgroup;
};
} link_create;
struct { /* struct used by BPF_LINK_UPDATE command */
__u32 link_fd; /* link fd */
- /* new program fd to update link with */
- __u32 new_prog_fd;
+ union {
+ /* new program fd to update link with */
+ __u32 new_prog_fd;
+ /* new struct_ops map fd to update link with */
+ __u32 new_map_fd;
+ };
__u32 flags; /* extra flags */
- /* expected link's program fd; is specified only if
- * BPF_F_REPLACE flag is set in flags */
- __u32 old_prog_fd;
+ union {
+ /* expected link's program fd; is specified only if
+ * BPF_F_REPLACE flag is set in flags.
+ */
+ __u32 old_prog_fd;
+ /* expected link's map fd; is specified only
+ * if BPF_F_REPLACE flag is set.
+ */
+ __u32 old_map_fd;
+ };
} link_update;
struct {
@@ -1570,6 +1882,18 @@ union bpf_attr {
__u32 flags; /* extra flags */
} prog_bind_map;
+ struct { /* struct used by BPF_TOKEN_CREATE command */
+ __u32 flags;
+ __u32 bpffs_fd;
+ } token_create;
+
+ struct {
+ __aligned_u64 stream_buf;
+ __u32 stream_buf_len;
+ __u32 stream_id;
+ __u32 prog_fd;
+ } prog_stream_read;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
@@ -1642,17 +1966,17 @@ union bpf_attr {
* Description
* This helper is a "printk()-like" facility for debugging. It
* prints a message defined by format *fmt* (of size *fmt_size*)
- * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
+ * to file *\/sys/kernel/tracing/trace* from TraceFS, if
* available. It can take up to three additional **u64**
* arguments (as an eBPF helpers, the total number of arguments is
* limited to five).
*
* Each time the helper is called, it appends a line to the trace.
- * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is
- * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this.
+ * Lines are discarded while *\/sys/kernel/tracing/trace* is
+ * open, use *\/sys/kernel/tracing/trace_pipe* to avoid this.
* The format of the trace is customizable, and the exact output
* one will get depends on the options set in
- * *\/sys/kernel/debug/tracing/trace_options* (see also the
+ * *\/sys/kernel/tracing/trace_options* (see also the
* *README* file under the same directory). However, it usually
* defaults to something like:
*
@@ -1717,15 +2041,21 @@ union bpf_attr {
* program.
* Return
* The SMP id of the processor running the program.
+ * Attributes
+ * __bpf_fastcall
*
* long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
* Description
* Store *len* bytes from address *from* into the packet
- * associated to *skb*, at *offset*. *flags* are a combination of
- * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the
- * checksum for the packet after storing the bytes) and
- * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
- * **->swhash** and *skb*\ **->l4hash** to 0).
+ * associated to *skb*, at *offset*. The *flags* are a combination
+ * of the following values:
+ *
+ * **BPF_F_RECOMPUTE_CSUM**
+ * Automatically update *skb*\ **->csum** after storing the
+ * bytes.
+ * **BPF_F_INVALIDATE_HASH**
+ * Set *skb*\ **->hash**, *skb*\ **->swhash** and *skb*\
+ * **->l4hash** to 0.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
@@ -1777,7 +2107,8 @@ union bpf_attr {
* untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
* for updates resulting in a null checksum the value is set to
* **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
- * the checksum is to be computed against a pseudo-header.
+ * that the modified header field is part of the pseudo-header.
+ * Flag **BPF_F_IPV6** should be set for IPv6 packets.
*
* This helper works in combination with **bpf_csum_diff**\ (),
* which does not update the checksum in-place, but offers more
@@ -1845,7 +2176,9 @@ union bpf_attr {
* performed again, if the helper is used in combination with
* direct packet access.
* Return
- * 0 on success, or a negative error in case of failure.
+ * 0 on success, or a negative error in case of failure. Positive
+ * error indicates a potential drop or congestion in the target
+ * device. The particular positive error codes are not defined.
*
* u64 bpf_get_current_pid_tgid(void)
* Description
@@ -2001,6 +2334,9 @@ union bpf_attr {
* sending the packet. This flag was added for GRE
* encapsulation, but might be used with other protocols
* as well in the future.
+ * **BPF_F_NO_TUNNEL_KEY**
+ * Add a flag to tunnel metadata indicating that no tunnel
+ * key should be set in the resulting tunnel header.
*
* Here is a typical usage on the transmit path:
*
@@ -2119,7 +2455,7 @@ union bpf_attr {
* into it. An example is available in file
* *samples/bpf/trace_output_user.c* in the Linux kernel source
* tree (the eBPF program counterpart is in
- * *samples/bpf/trace_output_kern.c*).
+ * *samples/bpf/trace_output.bpf.c*).
*
* **bpf_perf_event_output**\ () achieves better performance
* than **bpf_trace_printk**\ () for sharing data with user
@@ -2575,8 +2911,8 @@ union bpf_attr {
* *bpf_socket* should be one of the following:
*
* * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
- * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
- * and **BPF_CGROUP_INET6_CONNECT**.
+ * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**,
+ * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**.
*
* This helper actually implements a subset of **setsockopt()**.
* It supports the following *level*\ s:
@@ -2593,7 +2929,7 @@ union bpf_attr {
* **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**,
* **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**,
* **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**,
- * **TCP_BPF_RTO_MIN**.
+ * **TCP_BPF_RTO_MIN**, **TCP_BPF_SOCK_OPS_CB_FLAGS**.
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
* * **IPPROTO_IPV6**, which supports the following *optname*\ s:
* **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**.
@@ -2644,6 +2980,11 @@ union bpf_attr {
* Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
* L2 type as Ethernet.
*
+ * * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**,
+ * **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**:
+ * Indicate the new IP header version after decapsulating the outer
+ * IP header. Used when the inner and outer IP versions are different.
+ *
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
@@ -2788,7 +3129,7 @@ union bpf_attr {
*
* long bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
* Description
- * For en eBPF program attached to a perf event, retrieve the
+ * For an eBPF program attached to a perf event, retrieve the
* value of the event counter associated to *ctx* and store it in
* the structure pointed by *buf* and of size *buf_size*. Enabled
* and running times are also stored in the structure (see
@@ -2809,8 +3150,8 @@ union bpf_attr {
* *bpf_socket* should be one of the following:
*
* * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
- * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
- * and **BPF_CGROUP_INET6_CONNECT**.
+ * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**,
+ * **BPF_CGROUP_INET6_CONNECT** and **BPF_CGROUP_UNIX_CONNECT**.
*
* This helper actually implements a subset of **getsockopt()**.
* It supports the same set of *optname*\ s that is supported by
@@ -2838,10 +3179,6 @@ union bpf_attr {
* with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration
* option, and in this case it only works on functions tagged with
* **ALLOW_ERROR_INJECTION** in the kernel code.
- *
- * Also, the helper is only available for the architectures having
- * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing,
- * x86 architecture is the only one to support this feature.
* Return
* 0
*
@@ -3118,9 +3455,27 @@ union bpf_attr {
* **BPF_FIB_LOOKUP_DIRECT**
* Do a direct table lookup vs full lookup using FIB
* rules.
+ * **BPF_FIB_LOOKUP_TBID**
+ * Used with BPF_FIB_LOOKUP_DIRECT.
+ * Use the routing table ID present in *params*->tbid
+ * for the fib lookup.
* **BPF_FIB_LOOKUP_OUTPUT**
* Perform lookup from an egress perspective (default is
* ingress).
+ * **BPF_FIB_LOOKUP_SKIP_NEIGH**
+ * Skip the neighbour table lookup. *params*->dmac
+ * and *params*->smac will not be set as output. A common
+ * use case is to call **bpf_redirect_neigh**\ () after
+ * doing **bpf_fib_lookup**\ ().
+ * **BPF_FIB_LOOKUP_SRC**
+ * Derive and set source IP addr in *params*->ipv{4,6}_src
+ * for the nexthop. If the src addr cannot be derived,
+ * **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
+ * case, *params*->dmac and *params*->smac are not set either.
+ * **BPF_FIB_LOOKUP_MARK**
+ * Use the mark present in *params*->mark for the fib lookup.
+ * This option should not be used with BPF_FIB_LOOKUP_DIRECT,
+ * as it only has meaning for full lookups.
*
* *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs.
@@ -4090,9 +4445,6 @@ union bpf_attr {
* **-EOPNOTSUPP** if the operation is not supported, for example
* a call from outside of TC ingress.
*
- * **-ESOCKTNOSUPPORT** if the socket type is not supported
- * (reuseport).
- *
* long bpf_sk_assign(struct bpf_sk_lookup *ctx, struct bpf_sock *sk, u64 flags)
* Description
* Helper is overloaded depending on BPF program type. This
@@ -4357,6 +4709,8 @@ union bpf_attr {
* long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags)
* Description
* Return a user or a kernel stack in bpf program provided buffer.
+ * Note: the user stack will only be populated if the *task* is
+ * the current task; all other tasks will return -EOPNOTSUPP.
* To achieve this, the helper needs *task*, which is a valid
* pointer to **struct task_struct**. To store the stacktrace, the
* bpf program provides *buf* with a nonnegative *size*.
@@ -4368,6 +4722,7 @@ union bpf_attr {
*
* **BPF_F_USER_STACK**
* Collect a user space stack instead of a kernel stack.
+ * The *task* must be the current task.
* **BPF_F_USER_BUILD_ID**
* Collect buildid+offset instead of ips for user stack,
* only valid if **BPF_F_USER_STACK** is also specified.
@@ -4540,7 +4895,7 @@ union bpf_attr {
*
* **-ENOENT** if the bpf_local_storage cannot be found.
*
- * long bpf_d_path(struct path *path, char *buf, u32 sz)
+ * long bpf_d_path(const struct path *path, char *buf, u32 sz)
* Description
* Return full path for given **struct path** object, which
* needs to be the kernel BTF *path* object. The path is
@@ -4670,10 +5025,13 @@ union bpf_attr {
* the netns switch takes place from ingress to ingress without
* going through the CPU's backlog queue.
*
+ * *skb*\ **->mark** and *skb*\ **->tstamp** are not cleared during
+ * the netns switch.
+ *
* The *flags* argument is reserved and must be 0. The helper is
- * currently only supported for tc BPF program types at the ingress
- * hook and for veth device types. The peer device must reside in a
- * different network namespace.
+ * currently only supported for tc BPF program types at the
+ * ingress hook and for veth and netkit target device types. The
+ * peer device must reside in a different network namespace.
* Return
* The helper returns **TC_ACT_REDIRECT** on success or
* **TC_ACT_SHOT** on error.
@@ -4749,7 +5107,7 @@ union bpf_attr {
* bytes will be copied to *dst*
* Return
* The **hash_algo** is returned on success,
- * **-EOPNOTSUP** if IMA is disabled or **-EINVAL** if
+ * **-EOPNOTSUPP** if IMA is disabled or **-EINVAL** if
* invalid arguments are passed.
*
* struct socket *bpf_sock_from_file(struct file *file)
@@ -4951,6 +5309,14 @@ union bpf_attr {
* different maps if key/value layout matches across maps.
* Every bpf_timer_set_callback() can have different callback_fn.
*
+ * *flags* can be one of:
+ *
+ * **BPF_F_TIMER_ABS**
+ * Start the timer in absolute expire value instead of the
+ * default relative one.
+ * **BPF_F_TIMER_CPU_PIN**
+ * Timer will be pinned to the CPU of the caller.
+ *
* Return
* 0 on success.
* **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier
@@ -4969,9 +5335,14 @@ union bpf_attr {
* u64 bpf_get_func_ip(void *ctx)
* Description
* Get address of the traced function (for tracing and kprobe programs).
+ *
+ * When called for kprobe program attached as uprobe it returns
+ * probe address for both entry and return uprobe.
+ *
* Return
- * Address of the traced function.
+ * Address of the traced function for kprobe.
* 0 for kprobes placed within the function (not at the entry).
+ * Address of the probe for uprobe and return uprobe.
*
* u64 bpf_get_attach_cookie(void *ctx)
* Description
@@ -5075,7 +5446,7 @@ union bpf_attr {
* Currently, the **flags** must be 0. Currently, nr_loops is
* limited to 1 << 23 (~8 million) loops.
*
- * long (\*callback_fn)(u32 index, void \*ctx);
+ * long (\*callback_fn)(u64 index, void \*ctx);
*
* where **index** is the current index in the loop. The index
* is zero-indexed.
@@ -5222,14 +5593,15 @@ union bpf_attr {
* bytes will be copied to *dst*
* Return
* The **hash_algo** is returned on success,
- * **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if
+ * **-EOPNOTSUPP** if the hash calculation failed or **-EINVAL** if
* invalid arguments are passed.
*
- * void *bpf_kptr_xchg(void *map_value, void *ptr)
+ * void *bpf_kptr_xchg(void *dst, void *ptr)
* Description
- * Exchange kptr at pointer *map_value* with *ptr*, and return the
- * old value. *ptr* can be NULL, otherwise it must be a referenced
- * pointer which will be released when this helper is called.
+ * Exchange kptr at pointer *dst* with *ptr*, and return the old value.
+ * *dst* can be map value or local kptr. *ptr* can be NULL, otherwise
+ * it must be a referenced pointer which will be released when this helper
+ * is called.
* Return
* The old value of kptr (which can be NULL). The returned pointer
* if not NULL, is a reference which must be released using its
@@ -5250,7 +5622,7 @@ union bpf_attr {
* Return
* *sk* if casting is valid, or **NULL** otherwise.
*
- * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr)
+ * long bpf_dynptr_from_mem(void *data, u64 size, u64 flags, struct bpf_dynptr *ptr)
* Description
* Get a dynptr to local memory *data*.
*
@@ -5293,7 +5665,7 @@ union bpf_attr {
* Return
* Nothing. Always succeeds.
*
- * long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags)
+ * long bpf_dynptr_read(void *dst, u64 len, const struct bpf_dynptr *src, u64 offset, u64 flags)
* Description
* Read *len* bytes from *src* into *dst*, starting from *offset*
* into *src*.
@@ -5303,22 +5675,36 @@ union bpf_attr {
* of *src*'s data, -EINVAL if *src* is an invalid dynptr or if
* *flags* is not 0.
*
- * long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
+ * long bpf_dynptr_write(const struct bpf_dynptr *dst, u64 offset, void *src, u64 len, u64 flags)
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
- * *flags* is currently unused.
+ *
+ * *flags* must be 0 except for skb-type dynptrs.
+ *
+ * For skb-type dynptrs:
+ * * All data slices of the dynptr are automatically
+ * invalidated after **bpf_dynptr_write**\ (). This is
+ * because writing may pull the skb and change the
+ * underlying packet buffer.
+ *
+ * * For *flags*, please see the flags accepted by
+ * **bpf_skb_store_bytes**\ ().
* Return
* 0 on success, -E2BIG if *offset* + *len* exceeds the length
* of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
- * is a read-only dynptr or if *flags* is not 0.
+ * is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs,
+ * other errors correspond to errors returned by **bpf_skb_store_bytes**\ ().
*
- * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
+ * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u64 offset, u64 len)
* Description
* Get a pointer to the underlying dynptr data.
*
* *len* must be a statically known value. The returned data slice
* is invalidated whenever the dynptr is invalidated.
+ *
+ * skb and xdp type dynptrs may not use bpf_dynptr_data. They should
+ * instead use bpf_dynptr_slice and bpf_dynptr_slice_rdwr.
* Return
* Pointer to the underlying dynptr data, NULL if the dynptr is
* read-only, if the dynptr is invalid, or if the offset and length
@@ -5698,7 +6084,10 @@ union bpf_attr {
FN(user_ringbuf_drain, 209, ##ctx) \
FN(cgrp_storage_get, 210, ##ctx) \
FN(cgrp_storage_delete, 211, ##ctx) \
- /* */
+ /* This helper list is effectively frozen. If you are trying to \
+ * add a new helper, you should add a kfunc instead which has \
+ * less stability guarantees. See Documentation/bpf/kfuncs.rst \
+ */
/* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't
* know or care about integer value that is now passed as second argument
@@ -5736,11 +6125,7 @@ enum {
BPF_F_PSEUDO_HDR = (1ULL << 4),
BPF_F_MARK_MANGLED_0 = (1ULL << 5),
BPF_F_MARK_ENFORCE = (1ULL << 6),
-};
-
-/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
-enum {
- BPF_F_INGRESS = (1ULL << 0),
+ BPF_F_IPV6 = (1ULL << 7),
};
/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
@@ -5764,6 +6149,7 @@ enum {
BPF_F_ZERO_CSUM_TX = (1ULL << 1),
BPF_F_DONT_FRAGMENT = (1ULL << 2),
BPF_F_SEQ_NUMBER = (1ULL << 3),
+ BPF_F_NO_TUNNEL_KEY = (1ULL << 4),
};
/* BPF_FUNC_skb_get_tunnel_key flags. */
@@ -5803,6 +6189,8 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5),
BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
+ BPF_F_ADJ_ROOM_DECAP_L3_IPV4 = (1ULL << 7),
+ BPF_F_ADJ_ROOM_DECAP_L3_IPV6 = (1ULL << 8),
};
enum {
@@ -5847,6 +6235,7 @@ enum {
BPF_RB_RING_SIZE = 1,
BPF_RB_CONS_POS = 2,
BPF_RB_PROD_POS = 3,
+ BPF_RB_OVERWRITE_POS = 4,
};
/* BPF ring buffer constants */
@@ -5886,10 +6275,12 @@ enum {
BPF_F_BPRM_SECUREEXEC = (1ULL << 0),
};
-/* Flags for bpf_redirect_map helper */
+/* Flags for bpf_redirect and bpf_redirect_map helpers */
enum {
- BPF_F_BROADCAST = (1ULL << 3),
- BPF_F_EXCLUDE_INGRESS = (1ULL << 4),
+ BPF_F_INGRESS = (1ULL << 0), /* used for skb path */
+ BPF_F_BROADCAST = (1ULL << 3), /* used for XDP path */
+ BPF_F_EXCLUDE_INGRESS = (1ULL << 4), /* used for XDP path */
+#define BPF_F_REDIRECT_FLAGS (BPF_F_INGRESS | BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS)
};
#define __bpf_md_ptr(type, name) \
@@ -5898,12 +6289,17 @@ union { \
__u64 :64; \
} __attribute__((aligned(8)))
+/* The enum used in skb->tstamp_type. It specifies the clock type
+ * of the time stored in the skb->tstamp.
+ */
enum {
- BPF_SKB_TSTAMP_UNSPEC,
- BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */
- /* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle,
- * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC
- * and try to deduce it by ingress, egress or skb->sk->sk_clockid.
+ BPF_SKB_TSTAMP_UNSPEC = 0, /* DEPRECATED */
+ BPF_SKB_TSTAMP_DELIVERY_MONO = 1, /* DEPRECATED */
+ BPF_SKB_CLOCK_REALTIME = 0,
+ BPF_SKB_CLOCK_MONOTONIC = 1,
+ BPF_SKB_CLOCK_TAI = 2,
+ /* For any future BPF_SKB_CLOCK_* that the bpf prog cannot handle,
+ * the bpf prog can try to deduce it by ingress/egress/skb->sk->sk_clockid.
*/
};
@@ -6095,6 +6491,19 @@ struct bpf_sock_tuple {
};
};
+/* (Simplified) user return codes for tcx prog type.
+ * A valid tcx program must return one of these defined values. All other
+ * return codes are reserved for future use. Must remain compatible with
+ * their TC_ACT_* counter-parts. For compatibility in behavior, unknown
+ * return codes are mapped to TCX_NEXT.
+ */
+enum tcx_action_base {
+ TCX_NEXT = -1,
+ TCX_PASS = 0,
+ TCX_DROP = 2,
+ TCX_REDIRECT = 7,
+};
+
struct bpf_xdp_sock {
__u32 queue_id;
};
@@ -6276,8 +6685,10 @@ struct bpf_map_info {
__u32 btf_id;
__u32 btf_key_type_id;
__u32 btf_value_type_id;
- __u32 :32; /* alignment pad */
+ __u32 btf_vmlinux_id;
__u64 map_extra;
+ __aligned_u64 hash;
+ __u32 hash_size;
} __attribute__((aligned(8)));
struct bpf_btf_info {
@@ -6297,11 +6708,15 @@ struct bpf_link_info {
struct {
__aligned_u64 tp_name; /* in/out: tp_name buffer ptr */
__u32 tp_name_len; /* in/out: tp_name buffer len */
+ __u32 :32;
+ __u64 cookie;
} raw_tracepoint;
struct {
__u32 attach_type;
__u32 target_obj_id; /* prog_id for PROG_EXT, otherwise btf object id */
__u32 target_btf_id; /* BTF type id inside the object */
+ __u32 :32;
+ __u64 cookie;
} tracing;
struct {
__u64 cgroup_id;
@@ -6338,9 +6753,87 @@ struct bpf_link_info {
struct {
__u32 ifindex;
} xdp;
+ struct {
+ __u32 map_id;
+ } struct_ops;
+ struct {
+ __u32 pf;
+ __u32 hooknum;
+ __s32 priority;
+ __u32 flags;
+ } netfilter;
+ struct {
+ __aligned_u64 addrs;
+ __u32 count; /* in/out: kprobe_multi function count */
+ __u32 flags;
+ __u64 missed;
+ __aligned_u64 cookies;
+ } kprobe_multi;
+ struct {
+ __aligned_u64 path;
+ __aligned_u64 offsets;
+ __aligned_u64 ref_ctr_offsets;
+ __aligned_u64 cookies;
+ __u32 path_size; /* in/out: real path size on success, including zero byte */
+ __u32 count; /* in/out: uprobe_multi offsets/ref_ctr_offsets/cookies count */
+ __u32 flags;
+ __u32 pid;
+ } uprobe_multi;
+ struct {
+ __u32 type; /* enum bpf_perf_event_type */
+ __u32 :32;
+ union {
+ struct {
+ __aligned_u64 file_name; /* in/out */
+ __u32 name_len;
+ __u32 offset; /* offset from file_name */
+ __u64 cookie;
+ __u64 ref_ctr_offset;
+ } uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */
+ struct {
+ __aligned_u64 func_name; /* in/out */
+ __u32 name_len;
+ __u32 offset; /* offset from func_name */
+ __u64 addr;
+ __u64 missed;
+ __u64 cookie;
+ } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */
+ struct {
+ __aligned_u64 tp_name; /* in/out */
+ __u32 name_len;
+ __u32 :32;
+ __u64 cookie;
+ } tracepoint; /* BPF_PERF_EVENT_TRACEPOINT */
+ struct {
+ __u64 config;
+ __u32 type;
+ __u32 :32;
+ __u64 cookie;
+ } event; /* BPF_PERF_EVENT_EVENT */
+ };
+ } perf_event;
+ struct {
+ __u32 ifindex;
+ __u32 attach_type;
+ } tcx;
+ struct {
+ __u32 ifindex;
+ __u32 attach_type;
+ } netkit;
+ struct {
+ __u32 map_id;
+ __u32 attach_type;
+ } sockmap;
};
} __attribute__((aligned(8)));
+struct bpf_token_info {
+ __u64 allowed_cmds;
+ __u64 allowed_maps;
+ __u64 allowed_progs;
+ __u64 allowed_attachs;
+} __attribute__((aligned(8)));
+
/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
* by user and intended to be used by socket (e.g. to bind to, depends on
* attach type).
@@ -6504,6 +6997,12 @@ enum {
BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F,
};
+enum {
+ SK_BPF_CB_TX_TIMESTAMPING = 1<<0,
+ SK_BPF_CB_MASK = (SK_BPF_CB_TX_TIMESTAMPING - 1) |
+ SK_BPF_CB_TX_TIMESTAMPING
+};
+
/* List of known BPF sock_ops operators.
* New entries can only be added at the end
*/
@@ -6556,6 +7055,8 @@ enum {
* socket transition to LISTEN state.
*/
BPF_SOCK_OPS_RTT_CB, /* Called on every RTT.
+ * Arg1: measured RTT input (mrtt)
+ * Arg2: updated srtt
*/
BPF_SOCK_OPS_PARSE_HDR_OPT_CB, /* Parse the header option.
* It will be called to handle
@@ -6614,6 +7115,29 @@ enum {
* by the kernel or the
* earlier bpf-progs.
*/
+ BPF_SOCK_OPS_TSTAMP_SCHED_CB, /* Called when skb is passing
+ * through dev layer when
+ * SK_BPF_CB_TX_TIMESTAMPING
+ * feature is on.
+ */
+ BPF_SOCK_OPS_TSTAMP_SND_SW_CB, /* Called when skb is about to send
+ * to the nic when SK_BPF_CB_TX_TIMESTAMPING
+ * feature is on.
+ */
+ BPF_SOCK_OPS_TSTAMP_SND_HW_CB, /* Called in hardware phase when
+ * SK_BPF_CB_TX_TIMESTAMPING feature
+ * is on.
+ */
+ BPF_SOCK_OPS_TSTAMP_ACK_CB, /* Called when all the skbs in the
+ * same sendmsg call are acked
+ * when SK_BPF_CB_TX_TIMESTAMPING
+ * feature is on.
+ */
+ BPF_SOCK_OPS_TSTAMP_SENDMSG_CB, /* Called when every sendmsg syscall
+ * is triggered. It's used to correlate
+ * sendmsg timestamp with corresponding
+ * tskey.
+ */
};
/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
@@ -6634,6 +7158,7 @@ enum {
BPF_TCP_LISTEN,
BPF_TCP_CLOSING, /* Now a valid state */
BPF_TCP_NEW_SYN_RECV,
+ BPF_TCP_BOUND_INACTIVE,
BPF_TCP_MAX_STATES /* Leave at the end! */
};
@@ -6678,6 +7203,9 @@ enum {
TCP_BPF_SYN = 1005, /* Copy the TCP header */
TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */
TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */
+ TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */
+ SK_BPF_CB_FLAGS = 1009, /* Get or set sock ops flags in socket */
+ SK_BPF_BYPASS_PROT_MEM = 1010, /* Get or Set sk->sk_bypass_prot_mem */
};
enum {
@@ -6734,6 +7262,10 @@ struct bpf_raw_tracepoint_args {
enum {
BPF_FIB_LOOKUP_DIRECT = (1U << 0),
BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
+ BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
+ BPF_FIB_LOOKUP_TBID = (1U << 3),
+ BPF_FIB_LOOKUP_SRC = (1U << 4),
+ BPF_FIB_LOOKUP_MARK = (1U << 5),
};
enum {
@@ -6746,6 +7278,7 @@ enum {
BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
+ BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */
};
struct bpf_fib_lookup {
@@ -6765,7 +7298,7 @@ struct bpf_fib_lookup {
/* output: MTU value */
__u16 mtu_result;
- };
+ } __attribute__((packed, aligned(2)));
/* input: L3 device index for lookup
* output: device index from FIB lookup
*/
@@ -6780,6 +7313,9 @@ struct bpf_fib_lookup {
__u32 rt_metric;
};
+ /* input: source address to consider for lookup
+ * output: source address result from lookup
+ */
union {
__be32 ipv4_src;
__u32 ipv6_src[4]; /* in6_addr; network order */
@@ -6794,11 +7330,32 @@ struct bpf_fib_lookup {
__u32 ipv6_dst[4]; /* in6_addr; network order */
};
- /* output */
- __be16 h_vlan_proto;
- __be16 h_vlan_TCI;
- __u8 smac[6]; /* ETH_ALEN */
- __u8 dmac[6]; /* ETH_ALEN */
+ union {
+ struct {
+ /* output */
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
+ };
+ /* input: when accompanied with the
+ * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a
+ * specific routing table to use for the fib lookup.
+ */
+ __u32 tbid;
+ };
+
+ union {
+ /* input */
+ struct {
+ __u32 mark; /* policy routing */
+ /* 2 4-byte holes for input */
+ };
+
+ /* output: source and dest mac */
+ struct {
+ __u8 smac[6]; /* ETH_ALEN */
+ __u8 dmac[6]; /* ETH_ALEN */
+ };
+ };
};
struct bpf_redir_neigh {
@@ -6882,25 +7439,41 @@ struct bpf_spin_lock {
};
struct bpf_timer {
- __u64 :64;
- __u64 :64;
+ __u64 __opaque[2];
+} __attribute__((aligned(8)));
+
+struct bpf_task_work {
+ __u64 __opaque;
+} __attribute__((aligned(8)));
+
+struct bpf_wq {
+ __u64 __opaque[2];
} __attribute__((aligned(8)));
struct bpf_dynptr {
- __u64 :64;
- __u64 :64;
+ __u64 __opaque[2];
} __attribute__((aligned(8)));
struct bpf_list_head {
- __u64 :64;
- __u64 :64;
+ __u64 __opaque[2];
} __attribute__((aligned(8)));
struct bpf_list_node {
- __u64 :64;
- __u64 :64;
+ __u64 __opaque[3];
} __attribute__((aligned(8)));
+struct bpf_rb_root {
+ __u64 __opaque[2];
+} __attribute__((aligned(8)));
+
+struct bpf_rb_node {
+ __u64 __opaque[4];
+} __attribute__((aligned(8)));
+
+struct bpf_refcount {
+ __u32 __opaque[1];
+} __attribute__((aligned(4)));
+
struct bpf_sysctl {
__u32 write; /* Sysctl is being read (= 0) or written (= 1).
* Allows 1,2,4-byte read, but no write.
@@ -7050,4 +7623,52 @@ struct bpf_core_relo {
enum bpf_core_relo_kind kind;
};
+/*
+ * Flags to control bpf_timer_start() behaviour.
+ * - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is
+ * relative to current time.
+ * - BPF_F_TIMER_CPU_PIN: Timer will be pinned to the CPU of the caller.
+ */
+enum {
+ BPF_F_TIMER_ABS = (1ULL << 0),
+ BPF_F_TIMER_CPU_PIN = (1ULL << 1),
+};
+
+/* BPF numbers iterator state */
+struct bpf_iter_num {
+ /* opaque iterator state; having __u64 here allows to preserve correct
+ * alignment requirements in vmlinux.h, generated from BTF
+ */
+ __u64 __opaque[1];
+} __attribute__((aligned(8)));
+
+/*
+ * Flags to control BPF kfunc behaviour.
+ * - BPF_F_PAD_ZEROS: Pad destination buffer with zeros. (See the respective
+ * helper documentation for details.)
+ */
+enum bpf_kfunc_flags {
+ BPF_F_PAD_ZEROS = (1ULL << 0),
+};
+
+/*
+ * Values of a BPF_MAP_TYPE_INSN_ARRAY entry must be of this type.
+ *
+ * Before the map is used the orig_off field should point to an
+ * instruction inside the program being loaded. The other fields
+ * must be set to 0.
+ *
+ * After the program is loaded, the xlated_off will be adjusted
+ * by the verifier to point to the index of the original instruction
+ * in the xlated program. If the instruction is deleted, it will
+ * be set to (u32)-1. The jitted_off will be set to the corresponding
+ * offset in the jitted image of the program.
+ */
+struct bpf_insn_array_value {
+ __u32 orig_off;
+ __u32 xlated_off;
+ __u32 jitted_off;
+ __u32 :32;
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index ec1798b6d3ff..266d4ffa6c07 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -36,7 +36,8 @@ struct btf_type {
* bits 24-28: kind (e.g. int, ptr, array...etc)
* bits 29-30: unused
* bit 31: kind_flag, currently used by
- * struct, union, enum, fwd and enum64
+ * struct, union, enum, fwd, enum64,
+ * decl_tag and type_tag
*/
__u32 info;
/* "size" is used by INT, ENUM, STRUCT, UNION, DATASEC and ENUM64.
diff --git a/tools/include/uapi/linux/const.h b/tools/include/uapi/linux/const.h
index af2a44c08683..b8f629ef135f 100644
--- a/tools/include/uapi/linux/const.h
+++ b/tools/include/uapi/linux/const.h
@@ -28,7 +28,24 @@
#define _BITUL(x) (_UL(1) << (x))
#define _BITULL(x) (_ULL(1) << (x))
-#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
+#if !defined(__ASSEMBLY__)
+/*
+ * Missing asm support
+ *
+ * __BIT128() would not work in the asm code, as it shifts an
+ * 'unsigned __int128' data type as direct representation of
+ * 128 bit constants is not supported in the gcc compiler, as
+ * they get silently truncated.
+ *
+ * TODO: Please revisit this implementation when gcc compiler
+ * starts representing 128 bit constants directly like long
+ * and unsigned long etc. Subsequently drop the comment for
+ * GENMASK_U128() which would then start supporting asm code.
+ */
+#define _BIT128(x) ((unsigned __int128)(1) << (x))
+#endif
+
+#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1)
#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask))
#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
diff --git a/tools/include/uapi/linux/coredump.h b/tools/include/uapi/linux/coredump.h
new file mode 100644
index 000000000000..dc3789b78af0
--- /dev/null
+++ b/tools/include/uapi/linux/coredump.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#ifndef _UAPI_LINUX_COREDUMP_H
+#define _UAPI_LINUX_COREDUMP_H
+
+#include <linux/types.h>
+
+/**
+ * coredump_{req,ack} flags
+ * @COREDUMP_KERNEL: kernel writes coredump
+ * @COREDUMP_USERSPACE: userspace writes coredump
+ * @COREDUMP_REJECT: don't generate coredump
+ * @COREDUMP_WAIT: wait for coredump server
+ */
+enum {
+ COREDUMP_KERNEL = (1ULL << 0),
+ COREDUMP_USERSPACE = (1ULL << 1),
+ COREDUMP_REJECT = (1ULL << 2),
+ COREDUMP_WAIT = (1ULL << 3),
+};
+
+/**
+ * struct coredump_req - message kernel sends to userspace
+ * @size: size of struct coredump_req
+ * @size_ack: known size of struct coredump_ack on this kernel
+ * @mask: supported features
+ *
+ * When a coredump happens the kernel will connect to the coredump
+ * socket and send a coredump request to the coredump server. The @size
+ * member is set to the size of struct coredump_req and provides a hint
+ * to userspace how much data can be read. Userspace may use MSG_PEEK to
+ * peek the size of struct coredump_req and then choose to consume it in
+ * one go. Userspace may also simply read a COREDUMP_ACK_SIZE_VER0
+ * request. If the size the kernel sends is larger userspace simply
+ * discards any remaining data.
+ *
+ * The coredump_req->mask member is set to the currently know features.
+ * Userspace may only set coredump_ack->mask to the bits raised by the
+ * kernel in coredump_req->mask.
+ *
+ * The coredump_req->size_ack member is set by the kernel to the size of
+ * struct coredump_ack the kernel knows. Userspace may only send up to
+ * coredump_req->size_ack bytes to the kernel and must set
+ * coredump_ack->size accordingly.
+ */
+struct coredump_req {
+ __u32 size;
+ __u32 size_ack;
+ __u64 mask;
+};
+
+enum {
+ COREDUMP_REQ_SIZE_VER0 = 16U, /* size of first published struct */
+};
+
+/**
+ * struct coredump_ack - message userspace sends to kernel
+ * @size: size of the struct
+ * @spare: unused
+ * @mask: features kernel is supposed to use
+ *
+ * The @size member must be set to the size of struct coredump_ack. It
+ * may never exceed what the kernel returned in coredump_req->size_ack
+ * but it may of course be smaller (>= COREDUMP_ACK_SIZE_VER0 and <=
+ * coredump_req->size_ack).
+ *
+ * The @mask member must be set to the features the coredump server
+ * wants the kernel to use. Only bits the kernel returned in
+ * coredump_req->mask may be set.
+ */
+struct coredump_ack {
+ __u32 size;
+ __u32 spare;
+ __u64 mask;
+};
+
+enum {
+ COREDUMP_ACK_SIZE_VER0 = 16U, /* size of first published struct */
+};
+
+/**
+ * enum coredump_mark - Markers for the coredump socket
+ *
+ * The kernel will place a single byte on the coredump socket. The
+ * markers notify userspace whether the coredump ack succeeded or
+ * failed.
+ *
+ * @COREDUMP_MARK_MINSIZE: the provided coredump_ack size was too small
+ * @COREDUMP_MARK_MAXSIZE: the provided coredump_ack size was too big
+ * @COREDUMP_MARK_UNSUPPORTED: the provided coredump_ack mask was invalid
+ * @COREDUMP_MARK_CONFLICTING: the provided coredump_ack mask has conflicting options
+ * @COREDUMP_MARK_REQACK: the coredump request and ack was successful
+ * @__COREDUMP_MARK_MAX: the maximum coredump mark value
+ */
+enum coredump_mark {
+ COREDUMP_MARK_REQACK = 0U,
+ COREDUMP_MARK_MINSIZE = 1U,
+ COREDUMP_MARK_MAXSIZE = 2U,
+ COREDUMP_MARK_UNSUPPORTED = 3U,
+ COREDUMP_MARK_CONFLICTING = 4U,
+ __COREDUMP_MARK_MAX = (1U << 31),
+};
+
+#endif /* _UAPI_LINUX_COREDUMP_H */
diff --git a/tools/include/uapi/linux/elf.h b/tools/include/uapi/linux/elf.h
new file mode 100644
index 000000000000..5834b83d7f9a
--- /dev/null
+++ b/tools/include/uapi/linux/elf.h
@@ -0,0 +1,524 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _LINUX_ELF_H
+#define _LINUX_ELF_H
+
+#include <linux/types.h>
+#include <linux/elf-em.h>
+
+/* 32-bit ELF base types. */
+typedef __u32 Elf32_Addr;
+typedef __u16 Elf32_Half;
+typedef __u32 Elf32_Off;
+typedef __s32 Elf32_Sword;
+typedef __u32 Elf32_Word;
+typedef __u16 Elf32_Versym;
+
+/* 64-bit ELF base types. */
+typedef __u64 Elf64_Addr;
+typedef __u16 Elf64_Half;
+typedef __s16 Elf64_SHalf;
+typedef __u64 Elf64_Off;
+typedef __s32 Elf64_Sword;
+typedef __u32 Elf64_Word;
+typedef __u64 Elf64_Xword;
+typedef __s64 Elf64_Sxword;
+typedef __u16 Elf64_Versym;
+
+/* These constants are for the segment types stored in the image headers */
+#define PT_NULL 0
+#define PT_LOAD 1
+#define PT_DYNAMIC 2
+#define PT_INTERP 3
+#define PT_NOTE 4
+#define PT_SHLIB 5
+#define PT_PHDR 6
+#define PT_TLS 7 /* Thread local storage segment */
+#define PT_LOOS 0x60000000 /* OS-specific */
+#define PT_HIOS 0x6fffffff /* OS-specific */
+#define PT_LOPROC 0x70000000
+#define PT_HIPROC 0x7fffffff
+#define PT_GNU_EH_FRAME (PT_LOOS + 0x474e550)
+#define PT_GNU_STACK (PT_LOOS + 0x474e551)
+#define PT_GNU_RELRO (PT_LOOS + 0x474e552)
+#define PT_GNU_PROPERTY (PT_LOOS + 0x474e553)
+
+
+/* ARM MTE memory tag segment type */
+#define PT_AARCH64_MEMTAG_MTE (PT_LOPROC + 0x2)
+
+/*
+ * Extended Numbering
+ *
+ * If the real number of program header table entries is larger than
+ * or equal to PN_XNUM(0xffff), it is set to sh_info field of the
+ * section header at index 0, and PN_XNUM is set to e_phnum
+ * field. Otherwise, the section header at index 0 is zero
+ * initialized, if it exists.
+ *
+ * Specifications are available in:
+ *
+ * - Oracle: Linker and Libraries.
+ * Part No: 817–1984–19, August 2011.
+ * https://docs.oracle.com/cd/E18752_01/pdf/817-1984.pdf
+ *
+ * - System V ABI AMD64 Architecture Processor Supplement
+ * Draft Version 0.99.4,
+ * January 13, 2010.
+ * http://www.cs.washington.edu/education/courses/cse351/12wi/supp-docs/abi.pdf
+ */
+#define PN_XNUM 0xffff
+
+/* These constants define the different elf file types */
+#define ET_NONE 0
+#define ET_REL 1
+#define ET_EXEC 2
+#define ET_DYN 3
+#define ET_CORE 4
+#define ET_LOPROC 0xff00
+#define ET_HIPROC 0xffff
+
+/* This is the info that is needed to parse the dynamic section of the file */
+#define DT_NULL 0
+#define DT_NEEDED 1
+#define DT_PLTRELSZ 2
+#define DT_PLTGOT 3
+#define DT_HASH 4
+#define DT_STRTAB 5
+#define DT_SYMTAB 6
+#define DT_RELA 7
+#define DT_RELASZ 8
+#define DT_RELAENT 9
+#define DT_STRSZ 10
+#define DT_SYMENT 11
+#define DT_INIT 12
+#define DT_FINI 13
+#define DT_SONAME 14
+#define DT_RPATH 15
+#define DT_SYMBOLIC 16
+#define DT_REL 17
+#define DT_RELSZ 18
+#define DT_RELENT 19
+#define DT_PLTREL 20
+#define DT_DEBUG 21
+#define DT_TEXTREL 22
+#define DT_JMPREL 23
+#define DT_ENCODING 32
+#define OLD_DT_LOOS 0x60000000
+#define DT_LOOS 0x6000000d
+#define DT_HIOS 0x6ffff000
+#define DT_VALRNGLO 0x6ffffd00
+#define DT_VALRNGHI 0x6ffffdff
+#define DT_ADDRRNGLO 0x6ffffe00
+#define DT_GNU_HASH 0x6ffffef5
+#define DT_ADDRRNGHI 0x6ffffeff
+#define DT_VERSYM 0x6ffffff0
+#define DT_RELACOUNT 0x6ffffff9
+#define DT_RELCOUNT 0x6ffffffa
+#define DT_FLAGS_1 0x6ffffffb
+#define DT_VERDEF 0x6ffffffc
+#define DT_VERDEFNUM 0x6ffffffd
+#define DT_VERNEED 0x6ffffffe
+#define DT_VERNEEDNUM 0x6fffffff
+#define OLD_DT_HIOS 0x6fffffff
+#define DT_LOPROC 0x70000000
+#define DT_HIPROC 0x7fffffff
+
+/* This info is needed when parsing the symbol table */
+#define STB_LOCAL 0
+#define STB_GLOBAL 1
+#define STB_WEAK 2
+
+#define STN_UNDEF 0
+
+#define STT_NOTYPE 0
+#define STT_OBJECT 1
+#define STT_FUNC 2
+#define STT_SECTION 3
+#define STT_FILE 4
+#define STT_COMMON 5
+#define STT_TLS 6
+
+#define VER_FLG_BASE 0x1
+#define VER_FLG_WEAK 0x2
+
+#define ELF_ST_BIND(x) ((x) >> 4)
+#define ELF_ST_TYPE(x) ((x) & 0xf)
+#define ELF32_ST_BIND(x) ELF_ST_BIND(x)
+#define ELF32_ST_TYPE(x) ELF_ST_TYPE(x)
+#define ELF64_ST_BIND(x) ELF_ST_BIND(x)
+#define ELF64_ST_TYPE(x) ELF_ST_TYPE(x)
+
+typedef struct {
+ Elf32_Sword d_tag;
+ union {
+ Elf32_Sword d_val;
+ Elf32_Addr d_ptr;
+ } d_un;
+} Elf32_Dyn;
+
+typedef struct {
+ Elf64_Sxword d_tag; /* entry tag value */
+ union {
+ Elf64_Xword d_val;
+ Elf64_Addr d_ptr;
+ } d_un;
+} Elf64_Dyn;
+
+/* The following are used with relocations */
+#define ELF32_R_SYM(x) ((x) >> 8)
+#define ELF32_R_TYPE(x) ((x) & 0xff)
+
+#define ELF64_R_SYM(i) ((i) >> 32)
+#define ELF64_R_TYPE(i) ((i) & 0xffffffff)
+
+typedef struct elf32_rel {
+ Elf32_Addr r_offset;
+ Elf32_Word r_info;
+} Elf32_Rel;
+
+typedef struct elf64_rel {
+ Elf64_Addr r_offset; /* Location at which to apply the action */
+ Elf64_Xword r_info; /* index and type of relocation */
+} Elf64_Rel;
+
+typedef struct elf32_rela {
+ Elf32_Addr r_offset;
+ Elf32_Word r_info;
+ Elf32_Sword r_addend;
+} Elf32_Rela;
+
+typedef struct elf64_rela {
+ Elf64_Addr r_offset; /* Location at which to apply the action */
+ Elf64_Xword r_info; /* index and type of relocation */
+ Elf64_Sxword r_addend; /* Constant addend used to compute value */
+} Elf64_Rela;
+
+typedef struct elf32_sym {
+ Elf32_Word st_name;
+ Elf32_Addr st_value;
+ Elf32_Word st_size;
+ unsigned char st_info;
+ unsigned char st_other;
+ Elf32_Half st_shndx;
+} Elf32_Sym;
+
+typedef struct elf64_sym {
+ Elf64_Word st_name; /* Symbol name, index in string tbl */
+ unsigned char st_info; /* Type and binding attributes */
+ unsigned char st_other; /* No defined meaning, 0 */
+ Elf64_Half st_shndx; /* Associated section index */
+ Elf64_Addr st_value; /* Value of the symbol */
+ Elf64_Xword st_size; /* Associated symbol size */
+} Elf64_Sym;
+
+
+#define EI_NIDENT 16
+
+typedef struct elf32_hdr {
+ unsigned char e_ident[EI_NIDENT];
+ Elf32_Half e_type;
+ Elf32_Half e_machine;
+ Elf32_Word e_version;
+ Elf32_Addr e_entry; /* Entry point */
+ Elf32_Off e_phoff;
+ Elf32_Off e_shoff;
+ Elf32_Word e_flags;
+ Elf32_Half e_ehsize;
+ Elf32_Half e_phentsize;
+ Elf32_Half e_phnum;
+ Elf32_Half e_shentsize;
+ Elf32_Half e_shnum;
+ Elf32_Half e_shstrndx;
+} Elf32_Ehdr;
+
+typedef struct elf64_hdr {
+ unsigned char e_ident[EI_NIDENT]; /* ELF "magic number" */
+ Elf64_Half e_type;
+ Elf64_Half e_machine;
+ Elf64_Word e_version;
+ Elf64_Addr e_entry; /* Entry point virtual address */
+ Elf64_Off e_phoff; /* Program header table file offset */
+ Elf64_Off e_shoff; /* Section header table file offset */
+ Elf64_Word e_flags;
+ Elf64_Half e_ehsize;
+ Elf64_Half e_phentsize;
+ Elf64_Half e_phnum;
+ Elf64_Half e_shentsize;
+ Elf64_Half e_shnum;
+ Elf64_Half e_shstrndx;
+} Elf64_Ehdr;
+
+/* These constants define the permissions on sections in the program
+ header, p_flags. */
+#define PF_R 0x4
+#define PF_W 0x2
+#define PF_X 0x1
+
+typedef struct elf32_phdr {
+ Elf32_Word p_type;
+ Elf32_Off p_offset;
+ Elf32_Addr p_vaddr;
+ Elf32_Addr p_paddr;
+ Elf32_Word p_filesz;
+ Elf32_Word p_memsz;
+ Elf32_Word p_flags;
+ Elf32_Word p_align;
+} Elf32_Phdr;
+
+typedef struct elf64_phdr {
+ Elf64_Word p_type;
+ Elf64_Word p_flags;
+ Elf64_Off p_offset; /* Segment file offset */
+ Elf64_Addr p_vaddr; /* Segment virtual address */
+ Elf64_Addr p_paddr; /* Segment physical address */
+ Elf64_Xword p_filesz; /* Segment size in file */
+ Elf64_Xword p_memsz; /* Segment size in memory */
+ Elf64_Xword p_align; /* Segment alignment, file & memory */
+} Elf64_Phdr;
+
+/* sh_type */
+#define SHT_NULL 0
+#define SHT_PROGBITS 1
+#define SHT_SYMTAB 2
+#define SHT_STRTAB 3
+#define SHT_RELA 4
+#define SHT_HASH 5
+#define SHT_DYNAMIC 6
+#define SHT_NOTE 7
+#define SHT_NOBITS 8
+#define SHT_REL 9
+#define SHT_SHLIB 10
+#define SHT_DYNSYM 11
+#define SHT_NUM 12
+#define SHT_LOPROC 0x70000000
+#define SHT_HIPROC 0x7fffffff
+#define SHT_LOUSER 0x80000000
+#define SHT_HIUSER 0xffffffff
+
+/* sh_flags */
+#define SHF_WRITE 0x1
+#define SHF_ALLOC 0x2
+#define SHF_EXECINSTR 0x4
+#define SHF_RELA_LIVEPATCH 0x00100000
+#define SHF_RO_AFTER_INIT 0x00200000
+#define SHF_MASKPROC 0xf0000000
+
+/* special section indexes */
+#define SHN_UNDEF 0
+#define SHN_LORESERVE 0xff00
+#define SHN_LOPROC 0xff00
+#define SHN_HIPROC 0xff1f
+#define SHN_LIVEPATCH 0xff20
+#define SHN_ABS 0xfff1
+#define SHN_COMMON 0xfff2
+#define SHN_HIRESERVE 0xffff
+
+typedef struct elf32_shdr {
+ Elf32_Word sh_name;
+ Elf32_Word sh_type;
+ Elf32_Word sh_flags;
+ Elf32_Addr sh_addr;
+ Elf32_Off sh_offset;
+ Elf32_Word sh_size;
+ Elf32_Word sh_link;
+ Elf32_Word sh_info;
+ Elf32_Word sh_addralign;
+ Elf32_Word sh_entsize;
+} Elf32_Shdr;
+
+typedef struct elf64_shdr {
+ Elf64_Word sh_name; /* Section name, index in string tbl */
+ Elf64_Word sh_type; /* Type of section */
+ Elf64_Xword sh_flags; /* Miscellaneous section attributes */
+ Elf64_Addr sh_addr; /* Section virtual addr at execution */
+ Elf64_Off sh_offset; /* Section file offset */
+ Elf64_Xword sh_size; /* Size of section in bytes */
+ Elf64_Word sh_link; /* Index of another section */
+ Elf64_Word sh_info; /* Additional section information */
+ Elf64_Xword sh_addralign; /* Section alignment */
+ Elf64_Xword sh_entsize; /* Entry size if section holds table */
+} Elf64_Shdr;
+
+#define EI_MAG0 0 /* e_ident[] indexes */
+#define EI_MAG1 1
+#define EI_MAG2 2
+#define EI_MAG3 3
+#define EI_CLASS 4
+#define EI_DATA 5
+#define EI_VERSION 6
+#define EI_OSABI 7
+#define EI_PAD 8
+
+#define ELFMAG0 0x7f /* EI_MAG */
+#define ELFMAG1 'E'
+#define ELFMAG2 'L'
+#define ELFMAG3 'F'
+#define ELFMAG "\177ELF"
+#define SELFMAG 4
+
+#define ELFCLASSNONE 0 /* EI_CLASS */
+#define ELFCLASS32 1
+#define ELFCLASS64 2
+#define ELFCLASSNUM 3
+
+#define ELFDATANONE 0 /* e_ident[EI_DATA] */
+#define ELFDATA2LSB 1
+#define ELFDATA2MSB 2
+
+#define EV_NONE 0 /* e_version, EI_VERSION */
+#define EV_CURRENT 1
+#define EV_NUM 2
+
+#define ELFOSABI_NONE 0
+#define ELFOSABI_LINUX 3
+
+#ifndef ELF_OSABI
+#define ELF_OSABI ELFOSABI_NONE
+#endif
+
+/*
+ * Notes used in ET_CORE. Architectures export some of the arch register sets
+ * using the corresponding note types via the PTRACE_GETREGSET and
+ * PTRACE_SETREGSET requests.
+ * The note name for these types is "LINUX", except NT_PRFPREG that is named
+ * "CORE".
+ */
+#define NT_PRSTATUS 1
+#define NT_PRFPREG 2
+#define NT_PRPSINFO 3
+#define NT_TASKSTRUCT 4
+#define NT_AUXV 6
+/*
+ * Note to userspace developers: size of NT_SIGINFO note may increase
+ * in the future to accomodate more fields, don't assume it is fixed!
+ */
+#define NT_SIGINFO 0x53494749
+#define NT_FILE 0x46494c45
+#define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */
+#define NT_PPC_VMX 0x100 /* PowerPC Altivec/VMX registers */
+#define NT_PPC_SPE 0x101 /* PowerPC SPE/EVR registers */
+#define NT_PPC_VSX 0x102 /* PowerPC VSX registers */
+#define NT_PPC_TAR 0x103 /* Target Address Register */
+#define NT_PPC_PPR 0x104 /* Program Priority Register */
+#define NT_PPC_DSCR 0x105 /* Data Stream Control Register */
+#define NT_PPC_EBB 0x106 /* Event Based Branch Registers */
+#define NT_PPC_PMU 0x107 /* Performance Monitor Registers */
+#define NT_PPC_TM_CGPR 0x108 /* TM checkpointed GPR Registers */
+#define NT_PPC_TM_CFPR 0x109 /* TM checkpointed FPR Registers */
+#define NT_PPC_TM_CVMX 0x10a /* TM checkpointed VMX Registers */
+#define NT_PPC_TM_CVSX 0x10b /* TM checkpointed VSX Registers */
+#define NT_PPC_TM_SPR 0x10c /* TM Special Purpose Registers */
+#define NT_PPC_TM_CTAR 0x10d /* TM checkpointed Target Address Register */
+#define NT_PPC_TM_CPPR 0x10e /* TM checkpointed Program Priority Register */
+#define NT_PPC_TM_CDSCR 0x10f /* TM checkpointed Data Stream Control Register */
+#define NT_PPC_PKEY 0x110 /* Memory Protection Keys registers */
+#define NT_PPC_DEXCR 0x111 /* PowerPC DEXCR registers */
+#define NT_PPC_HASHKEYR 0x112 /* PowerPC HASHKEYR register */
+#define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */
+#define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */
+#define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */
+/* Old binutils treats 0x203 as a CET state */
+#define NT_X86_SHSTK 0x204 /* x86 SHSTK state */
+#define NT_X86_XSAVE_LAYOUT 0x205 /* XSAVE layout description */
+#define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */
+#define NT_S390_TIMER 0x301 /* s390 timer register */
+#define NT_S390_TODCMP 0x302 /* s390 TOD clock comparator register */
+#define NT_S390_TODPREG 0x303 /* s390 TOD programmable register */
+#define NT_S390_CTRS 0x304 /* s390 control registers */
+#define NT_S390_PREFIX 0x305 /* s390 prefix register */
+#define NT_S390_LAST_BREAK 0x306 /* s390 breaking event address */
+#define NT_S390_SYSTEM_CALL 0x307 /* s390 system call restart data */
+#define NT_S390_TDB 0x308 /* s390 transaction diagnostic block */
+#define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 upper half */
+#define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */
+#define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */
+#define NT_S390_GS_BC 0x30c /* s390 guarded storage broadcast control block */
+#define NT_S390_RI_CB 0x30d /* s390 runtime instrumentation */
+#define NT_S390_PV_CPU_DATA 0x30e /* s390 protvirt cpu dump data */
+#define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */
+#define NT_ARM_TLS 0x401 /* ARM TLS register */
+#define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */
+#define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */
+#define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */
+#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension registers */
+#define NT_ARM_PAC_MASK 0x406 /* ARM pointer authentication code masks */
+#define NT_ARM_PACA_KEYS 0x407 /* ARM pointer authentication address keys */
+#define NT_ARM_PACG_KEYS 0x408 /* ARM pointer authentication generic key */
+#define NT_ARM_TAGGED_ADDR_CTRL 0x409 /* arm64 tagged address control (prctl()) */
+#define NT_ARM_PAC_ENABLED_KEYS 0x40a /* arm64 ptr auth enabled keys (prctl()) */
+#define NT_ARM_SSVE 0x40b /* ARM Streaming SVE registers */
+#define NT_ARM_ZA 0x40c /* ARM SME ZA registers */
+#define NT_ARM_ZT 0x40d /* ARM SME ZT registers */
+#define NT_ARM_FPMR 0x40e /* ARM floating point mode register */
+#define NT_ARM_POE 0x40f /* ARM POE registers */
+#define NT_ARM_GCS 0x410 /* ARM GCS state */
+#define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */
+#define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */
+#define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */
+#define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode */
+#define NT_MIPS_MSA 0x802 /* MIPS SIMD registers */
+#define NT_RISCV_CSR 0x900 /* RISC-V Control and Status Registers */
+#define NT_RISCV_VECTOR 0x901 /* RISC-V vector registers */
+#define NT_RISCV_TAGGED_ADDR_CTRL 0x902 /* RISC-V tagged address control (prctl()) */
+#define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers */
+#define NT_LOONGARCH_CSR 0xa01 /* LoongArch control and status registers */
+#define NT_LOONGARCH_LSX 0xa02 /* LoongArch Loongson SIMD Extension registers */
+#define NT_LOONGARCH_LASX 0xa03 /* LoongArch Loongson Advanced SIMD Extension registers */
+#define NT_LOONGARCH_LBT 0xa04 /* LoongArch Loongson Binary Translation registers */
+#define NT_LOONGARCH_HW_BREAK 0xa05 /* LoongArch hardware breakpoint registers */
+#define NT_LOONGARCH_HW_WATCH 0xa06 /* LoongArch hardware watchpoint registers */
+
+/* Note types with note name "GNU" */
+#define NT_GNU_PROPERTY_TYPE_0 5
+
+/* Note header in a PT_NOTE section */
+typedef struct elf32_note {
+ Elf32_Word n_namesz; /* Name size */
+ Elf32_Word n_descsz; /* Content size */
+ Elf32_Word n_type; /* Content type */
+} Elf32_Nhdr;
+
+/* Note header in a PT_NOTE section */
+typedef struct elf64_note {
+ Elf64_Word n_namesz; /* Name size */
+ Elf64_Word n_descsz; /* Content size */
+ Elf64_Word n_type; /* Content type */
+} Elf64_Nhdr;
+
+/* .note.gnu.property types for EM_AARCH64: */
+#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
+
+/* Bits for GNU_PROPERTY_AARCH64_FEATURE_1_BTI */
+#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0)
+
+typedef struct {
+ Elf32_Half vd_version;
+ Elf32_Half vd_flags;
+ Elf32_Half vd_ndx;
+ Elf32_Half vd_cnt;
+ Elf32_Word vd_hash;
+ Elf32_Word vd_aux;
+ Elf32_Word vd_next;
+} Elf32_Verdef;
+
+typedef struct {
+ Elf64_Half vd_version;
+ Elf64_Half vd_flags;
+ Elf64_Half vd_ndx;
+ Elf64_Half vd_cnt;
+ Elf64_Word vd_hash;
+ Elf64_Word vd_aux;
+ Elf64_Word vd_next;
+} Elf64_Verdef;
+
+typedef struct {
+ Elf32_Word vda_name;
+ Elf32_Word vda_next;
+} Elf32_Verdaux;
+
+typedef struct {
+ Elf64_Word vda_name;
+ Elf64_Word vda_next;
+} Elf64_Verdaux;
+
+#endif /* _LINUX_ELF_H */
diff --git a/tools/include/uapi/linux/ethtool.h b/tools/include/uapi/linux/ethtool.h
deleted file mode 100644
index 47afae3895ec..000000000000
--- a/tools/include/uapi/linux/ethtool.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * ethtool.h: Defines for Linux ethtool.
- *
- * Copyright (C) 1998 David S. Miller (davem@redhat.com)
- * Copyright 2001 Jeff Garzik <jgarzik@pobox.com>
- * Portions Copyright 2001 Sun Microsystems (thockin@sun.com)
- * Portions Copyright 2002 Intel (eli.kupermann@intel.com,
- * christopher.leech@intel.com,
- * scott.feldman@intel.com)
- * Portions Copyright (C) Sun Microsystems 2008
- */
-
-#ifndef _UAPI_LINUX_ETHTOOL_H
-#define _UAPI_LINUX_ETHTOOL_H
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/if_ether.h>
-
-#define ETHTOOL_GCHANNELS 0x0000003c /* Get no of channels */
-
-/**
- * struct ethtool_channels - configuring number of network channel
- * @cmd: ETHTOOL_{G,S}CHANNELS
- * @max_rx: Read only. Maximum number of receive channel the driver support.
- * @max_tx: Read only. Maximum number of transmit channel the driver support.
- * @max_other: Read only. Maximum number of other channel the driver support.
- * @max_combined: Read only. Maximum number of combined channel the driver
- * support. Set of queues RX, TX or other.
- * @rx_count: Valid values are in the range 1 to the max_rx.
- * @tx_count: Valid values are in the range 1 to the max_tx.
- * @other_count: Valid values are in the range 1 to the max_other.
- * @combined_count: Valid values are in the range 1 to the max_combined.
- *
- * This can be used to configure RX, TX and other channels.
- */
-
-struct ethtool_channels {
- __u32 cmd;
- __u32 max_rx;
- __u32 max_tx;
- __u32 max_other;
- __u32 max_combined;
- __u32 rx_count;
- __u32 tx_count;
- __u32 other_count;
- __u32 combined_count;
-};
-
-#define ETHTOOL_FWVERS_LEN 32
-#define ETHTOOL_BUSINFO_LEN 32
-#define ETHTOOL_EROMVERS_LEN 32
-
-/**
- * struct ethtool_drvinfo - general driver and device information
- * @cmd: Command number = %ETHTOOL_GDRVINFO
- * @driver: Driver short name. This should normally match the name
- * in its bus driver structure (e.g. pci_driver::name). Must
- * not be an empty string.
- * @version: Driver version string; may be an empty string
- * @fw_version: Firmware version string; may be an empty string
- * @erom_version: Expansion ROM version string; may be an empty string
- * @bus_info: Device bus address. This should match the dev_name()
- * string for the underlying bus device, if there is one. May be
- * an empty string.
- * @reserved2: Reserved for future use; see the note on reserved space.
- * @n_priv_flags: Number of flags valid for %ETHTOOL_GPFLAGS and
- * %ETHTOOL_SPFLAGS commands; also the number of strings in the
- * %ETH_SS_PRIV_FLAGS set
- * @n_stats: Number of u64 statistics returned by the %ETHTOOL_GSTATS
- * command; also the number of strings in the %ETH_SS_STATS set
- * @testinfo_len: Number of results returned by the %ETHTOOL_TEST
- * command; also the number of strings in the %ETH_SS_TEST set
- * @eedump_len: Size of EEPROM accessible through the %ETHTOOL_GEEPROM
- * and %ETHTOOL_SEEPROM commands, in bytes
- * @regdump_len: Size of register dump returned by the %ETHTOOL_GREGS
- * command, in bytes
- *
- * Users can use the %ETHTOOL_GSSET_INFO command to get the number of
- * strings in any string set (from Linux 2.6.34).
- *
- * Drivers should set at most @driver, @version, @fw_version and
- * @bus_info in their get_drvinfo() implementation. The ethtool
- * core fills in the other fields using other driver operations.
- */
-struct ethtool_drvinfo {
- __u32 cmd;
- char driver[32];
- char version[32];
- char fw_version[ETHTOOL_FWVERS_LEN];
- char bus_info[ETHTOOL_BUSINFO_LEN];
- char erom_version[ETHTOOL_EROMVERS_LEN];
- char reserved2[12];
- __u32 n_priv_flags;
- __u32 n_stats;
- __u32 testinfo_len;
- __u32 eedump_len;
- __u32 regdump_len;
-};
-
-#define ETHTOOL_GDRVINFO 0x00000003
-
-#endif /* _UAPI_LINUX_ETHTOOL_H */
diff --git a/tools/include/uapi/linux/fanotify.h b/tools/include/uapi/linux/fanotify.h
new file mode 100644
index 000000000000..e710967c7c26
--- /dev/null
+++ b/tools/include/uapi/linux/fanotify.h
@@ -0,0 +1,274 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_FANOTIFY_H
+#define _UAPI_LINUX_FANOTIFY_H
+
+#include <linux/types.h>
+
+/* the following events that user-space can register for */
+#define FAN_ACCESS 0x00000001 /* File was accessed */
+#define FAN_MODIFY 0x00000002 /* File was modified */
+#define FAN_ATTRIB 0x00000004 /* Metadata changed */
+#define FAN_CLOSE_WRITE 0x00000008 /* Writable file closed */
+#define FAN_CLOSE_NOWRITE 0x00000010 /* Unwritable file closed */
+#define FAN_OPEN 0x00000020 /* File was opened */
+#define FAN_MOVED_FROM 0x00000040 /* File was moved from X */
+#define FAN_MOVED_TO 0x00000080 /* File was moved to Y */
+#define FAN_CREATE 0x00000100 /* Subfile was created */
+#define FAN_DELETE 0x00000200 /* Subfile was deleted */
+#define FAN_DELETE_SELF 0x00000400 /* Self was deleted */
+#define FAN_MOVE_SELF 0x00000800 /* Self was moved */
+#define FAN_OPEN_EXEC 0x00001000 /* File was opened for exec */
+
+#define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */
+#define FAN_FS_ERROR 0x00008000 /* Filesystem error */
+
+#define FAN_OPEN_PERM 0x00010000 /* File open in perm check */
+#define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */
+#define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */
+/* #define FAN_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */
+
+#define FAN_PRE_ACCESS 0x00100000 /* Pre-content access hook */
+#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */
+#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */
+
+#define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */
+
+#define FAN_RENAME 0x10000000 /* File was renamed */
+
+#define FAN_ONDIR 0x40000000 /* Event occurred against dir */
+
+/* helper events */
+#define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */
+#define FAN_MOVE (FAN_MOVED_FROM | FAN_MOVED_TO) /* moves */
+
+/* flags used for fanotify_init() */
+#define FAN_CLOEXEC 0x00000001
+#define FAN_NONBLOCK 0x00000002
+
+/* These are NOT bitwise flags. Both bits are used together. */
+#define FAN_CLASS_NOTIF 0x00000000
+#define FAN_CLASS_CONTENT 0x00000004
+#define FAN_CLASS_PRE_CONTENT 0x00000008
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \
+ FAN_CLASS_PRE_CONTENT)
+
+#define FAN_UNLIMITED_QUEUE 0x00000010
+#define FAN_UNLIMITED_MARKS 0x00000020
+#define FAN_ENABLE_AUDIT 0x00000040
+
+/* Flags to determine fanotify event format */
+#define FAN_REPORT_PIDFD 0x00000080 /* Report pidfd for event->pid */
+#define FAN_REPORT_TID 0x00000100 /* event->pid is thread id */
+#define FAN_REPORT_FID 0x00000200 /* Report unique file id */
+#define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */
+#define FAN_REPORT_NAME 0x00000800 /* Report events with name */
+#define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */
+#define FAN_REPORT_FD_ERROR 0x00002000 /* event->fd can report error */
+#define FAN_REPORT_MNT 0x00004000 /* Report mount events */
+
+/* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */
+#define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME)
+/* Convenience macro - FAN_REPORT_TARGET_FID requires all other FID flags */
+#define FAN_REPORT_DFID_NAME_TARGET (FAN_REPORT_DFID_NAME | \
+ FAN_REPORT_FID | FAN_REPORT_TARGET_FID)
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \
+ FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE |\
+ FAN_UNLIMITED_MARKS)
+
+/* flags used for fanotify_modify_mark() */
+#define FAN_MARK_ADD 0x00000001
+#define FAN_MARK_REMOVE 0x00000002
+#define FAN_MARK_DONT_FOLLOW 0x00000004
+#define FAN_MARK_ONLYDIR 0x00000008
+/* FAN_MARK_MOUNT is 0x00000010 */
+#define FAN_MARK_IGNORED_MASK 0x00000020
+#define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040
+#define FAN_MARK_FLUSH 0x00000080
+/* FAN_MARK_FILESYSTEM is 0x00000100 */
+#define FAN_MARK_EVICTABLE 0x00000200
+/* This bit is mutually exclusive with FAN_MARK_IGNORED_MASK bit */
+#define FAN_MARK_IGNORE 0x00000400
+
+/* These are NOT bitwise flags. Both bits can be used togther. */
+#define FAN_MARK_INODE 0x00000000
+#define FAN_MARK_MOUNT 0x00000010
+#define FAN_MARK_FILESYSTEM 0x00000100
+#define FAN_MARK_MNTNS 0x00000110
+
+/*
+ * Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY
+ * for non-inode mark types.
+ */
+#define FAN_MARK_IGNORE_SURV (FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY)
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\
+ FAN_MARK_REMOVE |\
+ FAN_MARK_DONT_FOLLOW |\
+ FAN_MARK_ONLYDIR |\
+ FAN_MARK_MOUNT |\
+ FAN_MARK_IGNORED_MASK |\
+ FAN_MARK_IGNORED_SURV_MODIFY |\
+ FAN_MARK_FLUSH)
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_EVENTS (FAN_ACCESS |\
+ FAN_MODIFY |\
+ FAN_CLOSE |\
+ FAN_OPEN)
+
+/*
+ * All events which require a permission response from userspace
+ */
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_PERM_EVENTS (FAN_OPEN_PERM |\
+ FAN_ACCESS_PERM)
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_OUTGOING_EVENTS (FAN_ALL_EVENTS |\
+ FAN_ALL_PERM_EVENTS |\
+ FAN_Q_OVERFLOW)
+
+#define FANOTIFY_METADATA_VERSION 3
+
+struct fanotify_event_metadata {
+ __u32 event_len;
+ __u8 vers;
+ __u8 reserved;
+ __u16 metadata_len;
+ __aligned_u64 mask;
+ __s32 fd;
+ __s32 pid;
+};
+
+#define FAN_EVENT_INFO_TYPE_FID 1
+#define FAN_EVENT_INFO_TYPE_DFID_NAME 2
+#define FAN_EVENT_INFO_TYPE_DFID 3
+#define FAN_EVENT_INFO_TYPE_PIDFD 4
+#define FAN_EVENT_INFO_TYPE_ERROR 5
+#define FAN_EVENT_INFO_TYPE_RANGE 6
+#define FAN_EVENT_INFO_TYPE_MNT 7
+
+/* Special info types for FAN_RENAME */
+#define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10
+/* Reserved for FAN_EVENT_INFO_TYPE_OLD_DFID 11 */
+#define FAN_EVENT_INFO_TYPE_NEW_DFID_NAME 12
+/* Reserved for FAN_EVENT_INFO_TYPE_NEW_DFID 13 */
+
+/* Variable length info record following event metadata */
+struct fanotify_event_info_header {
+ __u8 info_type;
+ __u8 pad;
+ __u16 len;
+};
+
+/*
+ * Unique file identifier info record.
+ * This structure is used for records of types FAN_EVENT_INFO_TYPE_FID,
+ * FAN_EVENT_INFO_TYPE_DFID and FAN_EVENT_INFO_TYPE_DFID_NAME.
+ * For FAN_EVENT_INFO_TYPE_DFID_NAME there is additionally a null terminated
+ * name immediately after the file handle.
+ */
+struct fanotify_event_info_fid {
+ struct fanotify_event_info_header hdr;
+ __kernel_fsid_t fsid;
+ /*
+ * Following is an opaque struct file_handle that can be passed as
+ * an argument to open_by_handle_at(2).
+ */
+ unsigned char handle[];
+};
+
+/*
+ * This structure is used for info records of type FAN_EVENT_INFO_TYPE_PIDFD.
+ * It holds a pidfd for the pid that was responsible for generating an event.
+ */
+struct fanotify_event_info_pidfd {
+ struct fanotify_event_info_header hdr;
+ __s32 pidfd;
+};
+
+struct fanotify_event_info_error {
+ struct fanotify_event_info_header hdr;
+ __s32 error;
+ __u32 error_count;
+};
+
+struct fanotify_event_info_range {
+ struct fanotify_event_info_header hdr;
+ __u32 pad;
+ __u64 offset;
+ __u64 count;
+};
+
+struct fanotify_event_info_mnt {
+ struct fanotify_event_info_header hdr;
+ __u64 mnt_id;
+};
+
+/*
+ * User space may need to record additional information about its decision.
+ * The extra information type records what kind of information is included.
+ * The default is none. We also define an extra information buffer whose
+ * size is determined by the extra information type.
+ *
+ * If the information type is Audit Rule, then the information following
+ * is the rule number that triggered the user space decision that
+ * requires auditing.
+ */
+
+#define FAN_RESPONSE_INFO_NONE 0
+#define FAN_RESPONSE_INFO_AUDIT_RULE 1
+
+struct fanotify_response {
+ __s32 fd;
+ __u32 response;
+};
+
+struct fanotify_response_info_header {
+ __u8 type;
+ __u8 pad;
+ __u16 len;
+};
+
+struct fanotify_response_info_audit_rule {
+ struct fanotify_response_info_header hdr;
+ __u32 rule_number;
+ __u32 subj_trust;
+ __u32 obj_trust;
+};
+
+/* Legit userspace responses to a _PERM event */
+#define FAN_ALLOW 0x01
+#define FAN_DENY 0x02
+/* errno other than EPERM can specified in upper byte of deny response */
+#define FAN_ERRNO_BITS 8
+#define FAN_ERRNO_SHIFT (32 - FAN_ERRNO_BITS)
+#define FAN_ERRNO_MASK ((1 << FAN_ERRNO_BITS) - 1)
+#define FAN_DENY_ERRNO(err) \
+ (FAN_DENY | ((((__u32)(err)) & FAN_ERRNO_MASK) << FAN_ERRNO_SHIFT))
+
+#define FAN_AUDIT 0x10 /* Bitmask to create audit record for result */
+#define FAN_INFO 0x20 /* Bitmask to indicate additional information */
+
+/* No fd set in event */
+#define FAN_NOFD -1
+#define FAN_NOPIDFD FAN_NOFD
+#define FAN_EPIDFD -2
+
+/* Helper functions to deal with fanotify_event_metadata buffers */
+#define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata))
+
+#define FAN_EVENT_NEXT(meta, len) ((len) -= (meta)->event_len, \
+ (struct fanotify_event_metadata*)(((char *)(meta)) + \
+ (meta)->event_len))
+
+#define FAN_EVENT_OK(meta, len) ((long)(len) >= (long)FAN_EVENT_METADATA_LEN && \
+ (long)(meta)->event_len >= (long)FAN_EVENT_METADATA_LEN && \
+ (long)(meta)->event_len <= (long)(len))
+
+#endif /* _UAPI_LINUX_FANOTIFY_H */
diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h
deleted file mode 100644
index 2f86b2ad6d7e..000000000000
--- a/tools/include/uapi/linux/fcntl.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _UAPI_LINUX_FCNTL_H
-#define _UAPI_LINUX_FCNTL_H
-
-#include <asm/fcntl.h>
-#include <linux/openat2.h>
-
-#define F_SETLEASE (F_LINUX_SPECIFIC_BASE + 0)
-#define F_GETLEASE (F_LINUX_SPECIFIC_BASE + 1)
-
-/*
- * Cancel a blocking posix lock; internal use only until we expose an
- * asynchronous lock api to userspace:
- */
-#define F_CANCELLK (F_LINUX_SPECIFIC_BASE + 5)
-
-/* Create a file descriptor with FD_CLOEXEC set. */
-#define F_DUPFD_CLOEXEC (F_LINUX_SPECIFIC_BASE + 6)
-
-/*
- * Request nofications on a directory.
- * See below for events that may be notified.
- */
-#define F_NOTIFY (F_LINUX_SPECIFIC_BASE+2)
-
-/*
- * Set and get of pipe page size array
- */
-#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7)
-#define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8)
-
-/*
- * Set/Get seals
- */
-#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
-#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
-
-/*
- * Types of seals
- */
-#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
-#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
-#define F_SEAL_GROW 0x0004 /* prevent file from growing */
-#define F_SEAL_WRITE 0x0008 /* prevent writes */
-#define F_SEAL_FUTURE_WRITE 0x0010 /* prevent future writes while mapped */
-/* (1U << 31) is reserved for signed error codes */
-
-/*
- * Set/Get write life time hints. {GET,SET}_RW_HINT operate on the
- * underlying inode, while {GET,SET}_FILE_RW_HINT operate only on
- * the specific file.
- */
-#define F_GET_RW_HINT (F_LINUX_SPECIFIC_BASE + 11)
-#define F_SET_RW_HINT (F_LINUX_SPECIFIC_BASE + 12)
-#define F_GET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 13)
-#define F_SET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 14)
-
-/*
- * Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
- * used to clear any hints previously set.
- */
-#define RWH_WRITE_LIFE_NOT_SET 0
-#define RWH_WRITE_LIFE_NONE 1
-#define RWH_WRITE_LIFE_SHORT 2
-#define RWH_WRITE_LIFE_MEDIUM 3
-#define RWH_WRITE_LIFE_LONG 4
-#define RWH_WRITE_LIFE_EXTREME 5
-
-/*
- * The originally introduced spelling is remained from the first
- * versions of the patch set that introduced the feature, see commit
- * v4.13-rc1~212^2~51.
- */
-#define RWF_WRITE_LIFE_NOT_SET RWH_WRITE_LIFE_NOT_SET
-
-/*
- * Types of directory notifications that may be requested.
- */
-#define DN_ACCESS 0x00000001 /* File accessed */
-#define DN_MODIFY 0x00000002 /* File modified */
-#define DN_CREATE 0x00000004 /* File created */
-#define DN_DELETE 0x00000008 /* File removed */
-#define DN_RENAME 0x00000010 /* File renamed */
-#define DN_ATTRIB 0x00000020 /* File changed attibutes */
-#define DN_MULTISHOT 0x80000000 /* Don't remove notifier */
-
-/*
- * The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS is
- * meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to
- * unlinkat. The two functions do completely different things and therefore,
- * the flags can be allowed to overlap. For example, passing AT_REMOVEDIR to
- * faccessat would be undefined behavior and thus treating it equivalent to
- * AT_EACCESS is valid undefined behavior.
- */
-#define AT_FDCWD -100 /* Special value used to indicate
- openat should use the current
- working directory. */
-#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */
-#define AT_EACCESS 0x200 /* Test access permitted for
- effective IDs, not real IDs. */
-#define AT_REMOVEDIR 0x200 /* Remove directory instead of
- unlinking file. */
-#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
-#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */
-#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */
-
-#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */
-#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */
-#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */
-#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */
-
-#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
-
-#endif /* _UAPI_LINUX_FCNTL_H */
diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h
index b7b56871029c..24ddf7bc4f25 100644
--- a/tools/include/uapi/linux/fs.h
+++ b/tools/include/uapi/linux/fs.h
@@ -28,8 +28,8 @@
* nr_file rlimit, so it's safe to set up a ridiculously high absolute
* upper limit on files-per-process.
*
- * Some programs (notably those using select()) may have to be
- * recompiled to take full advantage of the new limits..
+ * Some programs (notably those using select()) may have to be
+ * recompiled to take full advantage of the new limits..
*/
/* Fixed constants first: */
@@ -40,6 +40,15 @@
#define BLOCK_SIZE_BITS 10
#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
+/* flags for integrity meta */
+#define IO_INTEGRITY_CHK_GUARD (1U << 0) /* enforce guard check */
+#define IO_INTEGRITY_CHK_REFTAG (1U << 1) /* enforce ref check */
+#define IO_INTEGRITY_CHK_APPTAG (1U << 2) /* enforce app check */
+
+#define IO_INTEGRITY_VALID_FLAGS (IO_INTEGRITY_CHK_GUARD | \
+ IO_INTEGRITY_CHK_REFTAG | \
+ IO_INTEGRITY_CHK_APPTAG)
+
#define SEEK_SET 0 /* seek relative to beginning of file */
#define SEEK_CUR 1 /* seek relative to current file position */
#define SEEK_END 2 /* seek relative to end of file */
@@ -64,6 +73,24 @@ struct fstrim_range {
__u64 minlen;
};
+/*
+ * We include a length field because some filesystems (vfat) have an identifier
+ * that we do want to expose as a UUID, but doesn't have the standard length.
+ *
+ * We use a fixed size buffer beacuse this interface will, by fiat, never
+ * support "UUIDs" longer than 16 bytes; we don't want to force all downstream
+ * users to have to deal with that.
+ */
+struct fsuuid2 {
+ __u8 len;
+ __u8 uuid[16];
+};
+
+struct fs_sysfs_path {
+ __u8 len;
+ __u8 name[128];
+};
+
/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
#define FILE_DEDUPE_RANGE_SAME 0
#define FILE_DEDUPE_RANGE_DIFFERS 1
@@ -215,6 +242,13 @@ struct fsxattr {
#define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr)
#define FS_IOC_GETFSLABEL _IOR(0x94, 49, char[FSLABEL_MAX])
#define FS_IOC_SETFSLABEL _IOW(0x94, 50, char[FSLABEL_MAX])
+/* Returns the external filesystem UUID, the same one blkid returns */
+#define FS_IOC_GETFSUUID _IOR(0x15, 0, struct fsuuid2)
+/*
+ * Returns the path component under /sys/fs/ that refers to this filesystem;
+ * also /sys/kernel/debug/ for filesystems with debugfs exports
+ */
+#define FS_IOC_GETFSSYSFSPATH _IOR(0x15, 1, struct fs_sysfs_path)
/*
* Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
@@ -301,8 +335,235 @@ typedef int __bitwise __kernel_rwf_t;
/* per-IO O_APPEND */
#define RWF_APPEND ((__force __kernel_rwf_t)0x00000010)
+/* per-IO negation of O_APPEND */
+#define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020)
+
+/* Atomic Write */
+#define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040)
+
+/* buffered IO that drops the cache after reading or writing data */
+#define RWF_DONTCACHE ((__force __kernel_rwf_t)0x00000080)
+
/* mask of flags supported by the kernel */
#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
- RWF_APPEND)
+ RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\
+ RWF_DONTCACHE)
+
+#define PROCFS_IOCTL_MAGIC 'f'
+
+/* Pagemap ioctl */
+#define PAGEMAP_SCAN _IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg)
+
+/* Bitmasks provided in pm_scan_args masks and reported in page_region.categories. */
+#define PAGE_IS_WPALLOWED (1 << 0)
+#define PAGE_IS_WRITTEN (1 << 1)
+#define PAGE_IS_FILE (1 << 2)
+#define PAGE_IS_PRESENT (1 << 3)
+#define PAGE_IS_SWAPPED (1 << 4)
+#define PAGE_IS_PFNZERO (1 << 5)
+#define PAGE_IS_HUGE (1 << 6)
+#define PAGE_IS_SOFT_DIRTY (1 << 7)
+#define PAGE_IS_GUARD (1 << 8)
+
+/*
+ * struct page_region - Page region with flags
+ * @start: Start of the region
+ * @end: End of the region (exclusive)
+ * @categories: PAGE_IS_* category bitmask for the region
+ */
+struct page_region {
+ __u64 start;
+ __u64 end;
+ __u64 categories;
+};
+
+/* Flags for PAGEMAP_SCAN ioctl */
+#define PM_SCAN_WP_MATCHING (1 << 0) /* Write protect the pages matched. */
+#define PM_SCAN_CHECK_WPASYNC (1 << 1) /* Abort the scan when a non-WP-enabled page is found. */
+
+/*
+ * struct pm_scan_arg - Pagemap ioctl argument
+ * @size: Size of the structure
+ * @flags: Flags for the IOCTL
+ * @start: Starting address of the region
+ * @end: Ending address of the region
+ * @walk_end Address where the scan stopped (written by kernel).
+ * walk_end == end (address tags cleared) informs that the scan completed on entire range.
+ * @vec: Address of page_region struct array for output
+ * @vec_len: Length of the page_region struct array
+ * @max_pages: Optional limit for number of returned pages (0 = disabled)
+ * @category_inverted: PAGE_IS_* categories which values match if 0 instead of 1
+ * @category_mask: Skip pages for which any category doesn't match
+ * @category_anyof_mask: Skip pages for which no category matches
+ * @return_mask: PAGE_IS_* categories that are to be reported in `page_region`s returned
+ */
+struct pm_scan_arg {
+ __u64 size;
+ __u64 flags;
+ __u64 start;
+ __u64 end;
+ __u64 walk_end;
+ __u64 vec;
+ __u64 vec_len;
+ __u64 max_pages;
+ __u64 category_inverted;
+ __u64 category_mask;
+ __u64 category_anyof_mask;
+ __u64 return_mask;
+};
+
+/* /proc/<pid>/maps ioctl */
+#define PROCMAP_QUERY _IOWR(PROCFS_IOCTL_MAGIC, 17, struct procmap_query)
+
+enum procmap_query_flags {
+ /*
+ * VMA permission flags.
+ *
+ * Can be used as part of procmap_query.query_flags field to look up
+ * only VMAs satisfying specified subset of permissions. E.g., specifying
+ * PROCMAP_QUERY_VMA_READABLE only will return both readable and read/write VMAs,
+ * while having PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_WRITABLE will only
+ * return read/write VMAs, though both executable/non-executable and
+ * private/shared will be ignored.
+ *
+ * PROCMAP_QUERY_VMA_* flags are also returned in procmap_query.vma_flags
+ * field to specify actual VMA permissions.
+ */
+ PROCMAP_QUERY_VMA_READABLE = 0x01,
+ PROCMAP_QUERY_VMA_WRITABLE = 0x02,
+ PROCMAP_QUERY_VMA_EXECUTABLE = 0x04,
+ PROCMAP_QUERY_VMA_SHARED = 0x08,
+ /*
+ * Query modifier flags.
+ *
+ * By default VMA that covers provided address is returned, or -ENOENT
+ * is returned. With PROCMAP_QUERY_COVERING_OR_NEXT_VMA flag set, closest
+ * VMA with vma_start > addr will be returned if no covering VMA is
+ * found.
+ *
+ * PROCMAP_QUERY_FILE_BACKED_VMA instructs query to consider only VMAs that
+ * have file backing. Can be combined with PROCMAP_QUERY_COVERING_OR_NEXT_VMA
+ * to iterate all VMAs with file backing.
+ */
+ PROCMAP_QUERY_COVERING_OR_NEXT_VMA = 0x10,
+ PROCMAP_QUERY_FILE_BACKED_VMA = 0x20,
+};
+
+/*
+ * Input/output argument structured passed into ioctl() call. It can be used
+ * to query a set of VMAs (Virtual Memory Areas) of a process.
+ *
+ * Each field can be one of three kinds, marked in a short comment to the
+ * right of the field:
+ * - "in", input argument, user has to provide this value, kernel doesn't modify it;
+ * - "out", output argument, kernel sets this field with VMA data;
+ * - "in/out", input and output argument; user provides initial value (used
+ * to specify maximum allowable buffer size), and kernel sets it to actual
+ * amount of data written (or zero, if there is no data).
+ *
+ * If matching VMA is found (according to criterias specified by
+ * query_addr/query_flags, all the out fields are filled out, and ioctl()
+ * returns 0. If there is no matching VMA, -ENOENT will be returned.
+ * In case of any other error, negative error code other than -ENOENT is
+ * returned.
+ *
+ * Most of the data is similar to the one returned as text in /proc/<pid>/maps
+ * file, but procmap_query provides more querying flexibility. There are no
+ * consistency guarantees between subsequent ioctl() calls, but data returned
+ * for matched VMA is self-consistent.
+ */
+struct procmap_query {
+ /* Query struct size, for backwards/forward compatibility */
+ __u64 size;
+ /*
+ * Query flags, a combination of enum procmap_query_flags values.
+ * Defines query filtering and behavior, see enum procmap_query_flags.
+ *
+ * Input argument, provided by user. Kernel doesn't modify it.
+ */
+ __u64 query_flags; /* in */
+ /*
+ * Query address. By default, VMA that covers this address will
+ * be looked up. PROCMAP_QUERY_* flags above modify this default
+ * behavior further.
+ *
+ * Input argument, provided by user. Kernel doesn't modify it.
+ */
+ __u64 query_addr; /* in */
+ /* VMA starting (inclusive) and ending (exclusive) address, if VMA is found. */
+ __u64 vma_start; /* out */
+ __u64 vma_end; /* out */
+ /* VMA permissions flags. A combination of PROCMAP_QUERY_VMA_* flags. */
+ __u64 vma_flags; /* out */
+ /* VMA backing page size granularity. */
+ __u64 vma_page_size; /* out */
+ /*
+ * VMA file offset. If VMA has file backing, this specifies offset
+ * within the file that VMA's start address corresponds to.
+ * Is set to zero if VMA has no backing file.
+ */
+ __u64 vma_offset; /* out */
+ /* Backing file's inode number, or zero, if VMA has no backing file. */
+ __u64 inode; /* out */
+ /* Backing file's device major/minor number, or zero, if VMA has no backing file. */
+ __u32 dev_major; /* out */
+ __u32 dev_minor; /* out */
+ /*
+ * If set to non-zero value, signals the request to return VMA name
+ * (i.e., VMA's backing file's absolute path, with " (deleted)" suffix
+ * appended, if file was unlinked from FS) for matched VMA. VMA name
+ * can also be some special name (e.g., "[heap]", "[stack]") or could
+ * be even user-supplied with prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME).
+ *
+ * Kernel will set this field to zero, if VMA has no associated name.
+ * Otherwise kernel will return actual amount of bytes filled in
+ * user-supplied buffer (see vma_name_addr field below), including the
+ * terminating zero.
+ *
+ * If VMA name is longer that user-supplied maximum buffer size,
+ * -E2BIG error is returned.
+ *
+ * If this field is set to non-zero value, vma_name_addr should point
+ * to valid user space memory buffer of at least vma_name_size bytes.
+ * If set to zero, vma_name_addr should be set to zero as well
+ */
+ __u32 vma_name_size; /* in/out */
+ /*
+ * If set to non-zero value, signals the request to extract and return
+ * VMA's backing file's build ID, if the backing file is an ELF file
+ * and it contains embedded build ID.
+ *
+ * Kernel will set this field to zero, if VMA has no backing file,
+ * backing file is not an ELF file, or ELF file has no build ID
+ * embedded.
+ *
+ * Build ID is a binary value (not a string). Kernel will set
+ * build_id_size field to exact number of bytes used for build ID.
+ * If build ID is requested and present, but needs more bytes than
+ * user-supplied maximum buffer size (see build_id_addr field below),
+ * -E2BIG error will be returned.
+ *
+ * If this field is set to non-zero value, build_id_addr should point
+ * to valid user space memory buffer of at least build_id_size bytes.
+ * If set to zero, build_id_addr should be set to zero as well
+ */
+ __u32 build_id_size; /* in/out */
+ /*
+ * User-supplied address of a buffer of at least vma_name_size bytes
+ * for kernel to fill with matched VMA's name (see vma_name_size field
+ * description above for details).
+ *
+ * Should be set to zero if VMA name should not be returned.
+ */
+ __u64 vma_name_addr; /* in */
+ /*
+ * User-supplied address of a buffer of at least build_id_size bytes
+ * for kernel to fill with matched VMA's ELF build ID, if available
+ * (see build_id_size field description above for details).
+ *
+ * Should be set to zero if build ID should not be returned.
+ */
+ __u64 build_id_addr; /* in */
+};
#endif /* _UAPI_LINUX_FS_H */
diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h
index fd1fb0d5389d..3aff99f2696a 100644
--- a/tools/include/uapi/linux/fscrypt.h
+++ b/tools/include/uapi/linux/fscrypt.h
@@ -71,7 +71,8 @@ struct fscrypt_policy_v2 {
__u8 contents_encryption_mode;
__u8 filenames_encryption_mode;
__u8 flags;
- __u8 __reserved[4];
+ __u8 log2_data_unit_size;
+ __u8 __reserved[3];
__u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE];
};
@@ -118,7 +119,7 @@ struct fscrypt_key_specifier {
*/
struct fscrypt_provisioning_key_payload {
__u32 type;
- __u32 __reserved;
+ __u32 flags;
__u8 raw[];
};
@@ -127,7 +128,9 @@ struct fscrypt_add_key_arg {
struct fscrypt_key_specifier key_spec;
__u32 raw_size;
__u32 key_id;
- __u32 __reserved[8];
+#define FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED 0x00000001
+ __u32 flags;
+ __u32 __reserved[7];
__u8 raw[];
};
diff --git a/tools/include/uapi/linux/genetlink.h b/tools/include/uapi/linux/genetlink.h
new file mode 100644
index 000000000000..ddba3ca01e39
--- /dev/null
+++ b/tools/include/uapi/linux/genetlink.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_GENERIC_NETLINK_H
+#define _UAPI__LINUX_GENERIC_NETLINK_H
+
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+#define GENL_NAMSIZ 16 /* length of family name */
+
+#define GENL_MIN_ID NLMSG_MIN_TYPE
+#define GENL_MAX_ID 1023
+
+struct genlmsghdr {
+ __u8 cmd;
+ __u8 version;
+ __u16 reserved;
+};
+
+#define GENL_HDRLEN NLMSG_ALIGN(sizeof(struct genlmsghdr))
+
+#define GENL_ADMIN_PERM 0x01
+#define GENL_CMD_CAP_DO 0x02
+#define GENL_CMD_CAP_DUMP 0x04
+#define GENL_CMD_CAP_HASPOL 0x08
+#define GENL_UNS_ADMIN_PERM 0x10
+
+/*
+ * List of reserved static generic netlink identifiers:
+ */
+#define GENL_ID_CTRL NLMSG_MIN_TYPE
+#define GENL_ID_VFS_DQUOT (NLMSG_MIN_TYPE + 1)
+#define GENL_ID_PMCRAID (NLMSG_MIN_TYPE + 2)
+/* must be last reserved + 1 */
+#define GENL_START_ALLOC (NLMSG_MIN_TYPE + 3)
+
+/**************************************************************************
+ * Controller
+ **************************************************************************/
+
+enum {
+ CTRL_CMD_UNSPEC,
+ CTRL_CMD_NEWFAMILY,
+ CTRL_CMD_DELFAMILY,
+ CTRL_CMD_GETFAMILY,
+ CTRL_CMD_NEWOPS,
+ CTRL_CMD_DELOPS,
+ CTRL_CMD_GETOPS,
+ CTRL_CMD_NEWMCAST_GRP,
+ CTRL_CMD_DELMCAST_GRP,
+ CTRL_CMD_GETMCAST_GRP, /* unused */
+ CTRL_CMD_GETPOLICY,
+ __CTRL_CMD_MAX,
+};
+
+#define CTRL_CMD_MAX (__CTRL_CMD_MAX - 1)
+
+enum {
+ CTRL_ATTR_UNSPEC,
+ CTRL_ATTR_FAMILY_ID,
+ CTRL_ATTR_FAMILY_NAME,
+ CTRL_ATTR_VERSION,
+ CTRL_ATTR_HDRSIZE,
+ CTRL_ATTR_MAXATTR,
+ CTRL_ATTR_OPS,
+ CTRL_ATTR_MCAST_GROUPS,
+ CTRL_ATTR_POLICY,
+ CTRL_ATTR_OP_POLICY,
+ CTRL_ATTR_OP,
+ __CTRL_ATTR_MAX,
+};
+
+#define CTRL_ATTR_MAX (__CTRL_ATTR_MAX - 1)
+
+enum {
+ CTRL_ATTR_OP_UNSPEC,
+ CTRL_ATTR_OP_ID,
+ CTRL_ATTR_OP_FLAGS,
+ __CTRL_ATTR_OP_MAX,
+};
+
+#define CTRL_ATTR_OP_MAX (__CTRL_ATTR_OP_MAX - 1)
+
+enum {
+ CTRL_ATTR_MCAST_GRP_UNSPEC,
+ CTRL_ATTR_MCAST_GRP_NAME,
+ CTRL_ATTR_MCAST_GRP_ID,
+ __CTRL_ATTR_MCAST_GRP_MAX,
+};
+
+#define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1)
+
+enum {
+ CTRL_ATTR_POLICY_UNSPEC,
+ CTRL_ATTR_POLICY_DO,
+ CTRL_ATTR_POLICY_DUMP,
+
+ __CTRL_ATTR_POLICY_DUMP_MAX,
+ CTRL_ATTR_POLICY_DUMP_MAX = __CTRL_ATTR_POLICY_DUMP_MAX - 1
+};
+
+#define CTRL_ATTR_POLICY_MAX (__CTRL_ATTR_POLICY_DUMP_MAX - 1)
+
+#endif /* _UAPI__LINUX_GENERIC_NETLINK_H */
diff --git a/tools/include/uapi/linux/hw_breakpoint.h b/tools/include/uapi/linux/hw_breakpoint.h
index 965e4d8606d8..1575d3ca6f0d 100644
--- a/tools/include/uapi/linux/hw_breakpoint.h
+++ b/tools/include/uapi/linux/hw_breakpoint.h
@@ -22,14 +22,4 @@ enum {
HW_BREAKPOINT_INVALID = HW_BREAKPOINT_RW | HW_BREAKPOINT_X,
};
-enum bp_type_idx {
- TYPE_INST = 0,
-#ifdef CONFIG_HAVE_MIXED_BREAKPOINTS_REGS
- TYPE_DATA = 0,
-#else
- TYPE_DATA = 1,
-#endif
- TYPE_MAX
-};
-
#endif /* _UAPI_LINUX_HW_BREAKPOINT_H */
diff --git a/tools/include/uapi/linux/if_addr.h b/tools/include/uapi/linux/if_addr.h
new file mode 100644
index 000000000000..aa7958b4e41d
--- /dev/null
+++ b/tools/include/uapi/linux/if_addr.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_IF_ADDR_H
+#define _UAPI__LINUX_IF_ADDR_H
+
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+struct ifaddrmsg {
+ __u8 ifa_family;
+ __u8 ifa_prefixlen; /* The prefix length */
+ __u8 ifa_flags; /* Flags */
+ __u8 ifa_scope; /* Address scope */
+ __u32 ifa_index; /* Link index */
+};
+
+/*
+ * Important comment:
+ * IFA_ADDRESS is prefix address, rather than local interface address.
+ * It makes no difference for normally configured broadcast interfaces,
+ * but for point-to-point IFA_ADDRESS is DESTINATION address,
+ * local address is supplied in IFA_LOCAL attribute.
+ *
+ * IFA_FLAGS is a u32 attribute that extends the u8 field ifa_flags.
+ * If present, the value from struct ifaddrmsg will be ignored.
+ */
+enum {
+ IFA_UNSPEC,
+ IFA_ADDRESS,
+ IFA_LOCAL,
+ IFA_LABEL,
+ IFA_BROADCAST,
+ IFA_ANYCAST,
+ IFA_CACHEINFO,
+ IFA_MULTICAST,
+ IFA_FLAGS,
+ IFA_RT_PRIORITY, /* u32, priority/metric for prefix route */
+ IFA_TARGET_NETNSID,
+ IFA_PROTO, /* u8, address protocol */
+ __IFA_MAX,
+};
+
+#define IFA_MAX (__IFA_MAX - 1)
+
+/* ifa_flags */
+#define IFA_F_SECONDARY 0x01
+#define IFA_F_TEMPORARY IFA_F_SECONDARY
+
+#define IFA_F_NODAD 0x02
+#define IFA_F_OPTIMISTIC 0x04
+#define IFA_F_DADFAILED 0x08
+#define IFA_F_HOMEADDRESS 0x10
+#define IFA_F_DEPRECATED 0x20
+#define IFA_F_TENTATIVE 0x40
+#define IFA_F_PERMANENT 0x80
+#define IFA_F_MANAGETEMPADDR 0x100
+#define IFA_F_NOPREFIXROUTE 0x200
+#define IFA_F_MCAUTOJOIN 0x400
+#define IFA_F_STABLE_PRIVACY 0x800
+
+struct ifa_cacheinfo {
+ __u32 ifa_prefered;
+ __u32 ifa_valid;
+ __u32 cstamp; /* created timestamp, hundredths of seconds */
+ __u32 tstamp; /* updated timestamp, hundredths of seconds */
+};
+
+/* backwards compatibility for userspace */
+#ifndef __KERNEL__
+#define IFA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifaddrmsg))))
+#define IFA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifaddrmsg))
+#endif
+
+/* ifa_proto */
+#define IFAPROT_UNSPEC 0
+#define IFAPROT_KERNEL_LO 1 /* loopback */
+#define IFAPROT_KERNEL_RA 2 /* set by kernel from router announcement */
+#define IFAPROT_KERNEL_LL 3 /* link-local set by kernel */
+
+#endif
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 901d98b865a1..7e46ca4cd31b 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -211,6 +211,9 @@ struct rtnl_link_stats {
* @rx_nohandler: Number of packets received on the interface
* but dropped by the networking stack because the device is
* not designated to receive packets (e.g. backup link in a bond).
+ *
+ * @rx_otherhost_dropped: Number of packets dropped due to mismatch
+ * in destination MAC address.
*/
struct rtnl_link_stats64 {
__u64 rx_packets;
@@ -243,6 +246,23 @@ struct rtnl_link_stats64 {
__u64 rx_compressed;
__u64 tx_compressed;
__u64 rx_nohandler;
+
+ __u64 rx_otherhost_dropped;
+};
+
+/* Subset of link stats useful for in-HW collection. Meaning of the fields is as
+ * for struct rtnl_link_stats64.
+ */
+struct rtnl_hw_stats64 {
+ __u64 rx_packets;
+ __u64 tx_packets;
+ __u64 rx_bytes;
+ __u64 tx_bytes;
+ __u64 rx_errors;
+ __u64 tx_errors;
+ __u64 rx_dropped;
+ __u64 tx_dropped;
+ __u64 multicast;
};
/* The struct should be in sync with struct ifmap */
@@ -350,7 +370,14 @@ enum {
IFLA_GRO_MAX_SIZE,
IFLA_TSO_MAX_SIZE,
IFLA_TSO_MAX_SEGS,
+ IFLA_ALLMULTI, /* Allmulti count: > 0 means acts ALLMULTI */
+
+ IFLA_DEVLINK_PORT,
+ IFLA_GSO_IPV4_MAX_SIZE,
+ IFLA_GRO_IPV4_MAX_SIZE,
+ IFLA_DPLL_PIN,
+ IFLA_MAX_PACING_OFFLOAD_HORIZON,
__IFLA_MAX
};
@@ -435,6 +462,286 @@ enum in6_addr_gen_mode {
/* Bridge section */
+/**
+ * DOC: Bridge enum definition
+ *
+ * Please *note* that the timer values in the following section are expected
+ * in clock_t format, which is seconds multiplied by USER_HZ (generally
+ * defined as 100).
+ *
+ * @IFLA_BR_FORWARD_DELAY
+ * The bridge forwarding delay is the time spent in LISTENING state
+ * (before moving to LEARNING) and in LEARNING state (before moving
+ * to FORWARDING). Only relevant if STP is enabled.
+ *
+ * The valid values are between (2 * USER_HZ) and (30 * USER_HZ).
+ * The default value is (15 * USER_HZ).
+ *
+ * @IFLA_BR_HELLO_TIME
+ * The time between hello packets sent by the bridge, when it is a root
+ * bridge or a designated bridge. Only relevant if STP is enabled.
+ *
+ * The valid values are between (1 * USER_HZ) and (10 * USER_HZ).
+ * The default value is (2 * USER_HZ).
+ *
+ * @IFLA_BR_MAX_AGE
+ * The hello packet timeout is the time until another bridge in the
+ * spanning tree is assumed to be dead, after reception of its last hello
+ * message. Only relevant if STP is enabled.
+ *
+ * The valid values are between (6 * USER_HZ) and (40 * USER_HZ).
+ * The default value is (20 * USER_HZ).
+ *
+ * @IFLA_BR_AGEING_TIME
+ * Configure the bridge's FDB entries aging time. It is the time a MAC
+ * address will be kept in the FDB after a packet has been received from
+ * that address. After this time has passed, entries are cleaned up.
+ * Allow values outside the 802.1 standard specification for special cases:
+ *
+ * * 0 - entry never ages (all permanent)
+ * * 1 - entry disappears (no persistence)
+ *
+ * The default value is (300 * USER_HZ).
+ *
+ * @IFLA_BR_STP_STATE
+ * Turn spanning tree protocol on (*IFLA_BR_STP_STATE* > 0) or off
+ * (*IFLA_BR_STP_STATE* == 0) for this bridge.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_PRIORITY
+ * Set this bridge's spanning tree priority, used during STP root bridge
+ * election.
+ *
+ * The valid values are between 0 and 65535.
+ *
+ * @IFLA_BR_VLAN_FILTERING
+ * Turn VLAN filtering on (*IFLA_BR_VLAN_FILTERING* > 0) or off
+ * (*IFLA_BR_VLAN_FILTERING* == 0). When disabled, the bridge will not
+ * consider the VLAN tag when handling packets.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_VLAN_PROTOCOL
+ * Set the protocol used for VLAN filtering.
+ *
+ * The valid values are 0x8100(802.1Q) or 0x88A8(802.1AD). The default value
+ * is 0x8100(802.1Q).
+ *
+ * @IFLA_BR_GROUP_FWD_MASK
+ * The group forwarding mask. This is the bitmask that is applied to
+ * decide whether to forward incoming frames destined to link-local
+ * addresses (of the form 01:80:C2:00:00:0X).
+ *
+ * The default value is 0, which means the bridge does not forward any
+ * link-local frames coming on this port.
+ *
+ * @IFLA_BR_ROOT_ID
+ * The bridge root id, read only.
+ *
+ * @IFLA_BR_BRIDGE_ID
+ * The bridge id, read only.
+ *
+ * @IFLA_BR_ROOT_PORT
+ * The bridge root port, read only.
+ *
+ * @IFLA_BR_ROOT_PATH_COST
+ * The bridge root path cost, read only.
+ *
+ * @IFLA_BR_TOPOLOGY_CHANGE
+ * The bridge topology change, read only.
+ *
+ * @IFLA_BR_TOPOLOGY_CHANGE_DETECTED
+ * The bridge topology change detected, read only.
+ *
+ * @IFLA_BR_HELLO_TIMER
+ * The bridge hello timer, read only.
+ *
+ * @IFLA_BR_TCN_TIMER
+ * The bridge tcn timer, read only.
+ *
+ * @IFLA_BR_TOPOLOGY_CHANGE_TIMER
+ * The bridge topology change timer, read only.
+ *
+ * @IFLA_BR_GC_TIMER
+ * The bridge gc timer, read only.
+ *
+ * @IFLA_BR_GROUP_ADDR
+ * Set the MAC address of the multicast group this bridge uses for STP.
+ * The address must be a link-local address in standard Ethernet MAC address
+ * format. It is an address of the form 01:80:C2:00:00:0X, with X in [0, 4..f].
+ *
+ * The default value is 0.
+ *
+ * @IFLA_BR_FDB_FLUSH
+ * Flush bridge's fdb dynamic entries.
+ *
+ * @IFLA_BR_MCAST_ROUTER
+ * Set bridge's multicast router if IGMP snooping is enabled.
+ * The valid values are:
+ *
+ * * 0 - disabled.
+ * * 1 - automatic (queried).
+ * * 2 - permanently enabled.
+ *
+ * The default value is 1.
+ *
+ * @IFLA_BR_MCAST_SNOOPING
+ * Turn multicast snooping on (*IFLA_BR_MCAST_SNOOPING* > 0) or off
+ * (*IFLA_BR_MCAST_SNOOPING* == 0).
+ *
+ * The default value is 1.
+ *
+ * @IFLA_BR_MCAST_QUERY_USE_IFADDR
+ * If enabled use the bridge's own IP address as source address for IGMP
+ * queries (*IFLA_BR_MCAST_QUERY_USE_IFADDR* > 0) or the default of 0.0.0.0
+ * (*IFLA_BR_MCAST_QUERY_USE_IFADDR* == 0).
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_MCAST_QUERIER
+ * Enable (*IFLA_BR_MULTICAST_QUERIER* > 0) or disable
+ * (*IFLA_BR_MULTICAST_QUERIER* == 0) IGMP querier, ie sending of multicast
+ * queries by the bridge.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_MCAST_HASH_ELASTICITY
+ * Set multicast database hash elasticity, It is the maximum chain length in
+ * the multicast hash table. This attribute is *deprecated* and the value
+ * is always 16.
+ *
+ * @IFLA_BR_MCAST_HASH_MAX
+ * Set maximum size of the multicast hash table
+ *
+ * The default value is 4096, the value must be a power of 2.
+ *
+ * @IFLA_BR_MCAST_LAST_MEMBER_CNT
+ * The Last Member Query Count is the number of Group-Specific Queries
+ * sent before the router assumes there are no local members. The Last
+ * Member Query Count is also the number of Group-and-Source-Specific
+ * Queries sent before the router assumes there are no listeners for a
+ * particular source.
+ *
+ * The default value is 2.
+ *
+ * @IFLA_BR_MCAST_STARTUP_QUERY_CNT
+ * The Startup Query Count is the number of Queries sent out on startup,
+ * separated by the Startup Query Interval.
+ *
+ * The default value is 2.
+ *
+ * @IFLA_BR_MCAST_LAST_MEMBER_INTVL
+ * The Last Member Query Interval is the Max Response Time inserted into
+ * Group-Specific Queries sent in response to Leave Group messages, and
+ * is also the amount of time between Group-Specific Query messages.
+ *
+ * The default value is (1 * USER_HZ).
+ *
+ * @IFLA_BR_MCAST_MEMBERSHIP_INTVL
+ * The interval after which the bridge will leave a group, if no membership
+ * reports for this group are received.
+ *
+ * The default value is (260 * USER_HZ).
+ *
+ * @IFLA_BR_MCAST_QUERIER_INTVL
+ * The interval between queries sent by other routers. if no queries are
+ * seen after this delay has passed, the bridge will start to send its own
+ * queries (as if *IFLA_BR_MCAST_QUERIER_INTVL* was enabled).
+ *
+ * The default value is (255 * USER_HZ).
+ *
+ * @IFLA_BR_MCAST_QUERY_INTVL
+ * The Query Interval is the interval between General Queries sent by
+ * the Querier.
+ *
+ * The default value is (125 * USER_HZ). The minimum value is (1 * USER_HZ).
+ *
+ * @IFLA_BR_MCAST_QUERY_RESPONSE_INTVL
+ * The Max Response Time used to calculate the Max Resp Code inserted
+ * into the periodic General Queries.
+ *
+ * The default value is (10 * USER_HZ).
+ *
+ * @IFLA_BR_MCAST_STARTUP_QUERY_INTVL
+ * The interval between queries in the startup phase.
+ *
+ * The default value is (125 * USER_HZ) / 4. The minimum value is (1 * USER_HZ).
+ *
+ * @IFLA_BR_NF_CALL_IPTABLES
+ * Enable (*NF_CALL_IPTABLES* > 0) or disable (*NF_CALL_IPTABLES* == 0)
+ * iptables hooks on the bridge.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_NF_CALL_IP6TABLES
+ * Enable (*NF_CALL_IP6TABLES* > 0) or disable (*NF_CALL_IP6TABLES* == 0)
+ * ip6tables hooks on the bridge.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_NF_CALL_ARPTABLES
+ * Enable (*NF_CALL_ARPTABLES* > 0) or disable (*NF_CALL_ARPTABLES* == 0)
+ * arptables hooks on the bridge.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_VLAN_DEFAULT_PVID
+ * VLAN ID applied to untagged and priority-tagged incoming packets.
+ *
+ * The default value is 1. Setting to the special value 0 makes all ports of
+ * this bridge not have a PVID by default, which means that they will
+ * not accept VLAN-untagged traffic.
+ *
+ * @IFLA_BR_PAD
+ * Bridge attribute padding type for netlink message.
+ *
+ * @IFLA_BR_VLAN_STATS_ENABLED
+ * Enable (*IFLA_BR_VLAN_STATS_ENABLED* == 1) or disable
+ * (*IFLA_BR_VLAN_STATS_ENABLED* == 0) per-VLAN stats accounting.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_MCAST_STATS_ENABLED
+ * Enable (*IFLA_BR_MCAST_STATS_ENABLED* > 0) or disable
+ * (*IFLA_BR_MCAST_STATS_ENABLED* == 0) multicast (IGMP/MLD) stats
+ * accounting.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_MCAST_IGMP_VERSION
+ * Set the IGMP version.
+ *
+ * The valid values are 2 and 3. The default value is 2.
+ *
+ * @IFLA_BR_MCAST_MLD_VERSION
+ * Set the MLD version.
+ *
+ * The valid values are 1 and 2. The default value is 1.
+ *
+ * @IFLA_BR_VLAN_STATS_PER_PORT
+ * Enable (*IFLA_BR_VLAN_STATS_PER_PORT* == 1) or disable
+ * (*IFLA_BR_VLAN_STATS_PER_PORT* == 0) per-VLAN per-port stats accounting.
+ * Can be changed only when there are no port VLANs configured.
+ *
+ * The default value is 0 (disabled).
+ *
+ * @IFLA_BR_MULTI_BOOLOPT
+ * The multi_boolopt is used to control new boolean options to avoid adding
+ * new netlink attributes. You can look at ``enum br_boolopt_id`` for those
+ * options.
+ *
+ * @IFLA_BR_MCAST_QUERIER_STATE
+ * Bridge mcast querier states, read only.
+ *
+ * @IFLA_BR_FDB_N_LEARNED
+ * The number of dynamically learned FDB entries for the current bridge,
+ * read only.
+ *
+ * @IFLA_BR_FDB_MAX_LEARNED
+ * Set the number of max dynamically learned FDB entries for the current
+ * bridge.
+ */
enum {
IFLA_BR_UNSPEC,
IFLA_BR_FORWARD_DELAY,
@@ -484,6 +791,8 @@ enum {
IFLA_BR_VLAN_STATS_PER_PORT,
IFLA_BR_MULTI_BOOLOPT,
IFLA_BR_MCAST_QUERIER_STATE,
+ IFLA_BR_FDB_N_LEARNED,
+ IFLA_BR_FDB_MAX_LEARNED,
__IFLA_BR_MAX,
};
@@ -494,11 +803,252 @@ struct ifla_bridge_id {
__u8 addr[6]; /* ETH_ALEN */
};
+/**
+ * DOC: Bridge mode enum definition
+ *
+ * @BRIDGE_MODE_HAIRPIN
+ * Controls whether traffic may be sent back out of the port on which it
+ * was received. This option is also called reflective relay mode, and is
+ * used to support basic VEPA (Virtual Ethernet Port Aggregator)
+ * capabilities. By default, this flag is turned off and the bridge will
+ * not forward traffic back out of the receiving port.
+ */
enum {
BRIDGE_MODE_UNSPEC,
BRIDGE_MODE_HAIRPIN,
};
+/**
+ * DOC: Bridge port enum definition
+ *
+ * @IFLA_BRPORT_STATE
+ * The operation state of the port. Here are the valid values.
+ *
+ * * 0 - port is in STP *DISABLED* state. Make this port completely
+ * inactive for STP. This is also called BPDU filter and could be used
+ * to disable STP on an untrusted port, like a leaf virtual device.
+ * The traffic forwarding is also stopped on this port.
+ * * 1 - port is in STP *LISTENING* state. Only valid if STP is enabled
+ * on the bridge. In this state the port listens for STP BPDUs and
+ * drops all other traffic frames.
+ * * 2 - port is in STP *LEARNING* state. Only valid if STP is enabled on
+ * the bridge. In this state the port will accept traffic only for the
+ * purpose of updating MAC address tables.
+ * * 3 - port is in STP *FORWARDING* state. Port is fully active.
+ * * 4 - port is in STP *BLOCKING* state. Only valid if STP is enabled on
+ * the bridge. This state is used during the STP election process.
+ * In this state, port will only process STP BPDUs.
+ *
+ * @IFLA_BRPORT_PRIORITY
+ * The STP port priority. The valid values are between 0 and 255.
+ *
+ * @IFLA_BRPORT_COST
+ * The STP path cost of the port. The valid values are between 1 and 65535.
+ *
+ * @IFLA_BRPORT_MODE
+ * Set the bridge port mode. See *BRIDGE_MODE_HAIRPIN* for more details.
+ *
+ * @IFLA_BRPORT_GUARD
+ * Controls whether STP BPDUs will be processed by the bridge port. By
+ * default, the flag is turned off to allow BPDU processing. Turning this
+ * flag on will disable the bridge port if a STP BPDU packet is received.
+ *
+ * If the bridge has Spanning Tree enabled, hostile devices on the network
+ * may send BPDU on a port and cause network failure. Setting *guard on*
+ * will detect and stop this by disabling the port. The port will be
+ * restarted if the link is brought down, or removed and reattached.
+ *
+ * @IFLA_BRPORT_PROTECT
+ * Controls whether a given port is allowed to become a root port or not.
+ * Only used when STP is enabled on the bridge. By default the flag is off.
+ *
+ * This feature is also called root port guard. If BPDU is received from a
+ * leaf (edge) port, it should not be elected as root port. This could
+ * be used if using STP on a bridge and the downstream bridges are not fully
+ * trusted; this prevents a hostile guest from rerouting traffic.
+ *
+ * @IFLA_BRPORT_FAST_LEAVE
+ * This flag allows the bridge to immediately stop multicast traffic
+ * forwarding on a port that receives an IGMP Leave message. It is only used
+ * when IGMP snooping is enabled on the bridge. By default the flag is off.
+ *
+ * @IFLA_BRPORT_LEARNING
+ * Controls whether a given port will learn *source* MAC addresses from
+ * received traffic or not. Also controls whether dynamic FDB entries
+ * (which can also be added by software) will be refreshed by incoming
+ * traffic. By default this flag is on.
+ *
+ * @IFLA_BRPORT_UNICAST_FLOOD
+ * Controls whether unicast traffic for which there is no FDB entry will
+ * be flooded towards this port. By default this flag is on.
+ *
+ * @IFLA_BRPORT_PROXYARP
+ * Enable proxy ARP on this port.
+ *
+ * @IFLA_BRPORT_LEARNING_SYNC
+ * Controls whether a given port will sync MAC addresses learned on device
+ * port to bridge FDB.
+ *
+ * @IFLA_BRPORT_PROXYARP_WIFI
+ * Enable proxy ARP on this port which meets extended requirements by
+ * IEEE 802.11 and Hotspot 2.0 specifications.
+ *
+ * @IFLA_BRPORT_ROOT_ID
+ *
+ * @IFLA_BRPORT_BRIDGE_ID
+ *
+ * @IFLA_BRPORT_DESIGNATED_PORT
+ *
+ * @IFLA_BRPORT_DESIGNATED_COST
+ *
+ * @IFLA_BRPORT_ID
+ *
+ * @IFLA_BRPORT_NO
+ *
+ * @IFLA_BRPORT_TOPOLOGY_CHANGE_ACK
+ *
+ * @IFLA_BRPORT_CONFIG_PENDING
+ *
+ * @IFLA_BRPORT_MESSAGE_AGE_TIMER
+ *
+ * @IFLA_BRPORT_FORWARD_DELAY_TIMER
+ *
+ * @IFLA_BRPORT_HOLD_TIMER
+ *
+ * @IFLA_BRPORT_FLUSH
+ * Flush bridge ports' fdb dynamic entries.
+ *
+ * @IFLA_BRPORT_MULTICAST_ROUTER
+ * Configure the port's multicast router presence. A port with
+ * a multicast router will receive all multicast traffic.
+ * The valid values are:
+ *
+ * * 0 disable multicast routers on this port
+ * * 1 let the system detect the presence of routers (default)
+ * * 2 permanently enable multicast traffic forwarding on this port
+ * * 3 enable multicast routers temporarily on this port, not depending
+ * on incoming queries.
+ *
+ * @IFLA_BRPORT_PAD
+ *
+ * @IFLA_BRPORT_MCAST_FLOOD
+ * Controls whether a given port will flood multicast traffic for which
+ * there is no MDB entry. By default this flag is on.
+ *
+ * @IFLA_BRPORT_MCAST_TO_UCAST
+ * Controls whether a given port will replicate packets using unicast
+ * instead of multicast. By default this flag is off.
+ *
+ * This is done by copying the packet per host and changing the multicast
+ * destination MAC to a unicast one accordingly.
+ *
+ * *mcast_to_unicast* works on top of the multicast snooping feature of the
+ * bridge. Which means unicast copies are only delivered to hosts which
+ * are interested in unicast and signaled this via IGMP/MLD reports previously.
+ *
+ * This feature is intended for interface types which have a more reliable
+ * and/or efficient way to deliver unicast packets than broadcast ones
+ * (e.g. WiFi).
+ *
+ * However, it should only be enabled on interfaces where no IGMPv2/MLDv1
+ * report suppression takes place. IGMP/MLD report suppression issue is
+ * usually overcome by the network daemon (supplicant) enabling AP isolation
+ * and by that separating all STAs.
+ *
+ * Delivery of STA-to-STA IP multicast is made possible again by enabling
+ * and utilizing the bridge hairpin mode, which considers the incoming port
+ * as a potential outgoing port, too (see *BRIDGE_MODE_HAIRPIN* option).
+ * Hairpin mode is performed after multicast snooping, therefore leading
+ * to only deliver reports to STAs running a multicast router.
+ *
+ * @IFLA_BRPORT_VLAN_TUNNEL
+ * Controls whether vlan to tunnel mapping is enabled on the port.
+ * By default this flag is off.
+ *
+ * @IFLA_BRPORT_BCAST_FLOOD
+ * Controls flooding of broadcast traffic on the given port. By default
+ * this flag is on.
+ *
+ * @IFLA_BRPORT_GROUP_FWD_MASK
+ * Set the group forward mask. This is a bitmask that is applied to
+ * decide whether to forward incoming frames destined to link-local
+ * addresses. The addresses of the form are 01:80:C2:00:00:0X (defaults
+ * to 0, which means the bridge does not forward any link-local frames
+ * coming on this port).
+ *
+ * @IFLA_BRPORT_NEIGH_SUPPRESS
+ * Controls whether neighbor discovery (arp and nd) proxy and suppression
+ * is enabled on the port. By default this flag is off.
+ *
+ * @IFLA_BRPORT_ISOLATED
+ * Controls whether a given port will be isolated, which means it will be
+ * able to communicate with non-isolated ports only. By default this
+ * flag is off.
+ *
+ * @IFLA_BRPORT_BACKUP_PORT
+ * Set a backup port. If the port loses carrier all traffic will be
+ * redirected to the configured backup port. Set the value to 0 to disable
+ * it.
+ *
+ * @IFLA_BRPORT_MRP_RING_OPEN
+ *
+ * @IFLA_BRPORT_MRP_IN_OPEN
+ *
+ * @IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT
+ * The number of per-port EHT hosts limit. The default value is 512.
+ * Setting to 0 is not allowed.
+ *
+ * @IFLA_BRPORT_MCAST_EHT_HOSTS_CNT
+ * The current number of tracked hosts, read only.
+ *
+ * @IFLA_BRPORT_LOCKED
+ * Controls whether a port will be locked, meaning that hosts behind the
+ * port will not be able to communicate through the port unless an FDB
+ * entry with the unit's MAC address is in the FDB. The common use case is
+ * that hosts are allowed access through authentication with the IEEE 802.1X
+ * protocol or based on whitelists. By default this flag is off.
+ *
+ * Please note that secure 802.1X deployments should always use the
+ * *BR_BOOLOPT_NO_LL_LEARN* flag, to not permit the bridge to populate its
+ * FDB based on link-local (EAPOL) traffic received on the port.
+ *
+ * @IFLA_BRPORT_MAB
+ * Controls whether a port will use MAC Authentication Bypass (MAB), a
+ * technique through which select MAC addresses may be allowed on a locked
+ * port, without using 802.1X authentication. Packets with an unknown source
+ * MAC address generates a "locked" FDB entry on the incoming bridge port.
+ * The common use case is for user space to react to these bridge FDB
+ * notifications and optionally replace the locked FDB entry with a normal
+ * one, allowing traffic to pass for whitelisted MAC addresses.
+ *
+ * Setting this flag also requires *IFLA_BRPORT_LOCKED* and
+ * *IFLA_BRPORT_LEARNING*. *IFLA_BRPORT_LOCKED* ensures that unauthorized
+ * data packets are dropped, and *IFLA_BRPORT_LEARNING* allows the dynamic
+ * FDB entries installed by user space (as replacements for the locked FDB
+ * entries) to be refreshed and/or aged out.
+ *
+ * @IFLA_BRPORT_MCAST_N_GROUPS
+ *
+ * @IFLA_BRPORT_MCAST_MAX_GROUPS
+ * Sets the maximum number of MDB entries that can be registered for a
+ * given port. Attempts to register more MDB entries at the port than this
+ * limit allows will be rejected, whether they are done through netlink
+ * (e.g. the bridge tool), or IGMP or MLD membership reports. Setting a
+ * limit of 0 disables the limit. The default value is 0.
+ *
+ * @IFLA_BRPORT_NEIGH_VLAN_SUPPRESS
+ * Controls whether neighbor discovery (arp and nd) proxy and suppression is
+ * enabled for a given port. By default this flag is off.
+ *
+ * Note that this option only takes effect when *IFLA_BRPORT_NEIGH_SUPPRESS*
+ * is enabled for a given port.
+ *
+ * @IFLA_BRPORT_BACKUP_NHID
+ * The FDB nexthop object ID to attach to packets being redirected to a
+ * backup port that has VLAN tunnel mapping enabled (via the
+ * *IFLA_BRPORT_VLAN_TUNNEL* option). Setting a value of 0 (default) has
+ * the effect of not attaching any ID.
+ */
enum {
IFLA_BRPORT_UNSPEC,
IFLA_BRPORT_STATE, /* Spanning tree state */
@@ -539,6 +1089,12 @@ enum {
IFLA_BRPORT_MRP_IN_OPEN,
IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT,
IFLA_BRPORT_MCAST_EHT_HOSTS_CNT,
+ IFLA_BRPORT_LOCKED,
+ IFLA_BRPORT_MAB,
+ IFLA_BRPORT_MCAST_N_GROUPS,
+ IFLA_BRPORT_MCAST_MAX_GROUPS,
+ IFLA_BRPORT_NEIGH_VLAN_SUPPRESS,
+ IFLA_BRPORT_BACKUP_NHID,
__IFLA_BRPORT_MAX
};
#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
@@ -605,6 +1161,7 @@ enum {
IFLA_MACVLAN_MACADDR_COUNT,
IFLA_MACVLAN_BC_QUEUE_LEN,
IFLA_MACVLAN_BC_QUEUE_LEN_USED,
+ IFLA_MACVLAN_BC_CUTOFF,
__IFLA_MACVLAN_MAX,
};
@@ -715,7 +1272,96 @@ enum ipvlan_mode {
#define IPVLAN_F_PRIVATE 0x01
#define IPVLAN_F_VEPA 0x02
+/* Tunnel RTM header */
+struct tunnel_msg {
+ __u8 family;
+ __u8 flags;
+ __u16 reserved2;
+ __u32 ifindex;
+};
+
+/* netkit section */
+enum netkit_action {
+ NETKIT_NEXT = -1,
+ NETKIT_PASS = 0,
+ NETKIT_DROP = 2,
+ NETKIT_REDIRECT = 7,
+};
+
+enum netkit_mode {
+ NETKIT_L2,
+ NETKIT_L3,
+};
+
+/* NETKIT_SCRUB_NONE leaves clearing skb->{mark,priority} up to
+ * the BPF program if attached. This also means the latter can
+ * consume the two fields if they were populated earlier.
+ *
+ * NETKIT_SCRUB_DEFAULT zeroes skb->{mark,priority} fields before
+ * invoking the attached BPF program when the peer device resides
+ * in a different network namespace. This is the default behavior.
+ */
+enum netkit_scrub {
+ NETKIT_SCRUB_NONE,
+ NETKIT_SCRUB_DEFAULT,
+};
+
+enum {
+ IFLA_NETKIT_UNSPEC,
+ IFLA_NETKIT_PEER_INFO,
+ IFLA_NETKIT_PRIMARY,
+ IFLA_NETKIT_POLICY,
+ IFLA_NETKIT_PEER_POLICY,
+ IFLA_NETKIT_MODE,
+ IFLA_NETKIT_SCRUB,
+ IFLA_NETKIT_PEER_SCRUB,
+ IFLA_NETKIT_HEADROOM,
+ IFLA_NETKIT_TAILROOM,
+ __IFLA_NETKIT_MAX,
+};
+#define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1)
+
/* VXLAN section */
+
+/* include statistics in the dump */
+#define TUNNEL_MSG_FLAG_STATS 0x01
+
+#define TUNNEL_MSG_VALID_USER_FLAGS TUNNEL_MSG_FLAG_STATS
+
+/* Embedded inside VXLAN_VNIFILTER_ENTRY_STATS */
+enum {
+ VNIFILTER_ENTRY_STATS_UNSPEC,
+ VNIFILTER_ENTRY_STATS_RX_BYTES,
+ VNIFILTER_ENTRY_STATS_RX_PKTS,
+ VNIFILTER_ENTRY_STATS_RX_DROPS,
+ VNIFILTER_ENTRY_STATS_RX_ERRORS,
+ VNIFILTER_ENTRY_STATS_TX_BYTES,
+ VNIFILTER_ENTRY_STATS_TX_PKTS,
+ VNIFILTER_ENTRY_STATS_TX_DROPS,
+ VNIFILTER_ENTRY_STATS_TX_ERRORS,
+ VNIFILTER_ENTRY_STATS_PAD,
+ __VNIFILTER_ENTRY_STATS_MAX
+};
+#define VNIFILTER_ENTRY_STATS_MAX (__VNIFILTER_ENTRY_STATS_MAX - 1)
+
+enum {
+ VXLAN_VNIFILTER_ENTRY_UNSPEC,
+ VXLAN_VNIFILTER_ENTRY_START,
+ VXLAN_VNIFILTER_ENTRY_END,
+ VXLAN_VNIFILTER_ENTRY_GROUP,
+ VXLAN_VNIFILTER_ENTRY_GROUP6,
+ VXLAN_VNIFILTER_ENTRY_STATS,
+ __VXLAN_VNIFILTER_ENTRY_MAX
+};
+#define VXLAN_VNIFILTER_ENTRY_MAX (__VXLAN_VNIFILTER_ENTRY_MAX - 1)
+
+enum {
+ VXLAN_VNIFILTER_UNSPEC,
+ VXLAN_VNIFILTER_ENTRY,
+ __VXLAN_VNIFILTER_MAX
+};
+#define VXLAN_VNIFILTER_MAX (__VXLAN_VNIFILTER_MAX - 1)
+
enum {
IFLA_VXLAN_UNSPEC,
IFLA_VXLAN_ID,
@@ -747,6 +1393,9 @@ enum {
IFLA_VXLAN_GPE,
IFLA_VXLAN_TTL_INHERIT,
IFLA_VXLAN_DF,
+ IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */
+ IFLA_VXLAN_LOCALBYPASS,
+ IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
@@ -764,6 +1413,13 @@ enum ifla_vxlan_df {
VXLAN_DF_MAX = __VXLAN_DF_END - 1,
};
+enum ifla_vxlan_label_policy {
+ VXLAN_LABEL_FIXED = 0,
+ VXLAN_LABEL_INHERIT = 1,
+ __VXLAN_LABEL_END,
+ VXLAN_LABEL_MAX = __VXLAN_LABEL_END - 1,
+};
+
/* GENEVE section */
enum {
IFLA_GENEVE_UNSPEC,
@@ -780,6 +1436,7 @@ enum {
IFLA_GENEVE_LABEL,
IFLA_GENEVE_TTL_INHERIT,
IFLA_GENEVE_DF,
+ IFLA_GENEVE_INNER_PROTO_INHERIT,
__IFLA_GENEVE_MAX
};
#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1)
@@ -825,6 +1482,10 @@ enum {
IFLA_GTP_FD1,
IFLA_GTP_PDP_HASHSIZE,
IFLA_GTP_ROLE,
+ IFLA_GTP_CREATE_SOCKETS,
+ IFLA_GTP_RESTART_COUNT,
+ IFLA_GTP_LOCAL,
+ IFLA_GTP_LOCAL6,
__IFLA_GTP_MAX,
};
#define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1)
@@ -864,6 +1525,7 @@ enum {
IFLA_BOND_AD_LACP_ACTIVE,
IFLA_BOND_MISSED_MAX,
IFLA_BOND_NS_IP6_TARGET,
+ IFLA_BOND_COUPLED_CONTROL,
__IFLA_BOND_MAX,
};
@@ -1129,6 +1791,7 @@ enum {
IFLA_HSR_PROTOCOL, /* Indicate different protocol than
* HSR. For example PRP.
*/
+ IFLA_HSR_INTERLINK, /* HSR interlink network device */
__IFLA_HSR_MAX,
};
@@ -1161,6 +1824,17 @@ enum {
#define IFLA_STATS_FILTER_BIT(ATTR) (1 << (ATTR - 1))
+enum {
+ IFLA_STATS_GETSET_UNSPEC,
+ IFLA_STATS_GET_FILTERS, /* Nest of IFLA_STATS_LINK_xxx, each a u32 with
+ * a filter mask for the corresponding group.
+ */
+ IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS, /* 0 or 1 as u8 */
+ __IFLA_STATS_GETSET_MAX,
+};
+
+#define IFLA_STATS_GETSET_MAX (__IFLA_STATS_GETSET_MAX - 1)
+
/* These are embedded into IFLA_STATS_LINK_XSTATS:
* [IFLA_STATS_LINK_XSTATS]
* -> [LINK_XSTATS_TYPE_xxx]
@@ -1178,10 +1852,21 @@ enum {
enum {
IFLA_OFFLOAD_XSTATS_UNSPEC,
IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */
+ IFLA_OFFLOAD_XSTATS_HW_S_INFO, /* HW stats info. A nest */
+ IFLA_OFFLOAD_XSTATS_L3_STATS, /* struct rtnl_hw_stats64 */
__IFLA_OFFLOAD_XSTATS_MAX
};
#define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1)
+enum {
+ IFLA_OFFLOAD_XSTATS_HW_S_INFO_UNSPEC,
+ IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST, /* u8 */
+ IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED, /* u8 */
+ __IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX,
+};
+#define IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX \
+ (__IFLA_OFFLOAD_XSTATS_HW_S_INFO_MAX - 1)
+
/* XDP section */
#define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0)
@@ -1280,4 +1965,16 @@ enum {
#define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1)
+/* DSA section */
+
+enum {
+ IFLA_DSA_UNSPEC,
+ IFLA_DSA_CONDUIT,
+ /* Deprecated, use IFLA_DSA_CONDUIT instead */
+ IFLA_DSA_MASTER = IFLA_DSA_CONDUIT,
+ __IFLA_DSA_MAX,
+};
+
+#define IFLA_DSA_MAX (__IFLA_DSA_MAX - 1)
+
#endif /* _UAPI_LINUX_IF_LINK_H */
diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
index a78a8096f4ce..23a062781468 100644
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -7,8 +7,8 @@
* Magnus Karlsson <magnus.karlsson@intel.com>
*/
-#ifndef _LINUX_IF_XDP_H
-#define _LINUX_IF_XDP_H
+#ifndef _UAPI_LINUX_IF_XDP_H
+#define _UAPI_LINUX_IF_XDP_H
#include <linux/types.h>
@@ -25,9 +25,25 @@
* application.
*/
#define XDP_USE_NEED_WAKEUP (1 << 3)
+/* By setting this option, userspace application indicates that it can
+ * handle multiple descriptors per packet thus enabling AF_XDP to split
+ * multi-buffer XDP frames into multiple Rx descriptors. Without this set
+ * such frames will be dropped.
+ */
+#define XDP_USE_SG (1 << 4)
/* Flags for xsk_umem_config flags */
-#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0)
+#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0)
+
+/* Force checksum calculation in software. Can be used for testing or
+ * working around potential HW issues. This option causes performance
+ * degradation and only works in XDP_COPY mode.
+ */
+#define XDP_UMEM_TX_SW_CSUM (1 << 1)
+
+/* Request to reserve tx_metadata_len bytes of per-chunk metadata.
+ */
+#define XDP_UMEM_TX_METADATA_LEN (1 << 2)
struct sockaddr_xdp {
__u16 sxdp_family;
@@ -63,6 +79,7 @@ struct xdp_mmap_offsets {
#define XDP_UMEM_COMPLETION_RING 6
#define XDP_STATISTICS 7
#define XDP_OPTIONS 8
+#define XDP_MAX_TX_SKB_BUDGET 9
struct xdp_umem_reg {
__u64 addr; /* Start of packet data area */
@@ -70,6 +87,7 @@ struct xdp_umem_reg {
__u32 chunk_size;
__u32 headroom;
__u32 flags;
+ __u32 tx_metadata_len;
};
struct xdp_statistics {
@@ -99,6 +117,51 @@ struct xdp_options {
#define XSK_UNALIGNED_BUF_ADDR_MASK \
((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1)
+/* Request transmit timestamp. Upon completion, put it into tx_timestamp
+ * field of struct xsk_tx_metadata.
+ */
+#define XDP_TXMD_FLAGS_TIMESTAMP (1 << 0)
+
+/* Request transmit checksum offload. Checksum start position and offset
+ * are communicated via csum_start and csum_offset fields of struct
+ * xsk_tx_metadata.
+ */
+#define XDP_TXMD_FLAGS_CHECKSUM (1 << 1)
+
+/* Request launch time hardware offload. The device will schedule the packet for
+ * transmission at a pre-determined time called launch time. The value of
+ * launch time is communicated via launch_time field of struct xsk_tx_metadata.
+ */
+#define XDP_TXMD_FLAGS_LAUNCH_TIME (1 << 2)
+
+/* AF_XDP offloads request. 'request' union member is consumed by the driver
+ * when the packet is being transmitted. 'completion' union member is
+ * filled by the driver when the transmit completion arrives.
+ */
+struct xsk_tx_metadata {
+ __u64 flags;
+
+ union {
+ struct {
+ /* XDP_TXMD_FLAGS_CHECKSUM */
+
+ /* Offset from desc->addr where checksumming should start. */
+ __u16 csum_start;
+ /* Offset from csum_start where checksum should be stored. */
+ __u16 csum_offset;
+
+ /* XDP_TXMD_FLAGS_LAUNCH_TIME */
+ /* Launch time in nanosecond against the PTP HW Clock */
+ __u64 launch_time;
+ } request;
+
+ struct {
+ /* XDP_TXMD_FLAGS_TIMESTAMP */
+ __u64 tx_timestamp;
+ } completion;
+ };
+};
+
/* Rx/Tx descriptor */
struct xdp_desc {
__u64 addr;
@@ -108,4 +171,14 @@ struct xdp_desc {
/* UMEM descriptor is __u64 */
-#endif /* _LINUX_IF_XDP_H */
+/* Flag indicating that the packet continues with the buffer pointed out by the
+ * next frame in the ring. The end of the packet is signalled by setting this
+ * bit to zero. For single buffer packets, every descriptor has 'options' set
+ * to 0 and this maintains backward compatibility.
+ */
+#define XDP_PKT_CONTD (1 << 0)
+
+/* TX packet carries valid metadata. */
+#define XDP_TX_METADATA (1 << 1)
+
+#endif /* _UAPI_LINUX_IF_XDP_H */
diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
index 07a4cb149305..ced0fc3c3aa5 100644
--- a/tools/include/uapi/linux/in.h
+++ b/tools/include/uapi/linux/in.h
@@ -79,8 +79,12 @@ enum {
#define IPPROTO_MPLS IPPROTO_MPLS
IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */
#define IPPROTO_ETHERNET IPPROTO_ETHERNET
+ IPPROTO_AGGFRAG = 144, /* AGGFRAG in ESP (RFC 9347) */
+#define IPPROTO_AGGFRAG IPPROTO_AGGFRAG
IPPROTO_RAW = 255, /* Raw IP packets */
#define IPPROTO_RAW IPPROTO_RAW
+ IPPROTO_SMC = 256, /* Shared Memory Communications */
+#define IPPROTO_SMC IPPROTO_SMC
IPPROTO_MPTCP = 262, /* Multipath TCP connection */
#define IPPROTO_MPTCP IPPROTO_MPTCP
IPPROTO_MAX
@@ -139,7 +143,7 @@ struct in_addr {
*/
#define IP_PMTUDISC_INTERFACE 4
/* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get
- * fragmented if they exeed the interface mtu
+ * fragmented if they exceed the interface mtu
*/
#define IP_PMTUDISC_OMIT 5
@@ -162,6 +166,8 @@ struct in_addr {
#define MCAST_MSFILTER 48
#define IP_MULTICAST_ALL 49
#define IP_UNICAST_IF 50
+#define IP_LOCAL_PORT_RANGE 51
+#define IP_PROTOCOL 52
#define MCAST_EXCLUDE 0
#define MCAST_INCLUDE 1
diff --git a/tools/include/uapi/linux/io_uring.h b/tools/include/uapi/linux/io_uring.h
new file mode 100644
index 000000000000..f1c16f817742
--- /dev/null
+++ b/tools/include/uapi/linux/io_uring.h
@@ -0,0 +1,757 @@
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
+/*
+ * Header file for the io_uring interface.
+ *
+ * Copyright (C) 2019 Jens Axboe
+ * Copyright (C) 2019 Christoph Hellwig
+ */
+#ifndef LINUX_IO_URING_H
+#define LINUX_IO_URING_H
+
+#include <linux/fs.h>
+#include <linux/types.h>
+/*
+ * this file is shared with liburing and that has to autodetect
+ * if linux/time_types.h is available or not, it can
+ * define UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H
+ * if linux/time_types.h is not available
+ */
+#ifndef UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H
+#include <linux/time_types.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * IO submission data structure (Submission Queue Entry)
+ */
+struct io_uring_sqe {
+ __u8 opcode; /* type of operation for this sqe */
+ __u8 flags; /* IOSQE_ flags */
+ __u16 ioprio; /* ioprio for the request */
+ __s32 fd; /* file descriptor to do IO on */
+ union {
+ __u64 off; /* offset into file */
+ __u64 addr2;
+ struct {
+ __u32 cmd_op;
+ __u32 __pad1;
+ };
+ };
+ union {
+ __u64 addr; /* pointer to buffer or iovecs */
+ __u64 splice_off_in;
+ struct {
+ __u32 level;
+ __u32 optname;
+ };
+ };
+ __u32 len; /* buffer size or number of iovecs */
+ union {
+ __kernel_rwf_t rw_flags;
+ __u32 fsync_flags;
+ __u16 poll_events; /* compatibility */
+ __u32 poll32_events; /* word-reversed for BE */
+ __u32 sync_range_flags;
+ __u32 msg_flags;
+ __u32 timeout_flags;
+ __u32 accept_flags;
+ __u32 cancel_flags;
+ __u32 open_flags;
+ __u32 statx_flags;
+ __u32 fadvise_advice;
+ __u32 splice_flags;
+ __u32 rename_flags;
+ __u32 unlink_flags;
+ __u32 hardlink_flags;
+ __u32 xattr_flags;
+ __u32 msg_ring_flags;
+ __u32 uring_cmd_flags;
+ __u32 waitid_flags;
+ __u32 futex_flags;
+ };
+ __u64 user_data; /* data to be passed back at completion time */
+ /* pack this to avoid bogus arm OABI complaints */
+ union {
+ /* index into fixed buffers, if used */
+ __u16 buf_index;
+ /* for grouped buffer selection */
+ __u16 buf_group;
+ } __attribute__((packed));
+ /* personality to use, if used */
+ __u16 personality;
+ union {
+ __s32 splice_fd_in;
+ __u32 file_index;
+ __u32 optlen;
+ struct {
+ __u16 addr_len;
+ __u16 __pad3[1];
+ };
+ };
+ union {
+ struct {
+ __u64 addr3;
+ __u64 __pad2[1];
+ };
+ __u64 optval;
+ /*
+ * If the ring is initialized with IORING_SETUP_SQE128, then
+ * this field is used for 80 bytes of arbitrary command data
+ */
+ __u8 cmd[0];
+ };
+};
+
+/*
+ * If sqe->file_index is set to this for opcodes that instantiate a new
+ * direct descriptor (like openat/openat2/accept), then io_uring will allocate
+ * an available direct descriptor instead of having the application pass one
+ * in. The picked direct descriptor will be returned in cqe->res, or -ENFILE
+ * if the space is full.
+ */
+#define IORING_FILE_INDEX_ALLOC (~0U)
+
+enum {
+ IOSQE_FIXED_FILE_BIT,
+ IOSQE_IO_DRAIN_BIT,
+ IOSQE_IO_LINK_BIT,
+ IOSQE_IO_HARDLINK_BIT,
+ IOSQE_ASYNC_BIT,
+ IOSQE_BUFFER_SELECT_BIT,
+ IOSQE_CQE_SKIP_SUCCESS_BIT,
+};
+
+/*
+ * sqe->flags
+ */
+/* use fixed fileset */
+#define IOSQE_FIXED_FILE (1U << IOSQE_FIXED_FILE_BIT)
+/* issue after inflight IO */
+#define IOSQE_IO_DRAIN (1U << IOSQE_IO_DRAIN_BIT)
+/* links next sqe */
+#define IOSQE_IO_LINK (1U << IOSQE_IO_LINK_BIT)
+/* like LINK, but stronger */
+#define IOSQE_IO_HARDLINK (1U << IOSQE_IO_HARDLINK_BIT)
+/* always go async */
+#define IOSQE_ASYNC (1U << IOSQE_ASYNC_BIT)
+/* select buffer from sqe->buf_group */
+#define IOSQE_BUFFER_SELECT (1U << IOSQE_BUFFER_SELECT_BIT)
+/* don't post CQE if request succeeded */
+#define IOSQE_CQE_SKIP_SUCCESS (1U << IOSQE_CQE_SKIP_SUCCESS_BIT)
+
+/*
+ * io_uring_setup() flags
+ */
+#define IORING_SETUP_IOPOLL (1U << 0) /* io_context is polled */
+#define IORING_SETUP_SQPOLL (1U << 1) /* SQ poll thread */
+#define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */
+#define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */
+#define IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */
+#define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */
+#define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */
+#define IORING_SETUP_SUBMIT_ALL (1U << 7) /* continue submit on error */
+/*
+ * Cooperative task running. When requests complete, they often require
+ * forcing the submitter to transition to the kernel to complete. If this
+ * flag is set, work will be done when the task transitions anyway, rather
+ * than force an inter-processor interrupt reschedule. This avoids interrupting
+ * a task running in userspace, and saves an IPI.
+ */
+#define IORING_SETUP_COOP_TASKRUN (1U << 8)
+/*
+ * If COOP_TASKRUN is set, get notified if task work is available for
+ * running and a kernel transition would be needed to run it. This sets
+ * IORING_SQ_TASKRUN in the sq ring flags. Not valid with COOP_TASKRUN.
+ */
+#define IORING_SETUP_TASKRUN_FLAG (1U << 9)
+#define IORING_SETUP_SQE128 (1U << 10) /* SQEs are 128 byte */
+#define IORING_SETUP_CQE32 (1U << 11) /* CQEs are 32 byte */
+/*
+ * Only one task is allowed to submit requests
+ */
+#define IORING_SETUP_SINGLE_ISSUER (1U << 12)
+
+/*
+ * Defer running task work to get events.
+ * Rather than running bits of task work whenever the task transitions
+ * try to do it just before it is needed.
+ */
+#define IORING_SETUP_DEFER_TASKRUN (1U << 13)
+
+/*
+ * Application provides the memory for the rings
+ */
+#define IORING_SETUP_NO_MMAP (1U << 14)
+
+/*
+ * Register the ring fd in itself for use with
+ * IORING_REGISTER_USE_REGISTERED_RING; return a registered fd index rather
+ * than an fd.
+ */
+#define IORING_SETUP_REGISTERED_FD_ONLY (1U << 15)
+
+/*
+ * Removes indirection through the SQ index array.
+ */
+#define IORING_SETUP_NO_SQARRAY (1U << 16)
+
+enum io_uring_op {
+ IORING_OP_NOP,
+ IORING_OP_READV,
+ IORING_OP_WRITEV,
+ IORING_OP_FSYNC,
+ IORING_OP_READ_FIXED,
+ IORING_OP_WRITE_FIXED,
+ IORING_OP_POLL_ADD,
+ IORING_OP_POLL_REMOVE,
+ IORING_OP_SYNC_FILE_RANGE,
+ IORING_OP_SENDMSG,
+ IORING_OP_RECVMSG,
+ IORING_OP_TIMEOUT,
+ IORING_OP_TIMEOUT_REMOVE,
+ IORING_OP_ACCEPT,
+ IORING_OP_ASYNC_CANCEL,
+ IORING_OP_LINK_TIMEOUT,
+ IORING_OP_CONNECT,
+ IORING_OP_FALLOCATE,
+ IORING_OP_OPENAT,
+ IORING_OP_CLOSE,
+ IORING_OP_FILES_UPDATE,
+ IORING_OP_STATX,
+ IORING_OP_READ,
+ IORING_OP_WRITE,
+ IORING_OP_FADVISE,
+ IORING_OP_MADVISE,
+ IORING_OP_SEND,
+ IORING_OP_RECV,
+ IORING_OP_OPENAT2,
+ IORING_OP_EPOLL_CTL,
+ IORING_OP_SPLICE,
+ IORING_OP_PROVIDE_BUFFERS,
+ IORING_OP_REMOVE_BUFFERS,
+ IORING_OP_TEE,
+ IORING_OP_SHUTDOWN,
+ IORING_OP_RENAMEAT,
+ IORING_OP_UNLINKAT,
+ IORING_OP_MKDIRAT,
+ IORING_OP_SYMLINKAT,
+ IORING_OP_LINKAT,
+ IORING_OP_MSG_RING,
+ IORING_OP_FSETXATTR,
+ IORING_OP_SETXATTR,
+ IORING_OP_FGETXATTR,
+ IORING_OP_GETXATTR,
+ IORING_OP_SOCKET,
+ IORING_OP_URING_CMD,
+ IORING_OP_SEND_ZC,
+ IORING_OP_SENDMSG_ZC,
+ IORING_OP_READ_MULTISHOT,
+ IORING_OP_WAITID,
+ IORING_OP_FUTEX_WAIT,
+ IORING_OP_FUTEX_WAKE,
+ IORING_OP_FUTEX_WAITV,
+
+ /* this goes last, obviously */
+ IORING_OP_LAST,
+};
+
+/*
+ * sqe->uring_cmd_flags top 8bits aren't available for userspace
+ * IORING_URING_CMD_FIXED use registered buffer; pass this flag
+ * along with setting sqe->buf_index.
+ */
+#define IORING_URING_CMD_FIXED (1U << 0)
+#define IORING_URING_CMD_MASK IORING_URING_CMD_FIXED
+
+
+/*
+ * sqe->fsync_flags
+ */
+#define IORING_FSYNC_DATASYNC (1U << 0)
+
+/*
+ * sqe->timeout_flags
+ */
+#define IORING_TIMEOUT_ABS (1U << 0)
+#define IORING_TIMEOUT_UPDATE (1U << 1)
+#define IORING_TIMEOUT_BOOTTIME (1U << 2)
+#define IORING_TIMEOUT_REALTIME (1U << 3)
+#define IORING_LINK_TIMEOUT_UPDATE (1U << 4)
+#define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5)
+#define IORING_TIMEOUT_MULTISHOT (1U << 6)
+#define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
+#define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
+/*
+ * sqe->splice_flags
+ * extends splice(2) flags
+ */
+#define SPLICE_F_FD_IN_FIXED (1U << 31) /* the last bit of __u32 */
+
+/*
+ * POLL_ADD flags. Note that since sqe->poll_events is the flag space, the
+ * command flags for POLL_ADD are stored in sqe->len.
+ *
+ * IORING_POLL_ADD_MULTI Multishot poll. Sets IORING_CQE_F_MORE if
+ * the poll handler will continue to report
+ * CQEs on behalf of the same SQE.
+ *
+ * IORING_POLL_UPDATE Update existing poll request, matching
+ * sqe->addr as the old user_data field.
+ *
+ * IORING_POLL_LEVEL Level triggered poll.
+ */
+#define IORING_POLL_ADD_MULTI (1U << 0)
+#define IORING_POLL_UPDATE_EVENTS (1U << 1)
+#define IORING_POLL_UPDATE_USER_DATA (1U << 2)
+#define IORING_POLL_ADD_LEVEL (1U << 3)
+
+/*
+ * ASYNC_CANCEL flags.
+ *
+ * IORING_ASYNC_CANCEL_ALL Cancel all requests that match the given key
+ * IORING_ASYNC_CANCEL_FD Key off 'fd' for cancelation rather than the
+ * request 'user_data'
+ * IORING_ASYNC_CANCEL_ANY Match any request
+ * IORING_ASYNC_CANCEL_FD_FIXED 'fd' passed in is a fixed descriptor
+ * IORING_ASYNC_CANCEL_USERDATA Match on user_data, default for no other key
+ * IORING_ASYNC_CANCEL_OP Match request based on opcode
+ */
+#define IORING_ASYNC_CANCEL_ALL (1U << 0)
+#define IORING_ASYNC_CANCEL_FD (1U << 1)
+#define IORING_ASYNC_CANCEL_ANY (1U << 2)
+#define IORING_ASYNC_CANCEL_FD_FIXED (1U << 3)
+#define IORING_ASYNC_CANCEL_USERDATA (1U << 4)
+#define IORING_ASYNC_CANCEL_OP (1U << 5)
+
+/*
+ * send/sendmsg and recv/recvmsg flags (sqe->ioprio)
+ *
+ * IORING_RECVSEND_POLL_FIRST If set, instead of first attempting to send
+ * or receive and arm poll if that yields an
+ * -EAGAIN result, arm poll upfront and skip
+ * the initial transfer attempt.
+ *
+ * IORING_RECV_MULTISHOT Multishot recv. Sets IORING_CQE_F_MORE if
+ * the handler will continue to report
+ * CQEs on behalf of the same SQE.
+ *
+ * IORING_RECVSEND_FIXED_BUF Use registered buffers, the index is stored in
+ * the buf_index field.
+ *
+ * IORING_SEND_ZC_REPORT_USAGE
+ * If set, SEND[MSG]_ZC should report
+ * the zerocopy usage in cqe.res
+ * for the IORING_CQE_F_NOTIF cqe.
+ * 0 is reported if zerocopy was actually possible.
+ * IORING_NOTIF_USAGE_ZC_COPIED if data was copied
+ * (at least partially).
+ */
+#define IORING_RECVSEND_POLL_FIRST (1U << 0)
+#define IORING_RECV_MULTISHOT (1U << 1)
+#define IORING_RECVSEND_FIXED_BUF (1U << 2)
+#define IORING_SEND_ZC_REPORT_USAGE (1U << 3)
+
+/*
+ * cqe.res for IORING_CQE_F_NOTIF if
+ * IORING_SEND_ZC_REPORT_USAGE was requested
+ *
+ * It should be treated as a flag, all other
+ * bits of cqe.res should be treated as reserved!
+ */
+#define IORING_NOTIF_USAGE_ZC_COPIED (1U << 31)
+
+/*
+ * accept flags stored in sqe->ioprio
+ */
+#define IORING_ACCEPT_MULTISHOT (1U << 0)
+
+/*
+ * IORING_OP_MSG_RING command types, stored in sqe->addr
+ */
+enum {
+ IORING_MSG_DATA, /* pass sqe->len as 'res' and off as user_data */
+ IORING_MSG_SEND_FD, /* send a registered fd to another ring */
+};
+
+/*
+ * IORING_OP_MSG_RING flags (sqe->msg_ring_flags)
+ *
+ * IORING_MSG_RING_CQE_SKIP Don't post a CQE to the target ring. Not
+ * applicable for IORING_MSG_DATA, obviously.
+ */
+#define IORING_MSG_RING_CQE_SKIP (1U << 0)
+/* Pass through the flags from sqe->file_index to cqe->flags */
+#define IORING_MSG_RING_FLAGS_PASS (1U << 1)
+
+/*
+ * IO completion data structure (Completion Queue Entry)
+ */
+struct io_uring_cqe {
+ __u64 user_data; /* sqe->data submission passed back */
+ __s32 res; /* result code for this event */
+ __u32 flags;
+
+ /*
+ * If the ring is initialized with IORING_SETUP_CQE32, then this field
+ * contains 16-bytes of padding, doubling the size of the CQE.
+ */
+ __u64 big_cqe[];
+};
+
+/*
+ * cqe->flags
+ *
+ * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID
+ * IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries
+ * IORING_CQE_F_SOCK_NONEMPTY If set, more data to read after socket recv
+ * IORING_CQE_F_NOTIF Set for notification CQEs. Can be used to distinct
+ * them from sends.
+ */
+#define IORING_CQE_F_BUFFER (1U << 0)
+#define IORING_CQE_F_MORE (1U << 1)
+#define IORING_CQE_F_SOCK_NONEMPTY (1U << 2)
+#define IORING_CQE_F_NOTIF (1U << 3)
+
+enum {
+ IORING_CQE_BUFFER_SHIFT = 16,
+};
+
+/*
+ * Magic offsets for the application to mmap the data it needs
+ */
+#define IORING_OFF_SQ_RING 0ULL
+#define IORING_OFF_CQ_RING 0x8000000ULL
+#define IORING_OFF_SQES 0x10000000ULL
+#define IORING_OFF_PBUF_RING 0x80000000ULL
+#define IORING_OFF_PBUF_SHIFT 16
+#define IORING_OFF_MMAP_MASK 0xf8000000ULL
+
+/*
+ * Filled with the offset for mmap(2)
+ */
+struct io_sqring_offsets {
+ __u32 head;
+ __u32 tail;
+ __u32 ring_mask;
+ __u32 ring_entries;
+ __u32 flags;
+ __u32 dropped;
+ __u32 array;
+ __u32 resv1;
+ __u64 user_addr;
+};
+
+/*
+ * sq_ring->flags
+ */
+#define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */
+#define IORING_SQ_CQ_OVERFLOW (1U << 1) /* CQ ring is overflown */
+#define IORING_SQ_TASKRUN (1U << 2) /* task should enter the kernel */
+
+struct io_cqring_offsets {
+ __u32 head;
+ __u32 tail;
+ __u32 ring_mask;
+ __u32 ring_entries;
+ __u32 overflow;
+ __u32 cqes;
+ __u32 flags;
+ __u32 resv1;
+ __u64 user_addr;
+};
+
+/*
+ * cq_ring->flags
+ */
+
+/* disable eventfd notifications */
+#define IORING_CQ_EVENTFD_DISABLED (1U << 0)
+
+/*
+ * io_uring_enter(2) flags
+ */
+#define IORING_ENTER_GETEVENTS (1U << 0)
+#define IORING_ENTER_SQ_WAKEUP (1U << 1)
+#define IORING_ENTER_SQ_WAIT (1U << 2)
+#define IORING_ENTER_EXT_ARG (1U << 3)
+#define IORING_ENTER_REGISTERED_RING (1U << 4)
+
+/*
+ * Passed in for io_uring_setup(2). Copied back with updated info on success
+ */
+struct io_uring_params {
+ __u32 sq_entries;
+ __u32 cq_entries;
+ __u32 flags;
+ __u32 sq_thread_cpu;
+ __u32 sq_thread_idle;
+ __u32 features;
+ __u32 wq_fd;
+ __u32 resv[3];
+ struct io_sqring_offsets sq_off;
+ struct io_cqring_offsets cq_off;
+};
+
+/*
+ * io_uring_params->features flags
+ */
+#define IORING_FEAT_SINGLE_MMAP (1U << 0)
+#define IORING_FEAT_NODROP (1U << 1)
+#define IORING_FEAT_SUBMIT_STABLE (1U << 2)
+#define IORING_FEAT_RW_CUR_POS (1U << 3)
+#define IORING_FEAT_CUR_PERSONALITY (1U << 4)
+#define IORING_FEAT_FAST_POLL (1U << 5)
+#define IORING_FEAT_POLL_32BITS (1U << 6)
+#define IORING_FEAT_SQPOLL_NONFIXED (1U << 7)
+#define IORING_FEAT_EXT_ARG (1U << 8)
+#define IORING_FEAT_NATIVE_WORKERS (1U << 9)
+#define IORING_FEAT_RSRC_TAGS (1U << 10)
+#define IORING_FEAT_CQE_SKIP (1U << 11)
+#define IORING_FEAT_LINKED_FILE (1U << 12)
+#define IORING_FEAT_REG_REG_RING (1U << 13)
+
+/*
+ * io_uring_register(2) opcodes and arguments
+ */
+enum {
+ IORING_REGISTER_BUFFERS = 0,
+ IORING_UNREGISTER_BUFFERS = 1,
+ IORING_REGISTER_FILES = 2,
+ IORING_UNREGISTER_FILES = 3,
+ IORING_REGISTER_EVENTFD = 4,
+ IORING_UNREGISTER_EVENTFD = 5,
+ IORING_REGISTER_FILES_UPDATE = 6,
+ IORING_REGISTER_EVENTFD_ASYNC = 7,
+ IORING_REGISTER_PROBE = 8,
+ IORING_REGISTER_PERSONALITY = 9,
+ IORING_UNREGISTER_PERSONALITY = 10,
+ IORING_REGISTER_RESTRICTIONS = 11,
+ IORING_REGISTER_ENABLE_RINGS = 12,
+
+ /* extended with tagging */
+ IORING_REGISTER_FILES2 = 13,
+ IORING_REGISTER_FILES_UPDATE2 = 14,
+ IORING_REGISTER_BUFFERS2 = 15,
+ IORING_REGISTER_BUFFERS_UPDATE = 16,
+
+ /* set/clear io-wq thread affinities */
+ IORING_REGISTER_IOWQ_AFF = 17,
+ IORING_UNREGISTER_IOWQ_AFF = 18,
+
+ /* set/get max number of io-wq workers */
+ IORING_REGISTER_IOWQ_MAX_WORKERS = 19,
+
+ /* register/unregister io_uring fd with the ring */
+ IORING_REGISTER_RING_FDS = 20,
+ IORING_UNREGISTER_RING_FDS = 21,
+
+ /* register ring based provide buffer group */
+ IORING_REGISTER_PBUF_RING = 22,
+ IORING_UNREGISTER_PBUF_RING = 23,
+
+ /* sync cancelation API */
+ IORING_REGISTER_SYNC_CANCEL = 24,
+
+ /* register a range of fixed file slots for automatic slot allocation */
+ IORING_REGISTER_FILE_ALLOC_RANGE = 25,
+
+ /* this goes last */
+ IORING_REGISTER_LAST,
+
+ /* flag added to the opcode to use a registered ring fd */
+ IORING_REGISTER_USE_REGISTERED_RING = 1U << 31
+};
+
+/* io-wq worker categories */
+enum {
+ IO_WQ_BOUND,
+ IO_WQ_UNBOUND,
+};
+
+/* deprecated, see struct io_uring_rsrc_update */
+struct io_uring_files_update {
+ __u32 offset;
+ __u32 resv;
+ __aligned_u64 /* __s32 * */ fds;
+};
+
+/*
+ * Register a fully sparse file space, rather than pass in an array of all
+ * -1 file descriptors.
+ */
+#define IORING_RSRC_REGISTER_SPARSE (1U << 0)
+
+struct io_uring_rsrc_register {
+ __u32 nr;
+ __u32 flags;
+ __u64 resv2;
+ __aligned_u64 data;
+ __aligned_u64 tags;
+};
+
+struct io_uring_rsrc_update {
+ __u32 offset;
+ __u32 resv;
+ __aligned_u64 data;
+};
+
+struct io_uring_rsrc_update2 {
+ __u32 offset;
+ __u32 resv;
+ __aligned_u64 data;
+ __aligned_u64 tags;
+ __u32 nr;
+ __u32 resv2;
+};
+
+/* Skip updating fd indexes set to this value in the fd table */
+#define IORING_REGISTER_FILES_SKIP (-2)
+
+#define IO_URING_OP_SUPPORTED (1U << 0)
+
+struct io_uring_probe_op {
+ __u8 op;
+ __u8 resv;
+ __u16 flags; /* IO_URING_OP_* flags */
+ __u32 resv2;
+};
+
+struct io_uring_probe {
+ __u8 last_op; /* last opcode supported */
+ __u8 ops_len; /* length of ops[] array below */
+ __u16 resv;
+ __u32 resv2[3];
+ struct io_uring_probe_op ops[];
+};
+
+struct io_uring_restriction {
+ __u16 opcode;
+ union {
+ __u8 register_op; /* IORING_RESTRICTION_REGISTER_OP */
+ __u8 sqe_op; /* IORING_RESTRICTION_SQE_OP */
+ __u8 sqe_flags; /* IORING_RESTRICTION_SQE_FLAGS_* */
+ };
+ __u8 resv;
+ __u32 resv2[3];
+};
+
+struct io_uring_buf {
+ __u64 addr;
+ __u32 len;
+ __u16 bid;
+ __u16 resv;
+};
+
+struct io_uring_buf_ring {
+ union {
+ /*
+ * To avoid spilling into more pages than we need to, the
+ * ring tail is overlaid with the io_uring_buf->resv field.
+ */
+ struct {
+ __u64 resv1;
+ __u32 resv2;
+ __u16 resv3;
+ __u16 tail;
+ };
+ __DECLARE_FLEX_ARRAY(struct io_uring_buf, bufs);
+ };
+};
+
+/*
+ * Flags for IORING_REGISTER_PBUF_RING.
+ *
+ * IOU_PBUF_RING_MMAP: If set, kernel will allocate the memory for the ring.
+ * The application must not set a ring_addr in struct
+ * io_uring_buf_reg, instead it must subsequently call
+ * mmap(2) with the offset set as:
+ * IORING_OFF_PBUF_RING | (bgid << IORING_OFF_PBUF_SHIFT)
+ * to get a virtual mapping for the ring.
+ */
+enum {
+ IOU_PBUF_RING_MMAP = 1,
+};
+
+/* argument for IORING_(UN)REGISTER_PBUF_RING */
+struct io_uring_buf_reg {
+ __u64 ring_addr;
+ __u32 ring_entries;
+ __u16 bgid;
+ __u16 flags;
+ __u64 resv[3];
+};
+
+/*
+ * io_uring_restriction->opcode values
+ */
+enum {
+ /* Allow an io_uring_register(2) opcode */
+ IORING_RESTRICTION_REGISTER_OP = 0,
+
+ /* Allow an sqe opcode */
+ IORING_RESTRICTION_SQE_OP = 1,
+
+ /* Allow sqe flags */
+ IORING_RESTRICTION_SQE_FLAGS_ALLOWED = 2,
+
+ /* Require sqe flags (these flags must be set on each submission) */
+ IORING_RESTRICTION_SQE_FLAGS_REQUIRED = 3,
+
+ IORING_RESTRICTION_LAST
+};
+
+struct io_uring_getevents_arg {
+ __u64 sigmask;
+ __u32 sigmask_sz;
+ __u32 pad;
+ __u64 ts;
+};
+
+/*
+ * Argument for IORING_REGISTER_SYNC_CANCEL
+ */
+struct io_uring_sync_cancel_reg {
+ __u64 addr;
+ __s32 fd;
+ __u32 flags;
+ struct __kernel_timespec timeout;
+ __u8 opcode;
+ __u8 pad[7];
+ __u64 pad2[3];
+};
+
+/*
+ * Argument for IORING_REGISTER_FILE_ALLOC_RANGE
+ * The range is specified as [off, off + len)
+ */
+struct io_uring_file_index_range {
+ __u32 off;
+ __u32 len;
+ __u64 resv;
+};
+
+struct io_uring_recvmsg_out {
+ __u32 namelen;
+ __u32 controllen;
+ __u32 payloadlen;
+ __u32 flags;
+};
+
+/*
+ * Argument for IORING_OP_URING_CMD when file is a socket
+ */
+enum {
+ SOCKET_URING_OP_SIOCINQ = 0,
+ SOCKET_URING_OP_SIOCOUTQ,
+ SOCKET_URING_OP_GETSOCKOPT,
+ SOCKET_URING_OP_SETSOCKOPT,
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 55155e262646..52f6000ab020 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -16,75 +16,10 @@
#define KVM_API_VERSION 12
-/* *** Deprecated interfaces *** */
-
-#define KVM_TRC_SHIFT 16
-
-#define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT)
-#define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1))
-
-#define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01)
-#define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02)
-#define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01)
-
-#define KVM_TRC_HEAD_SIZE 12
-#define KVM_TRC_CYCLE_SIZE 8
-#define KVM_TRC_EXTRA_MAX 7
-
-#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02)
-#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03)
-#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04)
-#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05)
-#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06)
-#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07)
-#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08)
-#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09)
-#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A)
-#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B)
-#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C)
-#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D)
-#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E)
-#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F)
-#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10)
-#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11)
-#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12)
-#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13)
-#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14)
-#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15)
-#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16)
-#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17)
-#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18)
-#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19)
-
-struct kvm_user_trace_setup {
- __u32 buf_size;
- __u32 buf_nr;
-};
-
-#define __KVM_DEPRECATED_MAIN_W_0x06 \
- _IOW(KVMIO, 0x06, struct kvm_user_trace_setup)
-#define __KVM_DEPRECATED_MAIN_0x07 _IO(KVMIO, 0x07)
-#define __KVM_DEPRECATED_MAIN_0x08 _IO(KVMIO, 0x08)
-
-#define __KVM_DEPRECATED_VM_R_0x70 _IOR(KVMIO, 0x70, struct kvm_assigned_irq)
-
-struct kvm_breakpoint {
- __u32 enabled;
- __u32 padding;
- __u64 address;
-};
-
-struct kvm_debug_guest {
- __u32 enabled;
- __u32 pad;
- struct kvm_breakpoint breakpoints[4];
- __u32 singlestep;
-};
-
-#define __KVM_DEPRECATED_VCPU_W_0x87 _IOW(KVMIO, 0x87, struct kvm_debug_guest)
-
-/* *** End of deprecated interfaces *** */
-
+/*
+ * Backwards-compatible definitions.
+ */
+#define __KVM_HAVE_GUEST_DEBUG
/* for KVM_SET_USER_MEMORY_REGION */
struct kvm_userspace_memory_region {
@@ -95,6 +30,19 @@ struct kvm_userspace_memory_region {
__u64 userspace_addr; /* start of the userspace allocated memory */
};
+/* for KVM_SET_USER_MEMORY_REGION2 */
+struct kvm_userspace_memory_region2 {
+ __u32 slot;
+ __u32 flags;
+ __u64 guest_phys_addr;
+ __u64 memory_size;
+ __u64 userspace_addr;
+ __u64 guest_memfd_offset;
+ __u32 guest_memfd;
+ __u32 pad1;
+ __u64 pad2[14];
+};
+
/*
* The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for
* userspace, other bits are reserved for kvm internal use which are defined
@@ -102,6 +50,7 @@ struct kvm_userspace_memory_region {
*/
#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
#define KVM_MEM_READONLY (1UL << 1)
+#define KVM_MEM_GUEST_MEMFD (1UL << 2)
/* for KVM_IRQ_LINE */
struct kvm_irq_level {
@@ -141,43 +90,6 @@ struct kvm_pit_config {
#define KVM_PIT_SPEAKER_DUMMY 1
-struct kvm_s390_skeys {
- __u64 start_gfn;
- __u64 count;
- __u64 skeydata_addr;
- __u32 flags;
- __u32 reserved[9];
-};
-
-#define KVM_S390_CMMA_PEEK (1 << 0)
-
-/**
- * kvm_s390_cmma_log - Used for CMMA migration.
- *
- * Used both for input and output.
- *
- * @start_gfn: Guest page number to start from.
- * @count: Size of the result buffer.
- * @flags: Control operation mode via KVM_S390_CMMA_* flags
- * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty
- * pages are still remaining.
- * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set
- * in the PGSTE.
- * @values: Pointer to the values buffer.
- *
- * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls.
- */
-struct kvm_s390_cmma_log {
- __u64 start_gfn;
- __u32 count;
- __u32 flags;
- union {
- __u64 remaining;
- __u64 mask;
- };
- __u64 values;
-};
-
struct kvm_hyperv_exit {
#define KVM_EXIT_HYPERV_SYNIC 1
#define KVM_EXIT_HYPERV_HCALL 2
@@ -264,6 +176,9 @@ struct kvm_xen_exit {
#define KVM_EXIT_RISCV_SBI 35
#define KVM_EXIT_RISCV_CSR 36
#define KVM_EXIT_NOTIFY 37
+#define KVM_EXIT_LOONGARCH_IOCSR 38
+#define KVM_EXIT_MEMORY_FAULT 39
+#define KVM_EXIT_TDX 40
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -278,11 +193,24 @@ struct kvm_xen_exit {
/* Flags that describe what fields in emulation_failure hold valid data. */
#define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0)
+/*
+ * struct kvm_run can be modified by userspace at any time, so KVM must be
+ * careful to avoid TOCTOU bugs. In order to protect KVM, HINT_UNSAFE_IN_KVM()
+ * renames fields in struct kvm_run from <symbol> to <symbol>__unsafe when
+ * compiled into the kernel, ensuring that any use within KVM is obvious and
+ * gets extra scrutiny.
+ */
+#ifdef __KERNEL__
+#define HINT_UNSAFE_IN_KVM(_symbol) _symbol##__unsafe
+#else
+#define HINT_UNSAFE_IN_KVM(_symbol) _symbol
+#endif
+
/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run {
/* in */
__u8 request_interrupt_window;
- __u8 immediate_exit;
+ __u8 HINT_UNSAFE_IN_KVM(immediate_exit);
__u8 padding1[6];
/* out */
@@ -336,13 +264,25 @@ struct kvm_run {
__u32 len;
__u8 is_write;
} mmio;
+ /* KVM_EXIT_LOONGARCH_IOCSR */
+ struct {
+ __u64 phys_addr;
+ __u8 data[8];
+ __u32 len;
+ __u8 is_write;
+ } iocsr_io;
/* KVM_EXIT_HYPERCALL */
struct {
__u64 nr;
__u64 args[6];
__u64 ret;
- __u32 longmode;
- __u32 pad;
+
+ union {
+#ifndef __KERNEL__
+ __u32 longmode;
+#endif
+ __u64 flags;
+ };
} hypercall;
/* KVM_EXIT_TPR_ACCESS */
struct {
@@ -357,11 +297,6 @@ struct kvm_run {
__u32 ipb;
} s390_sieic;
/* KVM_EXIT_S390_RESET */
-#define KVM_S390_RESET_POR 1
-#define KVM_S390_RESET_CLEAR 2
-#define KVM_S390_RESET_SUBSYSTEM 4
-#define KVM_S390_RESET_CPU_INIT 8
-#define KVM_S390_RESET_IPL 16
__u64 s390_reset_flags;
/* KVM_EXIT_S390_UCONTROL */
struct {
@@ -441,6 +376,7 @@ struct kvm_run {
#define KVM_SYSTEM_EVENT_WAKEUP 4
#define KVM_SYSTEM_EVENT_SUSPEND 5
#define KVM_SYSTEM_EVENT_SEV_TERM 6
+#define KVM_SYSTEM_EVENT_TDX_FATAL 7
__u32 type;
__u32 ndata;
union {
@@ -505,6 +441,38 @@ struct kvm_run {
#define KVM_NOTIFY_CONTEXT_INVALID (1 << 0)
__u32 flags;
} notify;
+ /* KVM_EXIT_MEMORY_FAULT */
+ struct {
+#define KVM_MEMORY_EXIT_FLAG_PRIVATE (1ULL << 3)
+ __u64 flags;
+ __u64 gpa;
+ __u64 size;
+ } memory_fault;
+ /* KVM_EXIT_TDX */
+ struct {
+ __u64 flags;
+ __u64 nr;
+ union {
+ struct {
+ __u64 ret;
+ __u64 data[5];
+ } unknown;
+ struct {
+ __u64 ret;
+ __u64 gpa;
+ __u64 size;
+ } get_quote;
+ struct {
+ __u64 ret;
+ __u64 leaf;
+ __u64 r11, r12, r13, r14;
+ } get_tdvmcall_info;
+ struct {
+ __u64 ret;
+ __u64 vector;
+ } setup_event_notify;
+ };
+ } tdx;
/* Fix the size of the union. */
char padding[256];
};
@@ -571,35 +539,6 @@ struct kvm_translation {
__u8 pad[5];
};
-/* for KVM_S390_MEM_OP */
-struct kvm_s390_mem_op {
- /* in */
- __u64 gaddr; /* the guest address */
- __u64 flags; /* flags */
- __u32 size; /* amount of bytes */
- __u32 op; /* type of operation */
- __u64 buf; /* buffer in userspace */
- union {
- struct {
- __u8 ar; /* the access register number */
- __u8 key; /* access key, ignored if flag unset */
- };
- __u32 sida_offset; /* offset into the sida */
- __u8 reserved[32]; /* ignored */
- };
-};
-/* types for kvm_s390_mem_op->op */
-#define KVM_S390_MEMOP_LOGICAL_READ 0
-#define KVM_S390_MEMOP_LOGICAL_WRITE 1
-#define KVM_S390_MEMOP_SIDA_READ 2
-#define KVM_S390_MEMOP_SIDA_WRITE 3
-#define KVM_S390_MEMOP_ABSOLUTE_READ 4
-#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5
-/* flags for kvm_s390_mem_op->flags */
-#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0)
-#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1)
-#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2)
-
/* for KVM_INTERRUPT */
struct kvm_interrupt {
/* in */
@@ -664,124 +603,6 @@ struct kvm_mp_state {
__u32 mp_state;
};
-struct kvm_s390_psw {
- __u64 mask;
- __u64 addr;
-};
-
-/* valid values for type in kvm_s390_interrupt */
-#define KVM_S390_SIGP_STOP 0xfffe0000u
-#define KVM_S390_PROGRAM_INT 0xfffe0001u
-#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u
-#define KVM_S390_RESTART 0xfffe0003u
-#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u
-#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u
-#define KVM_S390_MCHK 0xfffe1000u
-#define KVM_S390_INT_CLOCK_COMP 0xffff1004u
-#define KVM_S390_INT_CPU_TIMER 0xffff1005u
-#define KVM_S390_INT_VIRTIO 0xffff2603u
-#define KVM_S390_INT_SERVICE 0xffff2401u
-#define KVM_S390_INT_EMERGENCY 0xffff1201u
-#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u
-/* Anything below 0xfffe0000u is taken by INT_IO */
-#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \
- (((schid)) | \
- ((ssid) << 16) | \
- ((cssid) << 18) | \
- ((ai) << 26))
-#define KVM_S390_INT_IO_MIN 0x00000000u
-#define KVM_S390_INT_IO_MAX 0xfffdffffu
-#define KVM_S390_INT_IO_AI_MASK 0x04000000u
-
-
-struct kvm_s390_interrupt {
- __u32 type;
- __u32 parm;
- __u64 parm64;
-};
-
-struct kvm_s390_io_info {
- __u16 subchannel_id;
- __u16 subchannel_nr;
- __u32 io_int_parm;
- __u32 io_int_word;
-};
-
-struct kvm_s390_ext_info {
- __u32 ext_params;
- __u32 pad;
- __u64 ext_params2;
-};
-
-struct kvm_s390_pgm_info {
- __u64 trans_exc_code;
- __u64 mon_code;
- __u64 per_address;
- __u32 data_exc_code;
- __u16 code;
- __u16 mon_class_nr;
- __u8 per_code;
- __u8 per_atmid;
- __u8 exc_access_id;
- __u8 per_access_id;
- __u8 op_access_id;
-#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01
-#define KVM_S390_PGM_FLAGS_ILC_0 0x02
-#define KVM_S390_PGM_FLAGS_ILC_1 0x04
-#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06
-#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08
- __u8 flags;
- __u8 pad[2];
-};
-
-struct kvm_s390_prefix_info {
- __u32 address;
-};
-
-struct kvm_s390_extcall_info {
- __u16 code;
-};
-
-struct kvm_s390_emerg_info {
- __u16 code;
-};
-
-#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01
-struct kvm_s390_stop_info {
- __u32 flags;
-};
-
-struct kvm_s390_mchk_info {
- __u64 cr14;
- __u64 mcic;
- __u64 failing_storage_address;
- __u32 ext_damage_code;
- __u32 pad;
- __u8 fixed_logout[16];
-};
-
-struct kvm_s390_irq {
- __u64 type;
- union {
- struct kvm_s390_io_info io;
- struct kvm_s390_ext_info ext;
- struct kvm_s390_pgm_info pgm;
- struct kvm_s390_emerg_info emerg;
- struct kvm_s390_extcall_info extcall;
- struct kvm_s390_prefix_info prefix;
- struct kvm_s390_stop_info stop;
- struct kvm_s390_mchk_info mchk;
- char reserved[64];
- } u;
-};
-
-struct kvm_s390_irq_state {
- __u64 buf;
- __u32 flags; /* will stay unused for compatibility reasons */
- __u32 len;
- __u32 reserved[4]; /* will stay unused for compatibility reasons */
-};
-
/* for KVM_SET_GUEST_DEBUG */
#define KVM_GUESTDBG_ENABLE 0x00000001
@@ -823,10 +644,7 @@ struct kvm_ioeventfd {
#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3)
-#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \
- KVM_X86_DISABLE_EXITS_HLT | \
- KVM_X86_DISABLE_EXITS_PAUSE | \
- KVM_X86_DISABLE_EXITS_CSTATE)
+#define KVM_X86_DISABLE_EXITS_APERFMPERF (1 << 4)
/* for KVM_ENABLE_CAP */
struct kvm_enable_cap {
@@ -837,50 +655,6 @@ struct kvm_enable_cap {
__u8 pad[64];
};
-/* for KVM_PPC_GET_PVINFO */
-
-#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0)
-
-struct kvm_ppc_pvinfo {
- /* out */
- __u32 flags;
- __u32 hcall[4];
- __u8 pad[108];
-};
-
-/* for KVM_PPC_GET_SMMU_INFO */
-#define KVM_PPC_PAGE_SIZES_MAX_SZ 8
-
-struct kvm_ppc_one_page_size {
- __u32 page_shift; /* Page shift (or 0) */
- __u32 pte_enc; /* Encoding in the HPTE (>>12) */
-};
-
-struct kvm_ppc_one_seg_page_size {
- __u32 page_shift; /* Base page shift of segment (or 0) */
- __u32 slb_enc; /* SLB encoding for BookS */
- struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ];
-};
-
-#define KVM_PPC_PAGE_SIZES_REAL 0x00000001
-#define KVM_PPC_1T_SEGMENTS 0x00000002
-#define KVM_PPC_NO_HASH 0x00000004
-
-struct kvm_ppc_smmu_info {
- __u64 flags;
- __u32 slb_size;
- __u16 data_keys; /* # storage keys supported for data */
- __u16 instr_keys; /* # storage keys supported for instructions */
- struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
-};
-
-/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */
-struct kvm_ppc_resize_hpt {
- __u64 flags;
- __u32 shift;
- __u32 pad;
-};
-
#define KVMIO 0xAE
/* machine type bits, to be used as argument to KVM_CREATE_VM */
@@ -924,9 +698,6 @@ struct kvm_ppc_resize_hpt {
*/
#define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */
#define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2)
-#define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06
-#define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07
-#define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08
#define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2)
#define KVM_GET_MSR_FEATURE_INDEX_LIST _IOWR(KVMIO, 0x0a, struct kvm_msr_list)
@@ -953,9 +724,7 @@ struct kvm_ppc_resize_hpt {
/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
#define KVM_CAP_USER_NMI 22
-#ifdef __KVM_HAVE_GUEST_DEBUG
#define KVM_CAP_SET_GUEST_DEBUG 23
-#endif
#ifdef __KVM_HAVE_PIT
#define KVM_CAP_REINJECT_CONTROL 24
#endif
@@ -1175,8 +944,25 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223
#define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224
#define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
-
-#ifdef KVM_CAP_IRQ_ROUTING
+#define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226
+#define KVM_CAP_COUNTER_OFFSET 227
+#define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228
+#define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229
+#define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230
+#define KVM_CAP_USER_MEMORY2 231
+#define KVM_CAP_MEMORY_FAULT_INFO 232
+#define KVM_CAP_MEMORY_ATTRIBUTES 233
+#define KVM_CAP_GUEST_MEMFD 234
+#define KVM_CAP_VM_TYPES 235
+#define KVM_CAP_PRE_FAULT_MEMORY 236
+#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237
+#define KVM_CAP_X86_GUEST_MODE 238
+#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239
+#define KVM_CAP_ARM_EL2 240
+#define KVM_CAP_ARM_EL2_E2H0 241
+#define KVM_CAP_RISCV_MP_STATE_RESET 242
+#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243
+#define KVM_CAP_GUEST_MEMFD_FLAGS 244
struct kvm_irq_routing_irqchip {
__u32 irqchip;
@@ -1242,41 +1028,6 @@ struct kvm_irq_routing {
struct kvm_irq_routing_entry entries[];
};
-#endif
-
-#ifdef KVM_CAP_MCE
-/* x86 MCE */
-struct kvm_x86_mce {
- __u64 status;
- __u64 addr;
- __u64 misc;
- __u64 mcg_status;
- __u8 bank;
- __u8 pad1[7];
- __u64 pad2[3];
-};
-#endif
-
-#ifdef KVM_CAP_XEN_HVM
-#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0)
-#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
-#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
-#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
-#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
-#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5)
-#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6)
-
-struct kvm_xen_hvm_config {
- __u32 flags;
- __u32 msr;
- __u64 blob_addr_32;
- __u64 blob_addr_64;
- __u8 blob_size_32;
- __u8 blob_size_64;
- __u8 pad2[30];
-};
-#endif
-
#define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
/*
* Available with KVM_CAP_IRQFD_RESAMPLE
@@ -1345,9 +1096,14 @@ struct kvm_dirty_tlb {
#define KVM_REG_ARM64 0x6000000000000000ULL
#define KVM_REG_MIPS 0x7000000000000000ULL
#define KVM_REG_RISCV 0x8000000000000000ULL
+#define KVM_REG_LOONGARCH 0x9000000000000000ULL
#define KVM_REG_SIZE_SHIFT 52
#define KVM_REG_SIZE_MASK 0x00f0000000000000ULL
+
+#define KVM_REG_SIZE(id) \
+ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
#define KVM_REG_SIZE_U8 0x0000000000000000ULL
#define KVM_REG_SIZE_U16 0x0010000000000000ULL
#define KVM_REG_SIZE_U32 0x0020000000000000ULL
@@ -1401,9 +1157,16 @@ struct kvm_device_attr {
__u64 addr; /* userspace address of attr data */
};
-#define KVM_DEV_VFIO_GROUP 1
-#define KVM_DEV_VFIO_GROUP_ADD 1
-#define KVM_DEV_VFIO_GROUP_DEL 2
+#define KVM_DEV_VFIO_FILE 1
+
+#define KVM_DEV_VFIO_FILE_ADD 1
+#define KVM_DEV_VFIO_FILE_DEL 2
+
+/* KVM_DEV_VFIO_GROUP aliases are for compile time uapi compatibility */
+#define KVM_DEV_VFIO_GROUP KVM_DEV_VFIO_FILE
+
+#define KVM_DEV_VFIO_GROUP_ADD KVM_DEV_VFIO_FILE_ADD
+#define KVM_DEV_VFIO_GROUP_DEL KVM_DEV_VFIO_FILE_DEL
#define KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE 3
enum kvm_device_type {
@@ -1427,7 +1190,17 @@ enum kvm_device_type {
#define KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_XIVE
KVM_DEV_TYPE_ARM_PV_TIME,
#define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME
+ KVM_DEV_TYPE_RISCV_AIA,
+#define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA
+ KVM_DEV_TYPE_LOONGARCH_IPI,
+#define KVM_DEV_TYPE_LOONGARCH_IPI KVM_DEV_TYPE_LOONGARCH_IPI
+ KVM_DEV_TYPE_LOONGARCH_EIOINTC,
+#define KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_EIOINTC
+ KVM_DEV_TYPE_LOONGARCH_PCHPIC,
+#define KVM_DEV_TYPE_LOONGARCH_PCHPIC KVM_DEV_TYPE_LOONGARCH_PCHPIC
+
KVM_DEV_TYPE_MAX,
+
};
struct kvm_vfio_spapr_tce {
@@ -1442,18 +1215,15 @@ struct kvm_vfio_spapr_tce {
#define KVM_CREATE_VCPU _IO(KVMIO, 0x41)
#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log)
#define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44)
-#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45)
+#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) /* deprecated */
#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \
struct kvm_userspace_memory_region)
#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47)
#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64)
+#define KVM_SET_USER_MEMORY_REGION2 _IOW(KVMIO, 0x49, \
+ struct kvm_userspace_memory_region2)
/* enable ucontrol for s390 */
-struct kvm_s390_ucas_mapping {
- __u64 user_addr;
- __u64 vcpu_addr;
- __u64 length;
-};
#define KVM_S390_UCAS_MAP _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping)
#define KVM_S390_UCAS_UNMAP _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping)
#define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long)
@@ -1471,20 +1241,8 @@ struct kvm_s390_ucas_mapping {
_IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone)
#define KVM_UNREGISTER_COALESCED_MMIO \
_IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone)
-#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
- struct kvm_assigned_pci_dev)
#define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing)
-/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */
-#define KVM_ASSIGN_IRQ __KVM_DEPRECATED_VM_R_0x70
-#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq)
#define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71)
-#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \
- struct kvm_assigned_pci_dev)
-#define KVM_ASSIGN_SET_MSIX_NR _IOW(KVMIO, 0x73, \
- struct kvm_assigned_msix_nr)
-#define KVM_ASSIGN_SET_MSIX_ENTRY _IOW(KVMIO, 0x74, \
- struct kvm_assigned_msix_entry)
-#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
#define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd)
#define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config)
#define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78)
@@ -1501,9 +1259,6 @@ struct kvm_s390_ucas_mapping {
* KVM_CAP_VM_TSC_CONTROL to set defaults for a VM */
#define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2)
#define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3)
-/* Available with KVM_CAP_PCI_2_3 */
-#define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \
- struct kvm_assigned_pci_dev)
/* Available with KVM_CAP_SIGNAL_MSI */
#define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi)
/* Available with KVM_CAP_PPC_GET_SMMU_INFO */
@@ -1524,9 +1279,9 @@ struct kvm_s390_ucas_mapping {
/* Available with KVM_CAP_SPAPR_RESIZE_HPT */
#define KVM_PPC_RESIZE_HPT_PREPARE _IOR(KVMIO, 0xad, struct kvm_ppc_resize_hpt)
#define KVM_PPC_RESIZE_HPT_COMMIT _IOR(KVMIO, 0xae, struct kvm_ppc_resize_hpt)
-/* Available with KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3 */
+/* Available with KVM_CAP_PPC_MMU_RADIX or KVM_CAP_PPC_MMU_HASH_V3 */
#define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg)
-/* Available with KVM_CAP_PPC_RADIX_MMU */
+/* Available with KVM_CAP_PPC_MMU_RADIX */
#define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info)
/* Available with KVM_CAP_PPC_GET_CPU_CHAR */
#define KVM_PPC_GET_CPU_CHAR _IOR(KVMIO, 0xb1, struct kvm_ppc_cpu_char)
@@ -1534,6 +1289,9 @@ struct kvm_s390_ucas_mapping {
#define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter)
#define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3)
#define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags)
+/* Available with KVM_CAP_COUNTER_OFFSET */
+#define KVM_ARM_SET_COUNTER_OFFSET _IOW(KVMIO, 0xb5, struct kvm_arm_counter_offset)
+#define KVM_ARM_GET_REG_WRITABLE_MASKS _IOR(KVMIO, 0xb6, struct reg_mask_range)
/* ioctl for vm fd */
#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device)
@@ -1553,8 +1311,6 @@ struct kvm_s390_ucas_mapping {
#define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs)
#define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation)
#define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt)
-/* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */
-#define KVM_DEBUG_GUEST __KVM_DEPRECATED_VCPU_W_0x87
#define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs)
#define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs)
#define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid)
@@ -1596,7 +1352,7 @@ struct kvm_s390_ucas_mapping {
#define KVM_GET_DEBUGREGS _IOR(KVMIO, 0xa1, struct kvm_debugregs)
#define KVM_SET_DEBUGREGS _IOW(KVMIO, 0xa2, struct kvm_debugregs)
/*
- * vcpu version available with KVM_ENABLE_CAP
+ * vcpu version available with KVM_CAP_ENABLE_CAP
* vm version available with KVM_CAP_ENABLE_CAP_VM
*/
#define KVM_ENABLE_CAP _IOW(KVMIO, 0xa3, struct kvm_enable_cap)
@@ -1662,89 +1418,6 @@ struct kvm_enc_region {
#define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3)
#define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4)
-struct kvm_s390_pv_sec_parm {
- __u64 origin;
- __u64 length;
-};
-
-struct kvm_s390_pv_unp {
- __u64 addr;
- __u64 size;
- __u64 tweak;
-};
-
-enum pv_cmd_dmp_id {
- KVM_PV_DUMP_INIT,
- KVM_PV_DUMP_CONFIG_STOR_STATE,
- KVM_PV_DUMP_COMPLETE,
- KVM_PV_DUMP_CPU,
-};
-
-struct kvm_s390_pv_dmp {
- __u64 subcmd;
- __u64 buff_addr;
- __u64 buff_len;
- __u64 gaddr; /* For dump storage state */
- __u64 reserved[4];
-};
-
-enum pv_cmd_info_id {
- KVM_PV_INFO_VM,
- KVM_PV_INFO_DUMP,
-};
-
-struct kvm_s390_pv_info_dump {
- __u64 dump_cpu_buffer_len;
- __u64 dump_config_mem_buffer_per_1m;
- __u64 dump_config_finalize_len;
-};
-
-struct kvm_s390_pv_info_vm {
- __u64 inst_calls_list[4];
- __u64 max_cpus;
- __u64 max_guests;
- __u64 max_guest_addr;
- __u64 feature_indication;
-};
-
-struct kvm_s390_pv_info_header {
- __u32 id;
- __u32 len_max;
- __u32 len_written;
- __u32 reserved;
-};
-
-struct kvm_s390_pv_info {
- struct kvm_s390_pv_info_header header;
- union {
- struct kvm_s390_pv_info_dump dump;
- struct kvm_s390_pv_info_vm vm;
- };
-};
-
-enum pv_cmd_id {
- KVM_PV_ENABLE,
- KVM_PV_DISABLE,
- KVM_PV_SET_SEC_PARMS,
- KVM_PV_UNPACK,
- KVM_PV_VERIFY,
- KVM_PV_PREP_RESET,
- KVM_PV_UNSHARE_ALL,
- KVM_PV_INFO,
- KVM_PV_DUMP,
- KVM_PV_ASYNC_CLEANUP_PREPARE,
- KVM_PV_ASYNC_CLEANUP_PERFORM,
-};
-
-struct kvm_pv_cmd {
- __u32 cmd; /* Command to be executed */
- __u16 rc; /* Ultravisor return code */
- __u16 rrc; /* Ultravisor return reason code */
- __u64 data; /* Data or address */
- __u32 flags; /* flags for future extensions. Must be 0 for now */
- __u32 reserved[3];
-};
-
/* Available with KVM_CAP_S390_PROTECTED */
#define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd)
@@ -1758,58 +1431,6 @@ struct kvm_pv_cmd {
#define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr)
#define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr)
-struct kvm_xen_hvm_attr {
- __u16 type;
- __u16 pad[3];
- union {
- __u8 long_mode;
- __u8 vector;
- __u8 runstate_update_flag;
- struct {
- __u64 gfn;
-#define KVM_XEN_INVALID_GFN ((__u64)-1)
- } shared_info;
- struct {
- __u32 send_port;
- __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */
- __u32 flags;
-#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0)
-#define KVM_XEN_EVTCHN_UPDATE (1 << 1)
-#define KVM_XEN_EVTCHN_RESET (1 << 2)
- /*
- * Events sent by the guest are either looped back to
- * the guest itself (potentially on a different port#)
- * or signalled via an eventfd.
- */
- union {
- struct {
- __u32 port;
- __u32 vcpu;
- __u32 priority;
- } port;
- struct {
- __u32 port; /* Zero for eventfd */
- __s32 fd;
- } eventfd;
- __u32 padding[4];
- } deliver;
- } evtchn;
- __u32 xen_version;
- __u64 pad[8];
- } u;
-};
-
-
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
-#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0
-#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1
-#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
-#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3
-#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */
-#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5
-
/* Per-vCPU Xen attributes */
#define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)
#define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr)
@@ -1820,242 +1441,6 @@ struct kvm_xen_hvm_attr {
#define KVM_GET_SREGS2 _IOR(KVMIO, 0xcc, struct kvm_sregs2)
#define KVM_SET_SREGS2 _IOW(KVMIO, 0xcd, struct kvm_sregs2)
-struct kvm_xen_vcpu_attr {
- __u16 type;
- __u16 pad[3];
- union {
- __u64 gpa;
-#define KVM_XEN_INVALID_GPA ((__u64)-1)
- __u64 pad[8];
- struct {
- __u64 state;
- __u64 state_entry_time;
- __u64 time_running;
- __u64 time_runnable;
- __u64 time_blocked;
- __u64 time_offline;
- } runstate;
- __u32 vcpu_id;
- struct {
- __u32 port;
- __u32 priority;
- __u64 expires_ns;
- } timer;
- __u8 vector;
- } u;
-};
-
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0
-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1
-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2
-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3
-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4
-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6
-#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7
-#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8
-
-/* Secure Encrypted Virtualization command */
-enum sev_cmd_id {
- /* Guest initialization commands */
- KVM_SEV_INIT = 0,
- KVM_SEV_ES_INIT,
- /* Guest launch commands */
- KVM_SEV_LAUNCH_START,
- KVM_SEV_LAUNCH_UPDATE_DATA,
- KVM_SEV_LAUNCH_UPDATE_VMSA,
- KVM_SEV_LAUNCH_SECRET,
- KVM_SEV_LAUNCH_MEASURE,
- KVM_SEV_LAUNCH_FINISH,
- /* Guest migration commands (outgoing) */
- KVM_SEV_SEND_START,
- KVM_SEV_SEND_UPDATE_DATA,
- KVM_SEV_SEND_UPDATE_VMSA,
- KVM_SEV_SEND_FINISH,
- /* Guest migration commands (incoming) */
- KVM_SEV_RECEIVE_START,
- KVM_SEV_RECEIVE_UPDATE_DATA,
- KVM_SEV_RECEIVE_UPDATE_VMSA,
- KVM_SEV_RECEIVE_FINISH,
- /* Guest status and debug commands */
- KVM_SEV_GUEST_STATUS,
- KVM_SEV_DBG_DECRYPT,
- KVM_SEV_DBG_ENCRYPT,
- /* Guest certificates commands */
- KVM_SEV_CERT_EXPORT,
- /* Attestation report */
- KVM_SEV_GET_ATTESTATION_REPORT,
- /* Guest Migration Extension */
- KVM_SEV_SEND_CANCEL,
-
- KVM_SEV_NR_MAX,
-};
-
-struct kvm_sev_cmd {
- __u32 id;
- __u64 data;
- __u32 error;
- __u32 sev_fd;
-};
-
-struct kvm_sev_launch_start {
- __u32 handle;
- __u32 policy;
- __u64 dh_uaddr;
- __u32 dh_len;
- __u64 session_uaddr;
- __u32 session_len;
-};
-
-struct kvm_sev_launch_update_data {
- __u64 uaddr;
- __u32 len;
-};
-
-
-struct kvm_sev_launch_secret {
- __u64 hdr_uaddr;
- __u32 hdr_len;
- __u64 guest_uaddr;
- __u32 guest_len;
- __u64 trans_uaddr;
- __u32 trans_len;
-};
-
-struct kvm_sev_launch_measure {
- __u64 uaddr;
- __u32 len;
-};
-
-struct kvm_sev_guest_status {
- __u32 handle;
- __u32 policy;
- __u32 state;
-};
-
-struct kvm_sev_dbg {
- __u64 src_uaddr;
- __u64 dst_uaddr;
- __u32 len;
-};
-
-struct kvm_sev_attestation_report {
- __u8 mnonce[16];
- __u64 uaddr;
- __u32 len;
-};
-
-struct kvm_sev_send_start {
- __u32 policy;
- __u64 pdh_cert_uaddr;
- __u32 pdh_cert_len;
- __u64 plat_certs_uaddr;
- __u32 plat_certs_len;
- __u64 amd_certs_uaddr;
- __u32 amd_certs_len;
- __u64 session_uaddr;
- __u32 session_len;
-};
-
-struct kvm_sev_send_update_data {
- __u64 hdr_uaddr;
- __u32 hdr_len;
- __u64 guest_uaddr;
- __u32 guest_len;
- __u64 trans_uaddr;
- __u32 trans_len;
-};
-
-struct kvm_sev_receive_start {
- __u32 handle;
- __u32 policy;
- __u64 pdh_uaddr;
- __u32 pdh_len;
- __u64 session_uaddr;
- __u32 session_len;
-};
-
-struct kvm_sev_receive_update_data {
- __u64 hdr_uaddr;
- __u32 hdr_len;
- __u64 guest_uaddr;
- __u32 guest_len;
- __u64 trans_uaddr;
- __u32 trans_len;
-};
-
-#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
-#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
-#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2)
-
-struct kvm_assigned_pci_dev {
- __u32 assigned_dev_id;
- __u32 busnr;
- __u32 devfn;
- __u32 flags;
- __u32 segnr;
- union {
- __u32 reserved[11];
- };
-};
-
-#define KVM_DEV_IRQ_HOST_INTX (1 << 0)
-#define KVM_DEV_IRQ_HOST_MSI (1 << 1)
-#define KVM_DEV_IRQ_HOST_MSIX (1 << 2)
-
-#define KVM_DEV_IRQ_GUEST_INTX (1 << 8)
-#define KVM_DEV_IRQ_GUEST_MSI (1 << 9)
-#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10)
-
-#define KVM_DEV_IRQ_HOST_MASK 0x00ff
-#define KVM_DEV_IRQ_GUEST_MASK 0xff00
-
-struct kvm_assigned_irq {
- __u32 assigned_dev_id;
- __u32 host_irq; /* ignored (legacy field) */
- __u32 guest_irq;
- __u32 flags;
- union {
- __u32 reserved[12];
- };
-};
-
-struct kvm_assigned_msix_nr {
- __u32 assigned_dev_id;
- __u16 entry_nr;
- __u16 padding;
-};
-
-#define KVM_MAX_MSIX_PER_DEV 256
-struct kvm_assigned_msix_entry {
- __u32 assigned_dev_id;
- __u32 gsi;
- __u16 entry; /* The index of entry in the MSI-X table */
- __u16 padding[3];
-};
-
-#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
-#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
-
-/* Available with KVM_CAP_ARM_USER_IRQ */
-
-/* Bits for run->s.regs.device_irq_level */
-#define KVM_ARM_DEV_EL1_VTIMER (1 << 0)
-#define KVM_ARM_DEV_EL1_PTIMER (1 << 1)
-#define KVM_ARM_DEV_PMU (1 << 2)
-
-struct kvm_hyperv_eventfd {
- __u32 conn_id;
- __s32 fd;
- __u32 flags;
- __u32 padding[3];
-};
-
-#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff
-#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0)
-
#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0)
#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1)
@@ -2201,31 +1586,35 @@ struct kvm_stats_desc {
/* Available with KVM_CAP_S390_ZPCI_OP */
#define KVM_S390_ZPCI_OP _IOW(KVMIO, 0xd1, struct kvm_s390_zpci_op)
-struct kvm_s390_zpci_op {
- /* in */
- __u32 fh; /* target device */
- __u8 op; /* operation to perform */
- __u8 pad[3];
- union {
- /* for KVM_S390_ZPCIOP_REG_AEN */
- struct {
- __u64 ibv; /* Guest addr of interrupt bit vector */
- __u64 sb; /* Guest addr of summary bit */
- __u32 flags;
- __u32 noi; /* Number of interrupts */
- __u8 isc; /* Guest interrupt subclass */
- __u8 sbo; /* Offset of guest summary bit vector */
- __u16 pad;
- } reg_aen;
- __u64 reserved[8];
- } u;
+/* Available with KVM_CAP_MEMORY_ATTRIBUTES */
+#define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd2, struct kvm_memory_attributes)
+
+struct kvm_memory_attributes {
+ __u64 address;
+ __u64 size;
+ __u64 attributes;
+ __u64 flags;
+};
+
+#define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3)
+
+#define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd)
+#define GUEST_MEMFD_FLAG_MMAP (1ULL << 0)
+#define GUEST_MEMFD_FLAG_INIT_SHARED (1ULL << 1)
+
+struct kvm_create_guest_memfd {
+ __u64 size;
+ __u64 flags;
+ __u64 reserved[6];
};
-/* types for kvm_s390_zpci_op->op */
-#define KVM_S390_ZPCIOP_REG_AEN 0
-#define KVM_S390_ZPCIOP_DEREG_AEN 1
+#define KVM_PRE_FAULT_MEMORY _IOWR(KVMIO, 0xd5, struct kvm_pre_fault_memory)
-/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
-#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0)
+struct kvm_pre_fault_memory {
+ __u64 gpa;
+ __u64 size;
+ __u64 flags;
+ __u64 padding[5];
+};
#endif /* __LINUX_KVM_H */
diff --git a/tools/include/uapi/linux/memfd.h b/tools/include/uapi/linux/memfd.h
new file mode 100644
index 000000000000..01c0324e7733
--- /dev/null
+++ b/tools/include/uapi/linux/memfd.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _LINUX_MEMFD_H
+#define _LINUX_MEMFD_H
+
+#include <asm-generic/hugetlb_encode.h>
+
+/* flags for memfd_create(2) (unsigned int) */
+#define MFD_CLOEXEC 0x0001U
+#define MFD_ALLOW_SEALING 0x0002U
+#define MFD_HUGETLB 0x0004U
+/* not executable and sealed to prevent changing to executable. */
+#define MFD_NOEXEC_SEAL 0x0008U
+/* executable */
+#define MFD_EXEC 0x0010U
+
+/*
+ * Huge page size encoding when MFD_HUGETLB is specified, and a huge page
+ * size other than the default is desired. See hugetlb_encode.h.
+ * All known huge page size encodings are provided here. It is the
+ * responsibility of the application to know which sizes are supported on
+ * the running system. See mmap(2) man page for details.
+ */
+#define MFD_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT
+#define MFD_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK
+
+#define MFD_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB
+#define MFD_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB
+#define MFD_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB
+#define MFD_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB
+#define MFD_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB
+#define MFD_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB
+#define MFD_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB
+#define MFD_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB
+#define MFD_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB
+#define MFD_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB
+#define MFD_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
+#define MFD_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB
+
+#endif /* _LINUX_MEMFD_H */
diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h
index f55bc680b5b0..e89d00528f2f 100644
--- a/tools/include/uapi/linux/mman.h
+++ b/tools/include/uapi/linux/mman.h
@@ -4,6 +4,7 @@
#include <asm/mman.h>
#include <asm-generic/hugetlb_encode.h>
+#include <linux/types.h>
#define MREMAP_MAYMOVE 1
#define MREMAP_FIXED 2
@@ -16,6 +17,7 @@
#define MAP_SHARED 0x01 /* Share changes */
#define MAP_PRIVATE 0x02 /* Changes are private */
#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */
+#define MAP_DROPPABLE 0x08 /* Zero memory under memory pressure. */
/*
* Huge page size encoding when MAP_HUGETLB is specified, and a huge page
@@ -41,4 +43,17 @@
#define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
#define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB
+struct cachestat_range {
+ __u64 off;
+ __u64 len;
+};
+
+struct cachestat {
+ __u64 nr_cache;
+ __u64 nr_dirty;
+ __u64 nr_writeback;
+ __u64 nr_evicted;
+ __u64 nr_recently_evicted;
+};
+
#endif /* _UAPI_LINUX_MMAN_H */
diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h
index 4d93967f8aea..7fa67c2031a5 100644
--- a/tools/include/uapi/linux/mount.h
+++ b/tools/include/uapi/linux/mount.h
@@ -74,7 +74,8 @@
#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */
#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */
#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */
-#define MOVE_MOUNT__MASK 0x00000177
+#define MOVE_MOUNT_BENEATH 0x00000200 /* Mount beneath top mount */
+#define MOVE_MOUNT__MASK 0x00000377
/*
* fsopen() flags.
@@ -99,8 +100,9 @@ enum fsconfig_command {
FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */
FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */
FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */
- FSCONFIG_CMD_CREATE = 6, /* Invoke superblock creation */
+ FSCONFIG_CMD_CREATE = 6, /* Create new or reuse existing superblock */
FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */
+ FSCONFIG_CMD_CREATE_EXCL = 8, /* Create new superblock, fail if reusing existing superblock */
};
/*
@@ -136,4 +138,98 @@ struct mount_attr {
/* List of all mount_attr versions. */
#define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */
+
+/*
+ * Structure for getting mount/superblock/filesystem info with statmount(2).
+ *
+ * The interface is similar to statx(2): individual fields or groups can be
+ * selected with the @mask argument of statmount(). Kernel will set the @mask
+ * field according to the supported fields.
+ *
+ * If string fields are selected, then the caller needs to pass a buffer that
+ * has space after the fixed part of the structure. Nul terminated strings are
+ * copied there and offsets relative to @str are stored in the relevant fields.
+ * If the buffer is too small, then EOVERFLOW is returned. The actually used
+ * size is returned in @size.
+ */
+struct statmount {
+ __u32 size; /* Total size, including strings */
+ __u32 mnt_opts; /* [str] Options (comma separated, escaped) */
+ __u64 mask; /* What results were written */
+ __u32 sb_dev_major; /* Device ID */
+ __u32 sb_dev_minor;
+ __u64 sb_magic; /* ..._SUPER_MAGIC */
+ __u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */
+ __u32 fs_type; /* [str] Filesystem type */
+ __u64 mnt_id; /* Unique ID of mount */
+ __u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */
+ __u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */
+ __u32 mnt_parent_id_old;
+ __u64 mnt_attr; /* MOUNT_ATTR_... */
+ __u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */
+ __u64 mnt_peer_group; /* ID of shared peer group */
+ __u64 mnt_master; /* Mount receives propagation from this ID */
+ __u64 propagate_from; /* Propagation from in current namespace */
+ __u32 mnt_root; /* [str] Root of mount relative to root of fs */
+ __u32 mnt_point; /* [str] Mountpoint relative to current root */
+ __u64 mnt_ns_id; /* ID of the mount namespace */
+ __u32 fs_subtype; /* [str] Subtype of fs_type (if any) */
+ __u32 sb_source; /* [str] Source string of the mount */
+ __u32 opt_num; /* Number of fs options */
+ __u32 opt_array; /* [str] Array of nul terminated fs options */
+ __u32 opt_sec_num; /* Number of security options */
+ __u32 opt_sec_array; /* [str] Array of nul terminated security options */
+ __u64 supported_mask; /* Mask flags that this kernel supports */
+ __u32 mnt_uidmap_num; /* Number of uid mappings */
+ __u32 mnt_uidmap; /* [str] Array of uid mappings (as seen from callers namespace) */
+ __u32 mnt_gidmap_num; /* Number of gid mappings */
+ __u32 mnt_gidmap; /* [str] Array of gid mappings (as seen from callers namespace) */
+ __u64 __spare2[43];
+ char str[]; /* Variable size part containing strings */
+};
+
+/*
+ * Structure for passing mount ID and miscellaneous parameters to statmount(2)
+ * and listmount(2).
+ *
+ * For statmount(2) @param represents the request mask.
+ * For listmount(2) @param represents the last listed mount id (or zero).
+ */
+struct mnt_id_req {
+ __u32 size;
+ __u32 spare;
+ __u64 mnt_id;
+ __u64 param;
+ __u64 mnt_ns_id;
+};
+
+/* List of all mnt_id_req versions. */
+#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */
+#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */
+
+/*
+ * @mask bits for statmount(2)
+ */
+#define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */
+#define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... */
+#define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */
+#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */
+#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */
+#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */
+#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */
+#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */
+#define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */
+#define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */
+#define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */
+#define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */
+#define STATMOUNT_SUPPORTED_MASK 0x00001000U /* Want/got supported mask flags */
+#define STATMOUNT_MNT_UIDMAP 0x00002000U /* Want/got uidmap... */
+#define STATMOUNT_MNT_GIDMAP 0x00004000U /* Want/got gidmap... */
+
+/*
+ * Special @mnt_id values that can be passed to listmount
+ */
+#define LSMT_ROOT 0xffffffffffffffff /* root mount */
+#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */
+
#endif /* _UAPI_LINUX_MOUNT_H */
diff --git a/tools/include/uapi/linux/neighbour.h b/tools/include/uapi/linux/neighbour.h
new file mode 100644
index 000000000000..c34a81245f87
--- /dev/null
+++ b/tools/include/uapi/linux/neighbour.h
@@ -0,0 +1,229 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_NEIGHBOUR_H
+#define _UAPI__LINUX_NEIGHBOUR_H
+
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+struct ndmsg {
+ __u8 ndm_family;
+ __u8 ndm_pad1;
+ __u16 ndm_pad2;
+ __s32 ndm_ifindex;
+ __u16 ndm_state;
+ __u8 ndm_flags;
+ __u8 ndm_type;
+};
+
+enum {
+ NDA_UNSPEC,
+ NDA_DST,
+ NDA_LLADDR,
+ NDA_CACHEINFO,
+ NDA_PROBES,
+ NDA_VLAN,
+ NDA_PORT,
+ NDA_VNI,
+ NDA_IFINDEX,
+ NDA_MASTER,
+ NDA_LINK_NETNSID,
+ NDA_SRC_VNI,
+ NDA_PROTOCOL, /* Originator of entry */
+ NDA_NH_ID,
+ NDA_FDB_EXT_ATTRS,
+ NDA_FLAGS_EXT,
+ NDA_NDM_STATE_MASK,
+ NDA_NDM_FLAGS_MASK,
+ __NDA_MAX
+};
+
+#define NDA_MAX (__NDA_MAX - 1)
+
+/*
+ * Neighbor Cache Entry Flags
+ */
+
+#define NTF_USE (1 << 0)
+#define NTF_SELF (1 << 1)
+#define NTF_MASTER (1 << 2)
+#define NTF_PROXY (1 << 3) /* == ATF_PUBL */
+#define NTF_EXT_LEARNED (1 << 4)
+#define NTF_OFFLOADED (1 << 5)
+#define NTF_STICKY (1 << 6)
+#define NTF_ROUTER (1 << 7)
+/* Extended flags under NDA_FLAGS_EXT: */
+#define NTF_EXT_MANAGED (1 << 0)
+#define NTF_EXT_LOCKED (1 << 1)
+#define NTF_EXT_EXT_VALIDATED (1 << 2)
+
+/*
+ * Neighbor Cache Entry States.
+ */
+
+#define NUD_INCOMPLETE 0x01
+#define NUD_REACHABLE 0x02
+#define NUD_STALE 0x04
+#define NUD_DELAY 0x08
+#define NUD_PROBE 0x10
+#define NUD_FAILED 0x20
+
+/* Dummy states */
+#define NUD_NOARP 0x40
+#define NUD_PERMANENT 0x80
+#define NUD_NONE 0x00
+
+/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change and make no
+ * address resolution or NUD.
+ *
+ * NUD_PERMANENT also cannot be deleted by garbage collectors. This holds true
+ * for dynamic entries with NTF_EXT_LEARNED flag as well. However, upon carrier
+ * down event, NUD_PERMANENT entries are not flushed whereas NTF_EXT_LEARNED
+ * flagged entries explicitly are (which is also consistent with the routing
+ * subsystem).
+ *
+ * When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry
+ * states don't make sense and thus are ignored. Such entries don't age and
+ * can roam.
+ *
+ * NTF_EXT_MANAGED flagged neigbor entries are managed by the kernel on behalf
+ * of a user space control plane, and automatically refreshed so that (if
+ * possible) they remain in NUD_REACHABLE state.
+ *
+ * NTF_EXT_LOCKED flagged bridge FDB entries are entries generated by the
+ * bridge in response to a host trying to communicate via a locked bridge port
+ * with MAB enabled. Their purpose is to notify user space that a host requires
+ * authentication.
+ *
+ * NTF_EXT_EXT_VALIDATED flagged neighbor entries were externally validated by
+ * a user space control plane. The kernel will not remove or invalidate them,
+ * but it can probe them and notify user space when they become reachable.
+ */
+
+struct nda_cacheinfo {
+ __u32 ndm_confirmed;
+ __u32 ndm_used;
+ __u32 ndm_updated;
+ __u32 ndm_refcnt;
+};
+
+/*****************************************************************
+ * Neighbour tables specific messages.
+ *
+ * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the
+ * NLM_F_DUMP flag set. Every neighbour table configuration is
+ * spread over multiple messages to avoid running into message
+ * size limits on systems with many interfaces. The first message
+ * in the sequence transports all not device specific data such as
+ * statistics, configuration, and the default parameter set.
+ * This message is followed by 0..n messages carrying device
+ * specific parameter sets.
+ * Although the ordering should be sufficient, NDTA_NAME can be
+ * used to identify sequences. The initial message can be identified
+ * by checking for NDTA_CONFIG. The device specific messages do
+ * not contain this TLV but have NDTPA_IFINDEX set to the
+ * corresponding interface index.
+ *
+ * To change neighbour table attributes, send RTM_SETNEIGHTBL
+ * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3],
+ * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked
+ * otherwise. Device specific parameter sets can be changed by
+ * setting NDTPA_IFINDEX to the interface index of the corresponding
+ * device.
+ ****/
+
+struct ndt_stats {
+ __u64 ndts_allocs;
+ __u64 ndts_destroys;
+ __u64 ndts_hash_grows;
+ __u64 ndts_res_failed;
+ __u64 ndts_lookups;
+ __u64 ndts_hits;
+ __u64 ndts_rcv_probes_mcast;
+ __u64 ndts_rcv_probes_ucast;
+ __u64 ndts_periodic_gc_runs;
+ __u64 ndts_forced_gc_runs;
+ __u64 ndts_table_fulls;
+};
+
+enum {
+ NDTPA_UNSPEC,
+ NDTPA_IFINDEX, /* u32, unchangeable */
+ NDTPA_REFCNT, /* u32, read-only */
+ NDTPA_REACHABLE_TIME, /* u64, read-only, msecs */
+ NDTPA_BASE_REACHABLE_TIME, /* u64, msecs */
+ NDTPA_RETRANS_TIME, /* u64, msecs */
+ NDTPA_GC_STALETIME, /* u64, msecs */
+ NDTPA_DELAY_PROBE_TIME, /* u64, msecs */
+ NDTPA_QUEUE_LEN, /* u32 */
+ NDTPA_APP_PROBES, /* u32 */
+ NDTPA_UCAST_PROBES, /* u32 */
+ NDTPA_MCAST_PROBES, /* u32 */
+ NDTPA_ANYCAST_DELAY, /* u64, msecs */
+ NDTPA_PROXY_DELAY, /* u64, msecs */
+ NDTPA_PROXY_QLEN, /* u32 */
+ NDTPA_LOCKTIME, /* u64, msecs */
+ NDTPA_QUEUE_LENBYTES, /* u32 */
+ NDTPA_MCAST_REPROBES, /* u32 */
+ NDTPA_PAD,
+ NDTPA_INTERVAL_PROBE_TIME_MS, /* u64, msecs */
+ __NDTPA_MAX
+};
+#define NDTPA_MAX (__NDTPA_MAX - 1)
+
+struct ndtmsg {
+ __u8 ndtm_family;
+ __u8 ndtm_pad1;
+ __u16 ndtm_pad2;
+};
+
+struct ndt_config {
+ __u16 ndtc_key_len;
+ __u16 ndtc_entry_size;
+ __u32 ndtc_entries;
+ __u32 ndtc_last_flush; /* delta to now in msecs */
+ __u32 ndtc_last_rand; /* delta to now in msecs */
+ __u32 ndtc_hash_rnd;
+ __u32 ndtc_hash_mask;
+ __u32 ndtc_hash_chain_gc;
+ __u32 ndtc_proxy_qlen;
+};
+
+enum {
+ NDTA_UNSPEC,
+ NDTA_NAME, /* char *, unchangeable */
+ NDTA_THRESH1, /* u32 */
+ NDTA_THRESH2, /* u32 */
+ NDTA_THRESH3, /* u32 */
+ NDTA_CONFIG, /* struct ndt_config, read-only */
+ NDTA_PARMS, /* nested TLV NDTPA_* */
+ NDTA_STATS, /* struct ndt_stats, read-only */
+ NDTA_GC_INTERVAL, /* u64, msecs */
+ NDTA_PAD,
+ __NDTA_MAX
+};
+#define NDTA_MAX (__NDTA_MAX - 1)
+
+ /* FDB activity notification bits used in NFEA_ACTIVITY_NOTIFY:
+ * - FDB_NOTIFY_BIT - notify on activity/expire for any entry
+ * - FDB_NOTIFY_INACTIVE_BIT - mark as inactive to avoid multiple notifications
+ */
+enum {
+ FDB_NOTIFY_BIT = (1 << 0),
+ FDB_NOTIFY_INACTIVE_BIT = (1 << 1)
+};
+
+/* embedded into NDA_FDB_EXT_ATTRS:
+ * [NDA_FDB_EXT_ATTRS] = {
+ * [NFEA_ACTIVITY_NOTIFY]
+ * ...
+ * }
+ */
+enum {
+ NFEA_UNSPEC,
+ NFEA_ACTIVITY_NOTIFY,
+ NFEA_DONT_REFRESH,
+ __NFEA_MAX
+};
+#define NFEA_MAX (__NFEA_MAX - 1)
+
+#endif
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
new file mode 100644
index 000000000000..e0b579a1df4f
--- /dev/null
+++ b/tools/include/uapi/linux/netdev.h
@@ -0,0 +1,239 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/netdev.yaml */
+/* YNL-GEN uapi header */
+/* To regenerate run: tools/net/ynl/ynl-regen.sh */
+
+#ifndef _UAPI_LINUX_NETDEV_H
+#define _UAPI_LINUX_NETDEV_H
+
+#define NETDEV_FAMILY_NAME "netdev"
+#define NETDEV_FAMILY_VERSION 1
+
+/**
+ * enum netdev_xdp_act
+ * @NETDEV_XDP_ACT_BASIC: XDP features set supported by all drivers
+ * (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX)
+ * @NETDEV_XDP_ACT_REDIRECT: The netdev supports XDP_REDIRECT
+ * @NETDEV_XDP_ACT_NDO_XMIT: This feature informs if netdev implements
+ * ndo_xdp_xmit callback.
+ * @NETDEV_XDP_ACT_XSK_ZEROCOPY: This feature informs if netdev supports AF_XDP
+ * in zero copy mode.
+ * @NETDEV_XDP_ACT_HW_OFFLOAD: This feature informs if netdev supports XDP hw
+ * offloading.
+ * @NETDEV_XDP_ACT_RX_SG: This feature informs if netdev implements non-linear
+ * XDP buffer support in the driver napi callback.
+ * @NETDEV_XDP_ACT_NDO_XMIT_SG: This feature informs if netdev implements
+ * non-linear XDP buffer support in ndo_xdp_xmit callback.
+ */
+enum netdev_xdp_act {
+ NETDEV_XDP_ACT_BASIC = 1,
+ NETDEV_XDP_ACT_REDIRECT = 2,
+ NETDEV_XDP_ACT_NDO_XMIT = 4,
+ NETDEV_XDP_ACT_XSK_ZEROCOPY = 8,
+ NETDEV_XDP_ACT_HW_OFFLOAD = 16,
+ NETDEV_XDP_ACT_RX_SG = 32,
+ NETDEV_XDP_ACT_NDO_XMIT_SG = 64,
+
+ /* private: */
+ NETDEV_XDP_ACT_MASK = 127,
+};
+
+/**
+ * enum netdev_xdp_rx_metadata
+ * @NETDEV_XDP_RX_METADATA_TIMESTAMP: Device is capable of exposing receive HW
+ * timestamp via bpf_xdp_metadata_rx_timestamp().
+ * @NETDEV_XDP_RX_METADATA_HASH: Device is capable of exposing receive packet
+ * hash via bpf_xdp_metadata_rx_hash().
+ * @NETDEV_XDP_RX_METADATA_VLAN_TAG: Device is capable of exposing receive
+ * packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag().
+ */
+enum netdev_xdp_rx_metadata {
+ NETDEV_XDP_RX_METADATA_TIMESTAMP = 1,
+ NETDEV_XDP_RX_METADATA_HASH = 2,
+ NETDEV_XDP_RX_METADATA_VLAN_TAG = 4,
+};
+
+/**
+ * enum netdev_xsk_flags
+ * @NETDEV_XSK_FLAGS_TX_TIMESTAMP: HW timestamping egress packets is supported
+ * by the driver.
+ * @NETDEV_XSK_FLAGS_TX_CHECKSUM: L3 checksum HW offload is supported by the
+ * driver.
+ * @NETDEV_XSK_FLAGS_TX_LAUNCH_TIME_FIFO: Launch time HW offload is supported
+ * by the driver.
+ */
+enum netdev_xsk_flags {
+ NETDEV_XSK_FLAGS_TX_TIMESTAMP = 1,
+ NETDEV_XSK_FLAGS_TX_CHECKSUM = 2,
+ NETDEV_XSK_FLAGS_TX_LAUNCH_TIME_FIFO = 4,
+};
+
+enum netdev_queue_type {
+ NETDEV_QUEUE_TYPE_RX,
+ NETDEV_QUEUE_TYPE_TX,
+};
+
+enum netdev_qstats_scope {
+ NETDEV_QSTATS_SCOPE_QUEUE = 1,
+};
+
+enum netdev_napi_threaded {
+ NETDEV_NAPI_THREADED_DISABLED,
+ NETDEV_NAPI_THREADED_ENABLED,
+ NETDEV_NAPI_THREADED_BUSY_POLL,
+};
+
+enum {
+ NETDEV_A_DEV_IFINDEX = 1,
+ NETDEV_A_DEV_PAD,
+ NETDEV_A_DEV_XDP_FEATURES,
+ NETDEV_A_DEV_XDP_ZC_MAX_SEGS,
+ NETDEV_A_DEV_XDP_RX_METADATA_FEATURES,
+ NETDEV_A_DEV_XSK_FEATURES,
+
+ __NETDEV_A_DEV_MAX,
+ NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1)
+};
+
+enum {
+ __NETDEV_A_IO_URING_PROVIDER_INFO_MAX,
+ NETDEV_A_IO_URING_PROVIDER_INFO_MAX = (__NETDEV_A_IO_URING_PROVIDER_INFO_MAX - 1)
+};
+
+enum {
+ NETDEV_A_PAGE_POOL_ID = 1,
+ NETDEV_A_PAGE_POOL_IFINDEX,
+ NETDEV_A_PAGE_POOL_NAPI_ID,
+ NETDEV_A_PAGE_POOL_INFLIGHT,
+ NETDEV_A_PAGE_POOL_INFLIGHT_MEM,
+ NETDEV_A_PAGE_POOL_DETACH_TIME,
+ NETDEV_A_PAGE_POOL_DMABUF,
+ NETDEV_A_PAGE_POOL_IO_URING,
+
+ __NETDEV_A_PAGE_POOL_MAX,
+ NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1)
+};
+
+enum {
+ NETDEV_A_PAGE_POOL_STATS_INFO = 1,
+ NETDEV_A_PAGE_POOL_STATS_ALLOC_FAST = 8,
+ NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW,
+ NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW_HIGH_ORDER,
+ NETDEV_A_PAGE_POOL_STATS_ALLOC_EMPTY,
+ NETDEV_A_PAGE_POOL_STATS_ALLOC_REFILL,
+ NETDEV_A_PAGE_POOL_STATS_ALLOC_WAIVE,
+ NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHED,
+ NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHE_FULL,
+ NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING,
+ NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING_FULL,
+ NETDEV_A_PAGE_POOL_STATS_RECYCLE_RELEASED_REFCNT,
+
+ __NETDEV_A_PAGE_POOL_STATS_MAX,
+ NETDEV_A_PAGE_POOL_STATS_MAX = (__NETDEV_A_PAGE_POOL_STATS_MAX - 1)
+};
+
+enum {
+ NETDEV_A_NAPI_IFINDEX = 1,
+ NETDEV_A_NAPI_ID,
+ NETDEV_A_NAPI_IRQ,
+ NETDEV_A_NAPI_PID,
+ NETDEV_A_NAPI_DEFER_HARD_IRQS,
+ NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT,
+ NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT,
+ NETDEV_A_NAPI_THREADED,
+
+ __NETDEV_A_NAPI_MAX,
+ NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1)
+};
+
+enum {
+ __NETDEV_A_XSK_INFO_MAX,
+ NETDEV_A_XSK_INFO_MAX = (__NETDEV_A_XSK_INFO_MAX - 1)
+};
+
+enum {
+ NETDEV_A_QUEUE_ID = 1,
+ NETDEV_A_QUEUE_IFINDEX,
+ NETDEV_A_QUEUE_TYPE,
+ NETDEV_A_QUEUE_NAPI_ID,
+ NETDEV_A_QUEUE_DMABUF,
+ NETDEV_A_QUEUE_IO_URING,
+ NETDEV_A_QUEUE_XSK,
+
+ __NETDEV_A_QUEUE_MAX,
+ NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)
+};
+
+enum {
+ NETDEV_A_QSTATS_IFINDEX = 1,
+ NETDEV_A_QSTATS_QUEUE_TYPE,
+ NETDEV_A_QSTATS_QUEUE_ID,
+ NETDEV_A_QSTATS_SCOPE,
+ NETDEV_A_QSTATS_RX_PACKETS = 8,
+ NETDEV_A_QSTATS_RX_BYTES,
+ NETDEV_A_QSTATS_TX_PACKETS,
+ NETDEV_A_QSTATS_TX_BYTES,
+ NETDEV_A_QSTATS_RX_ALLOC_FAIL,
+ NETDEV_A_QSTATS_RX_HW_DROPS,
+ NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS,
+ NETDEV_A_QSTATS_RX_CSUM_COMPLETE,
+ NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY,
+ NETDEV_A_QSTATS_RX_CSUM_NONE,
+ NETDEV_A_QSTATS_RX_CSUM_BAD,
+ NETDEV_A_QSTATS_RX_HW_GRO_PACKETS,
+ NETDEV_A_QSTATS_RX_HW_GRO_BYTES,
+ NETDEV_A_QSTATS_RX_HW_GRO_WIRE_PACKETS,
+ NETDEV_A_QSTATS_RX_HW_GRO_WIRE_BYTES,
+ NETDEV_A_QSTATS_RX_HW_DROP_RATELIMITS,
+ NETDEV_A_QSTATS_TX_HW_DROPS,
+ NETDEV_A_QSTATS_TX_HW_DROP_ERRORS,
+ NETDEV_A_QSTATS_TX_CSUM_NONE,
+ NETDEV_A_QSTATS_TX_NEEDS_CSUM,
+ NETDEV_A_QSTATS_TX_HW_GSO_PACKETS,
+ NETDEV_A_QSTATS_TX_HW_GSO_BYTES,
+ NETDEV_A_QSTATS_TX_HW_GSO_WIRE_PACKETS,
+ NETDEV_A_QSTATS_TX_HW_GSO_WIRE_BYTES,
+ NETDEV_A_QSTATS_TX_HW_DROP_RATELIMITS,
+ NETDEV_A_QSTATS_TX_STOP,
+ NETDEV_A_QSTATS_TX_WAKE,
+
+ __NETDEV_A_QSTATS_MAX,
+ NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1)
+};
+
+enum {
+ NETDEV_A_DMABUF_IFINDEX = 1,
+ NETDEV_A_DMABUF_QUEUES,
+ NETDEV_A_DMABUF_FD,
+ NETDEV_A_DMABUF_ID,
+
+ __NETDEV_A_DMABUF_MAX,
+ NETDEV_A_DMABUF_MAX = (__NETDEV_A_DMABUF_MAX - 1)
+};
+
+enum {
+ NETDEV_CMD_DEV_GET = 1,
+ NETDEV_CMD_DEV_ADD_NTF,
+ NETDEV_CMD_DEV_DEL_NTF,
+ NETDEV_CMD_DEV_CHANGE_NTF,
+ NETDEV_CMD_PAGE_POOL_GET,
+ NETDEV_CMD_PAGE_POOL_ADD_NTF,
+ NETDEV_CMD_PAGE_POOL_DEL_NTF,
+ NETDEV_CMD_PAGE_POOL_CHANGE_NTF,
+ NETDEV_CMD_PAGE_POOL_STATS_GET,
+ NETDEV_CMD_QUEUE_GET,
+ NETDEV_CMD_NAPI_GET,
+ NETDEV_CMD_QSTATS_GET,
+ NETDEV_CMD_BIND_RX,
+ NETDEV_CMD_NAPI_SET,
+ NETDEV_CMD_BIND_TX,
+
+ __NETDEV_CMD_MAX,
+ NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
+};
+
+#define NETDEV_MCGRP_MGMT "mgmt"
+#define NETDEV_MCGRP_PAGE_POOL "page-pool"
+
+#endif /* _UAPI_LINUX_NETDEV_H */
diff --git a/tools/include/uapi/linux/netfilter.h b/tools/include/uapi/linux/netfilter.h
new file mode 100644
index 000000000000..5a79ccb76701
--- /dev/null
+++ b/tools/include/uapi/linux/netfilter.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_NETFILTER_H
+#define _UAPI__LINUX_NETFILTER_H
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+
+/* Responses from hook functions. */
+#define NF_DROP 0
+#define NF_ACCEPT 1
+#define NF_STOLEN 2
+#define NF_QUEUE 3
+#define NF_REPEAT 4
+#define NF_STOP 5 /* Deprecated, for userspace nf_queue compatibility. */
+#define NF_MAX_VERDICT NF_STOP
+
+/* we overload the higher bits for encoding auxiliary data such as the queue
+ * number or errno values. Not nice, but better than additional function
+ * arguments. */
+#define NF_VERDICT_MASK 0x000000ff
+
+/* extra verdict flags have mask 0x0000ff00 */
+#define NF_VERDICT_FLAG_QUEUE_BYPASS 0x00008000
+
+/* queue number (NF_QUEUE) or errno (NF_DROP) */
+#define NF_VERDICT_QMASK 0xffff0000
+#define NF_VERDICT_QBITS 16
+
+#define NF_QUEUE_NR(x) ((((x) << 16) & NF_VERDICT_QMASK) | NF_QUEUE)
+
+#define NF_DROP_ERR(x) (((-x) << 16) | NF_DROP)
+
+/* only for userspace compatibility */
+#ifndef __KERNEL__
+
+/* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */
+#define NF_VERDICT_BITS 16
+#endif
+
+enum nf_inet_hooks {
+ NF_INET_PRE_ROUTING,
+ NF_INET_LOCAL_IN,
+ NF_INET_FORWARD,
+ NF_INET_LOCAL_OUT,
+ NF_INET_POST_ROUTING,
+ NF_INET_NUMHOOKS,
+ NF_INET_INGRESS = NF_INET_NUMHOOKS,
+};
+
+enum nf_dev_hooks {
+ NF_NETDEV_INGRESS,
+ NF_NETDEV_EGRESS,
+ NF_NETDEV_NUMHOOKS
+};
+
+enum {
+ NFPROTO_UNSPEC = 0,
+ NFPROTO_INET = 1,
+ NFPROTO_IPV4 = 2,
+ NFPROTO_ARP = 3,
+ NFPROTO_NETDEV = 5,
+ NFPROTO_BRIDGE = 7,
+ NFPROTO_IPV6 = 10,
+#ifndef __KERNEL__ /* no longer supported by kernel */
+ NFPROTO_DECNET = 12,
+#endif
+ NFPROTO_NUMPROTO,
+};
+
+union nf_inet_addr {
+ __u32 all[4];
+ __be32 ip;
+ __be32 ip6[4];
+ struct in_addr in;
+ struct in6_addr in6;
+};
+
+#endif /* _UAPI__LINUX_NETFILTER_H */
diff --git a/tools/include/uapi/linux/netfilter_arp.h b/tools/include/uapi/linux/netfilter_arp.h
new file mode 100644
index 000000000000..791dfc5ae907
--- /dev/null
+++ b/tools/include/uapi/linux/netfilter_arp.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */
+#ifndef __LINUX_ARP_NETFILTER_H
+#define __LINUX_ARP_NETFILTER_H
+
+/* ARP-specific defines for netfilter.
+ * (C)2002 Rusty Russell IBM -- This code is GPL.
+ */
+
+#include <linux/netfilter.h>
+
+/* There is no PF_ARP. */
+#define NF_ARP 0
+
+/* ARP Hooks */
+#define NF_ARP_IN 0
+#define NF_ARP_OUT 1
+#define NF_ARP_FORWARD 2
+
+#ifndef __KERNEL__
+#define NF_ARP_NUMHOOKS 3
+#endif
+
+#endif /* __LINUX_ARP_NETFILTER_H */
diff --git a/tools/include/uapi/linux/nsfs.h b/tools/include/uapi/linux/nsfs.h
new file mode 100644
index 000000000000..a25e38d1c874
--- /dev/null
+++ b/tools/include/uapi/linux/nsfs.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_NSFS_H
+#define __LINUX_NSFS_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+#define NSIO 0xb7
+
+/* Returns a file descriptor that refers to an owning user namespace */
+#define NS_GET_USERNS _IO(NSIO, 0x1)
+/* Returns a file descriptor that refers to a parent namespace */
+#define NS_GET_PARENT _IO(NSIO, 0x2)
+/* Returns the type of namespace (CLONE_NEW* value) referred to by
+ file descriptor */
+#define NS_GET_NSTYPE _IO(NSIO, 0x3)
+/* Get owner UID (in the caller's user namespace) for a user namespace */
+#define NS_GET_OWNER_UID _IO(NSIO, 0x4)
+/* Translate pid from target pid namespace into the caller's pid namespace. */
+#define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int)
+/* Return thread-group leader id of pid in the callers pid namespace. */
+#define NS_GET_TGID_FROM_PIDNS _IOR(NSIO, 0x7, int)
+/* Translate pid from caller's pid namespace into a target pid namespace. */
+#define NS_GET_PID_IN_PIDNS _IOR(NSIO, 0x8, int)
+/* Return thread-group leader id of pid in the target pid namespace. */
+#define NS_GET_TGID_IN_PIDNS _IOR(NSIO, 0x9, int)
+
+struct mnt_ns_info {
+ __u32 size;
+ __u32 nr_mounts;
+ __u64 mnt_ns_id;
+};
+
+#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */
+
+/* Get information about namespace. */
+#define NS_MNT_GET_INFO _IOR(NSIO, 10, struct mnt_ns_info)
+/* Get next namespace. */
+#define NS_MNT_GET_NEXT _IOR(NSIO, 11, struct mnt_ns_info)
+/* Get previous namespace. */
+#define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info)
+
+/* Retrieve namespace identifiers. */
+#define NS_GET_MNTNS_ID _IOR(NSIO, 5, __u64)
+#define NS_GET_ID _IOR(NSIO, 13, __u64)
+
+enum init_ns_ino {
+ IPC_NS_INIT_INO = 0xEFFFFFFFU,
+ UTS_NS_INIT_INO = 0xEFFFFFFEU,
+ USER_NS_INIT_INO = 0xEFFFFFFDU,
+ PID_NS_INIT_INO = 0xEFFFFFFCU,
+ CGROUP_NS_INIT_INO = 0xEFFFFFFBU,
+ TIME_NS_INIT_INO = 0xEFFFFFFAU,
+ NET_NS_INIT_INO = 0xEFFFFFF9U,
+ MNT_NS_INIT_INO = 0xEFFFFFF8U,
+#ifdef __KERNEL__
+ MNT_NS_ANON_INO = 0xEFFFFFF7U,
+#endif
+};
+
+struct nsfs_file_handle {
+ __u64 ns_id;
+ __u32 ns_type;
+ __u32 ns_inum;
+};
+
+#define NSFS_FILE_HANDLE_SIZE_VER0 16 /* sizeof first published struct */
+#define NSFS_FILE_HANDLE_SIZE_LATEST sizeof(struct nsfs_file_handle) /* sizeof latest published struct */
+
+enum init_ns_id {
+ IPC_NS_INIT_ID = 1ULL,
+ UTS_NS_INIT_ID = 2ULL,
+ USER_NS_INIT_ID = 3ULL,
+ PID_NS_INIT_ID = 4ULL,
+ CGROUP_NS_INIT_ID = 5ULL,
+ TIME_NS_INIT_ID = 6ULL,
+ NET_NS_INIT_ID = 7ULL,
+ MNT_NS_INIT_ID = 8ULL,
+#ifdef __KERNEL__
+ NS_LAST_INIT_ID = MNT_NS_INIT_ID,
+#endif
+};
+
+enum ns_type {
+ TIME_NS = (1ULL << 7), /* CLONE_NEWTIME */
+ MNT_NS = (1ULL << 17), /* CLONE_NEWNS */
+ CGROUP_NS = (1ULL << 25), /* CLONE_NEWCGROUP */
+ UTS_NS = (1ULL << 26), /* CLONE_NEWUTS */
+ IPC_NS = (1ULL << 27), /* CLONE_NEWIPC */
+ USER_NS = (1ULL << 28), /* CLONE_NEWUSER */
+ PID_NS = (1ULL << 29), /* CLONE_NEWPID */
+ NET_NS = (1ULL << 30), /* CLONE_NEWNET */
+};
+
+/**
+ * struct ns_id_req - namespace ID request structure
+ * @size: size of this structure
+ * @spare: reserved for future use
+ * @filter: filter mask
+ * @ns_id: last namespace id
+ * @user_ns_id: owning user namespace ID
+ *
+ * Structure for passing namespace ID and miscellaneous parameters to
+ * statns(2) and listns(2).
+ *
+ * For statns(2) @param represents the request mask.
+ * For listns(2) @param represents the last listed mount id (or zero).
+ */
+struct ns_id_req {
+ __u32 size;
+ __u32 spare;
+ __u64 ns_id;
+ struct /* listns */ {
+ __u32 ns_type;
+ __u32 spare2;
+ __u64 user_ns_id;
+ };
+};
+
+/*
+ * Special @user_ns_id value that can be passed to listns()
+ */
+#define LISTNS_CURRENT_USER 0xffffffffffffffff /* Caller's userns */
+
+/* List of all ns_id_req versions. */
+#define NS_ID_REQ_SIZE_VER0 32 /* sizeof first published struct */
+
+#endif /* __LINUX_NSFS_H */
diff --git a/tools/include/uapi/linux/openat2.h b/tools/include/uapi/linux/openat2.h
deleted file mode 100644
index a5feb7604948..000000000000
--- a/tools/include/uapi/linux/openat2.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _UAPI_LINUX_OPENAT2_H
-#define _UAPI_LINUX_OPENAT2_H
-
-#include <linux/types.h>
-
-/*
- * Arguments for how openat2(2) should open the target path. If only @flags and
- * @mode are non-zero, then openat2(2) operates very similarly to openat(2).
- *
- * However, unlike openat(2), unknown or invalid bits in @flags result in
- * -EINVAL rather than being silently ignored. @mode must be zero unless one of
- * {O_CREAT, O_TMPFILE} are set.
- *
- * @flags: O_* flags.
- * @mode: O_CREAT/O_TMPFILE file mode.
- * @resolve: RESOLVE_* flags.
- */
-struct open_how {
- __u64 flags;
- __u64 mode;
- __u64 resolve;
-};
-
-/* how->resolve flags for openat2(2). */
-#define RESOLVE_NO_XDEV 0x01 /* Block mount-point crossings
- (includes bind-mounts). */
-#define RESOLVE_NO_MAGICLINKS 0x02 /* Block traversal through procfs-style
- "magic-links". */
-#define RESOLVE_NO_SYMLINKS 0x04 /* Block traversal through all symlinks
- (implies OEXT_NO_MAGICLINKS) */
-#define RESOLVE_BENEATH 0x08 /* Block "lexical" trickery like
- "..", symlinks, and absolute
- paths which escape the dirfd. */
-#define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".."
- be scoped inside the dirfd
- (similar to chroot(2)). */
-#define RESOLVE_CACHED 0x20 /* Only complete if resolution can be
- completed through cached lookup. May
- return -EAGAIN if that's not
- possible. */
-
-#endif /* _UAPI_LINUX_OPENAT2_H */
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index ccb7f5dad59b..c44a8fb3e418 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -39,18 +39,21 @@ enum perf_type_id {
/*
* attr.config layout for type PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE
+ *
* PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA
* AA: hardware event ID
* EEEEEEEE: PMU type ID
+ *
* PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB
* BB: hardware cache ID
* CC: hardware cache op ID
* DD: hardware cache op result ID
* EEEEEEEE: PMU type ID
- * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied.
+ *
+ * If the PMU type ID is 0, PERF_TYPE_RAW will be applied.
*/
-#define PERF_PMU_TYPE_SHIFT 32
-#define PERF_HW_EVENT_MASK 0xffffffff
+#define PERF_PMU_TYPE_SHIFT 32
+#define PERF_HW_EVENT_MASK 0xffffffff
/*
* Generalized performance event event_id types, used by the
@@ -112,7 +115,7 @@ enum perf_hw_cache_op_result_id {
/*
* Special "software" events provided by the kernel, even if the hardware
* does not support performance events. These events measure various
- * physical and sw events of the kernel (and allow the profiling of them as
+ * physical and SW events of the kernel (and allow the profiling of them as
* well):
*/
enum perf_sw_ids {
@@ -167,8 +170,9 @@ enum perf_event_sample_format {
};
#define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT)
+
/*
- * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set
+ * Values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set.
*
* If the user does not pass priv level information via branch_sample_type,
* the kernel uses the event's priv level. Branch and event priv levels do
@@ -178,20 +182,20 @@ enum perf_event_sample_format {
* of branches and therefore it supersedes all the other types.
*/
enum perf_branch_sample_type_shift {
- PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */
- PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */
- PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */
-
- PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */
- PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */
- PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */
- PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */
- PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */
- PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */
- PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */
+ PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */
+ PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */
+ PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */
+
+ PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */
+ PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */
+ PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */
+ PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */
+ PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */
+ PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */
+ PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */
PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */
- PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */
+ PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* CALL/RET stack */
PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */
PERF_SAMPLE_BRANCH_CALL_SHIFT = 13, /* direct call */
@@ -204,98 +208,101 @@ enum perf_branch_sample_type_shift {
PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT = 18, /* save privilege mode */
+ PERF_SAMPLE_BRANCH_COUNTERS_SHIFT = 19, /* save occurrences of events on a branch */
+
PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */
};
enum perf_branch_sample_type {
- PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT,
- PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT,
- PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT,
+ PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT,
+ PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT,
+ PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT,
+
+ PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT,
+ PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT,
+ PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT,
- PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT,
- PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT,
- PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT,
- PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT,
- PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT,
- PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT,
- PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT,
- PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT,
+ PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
+ PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT,
+ PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT,
- PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
- PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT,
- PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,
- PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
- PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,
+ PERF_SAMPLE_BRANCH_TYPE_SAVE = 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,
- PERF_SAMPLE_BRANCH_TYPE_SAVE =
- 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,
+ PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT,
- PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT,
+ PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
- PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
+ PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT,
- PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
+ PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
};
/*
- * Common flow change classification
+ * Common control flow change classifications:
*/
enum {
- PERF_BR_UNKNOWN = 0, /* unknown */
- PERF_BR_COND = 1, /* conditional */
- PERF_BR_UNCOND = 2, /* unconditional */
- PERF_BR_IND = 3, /* indirect */
- PERF_BR_CALL = 4, /* function call */
- PERF_BR_IND_CALL = 5, /* indirect function call */
- PERF_BR_RET = 6, /* function return */
- PERF_BR_SYSCALL = 7, /* syscall */
- PERF_BR_SYSRET = 8, /* syscall return */
- PERF_BR_COND_CALL = 9, /* conditional function call */
- PERF_BR_COND_RET = 10, /* conditional function return */
- PERF_BR_ERET = 11, /* exception return */
- PERF_BR_IRQ = 12, /* irq */
- PERF_BR_SERROR = 13, /* system error */
- PERF_BR_NO_TX = 14, /* not in transaction */
- PERF_BR_EXTEND_ABI = 15, /* extend ABI */
+ PERF_BR_UNKNOWN = 0, /* Unknown */
+ PERF_BR_COND = 1, /* Conditional */
+ PERF_BR_UNCOND = 2, /* Unconditional */
+ PERF_BR_IND = 3, /* Indirect */
+ PERF_BR_CALL = 4, /* Function call */
+ PERF_BR_IND_CALL = 5, /* Indirect function call */
+ PERF_BR_RET = 6, /* Function return */
+ PERF_BR_SYSCALL = 7, /* Syscall */
+ PERF_BR_SYSRET = 8, /* Syscall return */
+ PERF_BR_COND_CALL = 9, /* Conditional function call */
+ PERF_BR_COND_RET = 10, /* Conditional function return */
+ PERF_BR_ERET = 11, /* Exception return */
+ PERF_BR_IRQ = 12, /* IRQ */
+ PERF_BR_SERROR = 13, /* System error */
+ PERF_BR_NO_TX = 14, /* Not in transaction */
+ PERF_BR_EXTEND_ABI = 15, /* Extend ABI */
PERF_BR_MAX,
};
/*
- * Common branch speculation outcome classification
+ * Common branch speculation outcome classifications:
*/
enum {
- PERF_BR_SPEC_NA = 0, /* Not available */
- PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */
- PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */
- PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */
+ PERF_BR_SPEC_NA = 0, /* Not available */
+ PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */
+ PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */
+ PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */
PERF_BR_SPEC_MAX,
};
enum {
- PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */
- PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */
- PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */
- PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */
- PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */
- PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */
- PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */
- PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */
+ PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */
+ PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */
+ PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */
+ PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */
+ PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */
+ PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */
+ PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */
+ PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */
PERF_BR_NEW_MAX,
};
enum {
- PERF_BR_PRIV_UNKNOWN = 0,
- PERF_BR_PRIV_USER = 1,
- PERF_BR_PRIV_KERNEL = 2,
- PERF_BR_PRIV_HV = 3,
+ PERF_BR_PRIV_UNKNOWN = 0,
+ PERF_BR_PRIV_USER = 1,
+ PERF_BR_PRIV_KERNEL = 2,
+ PERF_BR_PRIV_HV = 3,
};
-#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1
-#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2
-#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3
-#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4
-#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5
+#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1
+#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2
+#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3
+#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4
+#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5
#define PERF_SAMPLE_BRANCH_PLM_ALL \
(PERF_SAMPLE_BRANCH_USER|\
@@ -306,9 +313,9 @@ enum {
* Values to determine ABI of the registers dump.
*/
enum perf_sample_regs_abi {
- PERF_SAMPLE_REGS_ABI_NONE = 0,
- PERF_SAMPLE_REGS_ABI_32 = 1,
- PERF_SAMPLE_REGS_ABI_64 = 2,
+ PERF_SAMPLE_REGS_ABI_NONE = 0,
+ PERF_SAMPLE_REGS_ABI_32 = 1,
+ PERF_SAMPLE_REGS_ABI_64 = 2,
};
/*
@@ -316,21 +323,21 @@ enum perf_sample_regs_abi {
* abort events. Multiple bits can be set.
*/
enum {
- PERF_TXN_ELISION = (1 << 0), /* From elision */
- PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */
- PERF_TXN_SYNC = (1 << 2), /* Instruction is related */
- PERF_TXN_ASYNC = (1 << 3), /* Instruction not related */
- PERF_TXN_RETRY = (1 << 4), /* Retry possible */
- PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */
- PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */
- PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */
+ PERF_TXN_ELISION = (1 << 0), /* From elision */
+ PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */
+ PERF_TXN_SYNC = (1 << 2), /* Instruction is related */
+ PERF_TXN_ASYNC = (1 << 3), /* Instruction is not related */
+ PERF_TXN_RETRY = (1 << 4), /* Retry possible */
+ PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */
+ PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */
+ PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */
- PERF_TXN_MAX = (1 << 8), /* non-ABI */
+ PERF_TXN_MAX = (1 << 8), /* non-ABI */
- /* bits 32..63 are reserved for the abort code */
+ /* Bits 32..63 are reserved for the abort code */
- PERF_TXN_ABORT_MASK = (0xffffffffULL << 32),
- PERF_TXN_ABORT_SHIFT = 32,
+ PERF_TXN_ABORT_MASK = (0xffffffffULL << 32),
+ PERF_TXN_ABORT_SHIFT = 32,
};
/*
@@ -365,21 +372,23 @@ enum perf_event_read_format {
PERF_FORMAT_MAX = 1U << 5, /* non-ABI */
};
-#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
-#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */
-#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */
-#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */
- /* add: sample_stack_user */
-#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */
-#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */
-#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */
-#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */
+#define PERF_ATTR_SIZE_VER0 64 /* Size of first published 'struct perf_event_attr' */
+#define PERF_ATTR_SIZE_VER1 72 /* Add: config2 */
+#define PERF_ATTR_SIZE_VER2 80 /* Add: branch_sample_type */
+#define PERF_ATTR_SIZE_VER3 96 /* Add: sample_regs_user */
+ /* Add: sample_stack_user */
+#define PERF_ATTR_SIZE_VER4 104 /* Add: sample_regs_intr */
+#define PERF_ATTR_SIZE_VER5 112 /* Add: aux_watermark */
+#define PERF_ATTR_SIZE_VER6 120 /* Add: aux_sample_size */
+#define PERF_ATTR_SIZE_VER7 128 /* Add: sig_data */
+#define PERF_ATTR_SIZE_VER8 136 /* Add: config3 */
+#define PERF_ATTR_SIZE_VER9 144 /* add: config4 */
/*
- * Hardware event_id to monitor via a performance monitoring event:
- *
- * @sample_max_stack: Max number of frame pointers in a callchain,
- * should be < /proc/sys/kernel/perf_event_max_stack
+ * 'struct perf_event_attr' contains various attributes that define
+ * a performance event - most of them hardware related configuration
+ * details, but also a lot of behavioral switches and values implemented
+ * by the kernel.
*/
struct perf_event_attr {
@@ -389,7 +398,7 @@ struct perf_event_attr {
__u32 type;
/*
- * Size of the attr structure, for fwd/bwd compat.
+ * Size of the attr structure, for forward/backwards compatibility.
*/
__u32 size;
@@ -444,21 +453,23 @@ struct perf_event_attr {
comm_exec : 1, /* flag comm events that are due to an exec */
use_clockid : 1, /* use @clockid for time fields */
context_switch : 1, /* context switch data */
- write_backward : 1, /* Write ring buffer from end to beginning */
+ write_backward : 1, /* write ring buffer from end to beginning */
namespaces : 1, /* include namespaces data */
ksymbol : 1, /* include ksymbol events */
- bpf_event : 1, /* include bpf events */
+ bpf_event : 1, /* include BPF events */
aux_output : 1, /* generate AUX records instead of events */
cgroup : 1, /* include cgroup events */
text_poke : 1, /* include text poke events */
- build_id : 1, /* use build id in mmap2 events */
+ build_id : 1, /* use build ID in mmap2 events */
inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */
remove_on_exec : 1, /* event is removed from task on exec */
sigtrap : 1, /* send synchronous SIGTRAP on event */
- __reserved_1 : 26;
+ defer_callchain: 1, /* request PERF_RECORD_CALLCHAIN_DEFERRED records */
+ defer_output : 1, /* output PERF_RECORD_CALLCHAIN_DEFERRED records */
+ __reserved_1 : 24;
union {
- __u32 wakeup_events; /* wakeup every n events */
+ __u32 wakeup_events; /* wake up every n events */
__u32 wakeup_watermark; /* bytes before wakeup */
};
@@ -467,13 +478,13 @@ struct perf_event_attr {
__u64 bp_addr;
__u64 kprobe_func; /* for perf_kprobe */
__u64 uprobe_path; /* for perf_uprobe */
- __u64 config1; /* extension of config */
+ __u64 config1; /* extension of config */
};
union {
__u64 bp_len;
- __u64 kprobe_addr; /* when kprobe_func == NULL */
+ __u64 kprobe_addr; /* when kprobe_func == NULL */
__u64 probe_offset; /* for perf_[k,u]probe */
- __u64 config2; /* extension of config1 */
+ __u64 config2; /* extension of config1 */
};
__u64 branch_sample_type; /* enum perf_branch_sample_type */
@@ -503,10 +514,28 @@ struct perf_event_attr {
* Wakeup watermark for AUX area
*/
__u32 aux_watermark;
+
+ /*
+ * Max number of frame pointers in a callchain, should be
+ * lower than /proc/sys/kernel/perf_event_max_stack.
+ *
+ * Max number of entries of branch stack should be lower
+ * than the hardware limit.
+ */
__u16 sample_max_stack;
+
__u16 __reserved_2;
__u32 aux_sample_size;
- __u32 __reserved_3;
+
+ union {
+ __u32 aux_action;
+ struct {
+ __u32 aux_start_paused : 1, /* start AUX area tracing paused */
+ aux_pause : 1, /* on overflow, pause AUX area tracing */
+ aux_resume : 1, /* on overflow, resume AUX area tracing */
+ __reserved_3 : 29;
+ };
+ };
/*
* User provided data if sigtrap=1, passed back to user via
@@ -515,11 +544,14 @@ struct perf_event_attr {
* truncated accordingly on 32 bit architectures.
*/
__u64 sig_data;
+
+ __u64 config3; /* extension of config2 */
+ __u64 config4; /* extension of config3 */
};
/*
* Structure used by below PERF_EVENT_IOC_QUERY_BPF command
- * to query bpf programs attached to the same perf tracepoint
+ * to query BPF programs attached to the same perf tracepoint
* as the given perf event.
*/
struct perf_event_query_bpf {
@@ -541,21 +573,21 @@ struct perf_event_query_bpf {
/*
* Ioctls that can be done on a perf event fd:
*/
-#define PERF_EVENT_IOC_ENABLE _IO ('$', 0)
-#define PERF_EVENT_IOC_DISABLE _IO ('$', 1)
-#define PERF_EVENT_IOC_REFRESH _IO ('$', 2)
-#define PERF_EVENT_IOC_RESET _IO ('$', 3)
-#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64)
-#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5)
-#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *)
-#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *)
-#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
-#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32)
+#define PERF_EVENT_IOC_ENABLE _IO ('$', 0)
+#define PERF_EVENT_IOC_DISABLE _IO ('$', 1)
+#define PERF_EVENT_IOC_REFRESH _IO ('$', 2)
+#define PERF_EVENT_IOC_RESET _IO ('$', 3)
+#define PERF_EVENT_IOC_PERIOD _IOW ('$', 4, __u64)
+#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5)
+#define PERF_EVENT_IOC_SET_FILTER _IOW ('$', 6, char *)
+#define PERF_EVENT_IOC_ID _IOR ('$', 7, __u64 *)
+#define PERF_EVENT_IOC_SET_BPF _IOW ('$', 8, __u32)
+#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW ('$', 9, __u32)
#define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *)
-#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW('$', 11, struct perf_event_attr *)
+#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW ('$', 11, struct perf_event_attr *)
enum perf_event_ioc_flags {
- PERF_IOC_FLAG_GROUP = 1U << 0,
+ PERF_IOC_FLAG_GROUP = 1U << 0,
};
/*
@@ -566,7 +598,7 @@ struct perf_event_mmap_page {
__u32 compat_version; /* lowest version this is compat with */
/*
- * Bits needed to read the hw events in user-space.
+ * Bits needed to read the HW events in user-space.
*
* u32 seq, time_mult, time_shift, index, width;
* u64 count, enabled, running;
@@ -604,7 +636,7 @@ struct perf_event_mmap_page {
__u32 index; /* hardware event identifier */
__s64 offset; /* add to hardware event value */
__u64 time_enabled; /* time event active */
- __u64 time_running; /* time event on cpu */
+ __u64 time_running; /* time event on CPU */
union {
__u64 capabilities;
struct {
@@ -632,7 +664,7 @@ struct perf_event_mmap_page {
/*
* If cap_usr_time the below fields can be used to compute the time
- * delta since time_enabled (in ns) using rdtsc or similar.
+ * delta since time_enabled (in ns) using RDTSC or similar.
*
* u64 quot, rem;
* u64 delta;
@@ -705,7 +737,7 @@ struct perf_event_mmap_page {
* after reading this value.
*
* When the mapping is PROT_WRITE the @data_tail value should be
- * written by userspace to reflect the last read data, after issueing
+ * written by user-space to reflect the last read data, after issuing
* an smp_mb() to separate the data read from the ->data_tail store.
* In this case the kernel will not over-write unread data.
*
@@ -721,7 +753,7 @@ struct perf_event_mmap_page {
/*
* AUX area is defined by aux_{offset,size} fields that should be set
- * by the userspace, so that
+ * by the user-space, so that
*
* aux_offset >= data_offset + data_size
*
@@ -795,7 +827,7 @@ struct perf_event_mmap_page {
* Indicates that thread was preempted in TASK_RUNNING state.
*
* PERF_RECORD_MISC_MMAP_BUILD_ID:
- * Indicates that mmap2 event carries build id data.
+ * Indicates that mmap2 event carries build ID data.
*/
#define PERF_RECORD_MISC_EXACT_IP (1 << 14)
#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14)
@@ -806,26 +838,26 @@ struct perf_event_mmap_page {
#define PERF_RECORD_MISC_EXT_RESERVED (1 << 15)
struct perf_event_header {
- __u32 type;
- __u16 misc;
- __u16 size;
+ __u32 type;
+ __u16 misc;
+ __u16 size;
};
struct perf_ns_link_info {
- __u64 dev;
- __u64 ino;
+ __u64 dev;
+ __u64 ino;
};
enum {
- NET_NS_INDEX = 0,
- UTS_NS_INDEX = 1,
- IPC_NS_INDEX = 2,
- PID_NS_INDEX = 3,
- USER_NS_INDEX = 4,
- MNT_NS_INDEX = 5,
- CGROUP_NS_INDEX = 6,
-
- NR_NAMESPACES, /* number of available namespaces */
+ NET_NS_INDEX = 0,
+ UTS_NS_INDEX = 1,
+ IPC_NS_INDEX = 2,
+ PID_NS_INDEX = 3,
+ USER_NS_INDEX = 4,
+ MNT_NS_INDEX = 5,
+ CGROUP_NS_INDEX = 6,
+
+ NR_NAMESPACES, /* number of available namespaces */
};
enum perf_event_type {
@@ -841,11 +873,11 @@ enum perf_event_type {
* optional fields being ignored.
*
* struct sample_id {
- * { u32 pid, tid; } && PERF_SAMPLE_TID
- * { u64 time; } && PERF_SAMPLE_TIME
- * { u64 id; } && PERF_SAMPLE_ID
- * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
- * { u32 cpu, res; } && PERF_SAMPLE_CPU
+ * { u32 pid, tid; } && PERF_SAMPLE_TID
+ * { u64 time; } && PERF_SAMPLE_TIME
+ * { u64 id; } && PERF_SAMPLE_ID
+ * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
+ * { u32 cpu, res; } && PERF_SAMPLE_CPU
* { u64 id; } && PERF_SAMPLE_IDENTIFIER
* } && perf_event_attr::sample_id_all
*
@@ -856,7 +888,7 @@ enum perf_event_type {
/*
* The MMAP events record the PROT_EXEC mappings so that we can
- * correlate userspace IPs to code. They have the following structure:
+ * correlate user-space IPs to code. They have the following structure:
*
* struct {
* struct perf_event_header header;
@@ -866,7 +898,7 @@ enum perf_event_type {
* u64 len;
* u64 pgoff;
* char filename[];
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_MMAP = 1,
@@ -876,7 +908,7 @@ enum perf_event_type {
* struct perf_event_header header;
* u64 id;
* u64 lost;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_LOST = 2,
@@ -887,7 +919,7 @@ enum perf_event_type {
*
* u32 pid, tid;
* char comm[];
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_COMM = 3,
@@ -898,7 +930,7 @@ enum perf_event_type {
* u32 pid, ppid;
* u32 tid, ptid;
* u64 time;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_EXIT = 4,
@@ -909,7 +941,7 @@ enum perf_event_type {
* u64 time;
* u64 id;
* u64 stream_id;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_THROTTLE = 5,
@@ -921,7 +953,7 @@ enum perf_event_type {
* u32 pid, ppid;
* u32 tid, ptid;
* u64 time;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_FORK = 7,
@@ -932,7 +964,7 @@ enum perf_event_type {
* u32 pid, tid;
*
* struct read_format values;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_READ = 8,
@@ -979,14 +1011,20 @@ enum perf_event_type {
* { u64 nr;
* { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX
* { u64 from, to, flags } lbr[nr];
+ * #
+ * # The format of the counters is decided by the
+ * # "branch_counter_nr" and "branch_counter_width",
+ * # which are defined in the ABI.
+ * #
+ * { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS
* } && PERF_SAMPLE_BRANCH_STACK
*
- * { u64 abi; # enum perf_sample_regs_abi
- * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
+ * { u64 abi; # enum perf_sample_regs_abi
+ * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
*
- * { u64 size;
- * char data[size];
- * u64 dyn_size; } && PERF_SAMPLE_STACK_USER
+ * { u64 size;
+ * char data[size];
+ * u64 dyn_size; } && PERF_SAMPLE_STACK_USER
*
* { union perf_sample_weight
* {
@@ -1011,10 +1049,11 @@ enum perf_event_type {
* { u64 abi; # enum perf_sample_regs_abi
* u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
* { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR
- * { u64 size;
- * char data[size]; } && PERF_SAMPLE_AUX
+ * { u64 cgroup;} && PERF_SAMPLE_CGROUP
* { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE
* { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE
+ * { u64 size;
+ * char data[size]; } && PERF_SAMPLE_AUX
* };
*/
PERF_RECORD_SAMPLE = 9,
@@ -1046,7 +1085,7 @@ enum perf_event_type {
* };
* u32 prot, flags;
* char filename[];
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_MMAP2 = 10,
@@ -1055,12 +1094,12 @@ enum perf_event_type {
* Records that new data landed in the AUX buffer part.
*
* struct {
- * struct perf_event_header header;
+ * struct perf_event_header header;
*
- * u64 aux_offset;
- * u64 aux_size;
+ * u64 aux_offset;
+ * u64 aux_size;
* u64 flags;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_AUX = 11,
@@ -1143,7 +1182,7 @@ enum perf_event_type {
PERF_RECORD_KSYMBOL = 17,
/*
- * Record bpf events:
+ * Record BPF events:
* enum perf_bpf_event_type {
* PERF_BPF_EVENT_UNKNOWN = 0,
* PERF_BPF_EVENT_PROG_LOAD = 1,
@@ -1204,6 +1243,22 @@ enum perf_event_type {
*/
PERF_RECORD_AUX_OUTPUT_HW_ID = 21,
+ /*
+ * This user callchain capture was deferred until shortly before
+ * returning to user space. Previous samples would have kernel
+ * callchains only and they need to be stitched with this to make full
+ * callchains.
+ *
+ * struct {
+ * struct perf_event_header header;
+ * u64 cookie;
+ * u64 nr;
+ * u64 ips[nr];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_CALLCHAIN_DEFERRED = 22,
+
PERF_RECORD_MAX, /* non-ABI */
};
@@ -1221,178 +1276,182 @@ enum perf_record_ksymbol_type {
#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0)
enum perf_bpf_event_type {
- PERF_BPF_EVENT_UNKNOWN = 0,
- PERF_BPF_EVENT_PROG_LOAD = 1,
- PERF_BPF_EVENT_PROG_UNLOAD = 2,
- PERF_BPF_EVENT_MAX, /* non-ABI */
+ PERF_BPF_EVENT_UNKNOWN = 0,
+ PERF_BPF_EVENT_PROG_LOAD = 1,
+ PERF_BPF_EVENT_PROG_UNLOAD = 2,
+ PERF_BPF_EVENT_MAX, /* non-ABI */
};
-#define PERF_MAX_STACK_DEPTH 127
-#define PERF_MAX_CONTEXTS_PER_STACK 8
+#define PERF_MAX_STACK_DEPTH 127
+#define PERF_MAX_CONTEXTS_PER_STACK 8
enum perf_callchain_context {
- PERF_CONTEXT_HV = (__u64)-32,
- PERF_CONTEXT_KERNEL = (__u64)-128,
- PERF_CONTEXT_USER = (__u64)-512,
+ PERF_CONTEXT_HV = (__u64)-32,
+ PERF_CONTEXT_KERNEL = (__u64)-128,
+ PERF_CONTEXT_USER = (__u64)-512,
+ PERF_CONTEXT_USER_DEFERRED = (__u64)-640,
- PERF_CONTEXT_GUEST = (__u64)-2048,
- PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
- PERF_CONTEXT_GUEST_USER = (__u64)-2560,
+ PERF_CONTEXT_GUEST = (__u64)-2048,
+ PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
+ PERF_CONTEXT_GUEST_USER = (__u64)-2560,
- PERF_CONTEXT_MAX = (__u64)-4095,
+ PERF_CONTEXT_MAX = (__u64)-4095,
};
/**
* PERF_RECORD_AUX::flags bits
*/
-#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */
-#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */
-#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */
-#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */
+#define PERF_AUX_FLAG_TRUNCATED 0x0001 /* Record was truncated to fit */
+#define PERF_AUX_FLAG_OVERWRITE 0x0002 /* Snapshot from overwrite mode */
+#define PERF_AUX_FLAG_PARTIAL 0x0004 /* Record contains gaps */
+#define PERF_AUX_FLAG_COLLISION 0x0008 /* Sample collided with another */
#define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */
/* CoreSight PMU AUX buffer formats */
-#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */
-#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */
+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */
+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */
-#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
-#define PERF_FLAG_FD_OUTPUT (1UL << 1)
-#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
-#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
+#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
+#define PERF_FLAG_FD_OUTPUT (1UL << 1)
+#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup ID, per-CPU mode only */
+#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
#if defined(__LITTLE_ENDIAN_BITFIELD)
union perf_mem_data_src {
__u64 val;
struct {
- __u64 mem_op:5, /* type of opcode */
- mem_lvl:14, /* memory hierarchy level */
- mem_snoop:5, /* snoop mode */
- mem_lock:2, /* lock instr */
- mem_dtlb:7, /* tlb access */
- mem_lvl_num:4, /* memory hierarchy level number */
- mem_remote:1, /* remote */
- mem_snoopx:2, /* snoop mode, ext */
- mem_blk:3, /* access blocked */
- mem_hops:3, /* hop level */
- mem_rsvd:18;
+ __u64 mem_op : 5, /* Type of opcode */
+ mem_lvl : 14, /* Memory hierarchy level */
+ mem_snoop : 5, /* Snoop mode */
+ mem_lock : 2, /* Lock instr */
+ mem_dtlb : 7, /* TLB access */
+ mem_lvl_num : 4, /* Memory hierarchy level number */
+ mem_remote : 1, /* Remote */
+ mem_snoopx : 2, /* Snoop mode, ext */
+ mem_blk : 3, /* Access blocked */
+ mem_hops : 3, /* Hop level */
+ mem_rsvd : 18;
};
};
#elif defined(__BIG_ENDIAN_BITFIELD)
union perf_mem_data_src {
__u64 val;
struct {
- __u64 mem_rsvd:18,
- mem_hops:3, /* hop level */
- mem_blk:3, /* access blocked */
- mem_snoopx:2, /* snoop mode, ext */
- mem_remote:1, /* remote */
- mem_lvl_num:4, /* memory hierarchy level number */
- mem_dtlb:7, /* tlb access */
- mem_lock:2, /* lock instr */
- mem_snoop:5, /* snoop mode */
- mem_lvl:14, /* memory hierarchy level */
- mem_op:5; /* type of opcode */
+ __u64 mem_rsvd : 18,
+ mem_hops : 3, /* Hop level */
+ mem_blk : 3, /* Access blocked */
+ mem_snoopx : 2, /* Snoop mode, ext */
+ mem_remote : 1, /* Remote */
+ mem_lvl_num : 4, /* Memory hierarchy level number */
+ mem_dtlb : 7, /* TLB access */
+ mem_lock : 2, /* Lock instr */
+ mem_snoop : 5, /* Snoop mode */
+ mem_lvl : 14, /* Memory hierarchy level */
+ mem_op : 5; /* Type of opcode */
};
};
#else
-#error "Unknown endianness"
+# error "Unknown endianness"
#endif
-/* type of opcode (load/store/prefetch,code) */
-#define PERF_MEM_OP_NA 0x01 /* not available */
-#define PERF_MEM_OP_LOAD 0x02 /* load instruction */
-#define PERF_MEM_OP_STORE 0x04 /* store instruction */
-#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */
-#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */
-#define PERF_MEM_OP_SHIFT 0
+/* Type of memory opcode: */
+#define PERF_MEM_OP_NA 0x0001 /* Not available */
+#define PERF_MEM_OP_LOAD 0x0002 /* Load instruction */
+#define PERF_MEM_OP_STORE 0x0004 /* Store instruction */
+#define PERF_MEM_OP_PFETCH 0x0008 /* Prefetch */
+#define PERF_MEM_OP_EXEC 0x0010 /* Code (execution) */
+#define PERF_MEM_OP_SHIFT 0
/*
- * PERF_MEM_LVL_* namespace being depricated to some extent in the
+ * The PERF_MEM_LVL_* namespace is being deprecated to some extent in
* favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields.
- * Supporting this namespace inorder to not break defined ABIs.
+ * We support this namespace in order to not break defined ABIs.
*
- * memory hierarchy (memory level, hit or miss)
+ * Memory hierarchy (memory level, hit or miss)
*/
-#define PERF_MEM_LVL_NA 0x01 /* not available */
-#define PERF_MEM_LVL_HIT 0x02 /* hit level */
-#define PERF_MEM_LVL_MISS 0x04 /* miss level */
-#define PERF_MEM_LVL_L1 0x08 /* L1 */
-#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */
-#define PERF_MEM_LVL_L2 0x20 /* L2 */
-#define PERF_MEM_LVL_L3 0x40 /* L3 */
-#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */
-#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */
-#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */
-#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */
-#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */
-#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */
-#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
-#define PERF_MEM_LVL_SHIFT 5
-
-#define PERF_MEM_REMOTE_REMOTE 0x01 /* Remote */
-#define PERF_MEM_REMOTE_SHIFT 37
-
-#define PERF_MEM_LVLNUM_L1 0x01 /* L1 */
-#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */
-#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */
-#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */
-/* 5-0x8 available */
-#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */
-#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */
-#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
-#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */
-#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */
-#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */
-#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */
-
-#define PERF_MEM_LVLNUM_SHIFT 33
-
-/* snoop mode */
-#define PERF_MEM_SNOOP_NA 0x01 /* not available */
-#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */
-#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */
-#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */
-#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */
-#define PERF_MEM_SNOOP_SHIFT 19
-
-#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */
-#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */
-#define PERF_MEM_SNOOPX_SHIFT 38
-
-/* locked instruction */
-#define PERF_MEM_LOCK_NA 0x01 /* not available */
-#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */
-#define PERF_MEM_LOCK_SHIFT 24
+#define PERF_MEM_LVL_NA 0x0001 /* Not available */
+#define PERF_MEM_LVL_HIT 0x0002 /* Hit level */
+#define PERF_MEM_LVL_MISS 0x0004 /* Miss level */
+#define PERF_MEM_LVL_L1 0x0008 /* L1 */
+#define PERF_MEM_LVL_LFB 0x0010 /* Line Fill Buffer */
+#define PERF_MEM_LVL_L2 0x0020 /* L2 */
+#define PERF_MEM_LVL_L3 0x0040 /* L3 */
+#define PERF_MEM_LVL_LOC_RAM 0x0080 /* Local DRAM */
+#define PERF_MEM_LVL_REM_RAM1 0x0100 /* Remote DRAM (1 hop) */
+#define PERF_MEM_LVL_REM_RAM2 0x0200 /* Remote DRAM (2 hops) */
+#define PERF_MEM_LVL_REM_CCE1 0x0400 /* Remote Cache (1 hop) */
+#define PERF_MEM_LVL_REM_CCE2 0x0800 /* Remote Cache (2 hops) */
+#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */
+#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
+#define PERF_MEM_LVL_SHIFT 5
+
+#define PERF_MEM_REMOTE_REMOTE 0x0001 /* Remote */
+#define PERF_MEM_REMOTE_SHIFT 37
+
+#define PERF_MEM_LVLNUM_L1 0x0001 /* L1 */
+#define PERF_MEM_LVLNUM_L2 0x0002 /* L2 */
+#define PERF_MEM_LVLNUM_L3 0x0003 /* L3 */
+#define PERF_MEM_LVLNUM_L4 0x0004 /* L4 */
+#define PERF_MEM_LVLNUM_L2_MHB 0x0005 /* L2 Miss Handling Buffer */
+#define PERF_MEM_LVLNUM_MSC 0x0006 /* Memory-side Cache */
+/* 0x007 available */
+#define PERF_MEM_LVLNUM_UNC 0x0008 /* Uncached */
+#define PERF_MEM_LVLNUM_CXL 0x0009 /* CXL */
+#define PERF_MEM_LVLNUM_IO 0x000a /* I/O */
+#define PERF_MEM_LVLNUM_ANY_CACHE 0x000b /* Any cache */
+#define PERF_MEM_LVLNUM_LFB 0x000c /* LFB / L1 Miss Handling Buffer */
+#define PERF_MEM_LVLNUM_RAM 0x000d /* RAM */
+#define PERF_MEM_LVLNUM_PMEM 0x000e /* PMEM */
+#define PERF_MEM_LVLNUM_NA 0x000f /* N/A */
+
+#define PERF_MEM_LVLNUM_SHIFT 33
+
+/* Snoop mode */
+#define PERF_MEM_SNOOP_NA 0x0001 /* Not available */
+#define PERF_MEM_SNOOP_NONE 0x0002 /* No snoop */
+#define PERF_MEM_SNOOP_HIT 0x0004 /* Snoop hit */
+#define PERF_MEM_SNOOP_MISS 0x0008 /* Snoop miss */
+#define PERF_MEM_SNOOP_HITM 0x0010 /* Snoop hit modified */
+#define PERF_MEM_SNOOP_SHIFT 19
+
+#define PERF_MEM_SNOOPX_FWD 0x0001 /* Forward */
+#define PERF_MEM_SNOOPX_PEER 0x0002 /* Transfer from peer */
+#define PERF_MEM_SNOOPX_SHIFT 38
+
+/* Locked instruction */
+#define PERF_MEM_LOCK_NA 0x0001 /* Not available */
+#define PERF_MEM_LOCK_LOCKED 0x0002 /* Locked transaction */
+#define PERF_MEM_LOCK_SHIFT 24
/* TLB access */
-#define PERF_MEM_TLB_NA 0x01 /* not available */
-#define PERF_MEM_TLB_HIT 0x02 /* hit level */
-#define PERF_MEM_TLB_MISS 0x04 /* miss level */
-#define PERF_MEM_TLB_L1 0x08 /* L1 */
-#define PERF_MEM_TLB_L2 0x10 /* L2 */
-#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/
-#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */
-#define PERF_MEM_TLB_SHIFT 26
+#define PERF_MEM_TLB_NA 0x0001 /* Not available */
+#define PERF_MEM_TLB_HIT 0x0002 /* Hit level */
+#define PERF_MEM_TLB_MISS 0x0004 /* Miss level */
+#define PERF_MEM_TLB_L1 0x0008 /* L1 */
+#define PERF_MEM_TLB_L2 0x0010 /* L2 */
+#define PERF_MEM_TLB_WK 0x0020 /* Hardware Walker*/
+#define PERF_MEM_TLB_OS 0x0040 /* OS fault handler */
+#define PERF_MEM_TLB_SHIFT 26
/* Access blocked */
-#define PERF_MEM_BLK_NA 0x01 /* not available */
-#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */
-#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */
-#define PERF_MEM_BLK_SHIFT 40
-
-/* hop level */
-#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */
-#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */
-#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */
-#define PERF_MEM_HOPS_3 0x04 /* remote board */
+#define PERF_MEM_BLK_NA 0x0001 /* Not available */
+#define PERF_MEM_BLK_DATA 0x0002 /* Data could not be forwarded */
+#define PERF_MEM_BLK_ADDR 0x0004 /* Address conflict */
+#define PERF_MEM_BLK_SHIFT 40
+
+/* Hop level */
+#define PERF_MEM_HOPS_0 0x0001 /* Remote core, same node */
+#define PERF_MEM_HOPS_1 0x0002 /* Remote node, same socket */
+#define PERF_MEM_HOPS_2 0x0003 /* Remote socket, same board */
+#define PERF_MEM_HOPS_3 0x0004 /* Remote board */
/* 5-7 available */
-#define PERF_MEM_HOPS_SHIFT 43
+#define PERF_MEM_HOPS_SHIFT 43
#define PERF_MEM_S(a, s) \
(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
/*
- * single taken branch record layout:
+ * Layout of single taken branch records:
*
* from: source instruction (may not always be a branch insn)
* to: branch target
@@ -1411,34 +1470,37 @@ union perf_mem_data_src {
struct perf_branch_entry {
__u64 from;
__u64 to;
- __u64 mispred:1, /* target mispredicted */
- predicted:1,/* target predicted */
- in_tx:1, /* in transaction */
- abort:1, /* transaction abort */
- cycles:16, /* cycle count to last branch */
- type:4, /* branch type */
- spec:2, /* branch speculation info */
- new_type:4, /* additional branch type */
- priv:3, /* privilege level */
- reserved:31;
+ __u64 mispred : 1, /* target mispredicted */
+ predicted : 1, /* target predicted */
+ in_tx : 1, /* in transaction */
+ abort : 1, /* transaction abort */
+ cycles : 16, /* cycle count to last branch */
+ type : 4, /* branch type */
+ spec : 2, /* branch speculation info */
+ new_type : 4, /* additional branch type */
+ priv : 3, /* privilege level */
+ reserved : 31;
};
+/* Size of used info bits in struct perf_branch_entry */
+#define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33
+
union perf_sample_weight {
- __u64 full;
+ __u64 full;
#if defined(__LITTLE_ENDIAN_BITFIELD)
struct {
- __u32 var1_dw;
- __u16 var2_w;
- __u16 var3_w;
+ __u32 var1_dw;
+ __u16 var2_w;
+ __u16 var3_w;
};
#elif defined(__BIG_ENDIAN_BITFIELD)
struct {
- __u16 var3_w;
- __u16 var2_w;
- __u32 var1_dw;
+ __u16 var3_w;
+ __u16 var2_w;
+ __u32 var1_dw;
};
#else
-#error "Unknown endianness"
+# error "Unknown endianness"
#endif
};
diff --git a/tools/include/uapi/linux/pkt_cls.h b/tools/include/uapi/linux/pkt_cls.h
index 3faee0199a9b..bd4b227ab4ba 100644
--- a/tools/include/uapi/linux/pkt_cls.h
+++ b/tools/include/uapi/linux/pkt_cls.h
@@ -204,37 +204,6 @@ struct tc_u32_pcnt {
#define TC_U32_MAXDEPTH 8
-
-/* RSVP filter */
-
-enum {
- TCA_RSVP_UNSPEC,
- TCA_RSVP_CLASSID,
- TCA_RSVP_DST,
- TCA_RSVP_SRC,
- TCA_RSVP_PINFO,
- TCA_RSVP_POLICE,
- TCA_RSVP_ACT,
- __TCA_RSVP_MAX
-};
-
-#define TCA_RSVP_MAX (__TCA_RSVP_MAX - 1 )
-
-struct tc_rsvp_gpi {
- __u32 key;
- __u32 mask;
- int offset;
-};
-
-struct tc_rsvp_pinfo {
- struct tc_rsvp_gpi dpi;
- struct tc_rsvp_gpi spi;
- __u8 protocol;
- __u8 tunnelid;
- __u8 tunnelhdr;
- __u8 pad;
-};
-
/* ROUTE filter */
enum {
@@ -265,22 +234,6 @@ enum {
#define TCA_FW_MAX (__TCA_FW_MAX - 1)
-/* TC index filter */
-
-enum {
- TCA_TCINDEX_UNSPEC,
- TCA_TCINDEX_HASH,
- TCA_TCINDEX_MASK,
- TCA_TCINDEX_SHIFT,
- TCA_TCINDEX_FALL_THROUGH,
- TCA_TCINDEX_CLASSID,
- TCA_TCINDEX_POLICE,
- TCA_TCINDEX_ACT,
- __TCA_TCINDEX_MAX
-};
-
-#define TCA_TCINDEX_MAX (__TCA_TCINDEX_MAX - 1)
-
/* Flow filter */
enum {
diff --git a/tools/include/uapi/linux/pkt_sched.h b/tools/include/uapi/linux/pkt_sched.h
index 5c903abc9fa5..587481a19433 100644
--- a/tools/include/uapi/linux/pkt_sched.h
+++ b/tools/include/uapi/linux/pkt_sched.h
@@ -457,115 +457,6 @@ enum {
#define TCA_HFSC_MAX (__TCA_HFSC_MAX - 1)
-
-/* CBQ section */
-
-#define TC_CBQ_MAXPRIO 8
-#define TC_CBQ_MAXLEVEL 8
-#define TC_CBQ_DEF_EWMA 5
-
-struct tc_cbq_lssopt {
- unsigned char change;
- unsigned char flags;
-#define TCF_CBQ_LSS_BOUNDED 1
-#define TCF_CBQ_LSS_ISOLATED 2
- unsigned char ewma_log;
- unsigned char level;
-#define TCF_CBQ_LSS_FLAGS 1
-#define TCF_CBQ_LSS_EWMA 2
-#define TCF_CBQ_LSS_MAXIDLE 4
-#define TCF_CBQ_LSS_MINIDLE 8
-#define TCF_CBQ_LSS_OFFTIME 0x10
-#define TCF_CBQ_LSS_AVPKT 0x20
- __u32 maxidle;
- __u32 minidle;
- __u32 offtime;
- __u32 avpkt;
-};
-
-struct tc_cbq_wrropt {
- unsigned char flags;
- unsigned char priority;
- unsigned char cpriority;
- unsigned char __reserved;
- __u32 allot;
- __u32 weight;
-};
-
-struct tc_cbq_ovl {
- unsigned char strategy;
-#define TC_CBQ_OVL_CLASSIC 0
-#define TC_CBQ_OVL_DELAY 1
-#define TC_CBQ_OVL_LOWPRIO 2
-#define TC_CBQ_OVL_DROP 3
-#define TC_CBQ_OVL_RCLASSIC 4
- unsigned char priority2;
- __u16 pad;
- __u32 penalty;
-};
-
-struct tc_cbq_police {
- unsigned char police;
- unsigned char __res1;
- unsigned short __res2;
-};
-
-struct tc_cbq_fopt {
- __u32 split;
- __u32 defmap;
- __u32 defchange;
-};
-
-struct tc_cbq_xstats {
- __u32 borrows;
- __u32 overactions;
- __s32 avgidle;
- __s32 undertime;
-};
-
-enum {
- TCA_CBQ_UNSPEC,
- TCA_CBQ_LSSOPT,
- TCA_CBQ_WRROPT,
- TCA_CBQ_FOPT,
- TCA_CBQ_OVL_STRATEGY,
- TCA_CBQ_RATE,
- TCA_CBQ_RTAB,
- TCA_CBQ_POLICE,
- __TCA_CBQ_MAX,
-};
-
-#define TCA_CBQ_MAX (__TCA_CBQ_MAX - 1)
-
-/* dsmark section */
-
-enum {
- TCA_DSMARK_UNSPEC,
- TCA_DSMARK_INDICES,
- TCA_DSMARK_DEFAULT_INDEX,
- TCA_DSMARK_SET_TC_INDEX,
- TCA_DSMARK_MASK,
- TCA_DSMARK_VALUE,
- __TCA_DSMARK_MAX,
-};
-
-#define TCA_DSMARK_MAX (__TCA_DSMARK_MAX - 1)
-
-/* ATM section */
-
-enum {
- TCA_ATM_UNSPEC,
- TCA_ATM_FD, /* file/socket descriptor */
- TCA_ATM_PTR, /* pointer to descriptor - later */
- TCA_ATM_HDR, /* LL header */
- TCA_ATM_EXCESS, /* excess traffic class (0 for CLP) */
- TCA_ATM_ADDR, /* PVC address (for output only) */
- TCA_ATM_STATE, /* VC state (ATM_VS_*; for output only) */
- __TCA_ATM_MAX,
-};
-
-#define TCA_ATM_MAX (__TCA_ATM_MAX - 1)
-
/* Network emulator */
enum {
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index a5e06dcbba13..475fc8ca4403 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -230,7 +230,7 @@ struct prctl_mm_map {
# define PR_PAC_APDBKEY (1UL << 3)
# define PR_PAC_APGAKEY (1UL << 4)
-/* Tagged user address controls for arm64 */
+/* Tagged user address controls for arm64 and RISC-V */
#define PR_SET_TAGGED_ADDR_CTRL 55
#define PR_GET_TAGGED_ADDR_CTRL 56
# define PR_TAGGED_ADDR_ENABLE (1UL << 0)
@@ -244,6 +244,9 @@ struct prctl_mm_map {
# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT)
/* Unused; kept only for source compatibility */
# define PR_MTE_TCF_SHIFT 1
+/* RISC-V pointer masking tag length */
+# define PR_PMLEN_SHIFT 24
+# define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT)
/* Control reclaim behavior when allocating memory */
#define PR_SET_IO_FLUSHER 57
@@ -252,7 +255,12 @@ struct prctl_mm_map {
/* Dispatch syscalls to a userspace handler */
#define PR_SET_SYSCALL_USER_DISPATCH 59
# define PR_SYS_DISPATCH_OFF 0
-# define PR_SYS_DISPATCH_ON 1
+/* Enable dispatch except for the specified range */
+# define PR_SYS_DISPATCH_EXCLUSIVE_ON 1
+/* Enable dispatch for the specified range */
+# define PR_SYS_DISPATCH_INCLUSIVE_ON 2
+/* Legacy name for backwards compatibility */
+# define PR_SYS_DISPATCH_ON PR_SYS_DISPATCH_EXCLUSIVE_ON
/* The control values for the user space selector when dispatch is enabled */
# define SYSCALL_DISPATCH_FILTER_ALLOW 0
# define SYSCALL_DISPATCH_FILTER_BLOCK 1
@@ -281,7 +289,89 @@ struct prctl_mm_map {
# define PR_SME_VL_LEN_MASK 0xffff
# define PR_SME_VL_INHERIT (1 << 17) /* inherit across exec */
+/* Memory deny write / execute */
+#define PR_SET_MDWE 65
+# define PR_MDWE_REFUSE_EXEC_GAIN (1UL << 0)
+# define PR_MDWE_NO_INHERIT (1UL << 1)
+
+#define PR_GET_MDWE 66
+
#define PR_SET_VMA 0x53564d41
# define PR_SET_VMA_ANON_NAME 0
+#define PR_GET_AUXV 0x41555856
+
+#define PR_SET_MEMORY_MERGE 67
+#define PR_GET_MEMORY_MERGE 68
+
+#define PR_RISCV_V_SET_CONTROL 69
+#define PR_RISCV_V_GET_CONTROL 70
+# define PR_RISCV_V_VSTATE_CTRL_DEFAULT 0
+# define PR_RISCV_V_VSTATE_CTRL_OFF 1
+# define PR_RISCV_V_VSTATE_CTRL_ON 2
+# define PR_RISCV_V_VSTATE_CTRL_INHERIT (1 << 4)
+# define PR_RISCV_V_VSTATE_CTRL_CUR_MASK 0x3
+# define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK 0xc
+# define PR_RISCV_V_VSTATE_CTRL_MASK 0x1f
+
+#define PR_RISCV_SET_ICACHE_FLUSH_CTX 71
+# define PR_RISCV_CTX_SW_FENCEI_ON 0
+# define PR_RISCV_CTX_SW_FENCEI_OFF 1
+# define PR_RISCV_SCOPE_PER_PROCESS 0
+# define PR_RISCV_SCOPE_PER_THREAD 1
+
+/* PowerPC Dynamic Execution Control Register (DEXCR) controls */
+#define PR_PPC_GET_DEXCR 72
+#define PR_PPC_SET_DEXCR 73
+/* DEXCR aspect to act on */
+# define PR_PPC_DEXCR_SBHE 0 /* Speculative branch hint enable */
+# define PR_PPC_DEXCR_IBRTPD 1 /* Indirect branch recurrent target prediction disable */
+# define PR_PPC_DEXCR_SRAPD 2 /* Subroutine return address prediction disable */
+# define PR_PPC_DEXCR_NPHIE 3 /* Non-privileged hash instruction enable */
+/* Action to apply / return */
+# define PR_PPC_DEXCR_CTRL_EDITABLE 0x1 /* Aspect can be modified with PR_PPC_SET_DEXCR */
+# define PR_PPC_DEXCR_CTRL_SET 0x2 /* Set the aspect for this process */
+# define PR_PPC_DEXCR_CTRL_CLEAR 0x4 /* Clear the aspect for this process */
+# define PR_PPC_DEXCR_CTRL_SET_ONEXEC 0x8 /* Set the aspect on exec */
+# define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on exec */
+# define PR_PPC_DEXCR_CTRL_MASK 0x1f
+
+/*
+ * Get the current shadow stack configuration for the current thread,
+ * this will be the value configured via PR_SET_SHADOW_STACK_STATUS.
+ */
+#define PR_GET_SHADOW_STACK_STATUS 74
+
+/*
+ * Set the current shadow stack configuration. Enabling the shadow
+ * stack will cause a shadow stack to be allocated for the thread.
+ */
+#define PR_SET_SHADOW_STACK_STATUS 75
+# define PR_SHADOW_STACK_ENABLE (1UL << 0)
+# define PR_SHADOW_STACK_WRITE (1UL << 1)
+# define PR_SHADOW_STACK_PUSH (1UL << 2)
+
+/*
+ * Prevent further changes to the specified shadow stack
+ * configuration. All bits may be locked via this call, including
+ * undefined bits.
+ */
+#define PR_LOCK_SHADOW_STACK_STATUS 76
+
+/*
+ * Controls the mode of timer_create() for CRIU restore operations.
+ * Enabling this allows CRIU to restore timers with explicit IDs.
+ *
+ * Don't use for normal operations as the result might be undefined.
+ */
+#define PR_TIMER_CREATE_RESTORE_IDS 77
+# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0
+# define PR_TIMER_CREATE_RESTORE_IDS_ON 1
+# define PR_TIMER_CREATE_RESTORE_IDS_GET 2
+
+/* FUTEX hash management */
+#define PR_FUTEX_HASH 78
+# define PR_FUTEX_HASH_SET_SLOTS 1
+# define PR_FUTEX_HASH_GET_SLOTS 2
+
#endif /* _LINUX_PRCTL_H */
diff --git a/tools/include/uapi/linux/rtnetlink.h b/tools/include/uapi/linux/rtnetlink.h
new file mode 100644
index 000000000000..dab9493c791b
--- /dev/null
+++ b/tools/include/uapi/linux/rtnetlink.h
@@ -0,0 +1,848 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_RTNETLINK_H
+#define _UAPI__LINUX_RTNETLINK_H
+
+#include <linux/types.h>
+#include <linux/netlink.h>
+#include <linux/if_link.h>
+#include <linux/if_addr.h>
+#include <linux/neighbour.h>
+
+/* rtnetlink families. Values up to 127 are reserved for real address
+ * families, values above 128 may be used arbitrarily.
+ */
+#define RTNL_FAMILY_IPMR 128
+#define RTNL_FAMILY_IP6MR 129
+#define RTNL_FAMILY_MAX 129
+
+/****
+ * Routing/neighbour discovery messages.
+ ****/
+
+/* Types of messages */
+
+enum {
+ RTM_BASE = 16,
+#define RTM_BASE RTM_BASE
+
+ RTM_NEWLINK = 16,
+#define RTM_NEWLINK RTM_NEWLINK
+ RTM_DELLINK,
+#define RTM_DELLINK RTM_DELLINK
+ RTM_GETLINK,
+#define RTM_GETLINK RTM_GETLINK
+ RTM_SETLINK,
+#define RTM_SETLINK RTM_SETLINK
+
+ RTM_NEWADDR = 20,
+#define RTM_NEWADDR RTM_NEWADDR
+ RTM_DELADDR,
+#define RTM_DELADDR RTM_DELADDR
+ RTM_GETADDR,
+#define RTM_GETADDR RTM_GETADDR
+
+ RTM_NEWROUTE = 24,
+#define RTM_NEWROUTE RTM_NEWROUTE
+ RTM_DELROUTE,
+#define RTM_DELROUTE RTM_DELROUTE
+ RTM_GETROUTE,
+#define RTM_GETROUTE RTM_GETROUTE
+
+ RTM_NEWNEIGH = 28,
+#define RTM_NEWNEIGH RTM_NEWNEIGH
+ RTM_DELNEIGH,
+#define RTM_DELNEIGH RTM_DELNEIGH
+ RTM_GETNEIGH,
+#define RTM_GETNEIGH RTM_GETNEIGH
+
+ RTM_NEWRULE = 32,
+#define RTM_NEWRULE RTM_NEWRULE
+ RTM_DELRULE,
+#define RTM_DELRULE RTM_DELRULE
+ RTM_GETRULE,
+#define RTM_GETRULE RTM_GETRULE
+
+ RTM_NEWQDISC = 36,
+#define RTM_NEWQDISC RTM_NEWQDISC
+ RTM_DELQDISC,
+#define RTM_DELQDISC RTM_DELQDISC
+ RTM_GETQDISC,
+#define RTM_GETQDISC RTM_GETQDISC
+
+ RTM_NEWTCLASS = 40,
+#define RTM_NEWTCLASS RTM_NEWTCLASS
+ RTM_DELTCLASS,
+#define RTM_DELTCLASS RTM_DELTCLASS
+ RTM_GETTCLASS,
+#define RTM_GETTCLASS RTM_GETTCLASS
+
+ RTM_NEWTFILTER = 44,
+#define RTM_NEWTFILTER RTM_NEWTFILTER
+ RTM_DELTFILTER,
+#define RTM_DELTFILTER RTM_DELTFILTER
+ RTM_GETTFILTER,
+#define RTM_GETTFILTER RTM_GETTFILTER
+
+ RTM_NEWACTION = 48,
+#define RTM_NEWACTION RTM_NEWACTION
+ RTM_DELACTION,
+#define RTM_DELACTION RTM_DELACTION
+ RTM_GETACTION,
+#define RTM_GETACTION RTM_GETACTION
+
+ RTM_NEWPREFIX = 52,
+#define RTM_NEWPREFIX RTM_NEWPREFIX
+
+ RTM_NEWMULTICAST = 56,
+#define RTM_NEWMULTICAST RTM_NEWMULTICAST
+ RTM_DELMULTICAST,
+#define RTM_DELMULTICAST RTM_DELMULTICAST
+ RTM_GETMULTICAST,
+#define RTM_GETMULTICAST RTM_GETMULTICAST
+
+ RTM_NEWANYCAST = 60,
+#define RTM_NEWANYCAST RTM_NEWANYCAST
+ RTM_DELANYCAST,
+#define RTM_DELANYCAST RTM_DELANYCAST
+ RTM_GETANYCAST,
+#define RTM_GETANYCAST RTM_GETANYCAST
+
+ RTM_NEWNEIGHTBL = 64,
+#define RTM_NEWNEIGHTBL RTM_NEWNEIGHTBL
+ RTM_GETNEIGHTBL = 66,
+#define RTM_GETNEIGHTBL RTM_GETNEIGHTBL
+ RTM_SETNEIGHTBL,
+#define RTM_SETNEIGHTBL RTM_SETNEIGHTBL
+
+ RTM_NEWNDUSEROPT = 68,
+#define RTM_NEWNDUSEROPT RTM_NEWNDUSEROPT
+
+ RTM_NEWADDRLABEL = 72,
+#define RTM_NEWADDRLABEL RTM_NEWADDRLABEL
+ RTM_DELADDRLABEL,
+#define RTM_DELADDRLABEL RTM_DELADDRLABEL
+ RTM_GETADDRLABEL,
+#define RTM_GETADDRLABEL RTM_GETADDRLABEL
+
+ RTM_GETDCB = 78,
+#define RTM_GETDCB RTM_GETDCB
+ RTM_SETDCB,
+#define RTM_SETDCB RTM_SETDCB
+
+ RTM_NEWNETCONF = 80,
+#define RTM_NEWNETCONF RTM_NEWNETCONF
+ RTM_DELNETCONF,
+#define RTM_DELNETCONF RTM_DELNETCONF
+ RTM_GETNETCONF = 82,
+#define RTM_GETNETCONF RTM_GETNETCONF
+
+ RTM_NEWMDB = 84,
+#define RTM_NEWMDB RTM_NEWMDB
+ RTM_DELMDB = 85,
+#define RTM_DELMDB RTM_DELMDB
+ RTM_GETMDB = 86,
+#define RTM_GETMDB RTM_GETMDB
+
+ RTM_NEWNSID = 88,
+#define RTM_NEWNSID RTM_NEWNSID
+ RTM_DELNSID = 89,
+#define RTM_DELNSID RTM_DELNSID
+ RTM_GETNSID = 90,
+#define RTM_GETNSID RTM_GETNSID
+
+ RTM_NEWSTATS = 92,
+#define RTM_NEWSTATS RTM_NEWSTATS
+ RTM_GETSTATS = 94,
+#define RTM_GETSTATS RTM_GETSTATS
+ RTM_SETSTATS,
+#define RTM_SETSTATS RTM_SETSTATS
+
+ RTM_NEWCACHEREPORT = 96,
+#define RTM_NEWCACHEREPORT RTM_NEWCACHEREPORT
+
+ RTM_NEWCHAIN = 100,
+#define RTM_NEWCHAIN RTM_NEWCHAIN
+ RTM_DELCHAIN,
+#define RTM_DELCHAIN RTM_DELCHAIN
+ RTM_GETCHAIN,
+#define RTM_GETCHAIN RTM_GETCHAIN
+
+ RTM_NEWNEXTHOP = 104,
+#define RTM_NEWNEXTHOP RTM_NEWNEXTHOP
+ RTM_DELNEXTHOP,
+#define RTM_DELNEXTHOP RTM_DELNEXTHOP
+ RTM_GETNEXTHOP,
+#define RTM_GETNEXTHOP RTM_GETNEXTHOP
+
+ RTM_NEWLINKPROP = 108,
+#define RTM_NEWLINKPROP RTM_NEWLINKPROP
+ RTM_DELLINKPROP,
+#define RTM_DELLINKPROP RTM_DELLINKPROP
+ RTM_GETLINKPROP,
+#define RTM_GETLINKPROP RTM_GETLINKPROP
+
+ RTM_NEWVLAN = 112,
+#define RTM_NEWVLAN RTM_NEWVLAN
+ RTM_DELVLAN,
+#define RTM_DELVLAN RTM_DELVLAN
+ RTM_GETVLAN,
+#define RTM_GETVLAN RTM_GETVLAN
+
+ RTM_NEWNEXTHOPBUCKET = 116,
+#define RTM_NEWNEXTHOPBUCKET RTM_NEWNEXTHOPBUCKET
+ RTM_DELNEXTHOPBUCKET,
+#define RTM_DELNEXTHOPBUCKET RTM_DELNEXTHOPBUCKET
+ RTM_GETNEXTHOPBUCKET,
+#define RTM_GETNEXTHOPBUCKET RTM_GETNEXTHOPBUCKET
+
+ RTM_NEWTUNNEL = 120,
+#define RTM_NEWTUNNEL RTM_NEWTUNNEL
+ RTM_DELTUNNEL,
+#define RTM_DELTUNNEL RTM_DELTUNNEL
+ RTM_GETTUNNEL,
+#define RTM_GETTUNNEL RTM_GETTUNNEL
+
+ __RTM_MAX,
+#define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1)
+};
+
+#define RTM_NR_MSGTYPES (RTM_MAX + 1 - RTM_BASE)
+#define RTM_NR_FAMILIES (RTM_NR_MSGTYPES >> 2)
+#define RTM_FAM(cmd) (((cmd) - RTM_BASE) >> 2)
+
+/*
+ Generic structure for encapsulation of optional route information.
+ It is reminiscent of sockaddr, but with sa_family replaced
+ with attribute type.
+ */
+
+struct rtattr {
+ unsigned short rta_len;
+ unsigned short rta_type;
+};
+
+/* Macros to handle rtattributes */
+
+#define RTA_ALIGNTO 4U
+#define RTA_ALIGN(len) ( ((len)+RTA_ALIGNTO-1) & ~(RTA_ALIGNTO-1) )
+#define RTA_OK(rta,len) ((len) >= (int)sizeof(struct rtattr) && \
+ (rta)->rta_len >= sizeof(struct rtattr) && \
+ (rta)->rta_len <= (len))
+#define RTA_NEXT(rta,attrlen) ((attrlen) -= RTA_ALIGN((rta)->rta_len), \
+ (struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len)))
+#define RTA_LENGTH(len) (RTA_ALIGN(sizeof(struct rtattr)) + (len))
+#define RTA_SPACE(len) RTA_ALIGN(RTA_LENGTH(len))
+#define RTA_DATA(rta) ((void*)(((char*)(rta)) + RTA_LENGTH(0)))
+#define RTA_PAYLOAD(rta) ((int)((rta)->rta_len) - RTA_LENGTH(0))
+
+
+
+
+/******************************************************************************
+ * Definitions used in routing table administration.
+ ****/
+
+struct rtmsg {
+ unsigned char rtm_family;
+ unsigned char rtm_dst_len;
+ unsigned char rtm_src_len;
+ unsigned char rtm_tos;
+
+ unsigned char rtm_table; /* Routing table id */
+ unsigned char rtm_protocol; /* Routing protocol; see below */
+ unsigned char rtm_scope; /* See below */
+ unsigned char rtm_type; /* See below */
+
+ unsigned rtm_flags;
+};
+
+/* rtm_type */
+
+enum {
+ RTN_UNSPEC,
+ RTN_UNICAST, /* Gateway or direct route */
+ RTN_LOCAL, /* Accept locally */
+ RTN_BROADCAST, /* Accept locally as broadcast,
+ send as broadcast */
+ RTN_ANYCAST, /* Accept locally as broadcast,
+ but send as unicast */
+ RTN_MULTICAST, /* Multicast route */
+ RTN_BLACKHOLE, /* Drop */
+ RTN_UNREACHABLE, /* Destination is unreachable */
+ RTN_PROHIBIT, /* Administratively prohibited */
+ RTN_THROW, /* Not in this table */
+ RTN_NAT, /* Translate this address */
+ RTN_XRESOLVE, /* Use external resolver */
+ __RTN_MAX
+};
+
+#define RTN_MAX (__RTN_MAX - 1)
+
+
+/* rtm_protocol */
+
+#define RTPROT_UNSPEC 0
+#define RTPROT_REDIRECT 1 /* Route installed by ICMP redirects;
+ not used by current IPv4 */
+#define RTPROT_KERNEL 2 /* Route installed by kernel */
+#define RTPROT_BOOT 3 /* Route installed during boot */
+#define RTPROT_STATIC 4 /* Route installed by administrator */
+
+/* Values of protocol >= RTPROT_STATIC are not interpreted by kernel;
+ they are just passed from user and back as is.
+ It will be used by hypothetical multiple routing daemons.
+ Note that protocol values should be standardized in order to
+ avoid conflicts.
+ */
+
+#define RTPROT_GATED 8 /* Apparently, GateD */
+#define RTPROT_RA 9 /* RDISC/ND router advertisements */
+#define RTPROT_MRT 10 /* Merit MRT */
+#define RTPROT_ZEBRA 11 /* Zebra */
+#define RTPROT_BIRD 12 /* BIRD */
+#define RTPROT_DNROUTED 13 /* DECnet routing daemon */
+#define RTPROT_XORP 14 /* XORP */
+#define RTPROT_NTK 15 /* Netsukuku */
+#define RTPROT_DHCP 16 /* DHCP client */
+#define RTPROT_MROUTED 17 /* Multicast daemon */
+#define RTPROT_KEEPALIVED 18 /* Keepalived daemon */
+#define RTPROT_BABEL 42 /* Babel daemon */
+#define RTPROT_OVN 84 /* OVN daemon */
+#define RTPROT_OPENR 99 /* Open Routing (Open/R) Routes */
+#define RTPROT_BGP 186 /* BGP Routes */
+#define RTPROT_ISIS 187 /* ISIS Routes */
+#define RTPROT_OSPF 188 /* OSPF Routes */
+#define RTPROT_RIP 189 /* RIP Routes */
+#define RTPROT_EIGRP 192 /* EIGRP Routes */
+
+/* rtm_scope
+
+ Really it is not scope, but sort of distance to the destination.
+ NOWHERE are reserved for not existing destinations, HOST is our
+ local addresses, LINK are destinations, located on directly attached
+ link and UNIVERSE is everywhere in the Universe.
+
+ Intermediate values are also possible f.e. interior routes
+ could be assigned a value between UNIVERSE and LINK.
+*/
+
+enum rt_scope_t {
+ RT_SCOPE_UNIVERSE=0,
+/* User defined values */
+ RT_SCOPE_SITE=200,
+ RT_SCOPE_LINK=253,
+ RT_SCOPE_HOST=254,
+ RT_SCOPE_NOWHERE=255
+};
+
+/* rtm_flags */
+
+#define RTM_F_NOTIFY 0x100 /* Notify user of route change */
+#define RTM_F_CLONED 0x200 /* This route is cloned */
+#define RTM_F_EQUALIZE 0x400 /* Multipath equalizer: NI */
+#define RTM_F_PREFIX 0x800 /* Prefix addresses */
+#define RTM_F_LOOKUP_TABLE 0x1000 /* set rtm_table to FIB lookup result */
+#define RTM_F_FIB_MATCH 0x2000 /* return full fib lookup match */
+#define RTM_F_OFFLOAD 0x4000 /* route is offloaded */
+#define RTM_F_TRAP 0x8000 /* route is trapping packets */
+#define RTM_F_OFFLOAD_FAILED 0x20000000 /* route offload failed, this value
+ * is chosen to avoid conflicts with
+ * other flags defined in
+ * include/uapi/linux/ipv6_route.h
+ */
+
+/* Reserved table identifiers */
+
+enum rt_class_t {
+ RT_TABLE_UNSPEC=0,
+/* User defined values */
+ RT_TABLE_COMPAT=252,
+ RT_TABLE_DEFAULT=253,
+ RT_TABLE_MAIN=254,
+ RT_TABLE_LOCAL=255,
+ RT_TABLE_MAX=0xFFFFFFFF
+};
+
+
+/* Routing message attributes */
+
+enum rtattr_type_t {
+ RTA_UNSPEC,
+ RTA_DST,
+ RTA_SRC,
+ RTA_IIF,
+ RTA_OIF,
+ RTA_GATEWAY,
+ RTA_PRIORITY,
+ RTA_PREFSRC,
+ RTA_METRICS,
+ RTA_MULTIPATH,
+ RTA_PROTOINFO, /* no longer used */
+ RTA_FLOW,
+ RTA_CACHEINFO,
+ RTA_SESSION, /* no longer used */
+ RTA_MP_ALGO, /* no longer used */
+ RTA_TABLE,
+ RTA_MARK,
+ RTA_MFC_STATS,
+ RTA_VIA,
+ RTA_NEWDST,
+ RTA_PREF,
+ RTA_ENCAP_TYPE,
+ RTA_ENCAP,
+ RTA_EXPIRES,
+ RTA_PAD,
+ RTA_UID,
+ RTA_TTL_PROPAGATE,
+ RTA_IP_PROTO,
+ RTA_SPORT,
+ RTA_DPORT,
+ RTA_NH_ID,
+ RTA_FLOWLABEL,
+ __RTA_MAX
+};
+
+#define RTA_MAX (__RTA_MAX - 1)
+
+#define RTM_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct rtmsg))))
+#define RTM_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct rtmsg))
+
+/* RTM_MULTIPATH --- array of struct rtnexthop.
+ *
+ * "struct rtnexthop" describes all necessary nexthop information,
+ * i.e. parameters of path to a destination via this nexthop.
+ *
+ * At the moment it is impossible to set different prefsrc, mtu, window
+ * and rtt for different paths from multipath.
+ */
+
+struct rtnexthop {
+ unsigned short rtnh_len;
+ unsigned char rtnh_flags;
+ unsigned char rtnh_hops;
+ int rtnh_ifindex;
+};
+
+/* rtnh_flags */
+
+#define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */
+#define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */
+#define RTNH_F_ONLINK 4 /* Gateway is forced on link */
+#define RTNH_F_OFFLOAD 8 /* Nexthop is offloaded */
+#define RTNH_F_LINKDOWN 16 /* carrier-down on nexthop */
+#define RTNH_F_UNRESOLVED 32 /* The entry is unresolved (ipmr) */
+#define RTNH_F_TRAP 64 /* Nexthop is trapping packets */
+
+#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | \
+ RTNH_F_OFFLOAD | RTNH_F_TRAP)
+
+/* Macros to handle hexthops */
+
+#define RTNH_ALIGNTO 4
+#define RTNH_ALIGN(len) ( ((len)+RTNH_ALIGNTO-1) & ~(RTNH_ALIGNTO-1) )
+#define RTNH_OK(rtnh,len) ((rtnh)->rtnh_len >= sizeof(struct rtnexthop) && \
+ ((int)(rtnh)->rtnh_len) <= (len))
+#define RTNH_NEXT(rtnh) ((struct rtnexthop*)(((char*)(rtnh)) + RTNH_ALIGN((rtnh)->rtnh_len)))
+#define RTNH_LENGTH(len) (RTNH_ALIGN(sizeof(struct rtnexthop)) + (len))
+#define RTNH_SPACE(len) RTNH_ALIGN(RTNH_LENGTH(len))
+#define RTNH_DATA(rtnh) ((struct rtattr*)(((char*)(rtnh)) + RTNH_LENGTH(0)))
+
+/* RTA_VIA */
+struct rtvia {
+ __kernel_sa_family_t rtvia_family;
+ __u8 rtvia_addr[];
+};
+
+/* RTM_CACHEINFO */
+
+struct rta_cacheinfo {
+ __u32 rta_clntref;
+ __u32 rta_lastuse;
+ __s32 rta_expires;
+ __u32 rta_error;
+ __u32 rta_used;
+
+#define RTNETLINK_HAVE_PEERINFO 1
+ __u32 rta_id;
+ __u32 rta_ts;
+ __u32 rta_tsage;
+};
+
+/* RTM_METRICS --- array of struct rtattr with types of RTAX_* */
+
+enum {
+ RTAX_UNSPEC,
+#define RTAX_UNSPEC RTAX_UNSPEC
+ RTAX_LOCK,
+#define RTAX_LOCK RTAX_LOCK
+ RTAX_MTU,
+#define RTAX_MTU RTAX_MTU
+ RTAX_WINDOW,
+#define RTAX_WINDOW RTAX_WINDOW
+ RTAX_RTT,
+#define RTAX_RTT RTAX_RTT
+ RTAX_RTTVAR,
+#define RTAX_RTTVAR RTAX_RTTVAR
+ RTAX_SSTHRESH,
+#define RTAX_SSTHRESH RTAX_SSTHRESH
+ RTAX_CWND,
+#define RTAX_CWND RTAX_CWND
+ RTAX_ADVMSS,
+#define RTAX_ADVMSS RTAX_ADVMSS
+ RTAX_REORDERING,
+#define RTAX_REORDERING RTAX_REORDERING
+ RTAX_HOPLIMIT,
+#define RTAX_HOPLIMIT RTAX_HOPLIMIT
+ RTAX_INITCWND,
+#define RTAX_INITCWND RTAX_INITCWND
+ RTAX_FEATURES,
+#define RTAX_FEATURES RTAX_FEATURES
+ RTAX_RTO_MIN,
+#define RTAX_RTO_MIN RTAX_RTO_MIN
+ RTAX_INITRWND,
+#define RTAX_INITRWND RTAX_INITRWND
+ RTAX_QUICKACK,
+#define RTAX_QUICKACK RTAX_QUICKACK
+ RTAX_CC_ALGO,
+#define RTAX_CC_ALGO RTAX_CC_ALGO
+ RTAX_FASTOPEN_NO_COOKIE,
+#define RTAX_FASTOPEN_NO_COOKIE RTAX_FASTOPEN_NO_COOKIE
+ __RTAX_MAX
+};
+
+#define RTAX_MAX (__RTAX_MAX - 1)
+
+#define RTAX_FEATURE_ECN (1 << 0)
+#define RTAX_FEATURE_SACK (1 << 1) /* unused */
+#define RTAX_FEATURE_TIMESTAMP (1 << 2) /* unused */
+#define RTAX_FEATURE_ALLFRAG (1 << 3) /* unused */
+#define RTAX_FEATURE_TCP_USEC_TS (1 << 4)
+
+#define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | \
+ RTAX_FEATURE_SACK | \
+ RTAX_FEATURE_TIMESTAMP | \
+ RTAX_FEATURE_ALLFRAG | \
+ RTAX_FEATURE_TCP_USEC_TS)
+
+struct rta_session {
+ __u8 proto;
+ __u8 pad1;
+ __u16 pad2;
+
+ union {
+ struct {
+ __u16 sport;
+ __u16 dport;
+ } ports;
+
+ struct {
+ __u8 type;
+ __u8 code;
+ __u16 ident;
+ } icmpt;
+
+ __u32 spi;
+ } u;
+};
+
+struct rta_mfc_stats {
+ __u64 mfcs_packets;
+ __u64 mfcs_bytes;
+ __u64 mfcs_wrong_if;
+};
+
+/****
+ * General form of address family dependent message.
+ ****/
+
+struct rtgenmsg {
+ unsigned char rtgen_family;
+};
+
+/*****************************************************************
+ * Link layer specific messages.
+ ****/
+
+/* struct ifinfomsg
+ * passes link level specific information, not dependent
+ * on network protocol.
+ */
+
+struct ifinfomsg {
+ unsigned char ifi_family;
+ unsigned char __ifi_pad;
+ unsigned short ifi_type; /* ARPHRD_* */
+ int ifi_index; /* Link index */
+ unsigned ifi_flags; /* IFF_* flags */
+ unsigned ifi_change; /* IFF_* change mask */
+};
+
+/********************************************************************
+ * prefix information
+ ****/
+
+struct prefixmsg {
+ unsigned char prefix_family;
+ unsigned char prefix_pad1;
+ unsigned short prefix_pad2;
+ int prefix_ifindex;
+ unsigned char prefix_type;
+ unsigned char prefix_len;
+ unsigned char prefix_flags;
+ unsigned char prefix_pad3;
+};
+
+enum
+{
+ PREFIX_UNSPEC,
+ PREFIX_ADDRESS,
+ PREFIX_CACHEINFO,
+ __PREFIX_MAX
+};
+
+#define PREFIX_MAX (__PREFIX_MAX - 1)
+
+struct prefix_cacheinfo {
+ __u32 preferred_time;
+ __u32 valid_time;
+};
+
+
+/*****************************************************************
+ * Traffic control messages.
+ ****/
+
+struct tcmsg {
+ unsigned char tcm_family;
+ unsigned char tcm__pad1;
+ unsigned short tcm__pad2;
+ int tcm_ifindex;
+ __u32 tcm_handle;
+ __u32 tcm_parent;
+/* tcm_block_index is used instead of tcm_parent
+ * in case tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK
+ */
+#define tcm_block_index tcm_parent
+ __u32 tcm_info;
+};
+
+/* For manipulation of filters in shared block, tcm_ifindex is set to
+ * TCM_IFINDEX_MAGIC_BLOCK, and tcm_parent is aliased to tcm_block_index
+ * which is the block index.
+ */
+#define TCM_IFINDEX_MAGIC_BLOCK (0xFFFFFFFFU)
+
+enum {
+ TCA_UNSPEC,
+ TCA_KIND,
+ TCA_OPTIONS,
+ TCA_STATS,
+ TCA_XSTATS,
+ TCA_RATE,
+ TCA_FCNT,
+ TCA_STATS2,
+ TCA_STAB,
+ TCA_PAD,
+ TCA_DUMP_INVISIBLE,
+ TCA_CHAIN,
+ TCA_HW_OFFLOAD,
+ TCA_INGRESS_BLOCK,
+ TCA_EGRESS_BLOCK,
+ TCA_DUMP_FLAGS,
+ TCA_EXT_WARN_MSG,
+ __TCA_MAX
+};
+
+#define TCA_MAX (__TCA_MAX - 1)
+
+#define TCA_DUMP_FLAGS_TERSE (1 << 0) /* Means that in dump user gets only basic
+ * data necessary to identify the objects
+ * (handle, cookie, etc.) and stats.
+ */
+
+#define TCA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg))))
+#define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg))
+
+/********************************************************************
+ * Neighbor Discovery userland options
+ ****/
+
+struct nduseroptmsg {
+ unsigned char nduseropt_family;
+ unsigned char nduseropt_pad1;
+ unsigned short nduseropt_opts_len; /* Total length of options */
+ int nduseropt_ifindex;
+ __u8 nduseropt_icmp_type;
+ __u8 nduseropt_icmp_code;
+ unsigned short nduseropt_pad2;
+ unsigned int nduseropt_pad3;
+ /* Followed by one or more ND options */
+};
+
+enum {
+ NDUSEROPT_UNSPEC,
+ NDUSEROPT_SRCADDR,
+ __NDUSEROPT_MAX
+};
+
+#define NDUSEROPT_MAX (__NDUSEROPT_MAX - 1)
+
+#ifndef __KERNEL__
+/* RTnetlink multicast groups - backwards compatibility for userspace */
+#define RTMGRP_LINK 1
+#define RTMGRP_NOTIFY 2
+#define RTMGRP_NEIGH 4
+#define RTMGRP_TC 8
+
+#define RTMGRP_IPV4_IFADDR 0x10
+#define RTMGRP_IPV4_MROUTE 0x20
+#define RTMGRP_IPV4_ROUTE 0x40
+#define RTMGRP_IPV4_RULE 0x80
+
+#define RTMGRP_IPV6_IFADDR 0x100
+#define RTMGRP_IPV6_MROUTE 0x200
+#define RTMGRP_IPV6_ROUTE 0x400
+#define RTMGRP_IPV6_IFINFO 0x800
+
+#define RTMGRP_DECnet_IFADDR 0x1000
+#define RTMGRP_DECnet_ROUTE 0x4000
+
+#define RTMGRP_IPV6_PREFIX 0x20000
+#endif
+
+/* RTnetlink multicast groups */
+enum rtnetlink_groups {
+ RTNLGRP_NONE,
+#define RTNLGRP_NONE RTNLGRP_NONE
+ RTNLGRP_LINK,
+#define RTNLGRP_LINK RTNLGRP_LINK
+ RTNLGRP_NOTIFY,
+#define RTNLGRP_NOTIFY RTNLGRP_NOTIFY
+ RTNLGRP_NEIGH,
+#define RTNLGRP_NEIGH RTNLGRP_NEIGH
+ RTNLGRP_TC,
+#define RTNLGRP_TC RTNLGRP_TC
+ RTNLGRP_IPV4_IFADDR,
+#define RTNLGRP_IPV4_IFADDR RTNLGRP_IPV4_IFADDR
+ RTNLGRP_IPV4_MROUTE,
+#define RTNLGRP_IPV4_MROUTE RTNLGRP_IPV4_MROUTE
+ RTNLGRP_IPV4_ROUTE,
+#define RTNLGRP_IPV4_ROUTE RTNLGRP_IPV4_ROUTE
+ RTNLGRP_IPV4_RULE,
+#define RTNLGRP_IPV4_RULE RTNLGRP_IPV4_RULE
+ RTNLGRP_IPV6_IFADDR,
+#define RTNLGRP_IPV6_IFADDR RTNLGRP_IPV6_IFADDR
+ RTNLGRP_IPV6_MROUTE,
+#define RTNLGRP_IPV6_MROUTE RTNLGRP_IPV6_MROUTE
+ RTNLGRP_IPV6_ROUTE,
+#define RTNLGRP_IPV6_ROUTE RTNLGRP_IPV6_ROUTE
+ RTNLGRP_IPV6_IFINFO,
+#define RTNLGRP_IPV6_IFINFO RTNLGRP_IPV6_IFINFO
+ RTNLGRP_DECnet_IFADDR,
+#define RTNLGRP_DECnet_IFADDR RTNLGRP_DECnet_IFADDR
+ RTNLGRP_NOP2,
+ RTNLGRP_DECnet_ROUTE,
+#define RTNLGRP_DECnet_ROUTE RTNLGRP_DECnet_ROUTE
+ RTNLGRP_DECnet_RULE,
+#define RTNLGRP_DECnet_RULE RTNLGRP_DECnet_RULE
+ RTNLGRP_NOP4,
+ RTNLGRP_IPV6_PREFIX,
+#define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX
+ RTNLGRP_IPV6_RULE,
+#define RTNLGRP_IPV6_RULE RTNLGRP_IPV6_RULE
+ RTNLGRP_ND_USEROPT,
+#define RTNLGRP_ND_USEROPT RTNLGRP_ND_USEROPT
+ RTNLGRP_PHONET_IFADDR,
+#define RTNLGRP_PHONET_IFADDR RTNLGRP_PHONET_IFADDR
+ RTNLGRP_PHONET_ROUTE,
+#define RTNLGRP_PHONET_ROUTE RTNLGRP_PHONET_ROUTE
+ RTNLGRP_DCB,
+#define RTNLGRP_DCB RTNLGRP_DCB
+ RTNLGRP_IPV4_NETCONF,
+#define RTNLGRP_IPV4_NETCONF RTNLGRP_IPV4_NETCONF
+ RTNLGRP_IPV6_NETCONF,
+#define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF
+ RTNLGRP_MDB,
+#define RTNLGRP_MDB RTNLGRP_MDB
+ RTNLGRP_MPLS_ROUTE,
+#define RTNLGRP_MPLS_ROUTE RTNLGRP_MPLS_ROUTE
+ RTNLGRP_NSID,
+#define RTNLGRP_NSID RTNLGRP_NSID
+ RTNLGRP_MPLS_NETCONF,
+#define RTNLGRP_MPLS_NETCONF RTNLGRP_MPLS_NETCONF
+ RTNLGRP_IPV4_MROUTE_R,
+#define RTNLGRP_IPV4_MROUTE_R RTNLGRP_IPV4_MROUTE_R
+ RTNLGRP_IPV6_MROUTE_R,
+#define RTNLGRP_IPV6_MROUTE_R RTNLGRP_IPV6_MROUTE_R
+ RTNLGRP_NEXTHOP,
+#define RTNLGRP_NEXTHOP RTNLGRP_NEXTHOP
+ RTNLGRP_BRVLAN,
+#define RTNLGRP_BRVLAN RTNLGRP_BRVLAN
+ RTNLGRP_MCTP_IFADDR,
+#define RTNLGRP_MCTP_IFADDR RTNLGRP_MCTP_IFADDR
+ RTNLGRP_TUNNEL,
+#define RTNLGRP_TUNNEL RTNLGRP_TUNNEL
+ RTNLGRP_STATS,
+#define RTNLGRP_STATS RTNLGRP_STATS
+ RTNLGRP_IPV4_MCADDR,
+#define RTNLGRP_IPV4_MCADDR RTNLGRP_IPV4_MCADDR
+ RTNLGRP_IPV6_MCADDR,
+#define RTNLGRP_IPV6_MCADDR RTNLGRP_IPV6_MCADDR
+ RTNLGRP_IPV6_ACADDR,
+#define RTNLGRP_IPV6_ACADDR RTNLGRP_IPV6_ACADDR
+ __RTNLGRP_MAX
+};
+#define RTNLGRP_MAX (__RTNLGRP_MAX - 1)
+
+/* TC action piece */
+struct tcamsg {
+ unsigned char tca_family;
+ unsigned char tca__pad1;
+ unsigned short tca__pad2;
+};
+
+enum {
+ TCA_ROOT_UNSPEC,
+ TCA_ROOT_TAB,
+#define TCA_ACT_TAB TCA_ROOT_TAB
+#define TCAA_MAX TCA_ROOT_TAB
+ TCA_ROOT_FLAGS,
+ TCA_ROOT_COUNT,
+ TCA_ROOT_TIME_DELTA, /* in msecs */
+ TCA_ROOT_EXT_WARN_MSG,
+ __TCA_ROOT_MAX,
+#define TCA_ROOT_MAX (__TCA_ROOT_MAX - 1)
+};
+
+#define TA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
+#define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
+/* tcamsg flags stored in attribute TCA_ROOT_FLAGS
+ *
+ * TCA_ACT_FLAG_LARGE_DUMP_ON user->kernel to request for larger than
+ * TCA_ACT_MAX_PRIO actions in a dump. All dump responses will contain the
+ * number of actions being dumped stored in for user app's consumption in
+ * TCA_ROOT_COUNT
+ *
+ * TCA_ACT_FLAG_TERSE_DUMP user->kernel to request terse (brief) dump that only
+ * includes essential action info (kind, index, etc.)
+ *
+ */
+#define TCA_FLAG_LARGE_DUMP_ON (1 << 0)
+#define TCA_ACT_FLAG_LARGE_DUMP_ON TCA_FLAG_LARGE_DUMP_ON
+#define TCA_ACT_FLAG_TERSE_DUMP (1 << 1)
+
+/* New extended info filters for IFLA_EXT_MASK */
+#define RTEXT_FILTER_VF (1 << 0)
+#define RTEXT_FILTER_BRVLAN (1 << 1)
+#define RTEXT_FILTER_BRVLAN_COMPRESSED (1 << 2)
+#define RTEXT_FILTER_SKIP_STATS (1 << 3)
+#define RTEXT_FILTER_MRP (1 << 4)
+#define RTEXT_FILTER_CFM_CONFIG (1 << 5)
+#define RTEXT_FILTER_CFM_STATUS (1 << 6)
+#define RTEXT_FILTER_MST (1 << 7)
+
+/* End of information exported to user level */
+
+
+
+#endif /* _UAPI__LINUX_RTNETLINK_H */
diff --git a/tools/include/uapi/linux/sched.h b/tools/include/uapi/linux/sched.h
deleted file mode 100644
index 3bac0a8ceab2..000000000000
--- a/tools/include/uapi/linux/sched.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _UAPI_LINUX_SCHED_H
-#define _UAPI_LINUX_SCHED_H
-
-#include <linux/types.h>
-
-/*
- * cloning flags:
- */
-#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */
-#define CLONE_VM 0x00000100 /* set if VM shared between processes */
-#define CLONE_FS 0x00000200 /* set if fs info shared between processes */
-#define CLONE_FILES 0x00000400 /* set if open files shared between processes */
-#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */
-#define CLONE_PIDFD 0x00001000 /* set if a pidfd should be placed in parent */
-#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */
-#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */
-#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */
-#define CLONE_THREAD 0x00010000 /* Same thread group? */
-#define CLONE_NEWNS 0x00020000 /* New mount namespace group */
-#define CLONE_SYSVSEM 0x00040000 /* share system V SEM_UNDO semantics */
-#define CLONE_SETTLS 0x00080000 /* create a new TLS for the child */
-#define CLONE_PARENT_SETTID 0x00100000 /* set the TID in the parent */
-#define CLONE_CHILD_CLEARTID 0x00200000 /* clear the TID in the child */
-#define CLONE_DETACHED 0x00400000 /* Unused, ignored */
-#define CLONE_UNTRACED 0x00800000 /* set if the tracing process can't force CLONE_PTRACE on this clone */
-#define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */
-#define CLONE_NEWCGROUP 0x02000000 /* New cgroup namespace */
-#define CLONE_NEWUTS 0x04000000 /* New utsname namespace */
-#define CLONE_NEWIPC 0x08000000 /* New ipc namespace */
-#define CLONE_NEWUSER 0x10000000 /* New user namespace */
-#define CLONE_NEWPID 0x20000000 /* New pid namespace */
-#define CLONE_NEWNET 0x40000000 /* New network namespace */
-#define CLONE_IO 0x80000000 /* Clone io context */
-
-/* Flags for the clone3() syscall. */
-#define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */
-#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
-
-/*
- * cloning flags intersect with CSIGNAL so can be used with unshare and clone3
- * syscalls only:
- */
-#define CLONE_NEWTIME 0x00000080 /* New time namespace */
-
-#ifndef __ASSEMBLY__
-/**
- * struct clone_args - arguments for the clone3 syscall
- * @flags: Flags for the new process as listed above.
- * All flags are valid except for CSIGNAL and
- * CLONE_DETACHED.
- * @pidfd: If CLONE_PIDFD is set, a pidfd will be
- * returned in this argument.
- * @child_tid: If CLONE_CHILD_SETTID is set, the TID of the
- * child process will be returned in the child's
- * memory.
- * @parent_tid: If CLONE_PARENT_SETTID is set, the TID of
- * the child process will be returned in the
- * parent's memory.
- * @exit_signal: The exit_signal the parent process will be
- * sent when the child exits.
- * @stack: Specify the location of the stack for the
- * child process.
- * Note, @stack is expected to point to the
- * lowest address. The stack direction will be
- * determined by the kernel and set up
- * appropriately based on @stack_size.
- * @stack_size: The size of the stack for the child process.
- * @tls: If CLONE_SETTLS is set, the tls descriptor
- * is set to tls.
- * @set_tid: Pointer to an array of type *pid_t. The size
- * of the array is defined using @set_tid_size.
- * This array is used to select PIDs/TIDs for
- * newly created processes. The first element in
- * this defines the PID in the most nested PID
- * namespace. Each additional element in the array
- * defines the PID in the parent PID namespace of
- * the original PID namespace. If the array has
- * less entries than the number of currently
- * nested PID namespaces only the PIDs in the
- * corresponding namespaces are set.
- * @set_tid_size: This defines the size of the array referenced
- * in @set_tid. This cannot be larger than the
- * kernel's limit of nested PID namespaces.
- * @cgroup: If CLONE_INTO_CGROUP is specified set this to
- * a file descriptor for the cgroup.
- *
- * The structure is versioned by size and thus extensible.
- * New struct members must go at the end of the struct and
- * must be properly 64bit aligned.
- */
-struct clone_args {
- __aligned_u64 flags;
- __aligned_u64 pidfd;
- __aligned_u64 child_tid;
- __aligned_u64 parent_tid;
- __aligned_u64 exit_signal;
- __aligned_u64 stack;
- __aligned_u64 stack_size;
- __aligned_u64 tls;
- __aligned_u64 set_tid;
- __aligned_u64 set_tid_size;
- __aligned_u64 cgroup;
-};
-#endif
-
-#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */
-#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */
-#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */
-
-/*
- * Scheduling policies
- */
-#define SCHED_NORMAL 0
-#define SCHED_FIFO 1
-#define SCHED_RR 2
-#define SCHED_BATCH 3
-/* SCHED_ISO: reserved but not implemented yet */
-#define SCHED_IDLE 5
-#define SCHED_DEADLINE 6
-
-/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
-#define SCHED_RESET_ON_FORK 0x40000000
-
-/*
- * For the sched_{set,get}attr() calls
- */
-#define SCHED_FLAG_RESET_ON_FORK 0x01
-#define SCHED_FLAG_RECLAIM 0x02
-#define SCHED_FLAG_DL_OVERRUN 0x04
-#define SCHED_FLAG_KEEP_POLICY 0x08
-#define SCHED_FLAG_KEEP_PARAMS 0x10
-#define SCHED_FLAG_UTIL_CLAMP_MIN 0x20
-#define SCHED_FLAG_UTIL_CLAMP_MAX 0x40
-
-#define SCHED_FLAG_KEEP_ALL (SCHED_FLAG_KEEP_POLICY | \
- SCHED_FLAG_KEEP_PARAMS)
-
-#define SCHED_FLAG_UTIL_CLAMP (SCHED_FLAG_UTIL_CLAMP_MIN | \
- SCHED_FLAG_UTIL_CLAMP_MAX)
-
-#define SCHED_FLAG_ALL (SCHED_FLAG_RESET_ON_FORK | \
- SCHED_FLAG_RECLAIM | \
- SCHED_FLAG_DL_OVERRUN | \
- SCHED_FLAG_KEEP_ALL | \
- SCHED_FLAG_UTIL_CLAMP)
-
-#endif /* _UAPI_LINUX_SCHED_H */
diff --git a/tools/include/uapi/linux/seccomp.h b/tools/include/uapi/linux/seccomp.h
new file mode 100644
index 000000000000..dbfc9b37fcae
--- /dev/null
+++ b/tools/include/uapi/linux/seccomp.h
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_SECCOMP_H
+#define _UAPI_LINUX_SECCOMP_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+
+/* Valid values for seccomp.mode and prctl(PR_SET_SECCOMP, <mode>) */
+#define SECCOMP_MODE_DISABLED 0 /* seccomp is not in use. */
+#define SECCOMP_MODE_STRICT 1 /* uses hard-coded filter. */
+#define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */
+
+/* Valid operations for seccomp syscall. */
+#define SECCOMP_SET_MODE_STRICT 0
+#define SECCOMP_SET_MODE_FILTER 1
+#define SECCOMP_GET_ACTION_AVAIL 2
+#define SECCOMP_GET_NOTIF_SIZES 3
+
+/* Valid flags for SECCOMP_SET_MODE_FILTER */
+#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
+#define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
+#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
+#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
+#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
+/* Received notifications wait in killable state (only respond to fatal signals) */
+#define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5)
+
+/*
+ * All BPF programs must return a 32-bit value.
+ * The bottom 16-bits are for optional return data.
+ * The upper 16-bits are ordered from least permissive values to most,
+ * as a signed value (so 0x8000000 is negative).
+ *
+ * The ordering ensures that a min_t() over composed return values always
+ * selects the least permissive choice.
+ */
+#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
+#define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */
+#define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD
+#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */
+#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */
+#define SECCOMP_RET_USER_NOTIF 0x7fc00000U /* notifies userspace */
+#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */
+#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */
+#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */
+
+/* Masks for the return value sections. */
+#define SECCOMP_RET_ACTION_FULL 0xffff0000U
+#define SECCOMP_RET_ACTION 0x7fff0000U
+#define SECCOMP_RET_DATA 0x0000ffffU
+
+/**
+ * struct seccomp_data - the format the BPF program executes over.
+ * @nr: the system call number
+ * @arch: indicates system call convention as an AUDIT_ARCH_* value
+ * as defined in <linux/audit.h>.
+ * @instruction_pointer: at the time of the system call.
+ * @args: up to 6 system call arguments always stored as 64-bit values
+ * regardless of the architecture.
+ */
+struct seccomp_data {
+ int nr;
+ __u32 arch;
+ __u64 instruction_pointer;
+ __u64 args[6];
+};
+
+struct seccomp_notif_sizes {
+ __u16 seccomp_notif;
+ __u16 seccomp_notif_resp;
+ __u16 seccomp_data;
+};
+
+struct seccomp_notif {
+ __u64 id;
+ __u32 pid;
+ __u32 flags;
+ struct seccomp_data data;
+};
+
+/*
+ * Valid flags for struct seccomp_notif_resp
+ *
+ * Note, the SECCOMP_USER_NOTIF_FLAG_CONTINUE flag must be used with caution!
+ * If set by the process supervising the syscalls of another process the
+ * syscall will continue. This is problematic because of an inherent TOCTOU.
+ * An attacker can exploit the time while the supervised process is waiting on
+ * a response from the supervising process to rewrite syscall arguments which
+ * are passed as pointers of the intercepted syscall.
+ * It should be absolutely clear that this means that the seccomp notifier
+ * _cannot_ be used to implement a security policy! It should only ever be used
+ * in scenarios where a more privileged process supervises the syscalls of a
+ * lesser privileged process to get around kernel-enforced security
+ * restrictions when the privileged process deems this safe. In other words,
+ * in order to continue a syscall the supervising process should be sure that
+ * another security mechanism or the kernel itself will sufficiently block
+ * syscalls if arguments are rewritten to something unsafe.
+ *
+ * Similar precautions should be applied when stacking SECCOMP_RET_USER_NOTIF
+ * or SECCOMP_RET_TRACE. For SECCOMP_RET_USER_NOTIF filters acting on the
+ * same syscall, the most recently added filter takes precedence. This means
+ * that the new SECCOMP_RET_USER_NOTIF filter can override any
+ * SECCOMP_IOCTL_NOTIF_SEND from earlier filters, essentially allowing all
+ * such filtered syscalls to be executed by sending the response
+ * SECCOMP_USER_NOTIF_FLAG_CONTINUE. Note that SECCOMP_RET_TRACE can equally
+ * be overriden by SECCOMP_USER_NOTIF_FLAG_CONTINUE.
+ */
+#define SECCOMP_USER_NOTIF_FLAG_CONTINUE (1UL << 0)
+
+struct seccomp_notif_resp {
+ __u64 id;
+ __s64 val;
+ __s32 error;
+ __u32 flags;
+};
+
+#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
+
+/* valid flags for seccomp_notif_addfd */
+#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */
+#define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */
+
+/**
+ * struct seccomp_notif_addfd
+ * @id: The ID of the seccomp notification
+ * @flags: SECCOMP_ADDFD_FLAG_*
+ * @srcfd: The local fd number
+ * @newfd: Optional remote FD number if SETFD option is set, otherwise 0.
+ * @newfd_flags: The O_* flags the remote FD should have applied
+ */
+struct seccomp_notif_addfd {
+ __u64 id;
+ __u32 flags;
+ __u32 srcfd;
+ __u32 newfd;
+ __u32 newfd_flags;
+};
+
+#define SECCOMP_IOC_MAGIC '!'
+#define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr)
+#define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type)
+#define SECCOMP_IOW(nr, type) _IOW(SECCOMP_IOC_MAGIC, nr, type)
+#define SECCOMP_IOWR(nr, type) _IOWR(SECCOMP_IOC_MAGIC, nr, type)
+
+/* Flags for seccomp notification fd ioctl. */
+#define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif)
+#define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \
+ struct seccomp_notif_resp)
+#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64)
+/* On success, the return value is the remote process's added fd number */
+#define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \
+ struct seccomp_notif_addfd)
+
+#define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64)
+
+#endif /* _UAPI_LINUX_SECCOMP_H */
diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h
index 7cab2c65d3d7..1686861aae20 100644
--- a/tools/include/uapi/linux/stat.h
+++ b/tools/include/uapi/linux/stat.h
@@ -98,36 +98,97 @@ struct statx_timestamp {
*/
struct statx {
/* 0x00 */
- __u32 stx_mask; /* What results were written [uncond] */
- __u32 stx_blksize; /* Preferred general I/O size [uncond] */
- __u64 stx_attributes; /* Flags conveying information about the file [uncond] */
+ /* What results were written [uncond] */
+ __u32 stx_mask;
+
+ /* Preferred general I/O size [uncond] */
+ __u32 stx_blksize;
+
+ /* Flags conveying information about the file [uncond] */
+ __u64 stx_attributes;
+
/* 0x10 */
- __u32 stx_nlink; /* Number of hard links */
- __u32 stx_uid; /* User ID of owner */
- __u32 stx_gid; /* Group ID of owner */
- __u16 stx_mode; /* File mode */
+ /* Number of hard links */
+ __u32 stx_nlink;
+
+ /* User ID of owner */
+ __u32 stx_uid;
+
+ /* Group ID of owner */
+ __u32 stx_gid;
+
+ /* File mode */
+ __u16 stx_mode;
__u16 __spare0[1];
+
/* 0x20 */
- __u64 stx_ino; /* Inode number */
- __u64 stx_size; /* File size */
- __u64 stx_blocks; /* Number of 512-byte blocks allocated */
- __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */
+ /* Inode number */
+ __u64 stx_ino;
+
+ /* File size */
+ __u64 stx_size;
+
+ /* Number of 512-byte blocks allocated */
+ __u64 stx_blocks;
+
+ /* Mask to show what's supported in stx_attributes */
+ __u64 stx_attributes_mask;
+
/* 0x40 */
- struct statx_timestamp stx_atime; /* Last access time */
- struct statx_timestamp stx_btime; /* File creation time */
- struct statx_timestamp stx_ctime; /* Last attribute change time */
- struct statx_timestamp stx_mtime; /* Last data modification time */
+ /* Last access time */
+ struct statx_timestamp stx_atime;
+
+ /* File creation time */
+ struct statx_timestamp stx_btime;
+
+ /* Last attribute change time */
+ struct statx_timestamp stx_ctime;
+
+ /* Last data modification time */
+ struct statx_timestamp stx_mtime;
+
/* 0x80 */
- __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */
+ /* Device ID of special file [if bdev/cdev] */
+ __u32 stx_rdev_major;
__u32 stx_rdev_minor;
- __u32 stx_dev_major; /* ID of device containing file [uncond] */
+
+ /* ID of device containing file [uncond] */
+ __u32 stx_dev_major;
__u32 stx_dev_minor;
+
/* 0x90 */
__u64 stx_mnt_id;
- __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */
- __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */
+
+ /* Memory buffer alignment for direct I/O */
+ __u32 stx_dio_mem_align;
+
+ /* File offset alignment for direct I/O */
+ __u32 stx_dio_offset_align;
+
/* 0xa0 */
- __u64 __spare3[12]; /* Spare space for future expansion */
+ /* Subvolume identifier */
+ __u64 stx_subvol;
+
+ /* Min atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_min;
+
+ /* Max atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_max;
+
+ /* 0xb0 */
+ /* Max atomic write segment count */
+ __u32 stx_atomic_write_segments_max;
+
+ /* File offset alignment for direct I/O reads */
+ __u32 stx_dio_read_offset_align;
+
+ /* Optimised max atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_max_opt;
+ __u32 __spare2[1];
+
+ /* 0xc0 */
+ __u64 __spare3[8]; /* Spare space for future expansion */
+
/* 0x100 */
};
@@ -154,6 +215,10 @@ struct statx {
#define STATX_BTIME 0x00000800U /* Want/got stx_btime */
#define STATX_MNT_ID 0x00001000U /* Got stx_mnt_id */
#define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */
+#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */
+#define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */
+#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */
+#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
@@ -189,6 +254,7 @@ struct statx {
#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */
#define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */
#define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */
+#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */
#endif /* _UAPI_LINUX_STAT_H */
diff --git a/tools/include/uapi/linux/stddef.h b/tools/include/uapi/linux/stddef.h
index bb6ea517efb5..c53cde425406 100644
--- a/tools/include/uapi/linux/stddef.h
+++ b/tools/include/uapi/linux/stddef.h
@@ -8,6 +8,13 @@
#define __always_inline __inline__
#endif
+/* Not all C++ standards support type declarations inside an anonymous union */
+#ifndef __cplusplus
+#define __struct_group_tag(TAG) TAG
+#else
+#define __struct_group_tag(TAG)
+#endif
+
/**
* __struct_group() - Create a mirrored named and anonyomous struct
*
@@ -20,14 +27,14 @@
* and size: one anonymous and one named. The former's members can be used
* normally without sub-struct naming, and the latter can be used to
* reason about the start, end, and size of the group of struct members.
- * The named struct can also be explicitly tagged for layer reuse, as well
- * as both having struct attributes appended.
+ * The named struct can also be explicitly tagged for layer reuse (C only),
+ * as well as both having struct attributes appended.
*/
#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \
union { \
struct { MEMBERS } ATTRS; \
- struct TAG { MEMBERS } ATTRS NAME; \
- }
+ struct __struct_group_tag(TAG) { MEMBERS } ATTRS NAME; \
+ } ATTRS
/**
* __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union
diff --git a/tools/include/uapi/linux/types.h b/tools/include/uapi/linux/types.h
index 91fa51a9c31d..85aa327245c6 100644
--- a/tools/include/uapi/linux/types.h
+++ b/tools/include/uapi/linux/types.h
@@ -4,6 +4,8 @@
#include <asm-generic/int-ll64.h>
+#ifndef __ASSEMBLER__
+
/* copied from linux:include/uapi/linux/types.h */
#define __bitwise
typedef __u16 __bitwise __le16;
@@ -20,4 +22,5 @@ typedef __u32 __bitwise __wsum;
#define __aligned_be64 __be64 __attribute__((aligned(8)))
#define __aligned_le64 __le64 __attribute__((aligned(8)))
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI_LINUX_TYPES_H */
diff --git a/tools/include/uapi/linux/usbdevice_fs.h b/tools/include/uapi/linux/usbdevice_fs.h
deleted file mode 100644
index 74a84e02422a..000000000000
--- a/tools/include/uapi/linux/usbdevice_fs.h
+++ /dev/null
@@ -1,231 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
-/*****************************************************************************/
-
-/*
- * usbdevice_fs.h -- USB device file system.
- *
- * Copyright (C) 2000
- * Thomas Sailer (sailer@ife.ee.ethz.ch)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * History:
- * 0.1 04.01.2000 Created
- */
-
-/*****************************************************************************/
-
-#ifndef _UAPI_LINUX_USBDEVICE_FS_H
-#define _UAPI_LINUX_USBDEVICE_FS_H
-
-#include <linux/types.h>
-#include <linux/magic.h>
-
-/* --------------------------------------------------------------------- */
-
-/* usbdevfs ioctl codes */
-
-struct usbdevfs_ctrltransfer {
- __u8 bRequestType;
- __u8 bRequest;
- __u16 wValue;
- __u16 wIndex;
- __u16 wLength;
- __u32 timeout; /* in milliseconds */
- void __user *data;
-};
-
-struct usbdevfs_bulktransfer {
- unsigned int ep;
- unsigned int len;
- unsigned int timeout; /* in milliseconds */
- void __user *data;
-};
-
-struct usbdevfs_setinterface {
- unsigned int interface;
- unsigned int altsetting;
-};
-
-struct usbdevfs_disconnectsignal {
- unsigned int signr;
- void __user *context;
-};
-
-#define USBDEVFS_MAXDRIVERNAME 255
-
-struct usbdevfs_getdriver {
- unsigned int interface;
- char driver[USBDEVFS_MAXDRIVERNAME + 1];
-};
-
-struct usbdevfs_connectinfo {
- unsigned int devnum;
- unsigned char slow;
-};
-
-struct usbdevfs_conninfo_ex {
- __u32 size; /* Size of the structure from the kernel's */
- /* point of view. Can be used by userspace */
- /* to determine how much data can be */
- /* used/trusted. */
- __u32 busnum; /* USB bus number, as enumerated by the */
- /* kernel, the device is connected to. */
- __u32 devnum; /* Device address on the bus. */
- __u32 speed; /* USB_SPEED_* constants from ch9.h */
- __u8 num_ports; /* Number of ports the device is connected */
- /* to on the way to the root hub. It may */
- /* be bigger than size of 'ports' array so */
- /* userspace can detect overflows. */
- __u8 ports[7]; /* List of ports on the way from the root */
- /* hub to the device. Current limit in */
- /* USB specification is 7 tiers (root hub, */
- /* 5 intermediate hubs, device), which */
- /* gives at most 6 port entries. */
-};
-
-#define USBDEVFS_URB_SHORT_NOT_OK 0x01
-#define USBDEVFS_URB_ISO_ASAP 0x02
-#define USBDEVFS_URB_BULK_CONTINUATION 0x04
-#define USBDEVFS_URB_NO_FSBR 0x20 /* Not used */
-#define USBDEVFS_URB_ZERO_PACKET 0x40
-#define USBDEVFS_URB_NO_INTERRUPT 0x80
-
-#define USBDEVFS_URB_TYPE_ISO 0
-#define USBDEVFS_URB_TYPE_INTERRUPT 1
-#define USBDEVFS_URB_TYPE_CONTROL 2
-#define USBDEVFS_URB_TYPE_BULK 3
-
-struct usbdevfs_iso_packet_desc {
- unsigned int length;
- unsigned int actual_length;
- unsigned int status;
-};
-
-struct usbdevfs_urb {
- unsigned char type;
- unsigned char endpoint;
- int status;
- unsigned int flags;
- void __user *buffer;
- int buffer_length;
- int actual_length;
- int start_frame;
- union {
- int number_of_packets; /* Only used for isoc urbs */
- unsigned int stream_id; /* Only used with bulk streams */
- };
- int error_count;
- unsigned int signr; /* signal to be sent on completion,
- or 0 if none should be sent. */
- void __user *usercontext;
- struct usbdevfs_iso_packet_desc iso_frame_desc[];
-};
-
-/* ioctls for talking directly to drivers */
-struct usbdevfs_ioctl {
- int ifno; /* interface 0..N ; negative numbers reserved */
- int ioctl_code; /* MUST encode size + direction of data so the
- * macros in <asm/ioctl.h> give correct values */
- void __user *data; /* param buffer (in, or out) */
-};
-
-/* You can do most things with hubs just through control messages,
- * except find out what device connects to what port. */
-struct usbdevfs_hub_portinfo {
- char nports; /* number of downstream ports in this hub */
- char port [127]; /* e.g. port 3 connects to device 27 */
-};
-
-/* System and bus capability flags */
-#define USBDEVFS_CAP_ZERO_PACKET 0x01
-#define USBDEVFS_CAP_BULK_CONTINUATION 0x02
-#define USBDEVFS_CAP_NO_PACKET_SIZE_LIM 0x04
-#define USBDEVFS_CAP_BULK_SCATTER_GATHER 0x08
-#define USBDEVFS_CAP_REAP_AFTER_DISCONNECT 0x10
-#define USBDEVFS_CAP_MMAP 0x20
-#define USBDEVFS_CAP_DROP_PRIVILEGES 0x40
-#define USBDEVFS_CAP_CONNINFO_EX 0x80
-#define USBDEVFS_CAP_SUSPEND 0x100
-
-/* USBDEVFS_DISCONNECT_CLAIM flags & struct */
-
-/* disconnect-and-claim if the driver matches the driver field */
-#define USBDEVFS_DISCONNECT_CLAIM_IF_DRIVER 0x01
-/* disconnect-and-claim except when the driver matches the driver field */
-#define USBDEVFS_DISCONNECT_CLAIM_EXCEPT_DRIVER 0x02
-
-struct usbdevfs_disconnect_claim {
- unsigned int interface;
- unsigned int flags;
- char driver[USBDEVFS_MAXDRIVERNAME + 1];
-};
-
-struct usbdevfs_streams {
- unsigned int num_streams; /* Not used by USBDEVFS_FREE_STREAMS */
- unsigned int num_eps;
- unsigned char eps[];
-};
-
-/*
- * USB_SPEED_* values returned by USBDEVFS_GET_SPEED are defined in
- * linux/usb/ch9.h
- */
-
-#define USBDEVFS_CONTROL _IOWR('U', 0, struct usbdevfs_ctrltransfer)
-#define USBDEVFS_CONTROL32 _IOWR('U', 0, struct usbdevfs_ctrltransfer32)
-#define USBDEVFS_BULK _IOWR('U', 2, struct usbdevfs_bulktransfer)
-#define USBDEVFS_BULK32 _IOWR('U', 2, struct usbdevfs_bulktransfer32)
-#define USBDEVFS_RESETEP _IOR('U', 3, unsigned int)
-#define USBDEVFS_SETINTERFACE _IOR('U', 4, struct usbdevfs_setinterface)
-#define USBDEVFS_SETCONFIGURATION _IOR('U', 5, unsigned int)
-#define USBDEVFS_GETDRIVER _IOW('U', 8, struct usbdevfs_getdriver)
-#define USBDEVFS_SUBMITURB _IOR('U', 10, struct usbdevfs_urb)
-#define USBDEVFS_SUBMITURB32 _IOR('U', 10, struct usbdevfs_urb32)
-#define USBDEVFS_DISCARDURB _IO('U', 11)
-#define USBDEVFS_REAPURB _IOW('U', 12, void *)
-#define USBDEVFS_REAPURB32 _IOW('U', 12, __u32)
-#define USBDEVFS_REAPURBNDELAY _IOW('U', 13, void *)
-#define USBDEVFS_REAPURBNDELAY32 _IOW('U', 13, __u32)
-#define USBDEVFS_DISCSIGNAL _IOR('U', 14, struct usbdevfs_disconnectsignal)
-#define USBDEVFS_DISCSIGNAL32 _IOR('U', 14, struct usbdevfs_disconnectsignal32)
-#define USBDEVFS_CLAIMINTERFACE _IOR('U', 15, unsigned int)
-#define USBDEVFS_RELEASEINTERFACE _IOR('U', 16, unsigned int)
-#define USBDEVFS_CONNECTINFO _IOW('U', 17, struct usbdevfs_connectinfo)
-#define USBDEVFS_IOCTL _IOWR('U', 18, struct usbdevfs_ioctl)
-#define USBDEVFS_IOCTL32 _IOWR('U', 18, struct usbdevfs_ioctl32)
-#define USBDEVFS_HUB_PORTINFO _IOR('U', 19, struct usbdevfs_hub_portinfo)
-#define USBDEVFS_RESET _IO('U', 20)
-#define USBDEVFS_CLEAR_HALT _IOR('U', 21, unsigned int)
-#define USBDEVFS_DISCONNECT _IO('U', 22)
-#define USBDEVFS_CONNECT _IO('U', 23)
-#define USBDEVFS_CLAIM_PORT _IOR('U', 24, unsigned int)
-#define USBDEVFS_RELEASE_PORT _IOR('U', 25, unsigned int)
-#define USBDEVFS_GET_CAPABILITIES _IOR('U', 26, __u32)
-#define USBDEVFS_DISCONNECT_CLAIM _IOR('U', 27, struct usbdevfs_disconnect_claim)
-#define USBDEVFS_ALLOC_STREAMS _IOR('U', 28, struct usbdevfs_streams)
-#define USBDEVFS_FREE_STREAMS _IOR('U', 29, struct usbdevfs_streams)
-#define USBDEVFS_DROP_PRIVILEGES _IOW('U', 30, __u32)
-#define USBDEVFS_GET_SPEED _IO('U', 31)
-/*
- * Returns struct usbdevfs_conninfo_ex; length is variable to allow
- * extending size of the data returned.
- */
-#define USBDEVFS_CONNINFO_EX(len) _IOC(_IOC_READ, 'U', 32, len)
-#define USBDEVFS_FORBID_SUSPEND _IO('U', 33)
-#define USBDEVFS_ALLOW_SUSPEND _IO('U', 34)
-#define USBDEVFS_WAIT_FOR_RESUME _IO('U', 35)
-
-#endif /* _UAPI_LINUX_USBDEVICE_FS_H */
diff --git a/tools/include/uapi/linux/userfaultfd.h b/tools/include/uapi/linux/userfaultfd.h
new file mode 100644
index 000000000000..4283de22d5b6
--- /dev/null
+++ b/tools/include/uapi/linux/userfaultfd.h
@@ -0,0 +1,386 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * include/linux/userfaultfd.h
+ *
+ * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org>
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ */
+
+#ifndef _LINUX_USERFAULTFD_H
+#define _LINUX_USERFAULTFD_H
+
+#include <linux/types.h>
+
+/* ioctls for /dev/userfaultfd */
+#define USERFAULTFD_IOC 0xAA
+#define USERFAULTFD_IOC_NEW _IO(USERFAULTFD_IOC, 0x00)
+
+/*
+ * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and
+ * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In
+ * userfaultfd.h we assumed the kernel was reading (instead _IOC_READ
+ * means the userland is reading).
+ */
+#define UFFD_API ((__u64)0xAA)
+#define UFFD_API_REGISTER_MODES (UFFDIO_REGISTER_MODE_MISSING | \
+ UFFDIO_REGISTER_MODE_WP | \
+ UFFDIO_REGISTER_MODE_MINOR)
+#define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \
+ UFFD_FEATURE_EVENT_FORK | \
+ UFFD_FEATURE_EVENT_REMAP | \
+ UFFD_FEATURE_EVENT_REMOVE | \
+ UFFD_FEATURE_EVENT_UNMAP | \
+ UFFD_FEATURE_MISSING_HUGETLBFS | \
+ UFFD_FEATURE_MISSING_SHMEM | \
+ UFFD_FEATURE_SIGBUS | \
+ UFFD_FEATURE_THREAD_ID | \
+ UFFD_FEATURE_MINOR_HUGETLBFS | \
+ UFFD_FEATURE_MINOR_SHMEM | \
+ UFFD_FEATURE_EXACT_ADDRESS | \
+ UFFD_FEATURE_WP_HUGETLBFS_SHMEM | \
+ UFFD_FEATURE_WP_UNPOPULATED | \
+ UFFD_FEATURE_POISON | \
+ UFFD_FEATURE_WP_ASYNC | \
+ UFFD_FEATURE_MOVE)
+#define UFFD_API_IOCTLS \
+ ((__u64)1 << _UFFDIO_REGISTER | \
+ (__u64)1 << _UFFDIO_UNREGISTER | \
+ (__u64)1 << _UFFDIO_API)
+#define UFFD_API_RANGE_IOCTLS \
+ ((__u64)1 << _UFFDIO_WAKE | \
+ (__u64)1 << _UFFDIO_COPY | \
+ (__u64)1 << _UFFDIO_ZEROPAGE | \
+ (__u64)1 << _UFFDIO_MOVE | \
+ (__u64)1 << _UFFDIO_WRITEPROTECT | \
+ (__u64)1 << _UFFDIO_CONTINUE | \
+ (__u64)1 << _UFFDIO_POISON)
+#define UFFD_API_RANGE_IOCTLS_BASIC \
+ ((__u64)1 << _UFFDIO_WAKE | \
+ (__u64)1 << _UFFDIO_COPY | \
+ (__u64)1 << _UFFDIO_WRITEPROTECT | \
+ (__u64)1 << _UFFDIO_CONTINUE | \
+ (__u64)1 << _UFFDIO_POISON)
+
+/*
+ * Valid ioctl command number range with this API is from 0x00 to
+ * 0x3F. UFFDIO_API is the fixed number, everything else can be
+ * changed by implementing a different UFFD_API. If sticking to the
+ * same UFFD_API more ioctl can be added and userland will be aware of
+ * which ioctl the running kernel implements through the ioctl command
+ * bitmask written by the UFFDIO_API.
+ */
+#define _UFFDIO_REGISTER (0x00)
+#define _UFFDIO_UNREGISTER (0x01)
+#define _UFFDIO_WAKE (0x02)
+#define _UFFDIO_COPY (0x03)
+#define _UFFDIO_ZEROPAGE (0x04)
+#define _UFFDIO_MOVE (0x05)
+#define _UFFDIO_WRITEPROTECT (0x06)
+#define _UFFDIO_CONTINUE (0x07)
+#define _UFFDIO_POISON (0x08)
+#define _UFFDIO_API (0x3F)
+
+/* userfaultfd ioctl ids */
+#define UFFDIO 0xAA
+#define UFFDIO_API _IOWR(UFFDIO, _UFFDIO_API, \
+ struct uffdio_api)
+#define UFFDIO_REGISTER _IOWR(UFFDIO, _UFFDIO_REGISTER, \
+ struct uffdio_register)
+#define UFFDIO_UNREGISTER _IOR(UFFDIO, _UFFDIO_UNREGISTER, \
+ struct uffdio_range)
+#define UFFDIO_WAKE _IOR(UFFDIO, _UFFDIO_WAKE, \
+ struct uffdio_range)
+#define UFFDIO_COPY _IOWR(UFFDIO, _UFFDIO_COPY, \
+ struct uffdio_copy)
+#define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \
+ struct uffdio_zeropage)
+#define UFFDIO_MOVE _IOWR(UFFDIO, _UFFDIO_MOVE, \
+ struct uffdio_move)
+#define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \
+ struct uffdio_writeprotect)
+#define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \
+ struct uffdio_continue)
+#define UFFDIO_POISON _IOWR(UFFDIO, _UFFDIO_POISON, \
+ struct uffdio_poison)
+
+/* read() structure */
+struct uffd_msg {
+ __u8 event;
+
+ __u8 reserved1;
+ __u16 reserved2;
+ __u32 reserved3;
+
+ union {
+ struct {
+ __u64 flags;
+ __u64 address;
+ union {
+ __u32 ptid;
+ } feat;
+ } pagefault;
+
+ struct {
+ __u32 ufd;
+ } fork;
+
+ struct {
+ __u64 from;
+ __u64 to;
+ __u64 len;
+ } remap;
+
+ struct {
+ __u64 start;
+ __u64 end;
+ } remove;
+
+ struct {
+ /* unused reserved fields */
+ __u64 reserved1;
+ __u64 reserved2;
+ __u64 reserved3;
+ } reserved;
+ } arg;
+} __attribute__((packed));
+
+/*
+ * Start at 0x12 and not at 0 to be more strict against bugs.
+ */
+#define UFFD_EVENT_PAGEFAULT 0x12
+#define UFFD_EVENT_FORK 0x13
+#define UFFD_EVENT_REMAP 0x14
+#define UFFD_EVENT_REMOVE 0x15
+#define UFFD_EVENT_UNMAP 0x16
+
+/* flags for UFFD_EVENT_PAGEFAULT */
+#define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */
+#define UFFD_PAGEFAULT_FLAG_WP (1<<1) /* If reason is VM_UFFD_WP */
+#define UFFD_PAGEFAULT_FLAG_MINOR (1<<2) /* If reason is VM_UFFD_MINOR */
+
+struct uffdio_api {
+ /* userland asks for an API number and the features to enable */
+ __u64 api;
+ /*
+ * Kernel answers below with the all available features for
+ * the API, this notifies userland of which events and/or
+ * which flags for each event are enabled in the current
+ * kernel.
+ *
+ * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE
+ * are to be considered implicitly always enabled in all kernels as
+ * long as the uffdio_api.api requested matches UFFD_API.
+ *
+ * UFFD_FEATURE_MISSING_HUGETLBFS means an UFFDIO_REGISTER
+ * with UFFDIO_REGISTER_MODE_MISSING mode will succeed on
+ * hugetlbfs virtual memory ranges. Adding or not adding
+ * UFFD_FEATURE_MISSING_HUGETLBFS to uffdio_api.features has
+ * no real functional effect after UFFDIO_API returns, but
+ * it's only useful for an initial feature set probe at
+ * UFFDIO_API time. There are two ways to use it:
+ *
+ * 1) by adding UFFD_FEATURE_MISSING_HUGETLBFS to the
+ * uffdio_api.features before calling UFFDIO_API, an error
+ * will be returned by UFFDIO_API on a kernel without
+ * hugetlbfs missing support
+ *
+ * 2) the UFFD_FEATURE_MISSING_HUGETLBFS can not be added in
+ * uffdio_api.features and instead it will be set by the
+ * kernel in the uffdio_api.features if the kernel supports
+ * it, so userland can later check if the feature flag is
+ * present in uffdio_api.features after UFFDIO_API
+ * succeeded.
+ *
+ * UFFD_FEATURE_MISSING_SHMEM works the same as
+ * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem
+ * (i.e. tmpfs and other shmem based APIs).
+ *
+ * UFFD_FEATURE_SIGBUS feature means no page-fault
+ * (UFFD_EVENT_PAGEFAULT) event will be delivered, instead
+ * a SIGBUS signal will be sent to the faulting process.
+ *
+ * UFFD_FEATURE_THREAD_ID pid of the page faulted task_struct will
+ * be returned, if feature is not requested 0 will be returned.
+ *
+ * UFFD_FEATURE_MINOR_HUGETLBFS indicates that minor faults
+ * can be intercepted (via REGISTER_MODE_MINOR) for
+ * hugetlbfs-backed pages.
+ *
+ * UFFD_FEATURE_MINOR_SHMEM indicates the same support as
+ * UFFD_FEATURE_MINOR_HUGETLBFS, but for shmem-backed pages instead.
+ *
+ * UFFD_FEATURE_EXACT_ADDRESS indicates that the exact address of page
+ * faults would be provided and the offset within the page would not be
+ * masked.
+ *
+ * UFFD_FEATURE_WP_HUGETLBFS_SHMEM indicates that userfaultfd
+ * write-protection mode is supported on both shmem and hugetlbfs.
+ *
+ * UFFD_FEATURE_WP_UNPOPULATED indicates that userfaultfd
+ * write-protection mode will always apply to unpopulated pages
+ * (i.e. empty ptes). This will be the default behavior for shmem
+ * & hugetlbfs, so this flag only affects anonymous memory behavior
+ * when userfault write-protection mode is registered.
+ *
+ * UFFD_FEATURE_WP_ASYNC indicates that userfaultfd write-protection
+ * asynchronous mode is supported in which the write fault is
+ * automatically resolved and write-protection is un-set.
+ * It implies UFFD_FEATURE_WP_UNPOPULATED.
+ *
+ * UFFD_FEATURE_MOVE indicates that the kernel supports moving an
+ * existing page contents from userspace.
+ */
+#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
+#define UFFD_FEATURE_EVENT_FORK (1<<1)
+#define UFFD_FEATURE_EVENT_REMAP (1<<2)
+#define UFFD_FEATURE_EVENT_REMOVE (1<<3)
+#define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4)
+#define UFFD_FEATURE_MISSING_SHMEM (1<<5)
+#define UFFD_FEATURE_EVENT_UNMAP (1<<6)
+#define UFFD_FEATURE_SIGBUS (1<<7)
+#define UFFD_FEATURE_THREAD_ID (1<<8)
+#define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9)
+#define UFFD_FEATURE_MINOR_SHMEM (1<<10)
+#define UFFD_FEATURE_EXACT_ADDRESS (1<<11)
+#define UFFD_FEATURE_WP_HUGETLBFS_SHMEM (1<<12)
+#define UFFD_FEATURE_WP_UNPOPULATED (1<<13)
+#define UFFD_FEATURE_POISON (1<<14)
+#define UFFD_FEATURE_WP_ASYNC (1<<15)
+#define UFFD_FEATURE_MOVE (1<<16)
+ __u64 features;
+
+ __u64 ioctls;
+};
+
+struct uffdio_range {
+ __u64 start;
+ __u64 len;
+};
+
+struct uffdio_register {
+ struct uffdio_range range;
+#define UFFDIO_REGISTER_MODE_MISSING ((__u64)1<<0)
+#define UFFDIO_REGISTER_MODE_WP ((__u64)1<<1)
+#define UFFDIO_REGISTER_MODE_MINOR ((__u64)1<<2)
+ __u64 mode;
+
+ /*
+ * kernel answers which ioctl commands are available for the
+ * range, keep at the end as the last 8 bytes aren't read.
+ */
+ __u64 ioctls;
+};
+
+struct uffdio_copy {
+ __u64 dst;
+ __u64 src;
+ __u64 len;
+#define UFFDIO_COPY_MODE_DONTWAKE ((__u64)1<<0)
+ /*
+ * UFFDIO_COPY_MODE_WP will map the page write protected on
+ * the fly. UFFDIO_COPY_MODE_WP is available only if the
+ * write protected ioctl is implemented for the range
+ * according to the uffdio_register.ioctls.
+ */
+#define UFFDIO_COPY_MODE_WP ((__u64)1<<1)
+ __u64 mode;
+
+ /*
+ * "copy" is written by the ioctl and must be at the end: the
+ * copy_from_user will not read the last 8 bytes.
+ */
+ __s64 copy;
+};
+
+struct uffdio_zeropage {
+ struct uffdio_range range;
+#define UFFDIO_ZEROPAGE_MODE_DONTWAKE ((__u64)1<<0)
+ __u64 mode;
+
+ /*
+ * "zeropage" is written by the ioctl and must be at the end:
+ * the copy_from_user will not read the last 8 bytes.
+ */
+ __s64 zeropage;
+};
+
+struct uffdio_writeprotect {
+ struct uffdio_range range;
+/*
+ * UFFDIO_WRITEPROTECT_MODE_WP: set the flag to write protect a range,
+ * unset the flag to undo protection of a range which was previously
+ * write protected.
+ *
+ * UFFDIO_WRITEPROTECT_MODE_DONTWAKE: set the flag to avoid waking up
+ * any wait thread after the operation succeeds.
+ *
+ * NOTE: Write protecting a region (WP=1) is unrelated to page faults,
+ * therefore DONTWAKE flag is meaningless with WP=1. Removing write
+ * protection (WP=0) in response to a page fault wakes the faulting
+ * task unless DONTWAKE is set.
+ */
+#define UFFDIO_WRITEPROTECT_MODE_WP ((__u64)1<<0)
+#define UFFDIO_WRITEPROTECT_MODE_DONTWAKE ((__u64)1<<1)
+ __u64 mode;
+};
+
+struct uffdio_continue {
+ struct uffdio_range range;
+#define UFFDIO_CONTINUE_MODE_DONTWAKE ((__u64)1<<0)
+ /*
+ * UFFDIO_CONTINUE_MODE_WP will map the page write protected on
+ * the fly. UFFDIO_CONTINUE_MODE_WP is available only if the
+ * write protected ioctl is implemented for the range
+ * according to the uffdio_register.ioctls.
+ */
+#define UFFDIO_CONTINUE_MODE_WP ((__u64)1<<1)
+ __u64 mode;
+
+ /*
+ * Fields below here are written by the ioctl and must be at the end:
+ * the copy_from_user will not read past here.
+ */
+ __s64 mapped;
+};
+
+struct uffdio_poison {
+ struct uffdio_range range;
+#define UFFDIO_POISON_MODE_DONTWAKE ((__u64)1<<0)
+ __u64 mode;
+
+ /*
+ * Fields below here are written by the ioctl and must be at the end:
+ * the copy_from_user will not read past here.
+ */
+ __s64 updated;
+};
+
+struct uffdio_move {
+ __u64 dst;
+ __u64 src;
+ __u64 len;
+ /*
+ * Especially if used to atomically remove memory from the
+ * address space the wake on the dst range is not needed.
+ */
+#define UFFDIO_MOVE_MODE_DONTWAKE ((__u64)1<<0)
+#define UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES ((__u64)1<<1)
+ __u64 mode;
+ /*
+ * "move" is written by the ioctl and must be at the end: the
+ * copy_from_user will not read the last 8 bytes.
+ */
+ __s64 move;
+};
+
+/*
+ * Flags for the userfaultfd(2) system call itself.
+ */
+
+/*
+ * Create a userfaultfd that can handle page faults only in user mode.
+ */
+#define UFFD_USER_MODE_ONLY 1
+
+#endif /* _LINUX_USERFAULTFD_H */
diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h
deleted file mode 100644
index f9f115a7c75b..000000000000
--- a/tools/include/uapi/linux/vhost.h
+++ /dev/null
@@ -1,183 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _LINUX_VHOST_H
-#define _LINUX_VHOST_H
-/* Userspace interface for in-kernel virtio accelerators. */
-
-/* vhost is used to reduce the number of system calls involved in virtio.
- *
- * Existing virtio net code is used in the guest without modification.
- *
- * This header includes interface used by userspace hypervisor for
- * device configuration.
- */
-
-#include <linux/vhost_types.h>
-#include <linux/types.h>
-#include <linux/ioctl.h>
-
-#define VHOST_FILE_UNBIND -1
-
-/* ioctls */
-
-#define VHOST_VIRTIO 0xAF
-
-/* Features bitmask for forward compatibility. Transport bits are used for
- * vhost specific features. */
-#define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64)
-#define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64)
-
-/* Set current process as the (exclusive) owner of this file descriptor. This
- * must be called before any other vhost command. Further calls to
- * VHOST_OWNER_SET fail until VHOST_OWNER_RESET is called. */
-#define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01)
-/* Give up ownership, and reset the device to default values.
- * Allows subsequent call to VHOST_OWNER_SET to succeed. */
-#define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02)
-
-/* Set up/modify memory layout */
-#define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory)
-
-/* Write logging setup. */
-/* Memory writes can optionally be logged by setting bit at an offset
- * (calculated from the physical address) from specified log base.
- * The bit is set using an atomic 32 bit operation. */
-/* Set base address for logging. */
-#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64)
-/* Specify an eventfd file descriptor to signal on log write. */
-#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int)
-
-/* Ring setup. */
-/* Set number of descriptors in ring. This parameter can not
- * be modified while ring is running (bound to a device). */
-#define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state)
-/* Set addresses for the ring. */
-#define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr)
-/* Base value where queue looks for available descriptors */
-#define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
-/* Get accessor: reads index, writes value in num */
-#define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
-
-/* Set the vring byte order in num. Valid values are VHOST_VRING_LITTLE_ENDIAN
- * or VHOST_VRING_BIG_ENDIAN (other values return -EINVAL).
- * The byte order cannot be changed while the device is active: trying to do so
- * returns -EBUSY.
- * This is a legacy only API that is simply ignored when VIRTIO_F_VERSION_1 is
- * set.
- * Not all kernel configurations support this ioctl, but all configurations that
- * support SET also support GET.
- */
-#define VHOST_VRING_LITTLE_ENDIAN 0
-#define VHOST_VRING_BIG_ENDIAN 1
-#define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
-#define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
-
-/* The following ioctls use eventfd file descriptors to signal and poll
- * for events. */
-
-/* Set eventfd to poll for added buffers */
-#define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file)
-/* Set eventfd to signal when buffers have beed used */
-#define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file)
-/* Set eventfd to signal an error */
-#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)
-/* Set busy loop timeout (in us) */
-#define VHOST_SET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x23, \
- struct vhost_vring_state)
-/* Get busy loop timeout (in us) */
-#define VHOST_GET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x24, \
- struct vhost_vring_state)
-
-/* Set or get vhost backend capability */
-
-#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
-#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
-
-/* VHOST_NET specific defines */
-
-/* Attach virtio net ring to a raw socket, or tap device.
- * The socket must be already bound to an ethernet device, this device will be
- * used for transmit. Pass fd -1 to unbind from the socket and the transmit
- * device. This can be used to stop the ring (e.g. for migration). */
-#define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file)
-
-/* VHOST_SCSI specific defines */
-
-#define VHOST_SCSI_SET_ENDPOINT _IOW(VHOST_VIRTIO, 0x40, struct vhost_scsi_target)
-#define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct vhost_scsi_target)
-/* Changing this breaks userspace. */
-#define VHOST_SCSI_GET_ABI_VERSION _IOW(VHOST_VIRTIO, 0x42, int)
-/* Set and get the events missed flag */
-#define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32)
-#define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32)
-
-/* VHOST_VSOCK specific defines */
-
-#define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u64)
-#define VHOST_VSOCK_SET_RUNNING _IOW(VHOST_VIRTIO, 0x61, int)
-
-/* VHOST_VDPA specific defines */
-
-/* Get the device id. The device ids follow the same definition of
- * the device id defined in virtio-spec.
- */
-#define VHOST_VDPA_GET_DEVICE_ID _IOR(VHOST_VIRTIO, 0x70, __u32)
-/* Get and set the status. The status bits follow the same definition
- * of the device status defined in virtio-spec.
- */
-#define VHOST_VDPA_GET_STATUS _IOR(VHOST_VIRTIO, 0x71, __u8)
-#define VHOST_VDPA_SET_STATUS _IOW(VHOST_VIRTIO, 0x72, __u8)
-/* Get and set the device config. The device config follows the same
- * definition of the device config defined in virtio-spec.
- */
-#define VHOST_VDPA_GET_CONFIG _IOR(VHOST_VIRTIO, 0x73, \
- struct vhost_vdpa_config)
-#define VHOST_VDPA_SET_CONFIG _IOW(VHOST_VIRTIO, 0x74, \
- struct vhost_vdpa_config)
-/* Enable/disable the ring. */
-#define VHOST_VDPA_SET_VRING_ENABLE _IOW(VHOST_VIRTIO, 0x75, \
- struct vhost_vring_state)
-/* Get the max ring size. */
-#define VHOST_VDPA_GET_VRING_NUM _IOR(VHOST_VIRTIO, 0x76, __u16)
-
-/* Set event fd for config interrupt*/
-#define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int)
-
-/* Get the valid iova range */
-#define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \
- struct vhost_vdpa_iova_range)
-/* Get the config size */
-#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32)
-
-/* Get the count of all virtqueues */
-#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32)
-
-/* Get the number of virtqueue groups. */
-#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32)
-
-/* Get the number of address spaces. */
-#define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int)
-
-/* Get the group for a virtqueue: read index, write group in num,
- * The virtqueue index is stored in the index field of
- * vhost_vring_state. The group for this specific virtqueue is
- * returned via num field of vhost_vring_state.
- */
-#define VHOST_VDPA_GET_VRING_GROUP _IOWR(VHOST_VIRTIO, 0x7B, \
- struct vhost_vring_state)
-/* Set the ASID for a virtqueue group. The group index is stored in
- * the index field of vhost_vring_state, the ASID associated with this
- * group is stored at num field of vhost_vring_state.
- */
-#define VHOST_VDPA_SET_GROUP_ASID _IOW(VHOST_VIRTIO, 0x7C, \
- struct vhost_vring_state)
-
-/* Suspend a device so it does not process virtqueue requests anymore
- *
- * After the return of ioctl the device must preserve all the necessary state
- * (the virtqueue vring base plus the possible device specific states) that is
- * required for restoring in the future. The device must not change its
- * configuration after that point.
- */
-#define VHOST_VDPA_SUSPEND _IO(VHOST_VIRTIO, 0x7D)
-
-#endif
diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h
deleted file mode 100644
index de6810e94abe..000000000000
--- a/tools/include/uapi/sound/asound.h
+++ /dev/null
@@ -1,1166 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
-/*
- * Advanced Linux Sound Architecture - ALSA - Driver
- * Copyright (c) 1994-2003 by Jaroslav Kysela <perex@perex.cz>,
- * Abramo Bagnara <abramo@alsa-project.org>
- */
-
-#ifndef _UAPI__SOUND_ASOUND_H
-#define _UAPI__SOUND_ASOUND_H
-
-#if defined(__KERNEL__) || defined(__linux__)
-#include <linux/types.h>
-#include <asm/byteorder.h>
-#else
-#include <endian.h>
-#include <sys/ioctl.h>
-#endif
-
-#ifndef __KERNEL__
-#include <stdlib.h>
-#include <time.h>
-#endif
-
-/*
- * protocol version
- */
-
-#define SNDRV_PROTOCOL_VERSION(major, minor, subminor) (((major)<<16)|((minor)<<8)|(subminor))
-#define SNDRV_PROTOCOL_MAJOR(version) (((version)>>16)&0xffff)
-#define SNDRV_PROTOCOL_MINOR(version) (((version)>>8)&0xff)
-#define SNDRV_PROTOCOL_MICRO(version) ((version)&0xff)
-#define SNDRV_PROTOCOL_INCOMPATIBLE(kversion, uversion) \
- (SNDRV_PROTOCOL_MAJOR(kversion) != SNDRV_PROTOCOL_MAJOR(uversion) || \
- (SNDRV_PROTOCOL_MAJOR(kversion) == SNDRV_PROTOCOL_MAJOR(uversion) && \
- SNDRV_PROTOCOL_MINOR(kversion) != SNDRV_PROTOCOL_MINOR(uversion)))
-
-/****************************************************************************
- * *
- * Digital audio interface *
- * *
- ****************************************************************************/
-
-#define AES_IEC958_STATUS_SIZE 24
-
-struct snd_aes_iec958 {
- unsigned char status[AES_IEC958_STATUS_SIZE]; /* AES/IEC958 channel status bits */
- unsigned char subcode[147]; /* AES/IEC958 subcode bits */
- unsigned char pad; /* nothing */
- unsigned char dig_subframe[4]; /* AES/IEC958 subframe bits */
-};
-
-/****************************************************************************
- * *
- * CEA-861 Audio InfoFrame. Used in HDMI and DisplayPort *
- * *
- ****************************************************************************/
-
-struct snd_cea_861_aud_if {
- unsigned char db1_ct_cc; /* coding type and channel count */
- unsigned char db2_sf_ss; /* sample frequency and size */
- unsigned char db3; /* not used, all zeros */
- unsigned char db4_ca; /* channel allocation code */
- unsigned char db5_dminh_lsv; /* downmix inhibit & level-shit values */
-};
-
-/****************************************************************************
- * *
- * Section for driver hardware dependent interface - /dev/snd/hw? *
- * *
- ****************************************************************************/
-
-#define SNDRV_HWDEP_VERSION SNDRV_PROTOCOL_VERSION(1, 0, 1)
-
-enum {
- SNDRV_HWDEP_IFACE_OPL2 = 0,
- SNDRV_HWDEP_IFACE_OPL3,
- SNDRV_HWDEP_IFACE_OPL4,
- SNDRV_HWDEP_IFACE_SB16CSP, /* Creative Signal Processor */
- SNDRV_HWDEP_IFACE_EMU10K1, /* FX8010 processor in EMU10K1 chip */
- SNDRV_HWDEP_IFACE_YSS225, /* Yamaha FX processor */
- SNDRV_HWDEP_IFACE_ICS2115, /* Wavetable synth */
- SNDRV_HWDEP_IFACE_SSCAPE, /* Ensoniq SoundScape ISA card (MC68EC000) */
- SNDRV_HWDEP_IFACE_VX, /* Digigram VX cards */
- SNDRV_HWDEP_IFACE_MIXART, /* Digigram miXart cards */
- SNDRV_HWDEP_IFACE_USX2Y, /* Tascam US122, US224 & US428 usb */
- SNDRV_HWDEP_IFACE_EMUX_WAVETABLE, /* EmuX wavetable */
- SNDRV_HWDEP_IFACE_BLUETOOTH, /* Bluetooth audio */
- SNDRV_HWDEP_IFACE_USX2Y_PCM, /* Tascam US122, US224 & US428 rawusb pcm */
- SNDRV_HWDEP_IFACE_PCXHR, /* Digigram PCXHR */
- SNDRV_HWDEP_IFACE_SB_RC, /* SB Extigy/Audigy2NX remote control */
- SNDRV_HWDEP_IFACE_HDA, /* HD-audio */
- SNDRV_HWDEP_IFACE_USB_STREAM, /* direct access to usb stream */
- SNDRV_HWDEP_IFACE_FW_DICE, /* TC DICE FireWire device */
- SNDRV_HWDEP_IFACE_FW_FIREWORKS, /* Echo Audio Fireworks based device */
- SNDRV_HWDEP_IFACE_FW_BEBOB, /* BridgeCo BeBoB based device */
- SNDRV_HWDEP_IFACE_FW_OXFW, /* Oxford OXFW970/971 based device */
- SNDRV_HWDEP_IFACE_FW_DIGI00X, /* Digidesign Digi 002/003 family */
- SNDRV_HWDEP_IFACE_FW_TASCAM, /* TASCAM FireWire series */
- SNDRV_HWDEP_IFACE_LINE6, /* Line6 USB processors */
- SNDRV_HWDEP_IFACE_FW_MOTU, /* MOTU FireWire series */
- SNDRV_HWDEP_IFACE_FW_FIREFACE, /* RME Fireface series */
-
- /* Don't forget to change the following: */
- SNDRV_HWDEP_IFACE_LAST = SNDRV_HWDEP_IFACE_FW_FIREFACE
-};
-
-struct snd_hwdep_info {
- unsigned int device; /* WR: device number */
- int card; /* R: card number */
- unsigned char id[64]; /* ID (user selectable) */
- unsigned char name[80]; /* hwdep name */
- int iface; /* hwdep interface */
- unsigned char reserved[64]; /* reserved for future */
-};
-
-/* generic DSP loader */
-struct snd_hwdep_dsp_status {
- unsigned int version; /* R: driver-specific version */
- unsigned char id[32]; /* R: driver-specific ID string */
- unsigned int num_dsps; /* R: number of DSP images to transfer */
- unsigned int dsp_loaded; /* R: bit flags indicating the loaded DSPs */
- unsigned int chip_ready; /* R: 1 = initialization finished */
- unsigned char reserved[16]; /* reserved for future use */
-};
-
-struct snd_hwdep_dsp_image {
- unsigned int index; /* W: DSP index */
- unsigned char name[64]; /* W: ID (e.g. file name) */
- unsigned char __user *image; /* W: binary image */
- size_t length; /* W: size of image in bytes */
- unsigned long driver_data; /* W: driver-specific data */
-};
-
-#define SNDRV_HWDEP_IOCTL_PVERSION _IOR ('H', 0x00, int)
-#define SNDRV_HWDEP_IOCTL_INFO _IOR ('H', 0x01, struct snd_hwdep_info)
-#define SNDRV_HWDEP_IOCTL_DSP_STATUS _IOR('H', 0x02, struct snd_hwdep_dsp_status)
-#define SNDRV_HWDEP_IOCTL_DSP_LOAD _IOW('H', 0x03, struct snd_hwdep_dsp_image)
-
-/*****************************************************************************
- * *
- * Digital Audio (PCM) interface - /dev/snd/pcm?? *
- * *
- *****************************************************************************/
-
-#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 15)
-
-typedef unsigned long snd_pcm_uframes_t;
-typedef signed long snd_pcm_sframes_t;
-
-enum {
- SNDRV_PCM_CLASS_GENERIC = 0, /* standard mono or stereo device */
- SNDRV_PCM_CLASS_MULTI, /* multichannel device */
- SNDRV_PCM_CLASS_MODEM, /* software modem class */
- SNDRV_PCM_CLASS_DIGITIZER, /* digitizer class */
- /* Don't forget to change the following: */
- SNDRV_PCM_CLASS_LAST = SNDRV_PCM_CLASS_DIGITIZER,
-};
-
-enum {
- SNDRV_PCM_SUBCLASS_GENERIC_MIX = 0, /* mono or stereo subdevices are mixed together */
- SNDRV_PCM_SUBCLASS_MULTI_MIX, /* multichannel subdevices are mixed together */
- /* Don't forget to change the following: */
- SNDRV_PCM_SUBCLASS_LAST = SNDRV_PCM_SUBCLASS_MULTI_MIX,
-};
-
-enum {
- SNDRV_PCM_STREAM_PLAYBACK = 0,
- SNDRV_PCM_STREAM_CAPTURE,
- SNDRV_PCM_STREAM_LAST = SNDRV_PCM_STREAM_CAPTURE,
-};
-
-typedef int __bitwise snd_pcm_access_t;
-#define SNDRV_PCM_ACCESS_MMAP_INTERLEAVED ((__force snd_pcm_access_t) 0) /* interleaved mmap */
-#define SNDRV_PCM_ACCESS_MMAP_NONINTERLEAVED ((__force snd_pcm_access_t) 1) /* noninterleaved mmap */
-#define SNDRV_PCM_ACCESS_MMAP_COMPLEX ((__force snd_pcm_access_t) 2) /* complex mmap */
-#define SNDRV_PCM_ACCESS_RW_INTERLEAVED ((__force snd_pcm_access_t) 3) /* readi/writei */
-#define SNDRV_PCM_ACCESS_RW_NONINTERLEAVED ((__force snd_pcm_access_t) 4) /* readn/writen */
-#define SNDRV_PCM_ACCESS_LAST SNDRV_PCM_ACCESS_RW_NONINTERLEAVED
-
-typedef int __bitwise snd_pcm_format_t;
-#define SNDRV_PCM_FORMAT_S8 ((__force snd_pcm_format_t) 0)
-#define SNDRV_PCM_FORMAT_U8 ((__force snd_pcm_format_t) 1)
-#define SNDRV_PCM_FORMAT_S16_LE ((__force snd_pcm_format_t) 2)
-#define SNDRV_PCM_FORMAT_S16_BE ((__force snd_pcm_format_t) 3)
-#define SNDRV_PCM_FORMAT_U16_LE ((__force snd_pcm_format_t) 4)
-#define SNDRV_PCM_FORMAT_U16_BE ((__force snd_pcm_format_t) 5)
-#define SNDRV_PCM_FORMAT_S24_LE ((__force snd_pcm_format_t) 6) /* low three bytes */
-#define SNDRV_PCM_FORMAT_S24_BE ((__force snd_pcm_format_t) 7) /* low three bytes */
-#define SNDRV_PCM_FORMAT_U24_LE ((__force snd_pcm_format_t) 8) /* low three bytes */
-#define SNDRV_PCM_FORMAT_U24_BE ((__force snd_pcm_format_t) 9) /* low three bytes */
-/*
- * For S32/U32 formats, 'msbits' hardware parameter is often used to deliver information about the
- * available bit count in most significant bit. It's for the case of so-called 'left-justified' or
- * `right-padding` sample which has less width than 32 bit.
- */
-#define SNDRV_PCM_FORMAT_S32_LE ((__force snd_pcm_format_t) 10)
-#define SNDRV_PCM_FORMAT_S32_BE ((__force snd_pcm_format_t) 11)
-#define SNDRV_PCM_FORMAT_U32_LE ((__force snd_pcm_format_t) 12)
-#define SNDRV_PCM_FORMAT_U32_BE ((__force snd_pcm_format_t) 13)
-#define SNDRV_PCM_FORMAT_FLOAT_LE ((__force snd_pcm_format_t) 14) /* 4-byte float, IEEE-754 32-bit, range -1.0 to 1.0 */
-#define SNDRV_PCM_FORMAT_FLOAT_BE ((__force snd_pcm_format_t) 15) /* 4-byte float, IEEE-754 32-bit, range -1.0 to 1.0 */
-#define SNDRV_PCM_FORMAT_FLOAT64_LE ((__force snd_pcm_format_t) 16) /* 8-byte float, IEEE-754 64-bit, range -1.0 to 1.0 */
-#define SNDRV_PCM_FORMAT_FLOAT64_BE ((__force snd_pcm_format_t) 17) /* 8-byte float, IEEE-754 64-bit, range -1.0 to 1.0 */
-#define SNDRV_PCM_FORMAT_IEC958_SUBFRAME_LE ((__force snd_pcm_format_t) 18) /* IEC-958 subframe, Little Endian */
-#define SNDRV_PCM_FORMAT_IEC958_SUBFRAME_BE ((__force snd_pcm_format_t) 19) /* IEC-958 subframe, Big Endian */
-#define SNDRV_PCM_FORMAT_MU_LAW ((__force snd_pcm_format_t) 20)
-#define SNDRV_PCM_FORMAT_A_LAW ((__force snd_pcm_format_t) 21)
-#define SNDRV_PCM_FORMAT_IMA_ADPCM ((__force snd_pcm_format_t) 22)
-#define SNDRV_PCM_FORMAT_MPEG ((__force snd_pcm_format_t) 23)
-#define SNDRV_PCM_FORMAT_GSM ((__force snd_pcm_format_t) 24)
-#define SNDRV_PCM_FORMAT_S20_LE ((__force snd_pcm_format_t) 25) /* in four bytes, LSB justified */
-#define SNDRV_PCM_FORMAT_S20_BE ((__force snd_pcm_format_t) 26) /* in four bytes, LSB justified */
-#define SNDRV_PCM_FORMAT_U20_LE ((__force snd_pcm_format_t) 27) /* in four bytes, LSB justified */
-#define SNDRV_PCM_FORMAT_U20_BE ((__force snd_pcm_format_t) 28) /* in four bytes, LSB justified */
-/* gap in the numbering for a future standard linear format */
-#define SNDRV_PCM_FORMAT_SPECIAL ((__force snd_pcm_format_t) 31)
-#define SNDRV_PCM_FORMAT_S24_3LE ((__force snd_pcm_format_t) 32) /* in three bytes */
-#define SNDRV_PCM_FORMAT_S24_3BE ((__force snd_pcm_format_t) 33) /* in three bytes */
-#define SNDRV_PCM_FORMAT_U24_3LE ((__force snd_pcm_format_t) 34) /* in three bytes */
-#define SNDRV_PCM_FORMAT_U24_3BE ((__force snd_pcm_format_t) 35) /* in three bytes */
-#define SNDRV_PCM_FORMAT_S20_3LE ((__force snd_pcm_format_t) 36) /* in three bytes */
-#define SNDRV_PCM_FORMAT_S20_3BE ((__force snd_pcm_format_t) 37) /* in three bytes */
-#define SNDRV_PCM_FORMAT_U20_3LE ((__force snd_pcm_format_t) 38) /* in three bytes */
-#define SNDRV_PCM_FORMAT_U20_3BE ((__force snd_pcm_format_t) 39) /* in three bytes */
-#define SNDRV_PCM_FORMAT_S18_3LE ((__force snd_pcm_format_t) 40) /* in three bytes */
-#define SNDRV_PCM_FORMAT_S18_3BE ((__force snd_pcm_format_t) 41) /* in three bytes */
-#define SNDRV_PCM_FORMAT_U18_3LE ((__force snd_pcm_format_t) 42) /* in three bytes */
-#define SNDRV_PCM_FORMAT_U18_3BE ((__force snd_pcm_format_t) 43) /* in three bytes */
-#define SNDRV_PCM_FORMAT_G723_24 ((__force snd_pcm_format_t) 44) /* 8 samples in 3 bytes */
-#define SNDRV_PCM_FORMAT_G723_24_1B ((__force snd_pcm_format_t) 45) /* 1 sample in 1 byte */
-#define SNDRV_PCM_FORMAT_G723_40 ((__force snd_pcm_format_t) 46) /* 8 Samples in 5 bytes */
-#define SNDRV_PCM_FORMAT_G723_40_1B ((__force snd_pcm_format_t) 47) /* 1 sample in 1 byte */
-#define SNDRV_PCM_FORMAT_DSD_U8 ((__force snd_pcm_format_t) 48) /* DSD, 1-byte samples DSD (x8) */
-#define SNDRV_PCM_FORMAT_DSD_U16_LE ((__force snd_pcm_format_t) 49) /* DSD, 2-byte samples DSD (x16), little endian */
-#define SNDRV_PCM_FORMAT_DSD_U32_LE ((__force snd_pcm_format_t) 50) /* DSD, 4-byte samples DSD (x32), little endian */
-#define SNDRV_PCM_FORMAT_DSD_U16_BE ((__force snd_pcm_format_t) 51) /* DSD, 2-byte samples DSD (x16), big endian */
-#define SNDRV_PCM_FORMAT_DSD_U32_BE ((__force snd_pcm_format_t) 52) /* DSD, 4-byte samples DSD (x32), big endian */
-#define SNDRV_PCM_FORMAT_LAST SNDRV_PCM_FORMAT_DSD_U32_BE
-#define SNDRV_PCM_FORMAT_FIRST SNDRV_PCM_FORMAT_S8
-
-#ifdef SNDRV_LITTLE_ENDIAN
-#define SNDRV_PCM_FORMAT_S16 SNDRV_PCM_FORMAT_S16_LE
-#define SNDRV_PCM_FORMAT_U16 SNDRV_PCM_FORMAT_U16_LE
-#define SNDRV_PCM_FORMAT_S24 SNDRV_PCM_FORMAT_S24_LE
-#define SNDRV_PCM_FORMAT_U24 SNDRV_PCM_FORMAT_U24_LE
-#define SNDRV_PCM_FORMAT_S32 SNDRV_PCM_FORMAT_S32_LE
-#define SNDRV_PCM_FORMAT_U32 SNDRV_PCM_FORMAT_U32_LE
-#define SNDRV_PCM_FORMAT_FLOAT SNDRV_PCM_FORMAT_FLOAT_LE
-#define SNDRV_PCM_FORMAT_FLOAT64 SNDRV_PCM_FORMAT_FLOAT64_LE
-#define SNDRV_PCM_FORMAT_IEC958_SUBFRAME SNDRV_PCM_FORMAT_IEC958_SUBFRAME_LE
-#define SNDRV_PCM_FORMAT_S20 SNDRV_PCM_FORMAT_S20_LE
-#define SNDRV_PCM_FORMAT_U20 SNDRV_PCM_FORMAT_U20_LE
-#endif
-#ifdef SNDRV_BIG_ENDIAN
-#define SNDRV_PCM_FORMAT_S16 SNDRV_PCM_FORMAT_S16_BE
-#define SNDRV_PCM_FORMAT_U16 SNDRV_PCM_FORMAT_U16_BE
-#define SNDRV_PCM_FORMAT_S24 SNDRV_PCM_FORMAT_S24_BE
-#define SNDRV_PCM_FORMAT_U24 SNDRV_PCM_FORMAT_U24_BE
-#define SNDRV_PCM_FORMAT_S32 SNDRV_PCM_FORMAT_S32_BE
-#define SNDRV_PCM_FORMAT_U32 SNDRV_PCM_FORMAT_U32_BE
-#define SNDRV_PCM_FORMAT_FLOAT SNDRV_PCM_FORMAT_FLOAT_BE
-#define SNDRV_PCM_FORMAT_FLOAT64 SNDRV_PCM_FORMAT_FLOAT64_BE
-#define SNDRV_PCM_FORMAT_IEC958_SUBFRAME SNDRV_PCM_FORMAT_IEC958_SUBFRAME_BE
-#define SNDRV_PCM_FORMAT_S20 SNDRV_PCM_FORMAT_S20_BE
-#define SNDRV_PCM_FORMAT_U20 SNDRV_PCM_FORMAT_U20_BE
-#endif
-
-typedef int __bitwise snd_pcm_subformat_t;
-#define SNDRV_PCM_SUBFORMAT_STD ((__force snd_pcm_subformat_t) 0)
-#define SNDRV_PCM_SUBFORMAT_LAST SNDRV_PCM_SUBFORMAT_STD
-
-#define SNDRV_PCM_INFO_MMAP 0x00000001 /* hardware supports mmap */
-#define SNDRV_PCM_INFO_MMAP_VALID 0x00000002 /* period data are valid during transfer */
-#define SNDRV_PCM_INFO_DOUBLE 0x00000004 /* Double buffering needed for PCM start/stop */
-#define SNDRV_PCM_INFO_BATCH 0x00000010 /* double buffering */
-#define SNDRV_PCM_INFO_SYNC_APPLPTR 0x00000020 /* need the explicit sync of appl_ptr update */
-#define SNDRV_PCM_INFO_INTERLEAVED 0x00000100 /* channels are interleaved */
-#define SNDRV_PCM_INFO_NONINTERLEAVED 0x00000200 /* channels are not interleaved */
-#define SNDRV_PCM_INFO_COMPLEX 0x00000400 /* complex frame organization (mmap only) */
-#define SNDRV_PCM_INFO_BLOCK_TRANSFER 0x00010000 /* hardware transfer block of samples */
-#define SNDRV_PCM_INFO_OVERRANGE 0x00020000 /* hardware supports ADC (capture) overrange detection */
-#define SNDRV_PCM_INFO_RESUME 0x00040000 /* hardware supports stream resume after suspend */
-#define SNDRV_PCM_INFO_PAUSE 0x00080000 /* pause ioctl is supported */
-#define SNDRV_PCM_INFO_HALF_DUPLEX 0x00100000 /* only half duplex */
-#define SNDRV_PCM_INFO_JOINT_DUPLEX 0x00200000 /* playback and capture stream are somewhat correlated */
-#define SNDRV_PCM_INFO_SYNC_START 0x00400000 /* pcm support some kind of sync go */
-#define SNDRV_PCM_INFO_NO_PERIOD_WAKEUP 0x00800000 /* period wakeup can be disabled */
-#define SNDRV_PCM_INFO_HAS_WALL_CLOCK 0x01000000 /* (Deprecated)has audio wall clock for audio/system time sync */
-#define SNDRV_PCM_INFO_HAS_LINK_ATIME 0x01000000 /* report hardware link audio time, reset on startup */
-#define SNDRV_PCM_INFO_HAS_LINK_ABSOLUTE_ATIME 0x02000000 /* report absolute hardware link audio time, not reset on startup */
-#define SNDRV_PCM_INFO_HAS_LINK_ESTIMATED_ATIME 0x04000000 /* report estimated link audio time */
-#define SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME 0x08000000 /* report synchronized audio/system time */
-#define SNDRV_PCM_INFO_EXPLICIT_SYNC 0x10000000 /* needs explicit sync of pointers and data */
-#define SNDRV_PCM_INFO_NO_REWINDS 0x20000000 /* hardware can only support monotonic changes of appl_ptr */
-#define SNDRV_PCM_INFO_DRAIN_TRIGGER 0x40000000 /* internal kernel flag - trigger in drain */
-#define SNDRV_PCM_INFO_FIFO_IN_FRAMES 0x80000000 /* internal kernel flag - FIFO size is in frames */
-
-#if (__BITS_PER_LONG == 32 && defined(__USE_TIME_BITS64)) || defined __KERNEL__
-#define __SND_STRUCT_TIME64
-#endif
-
-typedef int __bitwise snd_pcm_state_t;
-#define SNDRV_PCM_STATE_OPEN ((__force snd_pcm_state_t) 0) /* stream is open */
-#define SNDRV_PCM_STATE_SETUP ((__force snd_pcm_state_t) 1) /* stream has a setup */
-#define SNDRV_PCM_STATE_PREPARED ((__force snd_pcm_state_t) 2) /* stream is ready to start */
-#define SNDRV_PCM_STATE_RUNNING ((__force snd_pcm_state_t) 3) /* stream is running */
-#define SNDRV_PCM_STATE_XRUN ((__force snd_pcm_state_t) 4) /* stream reached an xrun */
-#define SNDRV_PCM_STATE_DRAINING ((__force snd_pcm_state_t) 5) /* stream is draining */
-#define SNDRV_PCM_STATE_PAUSED ((__force snd_pcm_state_t) 6) /* stream is paused */
-#define SNDRV_PCM_STATE_SUSPENDED ((__force snd_pcm_state_t) 7) /* hardware is suspended */
-#define SNDRV_PCM_STATE_DISCONNECTED ((__force snd_pcm_state_t) 8) /* hardware is disconnected */
-#define SNDRV_PCM_STATE_LAST SNDRV_PCM_STATE_DISCONNECTED
-
-enum {
- SNDRV_PCM_MMAP_OFFSET_DATA = 0x00000000,
- SNDRV_PCM_MMAP_OFFSET_STATUS_OLD = 0x80000000,
- SNDRV_PCM_MMAP_OFFSET_CONTROL_OLD = 0x81000000,
- SNDRV_PCM_MMAP_OFFSET_STATUS_NEW = 0x82000000,
- SNDRV_PCM_MMAP_OFFSET_CONTROL_NEW = 0x83000000,
-#ifdef __SND_STRUCT_TIME64
- SNDRV_PCM_MMAP_OFFSET_STATUS = SNDRV_PCM_MMAP_OFFSET_STATUS_NEW,
- SNDRV_PCM_MMAP_OFFSET_CONTROL = SNDRV_PCM_MMAP_OFFSET_CONTROL_NEW,
-#else
- SNDRV_PCM_MMAP_OFFSET_STATUS = SNDRV_PCM_MMAP_OFFSET_STATUS_OLD,
- SNDRV_PCM_MMAP_OFFSET_CONTROL = SNDRV_PCM_MMAP_OFFSET_CONTROL_OLD,
-#endif
-};
-
-union snd_pcm_sync_id {
- unsigned char id[16];
- unsigned short id16[8];
- unsigned int id32[4];
-};
-
-struct snd_pcm_info {
- unsigned int device; /* RO/WR (control): device number */
- unsigned int subdevice; /* RO/WR (control): subdevice number */
- int stream; /* RO/WR (control): stream direction */
- int card; /* R: card number */
- unsigned char id[64]; /* ID (user selectable) */
- unsigned char name[80]; /* name of this device */
- unsigned char subname[32]; /* subdevice name */
- int dev_class; /* SNDRV_PCM_CLASS_* */
- int dev_subclass; /* SNDRV_PCM_SUBCLASS_* */
- unsigned int subdevices_count;
- unsigned int subdevices_avail;
- union snd_pcm_sync_id sync; /* hardware synchronization ID */
- unsigned char reserved[64]; /* reserved for future... */
-};
-
-typedef int snd_pcm_hw_param_t;
-#define SNDRV_PCM_HW_PARAM_ACCESS 0 /* Access type */
-#define SNDRV_PCM_HW_PARAM_FORMAT 1 /* Format */
-#define SNDRV_PCM_HW_PARAM_SUBFORMAT 2 /* Subformat */
-#define SNDRV_PCM_HW_PARAM_FIRST_MASK SNDRV_PCM_HW_PARAM_ACCESS
-#define SNDRV_PCM_HW_PARAM_LAST_MASK SNDRV_PCM_HW_PARAM_SUBFORMAT
-
-#define SNDRV_PCM_HW_PARAM_SAMPLE_BITS 8 /* Bits per sample */
-#define SNDRV_PCM_HW_PARAM_FRAME_BITS 9 /* Bits per frame */
-#define SNDRV_PCM_HW_PARAM_CHANNELS 10 /* Channels */
-#define SNDRV_PCM_HW_PARAM_RATE 11 /* Approx rate */
-#define SNDRV_PCM_HW_PARAM_PERIOD_TIME 12 /* Approx distance between
- * interrupts in us
- */
-#define SNDRV_PCM_HW_PARAM_PERIOD_SIZE 13 /* Approx frames between
- * interrupts
- */
-#define SNDRV_PCM_HW_PARAM_PERIOD_BYTES 14 /* Approx bytes between
- * interrupts
- */
-#define SNDRV_PCM_HW_PARAM_PERIODS 15 /* Approx interrupts per
- * buffer
- */
-#define SNDRV_PCM_HW_PARAM_BUFFER_TIME 16 /* Approx duration of buffer
- * in us
- */
-#define SNDRV_PCM_HW_PARAM_BUFFER_SIZE 17 /* Size of buffer in frames */
-#define SNDRV_PCM_HW_PARAM_BUFFER_BYTES 18 /* Size of buffer in bytes */
-#define SNDRV_PCM_HW_PARAM_TICK_TIME 19 /* Approx tick duration in us */
-#define SNDRV_PCM_HW_PARAM_FIRST_INTERVAL SNDRV_PCM_HW_PARAM_SAMPLE_BITS
-#define SNDRV_PCM_HW_PARAM_LAST_INTERVAL SNDRV_PCM_HW_PARAM_TICK_TIME
-
-#define SNDRV_PCM_HW_PARAMS_NORESAMPLE (1<<0) /* avoid rate resampling */
-#define SNDRV_PCM_HW_PARAMS_EXPORT_BUFFER (1<<1) /* export buffer */
-#define SNDRV_PCM_HW_PARAMS_NO_PERIOD_WAKEUP (1<<2) /* disable period wakeups */
-
-struct snd_interval {
- unsigned int min, max;
- unsigned int openmin:1,
- openmax:1,
- integer:1,
- empty:1;
-};
-
-#define SNDRV_MASK_MAX 256
-
-struct snd_mask {
- __u32 bits[(SNDRV_MASK_MAX+31)/32];
-};
-
-struct snd_pcm_hw_params {
- unsigned int flags;
- struct snd_mask masks[SNDRV_PCM_HW_PARAM_LAST_MASK -
- SNDRV_PCM_HW_PARAM_FIRST_MASK + 1];
- struct snd_mask mres[5]; /* reserved masks */
- struct snd_interval intervals[SNDRV_PCM_HW_PARAM_LAST_INTERVAL -
- SNDRV_PCM_HW_PARAM_FIRST_INTERVAL + 1];
- struct snd_interval ires[9]; /* reserved intervals */
- unsigned int rmask; /* W: requested masks */
- unsigned int cmask; /* R: changed masks */
- unsigned int info; /* R: Info flags for returned setup */
- unsigned int msbits; /* R: used most significant bits */
- unsigned int rate_num; /* R: rate numerator */
- unsigned int rate_den; /* R: rate denominator */
- snd_pcm_uframes_t fifo_size; /* R: chip FIFO size in frames */
- unsigned char reserved[64]; /* reserved for future */
-};
-
-enum {
- SNDRV_PCM_TSTAMP_NONE = 0,
- SNDRV_PCM_TSTAMP_ENABLE,
- SNDRV_PCM_TSTAMP_LAST = SNDRV_PCM_TSTAMP_ENABLE,
-};
-
-struct snd_pcm_sw_params {
- int tstamp_mode; /* timestamp mode */
- unsigned int period_step;
- unsigned int sleep_min; /* min ticks to sleep */
- snd_pcm_uframes_t avail_min; /* min avail frames for wakeup */
- snd_pcm_uframes_t xfer_align; /* obsolete: xfer size need to be a multiple */
- snd_pcm_uframes_t start_threshold; /* min hw_avail frames for automatic start */
- snd_pcm_uframes_t stop_threshold; /* min avail frames for automatic stop */
- snd_pcm_uframes_t silence_threshold; /* min distance from noise for silence filling */
- snd_pcm_uframes_t silence_size; /* silence block size */
- snd_pcm_uframes_t boundary; /* pointers wrap point */
- unsigned int proto; /* protocol version */
- unsigned int tstamp_type; /* timestamp type (req. proto >= 2.0.12) */
- unsigned char reserved[56]; /* reserved for future */
-};
-
-struct snd_pcm_channel_info {
- unsigned int channel;
- __kernel_off_t offset; /* mmap offset */
- unsigned int first; /* offset to first sample in bits */
- unsigned int step; /* samples distance in bits */
-};
-
-enum {
- /*
- * first definition for backwards compatibility only,
- * maps to wallclock/link time for HDAudio playback and DEFAULT/DMA time for everything else
- */
- SNDRV_PCM_AUDIO_TSTAMP_TYPE_COMPAT = 0,
-
- /* timestamp definitions */
- SNDRV_PCM_AUDIO_TSTAMP_TYPE_DEFAULT = 1, /* DMA time, reported as per hw_ptr */
- SNDRV_PCM_AUDIO_TSTAMP_TYPE_LINK = 2, /* link time reported by sample or wallclock counter, reset on startup */
- SNDRV_PCM_AUDIO_TSTAMP_TYPE_LINK_ABSOLUTE = 3, /* link time reported by sample or wallclock counter, not reset on startup */
- SNDRV_PCM_AUDIO_TSTAMP_TYPE_LINK_ESTIMATED = 4, /* link time estimated indirectly */
- SNDRV_PCM_AUDIO_TSTAMP_TYPE_LINK_SYNCHRONIZED = 5, /* link time synchronized with system time */
- SNDRV_PCM_AUDIO_TSTAMP_TYPE_LAST = SNDRV_PCM_AUDIO_TSTAMP_TYPE_LINK_SYNCHRONIZED
-};
-
-#ifndef __KERNEL__
-/* explicit padding avoids incompatibility between i386 and x86-64 */
-typedef struct { unsigned char pad[sizeof(time_t) - sizeof(int)]; } __time_pad;
-
-struct snd_pcm_status {
- snd_pcm_state_t state; /* stream state */
- __time_pad pad1; /* align to timespec */
- struct timespec trigger_tstamp; /* time when stream was started/stopped/paused */
- struct timespec tstamp; /* reference timestamp */
- snd_pcm_uframes_t appl_ptr; /* appl ptr */
- snd_pcm_uframes_t hw_ptr; /* hw ptr */
- snd_pcm_sframes_t delay; /* current delay in frames */
- snd_pcm_uframes_t avail; /* number of frames available */
- snd_pcm_uframes_t avail_max; /* max frames available on hw since last status */
- snd_pcm_uframes_t overrange; /* count of ADC (capture) overrange detections from last status */
- snd_pcm_state_t suspended_state; /* suspended stream state */
- __u32 audio_tstamp_data; /* needed for 64-bit alignment, used for configs/report to/from userspace */
- struct timespec audio_tstamp; /* sample counter, wall clock, PHC or on-demand sync'ed */
- struct timespec driver_tstamp; /* useful in case reference system tstamp is reported with delay */
- __u32 audio_tstamp_accuracy; /* in ns units, only valid if indicated in audio_tstamp_data */
- unsigned char reserved[52-2*sizeof(struct timespec)]; /* must be filled with zero */
-};
-#endif
-
-/*
- * For mmap operations, we need the 64-bit layout, both for compat mode,
- * and for y2038 compatibility. For 64-bit applications, the two definitions
- * are identical, so we keep the traditional version.
- */
-#ifdef __SND_STRUCT_TIME64
-#define __snd_pcm_mmap_status64 snd_pcm_mmap_status
-#define __snd_pcm_mmap_control64 snd_pcm_mmap_control
-#define __snd_pcm_sync_ptr64 snd_pcm_sync_ptr
-#ifdef __KERNEL__
-#define __snd_timespec64 __kernel_timespec
-#else
-#define __snd_timespec64 timespec
-#endif
-struct __snd_timespec {
- __s32 tv_sec;
- __s32 tv_nsec;
-};
-#else
-#define __snd_pcm_mmap_status snd_pcm_mmap_status
-#define __snd_pcm_mmap_control snd_pcm_mmap_control
-#define __snd_pcm_sync_ptr snd_pcm_sync_ptr
-#define __snd_timespec timespec
-struct __snd_timespec64 {
- __s64 tv_sec;
- __s64 tv_nsec;
-};
-
-#endif
-
-struct __snd_pcm_mmap_status {
- snd_pcm_state_t state; /* RO: state - SNDRV_PCM_STATE_XXXX */
- int pad1; /* Needed for 64 bit alignment */
- snd_pcm_uframes_t hw_ptr; /* RO: hw ptr (0...boundary-1) */
- struct __snd_timespec tstamp; /* Timestamp */
- snd_pcm_state_t suspended_state; /* RO: suspended stream state */
- struct __snd_timespec audio_tstamp; /* from sample counter or wall clock */
-};
-
-struct __snd_pcm_mmap_control {
- snd_pcm_uframes_t appl_ptr; /* RW: appl ptr (0...boundary-1) */
- snd_pcm_uframes_t avail_min; /* RW: min available frames for wakeup */
-};
-
-#define SNDRV_PCM_SYNC_PTR_HWSYNC (1<<0) /* execute hwsync */
-#define SNDRV_PCM_SYNC_PTR_APPL (1<<1) /* get appl_ptr from driver (r/w op) */
-#define SNDRV_PCM_SYNC_PTR_AVAIL_MIN (1<<2) /* get avail_min from driver */
-
-struct __snd_pcm_sync_ptr {
- unsigned int flags;
- union {
- struct __snd_pcm_mmap_status status;
- unsigned char reserved[64];
- } s;
- union {
- struct __snd_pcm_mmap_control control;
- unsigned char reserved[64];
- } c;
-};
-
-#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __BIG_ENDIAN : defined(__BIG_ENDIAN)
-typedef char __pad_before_uframe[sizeof(__u64) - sizeof(snd_pcm_uframes_t)];
-typedef char __pad_after_uframe[0];
-#endif
-
-#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN)
-typedef char __pad_before_uframe[0];
-typedef char __pad_after_uframe[sizeof(__u64) - sizeof(snd_pcm_uframes_t)];
-#endif
-
-struct __snd_pcm_mmap_status64 {
- snd_pcm_state_t state; /* RO: state - SNDRV_PCM_STATE_XXXX */
- __u32 pad1; /* Needed for 64 bit alignment */
- __pad_before_uframe __pad1;
- snd_pcm_uframes_t hw_ptr; /* RO: hw ptr (0...boundary-1) */
- __pad_after_uframe __pad2;
- struct __snd_timespec64 tstamp; /* Timestamp */
- snd_pcm_state_t suspended_state;/* RO: suspended stream state */
- __u32 pad3; /* Needed for 64 bit alignment */
- struct __snd_timespec64 audio_tstamp; /* sample counter or wall clock */
-};
-
-struct __snd_pcm_mmap_control64 {
- __pad_before_uframe __pad1;
- snd_pcm_uframes_t appl_ptr; /* RW: appl ptr (0...boundary-1) */
- __pad_before_uframe __pad2;
-
- __pad_before_uframe __pad3;
- snd_pcm_uframes_t avail_min; /* RW: min available frames for wakeup */
- __pad_after_uframe __pad4;
-};
-
-struct __snd_pcm_sync_ptr64 {
- __u32 flags;
- __u32 pad1;
- union {
- struct __snd_pcm_mmap_status64 status;
- unsigned char reserved[64];
- } s;
- union {
- struct __snd_pcm_mmap_control64 control;
- unsigned char reserved[64];
- } c;
-};
-
-struct snd_xferi {
- snd_pcm_sframes_t result;
- void __user *buf;
- snd_pcm_uframes_t frames;
-};
-
-struct snd_xfern {
- snd_pcm_sframes_t result;
- void __user * __user *bufs;
- snd_pcm_uframes_t frames;
-};
-
-enum {
- SNDRV_PCM_TSTAMP_TYPE_GETTIMEOFDAY = 0, /* gettimeofday equivalent */
- SNDRV_PCM_TSTAMP_TYPE_MONOTONIC, /* posix_clock_monotonic equivalent */
- SNDRV_PCM_TSTAMP_TYPE_MONOTONIC_RAW, /* monotonic_raw (no NTP) */
- SNDRV_PCM_TSTAMP_TYPE_LAST = SNDRV_PCM_TSTAMP_TYPE_MONOTONIC_RAW,
-};
-
-/* channel positions */
-enum {
- SNDRV_CHMAP_UNKNOWN = 0,
- SNDRV_CHMAP_NA, /* N/A, silent */
- SNDRV_CHMAP_MONO, /* mono stream */
- /* this follows the alsa-lib mixer channel value + 3 */
- SNDRV_CHMAP_FL, /* front left */
- SNDRV_CHMAP_FR, /* front right */
- SNDRV_CHMAP_RL, /* rear left */
- SNDRV_CHMAP_RR, /* rear right */
- SNDRV_CHMAP_FC, /* front center */
- SNDRV_CHMAP_LFE, /* LFE */
- SNDRV_CHMAP_SL, /* side left */
- SNDRV_CHMAP_SR, /* side right */
- SNDRV_CHMAP_RC, /* rear center */
- /* new definitions */
- SNDRV_CHMAP_FLC, /* front left center */
- SNDRV_CHMAP_FRC, /* front right center */
- SNDRV_CHMAP_RLC, /* rear left center */
- SNDRV_CHMAP_RRC, /* rear right center */
- SNDRV_CHMAP_FLW, /* front left wide */
- SNDRV_CHMAP_FRW, /* front right wide */
- SNDRV_CHMAP_FLH, /* front left high */
- SNDRV_CHMAP_FCH, /* front center high */
- SNDRV_CHMAP_FRH, /* front right high */
- SNDRV_CHMAP_TC, /* top center */
- SNDRV_CHMAP_TFL, /* top front left */
- SNDRV_CHMAP_TFR, /* top front right */
- SNDRV_CHMAP_TFC, /* top front center */
- SNDRV_CHMAP_TRL, /* top rear left */
- SNDRV_CHMAP_TRR, /* top rear right */
- SNDRV_CHMAP_TRC, /* top rear center */
- /* new definitions for UAC2 */
- SNDRV_CHMAP_TFLC, /* top front left center */
- SNDRV_CHMAP_TFRC, /* top front right center */
- SNDRV_CHMAP_TSL, /* top side left */
- SNDRV_CHMAP_TSR, /* top side right */
- SNDRV_CHMAP_LLFE, /* left LFE */
- SNDRV_CHMAP_RLFE, /* right LFE */
- SNDRV_CHMAP_BC, /* bottom center */
- SNDRV_CHMAP_BLC, /* bottom left center */
- SNDRV_CHMAP_BRC, /* bottom right center */
- SNDRV_CHMAP_LAST = SNDRV_CHMAP_BRC,
-};
-
-#define SNDRV_CHMAP_POSITION_MASK 0xffff
-#define SNDRV_CHMAP_PHASE_INVERSE (0x01 << 16)
-#define SNDRV_CHMAP_DRIVER_SPEC (0x02 << 16)
-
-#define SNDRV_PCM_IOCTL_PVERSION _IOR('A', 0x00, int)
-#define SNDRV_PCM_IOCTL_INFO _IOR('A', 0x01, struct snd_pcm_info)
-#define SNDRV_PCM_IOCTL_TSTAMP _IOW('A', 0x02, int)
-#define SNDRV_PCM_IOCTL_TTSTAMP _IOW('A', 0x03, int)
-#define SNDRV_PCM_IOCTL_USER_PVERSION _IOW('A', 0x04, int)
-#define SNDRV_PCM_IOCTL_HW_REFINE _IOWR('A', 0x10, struct snd_pcm_hw_params)
-#define SNDRV_PCM_IOCTL_HW_PARAMS _IOWR('A', 0x11, struct snd_pcm_hw_params)
-#define SNDRV_PCM_IOCTL_HW_FREE _IO('A', 0x12)
-#define SNDRV_PCM_IOCTL_SW_PARAMS _IOWR('A', 0x13, struct snd_pcm_sw_params)
-#define SNDRV_PCM_IOCTL_STATUS _IOR('A', 0x20, struct snd_pcm_status)
-#define SNDRV_PCM_IOCTL_DELAY _IOR('A', 0x21, snd_pcm_sframes_t)
-#define SNDRV_PCM_IOCTL_HWSYNC _IO('A', 0x22)
-#define __SNDRV_PCM_IOCTL_SYNC_PTR _IOWR('A', 0x23, struct __snd_pcm_sync_ptr)
-#define __SNDRV_PCM_IOCTL_SYNC_PTR64 _IOWR('A', 0x23, struct __snd_pcm_sync_ptr64)
-#define SNDRV_PCM_IOCTL_SYNC_PTR _IOWR('A', 0x23, struct snd_pcm_sync_ptr)
-#define SNDRV_PCM_IOCTL_STATUS_EXT _IOWR('A', 0x24, struct snd_pcm_status)
-#define SNDRV_PCM_IOCTL_CHANNEL_INFO _IOR('A', 0x32, struct snd_pcm_channel_info)
-#define SNDRV_PCM_IOCTL_PREPARE _IO('A', 0x40)
-#define SNDRV_PCM_IOCTL_RESET _IO('A', 0x41)
-#define SNDRV_PCM_IOCTL_START _IO('A', 0x42)
-#define SNDRV_PCM_IOCTL_DROP _IO('A', 0x43)
-#define SNDRV_PCM_IOCTL_DRAIN _IO('A', 0x44)
-#define SNDRV_PCM_IOCTL_PAUSE _IOW('A', 0x45, int)
-#define SNDRV_PCM_IOCTL_REWIND _IOW('A', 0x46, snd_pcm_uframes_t)
-#define SNDRV_PCM_IOCTL_RESUME _IO('A', 0x47)
-#define SNDRV_PCM_IOCTL_XRUN _IO('A', 0x48)
-#define SNDRV_PCM_IOCTL_FORWARD _IOW('A', 0x49, snd_pcm_uframes_t)
-#define SNDRV_PCM_IOCTL_WRITEI_FRAMES _IOW('A', 0x50, struct snd_xferi)
-#define SNDRV_PCM_IOCTL_READI_FRAMES _IOR('A', 0x51, struct snd_xferi)
-#define SNDRV_PCM_IOCTL_WRITEN_FRAMES _IOW('A', 0x52, struct snd_xfern)
-#define SNDRV_PCM_IOCTL_READN_FRAMES _IOR('A', 0x53, struct snd_xfern)
-#define SNDRV_PCM_IOCTL_LINK _IOW('A', 0x60, int)
-#define SNDRV_PCM_IOCTL_UNLINK _IO('A', 0x61)
-
-/*****************************************************************************
- * *
- * MIDI v1.0 interface *
- * *
- *****************************************************************************/
-
-/*
- * Raw MIDI section - /dev/snd/midi??
- */
-
-#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 2)
-
-enum {
- SNDRV_RAWMIDI_STREAM_OUTPUT = 0,
- SNDRV_RAWMIDI_STREAM_INPUT,
- SNDRV_RAWMIDI_STREAM_LAST = SNDRV_RAWMIDI_STREAM_INPUT,
-};
-
-#define SNDRV_RAWMIDI_INFO_OUTPUT 0x00000001
-#define SNDRV_RAWMIDI_INFO_INPUT 0x00000002
-#define SNDRV_RAWMIDI_INFO_DUPLEX 0x00000004
-
-struct snd_rawmidi_info {
- unsigned int device; /* RO/WR (control): device number */
- unsigned int subdevice; /* RO/WR (control): subdevice number */
- int stream; /* WR: stream */
- int card; /* R: card number */
- unsigned int flags; /* SNDRV_RAWMIDI_INFO_XXXX */
- unsigned char id[64]; /* ID (user selectable) */
- unsigned char name[80]; /* name of device */
- unsigned char subname[32]; /* name of active or selected subdevice */
- unsigned int subdevices_count;
- unsigned int subdevices_avail;
- unsigned char reserved[64]; /* reserved for future use */
-};
-
-#define SNDRV_RAWMIDI_MODE_FRAMING_MASK (7<<0)
-#define SNDRV_RAWMIDI_MODE_FRAMING_SHIFT 0
-#define SNDRV_RAWMIDI_MODE_FRAMING_NONE (0<<0)
-#define SNDRV_RAWMIDI_MODE_FRAMING_TSTAMP (1<<0)
-#define SNDRV_RAWMIDI_MODE_CLOCK_MASK (7<<3)
-#define SNDRV_RAWMIDI_MODE_CLOCK_SHIFT 3
-#define SNDRV_RAWMIDI_MODE_CLOCK_NONE (0<<3)
-#define SNDRV_RAWMIDI_MODE_CLOCK_REALTIME (1<<3)
-#define SNDRV_RAWMIDI_MODE_CLOCK_MONOTONIC (2<<3)
-#define SNDRV_RAWMIDI_MODE_CLOCK_MONOTONIC_RAW (3<<3)
-
-#define SNDRV_RAWMIDI_FRAMING_DATA_LENGTH 16
-
-struct snd_rawmidi_framing_tstamp {
- /* For now, frame_type is always 0. Midi 2.0 is expected to add new
- * types here. Applications are expected to skip unknown frame types.
- */
- __u8 frame_type;
- __u8 length; /* number of valid bytes in data field */
- __u8 reserved[2];
- __u32 tv_nsec; /* nanoseconds */
- __u64 tv_sec; /* seconds */
- __u8 data[SNDRV_RAWMIDI_FRAMING_DATA_LENGTH];
-} __packed;
-
-struct snd_rawmidi_params {
- int stream;
- size_t buffer_size; /* queue size in bytes */
- size_t avail_min; /* minimum avail bytes for wakeup */
- unsigned int no_active_sensing: 1; /* do not send active sensing byte in close() */
- unsigned int mode; /* For input data only, frame incoming data */
- unsigned char reserved[12]; /* reserved for future use */
-};
-
-#ifndef __KERNEL__
-struct snd_rawmidi_status {
- int stream;
- __time_pad pad1;
- struct timespec tstamp; /* Timestamp */
- size_t avail; /* available bytes */
- size_t xruns; /* count of overruns since last status (in bytes) */
- unsigned char reserved[16]; /* reserved for future use */
-};
-#endif
-
-#define SNDRV_RAWMIDI_IOCTL_PVERSION _IOR('W', 0x00, int)
-#define SNDRV_RAWMIDI_IOCTL_INFO _IOR('W', 0x01, struct snd_rawmidi_info)
-#define SNDRV_RAWMIDI_IOCTL_USER_PVERSION _IOW('W', 0x02, int)
-#define SNDRV_RAWMIDI_IOCTL_PARAMS _IOWR('W', 0x10, struct snd_rawmidi_params)
-#define SNDRV_RAWMIDI_IOCTL_STATUS _IOWR('W', 0x20, struct snd_rawmidi_status)
-#define SNDRV_RAWMIDI_IOCTL_DROP _IOW('W', 0x30, int)
-#define SNDRV_RAWMIDI_IOCTL_DRAIN _IOW('W', 0x31, int)
-
-/*
- * Timer section - /dev/snd/timer
- */
-
-#define SNDRV_TIMER_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 7)
-
-enum {
- SNDRV_TIMER_CLASS_NONE = -1,
- SNDRV_TIMER_CLASS_SLAVE = 0,
- SNDRV_TIMER_CLASS_GLOBAL,
- SNDRV_TIMER_CLASS_CARD,
- SNDRV_TIMER_CLASS_PCM,
- SNDRV_TIMER_CLASS_LAST = SNDRV_TIMER_CLASS_PCM,
-};
-
-/* slave timer classes */
-enum {
- SNDRV_TIMER_SCLASS_NONE = 0,
- SNDRV_TIMER_SCLASS_APPLICATION,
- SNDRV_TIMER_SCLASS_SEQUENCER, /* alias */
- SNDRV_TIMER_SCLASS_OSS_SEQUENCER, /* alias */
- SNDRV_TIMER_SCLASS_LAST = SNDRV_TIMER_SCLASS_OSS_SEQUENCER,
-};
-
-/* global timers (device member) */
-#define SNDRV_TIMER_GLOBAL_SYSTEM 0
-#define SNDRV_TIMER_GLOBAL_RTC 1 /* unused */
-#define SNDRV_TIMER_GLOBAL_HPET 2
-#define SNDRV_TIMER_GLOBAL_HRTIMER 3
-
-/* info flags */
-#define SNDRV_TIMER_FLG_SLAVE (1<<0) /* cannot be controlled */
-
-struct snd_timer_id {
- int dev_class;
- int dev_sclass;
- int card;
- int device;
- int subdevice;
-};
-
-struct snd_timer_ginfo {
- struct snd_timer_id tid; /* requested timer ID */
- unsigned int flags; /* timer flags - SNDRV_TIMER_FLG_* */
- int card; /* card number */
- unsigned char id[64]; /* timer identification */
- unsigned char name[80]; /* timer name */
- unsigned long reserved0; /* reserved for future use */
- unsigned long resolution; /* average period resolution in ns */
- unsigned long resolution_min; /* minimal period resolution in ns */
- unsigned long resolution_max; /* maximal period resolution in ns */
- unsigned int clients; /* active timer clients */
- unsigned char reserved[32];
-};
-
-struct snd_timer_gparams {
- struct snd_timer_id tid; /* requested timer ID */
- unsigned long period_num; /* requested precise period duration (in seconds) - numerator */
- unsigned long period_den; /* requested precise period duration (in seconds) - denominator */
- unsigned char reserved[32];
-};
-
-struct snd_timer_gstatus {
- struct snd_timer_id tid; /* requested timer ID */
- unsigned long resolution; /* current period resolution in ns */
- unsigned long resolution_num; /* precise current period resolution (in seconds) - numerator */
- unsigned long resolution_den; /* precise current period resolution (in seconds) - denominator */
- unsigned char reserved[32];
-};
-
-struct snd_timer_select {
- struct snd_timer_id id; /* bind to timer ID */
- unsigned char reserved[32]; /* reserved */
-};
-
-struct snd_timer_info {
- unsigned int flags; /* timer flags - SNDRV_TIMER_FLG_* */
- int card; /* card number */
- unsigned char id[64]; /* timer identificator */
- unsigned char name[80]; /* timer name */
- unsigned long reserved0; /* reserved for future use */
- unsigned long resolution; /* average period resolution in ns */
- unsigned char reserved[64]; /* reserved */
-};
-
-#define SNDRV_TIMER_PSFLG_AUTO (1<<0) /* auto start, otherwise one-shot */
-#define SNDRV_TIMER_PSFLG_EXCLUSIVE (1<<1) /* exclusive use, precise start/stop/pause/continue */
-#define SNDRV_TIMER_PSFLG_EARLY_EVENT (1<<2) /* write early event to the poll queue */
-
-struct snd_timer_params {
- unsigned int flags; /* flags - SNDRV_TIMER_PSFLG_* */
- unsigned int ticks; /* requested resolution in ticks */
- unsigned int queue_size; /* total size of queue (32-1024) */
- unsigned int reserved0; /* reserved, was: failure locations */
- unsigned int filter; /* event filter (bitmask of SNDRV_TIMER_EVENT_*) */
- unsigned char reserved[60]; /* reserved */
-};
-
-#ifndef __KERNEL__
-struct snd_timer_status {
- struct timespec tstamp; /* Timestamp - last update */
- unsigned int resolution; /* current period resolution in ns */
- unsigned int lost; /* counter of master tick lost */
- unsigned int overrun; /* count of read queue overruns */
- unsigned int queue; /* used queue size */
- unsigned char reserved[64]; /* reserved */
-};
-#endif
-
-#define SNDRV_TIMER_IOCTL_PVERSION _IOR('T', 0x00, int)
-#define SNDRV_TIMER_IOCTL_NEXT_DEVICE _IOWR('T', 0x01, struct snd_timer_id)
-#define SNDRV_TIMER_IOCTL_TREAD_OLD _IOW('T', 0x02, int)
-#define SNDRV_TIMER_IOCTL_GINFO _IOWR('T', 0x03, struct snd_timer_ginfo)
-#define SNDRV_TIMER_IOCTL_GPARAMS _IOW('T', 0x04, struct snd_timer_gparams)
-#define SNDRV_TIMER_IOCTL_GSTATUS _IOWR('T', 0x05, struct snd_timer_gstatus)
-#define SNDRV_TIMER_IOCTL_SELECT _IOW('T', 0x10, struct snd_timer_select)
-#define SNDRV_TIMER_IOCTL_INFO _IOR('T', 0x11, struct snd_timer_info)
-#define SNDRV_TIMER_IOCTL_PARAMS _IOW('T', 0x12, struct snd_timer_params)
-#define SNDRV_TIMER_IOCTL_STATUS _IOR('T', 0x14, struct snd_timer_status)
-/* The following four ioctls are changed since 1.0.9 due to confliction */
-#define SNDRV_TIMER_IOCTL_START _IO('T', 0xa0)
-#define SNDRV_TIMER_IOCTL_STOP _IO('T', 0xa1)
-#define SNDRV_TIMER_IOCTL_CONTINUE _IO('T', 0xa2)
-#define SNDRV_TIMER_IOCTL_PAUSE _IO('T', 0xa3)
-#define SNDRV_TIMER_IOCTL_TREAD64 _IOW('T', 0xa4, int)
-
-#if __BITS_PER_LONG == 64
-#define SNDRV_TIMER_IOCTL_TREAD SNDRV_TIMER_IOCTL_TREAD_OLD
-#else
-#define SNDRV_TIMER_IOCTL_TREAD ((sizeof(__kernel_long_t) >= sizeof(time_t)) ? \
- SNDRV_TIMER_IOCTL_TREAD_OLD : \
- SNDRV_TIMER_IOCTL_TREAD64)
-#endif
-
-struct snd_timer_read {
- unsigned int resolution;
- unsigned int ticks;
-};
-
-enum {
- SNDRV_TIMER_EVENT_RESOLUTION = 0, /* val = resolution in ns */
- SNDRV_TIMER_EVENT_TICK, /* val = ticks */
- SNDRV_TIMER_EVENT_START, /* val = resolution in ns */
- SNDRV_TIMER_EVENT_STOP, /* val = 0 */
- SNDRV_TIMER_EVENT_CONTINUE, /* val = resolution in ns */
- SNDRV_TIMER_EVENT_PAUSE, /* val = 0 */
- SNDRV_TIMER_EVENT_EARLY, /* val = 0, early event */
- SNDRV_TIMER_EVENT_SUSPEND, /* val = 0 */
- SNDRV_TIMER_EVENT_RESUME, /* val = resolution in ns */
- /* master timer events for slave timer instances */
- SNDRV_TIMER_EVENT_MSTART = SNDRV_TIMER_EVENT_START + 10,
- SNDRV_TIMER_EVENT_MSTOP = SNDRV_TIMER_EVENT_STOP + 10,
- SNDRV_TIMER_EVENT_MCONTINUE = SNDRV_TIMER_EVENT_CONTINUE + 10,
- SNDRV_TIMER_EVENT_MPAUSE = SNDRV_TIMER_EVENT_PAUSE + 10,
- SNDRV_TIMER_EVENT_MSUSPEND = SNDRV_TIMER_EVENT_SUSPEND + 10,
- SNDRV_TIMER_EVENT_MRESUME = SNDRV_TIMER_EVENT_RESUME + 10,
-};
-
-#ifndef __KERNEL__
-struct snd_timer_tread {
- int event;
- __time_pad pad1;
- struct timespec tstamp;
- unsigned int val;
- __time_pad pad2;
-};
-#endif
-
-/****************************************************************************
- * *
- * Section for driver control interface - /dev/snd/control? *
- * *
- ****************************************************************************/
-
-#define SNDRV_CTL_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 8)
-
-struct snd_ctl_card_info {
- int card; /* card number */
- int pad; /* reserved for future (was type) */
- unsigned char id[16]; /* ID of card (user selectable) */
- unsigned char driver[16]; /* Driver name */
- unsigned char name[32]; /* Short name of soundcard */
- unsigned char longname[80]; /* name + info text about soundcard */
- unsigned char reserved_[16]; /* reserved for future (was ID of mixer) */
- unsigned char mixername[80]; /* visual mixer identification */
- unsigned char components[128]; /* card components / fine identification, delimited with one space (AC97 etc..) */
-};
-
-typedef int __bitwise snd_ctl_elem_type_t;
-#define SNDRV_CTL_ELEM_TYPE_NONE ((__force snd_ctl_elem_type_t) 0) /* invalid */
-#define SNDRV_CTL_ELEM_TYPE_BOOLEAN ((__force snd_ctl_elem_type_t) 1) /* boolean type */
-#define SNDRV_CTL_ELEM_TYPE_INTEGER ((__force snd_ctl_elem_type_t) 2) /* integer type */
-#define SNDRV_CTL_ELEM_TYPE_ENUMERATED ((__force snd_ctl_elem_type_t) 3) /* enumerated type */
-#define SNDRV_CTL_ELEM_TYPE_BYTES ((__force snd_ctl_elem_type_t) 4) /* byte array */
-#define SNDRV_CTL_ELEM_TYPE_IEC958 ((__force snd_ctl_elem_type_t) 5) /* IEC958 (S/PDIF) setup */
-#define SNDRV_CTL_ELEM_TYPE_INTEGER64 ((__force snd_ctl_elem_type_t) 6) /* 64-bit integer type */
-#define SNDRV_CTL_ELEM_TYPE_LAST SNDRV_CTL_ELEM_TYPE_INTEGER64
-
-typedef int __bitwise snd_ctl_elem_iface_t;
-#define SNDRV_CTL_ELEM_IFACE_CARD ((__force snd_ctl_elem_iface_t) 0) /* global control */
-#define SNDRV_CTL_ELEM_IFACE_HWDEP ((__force snd_ctl_elem_iface_t) 1) /* hardware dependent device */
-#define SNDRV_CTL_ELEM_IFACE_MIXER ((__force snd_ctl_elem_iface_t) 2) /* virtual mixer device */
-#define SNDRV_CTL_ELEM_IFACE_PCM ((__force snd_ctl_elem_iface_t) 3) /* PCM device */
-#define SNDRV_CTL_ELEM_IFACE_RAWMIDI ((__force snd_ctl_elem_iface_t) 4) /* RawMidi device */
-#define SNDRV_CTL_ELEM_IFACE_TIMER ((__force snd_ctl_elem_iface_t) 5) /* timer device */
-#define SNDRV_CTL_ELEM_IFACE_SEQUENCER ((__force snd_ctl_elem_iface_t) 6) /* sequencer client */
-#define SNDRV_CTL_ELEM_IFACE_LAST SNDRV_CTL_ELEM_IFACE_SEQUENCER
-
-#define SNDRV_CTL_ELEM_ACCESS_READ (1<<0)
-#define SNDRV_CTL_ELEM_ACCESS_WRITE (1<<1)
-#define SNDRV_CTL_ELEM_ACCESS_READWRITE (SNDRV_CTL_ELEM_ACCESS_READ|SNDRV_CTL_ELEM_ACCESS_WRITE)
-#define SNDRV_CTL_ELEM_ACCESS_VOLATILE (1<<2) /* control value may be changed without a notification */
-/* (1 << 3) is unused. */
-#define SNDRV_CTL_ELEM_ACCESS_TLV_READ (1<<4) /* TLV read is possible */
-#define SNDRV_CTL_ELEM_ACCESS_TLV_WRITE (1<<5) /* TLV write is possible */
-#define SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE (SNDRV_CTL_ELEM_ACCESS_TLV_READ|SNDRV_CTL_ELEM_ACCESS_TLV_WRITE)
-#define SNDRV_CTL_ELEM_ACCESS_TLV_COMMAND (1<<6) /* TLV command is possible */
-#define SNDRV_CTL_ELEM_ACCESS_INACTIVE (1<<8) /* control does actually nothing, but may be updated */
-#define SNDRV_CTL_ELEM_ACCESS_LOCK (1<<9) /* write lock */
-#define SNDRV_CTL_ELEM_ACCESS_OWNER (1<<10) /* write lock owner */
-#define SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK (1<<28) /* kernel use a TLV callback */
-#define SNDRV_CTL_ELEM_ACCESS_USER (1<<29) /* user space element */
-/* bits 30 and 31 are obsoleted (for indirect access) */
-
-/* for further details see the ACPI and PCI power management specification */
-#define SNDRV_CTL_POWER_D0 0x0000 /* full On */
-#define SNDRV_CTL_POWER_D1 0x0100 /* partial On */
-#define SNDRV_CTL_POWER_D2 0x0200 /* partial On */
-#define SNDRV_CTL_POWER_D3 0x0300 /* Off */
-#define SNDRV_CTL_POWER_D3hot (SNDRV_CTL_POWER_D3|0x0000) /* Off, with power */
-#define SNDRV_CTL_POWER_D3cold (SNDRV_CTL_POWER_D3|0x0001) /* Off, without power */
-
-#define SNDRV_CTL_ELEM_ID_NAME_MAXLEN 44
-
-struct snd_ctl_elem_id {
- unsigned int numid; /* numeric identifier, zero = invalid */
- snd_ctl_elem_iface_t iface; /* interface identifier */
- unsigned int device; /* device/client number */
- unsigned int subdevice; /* subdevice (substream) number */
- unsigned char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; /* ASCII name of item */
- unsigned int index; /* index of item */
-};
-
-struct snd_ctl_elem_list {
- unsigned int offset; /* W: first element ID to get */
- unsigned int space; /* W: count of element IDs to get */
- unsigned int used; /* R: count of element IDs set */
- unsigned int count; /* R: count of all elements */
- struct snd_ctl_elem_id __user *pids; /* R: IDs */
- unsigned char reserved[50];
-};
-
-struct snd_ctl_elem_info {
- struct snd_ctl_elem_id id; /* W: element ID */
- snd_ctl_elem_type_t type; /* R: value type - SNDRV_CTL_ELEM_TYPE_* */
- unsigned int access; /* R: value access (bitmask) - SNDRV_CTL_ELEM_ACCESS_* */
- unsigned int count; /* count of values */
- __kernel_pid_t owner; /* owner's PID of this control */
- union {
- struct {
- long min; /* R: minimum value */
- long max; /* R: maximum value */
- long step; /* R: step (0 variable) */
- } integer;
- struct {
- long long min; /* R: minimum value */
- long long max; /* R: maximum value */
- long long step; /* R: step (0 variable) */
- } integer64;
- struct {
- unsigned int items; /* R: number of items */
- unsigned int item; /* W: item number */
- char name[64]; /* R: value name */
- __u64 names_ptr; /* W: names list (ELEM_ADD only) */
- unsigned int names_length;
- } enumerated;
- unsigned char reserved[128];
- } value;
- unsigned char reserved[64];
-};
-
-struct snd_ctl_elem_value {
- struct snd_ctl_elem_id id; /* W: element ID */
- unsigned int indirect: 1; /* W: indirect access - obsoleted */
- union {
- union {
- long value[128];
- long *value_ptr; /* obsoleted */
- } integer;
- union {
- long long value[64];
- long long *value_ptr; /* obsoleted */
- } integer64;
- union {
- unsigned int item[128];
- unsigned int *item_ptr; /* obsoleted */
- } enumerated;
- union {
- unsigned char data[512];
- unsigned char *data_ptr; /* obsoleted */
- } bytes;
- struct snd_aes_iec958 iec958;
- } value; /* RO */
- unsigned char reserved[128];
-};
-
-struct snd_ctl_tlv {
- unsigned int numid; /* control element numeric identification */
- unsigned int length; /* in bytes aligned to 4 */
- unsigned int tlv[]; /* first TLV */
-};
-
-#define SNDRV_CTL_IOCTL_PVERSION _IOR('U', 0x00, int)
-#define SNDRV_CTL_IOCTL_CARD_INFO _IOR('U', 0x01, struct snd_ctl_card_info)
-#define SNDRV_CTL_IOCTL_ELEM_LIST _IOWR('U', 0x10, struct snd_ctl_elem_list)
-#define SNDRV_CTL_IOCTL_ELEM_INFO _IOWR('U', 0x11, struct snd_ctl_elem_info)
-#define SNDRV_CTL_IOCTL_ELEM_READ _IOWR('U', 0x12, struct snd_ctl_elem_value)
-#define SNDRV_CTL_IOCTL_ELEM_WRITE _IOWR('U', 0x13, struct snd_ctl_elem_value)
-#define SNDRV_CTL_IOCTL_ELEM_LOCK _IOW('U', 0x14, struct snd_ctl_elem_id)
-#define SNDRV_CTL_IOCTL_ELEM_UNLOCK _IOW('U', 0x15, struct snd_ctl_elem_id)
-#define SNDRV_CTL_IOCTL_SUBSCRIBE_EVENTS _IOWR('U', 0x16, int)
-#define SNDRV_CTL_IOCTL_ELEM_ADD _IOWR('U', 0x17, struct snd_ctl_elem_info)
-#define SNDRV_CTL_IOCTL_ELEM_REPLACE _IOWR('U', 0x18, struct snd_ctl_elem_info)
-#define SNDRV_CTL_IOCTL_ELEM_REMOVE _IOWR('U', 0x19, struct snd_ctl_elem_id)
-#define SNDRV_CTL_IOCTL_TLV_READ _IOWR('U', 0x1a, struct snd_ctl_tlv)
-#define SNDRV_CTL_IOCTL_TLV_WRITE _IOWR('U', 0x1b, struct snd_ctl_tlv)
-#define SNDRV_CTL_IOCTL_TLV_COMMAND _IOWR('U', 0x1c, struct snd_ctl_tlv)
-#define SNDRV_CTL_IOCTL_HWDEP_NEXT_DEVICE _IOWR('U', 0x20, int)
-#define SNDRV_CTL_IOCTL_HWDEP_INFO _IOR('U', 0x21, struct snd_hwdep_info)
-#define SNDRV_CTL_IOCTL_PCM_NEXT_DEVICE _IOR('U', 0x30, int)
-#define SNDRV_CTL_IOCTL_PCM_INFO _IOWR('U', 0x31, struct snd_pcm_info)
-#define SNDRV_CTL_IOCTL_PCM_PREFER_SUBDEVICE _IOW('U', 0x32, int)
-#define SNDRV_CTL_IOCTL_RAWMIDI_NEXT_DEVICE _IOWR('U', 0x40, int)
-#define SNDRV_CTL_IOCTL_RAWMIDI_INFO _IOWR('U', 0x41, struct snd_rawmidi_info)
-#define SNDRV_CTL_IOCTL_RAWMIDI_PREFER_SUBDEVICE _IOW('U', 0x42, int)
-#define SNDRV_CTL_IOCTL_POWER _IOWR('U', 0xd0, int)
-#define SNDRV_CTL_IOCTL_POWER_STATE _IOR('U', 0xd1, int)
-
-/*
- * Read interface.
- */
-
-enum sndrv_ctl_event_type {
- SNDRV_CTL_EVENT_ELEM = 0,
- SNDRV_CTL_EVENT_LAST = SNDRV_CTL_EVENT_ELEM,
-};
-
-#define SNDRV_CTL_EVENT_MASK_VALUE (1<<0) /* element value was changed */
-#define SNDRV_CTL_EVENT_MASK_INFO (1<<1) /* element info was changed */
-#define SNDRV_CTL_EVENT_MASK_ADD (1<<2) /* element was added */
-#define SNDRV_CTL_EVENT_MASK_TLV (1<<3) /* element TLV tree was changed */
-#define SNDRV_CTL_EVENT_MASK_REMOVE (~0U) /* element was removed */
-
-struct snd_ctl_event {
- int type; /* event type - SNDRV_CTL_EVENT_* */
- union {
- struct {
- unsigned int mask;
- struct snd_ctl_elem_id id;
- } elem;
- unsigned char data8[60];
- } data;
-};
-
-/*
- * Control names
- */
-
-#define SNDRV_CTL_NAME_NONE ""
-#define SNDRV_CTL_NAME_PLAYBACK "Playback "
-#define SNDRV_CTL_NAME_CAPTURE "Capture "
-
-#define SNDRV_CTL_NAME_IEC958_NONE ""
-#define SNDRV_CTL_NAME_IEC958_SWITCH "Switch"
-#define SNDRV_CTL_NAME_IEC958_VOLUME "Volume"
-#define SNDRV_CTL_NAME_IEC958_DEFAULT "Default"
-#define SNDRV_CTL_NAME_IEC958_MASK "Mask"
-#define SNDRV_CTL_NAME_IEC958_CON_MASK "Con Mask"
-#define SNDRV_CTL_NAME_IEC958_PRO_MASK "Pro Mask"
-#define SNDRV_CTL_NAME_IEC958_PCM_STREAM "PCM Stream"
-#define SNDRV_CTL_NAME_IEC958(expl,direction,what) "IEC958 " expl SNDRV_CTL_NAME_##direction SNDRV_CTL_NAME_IEC958_##what
-
-#endif /* _UAPI__SOUND_ASOUND_H */
diff --git a/tools/include/vdso/bits.h b/tools/include/vdso/bits.h
index 6d005a1f5d94..388b212088ea 100644
--- a/tools/include/vdso/bits.h
+++ b/tools/include/vdso/bits.h
@@ -5,5 +5,6 @@
#include <vdso/const.h>
#define BIT(nr) (UL(1) << (nr))
+#define BIT_ULL(nr) (ULL(1) << (nr))
#endif /* __VDSO_BITS_H */
diff --git a/tools/include/vdso/unaligned.h b/tools/include/vdso/unaligned.h
new file mode 100644
index 000000000000..ff0c06b6513e
--- /dev/null
+++ b/tools/include/vdso/unaligned.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __VDSO_UNALIGNED_H
+#define __VDSO_UNALIGNED_H
+
+#define __get_unaligned_t(type, ptr) ({ \
+ const struct { type x; } __packed * __get_pptr = (typeof(__get_pptr))(ptr); \
+ __get_pptr->x; \
+})
+
+#define __put_unaligned_t(type, val, ptr) do { \
+ struct { type x; } __packed * __put_pptr = (typeof(__put_pptr))(ptr); \
+ __put_pptr->x = (val); \
+} while (0)
+
+#endif /* __VDSO_UNALIGNED_H */