diff options
Diffstat (limited to 'tools/include')
123 files changed, 8084 insertions, 1349 deletions
diff --git a/tools/include/asm-generic/bitops/__fls.h b/tools/include/asm-generic/bitops/__fls.h index e974ec932ec1..35f33780ca6c 100644 --- a/tools/include/asm-generic/bitops/__fls.h +++ b/tools/include/asm-generic/bitops/__fls.h @@ -10,7 +10,7 @@ * * Undefined if no set bit exists, so code should check against 0 first. */ -static __always_inline unsigned int generic___fls(unsigned long word) +static __always_inline __attribute_const__ unsigned int generic___fls(unsigned long word) { unsigned int num = BITS_PER_LONG - 1; diff --git a/tools/include/asm-generic/bitops/fls.h b/tools/include/asm-generic/bitops/fls.h index 26f3ce1dd6e4..8eed3437edb9 100644 --- a/tools/include/asm-generic/bitops/fls.h +++ b/tools/include/asm-generic/bitops/fls.h @@ -10,7 +10,7 @@ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. */ -static __always_inline int generic_fls(unsigned int x) +static __always_inline __attribute_const__ int generic_fls(unsigned int x) { int r = 32; diff --git a/tools/include/asm-generic/bitops/fls64.h b/tools/include/asm-generic/bitops/fls64.h index 866f2b2304ff..b5f58dd261a3 100644 --- a/tools/include/asm-generic/bitops/fls64.h +++ b/tools/include/asm-generic/bitops/fls64.h @@ -16,7 +16,7 @@ * at position 64. */ #if BITS_PER_LONG == 32 -static __always_inline int fls64(__u64 x) +static __always_inline __attribute_const__ int fls64(__u64 x) { __u32 h = x >> 32; if (h) @@ -24,7 +24,7 @@ static __always_inline int fls64(__u64 x) return fls(x); } #elif BITS_PER_LONG == 64 -static __always_inline int fls64(__u64 x) +static __always_inline __attribute_const__ int fls64(__u64 x) { if (x == 0) return 0; diff --git a/tools/include/asm-generic/io.h b/tools/include/asm-generic/io.h new file mode 100644 index 000000000000..e5a0b07ad452 --- /dev/null +++ b/tools/include/asm-generic/io.h @@ -0,0 +1,482 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_ASM_GENERIC_IO_H +#define _TOOLS_ASM_GENERIC_IO_H + +#include <asm/barrier.h> +#include <asm/byteorder.h> + +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/types.h> + +#ifndef mmiowb_set_pending +#define mmiowb_set_pending() do { } while (0) +#endif + +#ifndef __io_br +#define __io_br() barrier() +#endif + +/* prevent prefetching of coherent DMA data ahead of a dma-complete */ +#ifndef __io_ar +#ifdef rmb +#define __io_ar(v) rmb() +#else +#define __io_ar(v) barrier() +#endif +#endif + +/* flush writes to coherent DMA data before possibly triggering a DMA read */ +#ifndef __io_bw +#ifdef wmb +#define __io_bw() wmb() +#else +#define __io_bw() barrier() +#endif +#endif + +/* serialize device access against a spin_unlock, usually handled there. */ +#ifndef __io_aw +#define __io_aw() mmiowb_set_pending() +#endif + +#ifndef __io_pbw +#define __io_pbw() __io_bw() +#endif + +#ifndef __io_paw +#define __io_paw() __io_aw() +#endif + +#ifndef __io_pbr +#define __io_pbr() __io_br() +#endif + +#ifndef __io_par +#define __io_par(v) __io_ar(v) +#endif + +#ifndef _THIS_IP_ +#define _THIS_IP_ 0 +#endif + +static inline void log_write_mmio(u64 val, u8 width, volatile void __iomem *addr, + unsigned long caller_addr, unsigned long caller_addr0) {} +static inline void log_post_write_mmio(u64 val, u8 width, volatile void __iomem *addr, + unsigned long caller_addr, unsigned long caller_addr0) {} +static inline void log_read_mmio(u8 width, const volatile void __iomem *addr, + unsigned long caller_addr, unsigned long caller_addr0) {} +static inline void log_post_read_mmio(u64 val, u8 width, const volatile void __iomem *addr, + unsigned long caller_addr, unsigned long caller_addr0) {} + +/* + * __raw_{read,write}{b,w,l,q}() access memory in native endianness. + * + * On some architectures memory mapped IO needs to be accessed differently. + * On the simple architectures, we just read/write the memory location + * directly. + */ + +#ifndef __raw_readb +#define __raw_readb __raw_readb +static inline u8 __raw_readb(const volatile void __iomem *addr) +{ + return *(const volatile u8 __force *)addr; +} +#endif + +#ifndef __raw_readw +#define __raw_readw __raw_readw +static inline u16 __raw_readw(const volatile void __iomem *addr) +{ + return *(const volatile u16 __force *)addr; +} +#endif + +#ifndef __raw_readl +#define __raw_readl __raw_readl +static inline u32 __raw_readl(const volatile void __iomem *addr) +{ + return *(const volatile u32 __force *)addr; +} +#endif + +#ifndef __raw_readq +#define __raw_readq __raw_readq +static inline u64 __raw_readq(const volatile void __iomem *addr) +{ + return *(const volatile u64 __force *)addr; +} +#endif + +#ifndef __raw_writeb +#define __raw_writeb __raw_writeb +static inline void __raw_writeb(u8 value, volatile void __iomem *addr) +{ + *(volatile u8 __force *)addr = value; +} +#endif + +#ifndef __raw_writew +#define __raw_writew __raw_writew +static inline void __raw_writew(u16 value, volatile void __iomem *addr) +{ + *(volatile u16 __force *)addr = value; +} +#endif + +#ifndef __raw_writel +#define __raw_writel __raw_writel +static inline void __raw_writel(u32 value, volatile void __iomem *addr) +{ + *(volatile u32 __force *)addr = value; +} +#endif + +#ifndef __raw_writeq +#define __raw_writeq __raw_writeq +static inline void __raw_writeq(u64 value, volatile void __iomem *addr) +{ + *(volatile u64 __force *)addr = value; +} +#endif + +/* + * {read,write}{b,w,l,q}() access little endian memory and return result in + * native endianness. + */ + +#ifndef readb +#define readb readb +static inline u8 readb(const volatile void __iomem *addr) +{ + u8 val; + + log_read_mmio(8, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __raw_readb(addr); + __io_ar(val); + log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef readw +#define readw readw +static inline u16 readw(const volatile void __iomem *addr) +{ + u16 val; + + log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __le16_to_cpu((__le16 __force)__raw_readw(addr)); + __io_ar(val); + log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef readl +#define readl readl +static inline u32 readl(const volatile void __iomem *addr) +{ + u32 val; + + log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __le32_to_cpu((__le32 __force)__raw_readl(addr)); + __io_ar(val); + log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef readq +#define readq readq +static inline u64 readq(const volatile void __iomem *addr) +{ + u64 val; + + log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __le64_to_cpu((__le64 __force)__raw_readq(addr)); + __io_ar(val); + log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef writeb +#define writeb writeb +static inline void writeb(u8 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writeb(value, addr); + __io_aw(); + log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#ifndef writew +#define writew writew +static inline void writew(u16 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writew((u16 __force)cpu_to_le16(value), addr); + __io_aw(); + log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#ifndef writel +#define writel writel +static inline void writel(u32 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writel((u32 __force)__cpu_to_le32(value), addr); + __io_aw(); + log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#ifndef writeq +#define writeq writeq +static inline void writeq(u64 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writeq((u64 __force)__cpu_to_le64(value), addr); + __io_aw(); + log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); +} +#endif + +/* + * {read,write}{b,w,l,q}_relaxed() are like the regular version, but + * are not guaranteed to provide ordering against spinlocks or memory + * accesses. + */ +#ifndef readb_relaxed +#define readb_relaxed readb_relaxed +static inline u8 readb_relaxed(const volatile void __iomem *addr) +{ + u8 val; + + log_read_mmio(8, addr, _THIS_IP_, _RET_IP_); + val = __raw_readb(addr); + log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef readw_relaxed +#define readw_relaxed readw_relaxed +static inline u16 readw_relaxed(const volatile void __iomem *addr) +{ + u16 val; + + log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); + val = __le16_to_cpu((__le16 __force)__raw_readw(addr)); + log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef readl_relaxed +#define readl_relaxed readl_relaxed +static inline u32 readl_relaxed(const volatile void __iomem *addr) +{ + u32 val; + + log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); + val = __le32_to_cpu((__le32 __force)__raw_readl(addr)); + log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#if defined(readq) && !defined(readq_relaxed) +#define readq_relaxed readq_relaxed +static inline u64 readq_relaxed(const volatile void __iomem *addr) +{ + u64 val; + + log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); + val = __le64_to_cpu((__le64 __force)__raw_readq(addr)); + log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef writeb_relaxed +#define writeb_relaxed writeb_relaxed +static inline void writeb_relaxed(u8 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); + __raw_writeb(value, addr); + log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#ifndef writew_relaxed +#define writew_relaxed writew_relaxed +static inline void writew_relaxed(u16 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); + __raw_writew((u16 __force)cpu_to_le16(value), addr); + log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#ifndef writel_relaxed +#define writel_relaxed writel_relaxed +static inline void writel_relaxed(u32 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); + __raw_writel((u32 __force)__cpu_to_le32(value), addr); + log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#if defined(writeq) && !defined(writeq_relaxed) +#define writeq_relaxed writeq_relaxed +static inline void writeq_relaxed(u64 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); + __raw_writeq((u64 __force)__cpu_to_le64(value), addr); + log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); +} +#endif + +/* + * {read,write}s{b,w,l,q}() repeatedly access the same memory address in + * native endianness in 8-, 16-, 32- or 64-bit chunks (@count times). + */ +#ifndef readsb +#define readsb readsb +static inline void readsb(const volatile void __iomem *addr, void *buffer, + unsigned int count) +{ + if (count) { + u8 *buf = buffer; + + do { + u8 x = __raw_readb(addr); + *buf++ = x; + } while (--count); + } +} +#endif + +#ifndef readsw +#define readsw readsw +static inline void readsw(const volatile void __iomem *addr, void *buffer, + unsigned int count) +{ + if (count) { + u16 *buf = buffer; + + do { + u16 x = __raw_readw(addr); + *buf++ = x; + } while (--count); + } +} +#endif + +#ifndef readsl +#define readsl readsl +static inline void readsl(const volatile void __iomem *addr, void *buffer, + unsigned int count) +{ + if (count) { + u32 *buf = buffer; + + do { + u32 x = __raw_readl(addr); + *buf++ = x; + } while (--count); + } +} +#endif + +#ifndef readsq +#define readsq readsq +static inline void readsq(const volatile void __iomem *addr, void *buffer, + unsigned int count) +{ + if (count) { + u64 *buf = buffer; + + do { + u64 x = __raw_readq(addr); + *buf++ = x; + } while (--count); + } +} +#endif + +#ifndef writesb +#define writesb writesb +static inline void writesb(volatile void __iomem *addr, const void *buffer, + unsigned int count) +{ + if (count) { + const u8 *buf = buffer; + + do { + __raw_writeb(*buf++, addr); + } while (--count); + } +} +#endif + +#ifndef writesw +#define writesw writesw +static inline void writesw(volatile void __iomem *addr, const void *buffer, + unsigned int count) +{ + if (count) { + const u16 *buf = buffer; + + do { + __raw_writew(*buf++, addr); + } while (--count); + } +} +#endif + +#ifndef writesl +#define writesl writesl +static inline void writesl(volatile void __iomem *addr, const void *buffer, + unsigned int count) +{ + if (count) { + const u32 *buf = buffer; + + do { + __raw_writel(*buf++, addr); + } while (--count); + } +} +#endif + +#ifndef writesq +#define writesq writesq +static inline void writesq(volatile void __iomem *addr, const void *buffer, + unsigned int count) +{ + if (count) { + const u64 *buf = buffer; + + do { + __raw_writeq(*buf++, addr); + } while (--count); + } +} +#endif + +#endif /* _TOOLS_ASM_GENERIC_IO_H */ diff --git a/tools/include/asm/io.h b/tools/include/asm/io.h new file mode 100644 index 000000000000..eed5066f25c4 --- /dev/null +++ b/tools/include/asm/io.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_ASM_IO_H +#define _TOOLS_ASM_IO_H + +#if defined(__i386__) || defined(__x86_64__) +#include "../../arch/x86/include/asm/io.h" +#else +#include <asm-generic/io.h> +#endif + +#endif /* _TOOLS_ASM_IO_H */ diff --git a/tools/include/asm/timex.h b/tools/include/asm/timex.h new file mode 100644 index 000000000000..5adfe3c6d326 --- /dev/null +++ b/tools/include/asm/timex.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TOOLS_LINUX_ASM_TIMEX_H +#define __TOOLS_LINUX_ASM_TIMEX_H + +#include <time.h> + +#define cycles_t clock_t + +static inline cycles_t get_cycles(void) +{ + return clock(); +} +#endif // __TOOLS_LINUX_ASM_TIMEX_H diff --git a/tools/include/linux/args.h b/tools/include/linux/args.h new file mode 100644 index 000000000000..2e8e65d975c7 --- /dev/null +++ b/tools/include/linux/args.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_ARGS_H +#define _LINUX_ARGS_H + +/* + * How do these macros work? + * + * In __COUNT_ARGS() _0 to _12 are just placeholders from the start + * in order to make sure _n is positioned over the correct number + * from 12 to 0 (depending on X, which is a variadic argument list). + * They serve no purpose other than occupying a position. Since each + * macro parameter must have a distinct identifier, those identifiers + * are as good as any. + * + * In COUNT_ARGS() we use actual integers, so __COUNT_ARGS() returns + * that as _n. + */ + +/* This counts to 15. Any more, it will return 16th argument. */ +#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _n, X...) _n +#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + +/* Concatenate two parameters, but allow them to be expanded beforehand. */ +#define __CONCAT(a, b) a ## b +#define CONCATENATE(a, b) __CONCAT(a, b) + +#endif /* _LINUX_ARGS_H */ diff --git a/tools/include/linux/atomic.h b/tools/include/linux/atomic.h index 01907b33537e..50c66ba9ada5 100644 --- a/tools/include/linux/atomic.h +++ b/tools/include/linux/atomic.h @@ -12,4 +12,26 @@ void atomic_long_set(atomic_long_t *v, long i); #define atomic_cmpxchg_release atomic_cmpxchg #endif /* atomic_cmpxchg_relaxed */ +static inline bool atomic_try_cmpxchg(atomic_t *ptr, int *oldp, int new) +{ + int ret, old = *oldp; + + ret = atomic_cmpxchg(ptr, old, new); + if (ret != old) + *oldp = ret; + return ret == old; +} + +static inline bool atomic_inc_unless_negative(atomic_t *v) +{ + int c = atomic_read(v); + + do { + if (unlikely(c < 0)) + return false; + } while (!atomic_try_cmpxchg(v, &c, c + 1)); + + return true; +} + #endif /* __TOOLS_LINUX_ATOMIC_H */ diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index 2a7f260ef9dc..0d992245c600 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -3,6 +3,7 @@ #define _TOOLS_LINUX_BITMAP_H #include <string.h> +#include <asm-generic/bitsperlong.h> #include <linux/align.h> #include <linux/bitops.h> #include <linux/find.h> @@ -19,6 +20,7 @@ bool __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); bool __bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); +void __bitmap_set(unsigned long *map, unsigned int start, int len); void __bitmap_clear(unsigned long *map, unsigned int start, int len); bool __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); @@ -79,6 +81,11 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, __bitmap_or(dst, src1, src2, nbits); } +static inline unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags __maybe_unused) +{ + return malloc(bitmap_size(nbits)); +} + /** * bitmap_zalloc - Allocate bitmap * @nbits: Number of bits @@ -150,6 +157,21 @@ static inline bool bitmap_intersects(const unsigned long *src1, return __bitmap_intersects(src1, src2, nbits); } +static inline void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits) +{ + if (__builtin_constant_p(nbits) && nbits == 1) + __set_bit(start, map); + else if (small_const_nbits(start + nbits)) + *map |= GENMASK(start + nbits - 1, start); + else if (__builtin_constant_p(start & BITMAP_MEM_MASK) && + IS_ALIGNED(start, BITMAP_MEM_ALIGNMENT) && + __builtin_constant_p(nbits & BITMAP_MEM_MASK) && + IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT)) + memset((char *)map + start / 8, 0xff, nbits / 8); + else + __bitmap_set(map, start, nbits); +} + static inline void bitmap_clear(unsigned long *map, unsigned int start, unsigned int nbits) { diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 60044b608817..a40cc861b3a7 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -2,16 +2,15 @@ #ifndef __LINUX_BITS_H #define __LINUX_BITS_H -#include <linux/const.h> #include <vdso/bits.h> #include <uapi/linux/bits.h> -#include <asm/bitsperlong.h> #define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) #define BIT_ULL_MASK(nr) (ULL(1) << ((nr) % BITS_PER_LONG_LONG)) #define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG) #define BITS_PER_BYTE 8 +#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) /* * Create a contiguous bitmask starting at bit position @l and ending at @@ -19,36 +18,72 @@ * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ #if !defined(__ASSEMBLY__) + +/* + * Missing asm support + * + * GENMASK_U*() and BIT_U*() depend on BITS_PER_TYPE() which relies on sizeof(), + * something not available in asm. Nevertheless, fixed width integers is a C + * concept. Assembly code can rely on the long and long long versions instead. + */ + #include <linux/build_bug.h> -#define GENMASK_INPUT_CHECK(h, l) \ - (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ - __is_constexpr((l) > (h)), (l) > (h), 0))) -#else +#include <linux/compiler.h> +#include <linux/overflow.h> + +#define GENMASK_INPUT_CHECK(h, l) BUILD_BUG_ON_ZERO(const_true((l) > (h))) + /* - * BUILD_BUG_ON_ZERO is not available in h files included from asm files, - * disable the input check if that is the case. + * Generate a mask for the specified type @t. Additional checks are made to + * guarantee the value returned fits in that type, relying on + * -Wshift-count-overflow compiler check to detect incompatible arguments. + * For example, all these create build errors or warnings: + * + * - GENMASK(15, 20): wrong argument order + * - GENMASK(72, 15): doesn't fit unsigned long + * - GENMASK_U32(33, 15): doesn't fit in a u32 */ -#define GENMASK_INPUT_CHECK(h, l) 0 -#endif +#define GENMASK_TYPE(t, h, l) \ + ((t)(GENMASK_INPUT_CHECK(h, l) + \ + (type_max(t) << (l) & \ + type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h))))) -#define GENMASK(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) -#define GENMASK_ULL(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) +#define GENMASK(h, l) GENMASK_TYPE(unsigned long, h, l) +#define GENMASK_ULL(h, l) GENMASK_TYPE(unsigned long long, h, l) + +#define GENMASK_U8(h, l) GENMASK_TYPE(u8, h, l) +#define GENMASK_U16(h, l) GENMASK_TYPE(u16, h, l) +#define GENMASK_U32(h, l) GENMASK_TYPE(u32, h, l) +#define GENMASK_U64(h, l) GENMASK_TYPE(u64, h, l) +#define GENMASK_U128(h, l) GENMASK_TYPE(u128, h, l) -#if !defined(__ASSEMBLY__) /* - * Missing asm support + * Fixed-type variants of BIT(), with additional checks like GENMASK_TYPE(). The + * following examples generate compiler warnings due to -Wshift-count-overflow: * - * __GENMASK_U128() depends on _BIT128() which would not work - * in the asm code, as it shifts an 'unsigned __init128' data - * type instead of direct representation of 128 bit constants - * such as long and unsigned long. The fundamental problem is - * that a 128 bit constant will get silently truncated by the - * gcc compiler. + * - BIT_U8(8) + * - BIT_U32(-1) + * - BIT_U32(40) */ -#define GENMASK_U128(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U128(h, l)) -#endif +#define BIT_INPUT_CHECK(type, nr) \ + BUILD_BUG_ON_ZERO(const_true((nr) >= BITS_PER_TYPE(type))) + +#define BIT_TYPE(type, nr) ((type)(BIT_INPUT_CHECK(type, nr) + BIT_ULL(nr))) + +#define BIT_U8(nr) BIT_TYPE(u8, nr) +#define BIT_U16(nr) BIT_TYPE(u16, nr) +#define BIT_U32(nr) BIT_TYPE(u32, nr) +#define BIT_U64(nr) BIT_TYPE(u64, nr) + +#else /* defined(__ASSEMBLY__) */ + +/* + * BUILD_BUG_ON_ZERO is not available in h files included from asm files, + * disable the input check if that is the case. + */ +#define GENMASK(h, l) __GENMASK(h, l) +#define GENMASK_ULL(h, l) __GENMASK_ULL(h, l) + +#endif /* !defined(__ASSEMBLY__) */ #endif /* __LINUX_BITS_H */ diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h index b4898ff085de..ab2aa97bd8ce 100644 --- a/tools/include/linux/build_bug.h +++ b/tools/include/linux/build_bug.h @@ -4,17 +4,17 @@ #include <linux/compiler.h> -#ifdef __CHECKER__ -#define BUILD_BUG_ON_ZERO(e) (0) -#else /* __CHECKER__ */ /* * Force a compilation error if condition is true, but also produce a * result (of value 0 and type int), so the expression can be used * e.g. in a structure initializer (or where-ever else comma expressions * aren't permitted). + * + * Take an error message as an optional second argument. If omitted, + * default to the stringification of the tested expression. */ -#define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); }))) -#endif /* __CHECKER__ */ +#define BUILD_BUG_ON_ZERO(e, ...) \ + __BUILD_BUG_ON_ZERO_MSG(e, ##__VA_ARGS__, #e " is true") /* Force a compilation error if a constant expression is not a power of 2 */ #define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \ diff --git a/tools/include/linux/cfi_types.h b/tools/include/linux/cfi_types.h new file mode 100644 index 000000000000..a86af9bc8bdc --- /dev/null +++ b/tools/include/linux/cfi_types.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Clang Control Flow Integrity (CFI) type definitions. + */ +#ifndef _LINUX_CFI_TYPES_H +#define _LINUX_CFI_TYPES_H + +#ifdef __ASSEMBLY__ +#include <linux/linkage.h> + +#ifdef CONFIG_CFI +/* + * Use the __kcfi_typeid_<function> type identifier symbol to + * annotate indirectly called assembly functions. The compiler emits + * these symbols for all address-taken function declarations in C + * code. + */ +#ifndef __CFI_TYPE +#define __CFI_TYPE(name) \ + .4byte __kcfi_typeid_##name +#endif + +#define SYM_TYPED_ENTRY(name, linkage, align...) \ + linkage(name) ASM_NL \ + align ASM_NL \ + __CFI_TYPE(name) ASM_NL \ + name: + +#define SYM_TYPED_START(name, linkage, align...) \ + SYM_TYPED_ENTRY(name, linkage, align) + +#else /* CONFIG_CFI */ + +#define SYM_TYPED_START(name, linkage, align...) \ + SYM_START(name, linkage, align) + +#endif /* CONFIG_CFI */ + +#ifndef SYM_TYPED_FUNC_START +#define SYM_TYPED_FUNC_START(name) \ + SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) +#endif + +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_CFI +#define DEFINE_CFI_TYPE(name, func) \ + /* \ + * Force a reference to the function so the compiler generates \ + * __kcfi_typeid_<func>. \ + */ \ + __ADDRESSABLE(func); \ + /* u32 name __ro_after_init = __kcfi_typeid_<func> */ \ + extern u32 name; \ + asm ( \ + " .pushsection .data..ro_after_init,\"aw\",\%progbits \n" \ + " .type " #name ",\%object \n" \ + " .globl " #name " \n" \ + " .p2align 2, 0x0 \n" \ + #name ": \n" \ + " .4byte __kcfi_typeid_" #func " \n" \ + " .size " #name ", 4 \n" \ + " .popsection \n" \ + ); +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* _LINUX_CFI_TYPES_H */ diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 9c05a59f0184..f40bd2b04c29 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -81,6 +81,28 @@ #define __is_constexpr(x) \ (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) +/* + * Similar to statically_true() but produces a constant expression + * + * To be used in conjunction with macros, such as BUILD_BUG_ON_ZERO(), + * which require their input to be a constant expression and for which + * statically_true() would otherwise fail. + * + * This is a trade-off: const_true() requires all its operands to be + * compile time constants. Else, it would always returns false even on + * the most trivial cases like: + * + * true || non_const_var + * + * On the opposite, statically_true() is able to fold more complex + * tautologies and will return true on expressions such as: + * + * !(non_const_var * 8 % 4) + * + * For the general case, statically_true() is better. + */ +#define const_true(x) __builtin_choose_expr(__is_constexpr(x), x, false) + #ifdef __ANDROID__ /* * FIXME: Big hammer to get rid of tons of: @@ -116,6 +138,10 @@ # define __force #endif +#ifndef __iomem +# define __iomem +#endif + #ifndef __weak # define __weak __attribute__((weak)) #endif @@ -222,6 +248,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s __asm__ ("" : "=r" (var) : "0" (var)) #endif +#ifndef __BUILD_BUG_ON_ZERO_MSG +#if defined(__clang__) +#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) ((int)(sizeof(struct { int:(-!!(e)); }))) +#else +#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) ((int)sizeof(struct {_Static_assert(!(e), msg);})) +#endif +#endif + #endif /* __ASSEMBLY__ */ #endif /* _TOOLS_LINUX_COMPILER_H */ diff --git a/tools/include/linux/container_of.h b/tools/include/linux/container_of.h new file mode 100644 index 000000000000..c879e14c3dd6 --- /dev/null +++ b/tools/include/linux/container_of.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_LINUX_CONTAINER_OF_H +#define _TOOLS_LINUX_CONTAINER_OF_H + +#ifndef container_of +/** + * container_of - cast a member of a structure out to the containing structure + * @ptr: the pointer to the member. + * @type: the type of the container struct this is embedded in. + * @member: the name of the member within the struct. + * + */ +#define container_of(ptr, type, member) ({ \ + const typeof(((type *)0)->member) * __mptr = (ptr); \ + (type *)((char *)__mptr - offsetof(type, member)); }) +#endif + +#endif /* _TOOLS_LINUX_CONTAINER_OF_H */ diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index 65aa8ce142e5..bcc6df79301a 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -273,6 +273,16 @@ .off = OFF, \ .imm = 0 }) +/* Unconditional jumps, gotol pc + imm32 */ + +#define BPF_JMP32_A(IMM) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_JA, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + /* Function call */ #define BPF_EMIT_CALL(FUNC) \ diff --git a/tools/include/linux/gfp_types.h b/tools/include/linux/gfp_types.h index 5f9f1ed190a0..65db9349f905 100644 --- a/tools/include/linux/gfp_types.h +++ b/tools/include/linux/gfp_types.h @@ -1 +1,392 @@ -#include "../../../include/linux/gfp_types.h" +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_GFP_TYPES_H +#define __LINUX_GFP_TYPES_H + +#include <linux/bits.h> + +/* The typedef is in types.h but we want the documentation here */ +#if 0 +/** + * typedef gfp_t - Memory allocation flags. + * + * GFP flags are commonly used throughout Linux to indicate how memory + * should be allocated. The GFP acronym stands for get_free_pages(), + * the underlying memory allocation function. Not every GFP flag is + * supported by every function which may allocate memory. Most users + * will want to use a plain ``GFP_KERNEL``. + */ +typedef unsigned int __bitwise gfp_t; +#endif + +/* + * In case of changes, please don't forget to update + * include/trace/events/mmflags.h and tools/perf/builtin-kmem.c + */ + +enum { + ___GFP_DMA_BIT, + ___GFP_HIGHMEM_BIT, + ___GFP_DMA32_BIT, + ___GFP_MOVABLE_BIT, + ___GFP_RECLAIMABLE_BIT, + ___GFP_HIGH_BIT, + ___GFP_IO_BIT, + ___GFP_FS_BIT, + ___GFP_ZERO_BIT, + ___GFP_UNUSED_BIT, /* 0x200u unused */ + ___GFP_DIRECT_RECLAIM_BIT, + ___GFP_KSWAPD_RECLAIM_BIT, + ___GFP_WRITE_BIT, + ___GFP_NOWARN_BIT, + ___GFP_RETRY_MAYFAIL_BIT, + ___GFP_NOFAIL_BIT, + ___GFP_NORETRY_BIT, + ___GFP_MEMALLOC_BIT, + ___GFP_COMP_BIT, + ___GFP_NOMEMALLOC_BIT, + ___GFP_HARDWALL_BIT, + ___GFP_THISNODE_BIT, + ___GFP_ACCOUNT_BIT, + ___GFP_ZEROTAGS_BIT, +#ifdef CONFIG_KASAN_HW_TAGS + ___GFP_SKIP_ZERO_BIT, + ___GFP_SKIP_KASAN_BIT, +#endif +#ifdef CONFIG_LOCKDEP + ___GFP_NOLOCKDEP_BIT, +#endif +#ifdef CONFIG_SLAB_OBJ_EXT + ___GFP_NO_OBJ_EXT_BIT, +#endif + ___GFP_LAST_BIT +}; + +/* Plain integer GFP bitmasks. Do not use this directly. */ +#define ___GFP_DMA BIT(___GFP_DMA_BIT) +#define ___GFP_HIGHMEM BIT(___GFP_HIGHMEM_BIT) +#define ___GFP_DMA32 BIT(___GFP_DMA32_BIT) +#define ___GFP_MOVABLE BIT(___GFP_MOVABLE_BIT) +#define ___GFP_RECLAIMABLE BIT(___GFP_RECLAIMABLE_BIT) +#define ___GFP_HIGH BIT(___GFP_HIGH_BIT) +#define ___GFP_IO BIT(___GFP_IO_BIT) +#define ___GFP_FS BIT(___GFP_FS_BIT) +#define ___GFP_ZERO BIT(___GFP_ZERO_BIT) +/* 0x200u unused */ +#define ___GFP_DIRECT_RECLAIM BIT(___GFP_DIRECT_RECLAIM_BIT) +#define ___GFP_KSWAPD_RECLAIM BIT(___GFP_KSWAPD_RECLAIM_BIT) +#define ___GFP_WRITE BIT(___GFP_WRITE_BIT) +#define ___GFP_NOWARN BIT(___GFP_NOWARN_BIT) +#define ___GFP_RETRY_MAYFAIL BIT(___GFP_RETRY_MAYFAIL_BIT) +#define ___GFP_NOFAIL BIT(___GFP_NOFAIL_BIT) +#define ___GFP_NORETRY BIT(___GFP_NORETRY_BIT) +#define ___GFP_MEMALLOC BIT(___GFP_MEMALLOC_BIT) +#define ___GFP_COMP BIT(___GFP_COMP_BIT) +#define ___GFP_NOMEMALLOC BIT(___GFP_NOMEMALLOC_BIT) +#define ___GFP_HARDWALL BIT(___GFP_HARDWALL_BIT) +#define ___GFP_THISNODE BIT(___GFP_THISNODE_BIT) +#define ___GFP_ACCOUNT BIT(___GFP_ACCOUNT_BIT) +#define ___GFP_ZEROTAGS BIT(___GFP_ZEROTAGS_BIT) +#ifdef CONFIG_KASAN_HW_TAGS +#define ___GFP_SKIP_ZERO BIT(___GFP_SKIP_ZERO_BIT) +#define ___GFP_SKIP_KASAN BIT(___GFP_SKIP_KASAN_BIT) +#else +#define ___GFP_SKIP_ZERO 0 +#define ___GFP_SKIP_KASAN 0 +#endif +#ifdef CONFIG_LOCKDEP +#define ___GFP_NOLOCKDEP BIT(___GFP_NOLOCKDEP_BIT) +#else +#define ___GFP_NOLOCKDEP 0 +#endif +#ifdef CONFIG_SLAB_OBJ_EXT +#define ___GFP_NO_OBJ_EXT BIT(___GFP_NO_OBJ_EXT_BIT) +#else +#define ___GFP_NO_OBJ_EXT 0 +#endif + +/* + * Physical address zone modifiers (see linux/mmzone.h - low four bits) + * + * Do not put any conditional on these. If necessary modify the definitions + * without the underscores and use them consistently. The definitions here may + * be used in bit comparisons. + */ +#define __GFP_DMA ((__force gfp_t)___GFP_DMA) +#define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM) +#define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32) +#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */ +#define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE) + +/** + * DOC: Page mobility and placement hints + * + * Page mobility and placement hints + * --------------------------------- + * + * These flags provide hints about how mobile the page is. Pages with similar + * mobility are placed within the same pageblocks to minimise problems due + * to external fragmentation. + * + * %__GFP_MOVABLE (also a zone modifier) indicates that the page can be + * moved by page migration during memory compaction or can be reclaimed. + * + * %__GFP_RECLAIMABLE is used for slab allocations that specify + * SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers. + * + * %__GFP_WRITE indicates the caller intends to dirty the page. Where possible, + * these pages will be spread between local zones to avoid all the dirty + * pages being in one zone (fair zone allocation policy). + * + * %__GFP_HARDWALL enforces the cpuset memory allocation policy. + * + * %__GFP_THISNODE forces the allocation to be satisfied from the requested + * node with no fallbacks or placement policy enforcements. + * + * %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg. + * + * %__GFP_NO_OBJ_EXT causes slab allocation to have no object extension. + */ +#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) +#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) +#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) +#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE) +#define __GFP_ACCOUNT ((__force gfp_t)___GFP_ACCOUNT) +#define __GFP_NO_OBJ_EXT ((__force gfp_t)___GFP_NO_OBJ_EXT) + +/** + * DOC: Watermark modifiers + * + * Watermark modifiers -- controls access to emergency reserves + * ------------------------------------------------------------ + * + * %__GFP_HIGH indicates that the caller is high-priority and that granting + * the request is necessary before the system can make forward progress. + * For example creating an IO context to clean pages and requests + * from atomic context. + * + * %__GFP_MEMALLOC allows access to all memory. This should only be used when + * the caller guarantees the allocation will allow more memory to be freed + * very shortly e.g. process exiting or swapping. Users either should + * be the MM or co-ordinating closely with the VM (e.g. swap over NFS). + * Users of this flag have to be extremely careful to not deplete the reserve + * completely and implement a throttling mechanism which controls the + * consumption of the reserve based on the amount of freed memory. + * Usage of a pre-allocated pool (e.g. mempool) should be always considered + * before using this flag. + * + * %__GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves. + * This takes precedence over the %__GFP_MEMALLOC flag if both are set. + */ +#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) +#define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC) +#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) + +/** + * DOC: Reclaim modifiers + * + * Reclaim modifiers + * ----------------- + * Please note that all the following flags are only applicable to sleepable + * allocations (e.g. %GFP_NOWAIT and %GFP_ATOMIC will ignore them). + * + * %__GFP_IO can start physical IO. + * + * %__GFP_FS can call down to the low-level FS. Clearing the flag avoids the + * allocator recursing into the filesystem which might already be holding + * locks. + * + * %__GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim. + * This flag can be cleared to avoid unnecessary delays when a fallback + * option is available. + * + * %__GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when + * the low watermark is reached and have it reclaim pages until the high + * watermark is reached. A caller may wish to clear this flag when fallback + * options are available and the reclaim is likely to disrupt the system. The + * canonical example is THP allocation where a fallback is cheap but + * reclaim/compaction may cause indirect stalls. + * + * %__GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim. + * + * The default allocator behavior depends on the request size. We have a concept + * of so-called costly allocations (with order > %PAGE_ALLOC_COSTLY_ORDER). + * !costly allocations are too essential to fail so they are implicitly + * non-failing by default (with some exceptions like OOM victims might fail so + * the caller still has to check for failures) while costly requests try to be + * not disruptive and back off even without invoking the OOM killer. + * The following three modifiers might be used to override some of these + * implicit rules. Please note that all of them must be used along with + * %__GFP_DIRECT_RECLAIM flag. + * + * %__GFP_NORETRY: The VM implementation will try only very lightweight + * memory direct reclaim to get some memory under memory pressure (thus + * it can sleep). It will avoid disruptive actions like OOM killer. The + * caller must handle the failure which is quite likely to happen under + * heavy memory pressure. The flag is suitable when failure can easily be + * handled at small cost, such as reduced throughput. + * + * %__GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim + * procedures that have previously failed if there is some indication + * that progress has been made elsewhere. It can wait for other + * tasks to attempt high-level approaches to freeing memory such as + * compaction (which removes fragmentation) and page-out. + * There is still a definite limit to the number of retries, but it is + * a larger limit than with %__GFP_NORETRY. + * Allocations with this flag may fail, but only when there is + * genuinely little unused memory. While these allocations do not + * directly trigger the OOM killer, their failure indicates that + * the system is likely to need to use the OOM killer soon. The + * caller must handle failure, but can reasonably do so by failing + * a higher-level request, or completing it only in a much less + * efficient manner. + * If the allocation does fail, and the caller is in a position to + * free some non-essential memory, doing so could benefit the system + * as a whole. + * + * %__GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller + * cannot handle allocation failures. The allocation could block + * indefinitely but will never return with failure. Testing for + * failure is pointless. + * It _must_ be blockable and used together with __GFP_DIRECT_RECLAIM. + * It should _never_ be used in non-sleepable contexts. + * New users should be evaluated carefully (and the flag should be + * used only when there is no reasonable failure policy) but it is + * definitely preferable to use the flag rather than opencode endless + * loop around allocator. + * Allocating pages from the buddy with __GFP_NOFAIL and order > 1 is + * not supported. Please consider using kvmalloc() instead. + */ +#define __GFP_IO ((__force gfp_t)___GFP_IO) +#define __GFP_FS ((__force gfp_t)___GFP_FS) +#define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */ +#define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */ +#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) +#define __GFP_RETRY_MAYFAIL ((__force gfp_t)___GFP_RETRY_MAYFAIL) +#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) +#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) + +/** + * DOC: Action modifiers + * + * Action modifiers + * ---------------- + * + * %__GFP_NOWARN suppresses allocation failure reports. + * + * %__GFP_COMP address compound page metadata. + * + * %__GFP_ZERO returns a zeroed page on success. + * + * %__GFP_ZEROTAGS zeroes memory tags at allocation time if the memory itself + * is being zeroed (either via __GFP_ZERO or via init_on_alloc, provided that + * __GFP_SKIP_ZERO is not set). This flag is intended for optimization: setting + * memory tags at the same time as zeroing memory has minimal additional + * performance impact. + * + * %__GFP_SKIP_KASAN makes KASAN skip unpoisoning on page allocation. + * Used for userspace and vmalloc pages; the latter are unpoisoned by + * kasan_unpoison_vmalloc instead. For userspace pages, results in + * poisoning being skipped as well, see should_skip_kasan_poison for + * details. Only effective in HW_TAGS mode. + */ +#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) +#define __GFP_COMP ((__force gfp_t)___GFP_COMP) +#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) +#define __GFP_ZEROTAGS ((__force gfp_t)___GFP_ZEROTAGS) +#define __GFP_SKIP_ZERO ((__force gfp_t)___GFP_SKIP_ZERO) +#define __GFP_SKIP_KASAN ((__force gfp_t)___GFP_SKIP_KASAN) + +/* Disable lockdep for GFP context tracking */ +#define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) + +/* Room for N __GFP_FOO bits */ +#define __GFP_BITS_SHIFT ___GFP_LAST_BIT +#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) + +/** + * DOC: Useful GFP flag combinations + * + * Useful GFP flag combinations + * ---------------------------- + * + * Useful GFP flag combinations that are commonly used. It is recommended + * that subsystems start with one of these combinations and then set/clear + * %__GFP_FOO flags as necessary. + * + * %GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower + * watermark is applied to allow access to "atomic reserves". + * The current implementation doesn't support NMI and few other strict + * non-preemptive contexts (e.g. raw_spin_lock). The same applies to %GFP_NOWAIT. + * + * %GFP_KERNEL is typical for kernel-internal allocations. The caller requires + * %ZONE_NORMAL or a lower zone for direct access but can direct reclaim. + * + * %GFP_KERNEL_ACCOUNT is the same as GFP_KERNEL, except the allocation is + * accounted to kmemcg. + * + * %GFP_NOWAIT is for kernel allocations that should not stall for direct + * reclaim, start physical IO or use any filesystem callback. It is very + * likely to fail to allocate memory, even for very small allocations. + * + * %GFP_NOIO will use direct reclaim to discard clean pages or slab pages + * that do not require the starting of any physical IO. + * Please try to avoid using this flag directly and instead use + * memalloc_noio_{save,restore} to mark the whole scope which cannot + * perform any IO with a short explanation why. All allocation requests + * will inherit GFP_NOIO implicitly. + * + * %GFP_NOFS will use direct reclaim but will not use any filesystem interfaces. + * Please try to avoid using this flag directly and instead use + * memalloc_nofs_{save,restore} to mark the whole scope which cannot/shouldn't + * recurse into the FS layer with a short explanation why. All allocation + * requests will inherit GFP_NOFS implicitly. + * + * %GFP_USER is for userspace allocations that also need to be directly + * accessibly by the kernel or hardware. It is typically used by hardware + * for buffers that are mapped to userspace (e.g. graphics) that hardware + * still must DMA to. cpuset limits are enforced for these allocations. + * + * %GFP_DMA exists for historical reasons and should be avoided where possible. + * The flags indicates that the caller requires that the lowest zone be + * used (%ZONE_DMA or 16M on x86-64). Ideally, this would be removed but + * it would require careful auditing as some users really require it and + * others use the flag to avoid lowmem reserves in %ZONE_DMA and treat the + * lowest zone as a type of emergency reserve. + * + * %GFP_DMA32 is similar to %GFP_DMA except that the caller requires a 32-bit + * address. Note that kmalloc(..., GFP_DMA32) does not return DMA32 memory + * because the DMA32 kmalloc cache array is not implemented. + * (Reason: there is no such user in kernel). + * + * %GFP_HIGHUSER is for userspace allocations that may be mapped to userspace, + * do not need to be directly accessible by the kernel but that cannot + * move once in use. An example may be a hardware allocation that maps + * data directly into userspace but has no addressing limitations. + * + * %GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not + * need direct access to but can use kmap() when access is required. They + * are expected to be movable via page reclaim or page migration. Typically, + * pages on the LRU would also be allocated with %GFP_HIGHUSER_MOVABLE. + * + * %GFP_TRANSHUGE and %GFP_TRANSHUGE_LIGHT are used for THP allocations. They + * are compound allocations that will generally fail quickly if memory is not + * available and will not wake kswapd/kcompactd on failure. The _LIGHT + * version does not attempt reclaim/compaction at all and is by default used + * in page fault path, while the non-light is used by khugepaged. + */ +#define GFP_ATOMIC (__GFP_HIGH|__GFP_KSWAPD_RECLAIM) +#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) +#define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT) +#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM | __GFP_NOWARN) +#define GFP_NOIO (__GFP_RECLAIM) +#define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) +#define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL) +#define GFP_DMA __GFP_DMA +#define GFP_DMA32 __GFP_DMA32 +#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) +#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE | __GFP_SKIP_KASAN) +#define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ + __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM) +#define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM) + +#endif /* __LINUX_GFP_TYPES_H */ diff --git a/tools/include/linux/interval_tree_generic.h b/tools/include/linux/interval_tree_generic.h index aaa8a0767aa3..c5a2fed49eb0 100644 --- a/tools/include/linux/interval_tree_generic.h +++ b/tools/include/linux/interval_tree_generic.h @@ -77,7 +77,7 @@ ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node, \ * Cond2: start <= ITLAST(node) \ */ \ \ -static ITSTRUCT * \ +ITSTATIC ITSTRUCT * \ ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ { \ while (true) { \ @@ -104,12 +104,8 @@ ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ if (ITSTART(node) <= last) { /* Cond1 */ \ if (start <= ITLAST(node)) /* Cond2 */ \ return node; /* node is leftmost match */ \ - if (node->ITRB.rb_right) { \ - node = rb_entry(node->ITRB.rb_right, \ - ITSTRUCT, ITRB); \ - if (start <= node->ITSUBTREE) \ - continue; \ - } \ + node = rb_entry(node->ITRB.rb_right, ITSTRUCT, ITRB); \ + continue; \ } \ return NULL; /* No match */ \ } \ diff --git a/tools/include/linux/io.h b/tools/include/linux/io.h index e129871fe661..4b94b84160b8 100644 --- a/tools/include/linux/io.h +++ b/tools/include/linux/io.h @@ -2,4 +2,6 @@ #ifndef _TOOLS_IO_H #define _TOOLS_IO_H -#endif +#include <asm/io.h> + +#endif /* _TOOLS_IO_H */ diff --git a/tools/include/linux/kallsyms.h b/tools/include/linux/kallsyms.h index 5a37ccbec54f..f61a01dd7eb7 100644 --- a/tools/include/linux/kallsyms.h +++ b/tools/include/linux/kallsyms.h @@ -18,6 +18,7 @@ static inline const char *kallsyms_lookup(unsigned long addr, return NULL; } +#ifdef HAVE_BACKTRACE_SUPPORT #include <execinfo.h> #include <stdlib.h> static inline void print_ip_sym(const char *loglvl, unsigned long ip) @@ -30,5 +31,8 @@ static inline void print_ip_sym(const char *loglvl, unsigned long ip) free(name); } +#else +static inline void print_ip_sym(const char *loglvl, unsigned long ip) {} +#endif #endif diff --git a/tools/include/linux/kasan-tags.h b/tools/include/linux/kasan-tags.h new file mode 100644 index 000000000000..4f85f562512c --- /dev/null +++ b/tools/include/linux/kasan-tags.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_KASAN_TAGS_H +#define _LINUX_KASAN_TAGS_H + +#define KASAN_TAG_KERNEL 0xFF /* native kernel pointers tag */ +#define KASAN_TAG_INVALID 0xFE /* inaccessible memory tag */ +#define KASAN_TAG_MAX 0xFD /* maximum value for random tags */ + +#ifdef CONFIG_KASAN_HW_TAGS +#define KASAN_TAG_MIN 0xF0 /* minimum value for random tags */ +#else +#define KASAN_TAG_MIN 0x00 /* minimum value for random tags */ +#endif + +#endif /* LINUX_KASAN_TAGS_H */ diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h index 07cfad817d53..c8c18d3908a9 100644 --- a/tools/include/linux/kernel.h +++ b/tools/include/linux/kernel.h @@ -11,6 +11,7 @@ #include <linux/panic.h> #include <endian.h> #include <byteswap.h> +#include <linux/container_of.h> #ifndef UINT_MAX #define UINT_MAX (~0U) @@ -25,19 +26,6 @@ #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) #endif -#ifndef container_of -/** - * container_of - cast a member of a structure out to the containing structure - * @ptr: the pointer to the member. - * @type: the type of the container struct this is embedded in. - * @member: the name of the member within the struct. - * - */ -#define container_of(ptr, type, member) ({ \ - const typeof(((type *)0)->member) * __mptr = (ptr); \ - (type *)((char *)__mptr - offsetof(type, member)); }) -#endif - #ifndef max #define max(x, y) ({ \ typeof(x) _max1 = (x); \ diff --git a/tools/include/linux/livepatch_external.h b/tools/include/linux/livepatch_external.h new file mode 100644 index 000000000000..138af19b0f5c --- /dev/null +++ b/tools/include/linux/livepatch_external.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * External livepatch interfaces for patch creation tooling + */ + +#ifndef _LINUX_LIVEPATCH_EXTERNAL_H_ +#define _LINUX_LIVEPATCH_EXTERNAL_H_ + +#include <linux/types.h> + +#define KLP_RELOC_SEC_PREFIX ".klp.rela." +#define KLP_SYM_PREFIX ".klp.sym." + +#define __KLP_PRE_PATCH_PREFIX __klp_pre_patch_callback_ +#define __KLP_POST_PATCH_PREFIX __klp_post_patch_callback_ +#define __KLP_PRE_UNPATCH_PREFIX __klp_pre_unpatch_callback_ +#define __KLP_POST_UNPATCH_PREFIX __klp_post_unpatch_callback_ + +#define KLP_PRE_PATCH_PREFIX __stringify(__KLP_PRE_PATCH_PREFIX) +#define KLP_POST_PATCH_PREFIX __stringify(__KLP_POST_PATCH_PREFIX) +#define KLP_PRE_UNPATCH_PREFIX __stringify(__KLP_PRE_UNPATCH_PREFIX) +#define KLP_POST_UNPATCH_PREFIX __stringify(__KLP_POST_UNPATCH_PREFIX) + +struct klp_object; + +typedef int (*klp_pre_patch_t)(struct klp_object *obj); +typedef void (*klp_post_patch_t)(struct klp_object *obj); +typedef void (*klp_pre_unpatch_t)(struct klp_object *obj); +typedef void (*klp_post_unpatch_t)(struct klp_object *obj); + +/** + * struct klp_callbacks - pre/post live-(un)patch callback structure + * @pre_patch: executed before code patching + * @post_patch: executed after code patching + * @pre_unpatch: executed before code unpatching + * @post_unpatch: executed after code unpatching + * @post_unpatch_enabled: flag indicating if post-unpatch callback + * should run + * + * All callbacks are optional. Only the pre-patch callback, if provided, + * will be unconditionally executed. If the parent klp_object fails to + * patch for any reason, including a non-zero error status returned from + * the pre-patch callback, no further callbacks will be executed. + */ +struct klp_callbacks { + klp_pre_patch_t pre_patch; + klp_post_patch_t post_patch; + klp_pre_unpatch_t pre_unpatch; + klp_post_unpatch_t post_unpatch; + bool post_unpatch_enabled; +}; + +/* + * 'struct klp_{func,object}_ext' are compact "external" representations of + * 'struct klp_{func,object}'. They are used by objtool for livepatch + * generation. The structs are then read by the livepatch module and converted + * to the real structs before calling klp_enable_patch(). + * + * TODO make these the official API for klp_enable_patch(). That should + * simplify livepatch's interface as well as its data structure lifetime + * management. + */ +struct klp_func_ext { + const char *old_name; + void *new_func; + unsigned long sympos; +}; + +struct klp_object_ext { + const char *name; + struct klp_func_ext *funcs; + struct klp_callbacks callbacks; + unsigned int nr_funcs; +}; + +#endif /* _LINUX_LIVEPATCH_EXTERNAL_H_ */ diff --git a/tools/include/linux/math64.h b/tools/include/linux/math64.h index 4ad45d5943dc..8a67d478bf19 100644 --- a/tools/include/linux/math64.h +++ b/tools/include/linux/math64.h @@ -72,4 +72,9 @@ static inline u64 mul_u64_u64_div64(u64 a, u64 b, u64 c) } #endif +static inline u64 div_u64(u64 dividend, u32 divisor) +{ + return dividend / divisor; +} + #endif /* _LINUX_MATH64_H */ diff --git a/tools/include/linux/moduleparam.h b/tools/include/linux/moduleparam.h new file mode 100644 index 000000000000..4c4d05bef0cb --- /dev/null +++ b/tools/include/linux/moduleparam.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_LINUX_MODULE_PARAMS_H +#define _TOOLS_LINUX_MODULE_PARAMS_H + +#define MODULE_PARM_DESC(parm, desc) + +#endif // _TOOLS_LINUX_MODULE_PARAMS_H diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h index 453a4f4ef39d..c6def4049b1a 100644 --- a/tools/include/linux/objtool_types.h +++ b/tools/include/linux/objtool_types.h @@ -54,4 +54,19 @@ struct unwind_hint { #define UNWIND_HINT_TYPE_SAVE 6 #define UNWIND_HINT_TYPE_RESTORE 7 +/* + * Annotate types + */ +#define ANNOTYPE_NOENDBR 1 +#define ANNOTYPE_RETPOLINE_SAFE 2 +#define ANNOTYPE_INSTR_BEGIN 3 +#define ANNOTYPE_INSTR_END 4 +#define ANNOTYPE_UNRET_BEGIN 5 +#define ANNOTYPE_IGNORE_ALTS 6 +#define ANNOTYPE_INTRA_FUNCTION_CALL 7 +#define ANNOTYPE_REACHABLE 8 +#define ANNOTYPE_NOCFI 9 + +#define ANNOTYPE_DATA_SPECIAL 1 + #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/include/linux/pci_ids.h b/tools/include/linux/pci_ids.h new file mode 120000 index 000000000000..1c9e88f41261 --- /dev/null +++ b/tools/include/linux/pci_ids.h @@ -0,0 +1 @@ +../../../include/linux/pci_ids.h
\ No newline at end of file diff --git a/tools/include/linux/prandom.h b/tools/include/linux/prandom.h new file mode 100644 index 000000000000..b745041ccd6a --- /dev/null +++ b/tools/include/linux/prandom.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TOOLS_LINUX_PRANDOM_H +#define __TOOLS_LINUX_PRANDOM_H + +#include <linux/types.h> + +struct rnd_state { + __u32 s1, s2, s3, s4; +}; + +/* + * Handle minimum values for seeds + */ +static inline u32 __seed(u32 x, u32 m) +{ + return (x < m) ? x + m : x; +} + +/** + * prandom_seed_state - set seed for prandom_u32_state(). + * @state: pointer to state structure to receive the seed. + * @seed: arbitrary 64-bit value to use as a seed. + */ +static inline void prandom_seed_state(struct rnd_state *state, u64 seed) +{ + u32 i = ((seed >> 32) ^ (seed << 10) ^ seed) & 0xffffffffUL; + + state->s1 = __seed(i, 2U); + state->s2 = __seed(i, 8U); + state->s3 = __seed(i, 16U); + state->s4 = __seed(i, 128U); +} + +/** + * prandom_u32_state - seeded pseudo-random number generator. + * @state: pointer to state structure holding seeded state. + * + * This is used for pseudo-randomness with no outside seeding. + * For more random results, use get_random_u32(). + */ +static inline u32 prandom_u32_state(struct rnd_state *state) +{ +#define TAUSWORTHE(s, a, b, c, d) (((s & c) << d) ^ (((s << a) ^ s) >> b)) + state->s1 = TAUSWORTHE(state->s1, 6U, 13U, 4294967294U, 18U); + state->s2 = TAUSWORTHE(state->s2, 2U, 27U, 4294967288U, 2U); + state->s3 = TAUSWORTHE(state->s3, 13U, 21U, 4294967280U, 7U); + state->s4 = TAUSWORTHE(state->s4, 3U, 12U, 4294967168U, 13U); + + return (state->s1 ^ state->s2 ^ state->s3 ^ state->s4); +} +#endif // __TOOLS_LINUX_PRANDOM_H diff --git a/tools/include/linux/refcount.h b/tools/include/linux/refcount.h index 36cb29bc57c2..1f30956e070d 100644 --- a/tools/include/linux/refcount.h +++ b/tools/include/linux/refcount.h @@ -60,6 +60,11 @@ static inline void refcount_set(refcount_t *r, unsigned int n) atomic_set(&r->refs, n); } +static inline void refcount_set_release(refcount_t *r, unsigned int n) +{ + atomic_set(&r->refs, n); +} + static inline unsigned int refcount_read(const refcount_t *r) { return atomic_read(&r->refs); diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h index 51b25e9c4ec7..94937a699402 100644 --- a/tools/include/linux/slab.h +++ b/tools/include/linux/slab.h @@ -4,14 +4,35 @@ #include <linux/types.h> #include <linux/gfp.h> +#include <pthread.h> -#define SLAB_PANIC 2 #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ #define kzalloc_node(size, flags, node) kmalloc(size, flags) +enum _slab_flag_bits { + _SLAB_KMALLOC, + _SLAB_HWCACHE_ALIGN, + _SLAB_PANIC, + _SLAB_TYPESAFE_BY_RCU, + _SLAB_ACCOUNT, + _SLAB_FLAGS_LAST_BIT +}; + +#define __SLAB_FLAG_BIT(nr) ((unsigned int __force)(1U << (nr))) +#define __SLAB_FLAG_UNUSED ((unsigned int __force)(0U)) + +#define SLAB_HWCACHE_ALIGN __SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN) +#define SLAB_PANIC __SLAB_FLAG_BIT(_SLAB_PANIC) +#define SLAB_TYPESAFE_BY_RCU __SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU) +#ifdef CONFIG_MEMCG +# define SLAB_ACCOUNT __SLAB_FLAG_BIT(_SLAB_ACCOUNT) +#else +# define SLAB_ACCOUNT __SLAB_FLAG_UNUSED +#endif void *kmalloc(size_t size, gfp_t gfp); void kfree(void *p); +void *kmalloc_array(size_t n, size_t size, gfp_t gfp); bool slab_is_available(void); @@ -22,6 +43,98 @@ enum slab_state { FULL }; +struct kmem_cache { + pthread_mutex_t lock; + unsigned int size; + unsigned int align; + unsigned int sheaf_capacity; + int nr_objs; + void *objs; + void (*ctor)(void *); + bool non_kernel_enabled; + unsigned int non_kernel; + unsigned long nr_allocated; + unsigned long nr_tallocated; + bool exec_callback; + void (*callback)(void *); + void *private; +}; + +struct kmem_cache_args { + /** + * @align: The required alignment for the objects. + * + * %0 means no specific alignment is requested. + */ + unsigned int align; + /** + * @sheaf_capacity: The maximum size of the sheaf. + */ + unsigned int sheaf_capacity; + /** + * @useroffset: Usercopy region offset. + * + * %0 is a valid offset, when @usersize is non-%0 + */ + unsigned int useroffset; + /** + * @usersize: Usercopy region size. + * + * %0 means no usercopy region is specified. + */ + unsigned int usersize; + /** + * @freeptr_offset: Custom offset for the free pointer + * in &SLAB_TYPESAFE_BY_RCU caches + * + * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer + * outside of the object. This might cause the object to grow in size. + * Cache creators that have a reason to avoid this can specify a custom + * free pointer offset in their struct where the free pointer will be + * placed. + * + * Note that placing the free pointer inside the object requires the + * caller to ensure that no fields are invalidated that are required to + * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for + * details). + * + * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset + * is specified, %use_freeptr_offset must be set %true. + * + * Note that @ctor currently isn't supported with custom free pointers + * as a @ctor requires an external free pointer. + */ + unsigned int freeptr_offset; + /** + * @use_freeptr_offset: Whether a @freeptr_offset is used. + */ + bool use_freeptr_offset; + /** + * @ctor: A constructor for the objects. + * + * The constructor is invoked for each object in a newly allocated slab + * page. It is the cache user's responsibility to free object in the + * same state as after calling the constructor, or deal appropriately + * with any differences between a freshly constructed and a reallocated + * object. + * + * %NULL means no constructor. + */ + void (*ctor)(void *); +}; + +struct slab_sheaf { + union { + struct list_head barn_list; + /* only used for prefilled sheafs */ + unsigned int capacity; + }; + struct kmem_cache *cache; + unsigned int size; + int node; /* only used for rcu_sheaf */ + void *objects[]; +}; + static inline void *kzalloc(size_t size, gfp_t gfp) { return kmalloc(size, gfp | __GFP_ZERO); @@ -36,12 +149,57 @@ static inline void *kmem_cache_alloc(struct kmem_cache *cachep, int flags) } void kmem_cache_free(struct kmem_cache *cachep, void *objp); -struct kmem_cache *kmem_cache_create(const char *name, unsigned int size, - unsigned int align, unsigned int flags, - void (*ctor)(void *)); + +struct kmem_cache * +__kmem_cache_create_args(const char *name, unsigned int size, + struct kmem_cache_args *args, unsigned int flags); + +/* If NULL is passed for @args, use this variant with default arguments. */ +static inline struct kmem_cache * +__kmem_cache_default_args(const char *name, unsigned int size, + struct kmem_cache_args *args, unsigned int flags) +{ + struct kmem_cache_args kmem_default_args = {}; + + return __kmem_cache_create_args(name, size, &kmem_default_args, flags); +} + +static inline struct kmem_cache * +__kmem_cache_create(const char *name, unsigned int size, unsigned int align, + unsigned int flags, void (*ctor)(void *)) +{ + struct kmem_cache_args kmem_args = { + .align = align, + .ctor = ctor, + }; + + return __kmem_cache_create_args(name, size, &kmem_args, flags); +} + +#define kmem_cache_create(__name, __object_size, __args, ...) \ + _Generic((__args), \ + struct kmem_cache_args *: __kmem_cache_create_args, \ + void *: __kmem_cache_default_args, \ + default: __kmem_cache_create)(__name, __object_size, __args, __VA_ARGS__) void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list); int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, void **list); +struct slab_sheaf * +kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size); + +void * +kmem_cache_alloc_from_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf *sheaf); + +void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf *sheaf); +int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf **sheafp, unsigned int size); + +static inline unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf) +{ + return sheaf->size; +} #endif /* _TOOLS_SLAB_H */ diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h index 5a00b8b2cf9f..cfb6ddeb292b 100644 --- a/tools/include/linux/static_call_types.h +++ b/tools/include/linux/static_call_types.h @@ -25,6 +25,8 @@ #define STATIC_CALL_SITE_INIT 2UL /* init section */ #define STATIC_CALL_SITE_FLAGS 3UL +#ifndef __ASSEMBLY__ + /* * The static call site table needs to be created by external tooling (objtool * or a compiler plugin). @@ -100,4 +102,6 @@ struct static_call_key { #endif /* CONFIG_HAVE_STATIC_CALL */ +#endif /* __ASSEMBLY__ */ + #endif /* _STATIC_CALL_TYPES_H */ diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h index 8499f509f03e..51ad3cf4fa82 100644 --- a/tools/include/linux/string.h +++ b/tools/include/linux/string.h @@ -44,6 +44,20 @@ static inline bool strstarts(const char *str, const char *prefix) return strncmp(str, prefix, strlen(prefix)) == 0; } +/* + * Checks if a string ends with another. + */ +static inline bool str_ends_with(const char *str, const char *substr) +{ + size_t len = strlen(str); + size_t sublen = strlen(substr); + + if (sublen > len) + return false; + + return !strcmp(str + len - sublen, substr); +} + extern char * __must_check skip_spaces(const char *); extern char *strim(char *); diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h index 8519386acd23..4928e33d44ac 100644 --- a/tools/include/linux/types.h +++ b/tools/include/linux/types.h @@ -42,6 +42,8 @@ typedef __s16 s16; typedef __u8 u8; typedef __s8 s8; +typedef unsigned long long ullong; + #ifdef __CHECKER__ #define __bitwise __attribute__((bitwise)) #else diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index a1f55fb24bb3..8118e22844f1 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -23,23 +23,50 @@ else Q=@ endif -nolibc_arch := $(patsubst arm64,aarch64,$(ARCH)) -arch_file := arch-$(nolibc_arch).h +arch_files := arch.h $(wildcard arch-*.h) all_files := \ compiler.h \ crt.h \ ctype.h \ + dirent.h \ + elf.h \ errno.h \ + fcntl.h \ + getopt.h \ + inttypes.h \ + limits.h \ + math.h \ nolibc.h \ + poll.h \ + sched.h \ signal.h \ stackprotector.h \ std.h \ stdarg.h \ stdbool.h \ + stddef.h \ stdint.h \ stdlib.h \ string.h \ sys.h \ + sys/auxv.h \ + sys/ioctl.h \ + sys/mman.h \ + sys/mount.h \ + sys/prctl.h \ + sys/random.h \ + sys/reboot.h \ + sys/resource.h \ + sys/select.h \ + sys/stat.h \ + sys/syscall.h \ + sys/sysmacros.h \ + sys/time.h \ + sys/timerfd.h \ + sys/types.h \ + sys/uio.h \ + sys/utsname.h \ + sys/wait.h \ time.h \ types.h \ unistd.h \ @@ -55,7 +82,7 @@ help: @echo "Supported targets under nolibc:" @echo " all call \"headers\"" @echo " clean clean the sysroot" - @echo " headers prepare a sysroot in tools/include/nolibc/sysroot" + @echo " headers prepare a multi-arch sysroot in \$${OUTPUT}sysroot" @echo " headers_standalone like \"headers\", and also install kernel headers" @echo " help this help" @echo "" @@ -66,28 +93,21 @@ help: @echo " OUTPUT = $(OUTPUT)" @echo "" -# Note: when ARCH is "x86" we concatenate both x86_64 and i386 +# installs headers for all archs at once. headers: - $(Q)mkdir -p $(OUTPUT)sysroot - $(Q)mkdir -p $(OUTPUT)sysroot/include - $(Q)cp $(all_files) $(OUTPUT)sysroot/include/ - $(Q)if [ "$(ARCH)" = "x86" ]; then \ - sed -e \ - 's,^#ifndef _NOLIBC_ARCH_X86_64_H,#if !defined(_NOLIBC_ARCH_X86_64_H) \&\& defined(__x86_64__),' \ - arch-x86_64.h; \ - sed -e \ - 's,^#ifndef _NOLIBC_ARCH_I386_H,#if !defined(_NOLIBC_ARCH_I386_H) \&\& !defined(__x86_64__),' \ - arch-i386.h; \ - elif [ -e "$(arch_file)" ]; then \ - cat $(arch_file); \ - else \ - echo "Fatal: architecture $(ARCH) not yet supported by nolibc." >&2; \ - exit 1; \ - fi > $(OUTPUT)sysroot/include/arch.h + $(Q)mkdir -p "$(OUTPUT)sysroot" + $(Q)mkdir -p "$(OUTPUT)sysroot/include" + $(Q)cp --parents $(arch_files) $(all_files) "$(OUTPUT)sysroot/include/" headers_standalone: headers $(Q)$(MAKE) -C $(srctree) headers $(Q)$(MAKE) -C $(srctree) headers_install INSTALL_HDR_PATH=$(OUTPUT)sysroot +headers_check: headers_standalone + $(Q)for header in $(filter-out crt.h std.h,$(all_files)); do \ + $(CC) $(CLANG_CROSS_FLAGS) -Wall -Werror -nostdinc -fsyntax-only -x c /dev/null \ + -I$(or $(objtree),$(srctree))/usr/include -include $$header -include $$header || exit 1; \ + done + clean: $(call QUIET_CLEAN, nolibc) rm -rf "$(OUTPUT)sysroot" diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h index 6180ff99ab43..251c42579028 100644 --- a/tools/include/nolibc/arch-arm.h +++ b/tools/include/nolibc/arch-arm.h @@ -184,16 +184,16 @@ _arg1; \ }) +#ifndef NOLIBC_NO_RUNTIME /* startup code */ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { __asm__ volatile ( "mov r0, sp\n" /* save stack pointer to %r0, as arg1 of _start_c */ - "and ip, r0, #-8\n" /* sp must be 8-byte aligned in the callee */ - "mov sp, ip\n" "bl _start_c\n" /* transfer to c runtime */ ); __nolibc_entrypoint_epilogue(); } +#endif /* NOLIBC_NO_RUNTIME */ #endif /* _NOLIBC_ARCH_ARM_H */ diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-arm64.h index 06fdef7b291a..080a55a7144e 100644 --- a/tools/include/nolibc/arch-aarch64.h +++ b/tools/include/nolibc/arch-arm64.h @@ -1,16 +1,16 @@ /* SPDX-License-Identifier: LGPL-2.1 OR MIT */ /* - * AARCH64 specific definitions for NOLIBC + * ARM64 specific definitions for NOLIBC * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> */ -#ifndef _NOLIBC_ARCH_AARCH64_H -#define _NOLIBC_ARCH_AARCH64_H +#ifndef _NOLIBC_ARCH_ARM64_H +#define _NOLIBC_ARCH_ARM64_H #include "compiler.h" #include "crt.h" -/* Syscalls for AARCH64 : +/* Syscalls for ARM64 : * - registers are 64-bit * - stack is 16-byte aligned * - syscall number is passed in x8 @@ -141,14 +141,15 @@ _arg1; \ }) +#ifndef NOLIBC_NO_RUNTIME /* startup code */ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { __asm__ volatile ( "mov x0, sp\n" /* save stack pointer to x0, as arg1 of _start_c */ - "and sp, x0, -16\n" /* sp must be 16-byte aligned in the callee */ "bl _start_c\n" /* transfer to c runtime */ ); __nolibc_entrypoint_epilogue(); } -#endif /* _NOLIBC_ARCH_AARCH64_H */ +#endif /* NOLIBC_NO_RUNTIME */ +#endif /* _NOLIBC_ARCH_ARM64_H */ diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h deleted file mode 100644 index ff5afc35bbd8..000000000000 --- a/tools/include/nolibc/arch-i386.h +++ /dev/null @@ -1,180 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * i386 specific definitions for NOLIBC - * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> - */ - -#ifndef _NOLIBC_ARCH_I386_H -#define _NOLIBC_ARCH_I386_H - -#include "compiler.h" -#include "crt.h" - -/* Syscalls for i386 : - * - mostly similar to x86_64 - * - registers are 32-bit - * - syscall number is passed in eax - * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively - * - all registers are preserved (except eax of course) - * - the system call is performed by calling int $0x80 - * - syscall return comes in eax - * - the arguments are cast to long and assigned into the target registers - * which are then simply passed as registers to the asm code, so that we - * don't have to experience issues with register constraints. - * - the syscall number is always specified last in order to allow to force - * some registers before (gcc refuses a %-register at the last position). - * - * Also, i386 supports the old_select syscall if newselect is not available - */ -#define __ARCH_WANT_SYS_OLD_SELECT - -#define my_syscall0(num) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - register long _arg1 __asm__ ("ebx") = (long)(arg1); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall2(num, arg1, arg2) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - register long _arg1 __asm__ ("ebx") = (long)(arg1); \ - register long _arg2 __asm__ ("ecx") = (long)(arg2); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - register long _arg1 __asm__ ("ebx") = (long)(arg1); \ - register long _arg2 __asm__ ("ecx") = (long)(arg2); \ - register long _arg3 __asm__ ("edx") = (long)(arg3); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - register long _arg1 __asm__ ("ebx") = (long)(arg1); \ - register long _arg2 __asm__ ("ecx") = (long)(arg2); \ - register long _arg3 __asm__ ("edx") = (long)(arg3); \ - register long _arg4 __asm__ ("esi") = (long)(arg4); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - register long _arg1 __asm__ ("ebx") = (long)(arg1); \ - register long _arg2 __asm__ ("ecx") = (long)(arg2); \ - register long _arg3 __asm__ ("edx") = (long)(arg3); \ - register long _arg4 __asm__ ("esi") = (long)(arg4); \ - register long _arg5 __asm__ ("edi") = (long)(arg5); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ -({ \ - long _eax = (long)(num); \ - long _arg6 = (long)(arg6); /* Always in memory */ \ - __asm__ volatile ( \ - "pushl %[_arg6]\n\t" \ - "pushl %%ebp\n\t" \ - "movl 4(%%esp),%%ebp\n\t" \ - "int $0x80\n\t" \ - "popl %%ebp\n\t" \ - "addl $4,%%esp\n\t" \ - : "+a"(_eax) /* %eax */ \ - : "b"(arg1), /* %ebx */ \ - "c"(arg2), /* %ecx */ \ - "d"(arg3), /* %edx */ \ - "S"(arg4), /* %esi */ \ - "D"(arg5), /* %edi */ \ - [_arg6]"m"(_arg6) /* memory */ \ - : "memory", "cc" \ - ); \ - _eax; \ -}) - -/* startup code */ -/* - * i386 System V ABI mandates: - * 1) last pushed argument must be 16-byte aligned. - * 2) The deepest stack frame should be set to zero - * - */ -void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) -{ - __asm__ volatile ( - "xor %ebp, %ebp\n" /* zero the stack frame */ - "mov %esp, %eax\n" /* save stack pointer to %eax, as arg1 of _start_c */ - "add $12, %esp\n" /* avoid over-estimating after the 'and' & 'sub' below */ - "and $-16, %esp\n" /* the %esp must be 16-byte aligned on 'call' */ - "sub $12, %esp\n" /* sub 12 to keep it aligned after the push %eax */ - "push %eax\n" /* push arg1 on stack to support plain stack modes too */ - "call _start_c\n" /* transfer to c runtime */ - "hlt\n" /* ensure it does not return */ - ); - __nolibc_entrypoint_epilogue(); -} - -#endif /* _NOLIBC_ARCH_I386_H */ diff --git a/tools/include/nolibc/arch-loongarch.h b/tools/include/nolibc/arch-loongarch.h index fb519545959e..c894176c3f89 100644 --- a/tools/include/nolibc/arch-loongarch.h +++ b/tools/include/nolibc/arch-loongarch.h @@ -142,21 +142,16 @@ _arg1; \ }) -#if __loongarch_grlen == 32 -#define LONG_BSTRINS "bstrins.w" -#else /* __loongarch_grlen == 64 */ -#define LONG_BSTRINS "bstrins.d" -#endif - +#ifndef NOLIBC_NO_RUNTIME /* startup code */ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { __asm__ volatile ( "move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */ - LONG_BSTRINS " $sp, $zero, 3, 0\n" /* $sp must be 16-byte aligned */ "bl _start_c\n" /* transfer to c runtime */ ); __nolibc_entrypoint_epilogue(); } +#endif /* NOLIBC_NO_RUNTIME */ #endif /* _NOLIBC_ARCH_LOONGARCH_H */ diff --git a/tools/include/nolibc/arch-m68k.h b/tools/include/nolibc/arch-m68k.h new file mode 100644 index 000000000000..2a4fbada5e79 --- /dev/null +++ b/tools/include/nolibc/arch-m68k.h @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * m68k specific definitions for NOLIBC + * Copyright (C) 2025 Daniel Palmer<daniel@thingy.jp> + * + * Roughly based on one or more of the other arch files. + * + */ + +#ifndef _NOLIBC_ARCH_M68K_H +#define _NOLIBC_ARCH_M68K_H + +#include "compiler.h" +#include "crt.h" + +#define _NOLIBC_SYSCALL_CLOBBERLIST "memory" + +#define my_syscall0(num) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r"(_num) \ + : "r"(_num) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r"(_num) \ + : "r"(_arg1) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + register long _arg2 __asm__ ("d2") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r"(_num) \ + : "r"(_arg1), "r"(_arg2) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + register long _arg2 __asm__ ("d2") = (long)(arg2); \ + register long _arg3 __asm__ ("d3") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r"(_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + register long _arg2 __asm__ ("d2") = (long)(arg2); \ + register long _arg3 __asm__ ("d3") = (long)(arg3); \ + register long _arg4 __asm__ ("d4") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r" (_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + register long _arg2 __asm__ ("d2") = (long)(arg2); \ + register long _arg3 __asm__ ("d3") = (long)(arg3); \ + register long _arg4 __asm__ ("d4") = (long)(arg4); \ + register long _arg5 __asm__ ("d5") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r" (_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + register long _arg2 __asm__ ("d2") = (long)(arg2); \ + register long _arg3 __asm__ ("d3") = (long)(arg3); \ + register long _arg4 __asm__ ("d4") = (long)(arg4); \ + register long _arg5 __asm__ ("d5") = (long)(arg5); \ + register long _arg6 __asm__ ("a0") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r" (_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#ifndef NOLIBC_NO_RUNTIME +void _start(void); +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) +{ + __asm__ volatile ( + "movel %sp, %sp@-\n" + "jsr _start_c\n" + ); + __nolibc_entrypoint_epilogue(); +} +#endif /* NOLIBC_NO_RUNTIME */ + +#endif /* _NOLIBC_ARCH_M68K_H */ diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h index 1791a8ce58da..a72506ceec6b 100644 --- a/tools/include/nolibc/arch-mips.h +++ b/tools/include/nolibc/arch-mips.h @@ -10,7 +10,7 @@ #include "compiler.h" #include "crt.h" -#if !defined(_ABIO32) +#if !defined(_ABIO32) && !defined(_ABIN32) && !defined(_ABI64) #error Unsupported MIPS ABI #endif @@ -32,11 +32,32 @@ * - the arguments are cast to long and assigned into the target registers * which are then simply passed as registers to the asm code, so that we * don't have to experience issues with register constraints. + * + * Syscalls for MIPS ABI N32, same as ABI O32 with the following differences : + * - arguments are in a0, a1, a2, a3, t0, t1, t2, t3. + * t0..t3 are also known as a4..a7. + * - stack is 16-byte aligned */ +#if defined(_ABIO32) + #define _NOLIBC_SYSCALL_CLOBBERLIST \ "memory", "cc", "at", "v1", "hi", "lo", \ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" +#define _NOLIBC_SYSCALL_STACK_RESERVE "addiu $sp, $sp, -32\n" +#define _NOLIBC_SYSCALL_STACK_UNRESERVE "addiu $sp, $sp, 32\n" + +#else /* _ABIN32 || _ABI64 */ + +/* binutils, GCC and clang disagree about register aliases, use numbers instead. */ +#define _NOLIBC_SYSCALL_CLOBBERLIST \ + "memory", "cc", "at", "v1", \ + "10", "11", "12", "13", "14", "15", "24", "25" + +#define _NOLIBC_SYSCALL_STACK_RESERVE +#define _NOLIBC_SYSCALL_STACK_UNRESERVE + +#endif /* _ABIO32 */ #define my_syscall0(num) \ ({ \ @@ -44,9 +65,9 @@ register long _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "r"(_num) \ : _NOLIBC_SYSCALL_CLOBBERLIST \ @@ -61,9 +82,9 @@ register long _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1) \ @@ -80,9 +101,9 @@ register long _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2) \ @@ -100,9 +121,9 @@ register long _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3) \ @@ -120,9 +141,9 @@ register long _arg4 __asm__ ("a3") = (long)(arg4); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r" (_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ @@ -131,6 +152,8 @@ _arg4 ? -_num : _num; \ }) +#if defined(_ABIO32) + #define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ ({ \ register long _num __asm__ ("v0") = (num); \ @@ -141,10 +164,10 @@ register long _arg5 = (long)(arg5); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "sw %7, 16($sp)\n" \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r" (_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ @@ -164,11 +187,53 @@ register long _arg6 = (long)(arg6); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "sw %7, 16($sp)\n" \ "sw %8, 20($sp)\n" \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#else /* _ABIN32 || _ABI64 */ + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("v0") = (num); \ + register long _arg1 __asm__ ("$4") = (long)(arg1); \ + register long _arg2 __asm__ ("$5") = (long)(arg2); \ + register long _arg3 __asm__ ("$6") = (long)(arg3); \ + register long _arg4 __asm__ ("$7") = (long)(arg4); \ + register long _arg5 __asm__ ("$8") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("v0") = (num); \ + register long _arg1 __asm__ ("$4") = (long)(arg1); \ + register long _arg2 __asm__ ("$5") = (long)(arg2); \ + register long _arg3 __asm__ ("$6") = (long)(arg3); \ + register long _arg4 __asm__ ("$7") = (long)(arg4); \ + register long _arg5 __asm__ ("$8") = (long)(arg5); \ + register long _arg6 __asm__ ("$9") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "syscall\n" \ : "=r" (_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ @@ -178,29 +243,30 @@ _arg4 ? -_num : _num; \ }) +#endif /* _ABIO32 */ + +#ifndef NOLIBC_NO_RUNTIME /* startup code, note that it's called __start on MIPS */ +void __start(void); void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __start(void) { __asm__ volatile ( - ".set push\n" - ".set noreorder\n" - "bal 1f\n" /* prime $ra for .cpload */ - "nop\n" - "1:\n" - ".cpload $ra\n" "move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */ - "addiu $sp, $sp, -4\n" /* space for .cprestore to store $gp */ - ".cprestore 0\n" - "li $t0, -8\n" - "and $sp, $sp, $t0\n" /* $sp must be 8-byte aligned */ +#if defined(_ABIO32) "addiu $sp, $sp, -16\n" /* the callee expects to save a0..a3 there */ +#endif /* _ABIO32 */ "lui $t9, %hi(_start_c)\n" /* ABI requires current function address in $t9 */ "ori $t9, %lo(_start_c)\n" +#if defined(_ABI64) + "lui $t0, %highest(_start_c)\n" + "ori $t0, %higher(_start_c)\n" + "dsll $t0, 0x20\n" + "or $t9, $t0\n" +#endif /* _ABI64 */ "jalr $t9\n" /* transfer to c runtime */ - " nop\n" /* delayed slot */ - ".set pop\n" ); __nolibc_entrypoint_epilogue(); } +#endif /* NOLIBC_NO_RUNTIME */ #endif /* _NOLIBC_ARCH_MIPS_H */ diff --git a/tools/include/nolibc/arch-powerpc.h b/tools/include/nolibc/arch-powerpc.h index ee2fdb8d601d..e0c7e0b81f7c 100644 --- a/tools/include/nolibc/arch-powerpc.h +++ b/tools/include/nolibc/arch-powerpc.h @@ -183,6 +183,7 @@ #endif #endif /* !__powerpc64__ */ +#ifndef NOLIBC_NO_RUNTIME /* startup code */ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { @@ -201,7 +202,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s __asm__ volatile ( "mr 3, 1\n" /* save stack pointer to r3, as arg1 of _start_c */ - "clrrdi 1, 1, 4\n" /* align the stack to 16 bytes */ "li 0, 0\n" /* zero the frame pointer */ "stdu 1, -32(1)\n" /* the initial stack frame */ "bl _start_c\n" /* transfer to c runtime */ @@ -209,7 +209,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s #else __asm__ volatile ( "mr 3, 1\n" /* save stack pointer to r3, as arg1 of _start_c */ - "clrrwi 1, 1, 4\n" /* align the stack to 16 bytes */ "li 0, 0\n" /* zero the frame pointer */ "stwu 1, -16(1)\n" /* the initial stack frame */ "bl _start_c\n" /* transfer to c runtime */ @@ -217,5 +216,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s #endif __nolibc_entrypoint_epilogue(); } +#endif /* NOLIBC_NO_RUNTIME */ #endif /* _NOLIBC_ARCH_POWERPC_H */ diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h index 8827bf936212..1c00cacf57e1 100644 --- a/tools/include/nolibc/arch-riscv.h +++ b/tools/include/nolibc/arch-riscv.h @@ -139,6 +139,7 @@ _arg1; \ }) +#ifndef NOLIBC_NO_RUNTIME /* startup code */ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { @@ -148,10 +149,10 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s "lla gp, __global_pointer$\n" ".option pop\n" "mv a0, sp\n" /* save stack pointer to a0, as arg1 of _start_c */ - "andi sp, a0, -16\n" /* sp must be 16-byte aligned */ "call _start_c\n" /* transfer to c runtime */ ); __nolibc_entrypoint_epilogue(); } +#endif /* NOLIBC_NO_RUNTIME */ #endif /* _NOLIBC_ARCH_RISCV_H */ diff --git a/tools/include/nolibc/arch-s390.h b/tools/include/nolibc/arch-s390.h index f9ab83a219b8..74125a254ce3 100644 --- a/tools/include/nolibc/arch-s390.h +++ b/tools/include/nolibc/arch-s390.h @@ -5,8 +5,8 @@ #ifndef _NOLIBC_ARCH_S390_H #define _NOLIBC_ARCH_S390_H -#include <asm/signal.h> -#include <asm/unistd.h> +#include <linux/signal.h> +#include <linux/unistd.h> #include "compiler.h" #include "crt.h" @@ -139,6 +139,7 @@ _arg1; \ }) +#ifndef NOLIBC_NO_RUNTIME /* startup code */ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { @@ -150,6 +151,7 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s ); __nolibc_entrypoint_epilogue(); } +#endif /* NOLIBC_NO_RUNTIME */ struct s390_mmap_arg_struct { unsigned long addr; diff --git a/tools/include/nolibc/arch-sh.h b/tools/include/nolibc/arch-sh.h new file mode 100644 index 000000000000..7a421197d104 --- /dev/null +++ b/tools/include/nolibc/arch-sh.h @@ -0,0 +1,164 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * SuperH specific definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net> + */ + +#ifndef _NOLIBC_ARCH_SH_H +#define _NOLIBC_ARCH_SH_H + +#include "compiler.h" +#include "crt.h" + +/* + * Syscalls for SuperH: + * - registers are 32bit wide + * - syscall number is passed in r3 + * - arguments are in r4, r5, r6, r7, r0, r1, r2 + * - the system call is performed by calling trapa #31 + * - syscall return value is in r0 + */ + +#define my_syscall0(num) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + register long _arg2 __asm__ ("r5") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1), "r"(_arg2) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + register long _arg2 __asm__ ("r5") = (long)(arg2); \ + register long _arg3 __asm__ ("r6") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1), "r"(_arg2), "r"(_arg3) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + register long _arg2 __asm__ ("r5") = (long)(arg2); \ + register long _arg3 __asm__ ("r6") = (long)(arg3); \ + register long _arg4 __asm__ ("r7") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + register long _arg2 __asm__ ("r5") = (long)(arg2); \ + register long _arg3 __asm__ ("r6") = (long)(arg3); \ + register long _arg4 __asm__ ("r7") = (long)(arg4); \ + register long _arg5 __asm__ ("r0") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "r"(_arg5) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + register long _arg2 __asm__ ("r5") = (long)(arg2); \ + register long _arg3 __asm__ ("r6") = (long)(arg3); \ + register long _arg4 __asm__ ("r7") = (long)(arg4); \ + register long _arg5 __asm__ ("r0") = (long)(arg5); \ + register long _arg6 __asm__ ("r1") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "r"(_arg5), "r"(_arg6) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#ifndef NOLIBC_NO_RUNTIME +/* startup code */ +void _start_wrapper(void); +void __attribute__((weak,noreturn)) __nolibc_entrypoint __no_stack_protector _start_wrapper(void) +{ + __asm__ volatile ( + ".global _start\n" /* The C function will have a prologue, */ + ".type _start, @function\n" /* corrupting "sp" */ + ".weak _start\n" + "_start:\n" + + "mov sp, r4\n" /* save argc pointer to r4, as arg1 of _start_c */ + "bsr _start_c\n" /* transfer to c runtime */ + "nop\n" /* delay slot */ + + ".size _start, .-_start\n" + ); + __nolibc_entrypoint_epilogue(); +} +#endif /* NOLIBC_NO_RUNTIME */ + +#endif /* _NOLIBC_ARCH_SH_H */ diff --git a/tools/include/nolibc/arch-sparc.h b/tools/include/nolibc/arch-sparc.h new file mode 100644 index 000000000000..2ebb5686e105 --- /dev/null +++ b/tools/include/nolibc/arch-sparc.h @@ -0,0 +1,209 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * SPARC (32bit and 64bit) specific definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net> + */ + +#ifndef _NOLIBC_ARCH_SPARC_H +#define _NOLIBC_ARCH_SPARC_H + +#include <linux/unistd.h> + +#include "compiler.h" +#include "crt.h" + +/* + * Syscalls for SPARC: + * - registers are native word size + * - syscall number is passed in g1 + * - arguments are in o0-o5 + * - the system call is performed by calling a trap instruction + * - syscall return value is in o0 + * - syscall error flag is in the carry bit of the processor status register + */ + +#ifdef __arch64__ + +#define _NOLIBC_SYSCALL "t 0x6d\n" \ + "bcs,a %%xcc, 1f\n" \ + "sub %%g0, %%o0, %%o0\n" \ + "1:\n" + +#else + +#define _NOLIBC_SYSCALL "t 0x10\n" \ + "bcs,a 1f\n" \ + "sub %%g0, %%o0, %%o0\n" \ + "1:\n" + +#endif /* __arch64__ */ + +#define my_syscall0(num) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0"); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + register long _arg4 __asm__ ("o3") = (long)(arg4); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + register long _arg4 __asm__ ("o3") = (long)(arg4); \ + register long _arg5 __asm__ ("o4") = (long)(arg5); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + register long _arg4 __asm__ ("o3") = (long)(arg4); \ + register long _arg5 __asm__ ("o4") = (long)(arg5); \ + register long _arg6 __asm__ ("o5") = (long)(arg6); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#ifndef NOLIBC_NO_RUNTIME +/* startup code */ +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) +{ + __asm__ volatile ( + /* + * Save argc pointer to o0, as arg1 of _start_c. + * Account for the window save area, which is 16 registers wide. + */ +#ifdef __arch64__ + "add %sp, 128 + 2047, %o0\n" /* on sparc64 / v9 the stack is offset by 2047 */ +#else + "add %sp, 64, %o0\n" +#endif + "b,a _start_c\n" /* transfer to c runtime */ + ); + __nolibc_entrypoint_epilogue(); +} +#endif /* NOLIBC_NO_RUNTIME */ + +static pid_t getpid(void); + +static __attribute__((unused)) +pid_t sys_fork(void) +{ + pid_t parent, ret; + + parent = getpid(); + ret = my_syscall0(__NR_fork); + + /* The syscall returns the parent pid in the child instead of 0 */ + if (ret == parent) + return 0; + else + return ret; +} +#define sys_fork sys_fork + +static __attribute__((unused)) +pid_t sys_vfork(void) +{ + pid_t parent, ret; + + parent = getpid(); + ret = my_syscall0(__NR_vfork); + + /* The syscall returns the parent pid in the child instead of 0 */ + if (ret == parent) + return 0; + else + return ret; +} +#define sys_vfork sys_vfork + +#endif /* _NOLIBC_ARCH_SPARC_H */ diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86.h index 1e40620a2b33..f6c43ac5377b 100644 --- a/tools/include/nolibc/arch-x86_64.h +++ b/tools/include/nolibc/arch-x86.h @@ -1,15 +1,186 @@ /* SPDX-License-Identifier: LGPL-2.1 OR MIT */ /* - * x86_64 specific definitions for NOLIBC - * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> + * x86 specific definitions for NOLIBC (both 32- and 64-bit) + * Copyright (C) 2017-2025 Willy Tarreau <w@1wt.eu> */ -#ifndef _NOLIBC_ARCH_X86_64_H -#define _NOLIBC_ARCH_X86_64_H +#ifndef _NOLIBC_ARCH_X86_H +#define _NOLIBC_ARCH_X86_H #include "compiler.h" #include "crt.h" +#if !defined(__x86_64__) + +/* Syscalls for i386 : + * - mostly similar to x86_64 + * - registers are 32-bit + * - syscall number is passed in eax + * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively + * - all registers are preserved (except eax of course) + * - the system call is performed by calling int $0x80 + * - syscall return comes in eax + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * - the syscall number is always specified last in order to allow to force + * some registers before (gcc refuses a %-register at the last position). + * + * Also, i386 supports the old_select syscall if newselect is not available + */ +#define __ARCH_WANT_SYS_OLD_SELECT + +#define my_syscall0(num) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + register long _arg2 __asm__ ("ecx") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + register long _arg2 __asm__ ("ecx") = (long)(arg2); \ + register long _arg3 __asm__ ("edx") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + register long _arg2 __asm__ ("ecx") = (long)(arg2); \ + register long _arg3 __asm__ ("edx") = (long)(arg3); \ + register long _arg4 __asm__ ("esi") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + register long _arg2 __asm__ ("ecx") = (long)(arg2); \ + register long _arg3 __asm__ ("edx") = (long)(arg3); \ + register long _arg4 __asm__ ("esi") = (long)(arg4); \ + register long _arg5 __asm__ ("edi") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + long _eax = (long)(num); \ + long _arg6 = (long)(arg6); /* Always in memory */ \ + __asm__ volatile ( \ + "pushl %[_arg6]\n\t" \ + "pushl %%ebp\n\t" \ + "movl 4(%%esp),%%ebp\n\t" \ + "int $0x80\n\t" \ + "popl %%ebp\n\t" \ + "addl $4,%%esp\n\t" \ + : "+a"(_eax) /* %eax */ \ + : "b"(arg1), /* %ebx */ \ + "c"(arg2), /* %ecx */ \ + "d"(arg3), /* %edx */ \ + "S"(arg4), /* %esi */ \ + "D"(arg5), /* %edi */ \ + [_arg6]"m"(_arg6) /* memory */ \ + : "memory", "cc" \ + ); \ + _eax; \ +}) + +#ifndef NOLIBC_NO_RUNTIME +/* startup code */ +/* + * i386 System V ABI mandates: + * 1) last pushed argument must be 16-byte aligned. + * 2) The deepest stack frame should be set to zero + * + */ +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) +{ + __asm__ volatile ( + "xor %ebp, %ebp\n" /* zero the stack frame */ + "mov %esp, %eax\n" /* save stack pointer to %eax, as arg1 of _start_c */ + "sub $12, %esp\n" /* sub 12 to keep it aligned after the push %eax */ + "push %eax\n" /* push arg1 on stack to support plain stack modes too */ + "call _start_c\n" /* transfer to c runtime */ + "hlt\n" /* ensure it does not return */ + ); + __nolibc_entrypoint_epilogue(); +} +#endif /* NOLIBC_NO_RUNTIME */ + +#else /* !defined(__x86_64__) */ + /* Syscalls for x86_64 : * - registers are 64-bit * - syscall number is passed in rax @@ -154,6 +325,7 @@ _ret; \ }) +#ifndef NOLIBC_NO_RUNTIME /* startup code */ /* * x86-64 System V ABI mandates: @@ -166,12 +338,12 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s __asm__ volatile ( "xor %ebp, %ebp\n" /* zero the stack frame */ "mov %rsp, %rdi\n" /* save stack pointer to %rdi, as arg1 of _start_c */ - "and $-16, %rsp\n" /* %rsp must be 16-byte aligned before call */ "call _start_c\n" /* transfer to c runtime */ "hlt\n" /* ensure it does not return */ ); __nolibc_entrypoint_epilogue(); } +#endif /* NOLIBC_NO_RUNTIME */ #define NOLIBC_ARCH_HAS_MEMMOVE void *memmove(void *dst, const void *src, size_t len); @@ -183,7 +355,7 @@ void *memcpy(void *dst, const void *src, size_t len); void *memset(void *dst, int c, size_t len); __asm__ ( -".section .text.nolibc_memmove_memcpy\n" +".pushsection .text.nolibc_memmove_memcpy\n" ".weak memmove\n" ".weak memcpy\n" "memmove:\n" @@ -203,8 +375,9 @@ __asm__ ( "rep movsb\n\t" "cld\n\t" "retq\n" +".popsection\n" -".section .text.nolibc_memset\n" +".pushsection .text.nolibc_memset\n" ".weak memset\n" "memset:\n" "xchgl %eax, %esi\n\t" @@ -213,6 +386,8 @@ __asm__ ( "rep stosb\n\t" "popq %rax\n\t" "retq\n" +".popsection\n" ); -#endif /* _NOLIBC_ARCH_X86_64_H */ +#endif /* !defined(__x86_64__) */ +#endif /* _NOLIBC_ARCH_X86_H */ diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h index c8f4e5d3add9..a3adaf433f2c 100644 --- a/tools/include/nolibc/arch.h +++ b/tools/include/nolibc/arch.h @@ -3,26 +3,15 @@ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> */ -/* Below comes the architecture-specific code. For each architecture, we have - * the syscall declarations and the _start code definition. This is the only - * global part. On all architectures the kernel puts everything in the stack - * before jumping to _start just above us, without any return address (_start - * is not a function but an entry point). So at the stack pointer we find argc. - * Then argv[] begins, and ends at the first NULL. Then we have envp which - * starts and ends with a NULL as well. So envp=argv+argc+1. - */ - #ifndef _NOLIBC_ARCH_H #define _NOLIBC_ARCH_H -#if defined(__x86_64__) -#include "arch-x86_64.h" -#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) -#include "arch-i386.h" +#if defined(__x86_64__) || defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) +#include "arch-x86.h" #elif defined(__ARM_EABI__) #include "arch-arm.h" #elif defined(__aarch64__) -#include "arch-aarch64.h" +#include "arch-arm64.h" #elif defined(__mips__) #include "arch-mips.h" #elif defined(__powerpc__) @@ -33,6 +22,12 @@ #include "arch-s390.h" #elif defined(__loongarch__) #include "arch-loongarch.h" +#elif defined(__sparc__) +#include "arch-sparc.h" +#elif defined(__m68k__) +#include "arch-m68k.h" +#elif defined(__sh__) +#include "arch-sh.h" #else #error Unsupported Architecture #endif diff --git a/tools/include/nolibc/compiler.h b/tools/include/nolibc/compiler.h index fa1f547e7f13..87090bbc53e0 100644 --- a/tools/include/nolibc/compiler.h +++ b/tools/include/nolibc/compiler.h @@ -12,6 +12,15 @@ # define __nolibc_has_attribute(attr) 0 #endif +#if defined(__has_feature) +# define __nolibc_has_feature(feature) __has_feature(feature) +#else +# define __nolibc_has_feature(feature) 0 +#endif + +#define __nolibc_aligned(alignment) __attribute__((aligned(alignment))) +#define __nolibc_aligned_as(type) __nolibc_aligned(__alignof__(type)) + #if __nolibc_has_attribute(naked) # define __nolibc_entrypoint __attribute__((naked)) # define __nolibc_entrypoint_epilogue() @@ -32,8 +41,8 @@ # define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector"))) #endif /* __nolibc_has_attribute(no_stack_protector) */ -#if __nolibc_has_attribute(fallthrough) -# define __nolibc_fallthrough do { } while (0); __attribute__((fallthrough)) +#if __nolibc_has_attribute(__fallthrough__) +# define __nolibc_fallthrough do { } while (0); __attribute__((__fallthrough__)) #else # define __nolibc_fallthrough do { } while (0) #endif /* __nolibc_has_attribute(fallthrough) */ diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h index bbcd5fd09806..d9262998dae9 100644 --- a/tools/include/nolibc/crt.h +++ b/tools/include/nolibc/crt.h @@ -7,9 +7,14 @@ #ifndef _NOLIBC_CRT_H #define _NOLIBC_CRT_H +#ifndef NOLIBC_NO_RUNTIME + +#include "compiler.h" + char **environ __attribute__((weak)); const unsigned long *_auxv __attribute__((weak)); +void _start(void); static void __stack_chk_init(void); static void exit(int); @@ -22,7 +27,11 @@ extern void (*const __init_array_end[])(int, char **, char**) __attribute__((wea extern void (*const __fini_array_start[])(void) __attribute__((weak)); extern void (*const __fini_array_end[])(void) __attribute__((weak)); +void _start_c(long *sp); __attribute__((weak,used)) +#if __nolibc_has_feature(undefined_behavior_sanitizer) + __attribute__((no_sanitize("function"))) +#endif void _start_c(long *sp) { long argc; @@ -81,4 +90,5 @@ void _start_c(long *sp) exit(exitcode); } +#endif /* NOLIBC_NO_RUNTIME */ #endif /* _NOLIBC_CRT_H */ diff --git a/tools/include/nolibc/ctype.h b/tools/include/nolibc/ctype.h index 6f90706d0644..470fdf34394a 100644 --- a/tools/include/nolibc/ctype.h +++ b/tools/include/nolibc/ctype.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_CTYPE_H #define _NOLIBC_CTYPE_H @@ -96,7 +99,4 @@ int ispunct(int c) return isgraph(c) && !isalnum(c); } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_CTYPE_H */ diff --git a/tools/include/nolibc/dirent.h b/tools/include/nolibc/dirent.h new file mode 100644 index 000000000000..61a122a60327 --- /dev/null +++ b/tools/include/nolibc/dirent.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Directory access for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_DIRENT_H +#define _NOLIBC_DIRENT_H + +#include "compiler.h" +#include "stdint.h" +#include "types.h" +#include "fcntl.h" + +#include <linux/limits.h> + +struct dirent { + ino_t d_ino; + char d_name[NAME_MAX + 1]; +}; + +/* See comment of FILE in stdio.h */ +typedef struct { + char dummy[1]; +} DIR; + +static __attribute__((unused)) +DIR *fdopendir(int fd) +{ + if (fd < 0) { + SET_ERRNO(EBADF); + return NULL; + } + return (DIR *)(intptr_t)~fd; +} + +static __attribute__((unused)) +DIR *opendir(const char *name) +{ + int fd; + + fd = open(name, O_RDONLY); + if (fd == -1) + return NULL; + return fdopendir(fd); +} + +static __attribute__((unused)) +int closedir(DIR *dirp) +{ + intptr_t i = (intptr_t)dirp; + + if (i >= 0) { + SET_ERRNO(EBADF); + return -1; + } + return close(~i); +} + +static __attribute__((unused)) +int readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result) +{ + char buf[sizeof(struct linux_dirent64) + NAME_MAX + 1] __nolibc_aligned_as(struct linux_dirent64); + struct linux_dirent64 *ldir = (void *)buf; + intptr_t i = (intptr_t)dirp; + int fd, ret; + + if (i >= 0) + return EBADF; + + fd = ~i; + + ret = sys_getdents64(fd, ldir, sizeof(buf)); + if (ret < 0) + return -ret; + if (ret == 0) { + *result = NULL; + return 0; + } + + /* + * getdents64() returns as many entries as fit the buffer. + * readdir() can only return one entry at a time. + * Make sure the non-returned ones are not skipped. + */ + ret = sys_lseek(fd, ldir->d_off, SEEK_SET); + if (ret < 0) + return -ret; + + entry->d_ino = ldir->d_ino; + /* the destination should always be big enough */ + strlcpy(entry->d_name, ldir->d_name, sizeof(entry->d_name)); + *result = entry; + return 0; +} + +#endif /* _NOLIBC_DIRENT_H */ diff --git a/tools/include/nolibc/elf.h b/tools/include/nolibc/elf.h new file mode 100644 index 000000000000..3e2c5228bf3d --- /dev/null +++ b/tools/include/nolibc/elf.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Shim elf.h header for NOLIBC. + * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_SYS_ELF_H +#define _NOLIBC_SYS_ELF_H + +#include <linux/elf.h> + +#endif /* _NOLIBC_SYS_ELF_H */ diff --git a/tools/include/nolibc/errno.h b/tools/include/nolibc/errno.h index a44486ff0477..08a33c40ec0c 100644 --- a/tools/include/nolibc/errno.h +++ b/tools/include/nolibc/errno.h @@ -4,10 +4,13 @@ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_ERRNO_H #define _NOLIBC_ERRNO_H -#include <asm/errno.h> +#include <linux/errno.h> #ifndef NOLIBC_IGNORE_ERRNO #define SET_ERRNO(v) do { errno = (v); } while (0) @@ -22,7 +25,4 @@ int errno __attribute__((weak)); */ #define MAX_ERRNO 4095 -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_ERRNO_H */ diff --git a/tools/include/nolibc/fcntl.h b/tools/include/nolibc/fcntl.h new file mode 100644 index 000000000000..bff2e542f20f --- /dev/null +++ b/tools/include/nolibc/fcntl.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * fcntl definition for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_FCNTL_H +#define _NOLIBC_FCNTL_H + +#include "arch.h" +#include "types.h" +#include "sys.h" + +/* + * int openat(int dirfd, const char *path, int flags[, mode_t mode]); + */ + +static __attribute__((unused)) +int sys_openat(int dirfd, const char *path, int flags, mode_t mode) +{ + return my_syscall4(__NR_openat, dirfd, path, flags, mode); +} + +static __attribute__((unused)) +int openat(int dirfd, const char *path, int flags, ...) +{ + mode_t mode = 0; + + if (flags & O_CREAT) { + va_list args; + + va_start(args, flags); + mode = va_arg(args, mode_t); + va_end(args); + } + + return __sysret(sys_openat(dirfd, path, flags, mode)); +} + +/* + * int open(const char *path, int flags[, mode_t mode]); + */ + +static __attribute__((unused)) +int sys_open(const char *path, int flags, mode_t mode) +{ + return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode); +} + +static __attribute__((unused)) +int open(const char *path, int flags, ...) +{ + mode_t mode = 0; + + if (flags & O_CREAT) { + va_list args; + + va_start(args, flags); + mode = va_arg(args, mode_t); + va_end(args); + } + + return __sysret(sys_open(path, flags, mode)); +} + +#endif /* _NOLIBC_FCNTL_H */ diff --git a/tools/include/nolibc/getopt.h b/tools/include/nolibc/getopt.h new file mode 100644 index 000000000000..87565e3b6a33 --- /dev/null +++ b/tools/include/nolibc/getopt.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * getopt function definitions for NOLIBC, adapted from musl libc + * Copyright (C) 2005-2020 Rich Felker, et al. + * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_GETOPT_H +#define _NOLIBC_GETOPT_H + +struct FILE; +static struct FILE *const stderr; +static int fprintf(struct FILE *stream, const char *fmt, ...); + +__attribute__((weak,unused,section(".data.nolibc_getopt"))) +char *optarg; + +__attribute__((weak,unused,section(".data.nolibc_getopt"))) +int optind = 1, opterr = 1, optopt; + +static __attribute__((unused)) +int getopt(int argc, char * const argv[], const char *optstring) +{ + static int __optpos; + int i; + char c, d; + char *optchar; + + if (!optind) { + __optpos = 0; + optind = 1; + } + + if (optind >= argc || !argv[optind]) + return -1; + + if (argv[optind][0] != '-') { + if (optstring[0] == '-') { + optarg = argv[optind++]; + return 1; + } + return -1; + } + + if (!argv[optind][1]) + return -1; + + if (argv[optind][1] == '-' && !argv[optind][2]) + return optind++, -1; + + if (!__optpos) + __optpos++; + c = argv[optind][__optpos]; + optchar = argv[optind] + __optpos; + __optpos++; + + if (!argv[optind][__optpos]) { + optind++; + __optpos = 0; + } + + if (optstring[0] == '-' || optstring[0] == '+') + optstring++; + + i = 0; + d = 0; + do { + d = optstring[i++]; + } while (d && d != c); + + if (d != c || c == ':') { + optopt = c; + if (optstring[0] != ':' && opterr) + fprintf(stderr, "%s: unrecognized option: %c\n", argv[0], *optchar); + return '?'; + } + if (optstring[i] == ':') { + optarg = NULL; + if (optstring[i + 1] != ':' || __optpos) { + optarg = argv[optind++]; + if (__optpos) + optarg += __optpos; + __optpos = 0; + } + if (optind > argc) { + optopt = c; + if (optstring[0] == ':') + return ':'; + if (opterr) + fprintf(stderr, "%s: option requires argument: %c\n", + argv[0], *optchar); + return '?'; + } + } + return c; +} + +#endif /* _NOLIBC_GETOPT_H */ diff --git a/tools/include/nolibc/inttypes.h b/tools/include/nolibc/inttypes.h new file mode 100644 index 000000000000..1977bd74bfeb --- /dev/null +++ b/tools/include/nolibc/inttypes.h @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ + +#include "nolibc.h" diff --git a/tools/include/nolibc/limits.h b/tools/include/nolibc/limits.h new file mode 100644 index 000000000000..306d4141f4d2 --- /dev/null +++ b/tools/include/nolibc/limits.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Shim limits.h header for NOLIBC. + * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de> + */ + +#include "nolibc.h" diff --git a/tools/include/nolibc/math.h b/tools/include/nolibc/math.h new file mode 100644 index 000000000000..9df823ddd412 --- /dev/null +++ b/tools/include/nolibc/math.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * math definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_SYS_MATH_H +#define _NOLIBC_SYS_MATH_H + +static __inline__ +double fabs(double x) +{ + return x >= 0 ? x : -x; +} + +static __inline__ +float fabsf(float x) +{ + return x >= 0 ? x : -x; +} + +static __inline__ +long double fabsl(long double x) +{ + return x >= 0 ? x : -x; +} + +#endif /* _NOLIBC_SYS_MATH_H */ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 92436b1e4441..272dfc961158 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -31,8 +31,7 @@ * - The third level is the libc call definition. It exposes the lower raw * sys_<name>() calls in a way that looks like what a libc usually does, * takes care of specific input values, and of setting errno upon error. - * There can be minor variations compared to standard libc calls. For - * example the open() call always takes 3 args here. + * There can be minor variations compared to standard libc calls. * * The errno variable is declared static and unused. This way it can be * optimized away if not used. However this means that a program made of @@ -97,14 +96,39 @@ #include "arch.h" #include "types.h" #include "sys.h" +#include "sys/auxv.h" +#include "sys/ioctl.h" +#include "sys/mman.h" +#include "sys/mount.h" +#include "sys/prctl.h" +#include "sys/random.h" +#include "sys/reboot.h" +#include "sys/resource.h" +#include "sys/select.h" +#include "sys/stat.h" +#include "sys/syscall.h" +#include "sys/sysmacros.h" +#include "sys/time.h" +#include "sys/timerfd.h" +#include "sys/uio.h" +#include "sys/utsname.h" +#include "sys/wait.h" #include "ctype.h" +#include "elf.h" +#include "sched.h" #include "signal.h" #include "unistd.h" +#include "stdbool.h" #include "stdio.h" #include "stdlib.h" #include "string.h" #include "time.h" #include "stackprotector.h" +#include "dirent.h" +#include "fcntl.h" +#include "getopt.h" +#include "poll.h" +#include "math.h" /* Used by programs to avoid std includes */ #define NOLIBC diff --git a/tools/include/nolibc/poll.h b/tools/include/nolibc/poll.h new file mode 100644 index 000000000000..0d053f93ea99 --- /dev/null +++ b/tools/include/nolibc/poll.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * poll definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_POLL_H +#define _NOLIBC_POLL_H + +#include "arch.h" +#include "sys.h" + +#include <linux/poll.h> +#include <linux/time.h> + +/* + * int poll(struct pollfd *fds, int nfds, int timeout); + */ + +static __attribute__((unused)) +int sys_poll(struct pollfd *fds, int nfds, int timeout) +{ +#if defined(__NR_ppoll) + struct timespec t; + + if (timeout >= 0) { + t.tv_sec = timeout / 1000; + t.tv_nsec = (timeout % 1000) * 1000000; + } + return my_syscall5(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0); +#elif defined(__NR_ppoll_time64) + struct __kernel_timespec t; + + if (timeout >= 0) { + t.tv_sec = timeout / 1000; + t.tv_nsec = (timeout % 1000) * 1000000; + } + return my_syscall5(__NR_ppoll_time64, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0); +#else + return my_syscall3(__NR_poll, fds, nfds, timeout); +#endif +} + +static __attribute__((unused)) +int poll(struct pollfd *fds, int nfds, int timeout) +{ + return __sysret(sys_poll(fds, nfds, timeout)); +} + +#endif /* _NOLIBC_POLL_H */ diff --git a/tools/include/nolibc/sched.h b/tools/include/nolibc/sched.h new file mode 100644 index 000000000000..32221562c166 --- /dev/null +++ b/tools/include/nolibc/sched.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * sched function definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_SCHED_H +#define _NOLIBC_SCHED_H + +#include "sys.h" + +#include <linux/sched.h> + +/* + * int setns(int fd, int nstype); + */ + +static __attribute__((unused)) +int sys_setns(int fd, int nstype) +{ + return my_syscall2(__NR_setns, fd, nstype); +} + +static __attribute__((unused)) +int setns(int fd, int nstype) +{ + return __sysret(sys_setns(fd, nstype)); +} + + +/* + * int unshare(int flags); + */ + +static __attribute__((unused)) +int sys_unshare(int flags) +{ + return my_syscall1(__NR_unshare, flags); +} + +static __attribute__((unused)) +int unshare(int flags) +{ + return __sysret(sys_unshare(flags)); +} + +#endif /* _NOLIBC_SCHED_H */ diff --git a/tools/include/nolibc/signal.h b/tools/include/nolibc/signal.h index 137552216e46..ac13e53ac31d 100644 --- a/tools/include/nolibc/signal.h +++ b/tools/include/nolibc/signal.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_SIGNAL_H #define _NOLIBC_SIGNAL_H @@ -13,13 +16,11 @@ #include "sys.h" /* This one is not marked static as it's needed by libgcc for divide by zero */ +int raise(int signal); __attribute__((weak,unused,section(".text.nolibc_raise"))) int raise(int signal) { return sys_kill(sys_getpid(), signal); } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_SIGNAL_H */ diff --git a/tools/include/nolibc/stackprotector.h b/tools/include/nolibc/stackprotector.h index 1d0d5259ec41..7123aa056cb0 100644 --- a/tools/include/nolibc/stackprotector.h +++ b/tools/include/nolibc/stackprotector.h @@ -9,6 +9,7 @@ #include "compiler.h" +#ifndef NOLIBC_NO_RUNTIME #if defined(_NOLIBC_STACKPROTECTOR) #include "sys.h" @@ -18,6 +19,7 @@ * triggering stack protector errors themselves */ +void __stack_chk_fail(void); __attribute__((weak,used,noreturn,section(".text.nolibc_stack_chk"))) void __stack_chk_fail(void) { @@ -28,6 +30,7 @@ void __stack_chk_fail(void) for (;;); } +void __stack_chk_fail_local(void); __attribute__((weak,noreturn,section(".text.nolibc_stack_chk"))) void __stack_chk_fail_local(void) { @@ -47,5 +50,6 @@ static __no_stack_protector void __stack_chk_init(void) #else /* !defined(_NOLIBC_STACKPROTECTOR) */ static void __stack_chk_init(void) {} #endif /* defined(_NOLIBC_STACKPROTECTOR) */ +#endif /* NOLIBC_NO_RUNTIME */ #endif /* _NOLIBC_STACKPROTECTOR_H */ diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h index 933bc0be7e1c..392f4dd94158 100644 --- a/tools/include/nolibc/std.h +++ b/tools/include/nolibc/std.h @@ -13,24 +13,22 @@ * syscall-specific stuff, as this file is expected to be included very early. */ -/* note: may already be defined */ -#ifndef NULL -#define NULL ((void *)0) -#endif - #include "stdint.h" +#include "stddef.h" + +#include <linux/types.h> /* those are commonly provided by sys/types.h */ typedef unsigned int dev_t; -typedef unsigned long ino_t; +typedef uint64_t ino_t; typedef unsigned int mode_t; typedef signed int pid_t; typedef unsigned int uid_t; typedef unsigned int gid_t; typedef unsigned long nlink_t; -typedef signed long off_t; +typedef int64_t off_t; typedef signed long blksize_t; typedef signed long blkcnt_t; -typedef signed long time_t; +typedef __kernel_time_t time_t; #endif /* _NOLIBC_STD_H */ diff --git a/tools/include/nolibc/stddef.h b/tools/include/nolibc/stddef.h new file mode 100644 index 000000000000..ecbd13eab1f5 --- /dev/null +++ b/tools/include/nolibc/stddef.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Stddef definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_STDDEF_H +#define _NOLIBC_STDDEF_H + +#include "stdint.h" + +/* note: may already be defined */ +#ifndef NULL +#define NULL ((void *)0) +#endif + +#ifndef offsetof +#define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD) +#endif + +#endif /* _NOLIBC_STDDEF_H */ diff --git a/tools/include/nolibc/stdint.h b/tools/include/nolibc/stdint.h index cd79ddd6170e..b052ad6303c3 100644 --- a/tools/include/nolibc/stdint.h +++ b/tools/include/nolibc/stdint.h @@ -39,8 +39,8 @@ typedef size_t uint_fast32_t; typedef int64_t int_fast64_t; typedef uint64_t uint_fast64_t; -typedef int64_t intmax_t; -typedef uint64_t uintmax_t; +typedef __INTMAX_TYPE__ intmax_t; +typedef __UINTMAX_TYPE__ uintmax_t; /* limits of integral types */ diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index 3892034198dd..1f16dab2ac88 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -4,12 +4,16 @@ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_STDIO_H #define _NOLIBC_STDIO_H #include "std.h" #include "arch.h" #include "errno.h" +#include "fcntl.h" #include "types.h" #include "sys.h" #include "stdarg.h" @@ -17,6 +21,8 @@ #include "string.h" #include "compiler.h" +static const char *strerror(int errnum); + #ifndef EOF #define EOF (-1) #endif @@ -50,6 +56,32 @@ FILE *fdopen(int fd, const char *mode __attribute__((unused))) return (FILE*)(intptr_t)~fd; } +static __attribute__((unused)) +FILE *fopen(const char *pathname, const char *mode) +{ + int flags, fd; + + switch (*mode) { + case 'r': + flags = O_RDONLY; + break; + case 'w': + flags = O_WRONLY | O_CREAT | O_TRUNC; + break; + case 'a': + flags = O_WRONLY | O_CREAT | O_APPEND; + break; + default: + SET_ERRNO(EINVAL); return NULL; + } + + if (mode[1] == '+') + flags = (flags & ~(O_RDONLY | O_WRONLY)) | O_RDWR; + + fd = open(pathname, flags, 0666); + return fdopen(fd, mode); +} + /* provides the fd of stream. */ static __attribute__((unused)) int fileno(FILE *stream) @@ -208,28 +240,40 @@ char *fgets(char *s, int size, FILE *stream) } -/* minimal vfprintf(). It supports the following formats: +/* minimal printf(). It supports the following formats: * - %[l*]{d,u,c,x,p} * - %s * - unknown modifiers are ignored. */ -static __attribute__((unused, format(printf, 2, 0))) -int vfprintf(FILE *stream, const char *fmt, va_list args) +typedef int (*__nolibc_printf_cb)(intptr_t state, const char *buf, size_t size); + +static __attribute__((unused, format(printf, 4, 0))) +int __nolibc_printf(__nolibc_printf_cb cb, intptr_t state, size_t n, const char *fmt, va_list args) { char escape, lpref, c; unsigned long long v; - unsigned int written; - size_t len, ofs; + unsigned int written, width; + size_t len, ofs, w; char tmpbuf[21]; const char *outstr; written = ofs = escape = lpref = 0; while (1) { c = fmt[ofs++]; + width = 0; if (escape) { /* we're in an escape sequence, ofs == 1 */ escape = 0; + + /* width */ + while (c >= '0' && c <= '9') { + width *= 10; + width += c - '0'; + + c = fmt[ofs++]; + } + if (c == 'c' || c == 'd' || c == 'u' || c == 'x' || c == 'p') { char *out = tmpbuf; @@ -277,6 +321,13 @@ int vfprintf(FILE *stream, const char *fmt, va_list args) if (!outstr) outstr="(null)"; } + else if (c == 'm') { +#ifdef NOLIBC_IGNORE_ERRNO + outstr = "unknown error"; +#else + outstr = strerror(errno); +#endif /* NOLIBC_IGNORE_ERRNO */ + } else if (c == '%') { /* queue it verbatim */ continue; @@ -286,6 +337,8 @@ int vfprintf(FILE *stream, const char *fmt, va_list args) if (c == 'l') { /* long format prefix, maintain the escape */ lpref++; + } else if (c == 'j') { + lpref = 2; } escape = 1; goto do_escape; @@ -302,8 +355,17 @@ int vfprintf(FILE *stream, const char *fmt, va_list args) outstr = fmt; len = ofs - 1; flush_str: - if (_fwrite(outstr, len, stream) != 0) - break; + if (n) { + w = len < n ? len : n; + n -= w; + while (width-- > w) { + if (cb(state, " ", 1) != 0) + return -1; + written += 1; + } + if (cb(state, outstr, w) != 0) + return -1; + } written += len; do_escape: @@ -319,6 +381,17 @@ int vfprintf(FILE *stream, const char *fmt, va_list args) return written; } +static int __nolibc_fprintf_cb(intptr_t state, const char *buf, size_t size) +{ + return _fwrite(buf, size, (FILE *)state); +} + +static __attribute__((unused, format(printf, 2, 0))) +int vfprintf(FILE *stream, const char *fmt, va_list args) +{ + return __nolibc_printf(__nolibc_fprintf_cb, (intptr_t)stream, SIZE_MAX, fmt, args); +} + static __attribute__((unused, format(printf, 1, 0))) int vprintf(const char *fmt, va_list args) { @@ -349,10 +422,191 @@ int printf(const char *fmt, ...) return ret; } +static __attribute__((unused, format(printf, 2, 0))) +int vdprintf(int fd, const char *fmt, va_list args) +{ + FILE *stream; + + stream = fdopen(fd, NULL); + if (!stream) + return -1; + /* Technically 'stream' is leaked, but as it's only a wrapper around 'fd' that is fine */ + return vfprintf(stream, fmt, args); +} + +static __attribute__((unused, format(printf, 2, 3))) +int dprintf(int fd, const char *fmt, ...) +{ + va_list args; + int ret; + + va_start(args, fmt); + ret = vdprintf(fd, fmt, args); + va_end(args); + + return ret; +} + +static int __nolibc_sprintf_cb(intptr_t _state, const char *buf, size_t size) +{ + char **state = (char **)_state; + + memcpy(*state, buf, size); + *state += size; + return 0; +} + +static __attribute__((unused, format(printf, 3, 0))) +int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) +{ + char *state = buf; + int ret; + + ret = __nolibc_printf(__nolibc_sprintf_cb, (intptr_t)&state, size, fmt, args); + if (ret < 0) + return ret; + buf[(size_t)ret < size ? (size_t)ret : size - 1] = '\0'; + return ret; +} + +static __attribute__((unused, format(printf, 3, 4))) +int snprintf(char *buf, size_t size, const char *fmt, ...) +{ + va_list args; + int ret; + + va_start(args, fmt); + ret = vsnprintf(buf, size, fmt, args); + va_end(args); + + return ret; +} + +static __attribute__((unused, format(printf, 2, 0))) +int vsprintf(char *buf, const char *fmt, va_list args) +{ + return vsnprintf(buf, SIZE_MAX, fmt, args); +} + +static __attribute__((unused, format(printf, 2, 3))) +int sprintf(char *buf, const char *fmt, ...) +{ + va_list args; + int ret; + + va_start(args, fmt); + ret = vsprintf(buf, fmt, args); + va_end(args); + + return ret; +} + +static __attribute__((unused)) +int vsscanf(const char *str, const char *format, va_list args) +{ + uintmax_t uval; + intmax_t ival; + int base; + char *endptr; + int matches; + int lpref; + + matches = 0; + + while (1) { + if (*format == '%') { + /* start of pattern */ + lpref = 0; + format++; + + if (*format == 'l') { + /* same as in printf() */ + lpref = 1; + format++; + if (*format == 'l') { + lpref = 2; + format++; + } + } + + if (*format == '%') { + /* literal % */ + if ('%' != *str) + goto done; + str++; + format++; + continue; + } else if (*format == 'd') { + ival = strtoll(str, &endptr, 10); + if (lpref == 0) + *va_arg(args, int *) = ival; + else if (lpref == 1) + *va_arg(args, long *) = ival; + else if (lpref == 2) + *va_arg(args, long long *) = ival; + } else if (*format == 'u' || *format == 'x' || *format == 'X') { + base = *format == 'u' ? 10 : 16; + uval = strtoull(str, &endptr, base); + if (lpref == 0) + *va_arg(args, unsigned int *) = uval; + else if (lpref == 1) + *va_arg(args, unsigned long *) = uval; + else if (lpref == 2) + *va_arg(args, unsigned long long *) = uval; + } else if (*format == 'p') { + *va_arg(args, void **) = (void *)strtoul(str, &endptr, 16); + } else { + SET_ERRNO(EILSEQ); + goto done; + } + + format++; + str = endptr; + matches++; + + } else if (*format == '\0') { + goto done; + } else if (isspace(*format)) { + /* skip spaces in format and str */ + while (isspace(*format)) + format++; + while (isspace(*str)) + str++; + } else if (*format == *str) { + /* literal match */ + format++; + str++; + } else { + if (!matches) + matches = EOF; + goto done; + } + } + +done: + return matches; +} + +static __attribute__((unused, format(scanf, 2, 3))) +int sscanf(const char *str, const char *format, ...) +{ + va_list args; + int ret; + + va_start(args, format); + ret = vsscanf(str, format, args); + va_end(args); + return ret; +} + static __attribute__((unused)) void perror(const char *msg) { +#ifdef NOLIBC_IGNORE_ERRNO + fprintf(stderr, "%s%sunknown error\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : ""); +#else fprintf(stderr, "%s%serrno=%d\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : "", errno); +#endif } static __attribute__((unused)) @@ -387,7 +641,4 @@ const char *strerror(int errno) return buf; } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_STDIO_H */ diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h index 75aa273c23a6..f184e108ed0a 100644 --- a/tools/include/nolibc/stdlib.h +++ b/tools/include/nolibc/stdlib.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_STDLIB_H #define _NOLIBC_STDLIB_H @@ -29,7 +32,26 @@ static __attribute__((unused)) char itoa_buffer[21]; * As much as possible, please keep functions alphabetically sorted. */ +static __inline__ +int abs(int j) +{ + return j >= 0 ? j : -j; +} + +static __inline__ +long labs(long j) +{ + return j >= 0 ? j : -j; +} + +static __inline__ +long long llabs(long long j) +{ + return j >= 0 ? j : -j; +} + /* must be exported, as it's used by libgcc for various divide functions */ +void abort(void); __attribute__((weak,unused,noreturn,section(".text.nolibc_abort"))) void abort(void) { @@ -78,6 +100,7 @@ void free(void *ptr) munmap(heap, heap->len); } +#ifndef NOLIBC_NO_RUNTIME /* getenv() tries to find the environment variable named <name> in the * environment array pointed to by global variable "environ" which must be * declared as a char **, and must be terminated by a NULL (it is recommended @@ -100,32 +123,7 @@ char *getenv(const char *name) } return NULL; } - -static __attribute__((unused)) -unsigned long getauxval(unsigned long type) -{ - const unsigned long *auxv = _auxv; - unsigned long ret; - - if (!auxv) - return 0; - - while (1) { - if (!auxv[0] && !auxv[1]) { - ret = 0; - break; - } - - if (auxv[0] == type) { - ret = auxv[1]; - break; - } - - auxv += 2; - } - - return ret; -} +#endif /* NOLIBC_NO_RUNTIME */ static __attribute__((unused)) void *malloc(size_t len) @@ -274,7 +272,7 @@ int itoa_r(long in, char *buffer) int len = 0; if (in < 0) { - in = -in; + in = -(unsigned long)in; *(ptr++) = '-'; len++; } @@ -410,7 +408,7 @@ int i64toa_r(int64_t in, char *buffer) int len = 0; if (in < 0) { - in = -in; + in = -(uint64_t)in; *(ptr++) = '-'; len++; } @@ -547,7 +545,4 @@ uintmax_t strtoumax(const char *nptr, char **endptr, int base) return __strtox(nptr, endptr, base, 0, UINTMAX_MAX); } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_STDLIB_H */ diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h index 9ec9c24f38c0..4000926f44ac 100644 --- a/tools/include/nolibc/string.h +++ b/tools/include/nolibc/string.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_STRING_H #define _NOLIBC_STRING_H @@ -32,6 +35,7 @@ int memcmp(const void *s1, const void *s2, size_t n) /* might be ignored by the compiler without -ffreestanding, then found as * missing. */ +void *memmove(void *dst, const void *src, size_t len); __attribute__((weak,unused,section(".text.nolibc_memmove"))) void *memmove(void *dst, const void *src, size_t len) { @@ -56,6 +60,7 @@ void *memmove(void *dst, const void *src, size_t len) #ifndef NOLIBC_ARCH_HAS_MEMCPY /* must be exported, as it's used by libgcc on ARM */ +void *memcpy(void *dst, const void *src, size_t len); __attribute__((weak,unused,section(".text.nolibc_memcpy"))) void *memcpy(void *dst, const void *src, size_t len) { @@ -73,6 +78,7 @@ void *memcpy(void *dst, const void *src, size_t len) /* might be ignored by the compiler without -ffreestanding, then found as * missing. */ +void *memset(void *dst, int b, size_t len); __attribute__((weak,unused,section(".text.nolibc_memset"))) void *memset(void *dst, int b, size_t len) { @@ -87,6 +93,21 @@ void *memset(void *dst, int b, size_t len) } #endif /* #ifndef NOLIBC_ARCH_HAS_MEMSET */ +#ifndef NOLIBC_ARCH_HAS_MEMCHR +static __attribute__((unused)) +void *memchr(const void *s, int c, size_t len) +{ + char *p = (char *)s; + + while (len--) { + if (*p == (char)c) + return p; + p++; + } + return NULL; +} +#endif /* #ifndef NOLIBC_ARCH_HAS_MEMCHR */ + static __attribute__((unused)) char *strchr(const char *s, int c) { @@ -124,6 +145,7 @@ char *strcpy(char *dst, const char *src) * thus itself, hence the asm() statement below that's meant to disable this * confusing practice. */ +size_t strlen(const char *str); __attribute__((weak,unused,section(".text.nolibc_strlen"))) size_t strlen(const char *str) { @@ -285,7 +307,40 @@ char *strrchr(const char *s, int c) return (char *)ret; } -/* make sure to include all global symbols */ -#include "nolibc.h" +static __attribute__((unused)) +char *strstr(const char *haystack, const char *needle) +{ + size_t len_haystack, len_needle; + + len_needle = strlen(needle); + if (!len_needle) + return NULL; + + len_haystack = strlen(haystack); + while (len_haystack >= len_needle) { + if (!memcmp(haystack, needle, len_needle)) + return (char *)haystack; + haystack++; + len_haystack--; + } + + return NULL; +} + +static __attribute__((unused)) +int tolower(int c) +{ + if (c >= 'A' && c <= 'Z') + return c - 'A' + 'a'; + return c; +} + +static __attribute__((unused)) +int toupper(int c) +{ + if (c >= 'a' && c <= 'z') + return c - 'a' + 'A'; + return c; +} #endif /* _NOLIBC_STRING_H */ diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 7b82bc3cf107..847af1ccbdc9 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -4,27 +4,27 @@ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_SYS_H #define _NOLIBC_SYS_H #include "std.h" /* system includes */ -#include <asm/unistd.h> -#include <asm/signal.h> /* for SIGCHLD */ -#include <asm/ioctls.h> -#include <asm/mman.h> +#include <linux/unistd.h> +#include <linux/signal.h> /* for SIGCHLD */ +#include <linux/termios.h> +#include <linux/mman.h> #include <linux/fs.h> #include <linux/loop.h> #include <linux/time.h> #include <linux/auxvec.h> #include <linux/fcntl.h> /* for O_* and AT_* */ +#include <linux/sched.h> /* for clone_args */ #include <linux/stat.h> /* for statx() */ -#include <linux/prctl.h> -#include <linux/resource.h> -#include <linux/utsname.h> -#include "arch.h" #include "errno.h" #include "stdarg.h" #include "types.h" @@ -106,7 +106,7 @@ static __attribute__((unused)) void *sbrk(intptr_t inc) { /* first call to find current end */ - void *ret = sys_brk(0); + void *ret = sys_brk(NULL); if (ret && sys_brk(ret + inc) == ret + inc) return ret + inc; @@ -118,6 +118,7 @@ void *sbrk(intptr_t inc) /* * int chdir(const char *path); + * int fchdir(int fildes); */ static __attribute__((unused)) @@ -132,6 +133,18 @@ int chdir(const char *path) return __sysret(sys_chdir(path)); } +static __attribute__((unused)) +int sys_fchdir(int fildes) +{ + return my_syscall1(__NR_fchdir, fildes); +} + +static __attribute__((unused)) +int fchdir(int fildes) +{ + return __sysret(sys_fchdir(fildes)); +} + /* * int chmod(const char *path, mode_t mode); @@ -140,12 +153,10 @@ int chdir(const char *path) static __attribute__((unused)) int sys_chmod(const char *path, mode_t mode) { -#ifdef __NR_fchmodat +#if defined(__NR_fchmodat) return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0); -#elif defined(__NR_chmod) - return my_syscall2(__NR_chmod, path, mode); #else - return __nolibc_enosys(__func__, path, mode); + return my_syscall2(__NR_chmod, path, mode); #endif } @@ -163,12 +174,10 @@ int chmod(const char *path, mode_t mode) static __attribute__((unused)) int sys_chown(const char *path, uid_t owner, gid_t group) { -#ifdef __NR_fchownat +#if defined(__NR_fchownat) return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0); -#elif defined(__NR_chown) - return my_syscall3(__NR_chown, path, owner, group); #else - return __nolibc_enosys(__func__, path, owner, group); + return my_syscall3(__NR_chown, path, owner, group); #endif } @@ -237,12 +246,23 @@ int dup(int fd) static __attribute__((unused)) int sys_dup2(int old, int new) { -#ifdef __NR_dup3 +#if defined(__NR_dup3) + int ret, nr_fcntl; + +#ifdef __NR_fcntl64 + nr_fcntl = __NR_fcntl64; +#else + nr_fcntl = __NR_fcntl; +#endif + + if (old == new) { + ret = my_syscall2(nr_fcntl, old, F_GETFD); + return ret < 0 ? ret : old; + } + return my_syscall3(__NR_dup3, old, new, 0); -#elif defined(__NR_dup2) - return my_syscall2(__NR_dup2, old, new); #else - return __nolibc_enosys(__func__, old, new); + return my_syscall2(__NR_dup2, old, new); #endif } @@ -257,7 +277,7 @@ int dup2(int old, int new) * int dup3(int old, int new, int flags); */ -#ifdef __NR_dup3 +#if defined(__NR_dup3) static __attribute__((unused)) int sys_dup3(int old, int new, int flags) { @@ -301,11 +321,17 @@ void sys_exit(int status) } static __attribute__((noreturn,unused)) -void exit(int status) +void _exit(int status) { sys_exit(status); } +static __attribute__((noreturn,unused)) +void exit(int status) +{ + _exit(status); +} + /* * pid_t fork(void); @@ -315,16 +341,14 @@ void exit(int status) static __attribute__((unused)) pid_t sys_fork(void) { -#ifdef __NR_clone +#if defined(__NR_clone) /* note: some archs only have clone() and not fork(). Different archs * have a different API, but most archs have the flags on first arg and * will not use the rest with no other flag. */ return my_syscall5(__NR_clone, SIGCHLD, 0, 0, 0, 0); -#elif defined(__NR_fork) - return my_syscall0(__NR_fork); #else - return __nolibc_enosys(__func__); + return my_syscall0(__NR_fork); #endif } #endif @@ -335,6 +359,32 @@ pid_t fork(void) return __sysret(sys_fork()); } +#ifndef sys_vfork +static __attribute__((unused)) +pid_t sys_vfork(void) +{ +#if defined(__NR_vfork) + return my_syscall0(__NR_vfork); +#else + /* + * clone() could be used but has different argument orders per + * architecture. + */ + struct clone_args args = { + .flags = CLONE_VM | CLONE_VFORK, + .exit_signal = SIGCHLD, + }; + + return my_syscall2(__NR_clone3, &args, sizeof(args)); +#endif +} +#endif + +static __attribute__((unused)) +pid_t vfork(void) +{ + return __sysret(sys_vfork()); +} /* * int fsync(int fd); @@ -377,7 +427,7 @@ int getdents64(int fd, struct linux_dirent64 *dirp, int count) static __attribute__((unused)) uid_t sys_geteuid(void) { -#ifdef __NR_geteuid32 +#if defined(__NR_geteuid32) return my_syscall0(__NR_geteuid32); #else return my_syscall0(__NR_geteuid); @@ -475,6 +525,7 @@ pid_t gettid(void) return sys_gettid(); } +#ifndef NOLIBC_NO_RUNTIME static unsigned long getauxval(unsigned long key); /* @@ -486,28 +537,7 @@ int getpagesize(void) { return __sysret((int)getauxval(AT_PAGESZ) ?: -ENOENT); } - - -/* - * int gettimeofday(struct timeval *tv, struct timezone *tz); - */ - -static __attribute__((unused)) -int sys_gettimeofday(struct timeval *tv, struct timezone *tz) -{ -#ifdef __NR_gettimeofday - return my_syscall2(__NR_gettimeofday, tv, tz); -#else - return __nolibc_enosys(__func__, tv, tz); -#endif -} - -static __attribute__((unused)) -int gettimeofday(struct timeval *tv, struct timezone *tz) -{ - return __sysret(sys_gettimeofday(tv, tz)); -} - +#endif /* NOLIBC_NO_RUNTIME */ /* * uid_t getuid(void); @@ -516,7 +546,7 @@ int gettimeofday(struct timeval *tv, struct timezone *tz) static __attribute__((unused)) uid_t sys_getuid(void) { -#ifdef __NR_getuid32 +#if defined(__NR_getuid32) return my_syscall0(__NR_getuid32); #else return my_syscall0(__NR_getuid); @@ -531,22 +561,6 @@ uid_t getuid(void) /* - * int ioctl(int fd, unsigned long req, void *value); - */ - -static __attribute__((unused)) -int sys_ioctl(int fd, unsigned long req, void *value) -{ - return my_syscall3(__NR_ioctl, fd, req, value); -} - -static __attribute__((unused)) -int ioctl(int fd, unsigned long req, void *value) -{ - return __sysret(sys_ioctl(fd, req, value)); -} - -/* * int kill(pid_t pid, int signal); */ @@ -570,12 +584,10 @@ int kill(pid_t pid, int signal) static __attribute__((unused)) int sys_link(const char *old, const char *new) { -#ifdef __NR_linkat +#if defined(__NR_linkat) return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0); -#elif defined(__NR_link) - return my_syscall2(__NR_link, old, new); #else - return __nolibc_enosys(__func__, old, new); + return my_syscall2(__NR_link, old, new); #endif } @@ -593,10 +605,20 @@ int link(const char *old, const char *new) static __attribute__((unused)) off_t sys_lseek(int fd, off_t offset, int whence) { -#ifdef __NR_lseek - return my_syscall3(__NR_lseek, fd, offset, whence); +#if defined(__NR_llseek) + __kernel_loff_t loff = 0; + off_t result; + int ret; + + ret = my_syscall5(__NR_llseek, fd, offset >> 32, (uint32_t)offset, &loff, whence); + if (ret < 0) + result = ret; + else + result = loff; + + return result; #else - return __nolibc_enosys(__func__, fd, offset, whence); + return my_syscall3(__NR_lseek, fd, offset, whence); #endif } @@ -614,12 +636,10 @@ off_t lseek(int fd, off_t offset, int whence) static __attribute__((unused)) int sys_mkdir(const char *path, mode_t mode) { -#ifdef __NR_mkdirat +#if defined(__NR_mkdirat) return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode); -#elif defined(__NR_mkdir) - return my_syscall2(__NR_mkdir, path, mode); #else - return __nolibc_enosys(__func__, path, mode); + return my_syscall2(__NR_mkdir, path, mode); #endif } @@ -636,12 +656,10 @@ int mkdir(const char *path, mode_t mode) static __attribute__((unused)) int sys_rmdir(const char *path) { -#ifdef __NR_rmdir +#if defined(__NR_rmdir) return my_syscall1(__NR_rmdir, path); -#elif defined(__NR_unlinkat) - return my_syscall3(__NR_unlinkat, AT_FDCWD, path, AT_REMOVEDIR); #else - return __nolibc_enosys(__func__, path); + return my_syscall3(__NR_unlinkat, AT_FDCWD, path, AT_REMOVEDIR); #endif } @@ -659,12 +677,10 @@ int rmdir(const char *path) static __attribute__((unused)) long sys_mknod(const char *path, mode_t mode, dev_t dev) { -#ifdef __NR_mknodat +#if defined(__NR_mknodat) return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev); -#elif defined(__NR_mknod) - return my_syscall3(__NR_mknod, path, mode, dev); #else - return __nolibc_enosys(__func__, path, mode, dev); + return my_syscall3(__NR_mknod, path, mode, dev); #endif } @@ -674,106 +690,6 @@ int mknod(const char *path, mode_t mode, dev_t dev) return __sysret(sys_mknod(path, mode, dev)); } -#ifndef sys_mmap -static __attribute__((unused)) -void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd, - off_t offset) -{ - int n; - -#if defined(__NR_mmap2) - n = __NR_mmap2; - offset >>= 12; -#else - n = __NR_mmap; -#endif - - return (void *)my_syscall6(n, addr, length, prot, flags, fd, offset); -} -#endif - -/* Note that on Linux, MAP_FAILED is -1 so we can use the generic __sysret() - * which returns -1 upon error and still satisfy user land that checks for - * MAP_FAILED. - */ - -static __attribute__((unused)) -void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) -{ - void *ret = sys_mmap(addr, length, prot, flags, fd, offset); - - if ((unsigned long)ret >= -4095UL) { - SET_ERRNO(-(long)ret); - ret = MAP_FAILED; - } - return ret; -} - -static __attribute__((unused)) -int sys_munmap(void *addr, size_t length) -{ - return my_syscall2(__NR_munmap, addr, length); -} - -static __attribute__((unused)) -int munmap(void *addr, size_t length) -{ - return __sysret(sys_munmap(addr, length)); -} - -/* - * int mount(const char *source, const char *target, - * const char *fstype, unsigned long flags, - * const void *data); - */ -static __attribute__((unused)) -int sys_mount(const char *src, const char *tgt, const char *fst, - unsigned long flags, const void *data) -{ - return my_syscall5(__NR_mount, src, tgt, fst, flags, data); -} - -static __attribute__((unused)) -int mount(const char *src, const char *tgt, - const char *fst, unsigned long flags, - const void *data) -{ - return __sysret(sys_mount(src, tgt, fst, flags, data)); -} - - -/* - * int open(const char *path, int flags[, mode_t mode]); - */ - -static __attribute__((unused)) -int sys_open(const char *path, int flags, mode_t mode) -{ -#ifdef __NR_openat - return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode); -#elif defined(__NR_open) - return my_syscall3(__NR_open, path, flags, mode); -#else - return __nolibc_enosys(__func__, path, flags, mode); -#endif -} - -static __attribute__((unused)) -int open(const char *path, int flags, ...) -{ - mode_t mode = 0; - - if (flags & O_CREAT) { - va_list args; - - va_start(args, flags); - mode = va_arg(args, int); - va_end(args); - } - - return __sysret(sys_open(path, flags, mode)); -} - /* * int pipe2(int pipefd[2], int flags); @@ -800,26 +716,6 @@ int pipe(int pipefd[2]) /* - * int prctl(int option, unsigned long arg2, unsigned long arg3, - * unsigned long arg4, unsigned long arg5); - */ - -static __attribute__((unused)) -int sys_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5) -{ - return my_syscall5(__NR_prctl, option, arg2, arg3, arg4, arg5); -} - -static __attribute__((unused)) -int prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5) -{ - return __sysret(sys_prctl(option, arg2, arg3, arg4, arg5)); -} - - -/* * int pivot_root(const char *new, const char *old); */ @@ -837,35 +733,6 @@ int pivot_root(const char *new, const char *old) /* - * int poll(struct pollfd *fds, int nfds, int timeout); - */ - -static __attribute__((unused)) -int sys_poll(struct pollfd *fds, int nfds, int timeout) -{ -#if defined(__NR_ppoll) - struct timespec t; - - if (timeout >= 0) { - t.tv_sec = timeout / 1000; - t.tv_nsec = (timeout % 1000) * 1000000; - } - return my_syscall5(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0); -#elif defined(__NR_poll) - return my_syscall3(__NR_poll, fds, nfds, timeout); -#else - return __nolibc_enosys(__func__, fds, nfds, timeout); -#endif -} - -static __attribute__((unused)) -int poll(struct pollfd *fds, int nfds, int timeout) -{ - return __sysret(sys_poll(fds, nfds, timeout)); -} - - -/* * ssize_t read(int fd, void *buf, size_t count); */ @@ -883,61 +750,6 @@ ssize_t read(int fd, void *buf, size_t count) /* - * int reboot(int cmd); - * <cmd> is among LINUX_REBOOT_CMD_* - */ - -static __attribute__((unused)) -ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg) -{ - return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg); -} - -static __attribute__((unused)) -int reboot(int cmd) -{ - return __sysret(sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0)); -} - - -/* - * int getrlimit(int resource, struct rlimit *rlim); - * int setrlimit(int resource, const struct rlimit *rlim); - */ - -static __attribute__((unused)) -int sys_prlimit64(pid_t pid, int resource, - const struct rlimit64 *new_limit, struct rlimit64 *old_limit) -{ - return my_syscall4(__NR_prlimit64, pid, resource, new_limit, old_limit); -} - -static __attribute__((unused)) -int getrlimit(int resource, struct rlimit *rlim) -{ - struct rlimit64 rlim64; - int ret; - - ret = __sysret(sys_prlimit64(0, resource, NULL, &rlim64)); - rlim->rlim_cur = rlim64.rlim_cur; - rlim->rlim_max = rlim64.rlim_max; - - return ret; -} - -static __attribute__((unused)) -int setrlimit(int resource, const struct rlimit *rlim) -{ - struct rlimit64 rlim64 = { - .rlim_cur = rlim->rlim_cur, - .rlim_max = rlim->rlim_max, - }; - - return __sysret(sys_prlimit64(0, resource, &rlim64, NULL)); -} - - -/* * int sched_yield(void); */ @@ -955,45 +767,6 @@ int sched_yield(void) /* - * int select(int nfds, fd_set *read_fds, fd_set *write_fds, - * fd_set *except_fds, struct timeval *timeout); - */ - -static __attribute__((unused)) -int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) -{ -#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect) - struct sel_arg_struct { - unsigned long n; - fd_set *r, *w, *e; - struct timeval *t; - } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout }; - return my_syscall1(__NR_select, &arg); -#elif defined(__NR__newselect) - return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout); -#elif defined(__NR_select) - return my_syscall5(__NR_select, nfds, rfds, wfds, efds, timeout); -#elif defined(__NR_pselect6) - struct timespec t; - - if (timeout) { - t.tv_sec = timeout->tv_sec; - t.tv_nsec = timeout->tv_usec * 1000; - } - return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); -#else - return __nolibc_enosys(__func__, nfds, rfds, wfds, efds, timeout); -#endif -} - -static __attribute__((unused)) -int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) -{ - return __sysret(sys_select(nfds, rfds, wfds, efds, timeout)); -} - - -/* * int setpgid(pid_t pid, pid_t pgid); */ @@ -1009,77 +782,31 @@ int setpgid(pid_t pid, pid_t pgid) return __sysret(sys_setpgid(pid, pgid)); } - /* - * pid_t setsid(void); + * pid_t setpgrp(void) */ static __attribute__((unused)) -pid_t sys_setsid(void) +pid_t setpgrp(void) { - return my_syscall0(__NR_setsid); + return setpgid(0, 0); } -static __attribute__((unused)) -pid_t setsid(void) -{ - return __sysret(sys_setsid()); -} /* - * int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf); - * int stat(const char *path, struct stat *buf); + * pid_t setsid(void); */ static __attribute__((unused)) -int sys_statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf) -{ -#ifdef __NR_statx - return my_syscall5(__NR_statx, fd, path, flags, mask, buf); -#else - return __nolibc_enosys(__func__, fd, path, flags, mask, buf); -#endif -} - -static __attribute__((unused)) -int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf) +pid_t sys_setsid(void) { - return __sysret(sys_statx(fd, path, flags, mask, buf)); + return my_syscall0(__NR_setsid); } - static __attribute__((unused)) -int stat(const char *path, struct stat *buf) +pid_t setsid(void) { - struct statx statx; - long ret; - - ret = __sysret(sys_statx(AT_FDCWD, path, AT_NO_AUTOMOUNT, STATX_BASIC_STATS, &statx)); - if (ret == -1) - return ret; - - buf->st_dev = ((statx.stx_dev_minor & 0xff) - | (statx.stx_dev_major << 8) - | ((statx.stx_dev_minor & ~0xff) << 12)); - buf->st_ino = statx.stx_ino; - buf->st_mode = statx.stx_mode; - buf->st_nlink = statx.stx_nlink; - buf->st_uid = statx.stx_uid; - buf->st_gid = statx.stx_gid; - buf->st_rdev = ((statx.stx_rdev_minor & 0xff) - | (statx.stx_rdev_major << 8) - | ((statx.stx_rdev_minor & ~0xff) << 12)); - buf->st_size = statx.stx_size; - buf->st_blksize = statx.stx_blksize; - buf->st_blocks = statx.stx_blocks; - buf->st_atim.tv_sec = statx.stx_atime.tv_sec; - buf->st_atim.tv_nsec = statx.stx_atime.tv_nsec; - buf->st_mtim.tv_sec = statx.stx_mtime.tv_sec; - buf->st_mtim.tv_nsec = statx.stx_mtime.tv_nsec; - buf->st_ctim.tv_sec = statx.stx_ctime.tv_sec; - buf->st_ctim.tv_nsec = statx.stx_ctime.tv_nsec; - - return 0; + return __sysret(sys_setsid()); } @@ -1090,12 +817,10 @@ int stat(const char *path, struct stat *buf) static __attribute__((unused)) int sys_symlink(const char *old, const char *new) { -#ifdef __NR_symlinkat +#if defined(__NR_symlinkat) return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new); -#elif defined(__NR_symlink) - return my_syscall2(__NR_symlink, old, new); #else - return __nolibc_enosys(__func__, old, new); + return my_syscall2(__NR_symlink, old, new); #endif } @@ -1141,44 +866,16 @@ int umount2(const char *path, int flags) /* - * int uname(struct utsname *buf); - */ - -struct utsname { - char sysname[65]; - char nodename[65]; - char release[65]; - char version[65]; - char machine[65]; - char domainname[65]; -}; - -static __attribute__((unused)) -int sys_uname(struct utsname *buf) -{ - return my_syscall1(__NR_uname, buf); -} - -static __attribute__((unused)) -int uname(struct utsname *buf) -{ - return __sysret(sys_uname(buf)); -} - - -/* * int unlink(const char *path); */ static __attribute__((unused)) int sys_unlink(const char *path) { -#ifdef __NR_unlinkat +#if defined(__NR_unlinkat) return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0); -#elif defined(__NR_unlink) - return my_syscall1(__NR_unlink, path); #else - return __nolibc_enosys(__func__, path); + return my_syscall1(__NR_unlink, path); #endif } @@ -1190,42 +887,6 @@ int unlink(const char *path) /* - * pid_t wait(int *status); - * pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage); - * pid_t waitpid(pid_t pid, int *status, int options); - */ - -static __attribute__((unused)) -pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage) -{ -#ifdef __NR_wait4 - return my_syscall4(__NR_wait4, pid, status, options, rusage); -#else - return __nolibc_enosys(__func__, pid, status, options, rusage); -#endif -} - -static __attribute__((unused)) -pid_t wait(int *status) -{ - return __sysret(sys_wait4(-1, status, 0, NULL)); -} - -static __attribute__((unused)) -pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage) -{ - return __sysret(sys_wait4(pid, status, options, rusage)); -} - - -static __attribute__((unused)) -pid_t waitpid(pid_t pid, int *status, int options) -{ - return __sysret(sys_wait4(pid, status, options, NULL)); -} - - -/* * ssize_t write(int fd, const void *buf, size_t count); */ @@ -1258,7 +919,4 @@ int memfd_create(const char *name, unsigned int flags) return __sysret(sys_memfd_create(name, flags)); } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_SYS_H */ diff --git a/tools/include/nolibc/sys/auxv.h b/tools/include/nolibc/sys/auxv.h new file mode 100644 index 000000000000..0e98325e7347 --- /dev/null +++ b/tools/include/nolibc/sys/auxv.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * auxv definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_AUXV_H +#define _NOLIBC_SYS_AUXV_H + +#ifndef NOLIBC_NO_RUNTIME + +#include "../crt.h" + +static __attribute__((unused)) +unsigned long getauxval(unsigned long type) +{ + const unsigned long *auxv = _auxv; + unsigned long ret; + + if (!auxv) + return 0; + + while (1) { + if (!auxv[0] && !auxv[1]) { + ret = 0; + break; + } + + if (auxv[0] == type) { + ret = auxv[1]; + break; + } + + auxv += 2; + } + + return ret; +} + +#endif /* NOLIBC_NO_RUNTIME */ +#endif /* _NOLIBC_SYS_AUXV_H */ diff --git a/tools/include/nolibc/sys/ioctl.h b/tools/include/nolibc/sys/ioctl.h new file mode 100644 index 000000000000..fc880687e02a --- /dev/null +++ b/tools/include/nolibc/sys/ioctl.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Ioctl definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_IOCTL_H +#define _NOLIBC_SYS_IOCTL_H + +#include "../sys.h" + +#include <linux/ioctl.h> + +/* + * int ioctl(int fd, unsigned long cmd, ... arg); + */ + +static __attribute__((unused)) +long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + return my_syscall3(__NR_ioctl, fd, cmd, arg); +} + +#define ioctl(fd, cmd, arg) __sysret(sys_ioctl(fd, cmd, (unsigned long)(arg))) + +#endif /* _NOLIBC_SYS_IOCTL_H */ diff --git a/tools/include/nolibc/sys/mman.h b/tools/include/nolibc/sys/mman.h new file mode 100644 index 000000000000..77084ac3405a --- /dev/null +++ b/tools/include/nolibc/sys/mman.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * mm definition for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_MMAN_H +#define _NOLIBC_SYS_MMAN_H + +#include "../arch.h" +#include "../sys.h" + +#ifndef sys_mmap +static __attribute__((unused)) +void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd, + off_t offset) +{ + int n; + +#if defined(__NR_mmap2) + n = __NR_mmap2; + offset >>= 12; +#else + n = __NR_mmap; +#endif + + return (void *)my_syscall6(n, addr, length, prot, flags, fd, offset); +} +#endif + +static __attribute__((unused)) +void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) +{ + void *ret = sys_mmap(addr, length, prot, flags, fd, offset); + + if ((unsigned long)ret >= -4095UL) { + SET_ERRNO(-(long)ret); + ret = MAP_FAILED; + } + return ret; +} + +static __attribute__((unused)) +void *sys_mremap(void *old_address, size_t old_size, size_t new_size, int flags, void *new_address) +{ + return (void *)my_syscall5(__NR_mremap, old_address, old_size, + new_size, flags, new_address); +} + +static __attribute__((unused)) +void *mremap(void *old_address, size_t old_size, size_t new_size, int flags, void *new_address) +{ + void *ret = sys_mremap(old_address, old_size, new_size, flags, new_address); + + if ((unsigned long)ret >= -4095UL) { + SET_ERRNO(-(long)ret); + ret = MAP_FAILED; + } + return ret; +} + +static __attribute__((unused)) +int sys_munmap(void *addr, size_t length) +{ + return my_syscall2(__NR_munmap, addr, length); +} + +static __attribute__((unused)) +int munmap(void *addr, size_t length) +{ + return __sysret(sys_munmap(addr, length)); +} + +#endif /* _NOLIBC_SYS_MMAN_H */ diff --git a/tools/include/nolibc/sys/mount.h b/tools/include/nolibc/sys/mount.h new file mode 100644 index 000000000000..e39ec02ea24c --- /dev/null +++ b/tools/include/nolibc/sys/mount.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Mount definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_MOUNT_H +#define _NOLIBC_SYS_MOUNT_H + +#include "../sys.h" + +#include <linux/mount.h> + +/* + * int mount(const char *source, const char *target, + * const char *fstype, unsigned long flags, + * const void *data); + */ +static __attribute__((unused)) +int sys_mount(const char *src, const char *tgt, const char *fst, + unsigned long flags, const void *data) +{ + return my_syscall5(__NR_mount, src, tgt, fst, flags, data); +} + +static __attribute__((unused)) +int mount(const char *src, const char *tgt, + const char *fst, unsigned long flags, + const void *data) +{ + return __sysret(sys_mount(src, tgt, fst, flags, data)); +} + +#endif /* _NOLIBC_SYS_MOUNT_H */ diff --git a/tools/include/nolibc/sys/prctl.h b/tools/include/nolibc/sys/prctl.h new file mode 100644 index 000000000000..0205907b6ac8 --- /dev/null +++ b/tools/include/nolibc/sys/prctl.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Prctl definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_PRCTL_H +#define _NOLIBC_SYS_PRCTL_H + +#include "../sys.h" + +#include <linux/prctl.h> + +/* + * int prctl(int option, unsigned long arg2, unsigned long arg3, + * unsigned long arg4, unsigned long arg5); + */ + +static __attribute__((unused)) +int sys_prctl(int option, unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5) +{ + return my_syscall5(__NR_prctl, option, arg2, arg3, arg4, arg5); +} + +static __attribute__((unused)) +int prctl(int option, unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5) +{ + return __sysret(sys_prctl(option, arg2, arg3, arg4, arg5)); +} + +#endif /* _NOLIBC_SYS_PRCTL_H */ diff --git a/tools/include/nolibc/sys/random.h b/tools/include/nolibc/sys/random.h new file mode 100644 index 000000000000..cd5d25c571a8 --- /dev/null +++ b/tools/include/nolibc/sys/random.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * random definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_RANDOM_H +#define _NOLIBC_SYS_RANDOM_H + +#include "../arch.h" +#include "../sys.h" + +#include <linux/random.h> + +/* + * ssize_t getrandom(void *buf, size_t buflen, unsigned int flags); + */ + +static __attribute__((unused)) +ssize_t sys_getrandom(void *buf, size_t buflen, unsigned int flags) +{ + return my_syscall3(__NR_getrandom, buf, buflen, flags); +} + +static __attribute__((unused)) +ssize_t getrandom(void *buf, size_t buflen, unsigned int flags) +{ + return __sysret(sys_getrandom(buf, buflen, flags)); +} + +#endif /* _NOLIBC_SYS_RANDOM_H */ diff --git a/tools/include/nolibc/sys/reboot.h b/tools/include/nolibc/sys/reboot.h new file mode 100644 index 000000000000..38274c64a722 --- /dev/null +++ b/tools/include/nolibc/sys/reboot.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Reboot definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_REBOOT_H +#define _NOLIBC_SYS_REBOOT_H + +#include "../sys.h" + +#include <linux/reboot.h> + +/* + * int reboot(int cmd); + * <cmd> is among LINUX_REBOOT_CMD_* + */ + +static __attribute__((unused)) +ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg) +{ + return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg); +} + +static __attribute__((unused)) +int reboot(int cmd) +{ + return __sysret(sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, NULL)); +} + +#endif /* _NOLIBC_SYS_REBOOT_H */ diff --git a/tools/include/nolibc/sys/resource.h b/tools/include/nolibc/sys/resource.h new file mode 100644 index 000000000000..b990f914dc56 --- /dev/null +++ b/tools/include/nolibc/sys/resource.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Resource definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_RESOURCE_H +#define _NOLIBC_SYS_RESOURCE_H + +#include "../sys.h" + +#include <linux/resource.h> + +/* + * int getrlimit(int resource, struct rlimit *rlim); + * int setrlimit(int resource, const struct rlimit *rlim); + */ + +static __attribute__((unused)) +int sys_prlimit64(pid_t pid, int resource, + const struct rlimit64 *new_limit, struct rlimit64 *old_limit) +{ + return my_syscall4(__NR_prlimit64, pid, resource, new_limit, old_limit); +} + +static __attribute__((unused)) +int getrlimit(int resource, struct rlimit *rlim) +{ + struct rlimit64 rlim64; + int ret; + + ret = __sysret(sys_prlimit64(0, resource, NULL, &rlim64)); + rlim->rlim_cur = rlim64.rlim_cur; + rlim->rlim_max = rlim64.rlim_max; + + return ret; +} + +static __attribute__((unused)) +int setrlimit(int resource, const struct rlimit *rlim) +{ + struct rlimit64 rlim64 = { + .rlim_cur = rlim->rlim_cur, + .rlim_max = rlim->rlim_max, + }; + + return __sysret(sys_prlimit64(0, resource, &rlim64, NULL)); +} + +#endif /* _NOLIBC_SYS_RESOURCE_H */ diff --git a/tools/include/nolibc/sys/select.h b/tools/include/nolibc/sys/select.h new file mode 100644 index 000000000000..2a5619c01277 --- /dev/null +++ b/tools/include/nolibc/sys/select.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ + +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_SELECT_H +#define _NOLIBC_SYS_SELECT_H + +#include <linux/time.h> +#include <linux/unistd.h> + +/* commonly an fd_set represents 256 FDs */ +#ifndef FD_SETSIZE +#define FD_SETSIZE 256 +#endif + +#define FD_SETIDXMASK (8 * sizeof(unsigned long)) +#define FD_SETBITMASK (8 * sizeof(unsigned long)-1) + +/* for select() */ +typedef struct { + unsigned long fds[(FD_SETSIZE + FD_SETBITMASK) / FD_SETIDXMASK]; +} fd_set; + +#define FD_CLR(fd, set) do { \ + fd_set *__set = (set); \ + int __fd = (fd); \ + if (__fd >= 0) \ + __set->fds[__fd / FD_SETIDXMASK] &= \ + ~(1U << (__fd & FD_SETBITMASK)); \ + } while (0) + +#define FD_SET(fd, set) do { \ + fd_set *__set = (set); \ + int __fd = (fd); \ + if (__fd >= 0) \ + __set->fds[__fd / FD_SETIDXMASK] |= \ + 1 << (__fd & FD_SETBITMASK); \ + } while (0) + +#define FD_ISSET(fd, set) ({ \ + fd_set *__set = (set); \ + int __fd = (fd); \ + int __r = 0; \ + if (__fd >= 0) \ + __r = !!(__set->fds[__fd / FD_SETIDXMASK] & \ +1U << (__fd & FD_SETBITMASK)); \ + __r; \ + }) + +#define FD_ZERO(set) do { \ + fd_set *__set = (set); \ + int __idx; \ + int __size = (FD_SETSIZE+FD_SETBITMASK) / FD_SETIDXMASK;\ + for (__idx = 0; __idx < __size; __idx++) \ + __set->fds[__idx] = 0; \ + } while (0) + +/* + * int select(int nfds, fd_set *read_fds, fd_set *write_fds, + * fd_set *except_fds, struct timeval *timeout); + */ + +static __attribute__((unused)) +int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) +{ +#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect) + struct sel_arg_struct { + unsigned long n; + fd_set *r, *w, *e; + struct timeval *t; + } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout }; + return my_syscall1(__NR_select, &arg); +#elif defined(__NR__newselect) + return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout); +#elif defined(__NR_select) + return my_syscall5(__NR_select, nfds, rfds, wfds, efds, timeout); +#elif defined(__NR_pselect6) + struct timespec t; + + if (timeout) { + t.tv_sec = timeout->tv_sec; + t.tv_nsec = timeout->tv_usec * 1000; + } + return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); +#else + struct __kernel_timespec t; + + if (timeout) { + t.tv_sec = timeout->tv_sec; + t.tv_nsec = timeout->tv_usec * 1000; + } + return my_syscall6(__NR_pselect6_time64, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); +#endif +} + +static __attribute__((unused)) +int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) +{ + return __sysret(sys_select(nfds, rfds, wfds, efds, timeout)); +} + + +#endif /* _NOLIBC_SYS_SELECT_H */ diff --git a/tools/include/nolibc/sys/stat.h b/tools/include/nolibc/sys/stat.h new file mode 100644 index 000000000000..8b4d80e3ea03 --- /dev/null +++ b/tools/include/nolibc/sys/stat.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * stat definition for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_STAT_H +#define _NOLIBC_SYS_STAT_H + +#include "../arch.h" +#include "../types.h" +#include "../sys.h" + +/* + * int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf); + * int stat(const char *path, struct stat *buf); + * int fstatat(int fd, const char *path, struct stat *buf, int flag); + * int fstat(int fildes, struct stat *buf); + * int lstat(const char *path, struct stat *buf); + */ + +static __attribute__((unused)) +int sys_statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf) +{ +#ifdef __NR_statx + return my_syscall5(__NR_statx, fd, path, flags, mask, buf); +#else + return __nolibc_enosys(__func__, fd, path, flags, mask, buf); +#endif +} + +static __attribute__((unused)) +int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf) +{ + return __sysret(sys_statx(fd, path, flags, mask, buf)); +} + + +static __attribute__((unused)) +int fstatat(int fd, const char *path, struct stat *buf, int flag) +{ + struct statx statx; + long ret; + + ret = __sysret(sys_statx(fd, path, flag | AT_NO_AUTOMOUNT, STATX_BASIC_STATS, &statx)); + if (ret == -1) + return ret; + + buf->st_dev = ((statx.stx_dev_minor & 0xff) + | (statx.stx_dev_major << 8) + | ((statx.stx_dev_minor & ~0xff) << 12)); + buf->st_ino = statx.stx_ino; + buf->st_mode = statx.stx_mode; + buf->st_nlink = statx.stx_nlink; + buf->st_uid = statx.stx_uid; + buf->st_gid = statx.stx_gid; + buf->st_rdev = ((statx.stx_rdev_minor & 0xff) + | (statx.stx_rdev_major << 8) + | ((statx.stx_rdev_minor & ~0xff) << 12)); + buf->st_size = statx.stx_size; + buf->st_blksize = statx.stx_blksize; + buf->st_blocks = statx.stx_blocks; + buf->st_atim.tv_sec = statx.stx_atime.tv_sec; + buf->st_atim.tv_nsec = statx.stx_atime.tv_nsec; + buf->st_mtim.tv_sec = statx.stx_mtime.tv_sec; + buf->st_mtim.tv_nsec = statx.stx_mtime.tv_nsec; + buf->st_ctim.tv_sec = statx.stx_ctime.tv_sec; + buf->st_ctim.tv_nsec = statx.stx_ctime.tv_nsec; + + return 0; +} + +static __attribute__((unused)) +int stat(const char *path, struct stat *buf) +{ + return fstatat(AT_FDCWD, path, buf, 0); +} + +static __attribute__((unused)) +int fstat(int fildes, struct stat *buf) +{ + return fstatat(fildes, "", buf, AT_EMPTY_PATH); +} + +static __attribute__((unused)) +int lstat(const char *path, struct stat *buf) +{ + return fstatat(AT_FDCWD, path, buf, AT_SYMLINK_NOFOLLOW); +} + +#endif /* _NOLIBC_SYS_STAT_H */ diff --git a/tools/include/nolibc/sys/syscall.h b/tools/include/nolibc/sys/syscall.h new file mode 100644 index 000000000000..4bf97f1386a0 --- /dev/null +++ b/tools/include/nolibc/sys/syscall.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * syscall() definition for NOLIBC + * Copyright (C) 2024 Thomas Weißschuh <linux@weissschuh.net> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_SYSCALL_H +#define _NOLIBC_SYS_SYSCALL_H + +#define __syscall_narg(_0, _1, _2, _3, _4, _5, _6, N, ...) N +#define _syscall_narg(...) __syscall_narg(__VA_ARGS__, 6, 5, 4, 3, 2, 1, 0) +#define _syscall(N, ...) __sysret(my_syscall##N(__VA_ARGS__)) +#define _syscall_n(N, ...) _syscall(N, __VA_ARGS__) +#define syscall(...) _syscall_n(_syscall_narg(__VA_ARGS__), ##__VA_ARGS__) + +#endif /* _NOLIBC_SYS_SYSCALL_H */ diff --git a/tools/include/nolibc/sys/sysmacros.h b/tools/include/nolibc/sys/sysmacros.h new file mode 100644 index 000000000000..37c33f030f02 --- /dev/null +++ b/tools/include/nolibc/sys/sysmacros.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Sysmacro definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_SYSMACROS_H +#define _NOLIBC_SYS_SYSMACROS_H + +#include "../std.h" + +/* WARNING, it only deals with the 4096 first majors and 256 first minors */ +#define makedev(major, minor) ((dev_t)((((major) & 0xfff) << 8) | ((minor) & 0xff))) +#define major(dev) ((unsigned int)(((dev) >> 8) & 0xfff)) +#define minor(dev) ((unsigned int)((dev) & 0xff)) + +#endif /* _NOLIBC_SYS_SYSMACROS_H */ diff --git a/tools/include/nolibc/sys/time.h b/tools/include/nolibc/sys/time.h new file mode 100644 index 000000000000..33782a19aae9 --- /dev/null +++ b/tools/include/nolibc/sys/time.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * time definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_TIME_H +#define _NOLIBC_SYS_TIME_H + +#include "../arch.h" +#include "../sys.h" + +static int sys_clock_gettime(clockid_t clockid, struct timespec *tp); + +/* + * int gettimeofday(struct timeval *tv, struct timezone *tz); + */ + +static __attribute__((unused)) +int sys_gettimeofday(struct timeval *tv, struct timezone *tz) +{ +#ifdef __NR_gettimeofday + return my_syscall2(__NR_gettimeofday, tv, tz); +#else + (void) tz; /* Non-NULL tz is undefined behaviour */ + + struct timespec tp; + int ret; + + ret = sys_clock_gettime(CLOCK_REALTIME, &tp); + if (!ret && tv) { + tv->tv_sec = tp.tv_sec; + tv->tv_usec = tp.tv_nsec / 1000; + } + + return ret; +#endif +} + +static __attribute__((unused)) +int gettimeofday(struct timeval *tv, struct timezone *tz) +{ + return __sysret(sys_gettimeofday(tv, tz)); +} + +#endif /* _NOLIBC_SYS_TIME_H */ diff --git a/tools/include/nolibc/sys/timerfd.h b/tools/include/nolibc/sys/timerfd.h new file mode 100644 index 000000000000..5dd61030c991 --- /dev/null +++ b/tools/include/nolibc/sys/timerfd.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * timerfd definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_TIMERFD_H +#define _NOLIBC_SYS_TIMERFD_H + +#include "../sys.h" +#include "../time.h" + +#include <linux/timerfd.h> + + +static __attribute__((unused)) +int sys_timerfd_create(int clockid, int flags) +{ + return my_syscall2(__NR_timerfd_create, clockid, flags); +} + +static __attribute__((unused)) +int timerfd_create(int clockid, int flags) +{ + return __sysret(sys_timerfd_create(clockid, flags)); +} + + +static __attribute__((unused)) +int sys_timerfd_gettime(int fd, struct itimerspec *curr_value) +{ +#if defined(__NR_timerfd_gettime) + return my_syscall2(__NR_timerfd_gettime, fd, curr_value); +#else + struct __kernel_itimerspec kcurr_value; + int ret; + + ret = my_syscall2(__NR_timerfd_gettime64, fd, &kcurr_value); + __nolibc_timespec_kernel_to_user(&kcurr_value.it_interval, &curr_value->it_interval); + __nolibc_timespec_kernel_to_user(&kcurr_value.it_value, &curr_value->it_value); + return ret; +#endif +} + +static __attribute__((unused)) +int timerfd_gettime(int fd, struct itimerspec *curr_value) +{ + return __sysret(sys_timerfd_gettime(fd, curr_value)); +} + + +static __attribute__((unused)) +int sys_timerfd_settime(int fd, int flags, + const struct itimerspec *new_value, struct itimerspec *old_value) +{ +#if defined(__NR_timerfd_settime) + return my_syscall4(__NR_timerfd_settime, fd, flags, new_value, old_value); +#else + struct __kernel_itimerspec knew_value, kold_value; + int ret; + + __nolibc_timespec_user_to_kernel(&new_value->it_value, &knew_value.it_value); + __nolibc_timespec_user_to_kernel(&new_value->it_interval, &knew_value.it_interval); + ret = my_syscall4(__NR_timerfd_settime64, fd, flags, &knew_value, &kold_value); + if (old_value) { + __nolibc_timespec_kernel_to_user(&kold_value.it_interval, &old_value->it_interval); + __nolibc_timespec_kernel_to_user(&kold_value.it_value, &old_value->it_value); + } + return ret; +#endif +} + +static __attribute__((unused)) +int timerfd_settime(int fd, int flags, + const struct itimerspec *new_value, struct itimerspec *old_value) +{ + return __sysret(sys_timerfd_settime(fd, flags, new_value, old_value)); +} + +#endif /* _NOLIBC_SYS_TIMERFD_H */ diff --git a/tools/include/nolibc/sys/types.h b/tools/include/nolibc/sys/types.h new file mode 100644 index 000000000000..8a264a13275c --- /dev/null +++ b/tools/include/nolibc/sys/types.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * sys/types.h shim for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de> + */ + +#include "../types.h" diff --git a/tools/include/nolibc/sys/uio.h b/tools/include/nolibc/sys/uio.h new file mode 100644 index 000000000000..7ad42b927d2f --- /dev/null +++ b/tools/include/nolibc/sys/uio.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * uio for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + * Copyright (C) 2025 Intel Corporation + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_UIO_H +#define _NOLIBC_SYS_UIO_H + +#include "../sys.h" +#include <linux/uio.h> + + +/* + * ssize_t readv(int fd, const struct iovec *iovec, int count); + */ +static __attribute__((unused)) +ssize_t sys_readv(int fd, const struct iovec *iovec, int count) +{ + return my_syscall3(__NR_readv, fd, iovec, count); +} + +static __attribute__((unused)) +ssize_t readv(int fd, const struct iovec *iovec, int count) +{ + return __sysret(sys_readv(fd, iovec, count)); +} + +/* + * ssize_t writev(int fd, const struct iovec *iovec, int count); + */ +static __attribute__((unused)) +ssize_t sys_writev(int fd, const struct iovec *iovec, int count) +{ + return my_syscall3(__NR_writev, fd, iovec, count); +} + +static __attribute__((unused)) +ssize_t writev(int fd, const struct iovec *iovec, int count) +{ + return __sysret(sys_writev(fd, iovec, count)); +} + + +#endif /* _NOLIBC_SYS_UIO_H */ diff --git a/tools/include/nolibc/sys/utsname.h b/tools/include/nolibc/sys/utsname.h new file mode 100644 index 000000000000..01023e1bb439 --- /dev/null +++ b/tools/include/nolibc/sys/utsname.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Utsname definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_UTSNAME_H +#define _NOLIBC_SYS_UTSNAME_H + +#include "../sys.h" + +#include <linux/utsname.h> + +/* + * int uname(struct utsname *buf); + */ + +struct utsname { + char sysname[65]; + char nodename[65]; + char release[65]; + char version[65]; + char machine[65]; + char domainname[65]; +}; + +static __attribute__((unused)) +int sys_uname(struct utsname *buf) +{ + return my_syscall1(__NR_uname, buf); +} + +static __attribute__((unused)) +int uname(struct utsname *buf) +{ + return __sysret(sys_uname(buf)); +} + +#endif /* _NOLIBC_SYS_UTSNAME_H */ diff --git a/tools/include/nolibc/sys/wait.h b/tools/include/nolibc/sys/wait.h new file mode 100644 index 000000000000..9d9319ba92cb --- /dev/null +++ b/tools/include/nolibc/sys/wait.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * wait definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_WAIT_H +#define _NOLIBC_SYS_WAIT_H + +#include "../arch.h" +#include "../std.h" +#include "../types.h" + +/* + * pid_t wait(int *status); + * pid_t waitpid(pid_t pid, int *status, int options); + * int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options); + */ + +static __attribute__((unused)) +int sys_waitid(int which, pid_t pid, siginfo_t *infop, int options, struct rusage *rusage) +{ + return my_syscall5(__NR_waitid, which, pid, infop, options, rusage); +} + +static __attribute__((unused)) +int waitid(int which, pid_t pid, siginfo_t *infop, int options) +{ + return __sysret(sys_waitid(which, pid, infop, options, NULL)); +} + + +static __attribute__((unused)) +pid_t waitpid(pid_t pid, int *status, int options) +{ + int idtype, ret; + siginfo_t info; + pid_t id; + + if (pid == INT_MIN) { + SET_ERRNO(ESRCH); + return -1; + } else if (pid < -1) { + idtype = P_PGID; + id = -pid; + } else if (pid == -1) { + idtype = P_ALL; + id = 0; + } else if (pid == 0) { + idtype = P_PGID; + id = 0; + } else { + idtype = P_PID; + id = pid; + } + + options |= WEXITED; + + ret = waitid(idtype, id, &info, options); + if (ret) + return -1; + + switch (info.si_code) { + case 0: + if (status) + *status = 0; + break; + case CLD_EXITED: + if (status) + *status = (info.si_status & 0xff) << 8; + break; + case CLD_KILLED: + if (status) + *status = info.si_status & 0x7f; + break; + case CLD_DUMPED: + if (status) + *status = (info.si_status & 0x7f) | 0x80; + break; + case CLD_STOPPED: + case CLD_TRAPPED: + if (status) + *status = (info.si_status << 8) + 0x7f; + break; + case CLD_CONTINUED: + if (status) + *status = 0xffff; + break; + default: + return -1; + } + + return info.si_pid; +} + +static __attribute__((unused)) +pid_t wait(int *status) +{ + return waitpid(-1, status, 0); +} + +#endif /* _NOLIBC_SYS_WAIT_H */ diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h index 84655361b9ad..48e78f8becf9 100644 --- a/tools/include/nolibc/time.h +++ b/tools/include/nolibc/time.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_TIME_H #define _NOLIBC_TIME_H @@ -12,6 +15,133 @@ #include "types.h" #include "sys.h" +#include <linux/signal.h> +#include <linux/time.h> + +static __inline__ +void __nolibc_timespec_user_to_kernel(const struct timespec *ts, struct __kernel_timespec *kts) +{ + kts->tv_sec = ts->tv_sec; + kts->tv_nsec = ts->tv_nsec; +} + +static __inline__ +void __nolibc_timespec_kernel_to_user(const struct __kernel_timespec *kts, struct timespec *ts) +{ + ts->tv_sec = kts->tv_sec; + ts->tv_nsec = kts->tv_nsec; +} + +/* + * int clock_getres(clockid_t clockid, struct timespec *res); + * int clock_gettime(clockid_t clockid, struct timespec *tp); + * int clock_settime(clockid_t clockid, const struct timespec *tp); + * int clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp, + * struct timespec *rmtp) + */ + +static __attribute__((unused)) +int sys_clock_getres(clockid_t clockid, struct timespec *res) +{ +#if defined(__NR_clock_getres) + return my_syscall2(__NR_clock_getres, clockid, res); +#else + struct __kernel_timespec kres; + int ret; + + ret = my_syscall2(__NR_clock_getres_time64, clockid, &kres); + if (res) + __nolibc_timespec_kernel_to_user(&kres, res); + return ret; +#endif +} + +static __attribute__((unused)) +int clock_getres(clockid_t clockid, struct timespec *res) +{ + return __sysret(sys_clock_getres(clockid, res)); +} + +static __attribute__((unused)) +int sys_clock_gettime(clockid_t clockid, struct timespec *tp) +{ +#if defined(__NR_clock_gettime) + return my_syscall2(__NR_clock_gettime, clockid, tp); +#else + struct __kernel_timespec ktp; + int ret; + + ret = my_syscall2(__NR_clock_gettime64, clockid, &ktp); + if (tp) + __nolibc_timespec_kernel_to_user(&ktp, tp); + return ret; +#endif +} + +static __attribute__((unused)) +int clock_gettime(clockid_t clockid, struct timespec *tp) +{ + return __sysret(sys_clock_gettime(clockid, tp)); +} + +static __attribute__((unused)) +int sys_clock_settime(clockid_t clockid, struct timespec *tp) +{ +#if defined(__NR_clock_settime) + return my_syscall2(__NR_clock_settime, clockid, tp); +#else + struct __kernel_timespec ktp; + + __nolibc_timespec_user_to_kernel(tp, &ktp); + return my_syscall2(__NR_clock_settime64, clockid, &ktp); +#endif +} + +static __attribute__((unused)) +int clock_settime(clockid_t clockid, struct timespec *tp) +{ + return __sysret(sys_clock_settime(clockid, tp)); +} + +static __attribute__((unused)) +int sys_clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp, + struct timespec *rmtp) +{ +#if defined(__NR_clock_nanosleep) + return my_syscall4(__NR_clock_nanosleep, clockid, flags, rqtp, rmtp); +#else + struct __kernel_timespec krqtp, krmtp; + int ret; + + __nolibc_timespec_user_to_kernel(rqtp, &krqtp); + ret = my_syscall4(__NR_clock_nanosleep_time64, clockid, flags, &krqtp, &krmtp); + if (rmtp) + __nolibc_timespec_kernel_to_user(&krmtp, rmtp); + return ret; +#endif +} + +static __attribute__((unused)) +int clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp, + struct timespec *rmtp) +{ + /* Directly return a positive error number */ + return -sys_clock_nanosleep(clockid, flags, rqtp, rmtp); +} + +static __inline__ +double difftime(time_t time1, time_t time2) +{ + return time1 - time2; +} + +static __inline__ +int nanosleep(const struct timespec *rqtp, struct timespec *rmtp) +{ + return __sysret(sys_clock_nanosleep(CLOCK_REALTIME, 0, rqtp, rmtp)); +} + + static __attribute__((unused)) time_t time(time_t *tptr) { @@ -25,7 +155,85 @@ time_t time(time_t *tptr) return tv.tv_sec; } -/* make sure to include all global symbols */ -#include "nolibc.h" + +/* + * int timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid); + * int timer_gettime(timer_t timerid, struct itimerspec *curr_value); + * int timer_settime(timer_t timerid, int flags, const struct itimerspec *new_value, struct itimerspec *old_value); + */ + +static __attribute__((unused)) +int sys_timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid) +{ + return my_syscall3(__NR_timer_create, clockid, evp, timerid); +} + +static __attribute__((unused)) +int timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid) +{ + return __sysret(sys_timer_create(clockid, evp, timerid)); +} + +static __attribute__((unused)) +int sys_timer_delete(timer_t timerid) +{ + return my_syscall1(__NR_timer_delete, timerid); +} + +static __attribute__((unused)) +int timer_delete(timer_t timerid) +{ + return __sysret(sys_timer_delete(timerid)); +} + +static __attribute__((unused)) +int sys_timer_gettime(timer_t timerid, struct itimerspec *curr_value) +{ +#if defined(__NR_timer_gettime) + return my_syscall2(__NR_timer_gettime, timerid, curr_value); +#else + struct __kernel_itimerspec kcurr_value; + int ret; + + ret = my_syscall2(__NR_timer_gettime64, timerid, &kcurr_value); + __nolibc_timespec_kernel_to_user(&kcurr_value.it_interval, &curr_value->it_interval); + __nolibc_timespec_kernel_to_user(&kcurr_value.it_value, &curr_value->it_value); + return ret; +#endif +} + +static __attribute__((unused)) +int timer_gettime(timer_t timerid, struct itimerspec *curr_value) +{ + return __sysret(sys_timer_gettime(timerid, curr_value)); +} + +static __attribute__((unused)) +int sys_timer_settime(timer_t timerid, int flags, + const struct itimerspec *new_value, struct itimerspec *old_value) +{ +#if defined(__NR_timer_settime) + return my_syscall4(__NR_timer_settime, timerid, flags, new_value, old_value); +#else + struct __kernel_itimerspec knew_value, kold_value; + int ret; + + __nolibc_timespec_user_to_kernel(&new_value->it_value, &knew_value.it_value); + __nolibc_timespec_user_to_kernel(&new_value->it_interval, &knew_value.it_interval); + ret = my_syscall4(__NR_timer_settime64, timerid, flags, &knew_value, &kold_value); + if (old_value) { + __nolibc_timespec_kernel_to_user(&kold_value.it_interval, &old_value->it_interval); + __nolibc_timespec_kernel_to_user(&kold_value.it_value, &old_value->it_value); + } + return ret; +#endif +} + +static __attribute__((unused)) +int timer_settime(timer_t timerid, int flags, + const struct itimerspec *new_value, struct itimerspec *old_value) +{ + return __sysret(sys_timer_settime(timerid, flags, new_value, old_value)); +} #endif /* _NOLIBC_TIME_H */ diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h index b26a5d0c417c..470a5f77bc0f 100644 --- a/tools/include/nolibc/types.h +++ b/tools/include/nolibc/types.h @@ -4,16 +4,17 @@ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_TYPES_H #define _NOLIBC_TYPES_H #include "std.h" #include <linux/mman.h> -#include <linux/reboot.h> /* for LINUX_REBOOT_* */ #include <linux/stat.h> #include <linux/time.h> #include <linux/wait.h> -#include <linux/resource.h> /* Only the generic macros and types may be defined here. The arch-specific @@ -69,11 +70,6 @@ #define DT_LNK 0xa #define DT_SOCK 0xc -/* commonly an fd_set represents 256 FDs */ -#ifndef FD_SETSIZE -#define FD_SETSIZE 256 -#endif - /* PATH_MAX and MAXPATHLEN are often used and found with plenty of different * values. */ @@ -114,62 +110,6 @@ #define EXIT_SUCCESS 0 #define EXIT_FAILURE 1 -#define FD_SETIDXMASK (8 * sizeof(unsigned long)) -#define FD_SETBITMASK (8 * sizeof(unsigned long)-1) - -/* for select() */ -typedef struct { - unsigned long fds[(FD_SETSIZE + FD_SETBITMASK) / FD_SETIDXMASK]; -} fd_set; - -#define FD_CLR(fd, set) do { \ - fd_set *__set = (set); \ - int __fd = (fd); \ - if (__fd >= 0) \ - __set->fds[__fd / FD_SETIDXMASK] &= \ - ~(1U << (__fd & FX_SETBITMASK)); \ - } while (0) - -#define FD_SET(fd, set) do { \ - fd_set *__set = (set); \ - int __fd = (fd); \ - if (__fd >= 0) \ - __set->fds[__fd / FD_SETIDXMASK] |= \ - 1 << (__fd & FD_SETBITMASK); \ - } while (0) - -#define FD_ISSET(fd, set) ({ \ - fd_set *__set = (set); \ - int __fd = (fd); \ - int __r = 0; \ - if (__fd >= 0) \ - __r = !!(__set->fds[__fd / FD_SETIDXMASK] & \ -1U << (__fd & FD_SET_BITMASK)); \ - __r; \ - }) - -#define FD_ZERO(set) do { \ - fd_set *__set = (set); \ - int __idx; \ - int __size = (FD_SETSIZE+FD_SETBITMASK) / FD_SETIDXMASK;\ - for (__idx = 0; __idx < __size; __idx++) \ - __set->fds[__idx] = 0; \ - } while (0) - -/* for poll() */ -#define POLLIN 0x0001 -#define POLLPRI 0x0002 -#define POLLOUT 0x0004 -#define POLLERR 0x0008 -#define POLLHUP 0x0010 -#define POLLNVAL 0x0020 - -struct pollfd { - int fd; - short int events; - short int revents; -}; - /* for getdents64() */ struct linux_dirent64 { uint64_t d_ino; @@ -198,14 +138,8 @@ struct stat { union { time_t st_ctime; struct timespec st_ctim; }; /* time of last status change */ }; -/* WARNING, it only deals with the 4096 first majors and 256 first minors */ -#define makedev(major, minor) ((dev_t)((((major) & 0xfff) << 8) | ((minor) & 0xff))) -#define major(dev) ((unsigned int)(((dev) >> 8) & 0xfff)) -#define minor(dev) ((unsigned int)(((dev) & 0xff)) - -#ifndef offsetof -#define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD) -#endif +typedef __kernel_clockid_t clockid_t; +typedef int timer_t; #ifndef container_of #define container_of(PTR, TYPE, FIELD) ({ \ @@ -214,7 +148,4 @@ struct stat { }) #endif -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_TYPES_H */ diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h index e38f3660c051..bb5e80f3f05d 100644 --- a/tools/include/nolibc/unistd.h +++ b/tools/include/nolibc/unistd.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_UNISTD_H #define _NOLIBC_UNISTD_H @@ -17,13 +20,41 @@ #define STDOUT_FILENO 1 #define STDERR_FILENO 2 +#define F_OK 0 +#define X_OK 1 +#define W_OK 2 +#define R_OK 4 + +/* + * int access(const char *path, int amode); + * int faccessat(int fd, const char *path, int amode, int flag); + */ + +static __attribute__((unused)) +int sys_faccessat(int fd, const char *path, int amode, int flag) +{ + return my_syscall4(__NR_faccessat, fd, path, amode, flag); +} + +static __attribute__((unused)) +int faccessat(int fd, const char *path, int amode, int flag) +{ + return __sysret(sys_faccessat(fd, path, amode, flag)); +} + +static __attribute__((unused)) +int access(const char *path, int amode) +{ + return faccessat(AT_FDCWD, path, amode, 0); +} + static __attribute__((unused)) int msleep(unsigned int msecs) { struct timeval my_timeval = { msecs / 1000, (msecs % 1000) * 1000 }; - if (sys_select(0, 0, 0, 0, &my_timeval) < 0) + if (sys_select(0, NULL, NULL, NULL, &my_timeval) < 0) return (my_timeval.tv_sec * 1000) + (my_timeval.tv_usec / 1000) + !!(my_timeval.tv_usec % 1000); @@ -36,7 +67,7 @@ unsigned int sleep(unsigned int seconds) { struct timeval my_timeval = { seconds, 0 }; - if (sys_select(0, 0, 0, 0, &my_timeval) < 0) + if (sys_select(0, NULL, NULL, NULL, &my_timeval) < 0) return my_timeval.tv_sec + !!my_timeval.tv_usec; else return 0; @@ -47,7 +78,7 @@ int usleep(unsigned int usecs) { struct timeval my_timeval = { usecs / 1000000, usecs % 1000000 }; - return sys_select(0, 0, 0, 0, &my_timeval); + return sys_select(0, NULL, NULL, NULL, &my_timeval); } static __attribute__((unused)) @@ -56,13 +87,4 @@ int tcsetpgrp(int fd, pid_t pid) return ioctl(fd, TIOCSPGRP, &pid); } -#define __syscall_narg(_0, _1, _2, _3, _4, _5, _6, N, ...) N -#define _syscall_narg(...) __syscall_narg(__VA_ARGS__, 6, 5, 4, 3, 2, 1, 0) -#define _syscall(N, ...) __sysret(my_syscall##N(__VA_ARGS__)) -#define _syscall_n(N, ...) _syscall(N, __VA_ARGS__) -#define syscall(...) _syscall_n(_syscall_narg(__VA_ARGS__), ##__VA_ARGS__) - -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_UNISTD_H */ diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index 1ea2c4c33b86..ef1c27fa3c57 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -85,6 +85,7 @@ /* compatibility flags */ #define MAP_FILE 0 +#define PKEY_UNRESTRICTED 0x0 #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 #define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ diff --git a/tools/include/uapi/asm-generic/mman.h b/tools/include/uapi/asm-generic/mman.h index 406f7718f9ad..51d2556af54a 100644 --- a/tools/include/uapi/asm-generic/mman.h +++ b/tools/include/uapi/asm-generic/mman.h @@ -19,4 +19,8 @@ #define MCL_FUTURE 2 /* lock all future mappings */ #define MCL_ONFAULT 4 /* lock all pages that are faulted in */ +#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */ +#define SHADOW_STACK_SET_MARKER (1ULL << 1) /* Set up a top of stack marker in the shadow stack */ + + #endif /* __ASM_GENERIC_MMAN_H */ diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h index 281df9139d2b..f333a0ac4ee4 100644 --- a/tools/include/uapi/asm-generic/socket.h +++ b/tools/include/uapi/asm-generic/socket.h @@ -119,12 +119,33 @@ #define SO_DETACH_REUSEPORT_BPF 68 +#define SO_PREFER_BUSY_POLL 69 +#define SO_BUSY_POLL_BUDGET 70 + +#define SO_NETNS_COOKIE 71 + +#define SO_BUF_LOCK 72 + +#define SO_RESERVE_MEM 73 + +#define SO_TXREHASH 74 + #define SO_RCVMARK 75 #define SO_PASSPIDFD 76 #define SO_PEERPIDFD 77 -#define SCM_TS_OPT_ID 78 +#define SO_DEVMEM_LINEAR 78 +#define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR +#define SO_DEVMEM_DMABUF 79 +#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF +#define SO_DEVMEM_DONTNEED 80 + +#define SCM_TS_OPT_ID 81 + +#define SO_RCVPRIORITY 82 + +#define SO_PASSRIGHTS 83 #if !defined(__KERNEL__) diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 5bf6148cac2b..04e0077fb4c9 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -841,8 +841,25 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) #define __NR_mseal 462 __SYSCALL(__NR_mseal, sys_mseal) +#define __NR_setxattrat 463 +__SYSCALL(__NR_setxattrat, sys_setxattrat) +#define __NR_getxattrat 464 +__SYSCALL(__NR_getxattrat, sys_getxattrat) +#define __NR_listxattrat 465 +__SYSCALL(__NR_listxattrat, sys_listxattrat) +#define __NR_removexattrat 466 +__SYSCALL(__NR_removexattrat, sys_removexattrat) +#define __NR_open_tree_attr 467 +__SYSCALL(__NR_open_tree_attr, sys_open_tree_attr) + +/* fs/inode.c */ +#define __NR_file_getattr 468 +__SYSCALL(__NR_file_getattr, sys_file_getattr) +#define __NR_file_setattr 469 +__SYSCALL(__NR_file_setattr, sys_file_setattr) + #undef __NR_syscalls -#define __NR_syscalls 463 +#define __NR_syscalls 470 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 16122819edfe..3cd5cf15e3c9 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -597,35 +597,66 @@ struct drm_set_version { int drm_dd_minor; }; -/* DRM_IOCTL_GEM_CLOSE ioctl argument type */ +/** + * struct drm_gem_close - Argument for &DRM_IOCTL_GEM_CLOSE ioctl. + * @handle: Handle of the object to be closed. + * @pad: Padding. + * + * Releases the handle to an mm object. + */ struct drm_gem_close { - /** Handle of the object to be closed. */ __u32 handle; __u32 pad; }; -/* DRM_IOCTL_GEM_FLINK ioctl argument type */ +/** + * struct drm_gem_flink - Argument for &DRM_IOCTL_GEM_FLINK ioctl. + * @handle: Handle for the object being named. + * @name: Returned global name. + * + * Create a global name for an object, returning the name. + * + * Note that the name does not hold a reference; when the object + * is freed, the name goes away. + */ struct drm_gem_flink { - /** Handle for the object being named */ __u32 handle; - - /** Returned global name */ __u32 name; }; -/* DRM_IOCTL_GEM_OPEN ioctl argument type */ +/** + * struct drm_gem_open - Argument for &DRM_IOCTL_GEM_OPEN ioctl. + * @name: Name of object being opened. + * @handle: Returned handle for the object. + * @size: Returned size of the object + * + * Open an object using the global name, returning a handle and the size. + * + * This handle (of course) holds a reference to the object, so the object + * will not go away until the handle is deleted. + */ struct drm_gem_open { - /** Name of object being opened */ __u32 name; - - /** Returned handle for the object */ __u32 handle; - - /** Returned size of the object */ __u64 size; }; /** + * struct drm_gem_change_handle - Argument for &DRM_IOCTL_GEM_CHANGE_HANDLE ioctl. + * @handle: The handle of a gem object. + * @new_handle: An available gem handle. + * + * This ioctl changes the handle of a GEM object to the specified one. + * The new handle must be unused. On success the old handle is closed + * and all further IOCTL should refer to the new handle only. + * Calls to DRM_IOCTL_PRIME_FD_TO_HANDLE will return the new handle. + */ +struct drm_gem_change_handle { + __u32 handle; + __u32 new_handle; +}; + +/** * DRM_CAP_DUMB_BUFFER * * If set to 1, the driver supports creating dumb buffers via the @@ -905,13 +936,17 @@ struct drm_syncobj_destroy { }; #define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE (1 << 0) +#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_TIMELINE (1 << 1) #define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE (1 << 0) +#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_TIMELINE (1 << 1) struct drm_syncobj_handle { __u32 handle; __u32 flags; __s32 fd; __u32 pad; + + __u64 point; }; struct drm_syncobj_transfer { @@ -1024,6 +1059,13 @@ struct drm_crtc_queue_sequence { __u64 user_data; /* user data passed to event */ }; +#define DRM_CLIENT_NAME_MAX_LEN 64 +struct drm_set_client_name { + __u64 name_len; + __u64 name; +}; + + #if defined(__cplusplus) } #endif @@ -1288,6 +1330,24 @@ extern "C" { */ #define DRM_IOCTL_MODE_CLOSEFB DRM_IOWR(0xD0, struct drm_mode_closefb) +/** + * DRM_IOCTL_SET_CLIENT_NAME - Attach a name to a drm_file + * + * Having a name allows for easier tracking and debugging. + * The length of the name (without null ending char) must be + * <= DRM_CLIENT_NAME_MAX_LEN. + * The call will fail if the name contains whitespaces or non-printable chars. + */ +#define DRM_IOCTL_SET_CLIENT_NAME DRM_IOWR(0xD1, struct drm_set_client_name) + +/** + * DRM_IOCTL_GEM_CHANGE_HANDLE - Move an object to a different handle + * + * Some applications (notably CRIU) need objects to have specific gem handles. + * This ioctl changes the object at one gem handle to use a new gem handle. + */ +#define DRM_IOCTL_GEM_CHANGE_HANDLE DRM_IOWR(0xD2, struct drm_gem_change_handle) + /* * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. diff --git a/tools/include/uapi/linux/bits.h b/tools/include/uapi/linux/bits.h index 5ee30f882736..a04afef9efca 100644 --- a/tools/include/uapi/linux/bits.h +++ b/tools/include/uapi/linux/bits.h @@ -4,13 +4,9 @@ #ifndef _UAPI_LINUX_BITS_H #define _UAPI_LINUX_BITS_H -#define __GENMASK(h, l) \ - (((~_UL(0)) - (_UL(1) << (l)) + 1) & \ - (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) \ - (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \ - (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4162afc6b5d0..be7d8e060e10 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -51,6 +51,9 @@ #define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */ #define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */ +#define BPF_LOAD_ACQ 0x100 /* load-acquire */ +#define BPF_STORE_REL 0x110 /* store-release */ + enum bpf_cond_pseudo_jmp { BPF_MAY_GOTO = 0, }; @@ -447,6 +450,7 @@ union bpf_iter_link_info { * * **struct bpf_map_info** * * **struct bpf_btf_info** * * **struct bpf_link_info** + * * **struct bpf_token_info** * * Return * Returns zero on success. On error, -1 is returned and *errno* @@ -903,6 +907,17 @@ union bpf_iter_link_info { * A new file descriptor (a nonnegative integer), or -1 if an * error occurred (in which case, *errno* is set appropriately). * + * BPF_PROG_STREAM_READ_BY_FD + * Description + * Read data of a program's BPF stream. The program is identified + * by *prog_fd*, and the stream is identified by the *stream_id*. + * The data is copied to a buffer pointed to by *stream_buf*, and + * filled less than or equal to *stream_buf_len* bytes. + * + * Return + * Number of bytes read from the stream on success, or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -958,6 +973,7 @@ enum bpf_cmd { BPF_LINK_DETACH, BPF_PROG_BIND_MAP, BPF_TOKEN_CREATE, + BPF_PROG_STREAM_READ_BY_FD, __MAX_BPF_CMD, }; @@ -1010,6 +1026,7 @@ enum bpf_map_type { BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, BPF_MAP_TYPE_ARENA, + BPF_MAP_TYPE_INSN_ARRAY, __MAX_BPF_MAP_TYPE }; @@ -1207,6 +1224,7 @@ enum bpf_perf_event_type { #define BPF_F_BEFORE (1U << 3) #define BPF_F_AFTER (1U << 4) #define BPF_F_ID (1U << 5) +#define BPF_F_PREORDER (1U << 6) #define BPF_F_LINK BPF_F_LINK /* 1 << 13 */ /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the @@ -1413,6 +1431,9 @@ enum { /* Do not translate kernel bpf_arena pointers to user pointers */ BPF_F_NO_USER_CONV = (1U << 18), + +/* Enable BPF ringbuf overwrite mode */ + BPF_F_RB_OVERWRITE = (1U << 19), }; /* Flags for BPF_PROG_QUERY. */ @@ -1459,6 +1480,11 @@ struct bpf_stack_build_id { #define BPF_OBJ_NAME_LEN 16U +enum { + BPF_STREAM_STDOUT = 1, + BPF_STREAM_STDERR = 2, +}; + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -1500,9 +1526,15 @@ union bpf_attr { * If provided, map_flags should have BPF_F_TOKEN_FD flag set. */ __s32 map_token_fd; + + /* Hash of the program that has exclusive access to the map. + */ + __aligned_u64 excl_prog_hash; + /* Size of the passed excl_prog_hash. */ + __u32 excl_prog_hash_size; }; - struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ + struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */ __u32 map_fd; __aligned_u64 key; union { @@ -1573,6 +1605,26 @@ union bpf_attr { * If provided, prog_flags should have BPF_F_TOKEN_FD flag set. */ __s32 prog_token_fd; + /* The fd_array_cnt can be used to pass the length of the + * fd_array array. In this case all the [map] file descriptors + * passed in this array will be bound to the program, even if + * the maps are not referenced directly. The functionality is + * similar to the BPF_PROG_BIND_MAP syscall, but maps can be + * used by the verifier during the program load. If provided, + * then the fd_array[0,...,fd_array_cnt-1] is expected to be + * continuous. + */ + __u32 fd_array_cnt; + /* Pointer to a buffer containing the signature of the BPF + * program. + */ + __aligned_u64 signature; + /* Size of the signature buffer in bytes. */ + __u32 signature_size; + /* ID of the kernel keyring to be used for signature + * verification. + */ + __s32 keyring_id; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -1638,6 +1690,7 @@ union bpf_attr { }; __u32 next_id; __u32 open_flags; + __s32 fd_by_id_token_fd; }; struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */ @@ -1779,6 +1832,13 @@ union bpf_attr { }; __u64 expected_revision; } netkit; + struct { + union { + __u32 relative_fd; + __u32 relative_id; + }; + __u64 expected_revision; + } cgroup; }; } link_create; @@ -1827,6 +1887,13 @@ union bpf_attr { __u32 bpffs_fd; } token_create; + struct { + __aligned_u64 stream_buf; + __u32 stream_buf_len; + __u32 stream_id; + __u32 prog_fd; + } prog_stream_read; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF @@ -1980,11 +2047,15 @@ union bpf_attr { * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description * Store *len* bytes from address *from* into the packet - * associated to *skb*, at *offset*. *flags* are a combination of - * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the - * checksum for the packet after storing the bytes) and - * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ - * **->swhash** and *skb*\ **->l4hash** to 0). + * associated to *skb*, at *offset*. The *flags* are a combination + * of the following values: + * + * **BPF_F_RECOMPUTE_CSUM** + * Automatically update *skb*\ **->csum** after storing the + * bytes. + * **BPF_F_INVALIDATE_HASH** + * Set *skb*\ **->hash**, *skb*\ **->swhash** and *skb*\ + * **->l4hash** to 0. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers @@ -2036,7 +2107,8 @@ union bpf_attr { * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and * for updates resulting in a null checksum the value is set to * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates - * the checksum is to be computed against a pseudo-header. + * that the modified header field is part of the pseudo-header. + * Flag **BPF_F_IPV6** should be set for IPv6 packets. * * This helper works in combination with **bpf_csum_diff**\ (), * which does not update the checksum in-place, but offers more @@ -2383,7 +2455,7 @@ union bpf_attr { * into it. An example is available in file * *samples/bpf/trace_output_user.c* in the Linux kernel source * tree (the eBPF program counterpart is in - * *samples/bpf/trace_output_kern.c*). + * *samples/bpf/trace_output.bpf.c*). * * **bpf_perf_event_output**\ () achieves better performance * than **bpf_trace_printk**\ () for sharing data with user @@ -4823,7 +4895,7 @@ union bpf_attr { * * **-ENOENT** if the bpf_local_storage cannot be found. * - * long bpf_d_path(struct path *path, char *buf, u32 sz) + * long bpf_d_path(const struct path *path, char *buf, u32 sz) * Description * Return full path for given **struct path** object, which * needs to be the kernel BTF *path* object. The path is @@ -4953,6 +5025,9 @@ union bpf_attr { * the netns switch takes place from ingress to ingress without * going through the CPU's backlog queue. * + * *skb*\ **->mark** and *skb*\ **->tstamp** are not cleared during + * the netns switch. + * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types at the * ingress hook and for veth and netkit target device types. The @@ -5547,7 +5622,7 @@ union bpf_attr { * Return * *sk* if casting is valid, or **NULL** otherwise. * - * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr) + * long bpf_dynptr_from_mem(void *data, u64 size, u64 flags, struct bpf_dynptr *ptr) * Description * Get a dynptr to local memory *data*. * @@ -5590,7 +5665,7 @@ union bpf_attr { * Return * Nothing. Always succeeds. * - * long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags) + * long bpf_dynptr_read(void *dst, u64 len, const struct bpf_dynptr *src, u64 offset, u64 flags) * Description * Read *len* bytes from *src* into *dst*, starting from *offset* * into *src*. @@ -5600,7 +5675,7 @@ union bpf_attr { * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if * *flags* is not 0. * - * long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) + * long bpf_dynptr_write(const struct bpf_dynptr *dst, u64 offset, void *src, u64 len, u64 flags) * Description * Write *len* bytes from *src* into *dst*, starting from *offset* * into *dst*. @@ -5621,7 +5696,7 @@ union bpf_attr { * is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs, * other errors correspond to errors returned by **bpf_skb_store_bytes**\ (). * - * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len) + * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u64 offset, u64 len) * Description * Get a pointer to the underlying dynptr data. * @@ -6009,7 +6084,10 @@ union bpf_attr { FN(user_ringbuf_drain, 209, ##ctx) \ FN(cgrp_storage_get, 210, ##ctx) \ FN(cgrp_storage_delete, 211, ##ctx) \ - /* */ + /* This helper list is effectively frozen. If you are trying to \ + * add a new helper, you should add a kfunc instead which has \ + * less stability guarantees. See Documentation/bpf/kfuncs.rst \ + */ /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't * know or care about integer value that is now passed as second argument @@ -6047,6 +6125,7 @@ enum { BPF_F_PSEUDO_HDR = (1ULL << 4), BPF_F_MARK_MANGLED_0 = (1ULL << 5), BPF_F_MARK_ENFORCE = (1ULL << 6), + BPF_F_IPV6 = (1ULL << 7), }; /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ @@ -6156,6 +6235,7 @@ enum { BPF_RB_RING_SIZE = 1, BPF_RB_CONS_POS = 2, BPF_RB_PROD_POS = 3, + BPF_RB_OVERWRITE_POS = 4, }; /* BPF ring buffer constants */ @@ -6607,6 +6687,8 @@ struct bpf_map_info { __u32 btf_value_type_id; __u32 btf_vmlinux_id; __u64 map_extra; + __aligned_u64 hash; + __u32 hash_size; } __attribute__((aligned(8))); struct bpf_btf_info { @@ -6626,11 +6708,15 @@ struct bpf_link_info { struct { __aligned_u64 tp_name; /* in/out: tp_name buffer ptr */ __u32 tp_name_len; /* in/out: tp_name buffer len */ + __u32 :32; + __u64 cookie; } raw_tracepoint; struct { __u32 attach_type; __u32 target_obj_id; /* prog_id for PROG_EXT, otherwise btf object id */ __u32 target_btf_id; /* BTF type id inside the object */ + __u32 :32; + __u64 cookie; } tracing; struct { __u64 cgroup_id; @@ -6702,6 +6788,7 @@ struct bpf_link_info { __u32 name_len; __u32 offset; /* offset from file_name */ __u64 cookie; + __u64 ref_ctr_offset; } uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */ struct { __aligned_u64 func_name; /* in/out */ @@ -6740,6 +6827,13 @@ struct bpf_link_info { }; } __attribute__((aligned(8))); +struct bpf_token_info { + __u64 allowed_cmds; + __u64 allowed_maps; + __u64 allowed_progs; + __u64 allowed_attachs; +} __attribute__((aligned(8))); + /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed * by user and intended to be used by socket (e.g. to bind to, depends on * attach type). @@ -6903,6 +6997,12 @@ enum { BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F, }; +enum { + SK_BPF_CB_TX_TIMESTAMPING = 1<<0, + SK_BPF_CB_MASK = (SK_BPF_CB_TX_TIMESTAMPING - 1) | + SK_BPF_CB_TX_TIMESTAMPING +}; + /* List of known BPF sock_ops operators. * New entries can only be added at the end */ @@ -7015,6 +7115,29 @@ enum { * by the kernel or the * earlier bpf-progs. */ + BPF_SOCK_OPS_TSTAMP_SCHED_CB, /* Called when skb is passing + * through dev layer when + * SK_BPF_CB_TX_TIMESTAMPING + * feature is on. + */ + BPF_SOCK_OPS_TSTAMP_SND_SW_CB, /* Called when skb is about to send + * to the nic when SK_BPF_CB_TX_TIMESTAMPING + * feature is on. + */ + BPF_SOCK_OPS_TSTAMP_SND_HW_CB, /* Called in hardware phase when + * SK_BPF_CB_TX_TIMESTAMPING feature + * is on. + */ + BPF_SOCK_OPS_TSTAMP_ACK_CB, /* Called when all the skbs in the + * same sendmsg call are acked + * when SK_BPF_CB_TX_TIMESTAMPING + * feature is on. + */ + BPF_SOCK_OPS_TSTAMP_SENDMSG_CB, /* Called when every sendmsg syscall + * is triggered. It's used to correlate + * sendmsg timestamp with corresponding + * tskey. + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect @@ -7081,6 +7204,8 @@ enum { TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */ TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */ TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */ + SK_BPF_CB_FLAGS = 1009, /* Get or set sock ops flags in socket */ + SK_BPF_BYPASS_PROT_MEM = 1010, /* Get or Set sk->sk_bypass_prot_mem */ }; enum { @@ -7317,6 +7442,10 @@ struct bpf_timer { __u64 __opaque[2]; } __attribute__((aligned(8))); +struct bpf_task_work { + __u64 __opaque; +} __attribute__((aligned(8))); + struct bpf_wq { __u64 __opaque[2]; } __attribute__((aligned(8))); @@ -7522,4 +7651,24 @@ enum bpf_kfunc_flags { BPF_F_PAD_ZEROS = (1ULL << 0), }; +/* + * Values of a BPF_MAP_TYPE_INSN_ARRAY entry must be of this type. + * + * Before the map is used the orig_off field should point to an + * instruction inside the program being loaded. The other fields + * must be set to 0. + * + * After the program is loaded, the xlated_off will be adjusted + * by the verifier to point to the index of the original instruction + * in the xlated program. If the instruction is deleted, it will + * be set to (u32)-1. The jitted_off will be set to the corresponding + * offset in the jitted image of the program. + */ +struct bpf_insn_array_value { + __u32 orig_off; + __u32 xlated_off; + __u32 jitted_off; + __u32 :32; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index ec1798b6d3ff..266d4ffa6c07 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -36,7 +36,8 @@ struct btf_type { * bits 24-28: kind (e.g. int, ptr, array...etc) * bits 29-30: unused * bit 31: kind_flag, currently used by - * struct, union, enum, fwd and enum64 + * struct, union, enum, fwd, enum64, + * decl_tag and type_tag */ __u32 info; /* "size" is used by INT, ENUM, STRUCT, UNION, DATASEC and ENUM64. diff --git a/tools/include/uapi/linux/const.h b/tools/include/uapi/linux/const.h index e16be0d37746..b8f629ef135f 100644 --- a/tools/include/uapi/linux/const.h +++ b/tools/include/uapi/linux/const.h @@ -33,7 +33,7 @@ * Missing asm support * * __BIT128() would not work in the asm code, as it shifts an - * 'unsigned __init128' data type as direct representation of + * 'unsigned __int128' data type as direct representation of * 128 bit constants is not supported in the gcc compiler, as * they get silently truncated. * diff --git a/tools/include/uapi/linux/coredump.h b/tools/include/uapi/linux/coredump.h new file mode 100644 index 000000000000..dc3789b78af0 --- /dev/null +++ b/tools/include/uapi/linux/coredump.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_LINUX_COREDUMP_H +#define _UAPI_LINUX_COREDUMP_H + +#include <linux/types.h> + +/** + * coredump_{req,ack} flags + * @COREDUMP_KERNEL: kernel writes coredump + * @COREDUMP_USERSPACE: userspace writes coredump + * @COREDUMP_REJECT: don't generate coredump + * @COREDUMP_WAIT: wait for coredump server + */ +enum { + COREDUMP_KERNEL = (1ULL << 0), + COREDUMP_USERSPACE = (1ULL << 1), + COREDUMP_REJECT = (1ULL << 2), + COREDUMP_WAIT = (1ULL << 3), +}; + +/** + * struct coredump_req - message kernel sends to userspace + * @size: size of struct coredump_req + * @size_ack: known size of struct coredump_ack on this kernel + * @mask: supported features + * + * When a coredump happens the kernel will connect to the coredump + * socket and send a coredump request to the coredump server. The @size + * member is set to the size of struct coredump_req and provides a hint + * to userspace how much data can be read. Userspace may use MSG_PEEK to + * peek the size of struct coredump_req and then choose to consume it in + * one go. Userspace may also simply read a COREDUMP_ACK_SIZE_VER0 + * request. If the size the kernel sends is larger userspace simply + * discards any remaining data. + * + * The coredump_req->mask member is set to the currently know features. + * Userspace may only set coredump_ack->mask to the bits raised by the + * kernel in coredump_req->mask. + * + * The coredump_req->size_ack member is set by the kernel to the size of + * struct coredump_ack the kernel knows. Userspace may only send up to + * coredump_req->size_ack bytes to the kernel and must set + * coredump_ack->size accordingly. + */ +struct coredump_req { + __u32 size; + __u32 size_ack; + __u64 mask; +}; + +enum { + COREDUMP_REQ_SIZE_VER0 = 16U, /* size of first published struct */ +}; + +/** + * struct coredump_ack - message userspace sends to kernel + * @size: size of the struct + * @spare: unused + * @mask: features kernel is supposed to use + * + * The @size member must be set to the size of struct coredump_ack. It + * may never exceed what the kernel returned in coredump_req->size_ack + * but it may of course be smaller (>= COREDUMP_ACK_SIZE_VER0 and <= + * coredump_req->size_ack). + * + * The @mask member must be set to the features the coredump server + * wants the kernel to use. Only bits the kernel returned in + * coredump_req->mask may be set. + */ +struct coredump_ack { + __u32 size; + __u32 spare; + __u64 mask; +}; + +enum { + COREDUMP_ACK_SIZE_VER0 = 16U, /* size of first published struct */ +}; + +/** + * enum coredump_mark - Markers for the coredump socket + * + * The kernel will place a single byte on the coredump socket. The + * markers notify userspace whether the coredump ack succeeded or + * failed. + * + * @COREDUMP_MARK_MINSIZE: the provided coredump_ack size was too small + * @COREDUMP_MARK_MAXSIZE: the provided coredump_ack size was too big + * @COREDUMP_MARK_UNSUPPORTED: the provided coredump_ack mask was invalid + * @COREDUMP_MARK_CONFLICTING: the provided coredump_ack mask has conflicting options + * @COREDUMP_MARK_REQACK: the coredump request and ack was successful + * @__COREDUMP_MARK_MAX: the maximum coredump mark value + */ +enum coredump_mark { + COREDUMP_MARK_REQACK = 0U, + COREDUMP_MARK_MINSIZE = 1U, + COREDUMP_MARK_MAXSIZE = 2U, + COREDUMP_MARK_UNSUPPORTED = 3U, + COREDUMP_MARK_CONFLICTING = 4U, + __COREDUMP_MARK_MAX = (1U << 31), +}; + +#endif /* _UAPI_LINUX_COREDUMP_H */ diff --git a/tools/include/uapi/linux/elf.h b/tools/include/uapi/linux/elf.h new file mode 100644 index 000000000000..5834b83d7f9a --- /dev/null +++ b/tools/include/uapi/linux/elf.h @@ -0,0 +1,524 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_ELF_H +#define _LINUX_ELF_H + +#include <linux/types.h> +#include <linux/elf-em.h> + +/* 32-bit ELF base types. */ +typedef __u32 Elf32_Addr; +typedef __u16 Elf32_Half; +typedef __u32 Elf32_Off; +typedef __s32 Elf32_Sword; +typedef __u32 Elf32_Word; +typedef __u16 Elf32_Versym; + +/* 64-bit ELF base types. */ +typedef __u64 Elf64_Addr; +typedef __u16 Elf64_Half; +typedef __s16 Elf64_SHalf; +typedef __u64 Elf64_Off; +typedef __s32 Elf64_Sword; +typedef __u32 Elf64_Word; +typedef __u64 Elf64_Xword; +typedef __s64 Elf64_Sxword; +typedef __u16 Elf64_Versym; + +/* These constants are for the segment types stored in the image headers */ +#define PT_NULL 0 +#define PT_LOAD 1 +#define PT_DYNAMIC 2 +#define PT_INTERP 3 +#define PT_NOTE 4 +#define PT_SHLIB 5 +#define PT_PHDR 6 +#define PT_TLS 7 /* Thread local storage segment */ +#define PT_LOOS 0x60000000 /* OS-specific */ +#define PT_HIOS 0x6fffffff /* OS-specific */ +#define PT_LOPROC 0x70000000 +#define PT_HIPROC 0x7fffffff +#define PT_GNU_EH_FRAME (PT_LOOS + 0x474e550) +#define PT_GNU_STACK (PT_LOOS + 0x474e551) +#define PT_GNU_RELRO (PT_LOOS + 0x474e552) +#define PT_GNU_PROPERTY (PT_LOOS + 0x474e553) + + +/* ARM MTE memory tag segment type */ +#define PT_AARCH64_MEMTAG_MTE (PT_LOPROC + 0x2) + +/* + * Extended Numbering + * + * If the real number of program header table entries is larger than + * or equal to PN_XNUM(0xffff), it is set to sh_info field of the + * section header at index 0, and PN_XNUM is set to e_phnum + * field. Otherwise, the section header at index 0 is zero + * initialized, if it exists. + * + * Specifications are available in: + * + * - Oracle: Linker and Libraries. + * Part No: 817–1984–19, August 2011. + * https://docs.oracle.com/cd/E18752_01/pdf/817-1984.pdf + * + * - System V ABI AMD64 Architecture Processor Supplement + * Draft Version 0.99.4, + * January 13, 2010. + * http://www.cs.washington.edu/education/courses/cse351/12wi/supp-docs/abi.pdf + */ +#define PN_XNUM 0xffff + +/* These constants define the different elf file types */ +#define ET_NONE 0 +#define ET_REL 1 +#define ET_EXEC 2 +#define ET_DYN 3 +#define ET_CORE 4 +#define ET_LOPROC 0xff00 +#define ET_HIPROC 0xffff + +/* This is the info that is needed to parse the dynamic section of the file */ +#define DT_NULL 0 +#define DT_NEEDED 1 +#define DT_PLTRELSZ 2 +#define DT_PLTGOT 3 +#define DT_HASH 4 +#define DT_STRTAB 5 +#define DT_SYMTAB 6 +#define DT_RELA 7 +#define DT_RELASZ 8 +#define DT_RELAENT 9 +#define DT_STRSZ 10 +#define DT_SYMENT 11 +#define DT_INIT 12 +#define DT_FINI 13 +#define DT_SONAME 14 +#define DT_RPATH 15 +#define DT_SYMBOLIC 16 +#define DT_REL 17 +#define DT_RELSZ 18 +#define DT_RELENT 19 +#define DT_PLTREL 20 +#define DT_DEBUG 21 +#define DT_TEXTREL 22 +#define DT_JMPREL 23 +#define DT_ENCODING 32 +#define OLD_DT_LOOS 0x60000000 +#define DT_LOOS 0x6000000d +#define DT_HIOS 0x6ffff000 +#define DT_VALRNGLO 0x6ffffd00 +#define DT_VALRNGHI 0x6ffffdff +#define DT_ADDRRNGLO 0x6ffffe00 +#define DT_GNU_HASH 0x6ffffef5 +#define DT_ADDRRNGHI 0x6ffffeff +#define DT_VERSYM 0x6ffffff0 +#define DT_RELACOUNT 0x6ffffff9 +#define DT_RELCOUNT 0x6ffffffa +#define DT_FLAGS_1 0x6ffffffb +#define DT_VERDEF 0x6ffffffc +#define DT_VERDEFNUM 0x6ffffffd +#define DT_VERNEED 0x6ffffffe +#define DT_VERNEEDNUM 0x6fffffff +#define OLD_DT_HIOS 0x6fffffff +#define DT_LOPROC 0x70000000 +#define DT_HIPROC 0x7fffffff + +/* This info is needed when parsing the symbol table */ +#define STB_LOCAL 0 +#define STB_GLOBAL 1 +#define STB_WEAK 2 + +#define STN_UNDEF 0 + +#define STT_NOTYPE 0 +#define STT_OBJECT 1 +#define STT_FUNC 2 +#define STT_SECTION 3 +#define STT_FILE 4 +#define STT_COMMON 5 +#define STT_TLS 6 + +#define VER_FLG_BASE 0x1 +#define VER_FLG_WEAK 0x2 + +#define ELF_ST_BIND(x) ((x) >> 4) +#define ELF_ST_TYPE(x) ((x) & 0xf) +#define ELF32_ST_BIND(x) ELF_ST_BIND(x) +#define ELF32_ST_TYPE(x) ELF_ST_TYPE(x) +#define ELF64_ST_BIND(x) ELF_ST_BIND(x) +#define ELF64_ST_TYPE(x) ELF_ST_TYPE(x) + +typedef struct { + Elf32_Sword d_tag; + union { + Elf32_Sword d_val; + Elf32_Addr d_ptr; + } d_un; +} Elf32_Dyn; + +typedef struct { + Elf64_Sxword d_tag; /* entry tag value */ + union { + Elf64_Xword d_val; + Elf64_Addr d_ptr; + } d_un; +} Elf64_Dyn; + +/* The following are used with relocations */ +#define ELF32_R_SYM(x) ((x) >> 8) +#define ELF32_R_TYPE(x) ((x) & 0xff) + +#define ELF64_R_SYM(i) ((i) >> 32) +#define ELF64_R_TYPE(i) ((i) & 0xffffffff) + +typedef struct elf32_rel { + Elf32_Addr r_offset; + Elf32_Word r_info; +} Elf32_Rel; + +typedef struct elf64_rel { + Elf64_Addr r_offset; /* Location at which to apply the action */ + Elf64_Xword r_info; /* index and type of relocation */ +} Elf64_Rel; + +typedef struct elf32_rela { + Elf32_Addr r_offset; + Elf32_Word r_info; + Elf32_Sword r_addend; +} Elf32_Rela; + +typedef struct elf64_rela { + Elf64_Addr r_offset; /* Location at which to apply the action */ + Elf64_Xword r_info; /* index and type of relocation */ + Elf64_Sxword r_addend; /* Constant addend used to compute value */ +} Elf64_Rela; + +typedef struct elf32_sym { + Elf32_Word st_name; + Elf32_Addr st_value; + Elf32_Word st_size; + unsigned char st_info; + unsigned char st_other; + Elf32_Half st_shndx; +} Elf32_Sym; + +typedef struct elf64_sym { + Elf64_Word st_name; /* Symbol name, index in string tbl */ + unsigned char st_info; /* Type and binding attributes */ + unsigned char st_other; /* No defined meaning, 0 */ + Elf64_Half st_shndx; /* Associated section index */ + Elf64_Addr st_value; /* Value of the symbol */ + Elf64_Xword st_size; /* Associated symbol size */ +} Elf64_Sym; + + +#define EI_NIDENT 16 + +typedef struct elf32_hdr { + unsigned char e_ident[EI_NIDENT]; + Elf32_Half e_type; + Elf32_Half e_machine; + Elf32_Word e_version; + Elf32_Addr e_entry; /* Entry point */ + Elf32_Off e_phoff; + Elf32_Off e_shoff; + Elf32_Word e_flags; + Elf32_Half e_ehsize; + Elf32_Half e_phentsize; + Elf32_Half e_phnum; + Elf32_Half e_shentsize; + Elf32_Half e_shnum; + Elf32_Half e_shstrndx; +} Elf32_Ehdr; + +typedef struct elf64_hdr { + unsigned char e_ident[EI_NIDENT]; /* ELF "magic number" */ + Elf64_Half e_type; + Elf64_Half e_machine; + Elf64_Word e_version; + Elf64_Addr e_entry; /* Entry point virtual address */ + Elf64_Off e_phoff; /* Program header table file offset */ + Elf64_Off e_shoff; /* Section header table file offset */ + Elf64_Word e_flags; + Elf64_Half e_ehsize; + Elf64_Half e_phentsize; + Elf64_Half e_phnum; + Elf64_Half e_shentsize; + Elf64_Half e_shnum; + Elf64_Half e_shstrndx; +} Elf64_Ehdr; + +/* These constants define the permissions on sections in the program + header, p_flags. */ +#define PF_R 0x4 +#define PF_W 0x2 +#define PF_X 0x1 + +typedef struct elf32_phdr { + Elf32_Word p_type; + Elf32_Off p_offset; + Elf32_Addr p_vaddr; + Elf32_Addr p_paddr; + Elf32_Word p_filesz; + Elf32_Word p_memsz; + Elf32_Word p_flags; + Elf32_Word p_align; +} Elf32_Phdr; + +typedef struct elf64_phdr { + Elf64_Word p_type; + Elf64_Word p_flags; + Elf64_Off p_offset; /* Segment file offset */ + Elf64_Addr p_vaddr; /* Segment virtual address */ + Elf64_Addr p_paddr; /* Segment physical address */ + Elf64_Xword p_filesz; /* Segment size in file */ + Elf64_Xword p_memsz; /* Segment size in memory */ + Elf64_Xword p_align; /* Segment alignment, file & memory */ +} Elf64_Phdr; + +/* sh_type */ +#define SHT_NULL 0 +#define SHT_PROGBITS 1 +#define SHT_SYMTAB 2 +#define SHT_STRTAB 3 +#define SHT_RELA 4 +#define SHT_HASH 5 +#define SHT_DYNAMIC 6 +#define SHT_NOTE 7 +#define SHT_NOBITS 8 +#define SHT_REL 9 +#define SHT_SHLIB 10 +#define SHT_DYNSYM 11 +#define SHT_NUM 12 +#define SHT_LOPROC 0x70000000 +#define SHT_HIPROC 0x7fffffff +#define SHT_LOUSER 0x80000000 +#define SHT_HIUSER 0xffffffff + +/* sh_flags */ +#define SHF_WRITE 0x1 +#define SHF_ALLOC 0x2 +#define SHF_EXECINSTR 0x4 +#define SHF_RELA_LIVEPATCH 0x00100000 +#define SHF_RO_AFTER_INIT 0x00200000 +#define SHF_MASKPROC 0xf0000000 + +/* special section indexes */ +#define SHN_UNDEF 0 +#define SHN_LORESERVE 0xff00 +#define SHN_LOPROC 0xff00 +#define SHN_HIPROC 0xff1f +#define SHN_LIVEPATCH 0xff20 +#define SHN_ABS 0xfff1 +#define SHN_COMMON 0xfff2 +#define SHN_HIRESERVE 0xffff + +typedef struct elf32_shdr { + Elf32_Word sh_name; + Elf32_Word sh_type; + Elf32_Word sh_flags; + Elf32_Addr sh_addr; + Elf32_Off sh_offset; + Elf32_Word sh_size; + Elf32_Word sh_link; + Elf32_Word sh_info; + Elf32_Word sh_addralign; + Elf32_Word sh_entsize; +} Elf32_Shdr; + +typedef struct elf64_shdr { + Elf64_Word sh_name; /* Section name, index in string tbl */ + Elf64_Word sh_type; /* Type of section */ + Elf64_Xword sh_flags; /* Miscellaneous section attributes */ + Elf64_Addr sh_addr; /* Section virtual addr at execution */ + Elf64_Off sh_offset; /* Section file offset */ + Elf64_Xword sh_size; /* Size of section in bytes */ + Elf64_Word sh_link; /* Index of another section */ + Elf64_Word sh_info; /* Additional section information */ + Elf64_Xword sh_addralign; /* Section alignment */ + Elf64_Xword sh_entsize; /* Entry size if section holds table */ +} Elf64_Shdr; + +#define EI_MAG0 0 /* e_ident[] indexes */ +#define EI_MAG1 1 +#define EI_MAG2 2 +#define EI_MAG3 3 +#define EI_CLASS 4 +#define EI_DATA 5 +#define EI_VERSION 6 +#define EI_OSABI 7 +#define EI_PAD 8 + +#define ELFMAG0 0x7f /* EI_MAG */ +#define ELFMAG1 'E' +#define ELFMAG2 'L' +#define ELFMAG3 'F' +#define ELFMAG "\177ELF" +#define SELFMAG 4 + +#define ELFCLASSNONE 0 /* EI_CLASS */ +#define ELFCLASS32 1 +#define ELFCLASS64 2 +#define ELFCLASSNUM 3 + +#define ELFDATANONE 0 /* e_ident[EI_DATA] */ +#define ELFDATA2LSB 1 +#define ELFDATA2MSB 2 + +#define EV_NONE 0 /* e_version, EI_VERSION */ +#define EV_CURRENT 1 +#define EV_NUM 2 + +#define ELFOSABI_NONE 0 +#define ELFOSABI_LINUX 3 + +#ifndef ELF_OSABI +#define ELF_OSABI ELFOSABI_NONE +#endif + +/* + * Notes used in ET_CORE. Architectures export some of the arch register sets + * using the corresponding note types via the PTRACE_GETREGSET and + * PTRACE_SETREGSET requests. + * The note name for these types is "LINUX", except NT_PRFPREG that is named + * "CORE". + */ +#define NT_PRSTATUS 1 +#define NT_PRFPREG 2 +#define NT_PRPSINFO 3 +#define NT_TASKSTRUCT 4 +#define NT_AUXV 6 +/* + * Note to userspace developers: size of NT_SIGINFO note may increase + * in the future to accomodate more fields, don't assume it is fixed! + */ +#define NT_SIGINFO 0x53494749 +#define NT_FILE 0x46494c45 +#define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */ +#define NT_PPC_VMX 0x100 /* PowerPC Altivec/VMX registers */ +#define NT_PPC_SPE 0x101 /* PowerPC SPE/EVR registers */ +#define NT_PPC_VSX 0x102 /* PowerPC VSX registers */ +#define NT_PPC_TAR 0x103 /* Target Address Register */ +#define NT_PPC_PPR 0x104 /* Program Priority Register */ +#define NT_PPC_DSCR 0x105 /* Data Stream Control Register */ +#define NT_PPC_EBB 0x106 /* Event Based Branch Registers */ +#define NT_PPC_PMU 0x107 /* Performance Monitor Registers */ +#define NT_PPC_TM_CGPR 0x108 /* TM checkpointed GPR Registers */ +#define NT_PPC_TM_CFPR 0x109 /* TM checkpointed FPR Registers */ +#define NT_PPC_TM_CVMX 0x10a /* TM checkpointed VMX Registers */ +#define NT_PPC_TM_CVSX 0x10b /* TM checkpointed VSX Registers */ +#define NT_PPC_TM_SPR 0x10c /* TM Special Purpose Registers */ +#define NT_PPC_TM_CTAR 0x10d /* TM checkpointed Target Address Register */ +#define NT_PPC_TM_CPPR 0x10e /* TM checkpointed Program Priority Register */ +#define NT_PPC_TM_CDSCR 0x10f /* TM checkpointed Data Stream Control Register */ +#define NT_PPC_PKEY 0x110 /* Memory Protection Keys registers */ +#define NT_PPC_DEXCR 0x111 /* PowerPC DEXCR registers */ +#define NT_PPC_HASHKEYR 0x112 /* PowerPC HASHKEYR register */ +#define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */ +#define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */ +#define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */ +/* Old binutils treats 0x203 as a CET state */ +#define NT_X86_SHSTK 0x204 /* x86 SHSTK state */ +#define NT_X86_XSAVE_LAYOUT 0x205 /* XSAVE layout description */ +#define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */ +#define NT_S390_TIMER 0x301 /* s390 timer register */ +#define NT_S390_TODCMP 0x302 /* s390 TOD clock comparator register */ +#define NT_S390_TODPREG 0x303 /* s390 TOD programmable register */ +#define NT_S390_CTRS 0x304 /* s390 control registers */ +#define NT_S390_PREFIX 0x305 /* s390 prefix register */ +#define NT_S390_LAST_BREAK 0x306 /* s390 breaking event address */ +#define NT_S390_SYSTEM_CALL 0x307 /* s390 system call restart data */ +#define NT_S390_TDB 0x308 /* s390 transaction diagnostic block */ +#define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 upper half */ +#define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */ +#define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */ +#define NT_S390_GS_BC 0x30c /* s390 guarded storage broadcast control block */ +#define NT_S390_RI_CB 0x30d /* s390 runtime instrumentation */ +#define NT_S390_PV_CPU_DATA 0x30e /* s390 protvirt cpu dump data */ +#define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */ +#define NT_ARM_TLS 0x401 /* ARM TLS register */ +#define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ +#define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ +#define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ +#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension registers */ +#define NT_ARM_PAC_MASK 0x406 /* ARM pointer authentication code masks */ +#define NT_ARM_PACA_KEYS 0x407 /* ARM pointer authentication address keys */ +#define NT_ARM_PACG_KEYS 0x408 /* ARM pointer authentication generic key */ +#define NT_ARM_TAGGED_ADDR_CTRL 0x409 /* arm64 tagged address control (prctl()) */ +#define NT_ARM_PAC_ENABLED_KEYS 0x40a /* arm64 ptr auth enabled keys (prctl()) */ +#define NT_ARM_SSVE 0x40b /* ARM Streaming SVE registers */ +#define NT_ARM_ZA 0x40c /* ARM SME ZA registers */ +#define NT_ARM_ZT 0x40d /* ARM SME ZT registers */ +#define NT_ARM_FPMR 0x40e /* ARM floating point mode register */ +#define NT_ARM_POE 0x40f /* ARM POE registers */ +#define NT_ARM_GCS 0x410 /* ARM GCS state */ +#define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ +#define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ +#define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ +#define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode */ +#define NT_MIPS_MSA 0x802 /* MIPS SIMD registers */ +#define NT_RISCV_CSR 0x900 /* RISC-V Control and Status Registers */ +#define NT_RISCV_VECTOR 0x901 /* RISC-V vector registers */ +#define NT_RISCV_TAGGED_ADDR_CTRL 0x902 /* RISC-V tagged address control (prctl()) */ +#define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers */ +#define NT_LOONGARCH_CSR 0xa01 /* LoongArch control and status registers */ +#define NT_LOONGARCH_LSX 0xa02 /* LoongArch Loongson SIMD Extension registers */ +#define NT_LOONGARCH_LASX 0xa03 /* LoongArch Loongson Advanced SIMD Extension registers */ +#define NT_LOONGARCH_LBT 0xa04 /* LoongArch Loongson Binary Translation registers */ +#define NT_LOONGARCH_HW_BREAK 0xa05 /* LoongArch hardware breakpoint registers */ +#define NT_LOONGARCH_HW_WATCH 0xa06 /* LoongArch hardware watchpoint registers */ + +/* Note types with note name "GNU" */ +#define NT_GNU_PROPERTY_TYPE_0 5 + +/* Note header in a PT_NOTE section */ +typedef struct elf32_note { + Elf32_Word n_namesz; /* Name size */ + Elf32_Word n_descsz; /* Content size */ + Elf32_Word n_type; /* Content type */ +} Elf32_Nhdr; + +/* Note header in a PT_NOTE section */ +typedef struct elf64_note { + Elf64_Word n_namesz; /* Name size */ + Elf64_Word n_descsz; /* Content size */ + Elf64_Word n_type; /* Content type */ +} Elf64_Nhdr; + +/* .note.gnu.property types for EM_AARCH64: */ +#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 + +/* Bits for GNU_PROPERTY_AARCH64_FEATURE_1_BTI */ +#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0) + +typedef struct { + Elf32_Half vd_version; + Elf32_Half vd_flags; + Elf32_Half vd_ndx; + Elf32_Half vd_cnt; + Elf32_Word vd_hash; + Elf32_Word vd_aux; + Elf32_Word vd_next; +} Elf32_Verdef; + +typedef struct { + Elf64_Half vd_version; + Elf64_Half vd_flags; + Elf64_Half vd_ndx; + Elf64_Half vd_cnt; + Elf64_Word vd_hash; + Elf64_Word vd_aux; + Elf64_Word vd_next; +} Elf64_Verdef; + +typedef struct { + Elf32_Word vda_name; + Elf32_Word vda_next; +} Elf32_Verdaux; + +typedef struct { + Elf64_Word vda_name; + Elf64_Word vda_next; +} Elf64_Verdaux; + +#endif /* _LINUX_ELF_H */ diff --git a/tools/include/uapi/linux/fanotify.h b/tools/include/uapi/linux/fanotify.h new file mode 100644 index 000000000000..e710967c7c26 --- /dev/null +++ b/tools/include/uapi/linux/fanotify.h @@ -0,0 +1,274 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_FANOTIFY_H +#define _UAPI_LINUX_FANOTIFY_H + +#include <linux/types.h> + +/* the following events that user-space can register for */ +#define FAN_ACCESS 0x00000001 /* File was accessed */ +#define FAN_MODIFY 0x00000002 /* File was modified */ +#define FAN_ATTRIB 0x00000004 /* Metadata changed */ +#define FAN_CLOSE_WRITE 0x00000008 /* Writable file closed */ +#define FAN_CLOSE_NOWRITE 0x00000010 /* Unwritable file closed */ +#define FAN_OPEN 0x00000020 /* File was opened */ +#define FAN_MOVED_FROM 0x00000040 /* File was moved from X */ +#define FAN_MOVED_TO 0x00000080 /* File was moved to Y */ +#define FAN_CREATE 0x00000100 /* Subfile was created */ +#define FAN_DELETE 0x00000200 /* Subfile was deleted */ +#define FAN_DELETE_SELF 0x00000400 /* Self was deleted */ +#define FAN_MOVE_SELF 0x00000800 /* Self was moved */ +#define FAN_OPEN_EXEC 0x00001000 /* File was opened for exec */ + +#define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ +#define FAN_FS_ERROR 0x00008000 /* Filesystem error */ + +#define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ +#define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ +#define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */ +/* #define FAN_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */ + +#define FAN_PRE_ACCESS 0x00100000 /* Pre-content access hook */ +#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */ +#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */ + +#define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ + +#define FAN_RENAME 0x10000000 /* File was renamed */ + +#define FAN_ONDIR 0x40000000 /* Event occurred against dir */ + +/* helper events */ +#define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */ +#define FAN_MOVE (FAN_MOVED_FROM | FAN_MOVED_TO) /* moves */ + +/* flags used for fanotify_init() */ +#define FAN_CLOEXEC 0x00000001 +#define FAN_NONBLOCK 0x00000002 + +/* These are NOT bitwise flags. Both bits are used together. */ +#define FAN_CLASS_NOTIF 0x00000000 +#define FAN_CLASS_CONTENT 0x00000004 +#define FAN_CLASS_PRE_CONTENT 0x00000008 + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \ + FAN_CLASS_PRE_CONTENT) + +#define FAN_UNLIMITED_QUEUE 0x00000010 +#define FAN_UNLIMITED_MARKS 0x00000020 +#define FAN_ENABLE_AUDIT 0x00000040 + +/* Flags to determine fanotify event format */ +#define FAN_REPORT_PIDFD 0x00000080 /* Report pidfd for event->pid */ +#define FAN_REPORT_TID 0x00000100 /* event->pid is thread id */ +#define FAN_REPORT_FID 0x00000200 /* Report unique file id */ +#define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */ +#define FAN_REPORT_NAME 0x00000800 /* Report events with name */ +#define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */ +#define FAN_REPORT_FD_ERROR 0x00002000 /* event->fd can report error */ +#define FAN_REPORT_MNT 0x00004000 /* Report mount events */ + +/* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */ +#define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME) +/* Convenience macro - FAN_REPORT_TARGET_FID requires all other FID flags */ +#define FAN_REPORT_DFID_NAME_TARGET (FAN_REPORT_DFID_NAME | \ + FAN_REPORT_FID | FAN_REPORT_TARGET_FID) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ + FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE |\ + FAN_UNLIMITED_MARKS) + +/* flags used for fanotify_modify_mark() */ +#define FAN_MARK_ADD 0x00000001 +#define FAN_MARK_REMOVE 0x00000002 +#define FAN_MARK_DONT_FOLLOW 0x00000004 +#define FAN_MARK_ONLYDIR 0x00000008 +/* FAN_MARK_MOUNT is 0x00000010 */ +#define FAN_MARK_IGNORED_MASK 0x00000020 +#define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040 +#define FAN_MARK_FLUSH 0x00000080 +/* FAN_MARK_FILESYSTEM is 0x00000100 */ +#define FAN_MARK_EVICTABLE 0x00000200 +/* This bit is mutually exclusive with FAN_MARK_IGNORED_MASK bit */ +#define FAN_MARK_IGNORE 0x00000400 + +/* These are NOT bitwise flags. Both bits can be used togther. */ +#define FAN_MARK_INODE 0x00000000 +#define FAN_MARK_MOUNT 0x00000010 +#define FAN_MARK_FILESYSTEM 0x00000100 +#define FAN_MARK_MNTNS 0x00000110 + +/* + * Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY + * for non-inode mark types. + */ +#define FAN_MARK_IGNORE_SURV (FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\ + FAN_MARK_REMOVE |\ + FAN_MARK_DONT_FOLLOW |\ + FAN_MARK_ONLYDIR |\ + FAN_MARK_MOUNT |\ + FAN_MARK_IGNORED_MASK |\ + FAN_MARK_IGNORED_SURV_MODIFY |\ + FAN_MARK_FLUSH) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_EVENTS (FAN_ACCESS |\ + FAN_MODIFY |\ + FAN_CLOSE |\ + FAN_OPEN) + +/* + * All events which require a permission response from userspace + */ +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_PERM_EVENTS (FAN_OPEN_PERM |\ + FAN_ACCESS_PERM) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_OUTGOING_EVENTS (FAN_ALL_EVENTS |\ + FAN_ALL_PERM_EVENTS |\ + FAN_Q_OVERFLOW) + +#define FANOTIFY_METADATA_VERSION 3 + +struct fanotify_event_metadata { + __u32 event_len; + __u8 vers; + __u8 reserved; + __u16 metadata_len; + __aligned_u64 mask; + __s32 fd; + __s32 pid; +}; + +#define FAN_EVENT_INFO_TYPE_FID 1 +#define FAN_EVENT_INFO_TYPE_DFID_NAME 2 +#define FAN_EVENT_INFO_TYPE_DFID 3 +#define FAN_EVENT_INFO_TYPE_PIDFD 4 +#define FAN_EVENT_INFO_TYPE_ERROR 5 +#define FAN_EVENT_INFO_TYPE_RANGE 6 +#define FAN_EVENT_INFO_TYPE_MNT 7 + +/* Special info types for FAN_RENAME */ +#define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10 +/* Reserved for FAN_EVENT_INFO_TYPE_OLD_DFID 11 */ +#define FAN_EVENT_INFO_TYPE_NEW_DFID_NAME 12 +/* Reserved for FAN_EVENT_INFO_TYPE_NEW_DFID 13 */ + +/* Variable length info record following event metadata */ +struct fanotify_event_info_header { + __u8 info_type; + __u8 pad; + __u16 len; +}; + +/* + * Unique file identifier info record. + * This structure is used for records of types FAN_EVENT_INFO_TYPE_FID, + * FAN_EVENT_INFO_TYPE_DFID and FAN_EVENT_INFO_TYPE_DFID_NAME. + * For FAN_EVENT_INFO_TYPE_DFID_NAME there is additionally a null terminated + * name immediately after the file handle. + */ +struct fanotify_event_info_fid { + struct fanotify_event_info_header hdr; + __kernel_fsid_t fsid; + /* + * Following is an opaque struct file_handle that can be passed as + * an argument to open_by_handle_at(2). + */ + unsigned char handle[]; +}; + +/* + * This structure is used for info records of type FAN_EVENT_INFO_TYPE_PIDFD. + * It holds a pidfd for the pid that was responsible for generating an event. + */ +struct fanotify_event_info_pidfd { + struct fanotify_event_info_header hdr; + __s32 pidfd; +}; + +struct fanotify_event_info_error { + struct fanotify_event_info_header hdr; + __s32 error; + __u32 error_count; +}; + +struct fanotify_event_info_range { + struct fanotify_event_info_header hdr; + __u32 pad; + __u64 offset; + __u64 count; +}; + +struct fanotify_event_info_mnt { + struct fanotify_event_info_header hdr; + __u64 mnt_id; +}; + +/* + * User space may need to record additional information about its decision. + * The extra information type records what kind of information is included. + * The default is none. We also define an extra information buffer whose + * size is determined by the extra information type. + * + * If the information type is Audit Rule, then the information following + * is the rule number that triggered the user space decision that + * requires auditing. + */ + +#define FAN_RESPONSE_INFO_NONE 0 +#define FAN_RESPONSE_INFO_AUDIT_RULE 1 + +struct fanotify_response { + __s32 fd; + __u32 response; +}; + +struct fanotify_response_info_header { + __u8 type; + __u8 pad; + __u16 len; +}; + +struct fanotify_response_info_audit_rule { + struct fanotify_response_info_header hdr; + __u32 rule_number; + __u32 subj_trust; + __u32 obj_trust; +}; + +/* Legit userspace responses to a _PERM event */ +#define FAN_ALLOW 0x01 +#define FAN_DENY 0x02 +/* errno other than EPERM can specified in upper byte of deny response */ +#define FAN_ERRNO_BITS 8 +#define FAN_ERRNO_SHIFT (32 - FAN_ERRNO_BITS) +#define FAN_ERRNO_MASK ((1 << FAN_ERRNO_BITS) - 1) +#define FAN_DENY_ERRNO(err) \ + (FAN_DENY | ((((__u32)(err)) & FAN_ERRNO_MASK) << FAN_ERRNO_SHIFT)) + +#define FAN_AUDIT 0x10 /* Bitmask to create audit record for result */ +#define FAN_INFO 0x20 /* Bitmask to indicate additional information */ + +/* No fd set in event */ +#define FAN_NOFD -1 +#define FAN_NOPIDFD FAN_NOFD +#define FAN_EPIDFD -2 + +/* Helper functions to deal with fanotify_event_metadata buffers */ +#define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata)) + +#define FAN_EVENT_NEXT(meta, len) ((len) -= (meta)->event_len, \ + (struct fanotify_event_metadata*)(((char *)(meta)) + \ + (meta)->event_len)) + +#define FAN_EVENT_OK(meta, len) ((long)(len) >= (long)FAN_EVENT_METADATA_LEN && \ + (long)(meta)->event_len >= (long)FAN_EVENT_METADATA_LEN && \ + (long)(meta)->event_len <= (long)(len)) + +#endif /* _UAPI_LINUX_FANOTIFY_H */ diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h index 8a27bc5c7a7f..24ddf7bc4f25 100644 --- a/tools/include/uapi/linux/fs.h +++ b/tools/include/uapi/linux/fs.h @@ -40,6 +40,15 @@ #define BLOCK_SIZE_BITS 10 #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS) +/* flags for integrity meta */ +#define IO_INTEGRITY_CHK_GUARD (1U << 0) /* enforce guard check */ +#define IO_INTEGRITY_CHK_REFTAG (1U << 1) /* enforce ref check */ +#define IO_INTEGRITY_CHK_APPTAG (1U << 2) /* enforce app check */ + +#define IO_INTEGRITY_VALID_FLAGS (IO_INTEGRITY_CHK_GUARD | \ + IO_INTEGRITY_CHK_REFTAG | \ + IO_INTEGRITY_CHK_APPTAG) + #define SEEK_SET 0 /* seek relative to beginning of file */ #define SEEK_CUR 1 /* seek relative to current file position */ #define SEEK_END 2 /* seek relative to end of file */ @@ -329,9 +338,16 @@ typedef int __bitwise __kernel_rwf_t; /* per-IO negation of O_APPEND */ #define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020) +/* Atomic Write */ +#define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040) + +/* buffered IO that drops the cache after reading or writing data */ +#define RWF_DONTCACHE ((__force __kernel_rwf_t)0x00000080) + /* mask of flags supported by the kernel */ #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ - RWF_APPEND | RWF_NOAPPEND) + RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\ + RWF_DONTCACHE) #define PROCFS_IOCTL_MAGIC 'f' @@ -347,6 +363,7 @@ typedef int __bitwise __kernel_rwf_t; #define PAGE_IS_PFNZERO (1 << 5) #define PAGE_IS_HUGE (1 << 6) #define PAGE_IS_SOFT_DIRTY (1 << 7) +#define PAGE_IS_GUARD (1 << 8) /* * struct page_region - Page region with flags diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h index 7a8f4c290187..3aff99f2696a 100644 --- a/tools/include/uapi/linux/fscrypt.h +++ b/tools/include/uapi/linux/fscrypt.h @@ -119,7 +119,7 @@ struct fscrypt_key_specifier { */ struct fscrypt_provisioning_key_payload { __u32 type; - __u32 __reserved; + __u32 flags; __u8 raw[]; }; @@ -128,7 +128,9 @@ struct fscrypt_add_key_arg { struct fscrypt_key_specifier key_spec; __u32 raw_size; __u32 key_id; - __u32 __reserved[8]; +#define FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED 0x00000001 + __u32 flags; + __u32 __reserved[7]; __u8 raw[]; }; diff --git a/tools/include/uapi/linux/genetlink.h b/tools/include/uapi/linux/genetlink.h new file mode 100644 index 000000000000..ddba3ca01e39 --- /dev/null +++ b/tools/include/uapi/linux/genetlink.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_GENERIC_NETLINK_H +#define _UAPI__LINUX_GENERIC_NETLINK_H + +#include <linux/types.h> +#include <linux/netlink.h> + +#define GENL_NAMSIZ 16 /* length of family name */ + +#define GENL_MIN_ID NLMSG_MIN_TYPE +#define GENL_MAX_ID 1023 + +struct genlmsghdr { + __u8 cmd; + __u8 version; + __u16 reserved; +}; + +#define GENL_HDRLEN NLMSG_ALIGN(sizeof(struct genlmsghdr)) + +#define GENL_ADMIN_PERM 0x01 +#define GENL_CMD_CAP_DO 0x02 +#define GENL_CMD_CAP_DUMP 0x04 +#define GENL_CMD_CAP_HASPOL 0x08 +#define GENL_UNS_ADMIN_PERM 0x10 + +/* + * List of reserved static generic netlink identifiers: + */ +#define GENL_ID_CTRL NLMSG_MIN_TYPE +#define GENL_ID_VFS_DQUOT (NLMSG_MIN_TYPE + 1) +#define GENL_ID_PMCRAID (NLMSG_MIN_TYPE + 2) +/* must be last reserved + 1 */ +#define GENL_START_ALLOC (NLMSG_MIN_TYPE + 3) + +/************************************************************************** + * Controller + **************************************************************************/ + +enum { + CTRL_CMD_UNSPEC, + CTRL_CMD_NEWFAMILY, + CTRL_CMD_DELFAMILY, + CTRL_CMD_GETFAMILY, + CTRL_CMD_NEWOPS, + CTRL_CMD_DELOPS, + CTRL_CMD_GETOPS, + CTRL_CMD_NEWMCAST_GRP, + CTRL_CMD_DELMCAST_GRP, + CTRL_CMD_GETMCAST_GRP, /* unused */ + CTRL_CMD_GETPOLICY, + __CTRL_CMD_MAX, +}; + +#define CTRL_CMD_MAX (__CTRL_CMD_MAX - 1) + +enum { + CTRL_ATTR_UNSPEC, + CTRL_ATTR_FAMILY_ID, + CTRL_ATTR_FAMILY_NAME, + CTRL_ATTR_VERSION, + CTRL_ATTR_HDRSIZE, + CTRL_ATTR_MAXATTR, + CTRL_ATTR_OPS, + CTRL_ATTR_MCAST_GROUPS, + CTRL_ATTR_POLICY, + CTRL_ATTR_OP_POLICY, + CTRL_ATTR_OP, + __CTRL_ATTR_MAX, +}; + +#define CTRL_ATTR_MAX (__CTRL_ATTR_MAX - 1) + +enum { + CTRL_ATTR_OP_UNSPEC, + CTRL_ATTR_OP_ID, + CTRL_ATTR_OP_FLAGS, + __CTRL_ATTR_OP_MAX, +}; + +#define CTRL_ATTR_OP_MAX (__CTRL_ATTR_OP_MAX - 1) + +enum { + CTRL_ATTR_MCAST_GRP_UNSPEC, + CTRL_ATTR_MCAST_GRP_NAME, + CTRL_ATTR_MCAST_GRP_ID, + __CTRL_ATTR_MCAST_GRP_MAX, +}; + +#define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1) + +enum { + CTRL_ATTR_POLICY_UNSPEC, + CTRL_ATTR_POLICY_DO, + CTRL_ATTR_POLICY_DUMP, + + __CTRL_ATTR_POLICY_DUMP_MAX, + CTRL_ATTR_POLICY_DUMP_MAX = __CTRL_ATTR_POLICY_DUMP_MAX - 1 +}; + +#define CTRL_ATTR_POLICY_MAX (__CTRL_ATTR_POLICY_DUMP_MAX - 1) + +#endif /* _UAPI__LINUX_GENERIC_NETLINK_H */ diff --git a/tools/include/uapi/linux/if_addr.h b/tools/include/uapi/linux/if_addr.h new file mode 100644 index 000000000000..aa7958b4e41d --- /dev/null +++ b/tools/include/uapi/linux/if_addr.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_IF_ADDR_H +#define _UAPI__LINUX_IF_ADDR_H + +#include <linux/types.h> +#include <linux/netlink.h> + +struct ifaddrmsg { + __u8 ifa_family; + __u8 ifa_prefixlen; /* The prefix length */ + __u8 ifa_flags; /* Flags */ + __u8 ifa_scope; /* Address scope */ + __u32 ifa_index; /* Link index */ +}; + +/* + * Important comment: + * IFA_ADDRESS is prefix address, rather than local interface address. + * It makes no difference for normally configured broadcast interfaces, + * but for point-to-point IFA_ADDRESS is DESTINATION address, + * local address is supplied in IFA_LOCAL attribute. + * + * IFA_FLAGS is a u32 attribute that extends the u8 field ifa_flags. + * If present, the value from struct ifaddrmsg will be ignored. + */ +enum { + IFA_UNSPEC, + IFA_ADDRESS, + IFA_LOCAL, + IFA_LABEL, + IFA_BROADCAST, + IFA_ANYCAST, + IFA_CACHEINFO, + IFA_MULTICAST, + IFA_FLAGS, + IFA_RT_PRIORITY, /* u32, priority/metric for prefix route */ + IFA_TARGET_NETNSID, + IFA_PROTO, /* u8, address protocol */ + __IFA_MAX, +}; + +#define IFA_MAX (__IFA_MAX - 1) + +/* ifa_flags */ +#define IFA_F_SECONDARY 0x01 +#define IFA_F_TEMPORARY IFA_F_SECONDARY + +#define IFA_F_NODAD 0x02 +#define IFA_F_OPTIMISTIC 0x04 +#define IFA_F_DADFAILED 0x08 +#define IFA_F_HOMEADDRESS 0x10 +#define IFA_F_DEPRECATED 0x20 +#define IFA_F_TENTATIVE 0x40 +#define IFA_F_PERMANENT 0x80 +#define IFA_F_MANAGETEMPADDR 0x100 +#define IFA_F_NOPREFIXROUTE 0x200 +#define IFA_F_MCAUTOJOIN 0x400 +#define IFA_F_STABLE_PRIVACY 0x800 + +struct ifa_cacheinfo { + __u32 ifa_prefered; + __u32 ifa_valid; + __u32 cstamp; /* created timestamp, hundredths of seconds */ + __u32 tstamp; /* updated timestamp, hundredths of seconds */ +}; + +/* backwards compatibility for userspace */ +#ifndef __KERNEL__ +#define IFA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifaddrmsg)))) +#define IFA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifaddrmsg)) +#endif + +/* ifa_proto */ +#define IFAPROT_UNSPEC 0 +#define IFAPROT_KERNEL_LO 1 /* loopback */ +#define IFAPROT_KERNEL_RA 2 /* set by kernel from router announcement */ +#define IFAPROT_KERNEL_LL 3 /* link-local set by kernel */ + +#endif diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 8516c1ccd57a..7e46ca4cd31b 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -1315,6 +1315,8 @@ enum { IFLA_NETKIT_MODE, IFLA_NETKIT_SCRUB, IFLA_NETKIT_PEER_SCRUB, + IFLA_NETKIT_HEADROOM, + IFLA_NETKIT_TAILROOM, __IFLA_NETKIT_MAX, }; #define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1) diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index 2f082b01ff22..23a062781468 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -7,8 +7,8 @@ * Magnus Karlsson <magnus.karlsson@intel.com> */ -#ifndef _LINUX_IF_XDP_H -#define _LINUX_IF_XDP_H +#ifndef _UAPI_LINUX_IF_XDP_H +#define _UAPI_LINUX_IF_XDP_H #include <linux/types.h> @@ -79,6 +79,7 @@ struct xdp_mmap_offsets { #define XDP_UMEM_COMPLETION_RING 6 #define XDP_STATISTICS 7 #define XDP_OPTIONS 8 +#define XDP_MAX_TX_SKB_BUDGET 9 struct xdp_umem_reg { __u64 addr; /* Start of packet data area */ @@ -117,16 +118,22 @@ struct xdp_options { ((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1) /* Request transmit timestamp. Upon completion, put it into tx_timestamp - * field of union xsk_tx_metadata. + * field of struct xsk_tx_metadata. */ #define XDP_TXMD_FLAGS_TIMESTAMP (1 << 0) /* Request transmit checksum offload. Checksum start position and offset - * are communicated via csum_start and csum_offset fields of union + * are communicated via csum_start and csum_offset fields of struct * xsk_tx_metadata. */ #define XDP_TXMD_FLAGS_CHECKSUM (1 << 1) +/* Request launch time hardware offload. The device will schedule the packet for + * transmission at a pre-determined time called launch time. The value of + * launch time is communicated via launch_time field of struct xsk_tx_metadata. + */ +#define XDP_TXMD_FLAGS_LAUNCH_TIME (1 << 2) + /* AF_XDP offloads request. 'request' union member is consumed by the driver * when the packet is being transmitted. 'completion' union member is * filled by the driver when the transmit completion arrives. @@ -142,6 +149,10 @@ struct xsk_tx_metadata { __u16 csum_start; /* Offset from csum_start where checksum should be stored. */ __u16 csum_offset; + + /* XDP_TXMD_FLAGS_LAUNCH_TIME */ + /* Launch time in nanosecond against the PTP HW Clock */ + __u64 launch_time; } request; struct { @@ -170,4 +181,4 @@ struct xdp_desc { /* TX packet carries valid metadata. */ #define XDP_TX_METADATA (1 << 1) -#endif /* _LINUX_IF_XDP_H */ +#endif /* _UAPI_LINUX_IF_XDP_H */ diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index 5d32d53508d9..ced0fc3c3aa5 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -79,6 +79,8 @@ enum { #define IPPROTO_MPLS IPPROTO_MPLS IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */ #define IPPROTO_ETHERNET IPPROTO_ETHERNET + IPPROTO_AGGFRAG = 144, /* AGGFRAG in ESP (RFC 9347) */ +#define IPPROTO_AGGFRAG IPPROTO_AGGFRAG IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW IPPROTO_SMC = 256, /* Shared Memory Communications */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 637efc055145..52f6000ab020 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -178,6 +178,7 @@ struct kvm_xen_exit { #define KVM_EXIT_NOTIFY 37 #define KVM_EXIT_LOONGARCH_IOCSR 38 #define KVM_EXIT_MEMORY_FAULT 39 +#define KVM_EXIT_TDX 40 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -375,6 +376,7 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_WAKEUP 4 #define KVM_SYSTEM_EVENT_SUSPEND 5 #define KVM_SYSTEM_EVENT_SEV_TERM 6 +#define KVM_SYSTEM_EVENT_TDX_FATAL 7 __u32 type; __u32 ndata; union { @@ -446,6 +448,31 @@ struct kvm_run { __u64 gpa; __u64 size; } memory_fault; + /* KVM_EXIT_TDX */ + struct { + __u64 flags; + __u64 nr; + union { + struct { + __u64 ret; + __u64 data[5]; + } unknown; + struct { + __u64 ret; + __u64 gpa; + __u64 size; + } get_quote; + struct { + __u64 ret; + __u64 leaf; + __u64 r11, r12, r13, r14; + } get_tdvmcall_info; + struct { + __u64 ret; + __u64 vector; + } setup_event_notify; + }; + } tdx; /* Fix the size of the union. */ char padding[256]; }; @@ -617,10 +644,7 @@ struct kvm_ioeventfd { #define KVM_X86_DISABLE_EXITS_HLT (1 << 1) #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) #define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3) -#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ - KVM_X86_DISABLE_EXITS_HLT | \ - KVM_X86_DISABLE_EXITS_PAUSE | \ - KVM_X86_DISABLE_EXITS_CSTATE) +#define KVM_X86_DISABLE_EXITS_APERFMPERF (1 << 4) /* for KVM_ENABLE_CAP */ struct kvm_enable_cap { @@ -933,6 +957,12 @@ struct kvm_enable_cap { #define KVM_CAP_PRE_FAULT_MEMORY 236 #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 #define KVM_CAP_X86_GUEST_MODE 238 +#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 +#define KVM_CAP_ARM_EL2 240 +#define KVM_CAP_ARM_EL2_E2H0 241 +#define KVM_CAP_RISCV_MP_STATE_RESET 242 +#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 +#define KVM_CAP_GUEST_MEMFD_FLAGS 244 struct kvm_irq_routing_irqchip { __u32 irqchip; @@ -1070,6 +1100,10 @@ struct kvm_dirty_tlb { #define KVM_REG_SIZE_SHIFT 52 #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL + +#define KVM_REG_SIZE(id) \ + (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) + #define KVM_REG_SIZE_U8 0x0000000000000000ULL #define KVM_REG_SIZE_U16 0x0010000000000000ULL #define KVM_REG_SIZE_U32 0x0020000000000000ULL @@ -1158,7 +1192,15 @@ enum kvm_device_type { #define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_RISCV_AIA, #define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA + KVM_DEV_TYPE_LOONGARCH_IPI, +#define KVM_DEV_TYPE_LOONGARCH_IPI KVM_DEV_TYPE_LOONGARCH_IPI + KVM_DEV_TYPE_LOONGARCH_EIOINTC, +#define KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_EIOINTC + KVM_DEV_TYPE_LOONGARCH_PCHPIC, +#define KVM_DEV_TYPE_LOONGARCH_PCHPIC KVM_DEV_TYPE_LOONGARCH_PCHPIC + KVM_DEV_TYPE_MAX, + }; struct kvm_vfio_spapr_tce { @@ -1557,6 +1599,8 @@ struct kvm_memory_attributes { #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) #define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) +#define GUEST_MEMFD_FLAG_MMAP (1ULL << 0) +#define GUEST_MEMFD_FLAG_INIT_SHARED (1ULL << 1) struct kvm_create_guest_memfd { __u64 size; diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h new file mode 100644 index 000000000000..7fa67c2031a5 --- /dev/null +++ b/tools/include/uapi/linux/mount.h @@ -0,0 +1,235 @@ +#ifndef _UAPI_LINUX_MOUNT_H +#define _UAPI_LINUX_MOUNT_H + +#include <linux/types.h> + +/* + * These are the fs-independent mount-flags: up to 32 flags are supported + * + * Usage of these is restricted within the kernel to core mount(2) code and + * callers of sys_mount() only. Filesystems should be using the SB_* + * equivalent instead. + */ +#define MS_RDONLY 1 /* Mount read-only */ +#define MS_NOSUID 2 /* Ignore suid and sgid bits */ +#define MS_NODEV 4 /* Disallow access to device special files */ +#define MS_NOEXEC 8 /* Disallow program execution */ +#define MS_SYNCHRONOUS 16 /* Writes are synced at once */ +#define MS_REMOUNT 32 /* Alter flags of a mounted FS */ +#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ +#define MS_DIRSYNC 128 /* Directory modifications are synchronous */ +#define MS_NOSYMFOLLOW 256 /* Do not follow symlinks */ +#define MS_NOATIME 1024 /* Do not update access times. */ +#define MS_NODIRATIME 2048 /* Do not update directory access times */ +#define MS_BIND 4096 +#define MS_MOVE 8192 +#define MS_REC 16384 +#define MS_VERBOSE 32768 /* War is peace. Verbosity is silence. + MS_VERBOSE is deprecated. */ +#define MS_SILENT 32768 +#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ +#define MS_UNBINDABLE (1<<17) /* change to unbindable */ +#define MS_PRIVATE (1<<18) /* change to private */ +#define MS_SLAVE (1<<19) /* change to slave */ +#define MS_SHARED (1<<20) /* change to shared */ +#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */ +#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ +#define MS_I_VERSION (1<<23) /* Update inode I_version field */ +#define MS_STRICTATIME (1<<24) /* Always perform atime updates */ +#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ + +/* These sb flags are internal to the kernel */ +#define MS_SUBMOUNT (1<<26) +#define MS_NOREMOTELOCK (1<<27) +#define MS_NOSEC (1<<28) +#define MS_BORN (1<<29) +#define MS_ACTIVE (1<<30) +#define MS_NOUSER (1<<31) + +/* + * Superblock flags that can be altered by MS_REMOUNT + */ +#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\ + MS_LAZYTIME) + +/* + * Old magic mount flag and mask + */ +#define MS_MGC_VAL 0xC0ED0000 +#define MS_MGC_MSK 0xffff0000 + +/* + * open_tree() flags. + */ +#define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */ +#define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */ + +/* + * move_mount() flags. + */ +#define MOVE_MOUNT_F_SYMLINKS 0x00000001 /* Follow symlinks on from path */ +#define MOVE_MOUNT_F_AUTOMOUNTS 0x00000002 /* Follow automounts on from path */ +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ +#define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */ +#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */ +#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ +#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */ +#define MOVE_MOUNT_BENEATH 0x00000200 /* Mount beneath top mount */ +#define MOVE_MOUNT__MASK 0x00000377 + +/* + * fsopen() flags. + */ +#define FSOPEN_CLOEXEC 0x00000001 + +/* + * fspick() flags. + */ +#define FSPICK_CLOEXEC 0x00000001 +#define FSPICK_SYMLINK_NOFOLLOW 0x00000002 +#define FSPICK_NO_AUTOMOUNT 0x00000004 +#define FSPICK_EMPTY_PATH 0x00000008 + +/* + * The type of fsconfig() call made. + */ +enum fsconfig_command { + FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */ + FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */ + FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */ + FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */ + FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */ + FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */ + FSCONFIG_CMD_CREATE = 6, /* Create new or reuse existing superblock */ + FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ + FSCONFIG_CMD_CREATE_EXCL = 8, /* Create new superblock, fail if reusing existing superblock */ +}; + +/* + * fsmount() flags. + */ +#define FSMOUNT_CLOEXEC 0x00000001 + +/* + * Mount attributes. + */ +#define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */ +#define MOUNT_ATTR_NOSUID 0x00000002 /* Ignore suid and sgid bits */ +#define MOUNT_ATTR_NODEV 0x00000004 /* Disallow access to device special files */ +#define MOUNT_ATTR_NOEXEC 0x00000008 /* Disallow program execution */ +#define MOUNT_ATTR__ATIME 0x00000070 /* Setting on how atime should be updated */ +#define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */ +#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */ +#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */ +#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */ +#define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */ +#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000 /* Do not follow symlinks */ + +/* + * mount_setattr() + */ +struct mount_attr { + __u64 attr_set; + __u64 attr_clr; + __u64 propagation; + __u64 userns_fd; +}; + +/* List of all mount_attr versions. */ +#define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */ + + +/* + * Structure for getting mount/superblock/filesystem info with statmount(2). + * + * The interface is similar to statx(2): individual fields or groups can be + * selected with the @mask argument of statmount(). Kernel will set the @mask + * field according to the supported fields. + * + * If string fields are selected, then the caller needs to pass a buffer that + * has space after the fixed part of the structure. Nul terminated strings are + * copied there and offsets relative to @str are stored in the relevant fields. + * If the buffer is too small, then EOVERFLOW is returned. The actually used + * size is returned in @size. + */ +struct statmount { + __u32 size; /* Total size, including strings */ + __u32 mnt_opts; /* [str] Options (comma separated, escaped) */ + __u64 mask; /* What results were written */ + __u32 sb_dev_major; /* Device ID */ + __u32 sb_dev_minor; + __u64 sb_magic; /* ..._SUPER_MAGIC */ + __u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */ + __u32 fs_type; /* [str] Filesystem type */ + __u64 mnt_id; /* Unique ID of mount */ + __u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */ + __u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */ + __u32 mnt_parent_id_old; + __u64 mnt_attr; /* MOUNT_ATTR_... */ + __u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */ + __u64 mnt_peer_group; /* ID of shared peer group */ + __u64 mnt_master; /* Mount receives propagation from this ID */ + __u64 propagate_from; /* Propagation from in current namespace */ + __u32 mnt_root; /* [str] Root of mount relative to root of fs */ + __u32 mnt_point; /* [str] Mountpoint relative to current root */ + __u64 mnt_ns_id; /* ID of the mount namespace */ + __u32 fs_subtype; /* [str] Subtype of fs_type (if any) */ + __u32 sb_source; /* [str] Source string of the mount */ + __u32 opt_num; /* Number of fs options */ + __u32 opt_array; /* [str] Array of nul terminated fs options */ + __u32 opt_sec_num; /* Number of security options */ + __u32 opt_sec_array; /* [str] Array of nul terminated security options */ + __u64 supported_mask; /* Mask flags that this kernel supports */ + __u32 mnt_uidmap_num; /* Number of uid mappings */ + __u32 mnt_uidmap; /* [str] Array of uid mappings (as seen from callers namespace) */ + __u32 mnt_gidmap_num; /* Number of gid mappings */ + __u32 mnt_gidmap; /* [str] Array of gid mappings (as seen from callers namespace) */ + __u64 __spare2[43]; + char str[]; /* Variable size part containing strings */ +}; + +/* + * Structure for passing mount ID and miscellaneous parameters to statmount(2) + * and listmount(2). + * + * For statmount(2) @param represents the request mask. + * For listmount(2) @param represents the last listed mount id (or zero). + */ +struct mnt_id_req { + __u32 size; + __u32 spare; + __u64 mnt_id; + __u64 param; + __u64 mnt_ns_id; +}; + +/* List of all mnt_id_req versions. */ +#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ +#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */ + +/* + * @mask bits for statmount(2) + */ +#define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */ +#define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... */ +#define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */ +#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ +#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ +#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ +#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ +#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ +#define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */ +#define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */ +#define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */ +#define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */ +#define STATMOUNT_SUPPORTED_MASK 0x00001000U /* Want/got supported mask flags */ +#define STATMOUNT_MNT_UIDMAP 0x00002000U /* Want/got uidmap... */ +#define STATMOUNT_MNT_GIDMAP 0x00004000U /* Want/got gidmap... */ + +/* + * Special @mnt_id values that can be passed to listmount + */ +#define LSMT_ROOT 0xffffffffffffffff /* root mount */ +#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ + +#endif /* _UAPI_LINUX_MOUNT_H */ diff --git a/tools/include/uapi/linux/neighbour.h b/tools/include/uapi/linux/neighbour.h new file mode 100644 index 000000000000..c34a81245f87 --- /dev/null +++ b/tools/include/uapi/linux/neighbour.h @@ -0,0 +1,229 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_NEIGHBOUR_H +#define _UAPI__LINUX_NEIGHBOUR_H + +#include <linux/types.h> +#include <linux/netlink.h> + +struct ndmsg { + __u8 ndm_family; + __u8 ndm_pad1; + __u16 ndm_pad2; + __s32 ndm_ifindex; + __u16 ndm_state; + __u8 ndm_flags; + __u8 ndm_type; +}; + +enum { + NDA_UNSPEC, + NDA_DST, + NDA_LLADDR, + NDA_CACHEINFO, + NDA_PROBES, + NDA_VLAN, + NDA_PORT, + NDA_VNI, + NDA_IFINDEX, + NDA_MASTER, + NDA_LINK_NETNSID, + NDA_SRC_VNI, + NDA_PROTOCOL, /* Originator of entry */ + NDA_NH_ID, + NDA_FDB_EXT_ATTRS, + NDA_FLAGS_EXT, + NDA_NDM_STATE_MASK, + NDA_NDM_FLAGS_MASK, + __NDA_MAX +}; + +#define NDA_MAX (__NDA_MAX - 1) + +/* + * Neighbor Cache Entry Flags + */ + +#define NTF_USE (1 << 0) +#define NTF_SELF (1 << 1) +#define NTF_MASTER (1 << 2) +#define NTF_PROXY (1 << 3) /* == ATF_PUBL */ +#define NTF_EXT_LEARNED (1 << 4) +#define NTF_OFFLOADED (1 << 5) +#define NTF_STICKY (1 << 6) +#define NTF_ROUTER (1 << 7) +/* Extended flags under NDA_FLAGS_EXT: */ +#define NTF_EXT_MANAGED (1 << 0) +#define NTF_EXT_LOCKED (1 << 1) +#define NTF_EXT_EXT_VALIDATED (1 << 2) + +/* + * Neighbor Cache Entry States. + */ + +#define NUD_INCOMPLETE 0x01 +#define NUD_REACHABLE 0x02 +#define NUD_STALE 0x04 +#define NUD_DELAY 0x08 +#define NUD_PROBE 0x10 +#define NUD_FAILED 0x20 + +/* Dummy states */ +#define NUD_NOARP 0x40 +#define NUD_PERMANENT 0x80 +#define NUD_NONE 0x00 + +/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change and make no + * address resolution or NUD. + * + * NUD_PERMANENT also cannot be deleted by garbage collectors. This holds true + * for dynamic entries with NTF_EXT_LEARNED flag as well. However, upon carrier + * down event, NUD_PERMANENT entries are not flushed whereas NTF_EXT_LEARNED + * flagged entries explicitly are (which is also consistent with the routing + * subsystem). + * + * When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry + * states don't make sense and thus are ignored. Such entries don't age and + * can roam. + * + * NTF_EXT_MANAGED flagged neigbor entries are managed by the kernel on behalf + * of a user space control plane, and automatically refreshed so that (if + * possible) they remain in NUD_REACHABLE state. + * + * NTF_EXT_LOCKED flagged bridge FDB entries are entries generated by the + * bridge in response to a host trying to communicate via a locked bridge port + * with MAB enabled. Their purpose is to notify user space that a host requires + * authentication. + * + * NTF_EXT_EXT_VALIDATED flagged neighbor entries were externally validated by + * a user space control plane. The kernel will not remove or invalidate them, + * but it can probe them and notify user space when they become reachable. + */ + +struct nda_cacheinfo { + __u32 ndm_confirmed; + __u32 ndm_used; + __u32 ndm_updated; + __u32 ndm_refcnt; +}; + +/***************************************************************** + * Neighbour tables specific messages. + * + * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the + * NLM_F_DUMP flag set. Every neighbour table configuration is + * spread over multiple messages to avoid running into message + * size limits on systems with many interfaces. The first message + * in the sequence transports all not device specific data such as + * statistics, configuration, and the default parameter set. + * This message is followed by 0..n messages carrying device + * specific parameter sets. + * Although the ordering should be sufficient, NDTA_NAME can be + * used to identify sequences. The initial message can be identified + * by checking for NDTA_CONFIG. The device specific messages do + * not contain this TLV but have NDTPA_IFINDEX set to the + * corresponding interface index. + * + * To change neighbour table attributes, send RTM_SETNEIGHTBL + * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3], + * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked + * otherwise. Device specific parameter sets can be changed by + * setting NDTPA_IFINDEX to the interface index of the corresponding + * device. + ****/ + +struct ndt_stats { + __u64 ndts_allocs; + __u64 ndts_destroys; + __u64 ndts_hash_grows; + __u64 ndts_res_failed; + __u64 ndts_lookups; + __u64 ndts_hits; + __u64 ndts_rcv_probes_mcast; + __u64 ndts_rcv_probes_ucast; + __u64 ndts_periodic_gc_runs; + __u64 ndts_forced_gc_runs; + __u64 ndts_table_fulls; +}; + +enum { + NDTPA_UNSPEC, + NDTPA_IFINDEX, /* u32, unchangeable */ + NDTPA_REFCNT, /* u32, read-only */ + NDTPA_REACHABLE_TIME, /* u64, read-only, msecs */ + NDTPA_BASE_REACHABLE_TIME, /* u64, msecs */ + NDTPA_RETRANS_TIME, /* u64, msecs */ + NDTPA_GC_STALETIME, /* u64, msecs */ + NDTPA_DELAY_PROBE_TIME, /* u64, msecs */ + NDTPA_QUEUE_LEN, /* u32 */ + NDTPA_APP_PROBES, /* u32 */ + NDTPA_UCAST_PROBES, /* u32 */ + NDTPA_MCAST_PROBES, /* u32 */ + NDTPA_ANYCAST_DELAY, /* u64, msecs */ + NDTPA_PROXY_DELAY, /* u64, msecs */ + NDTPA_PROXY_QLEN, /* u32 */ + NDTPA_LOCKTIME, /* u64, msecs */ + NDTPA_QUEUE_LENBYTES, /* u32 */ + NDTPA_MCAST_REPROBES, /* u32 */ + NDTPA_PAD, + NDTPA_INTERVAL_PROBE_TIME_MS, /* u64, msecs */ + __NDTPA_MAX +}; +#define NDTPA_MAX (__NDTPA_MAX - 1) + +struct ndtmsg { + __u8 ndtm_family; + __u8 ndtm_pad1; + __u16 ndtm_pad2; +}; + +struct ndt_config { + __u16 ndtc_key_len; + __u16 ndtc_entry_size; + __u32 ndtc_entries; + __u32 ndtc_last_flush; /* delta to now in msecs */ + __u32 ndtc_last_rand; /* delta to now in msecs */ + __u32 ndtc_hash_rnd; + __u32 ndtc_hash_mask; + __u32 ndtc_hash_chain_gc; + __u32 ndtc_proxy_qlen; +}; + +enum { + NDTA_UNSPEC, + NDTA_NAME, /* char *, unchangeable */ + NDTA_THRESH1, /* u32 */ + NDTA_THRESH2, /* u32 */ + NDTA_THRESH3, /* u32 */ + NDTA_CONFIG, /* struct ndt_config, read-only */ + NDTA_PARMS, /* nested TLV NDTPA_* */ + NDTA_STATS, /* struct ndt_stats, read-only */ + NDTA_GC_INTERVAL, /* u64, msecs */ + NDTA_PAD, + __NDTA_MAX +}; +#define NDTA_MAX (__NDTA_MAX - 1) + + /* FDB activity notification bits used in NFEA_ACTIVITY_NOTIFY: + * - FDB_NOTIFY_BIT - notify on activity/expire for any entry + * - FDB_NOTIFY_INACTIVE_BIT - mark as inactive to avoid multiple notifications + */ +enum { + FDB_NOTIFY_BIT = (1 << 0), + FDB_NOTIFY_INACTIVE_BIT = (1 << 1) +}; + +/* embedded into NDA_FDB_EXT_ATTRS: + * [NDA_FDB_EXT_ATTRS] = { + * [NFEA_ACTIVITY_NOTIFY] + * ... + * } + */ +enum { + NFEA_UNSPEC, + NFEA_ACTIVITY_NOTIFY, + NFEA_DONT_REFRESH, + __NFEA_MAX +}; +#define NFEA_MAX (__NFEA_MAX - 1) + +#endif diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index e4be227d3ad6..e0b579a1df4f 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -2,6 +2,7 @@ /* Do not edit directly, auto-generated from: */ /* Documentation/netlink/specs/netdev.yaml */ /* YNL-GEN uapi header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ #ifndef _UAPI_LINUX_NETDEV_H #define _UAPI_LINUX_NETDEV_H @@ -59,10 +60,13 @@ enum netdev_xdp_rx_metadata { * by the driver. * @NETDEV_XSK_FLAGS_TX_CHECKSUM: L3 checksum HW offload is supported by the * driver. + * @NETDEV_XSK_FLAGS_TX_LAUNCH_TIME_FIFO: Launch time HW offload is supported + * by the driver. */ enum netdev_xsk_flags { NETDEV_XSK_FLAGS_TX_TIMESTAMP = 1, NETDEV_XSK_FLAGS_TX_CHECKSUM = 2, + NETDEV_XSK_FLAGS_TX_LAUNCH_TIME_FIFO = 4, }; enum netdev_queue_type { @@ -74,6 +78,12 @@ enum netdev_qstats_scope { NETDEV_QSTATS_SCOPE_QUEUE = 1, }; +enum netdev_napi_threaded { + NETDEV_NAPI_THREADED_DISABLED, + NETDEV_NAPI_THREADED_ENABLED, + NETDEV_NAPI_THREADED_BUSY_POLL, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, @@ -87,6 +97,11 @@ enum { }; enum { + __NETDEV_A_IO_URING_PROVIDER_INFO_MAX, + NETDEV_A_IO_URING_PROVIDER_INFO_MAX = (__NETDEV_A_IO_URING_PROVIDER_INFO_MAX - 1) +}; + +enum { NETDEV_A_PAGE_POOL_ID = 1, NETDEV_A_PAGE_POOL_IFINDEX, NETDEV_A_PAGE_POOL_NAPI_ID, @@ -94,6 +109,7 @@ enum { NETDEV_A_PAGE_POOL_INFLIGHT_MEM, NETDEV_A_PAGE_POOL_DETACH_TIME, NETDEV_A_PAGE_POOL_DMABUF, + NETDEV_A_PAGE_POOL_IO_URING, __NETDEV_A_PAGE_POOL_MAX, NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1) @@ -125,17 +141,25 @@ enum { NETDEV_A_NAPI_DEFER_HARD_IRQS, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, + NETDEV_A_NAPI_THREADED, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) }; enum { + __NETDEV_A_XSK_INFO_MAX, + NETDEV_A_XSK_INFO_MAX = (__NETDEV_A_XSK_INFO_MAX - 1) +}; + +enum { NETDEV_A_QUEUE_ID = 1, NETDEV_A_QUEUE_IFINDEX, NETDEV_A_QUEUE_TYPE, NETDEV_A_QUEUE_NAPI_ID, NETDEV_A_QUEUE_DMABUF, + NETDEV_A_QUEUE_IO_URING, + NETDEV_A_QUEUE_XSK, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) @@ -203,6 +227,7 @@ enum { NETDEV_CMD_QSTATS_GET, NETDEV_CMD_BIND_RX, NETDEV_CMD_NAPI_SET, + NETDEV_CMD_BIND_TX, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/tools/include/uapi/linux/netfilter.h b/tools/include/uapi/linux/netfilter.h new file mode 100644 index 000000000000..5a79ccb76701 --- /dev/null +++ b/tools/include/uapi/linux/netfilter.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_NETFILTER_H +#define _UAPI__LINUX_NETFILTER_H + +#include <linux/types.h> +#include <linux/compiler.h> +#include <linux/in.h> +#include <linux/in6.h> + +/* Responses from hook functions. */ +#define NF_DROP 0 +#define NF_ACCEPT 1 +#define NF_STOLEN 2 +#define NF_QUEUE 3 +#define NF_REPEAT 4 +#define NF_STOP 5 /* Deprecated, for userspace nf_queue compatibility. */ +#define NF_MAX_VERDICT NF_STOP + +/* we overload the higher bits for encoding auxiliary data such as the queue + * number or errno values. Not nice, but better than additional function + * arguments. */ +#define NF_VERDICT_MASK 0x000000ff + +/* extra verdict flags have mask 0x0000ff00 */ +#define NF_VERDICT_FLAG_QUEUE_BYPASS 0x00008000 + +/* queue number (NF_QUEUE) or errno (NF_DROP) */ +#define NF_VERDICT_QMASK 0xffff0000 +#define NF_VERDICT_QBITS 16 + +#define NF_QUEUE_NR(x) ((((x) << 16) & NF_VERDICT_QMASK) | NF_QUEUE) + +#define NF_DROP_ERR(x) (((-x) << 16) | NF_DROP) + +/* only for userspace compatibility */ +#ifndef __KERNEL__ + +/* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */ +#define NF_VERDICT_BITS 16 +#endif + +enum nf_inet_hooks { + NF_INET_PRE_ROUTING, + NF_INET_LOCAL_IN, + NF_INET_FORWARD, + NF_INET_LOCAL_OUT, + NF_INET_POST_ROUTING, + NF_INET_NUMHOOKS, + NF_INET_INGRESS = NF_INET_NUMHOOKS, +}; + +enum nf_dev_hooks { + NF_NETDEV_INGRESS, + NF_NETDEV_EGRESS, + NF_NETDEV_NUMHOOKS +}; + +enum { + NFPROTO_UNSPEC = 0, + NFPROTO_INET = 1, + NFPROTO_IPV4 = 2, + NFPROTO_ARP = 3, + NFPROTO_NETDEV = 5, + NFPROTO_BRIDGE = 7, + NFPROTO_IPV6 = 10, +#ifndef __KERNEL__ /* no longer supported by kernel */ + NFPROTO_DECNET = 12, +#endif + NFPROTO_NUMPROTO, +}; + +union nf_inet_addr { + __u32 all[4]; + __be32 ip; + __be32 ip6[4]; + struct in_addr in; + struct in6_addr in6; +}; + +#endif /* _UAPI__LINUX_NETFILTER_H */ diff --git a/tools/include/uapi/linux/netfilter_arp.h b/tools/include/uapi/linux/netfilter_arp.h new file mode 100644 index 000000000000..791dfc5ae907 --- /dev/null +++ b/tools/include/uapi/linux/netfilter_arp.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */ +#ifndef __LINUX_ARP_NETFILTER_H +#define __LINUX_ARP_NETFILTER_H + +/* ARP-specific defines for netfilter. + * (C)2002 Rusty Russell IBM -- This code is GPL. + */ + +#include <linux/netfilter.h> + +/* There is no PF_ARP. */ +#define NF_ARP 0 + +/* ARP Hooks */ +#define NF_ARP_IN 0 +#define NF_ARP_OUT 1 +#define NF_ARP_FORWARD 2 + +#ifndef __KERNEL__ +#define NF_ARP_NUMHOOKS 3 +#endif + +#endif /* __LINUX_ARP_NETFILTER_H */ diff --git a/tools/include/uapi/linux/nsfs.h b/tools/include/uapi/linux/nsfs.h new file mode 100644 index 000000000000..a25e38d1c874 --- /dev/null +++ b/tools/include/uapi/linux/nsfs.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LINUX_NSFS_H +#define __LINUX_NSFS_H + +#include <linux/ioctl.h> +#include <linux/types.h> + +#define NSIO 0xb7 + +/* Returns a file descriptor that refers to an owning user namespace */ +#define NS_GET_USERNS _IO(NSIO, 0x1) +/* Returns a file descriptor that refers to a parent namespace */ +#define NS_GET_PARENT _IO(NSIO, 0x2) +/* Returns the type of namespace (CLONE_NEW* value) referred to by + file descriptor */ +#define NS_GET_NSTYPE _IO(NSIO, 0x3) +/* Get owner UID (in the caller's user namespace) for a user namespace */ +#define NS_GET_OWNER_UID _IO(NSIO, 0x4) +/* Translate pid from target pid namespace into the caller's pid namespace. */ +#define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int) +/* Return thread-group leader id of pid in the callers pid namespace. */ +#define NS_GET_TGID_FROM_PIDNS _IOR(NSIO, 0x7, int) +/* Translate pid from caller's pid namespace into a target pid namespace. */ +#define NS_GET_PID_IN_PIDNS _IOR(NSIO, 0x8, int) +/* Return thread-group leader id of pid in the target pid namespace. */ +#define NS_GET_TGID_IN_PIDNS _IOR(NSIO, 0x9, int) + +struct mnt_ns_info { + __u32 size; + __u32 nr_mounts; + __u64 mnt_ns_id; +}; + +#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */ + +/* Get information about namespace. */ +#define NS_MNT_GET_INFO _IOR(NSIO, 10, struct mnt_ns_info) +/* Get next namespace. */ +#define NS_MNT_GET_NEXT _IOR(NSIO, 11, struct mnt_ns_info) +/* Get previous namespace. */ +#define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info) + +/* Retrieve namespace identifiers. */ +#define NS_GET_MNTNS_ID _IOR(NSIO, 5, __u64) +#define NS_GET_ID _IOR(NSIO, 13, __u64) + +enum init_ns_ino { + IPC_NS_INIT_INO = 0xEFFFFFFFU, + UTS_NS_INIT_INO = 0xEFFFFFFEU, + USER_NS_INIT_INO = 0xEFFFFFFDU, + PID_NS_INIT_INO = 0xEFFFFFFCU, + CGROUP_NS_INIT_INO = 0xEFFFFFFBU, + TIME_NS_INIT_INO = 0xEFFFFFFAU, + NET_NS_INIT_INO = 0xEFFFFFF9U, + MNT_NS_INIT_INO = 0xEFFFFFF8U, +#ifdef __KERNEL__ + MNT_NS_ANON_INO = 0xEFFFFFF7U, +#endif +}; + +struct nsfs_file_handle { + __u64 ns_id; + __u32 ns_type; + __u32 ns_inum; +}; + +#define NSFS_FILE_HANDLE_SIZE_VER0 16 /* sizeof first published struct */ +#define NSFS_FILE_HANDLE_SIZE_LATEST sizeof(struct nsfs_file_handle) /* sizeof latest published struct */ + +enum init_ns_id { + IPC_NS_INIT_ID = 1ULL, + UTS_NS_INIT_ID = 2ULL, + USER_NS_INIT_ID = 3ULL, + PID_NS_INIT_ID = 4ULL, + CGROUP_NS_INIT_ID = 5ULL, + TIME_NS_INIT_ID = 6ULL, + NET_NS_INIT_ID = 7ULL, + MNT_NS_INIT_ID = 8ULL, +#ifdef __KERNEL__ + NS_LAST_INIT_ID = MNT_NS_INIT_ID, +#endif +}; + +enum ns_type { + TIME_NS = (1ULL << 7), /* CLONE_NEWTIME */ + MNT_NS = (1ULL << 17), /* CLONE_NEWNS */ + CGROUP_NS = (1ULL << 25), /* CLONE_NEWCGROUP */ + UTS_NS = (1ULL << 26), /* CLONE_NEWUTS */ + IPC_NS = (1ULL << 27), /* CLONE_NEWIPC */ + USER_NS = (1ULL << 28), /* CLONE_NEWUSER */ + PID_NS = (1ULL << 29), /* CLONE_NEWPID */ + NET_NS = (1ULL << 30), /* CLONE_NEWNET */ +}; + +/** + * struct ns_id_req - namespace ID request structure + * @size: size of this structure + * @spare: reserved for future use + * @filter: filter mask + * @ns_id: last namespace id + * @user_ns_id: owning user namespace ID + * + * Structure for passing namespace ID and miscellaneous parameters to + * statns(2) and listns(2). + * + * For statns(2) @param represents the request mask. + * For listns(2) @param represents the last listed mount id (or zero). + */ +struct ns_id_req { + __u32 size; + __u32 spare; + __u64 ns_id; + struct /* listns */ { + __u32 ns_type; + __u32 spare2; + __u64 user_ns_id; + }; +}; + +/* + * Special @user_ns_id value that can be passed to listns() + */ +#define LISTNS_CURRENT_USER 0xffffffffffffffff /* Caller's userns */ + +/* List of all ns_id_req versions. */ +#define NS_ID_REQ_SIZE_VER0 32 /* sizeof first published struct */ + +#endif /* __LINUX_NSFS_H */ diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 4842c36fdf80..c44a8fb3e418 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -39,18 +39,21 @@ enum perf_type_id { /* * attr.config layout for type PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE + * * PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA * AA: hardware event ID * EEEEEEEE: PMU type ID + * * PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB * BB: hardware cache ID * CC: hardware cache op ID * DD: hardware cache op result ID * EEEEEEEE: PMU type ID - * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied. + * + * If the PMU type ID is 0, PERF_TYPE_RAW will be applied. */ -#define PERF_PMU_TYPE_SHIFT 32 -#define PERF_HW_EVENT_MASK 0xffffffff +#define PERF_PMU_TYPE_SHIFT 32 +#define PERF_HW_EVENT_MASK 0xffffffff /* * Generalized performance event event_id types, used by the @@ -112,7 +115,7 @@ enum perf_hw_cache_op_result_id { /* * Special "software" events provided by the kernel, even if the hardware * does not support performance events. These events measure various - * physical and sw events of the kernel (and allow the profiling of them as + * physical and SW events of the kernel (and allow the profiling of them as * well): */ enum perf_sw_ids { @@ -167,8 +170,9 @@ enum perf_event_sample_format { }; #define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT) + /* - * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set + * Values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set. * * If the user does not pass priv level information via branch_sample_type, * the kernel uses the event's priv level. Branch and event priv levels do @@ -178,20 +182,20 @@ enum perf_event_sample_format { * of branches and therefore it supersedes all the other types. */ enum perf_branch_sample_type_shift { - PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ - PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ - PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ - - PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ - PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ - PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ - PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ - PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ - PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ - PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ + PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ + PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ + PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ + + PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ + PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ + PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ + PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ + PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ + PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ + PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */ - PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */ + PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* CALL/RET stack */ PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */ PERF_SAMPLE_BRANCH_CALL_SHIFT = 13, /* direct call */ @@ -210,96 +214,95 @@ enum perf_branch_sample_type_shift { }; enum perf_branch_sample_type { - PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, - PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT, - PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, + PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, + PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT, + PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, - PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, - PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, - PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, - PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, - PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, - PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, - PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, - PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, + PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, + PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, + PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, + PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, + PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, + PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, - PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, - PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, - PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, + PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, + PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, + PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, - PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, - PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, + PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, + PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, - PERF_SAMPLE_BRANCH_TYPE_SAVE = - 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_TYPE_SAVE = 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, - PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, + PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, - PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, - PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, + PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, - PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; /* - * Common flow change classification + * Common control flow change classifications: */ enum { - PERF_BR_UNKNOWN = 0, /* unknown */ - PERF_BR_COND = 1, /* conditional */ - PERF_BR_UNCOND = 2, /* unconditional */ - PERF_BR_IND = 3, /* indirect */ - PERF_BR_CALL = 4, /* function call */ - PERF_BR_IND_CALL = 5, /* indirect function call */ - PERF_BR_RET = 6, /* function return */ - PERF_BR_SYSCALL = 7, /* syscall */ - PERF_BR_SYSRET = 8, /* syscall return */ - PERF_BR_COND_CALL = 9, /* conditional function call */ - PERF_BR_COND_RET = 10, /* conditional function return */ - PERF_BR_ERET = 11, /* exception return */ - PERF_BR_IRQ = 12, /* irq */ - PERF_BR_SERROR = 13, /* system error */ - PERF_BR_NO_TX = 14, /* not in transaction */ - PERF_BR_EXTEND_ABI = 15, /* extend ABI */ + PERF_BR_UNKNOWN = 0, /* Unknown */ + PERF_BR_COND = 1, /* Conditional */ + PERF_BR_UNCOND = 2, /* Unconditional */ + PERF_BR_IND = 3, /* Indirect */ + PERF_BR_CALL = 4, /* Function call */ + PERF_BR_IND_CALL = 5, /* Indirect function call */ + PERF_BR_RET = 6, /* Function return */ + PERF_BR_SYSCALL = 7, /* Syscall */ + PERF_BR_SYSRET = 8, /* Syscall return */ + PERF_BR_COND_CALL = 9, /* Conditional function call */ + PERF_BR_COND_RET = 10, /* Conditional function return */ + PERF_BR_ERET = 11, /* Exception return */ + PERF_BR_IRQ = 12, /* IRQ */ + PERF_BR_SERROR = 13, /* System error */ + PERF_BR_NO_TX = 14, /* Not in transaction */ + PERF_BR_EXTEND_ABI = 15, /* Extend ABI */ PERF_BR_MAX, }; /* - * Common branch speculation outcome classification + * Common branch speculation outcome classifications: */ enum { - PERF_BR_SPEC_NA = 0, /* Not available */ - PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */ - PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */ - PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */ + PERF_BR_SPEC_NA = 0, /* Not available */ + PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */ + PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */ + PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */ PERF_BR_SPEC_MAX, }; enum { - PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */ - PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */ - PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */ - PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */ - PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */ - PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */ - PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */ - PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */ + PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */ + PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */ + PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */ + PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */ + PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */ + PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */ + PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */ + PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */ PERF_BR_NEW_MAX, }; enum { - PERF_BR_PRIV_UNKNOWN = 0, - PERF_BR_PRIV_USER = 1, - PERF_BR_PRIV_KERNEL = 2, - PERF_BR_PRIV_HV = 3, + PERF_BR_PRIV_UNKNOWN = 0, + PERF_BR_PRIV_USER = 1, + PERF_BR_PRIV_KERNEL = 2, + PERF_BR_PRIV_HV = 3, }; -#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 -#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 -#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 -#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 -#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 +#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 +#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 +#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 +#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 +#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ @@ -310,9 +313,9 @@ enum { * Values to determine ABI of the registers dump. */ enum perf_sample_regs_abi { - PERF_SAMPLE_REGS_ABI_NONE = 0, - PERF_SAMPLE_REGS_ABI_32 = 1, - PERF_SAMPLE_REGS_ABI_64 = 2, + PERF_SAMPLE_REGS_ABI_NONE = 0, + PERF_SAMPLE_REGS_ABI_32 = 1, + PERF_SAMPLE_REGS_ABI_64 = 2, }; /* @@ -320,21 +323,21 @@ enum perf_sample_regs_abi { * abort events. Multiple bits can be set. */ enum { - PERF_TXN_ELISION = (1 << 0), /* From elision */ - PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ - PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ - PERF_TXN_ASYNC = (1 << 3), /* Instruction not related */ - PERF_TXN_RETRY = (1 << 4), /* Retry possible */ - PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ - PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ - PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ + PERF_TXN_ELISION = (1 << 0), /* From elision */ + PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ + PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ + PERF_TXN_ASYNC = (1 << 3), /* Instruction is not related */ + PERF_TXN_RETRY = (1 << 4), /* Retry possible */ + PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ + PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ + PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ - PERF_TXN_MAX = (1 << 8), /* non-ABI */ + PERF_TXN_MAX = (1 << 8), /* non-ABI */ - /* bits 32..63 are reserved for the abort code */ + /* Bits 32..63 are reserved for the abort code */ - PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), - PERF_TXN_ABORT_SHIFT = 32, + PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), + PERF_TXN_ABORT_SHIFT = 32, }; /* @@ -369,22 +372,23 @@ enum perf_event_read_format { PERF_FORMAT_MAX = 1U << 5, /* non-ABI */ }; -#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ -#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ -#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ -#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */ - /* add: sample_stack_user */ -#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ -#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ -#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */ -#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */ -#define PERF_ATTR_SIZE_VER8 136 /* add: config3 */ +#define PERF_ATTR_SIZE_VER0 64 /* Size of first published 'struct perf_event_attr' */ +#define PERF_ATTR_SIZE_VER1 72 /* Add: config2 */ +#define PERF_ATTR_SIZE_VER2 80 /* Add: branch_sample_type */ +#define PERF_ATTR_SIZE_VER3 96 /* Add: sample_regs_user */ + /* Add: sample_stack_user */ +#define PERF_ATTR_SIZE_VER4 104 /* Add: sample_regs_intr */ +#define PERF_ATTR_SIZE_VER5 112 /* Add: aux_watermark */ +#define PERF_ATTR_SIZE_VER6 120 /* Add: aux_sample_size */ +#define PERF_ATTR_SIZE_VER7 128 /* Add: sig_data */ +#define PERF_ATTR_SIZE_VER8 136 /* Add: config3 */ +#define PERF_ATTR_SIZE_VER9 144 /* add: config4 */ /* - * Hardware event_id to monitor via a performance monitoring event: - * - * @sample_max_stack: Max number of frame pointers in a callchain, - * should be < /proc/sys/kernel/perf_event_max_stack + * 'struct perf_event_attr' contains various attributes that define + * a performance event - most of them hardware related configuration + * details, but also a lot of behavioral switches and values implemented + * by the kernel. */ struct perf_event_attr { @@ -394,7 +398,7 @@ struct perf_event_attr { __u32 type; /* - * Size of the attr structure, for fwd/bwd compat. + * Size of the attr structure, for forward/backwards compatibility. */ __u32 size; @@ -449,21 +453,23 @@ struct perf_event_attr { comm_exec : 1, /* flag comm events that are due to an exec */ use_clockid : 1, /* use @clockid for time fields */ context_switch : 1, /* context switch data */ - write_backward : 1, /* Write ring buffer from end to beginning */ + write_backward : 1, /* write ring buffer from end to beginning */ namespaces : 1, /* include namespaces data */ ksymbol : 1, /* include ksymbol events */ - bpf_event : 1, /* include bpf events */ + bpf_event : 1, /* include BPF events */ aux_output : 1, /* generate AUX records instead of events */ cgroup : 1, /* include cgroup events */ text_poke : 1, /* include text poke events */ - build_id : 1, /* use build id in mmap2 events */ + build_id : 1, /* use build ID in mmap2 events */ inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */ remove_on_exec : 1, /* event is removed from task on exec */ sigtrap : 1, /* send synchronous SIGTRAP on event */ - __reserved_1 : 26; + defer_callchain: 1, /* request PERF_RECORD_CALLCHAIN_DEFERRED records */ + defer_output : 1, /* output PERF_RECORD_CALLCHAIN_DEFERRED records */ + __reserved_1 : 24; union { - __u32 wakeup_events; /* wakeup every n events */ + __u32 wakeup_events; /* wake up every n events */ __u32 wakeup_watermark; /* bytes before wakeup */ }; @@ -472,13 +478,13 @@ struct perf_event_attr { __u64 bp_addr; __u64 kprobe_func; /* for perf_kprobe */ __u64 uprobe_path; /* for perf_uprobe */ - __u64 config1; /* extension of config */ + __u64 config1; /* extension of config */ }; union { __u64 bp_len; - __u64 kprobe_addr; /* when kprobe_func == NULL */ + __u64 kprobe_addr; /* when kprobe_func == NULL */ __u64 probe_offset; /* for perf_[k,u]probe */ - __u64 config2; /* extension of config1 */ + __u64 config2; /* extension of config1 */ }; __u64 branch_sample_type; /* enum perf_branch_sample_type */ @@ -508,10 +514,28 @@ struct perf_event_attr { * Wakeup watermark for AUX area */ __u32 aux_watermark; + + /* + * Max number of frame pointers in a callchain, should be + * lower than /proc/sys/kernel/perf_event_max_stack. + * + * Max number of entries of branch stack should be lower + * than the hardware limit. + */ __u16 sample_max_stack; + __u16 __reserved_2; __u32 aux_sample_size; - __u32 __reserved_3; + + union { + __u32 aux_action; + struct { + __u32 aux_start_paused : 1, /* start AUX area tracing paused */ + aux_pause : 1, /* on overflow, pause AUX area tracing */ + aux_resume : 1, /* on overflow, resume AUX area tracing */ + __reserved_3 : 29; + }; + }; /* * User provided data if sigtrap=1, passed back to user via @@ -522,11 +546,12 @@ struct perf_event_attr { __u64 sig_data; __u64 config3; /* extension of config2 */ + __u64 config4; /* extension of config3 */ }; /* * Structure used by below PERF_EVENT_IOC_QUERY_BPF command - * to query bpf programs attached to the same perf tracepoint + * to query BPF programs attached to the same perf tracepoint * as the given perf event. */ struct perf_event_query_bpf { @@ -548,21 +573,21 @@ struct perf_event_query_bpf { /* * Ioctls that can be done on a perf event fd: */ -#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) -#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) -#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) -#define PERF_EVENT_IOC_RESET _IO ('$', 3) -#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64) -#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) -#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) -#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) -#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) -#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32) +#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) +#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) +#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) +#define PERF_EVENT_IOC_RESET _IO ('$', 3) +#define PERF_EVENT_IOC_PERIOD _IOW ('$', 4, __u64) +#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) +#define PERF_EVENT_IOC_SET_FILTER _IOW ('$', 6, char *) +#define PERF_EVENT_IOC_ID _IOR ('$', 7, __u64 *) +#define PERF_EVENT_IOC_SET_BPF _IOW ('$', 8, __u32) +#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW ('$', 9, __u32) #define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *) -#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW('$', 11, struct perf_event_attr *) +#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW ('$', 11, struct perf_event_attr *) enum perf_event_ioc_flags { - PERF_IOC_FLAG_GROUP = 1U << 0, + PERF_IOC_FLAG_GROUP = 1U << 0, }; /* @@ -573,7 +598,7 @@ struct perf_event_mmap_page { __u32 compat_version; /* lowest version this is compat with */ /* - * Bits needed to read the hw events in user-space. + * Bits needed to read the HW events in user-space. * * u32 seq, time_mult, time_shift, index, width; * u64 count, enabled, running; @@ -611,7 +636,7 @@ struct perf_event_mmap_page { __u32 index; /* hardware event identifier */ __s64 offset; /* add to hardware event value */ __u64 time_enabled; /* time event active */ - __u64 time_running; /* time event on cpu */ + __u64 time_running; /* time event on CPU */ union { __u64 capabilities; struct { @@ -639,7 +664,7 @@ struct perf_event_mmap_page { /* * If cap_usr_time the below fields can be used to compute the time - * delta since time_enabled (in ns) using rdtsc or similar. + * delta since time_enabled (in ns) using RDTSC or similar. * * u64 quot, rem; * u64 delta; @@ -712,7 +737,7 @@ struct perf_event_mmap_page { * after reading this value. * * When the mapping is PROT_WRITE the @data_tail value should be - * written by userspace to reflect the last read data, after issueing + * written by user-space to reflect the last read data, after issuing * an smp_mb() to separate the data read from the ->data_tail store. * In this case the kernel will not over-write unread data. * @@ -728,7 +753,7 @@ struct perf_event_mmap_page { /* * AUX area is defined by aux_{offset,size} fields that should be set - * by the userspace, so that + * by the user-space, so that * * aux_offset >= data_offset + data_size * @@ -802,7 +827,7 @@ struct perf_event_mmap_page { * Indicates that thread was preempted in TASK_RUNNING state. * * PERF_RECORD_MISC_MMAP_BUILD_ID: - * Indicates that mmap2 event carries build id data. + * Indicates that mmap2 event carries build ID data. */ #define PERF_RECORD_MISC_EXACT_IP (1 << 14) #define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14) @@ -813,26 +838,26 @@ struct perf_event_mmap_page { #define PERF_RECORD_MISC_EXT_RESERVED (1 << 15) struct perf_event_header { - __u32 type; - __u16 misc; - __u16 size; + __u32 type; + __u16 misc; + __u16 size; }; struct perf_ns_link_info { - __u64 dev; - __u64 ino; + __u64 dev; + __u64 ino; }; enum { - NET_NS_INDEX = 0, - UTS_NS_INDEX = 1, - IPC_NS_INDEX = 2, - PID_NS_INDEX = 3, - USER_NS_INDEX = 4, - MNT_NS_INDEX = 5, - CGROUP_NS_INDEX = 6, - - NR_NAMESPACES, /* number of available namespaces */ + NET_NS_INDEX = 0, + UTS_NS_INDEX = 1, + IPC_NS_INDEX = 2, + PID_NS_INDEX = 3, + USER_NS_INDEX = 4, + MNT_NS_INDEX = 5, + CGROUP_NS_INDEX = 6, + + NR_NAMESPACES, /* number of available namespaces */ }; enum perf_event_type { @@ -848,11 +873,11 @@ enum perf_event_type { * optional fields being ignored. * * struct sample_id { - * { u32 pid, tid; } && PERF_SAMPLE_TID - * { u64 time; } && PERF_SAMPLE_TIME - * { u64 id; } && PERF_SAMPLE_ID - * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID - * { u32 cpu, res; } && PERF_SAMPLE_CPU + * { u32 pid, tid; } && PERF_SAMPLE_TID + * { u64 time; } && PERF_SAMPLE_TIME + * { u64 id; } && PERF_SAMPLE_ID + * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID + * { u32 cpu, res; } && PERF_SAMPLE_CPU * { u64 id; } && PERF_SAMPLE_IDENTIFIER * } && perf_event_attr::sample_id_all * @@ -863,7 +888,7 @@ enum perf_event_type { /* * The MMAP events record the PROT_EXEC mappings so that we can - * correlate userspace IPs to code. They have the following structure: + * correlate user-space IPs to code. They have the following structure: * * struct { * struct perf_event_header header; @@ -873,7 +898,7 @@ enum perf_event_type { * u64 len; * u64 pgoff; * char filename[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_MMAP = 1, @@ -883,7 +908,7 @@ enum perf_event_type { * struct perf_event_header header; * u64 id; * u64 lost; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_LOST = 2, @@ -894,7 +919,7 @@ enum perf_event_type { * * u32 pid, tid; * char comm[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_COMM = 3, @@ -905,7 +930,7 @@ enum perf_event_type { * u32 pid, ppid; * u32 tid, ptid; * u64 time; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_EXIT = 4, @@ -916,7 +941,7 @@ enum perf_event_type { * u64 time; * u64 id; * u64 stream_id; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_THROTTLE = 5, @@ -928,7 +953,7 @@ enum perf_event_type { * u32 pid, ppid; * u32 tid, ptid; * u64 time; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_FORK = 7, @@ -939,7 +964,7 @@ enum perf_event_type { * u32 pid, tid; * * struct read_format values; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_READ = 8, @@ -994,12 +1019,12 @@ enum perf_event_type { * { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS * } && PERF_SAMPLE_BRANCH_STACK * - * { u64 abi; # enum perf_sample_regs_abi - * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER + * { u64 abi; # enum perf_sample_regs_abi + * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER * - * { u64 size; - * char data[size]; - * u64 dyn_size; } && PERF_SAMPLE_STACK_USER + * { u64 size; + * char data[size]; + * u64 dyn_size; } && PERF_SAMPLE_STACK_USER * * { union perf_sample_weight * { @@ -1024,10 +1049,11 @@ enum perf_event_type { * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR - * { u64 size; - * char data[size]; } && PERF_SAMPLE_AUX + * { u64 cgroup;} && PERF_SAMPLE_CGROUP * { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE * { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE + * { u64 size; + * char data[size]; } && PERF_SAMPLE_AUX * }; */ PERF_RECORD_SAMPLE = 9, @@ -1059,7 +1085,7 @@ enum perf_event_type { * }; * u32 prot, flags; * char filename[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_MMAP2 = 10, @@ -1068,12 +1094,12 @@ enum perf_event_type { * Records that new data landed in the AUX buffer part. * * struct { - * struct perf_event_header header; + * struct perf_event_header header; * - * u64 aux_offset; - * u64 aux_size; + * u64 aux_offset; + * u64 aux_size; * u64 flags; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_AUX = 11, @@ -1156,7 +1182,7 @@ enum perf_event_type { PERF_RECORD_KSYMBOL = 17, /* - * Record bpf events: + * Record BPF events: * enum perf_bpf_event_type { * PERF_BPF_EVENT_UNKNOWN = 0, * PERF_BPF_EVENT_PROG_LOAD = 1, @@ -1217,6 +1243,22 @@ enum perf_event_type { */ PERF_RECORD_AUX_OUTPUT_HW_ID = 21, + /* + * This user callchain capture was deferred until shortly before + * returning to user space. Previous samples would have kernel + * callchains only and they need to be stitched with this to make full + * callchains. + * + * struct { + * struct perf_event_header header; + * u64 cookie; + * u64 nr; + * u64 ips[nr]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_CALLCHAIN_DEFERRED = 22, + PERF_RECORD_MAX, /* non-ABI */ }; @@ -1234,181 +1276,182 @@ enum perf_record_ksymbol_type { #define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0) enum perf_bpf_event_type { - PERF_BPF_EVENT_UNKNOWN = 0, - PERF_BPF_EVENT_PROG_LOAD = 1, - PERF_BPF_EVENT_PROG_UNLOAD = 2, - PERF_BPF_EVENT_MAX, /* non-ABI */ + PERF_BPF_EVENT_UNKNOWN = 0, + PERF_BPF_EVENT_PROG_LOAD = 1, + PERF_BPF_EVENT_PROG_UNLOAD = 2, + PERF_BPF_EVENT_MAX, /* non-ABI */ }; -#define PERF_MAX_STACK_DEPTH 127 -#define PERF_MAX_CONTEXTS_PER_STACK 8 +#define PERF_MAX_STACK_DEPTH 127 +#define PERF_MAX_CONTEXTS_PER_STACK 8 enum perf_callchain_context { - PERF_CONTEXT_HV = (__u64)-32, - PERF_CONTEXT_KERNEL = (__u64)-128, - PERF_CONTEXT_USER = (__u64)-512, + PERF_CONTEXT_HV = (__u64)-32, + PERF_CONTEXT_KERNEL = (__u64)-128, + PERF_CONTEXT_USER = (__u64)-512, + PERF_CONTEXT_USER_DEFERRED = (__u64)-640, - PERF_CONTEXT_GUEST = (__u64)-2048, - PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, - PERF_CONTEXT_GUEST_USER = (__u64)-2560, + PERF_CONTEXT_GUEST = (__u64)-2048, + PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, + PERF_CONTEXT_GUEST_USER = (__u64)-2560, - PERF_CONTEXT_MAX = (__u64)-4095, + PERF_CONTEXT_MAX = (__u64)-4095, }; /** * PERF_RECORD_AUX::flags bits */ -#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ -#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ -#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ -#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ +#define PERF_AUX_FLAG_TRUNCATED 0x0001 /* Record was truncated to fit */ +#define PERF_AUX_FLAG_OVERWRITE 0x0002 /* Snapshot from overwrite mode */ +#define PERF_AUX_FLAG_PARTIAL 0x0004 /* Record contains gaps */ +#define PERF_AUX_FLAG_COLLISION 0x0008 /* Sample collided with another */ #define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */ /* CoreSight PMU AUX buffer formats */ -#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ -#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ -#define PERF_FLAG_FD_NO_GROUP (1UL << 0) -#define PERF_FLAG_FD_OUTPUT (1UL << 1) -#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ -#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ +#define PERF_FLAG_FD_NO_GROUP (1UL << 0) +#define PERF_FLAG_FD_OUTPUT (1UL << 1) +#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup ID, per-CPU mode only */ +#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ #if defined(__LITTLE_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_op:5, /* type of opcode */ - mem_lvl:14, /* memory hierarchy level */ - mem_snoop:5, /* snoop mode */ - mem_lock:2, /* lock instr */ - mem_dtlb:7, /* tlb access */ - mem_lvl_num:4, /* memory hierarchy level number */ - mem_remote:1, /* remote */ - mem_snoopx:2, /* snoop mode, ext */ - mem_blk:3, /* access blocked */ - mem_hops:3, /* hop level */ - mem_rsvd:18; + __u64 mem_op : 5, /* Type of opcode */ + mem_lvl : 14, /* Memory hierarchy level */ + mem_snoop : 5, /* Snoop mode */ + mem_lock : 2, /* Lock instr */ + mem_dtlb : 7, /* TLB access */ + mem_lvl_num : 4, /* Memory hierarchy level number */ + mem_remote : 1, /* Remote */ + mem_snoopx : 2, /* Snoop mode, ext */ + mem_blk : 3, /* Access blocked */ + mem_hops : 3, /* Hop level */ + mem_rsvd : 18; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd:18, - mem_hops:3, /* hop level */ - mem_blk:3, /* access blocked */ - mem_snoopx:2, /* snoop mode, ext */ - mem_remote:1, /* remote */ - mem_lvl_num:4, /* memory hierarchy level number */ - mem_dtlb:7, /* tlb access */ - mem_lock:2, /* lock instr */ - mem_snoop:5, /* snoop mode */ - mem_lvl:14, /* memory hierarchy level */ - mem_op:5; /* type of opcode */ + __u64 mem_rsvd : 18, + mem_hops : 3, /* Hop level */ + mem_blk : 3, /* Access blocked */ + mem_snoopx : 2, /* Snoop mode, ext */ + mem_remote : 1, /* Remote */ + mem_lvl_num : 4, /* Memory hierarchy level number */ + mem_dtlb : 7, /* TLB access */ + mem_lock : 2, /* Lock instr */ + mem_snoop : 5, /* Snoop mode */ + mem_lvl : 14, /* Memory hierarchy level */ + mem_op : 5; /* Type of opcode */ }; }; #else -#error "Unknown endianness" +# error "Unknown endianness" #endif -/* type of opcode (load/store/prefetch,code) */ -#define PERF_MEM_OP_NA 0x01 /* not available */ -#define PERF_MEM_OP_LOAD 0x02 /* load instruction */ -#define PERF_MEM_OP_STORE 0x04 /* store instruction */ -#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */ -#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */ -#define PERF_MEM_OP_SHIFT 0 +/* Type of memory opcode: */ +#define PERF_MEM_OP_NA 0x0001 /* Not available */ +#define PERF_MEM_OP_LOAD 0x0002 /* Load instruction */ +#define PERF_MEM_OP_STORE 0x0004 /* Store instruction */ +#define PERF_MEM_OP_PFETCH 0x0008 /* Prefetch */ +#define PERF_MEM_OP_EXEC 0x0010 /* Code (execution) */ +#define PERF_MEM_OP_SHIFT 0 /* - * PERF_MEM_LVL_* namespace being depricated to some extent in the + * The PERF_MEM_LVL_* namespace is being deprecated to some extent in * favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields. - * Supporting this namespace inorder to not break defined ABIs. + * We support this namespace in order to not break defined ABIs. * - * memory hierarchy (memory level, hit or miss) + * Memory hierarchy (memory level, hit or miss) */ -#define PERF_MEM_LVL_NA 0x01 /* not available */ -#define PERF_MEM_LVL_HIT 0x02 /* hit level */ -#define PERF_MEM_LVL_MISS 0x04 /* miss level */ -#define PERF_MEM_LVL_L1 0x08 /* L1 */ -#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */ -#define PERF_MEM_LVL_L2 0x20 /* L2 */ -#define PERF_MEM_LVL_L3 0x40 /* L3 */ -#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */ -#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */ -#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */ -#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */ -#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */ -#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */ -#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ -#define PERF_MEM_LVL_SHIFT 5 - -#define PERF_MEM_REMOTE_REMOTE 0x01 /* Remote */ -#define PERF_MEM_REMOTE_SHIFT 37 - -#define PERF_MEM_LVLNUM_L1 0x01 /* L1 */ -#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ -#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ -#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */ -#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */ -/* 0x7 available */ -#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */ -#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ -#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ -#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ -#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */ -#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ -#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ -#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ - -#define PERF_MEM_LVLNUM_SHIFT 33 - -/* snoop mode */ -#define PERF_MEM_SNOOP_NA 0x01 /* not available */ -#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */ -#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */ -#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */ -#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */ -#define PERF_MEM_SNOOP_SHIFT 19 - -#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ -#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */ -#define PERF_MEM_SNOOPX_SHIFT 38 - -/* locked instruction */ -#define PERF_MEM_LOCK_NA 0x01 /* not available */ -#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */ -#define PERF_MEM_LOCK_SHIFT 24 +#define PERF_MEM_LVL_NA 0x0001 /* Not available */ +#define PERF_MEM_LVL_HIT 0x0002 /* Hit level */ +#define PERF_MEM_LVL_MISS 0x0004 /* Miss level */ +#define PERF_MEM_LVL_L1 0x0008 /* L1 */ +#define PERF_MEM_LVL_LFB 0x0010 /* Line Fill Buffer */ +#define PERF_MEM_LVL_L2 0x0020 /* L2 */ +#define PERF_MEM_LVL_L3 0x0040 /* L3 */ +#define PERF_MEM_LVL_LOC_RAM 0x0080 /* Local DRAM */ +#define PERF_MEM_LVL_REM_RAM1 0x0100 /* Remote DRAM (1 hop) */ +#define PERF_MEM_LVL_REM_RAM2 0x0200 /* Remote DRAM (2 hops) */ +#define PERF_MEM_LVL_REM_CCE1 0x0400 /* Remote Cache (1 hop) */ +#define PERF_MEM_LVL_REM_CCE2 0x0800 /* Remote Cache (2 hops) */ +#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */ +#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ +#define PERF_MEM_LVL_SHIFT 5 + +#define PERF_MEM_REMOTE_REMOTE 0x0001 /* Remote */ +#define PERF_MEM_REMOTE_SHIFT 37 + +#define PERF_MEM_LVLNUM_L1 0x0001 /* L1 */ +#define PERF_MEM_LVLNUM_L2 0x0002 /* L2 */ +#define PERF_MEM_LVLNUM_L3 0x0003 /* L3 */ +#define PERF_MEM_LVLNUM_L4 0x0004 /* L4 */ +#define PERF_MEM_LVLNUM_L2_MHB 0x0005 /* L2 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_MSC 0x0006 /* Memory-side Cache */ +/* 0x007 available */ +#define PERF_MEM_LVLNUM_UNC 0x0008 /* Uncached */ +#define PERF_MEM_LVLNUM_CXL 0x0009 /* CXL */ +#define PERF_MEM_LVLNUM_IO 0x000a /* I/O */ +#define PERF_MEM_LVLNUM_ANY_CACHE 0x000b /* Any cache */ +#define PERF_MEM_LVLNUM_LFB 0x000c /* LFB / L1 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_RAM 0x000d /* RAM */ +#define PERF_MEM_LVLNUM_PMEM 0x000e /* PMEM */ +#define PERF_MEM_LVLNUM_NA 0x000f /* N/A */ + +#define PERF_MEM_LVLNUM_SHIFT 33 + +/* Snoop mode */ +#define PERF_MEM_SNOOP_NA 0x0001 /* Not available */ +#define PERF_MEM_SNOOP_NONE 0x0002 /* No snoop */ +#define PERF_MEM_SNOOP_HIT 0x0004 /* Snoop hit */ +#define PERF_MEM_SNOOP_MISS 0x0008 /* Snoop miss */ +#define PERF_MEM_SNOOP_HITM 0x0010 /* Snoop hit modified */ +#define PERF_MEM_SNOOP_SHIFT 19 + +#define PERF_MEM_SNOOPX_FWD 0x0001 /* Forward */ +#define PERF_MEM_SNOOPX_PEER 0x0002 /* Transfer from peer */ +#define PERF_MEM_SNOOPX_SHIFT 38 + +/* Locked instruction */ +#define PERF_MEM_LOCK_NA 0x0001 /* Not available */ +#define PERF_MEM_LOCK_LOCKED 0x0002 /* Locked transaction */ +#define PERF_MEM_LOCK_SHIFT 24 /* TLB access */ -#define PERF_MEM_TLB_NA 0x01 /* not available */ -#define PERF_MEM_TLB_HIT 0x02 /* hit level */ -#define PERF_MEM_TLB_MISS 0x04 /* miss level */ -#define PERF_MEM_TLB_L1 0x08 /* L1 */ -#define PERF_MEM_TLB_L2 0x10 /* L2 */ -#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/ -#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */ -#define PERF_MEM_TLB_SHIFT 26 +#define PERF_MEM_TLB_NA 0x0001 /* Not available */ +#define PERF_MEM_TLB_HIT 0x0002 /* Hit level */ +#define PERF_MEM_TLB_MISS 0x0004 /* Miss level */ +#define PERF_MEM_TLB_L1 0x0008 /* L1 */ +#define PERF_MEM_TLB_L2 0x0010 /* L2 */ +#define PERF_MEM_TLB_WK 0x0020 /* Hardware Walker*/ +#define PERF_MEM_TLB_OS 0x0040 /* OS fault handler */ +#define PERF_MEM_TLB_SHIFT 26 /* Access blocked */ -#define PERF_MEM_BLK_NA 0x01 /* not available */ -#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */ -#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */ -#define PERF_MEM_BLK_SHIFT 40 - -/* hop level */ -#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ -#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */ -#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */ -#define PERF_MEM_HOPS_3 0x04 /* remote board */ +#define PERF_MEM_BLK_NA 0x0001 /* Not available */ +#define PERF_MEM_BLK_DATA 0x0002 /* Data could not be forwarded */ +#define PERF_MEM_BLK_ADDR 0x0004 /* Address conflict */ +#define PERF_MEM_BLK_SHIFT 40 + +/* Hop level */ +#define PERF_MEM_HOPS_0 0x0001 /* Remote core, same node */ +#define PERF_MEM_HOPS_1 0x0002 /* Remote node, same socket */ +#define PERF_MEM_HOPS_2 0x0003 /* Remote socket, same board */ +#define PERF_MEM_HOPS_3 0x0004 /* Remote board */ /* 5-7 available */ -#define PERF_MEM_HOPS_SHIFT 43 +#define PERF_MEM_HOPS_SHIFT 43 #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) /* - * single taken branch record layout: + * Layout of single taken branch records: * * from: source instruction (may not always be a branch insn) * to: branch target @@ -1427,37 +1470,37 @@ union perf_mem_data_src { struct perf_branch_entry { __u64 from; __u64 to; - __u64 mispred:1, /* target mispredicted */ - predicted:1,/* target predicted */ - in_tx:1, /* in transaction */ - abort:1, /* transaction abort */ - cycles:16, /* cycle count to last branch */ - type:4, /* branch type */ - spec:2, /* branch speculation info */ - new_type:4, /* additional branch type */ - priv:3, /* privilege level */ - reserved:31; + __u64 mispred : 1, /* target mispredicted */ + predicted : 1, /* target predicted */ + in_tx : 1, /* in transaction */ + abort : 1, /* transaction abort */ + cycles : 16, /* cycle count to last branch */ + type : 4, /* branch type */ + spec : 2, /* branch speculation info */ + new_type : 4, /* additional branch type */ + priv : 3, /* privilege level */ + reserved : 31; }; /* Size of used info bits in struct perf_branch_entry */ #define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33 union perf_sample_weight { - __u64 full; + __u64 full; #if defined(__LITTLE_ENDIAN_BITFIELD) struct { - __u32 var1_dw; - __u16 var2_w; - __u16 var3_w; + __u32 var1_dw; + __u16 var2_w; + __u16 var3_w; }; #elif defined(__BIG_ENDIAN_BITFIELD) struct { - __u16 var3_w; - __u16 var2_w; - __u32 var1_dw; + __u16 var3_w; + __u16 var2_w; + __u32 var1_dw; }; #else -#error "Unknown endianness" +# error "Unknown endianness" #endif }; diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 35791791a879..475fc8ca4403 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -230,7 +230,7 @@ struct prctl_mm_map { # define PR_PAC_APDBKEY (1UL << 3) # define PR_PAC_APGAKEY (1UL << 4) -/* Tagged user address controls for arm64 */ +/* Tagged user address controls for arm64 and RISC-V */ #define PR_SET_TAGGED_ADDR_CTRL 55 #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) @@ -244,6 +244,9 @@ struct prctl_mm_map { # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) /* Unused; kept only for source compatibility */ # define PR_MTE_TCF_SHIFT 1 +/* RISC-V pointer masking tag length */ +# define PR_PMLEN_SHIFT 24 +# define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT) /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 @@ -252,7 +255,12 @@ struct prctl_mm_map { /* Dispatch syscalls to a userspace handler */ #define PR_SET_SYSCALL_USER_DISPATCH 59 # define PR_SYS_DISPATCH_OFF 0 -# define PR_SYS_DISPATCH_ON 1 +/* Enable dispatch except for the specified range */ +# define PR_SYS_DISPATCH_EXCLUSIVE_ON 1 +/* Enable dispatch for the specified range */ +# define PR_SYS_DISPATCH_INCLUSIVE_ON 2 +/* Legacy name for backwards compatibility */ +# define PR_SYS_DISPATCH_ON PR_SYS_DISPATCH_EXCLUSIVE_ON /* The control values for the user space selector when dispatch is enabled */ # define SYSCALL_DISPATCH_FILTER_ALLOW 0 # define SYSCALL_DISPATCH_FILTER_BLOCK 1 @@ -328,4 +336,42 @@ struct prctl_mm_map { # define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on exec */ # define PR_PPC_DEXCR_CTRL_MASK 0x1f +/* + * Get the current shadow stack configuration for the current thread, + * this will be the value configured via PR_SET_SHADOW_STACK_STATUS. + */ +#define PR_GET_SHADOW_STACK_STATUS 74 + +/* + * Set the current shadow stack configuration. Enabling the shadow + * stack will cause a shadow stack to be allocated for the thread. + */ +#define PR_SET_SHADOW_STACK_STATUS 75 +# define PR_SHADOW_STACK_ENABLE (1UL << 0) +# define PR_SHADOW_STACK_WRITE (1UL << 1) +# define PR_SHADOW_STACK_PUSH (1UL << 2) + +/* + * Prevent further changes to the specified shadow stack + * configuration. All bits may be locked via this call, including + * undefined bits. + */ +#define PR_LOCK_SHADOW_STACK_STATUS 76 + +/* + * Controls the mode of timer_create() for CRIU restore operations. + * Enabling this allows CRIU to restore timers with explicit IDs. + * + * Don't use for normal operations as the result might be undefined. + */ +#define PR_TIMER_CREATE_RESTORE_IDS 77 +# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0 +# define PR_TIMER_CREATE_RESTORE_IDS_ON 1 +# define PR_TIMER_CREATE_RESTORE_IDS_GET 2 + +/* FUTEX hash management */ +#define PR_FUTEX_HASH 78 +# define PR_FUTEX_HASH_SET_SLOTS 1 +# define PR_FUTEX_HASH_GET_SLOTS 2 + #endif /* _LINUX_PRCTL_H */ diff --git a/tools/include/uapi/linux/rtnetlink.h b/tools/include/uapi/linux/rtnetlink.h new file mode 100644 index 000000000000..dab9493c791b --- /dev/null +++ b/tools/include/uapi/linux/rtnetlink.h @@ -0,0 +1,848 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_RTNETLINK_H +#define _UAPI__LINUX_RTNETLINK_H + +#include <linux/types.h> +#include <linux/netlink.h> +#include <linux/if_link.h> +#include <linux/if_addr.h> +#include <linux/neighbour.h> + +/* rtnetlink families. Values up to 127 are reserved for real address + * families, values above 128 may be used arbitrarily. + */ +#define RTNL_FAMILY_IPMR 128 +#define RTNL_FAMILY_IP6MR 129 +#define RTNL_FAMILY_MAX 129 + +/**** + * Routing/neighbour discovery messages. + ****/ + +/* Types of messages */ + +enum { + RTM_BASE = 16, +#define RTM_BASE RTM_BASE + + RTM_NEWLINK = 16, +#define RTM_NEWLINK RTM_NEWLINK + RTM_DELLINK, +#define RTM_DELLINK RTM_DELLINK + RTM_GETLINK, +#define RTM_GETLINK RTM_GETLINK + RTM_SETLINK, +#define RTM_SETLINK RTM_SETLINK + + RTM_NEWADDR = 20, +#define RTM_NEWADDR RTM_NEWADDR + RTM_DELADDR, +#define RTM_DELADDR RTM_DELADDR + RTM_GETADDR, +#define RTM_GETADDR RTM_GETADDR + + RTM_NEWROUTE = 24, +#define RTM_NEWROUTE RTM_NEWROUTE + RTM_DELROUTE, +#define RTM_DELROUTE RTM_DELROUTE + RTM_GETROUTE, +#define RTM_GETROUTE RTM_GETROUTE + + RTM_NEWNEIGH = 28, +#define RTM_NEWNEIGH RTM_NEWNEIGH + RTM_DELNEIGH, +#define RTM_DELNEIGH RTM_DELNEIGH + RTM_GETNEIGH, +#define RTM_GETNEIGH RTM_GETNEIGH + + RTM_NEWRULE = 32, +#define RTM_NEWRULE RTM_NEWRULE + RTM_DELRULE, +#define RTM_DELRULE RTM_DELRULE + RTM_GETRULE, +#define RTM_GETRULE RTM_GETRULE + + RTM_NEWQDISC = 36, +#define RTM_NEWQDISC RTM_NEWQDISC + RTM_DELQDISC, +#define RTM_DELQDISC RTM_DELQDISC + RTM_GETQDISC, +#define RTM_GETQDISC RTM_GETQDISC + + RTM_NEWTCLASS = 40, +#define RTM_NEWTCLASS RTM_NEWTCLASS + RTM_DELTCLASS, +#define RTM_DELTCLASS RTM_DELTCLASS + RTM_GETTCLASS, +#define RTM_GETTCLASS RTM_GETTCLASS + + RTM_NEWTFILTER = 44, +#define RTM_NEWTFILTER RTM_NEWTFILTER + RTM_DELTFILTER, +#define RTM_DELTFILTER RTM_DELTFILTER + RTM_GETTFILTER, +#define RTM_GETTFILTER RTM_GETTFILTER + + RTM_NEWACTION = 48, +#define RTM_NEWACTION RTM_NEWACTION + RTM_DELACTION, +#define RTM_DELACTION RTM_DELACTION + RTM_GETACTION, +#define RTM_GETACTION RTM_GETACTION + + RTM_NEWPREFIX = 52, +#define RTM_NEWPREFIX RTM_NEWPREFIX + + RTM_NEWMULTICAST = 56, +#define RTM_NEWMULTICAST RTM_NEWMULTICAST + RTM_DELMULTICAST, +#define RTM_DELMULTICAST RTM_DELMULTICAST + RTM_GETMULTICAST, +#define RTM_GETMULTICAST RTM_GETMULTICAST + + RTM_NEWANYCAST = 60, +#define RTM_NEWANYCAST RTM_NEWANYCAST + RTM_DELANYCAST, +#define RTM_DELANYCAST RTM_DELANYCAST + RTM_GETANYCAST, +#define RTM_GETANYCAST RTM_GETANYCAST + + RTM_NEWNEIGHTBL = 64, +#define RTM_NEWNEIGHTBL RTM_NEWNEIGHTBL + RTM_GETNEIGHTBL = 66, +#define RTM_GETNEIGHTBL RTM_GETNEIGHTBL + RTM_SETNEIGHTBL, +#define RTM_SETNEIGHTBL RTM_SETNEIGHTBL + + RTM_NEWNDUSEROPT = 68, +#define RTM_NEWNDUSEROPT RTM_NEWNDUSEROPT + + RTM_NEWADDRLABEL = 72, +#define RTM_NEWADDRLABEL RTM_NEWADDRLABEL + RTM_DELADDRLABEL, +#define RTM_DELADDRLABEL RTM_DELADDRLABEL + RTM_GETADDRLABEL, +#define RTM_GETADDRLABEL RTM_GETADDRLABEL + + RTM_GETDCB = 78, +#define RTM_GETDCB RTM_GETDCB + RTM_SETDCB, +#define RTM_SETDCB RTM_SETDCB + + RTM_NEWNETCONF = 80, +#define RTM_NEWNETCONF RTM_NEWNETCONF + RTM_DELNETCONF, +#define RTM_DELNETCONF RTM_DELNETCONF + RTM_GETNETCONF = 82, +#define RTM_GETNETCONF RTM_GETNETCONF + + RTM_NEWMDB = 84, +#define RTM_NEWMDB RTM_NEWMDB + RTM_DELMDB = 85, +#define RTM_DELMDB RTM_DELMDB + RTM_GETMDB = 86, +#define RTM_GETMDB RTM_GETMDB + + RTM_NEWNSID = 88, +#define RTM_NEWNSID RTM_NEWNSID + RTM_DELNSID = 89, +#define RTM_DELNSID RTM_DELNSID + RTM_GETNSID = 90, +#define RTM_GETNSID RTM_GETNSID + + RTM_NEWSTATS = 92, +#define RTM_NEWSTATS RTM_NEWSTATS + RTM_GETSTATS = 94, +#define RTM_GETSTATS RTM_GETSTATS + RTM_SETSTATS, +#define RTM_SETSTATS RTM_SETSTATS + + RTM_NEWCACHEREPORT = 96, +#define RTM_NEWCACHEREPORT RTM_NEWCACHEREPORT + + RTM_NEWCHAIN = 100, +#define RTM_NEWCHAIN RTM_NEWCHAIN + RTM_DELCHAIN, +#define RTM_DELCHAIN RTM_DELCHAIN + RTM_GETCHAIN, +#define RTM_GETCHAIN RTM_GETCHAIN + + RTM_NEWNEXTHOP = 104, +#define RTM_NEWNEXTHOP RTM_NEWNEXTHOP + RTM_DELNEXTHOP, +#define RTM_DELNEXTHOP RTM_DELNEXTHOP + RTM_GETNEXTHOP, +#define RTM_GETNEXTHOP RTM_GETNEXTHOP + + RTM_NEWLINKPROP = 108, +#define RTM_NEWLINKPROP RTM_NEWLINKPROP + RTM_DELLINKPROP, +#define RTM_DELLINKPROP RTM_DELLINKPROP + RTM_GETLINKPROP, +#define RTM_GETLINKPROP RTM_GETLINKPROP + + RTM_NEWVLAN = 112, +#define RTM_NEWVLAN RTM_NEWVLAN + RTM_DELVLAN, +#define RTM_DELVLAN RTM_DELVLAN + RTM_GETVLAN, +#define RTM_GETVLAN RTM_GETVLAN + + RTM_NEWNEXTHOPBUCKET = 116, +#define RTM_NEWNEXTHOPBUCKET RTM_NEWNEXTHOPBUCKET + RTM_DELNEXTHOPBUCKET, +#define RTM_DELNEXTHOPBUCKET RTM_DELNEXTHOPBUCKET + RTM_GETNEXTHOPBUCKET, +#define RTM_GETNEXTHOPBUCKET RTM_GETNEXTHOPBUCKET + + RTM_NEWTUNNEL = 120, +#define RTM_NEWTUNNEL RTM_NEWTUNNEL + RTM_DELTUNNEL, +#define RTM_DELTUNNEL RTM_DELTUNNEL + RTM_GETTUNNEL, +#define RTM_GETTUNNEL RTM_GETTUNNEL + + __RTM_MAX, +#define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) +}; + +#define RTM_NR_MSGTYPES (RTM_MAX + 1 - RTM_BASE) +#define RTM_NR_FAMILIES (RTM_NR_MSGTYPES >> 2) +#define RTM_FAM(cmd) (((cmd) - RTM_BASE) >> 2) + +/* + Generic structure for encapsulation of optional route information. + It is reminiscent of sockaddr, but with sa_family replaced + with attribute type. + */ + +struct rtattr { + unsigned short rta_len; + unsigned short rta_type; +}; + +/* Macros to handle rtattributes */ + +#define RTA_ALIGNTO 4U +#define RTA_ALIGN(len) ( ((len)+RTA_ALIGNTO-1) & ~(RTA_ALIGNTO-1) ) +#define RTA_OK(rta,len) ((len) >= (int)sizeof(struct rtattr) && \ + (rta)->rta_len >= sizeof(struct rtattr) && \ + (rta)->rta_len <= (len)) +#define RTA_NEXT(rta,attrlen) ((attrlen) -= RTA_ALIGN((rta)->rta_len), \ + (struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len))) +#define RTA_LENGTH(len) (RTA_ALIGN(sizeof(struct rtattr)) + (len)) +#define RTA_SPACE(len) RTA_ALIGN(RTA_LENGTH(len)) +#define RTA_DATA(rta) ((void*)(((char*)(rta)) + RTA_LENGTH(0))) +#define RTA_PAYLOAD(rta) ((int)((rta)->rta_len) - RTA_LENGTH(0)) + + + + +/****************************************************************************** + * Definitions used in routing table administration. + ****/ + +struct rtmsg { + unsigned char rtm_family; + unsigned char rtm_dst_len; + unsigned char rtm_src_len; + unsigned char rtm_tos; + + unsigned char rtm_table; /* Routing table id */ + unsigned char rtm_protocol; /* Routing protocol; see below */ + unsigned char rtm_scope; /* See below */ + unsigned char rtm_type; /* See below */ + + unsigned rtm_flags; +}; + +/* rtm_type */ + +enum { + RTN_UNSPEC, + RTN_UNICAST, /* Gateway or direct route */ + RTN_LOCAL, /* Accept locally */ + RTN_BROADCAST, /* Accept locally as broadcast, + send as broadcast */ + RTN_ANYCAST, /* Accept locally as broadcast, + but send as unicast */ + RTN_MULTICAST, /* Multicast route */ + RTN_BLACKHOLE, /* Drop */ + RTN_UNREACHABLE, /* Destination is unreachable */ + RTN_PROHIBIT, /* Administratively prohibited */ + RTN_THROW, /* Not in this table */ + RTN_NAT, /* Translate this address */ + RTN_XRESOLVE, /* Use external resolver */ + __RTN_MAX +}; + +#define RTN_MAX (__RTN_MAX - 1) + + +/* rtm_protocol */ + +#define RTPROT_UNSPEC 0 +#define RTPROT_REDIRECT 1 /* Route installed by ICMP redirects; + not used by current IPv4 */ +#define RTPROT_KERNEL 2 /* Route installed by kernel */ +#define RTPROT_BOOT 3 /* Route installed during boot */ +#define RTPROT_STATIC 4 /* Route installed by administrator */ + +/* Values of protocol >= RTPROT_STATIC are not interpreted by kernel; + they are just passed from user and back as is. + It will be used by hypothetical multiple routing daemons. + Note that protocol values should be standardized in order to + avoid conflicts. + */ + +#define RTPROT_GATED 8 /* Apparently, GateD */ +#define RTPROT_RA 9 /* RDISC/ND router advertisements */ +#define RTPROT_MRT 10 /* Merit MRT */ +#define RTPROT_ZEBRA 11 /* Zebra */ +#define RTPROT_BIRD 12 /* BIRD */ +#define RTPROT_DNROUTED 13 /* DECnet routing daemon */ +#define RTPROT_XORP 14 /* XORP */ +#define RTPROT_NTK 15 /* Netsukuku */ +#define RTPROT_DHCP 16 /* DHCP client */ +#define RTPROT_MROUTED 17 /* Multicast daemon */ +#define RTPROT_KEEPALIVED 18 /* Keepalived daemon */ +#define RTPROT_BABEL 42 /* Babel daemon */ +#define RTPROT_OVN 84 /* OVN daemon */ +#define RTPROT_OPENR 99 /* Open Routing (Open/R) Routes */ +#define RTPROT_BGP 186 /* BGP Routes */ +#define RTPROT_ISIS 187 /* ISIS Routes */ +#define RTPROT_OSPF 188 /* OSPF Routes */ +#define RTPROT_RIP 189 /* RIP Routes */ +#define RTPROT_EIGRP 192 /* EIGRP Routes */ + +/* rtm_scope + + Really it is not scope, but sort of distance to the destination. + NOWHERE are reserved for not existing destinations, HOST is our + local addresses, LINK are destinations, located on directly attached + link and UNIVERSE is everywhere in the Universe. + + Intermediate values are also possible f.e. interior routes + could be assigned a value between UNIVERSE and LINK. +*/ + +enum rt_scope_t { + RT_SCOPE_UNIVERSE=0, +/* User defined values */ + RT_SCOPE_SITE=200, + RT_SCOPE_LINK=253, + RT_SCOPE_HOST=254, + RT_SCOPE_NOWHERE=255 +}; + +/* rtm_flags */ + +#define RTM_F_NOTIFY 0x100 /* Notify user of route change */ +#define RTM_F_CLONED 0x200 /* This route is cloned */ +#define RTM_F_EQUALIZE 0x400 /* Multipath equalizer: NI */ +#define RTM_F_PREFIX 0x800 /* Prefix addresses */ +#define RTM_F_LOOKUP_TABLE 0x1000 /* set rtm_table to FIB lookup result */ +#define RTM_F_FIB_MATCH 0x2000 /* return full fib lookup match */ +#define RTM_F_OFFLOAD 0x4000 /* route is offloaded */ +#define RTM_F_TRAP 0x8000 /* route is trapping packets */ +#define RTM_F_OFFLOAD_FAILED 0x20000000 /* route offload failed, this value + * is chosen to avoid conflicts with + * other flags defined in + * include/uapi/linux/ipv6_route.h + */ + +/* Reserved table identifiers */ + +enum rt_class_t { + RT_TABLE_UNSPEC=0, +/* User defined values */ + RT_TABLE_COMPAT=252, + RT_TABLE_DEFAULT=253, + RT_TABLE_MAIN=254, + RT_TABLE_LOCAL=255, + RT_TABLE_MAX=0xFFFFFFFF +}; + + +/* Routing message attributes */ + +enum rtattr_type_t { + RTA_UNSPEC, + RTA_DST, + RTA_SRC, + RTA_IIF, + RTA_OIF, + RTA_GATEWAY, + RTA_PRIORITY, + RTA_PREFSRC, + RTA_METRICS, + RTA_MULTIPATH, + RTA_PROTOINFO, /* no longer used */ + RTA_FLOW, + RTA_CACHEINFO, + RTA_SESSION, /* no longer used */ + RTA_MP_ALGO, /* no longer used */ + RTA_TABLE, + RTA_MARK, + RTA_MFC_STATS, + RTA_VIA, + RTA_NEWDST, + RTA_PREF, + RTA_ENCAP_TYPE, + RTA_ENCAP, + RTA_EXPIRES, + RTA_PAD, + RTA_UID, + RTA_TTL_PROPAGATE, + RTA_IP_PROTO, + RTA_SPORT, + RTA_DPORT, + RTA_NH_ID, + RTA_FLOWLABEL, + __RTA_MAX +}; + +#define RTA_MAX (__RTA_MAX - 1) + +#define RTM_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct rtmsg)))) +#define RTM_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct rtmsg)) + +/* RTM_MULTIPATH --- array of struct rtnexthop. + * + * "struct rtnexthop" describes all necessary nexthop information, + * i.e. parameters of path to a destination via this nexthop. + * + * At the moment it is impossible to set different prefsrc, mtu, window + * and rtt for different paths from multipath. + */ + +struct rtnexthop { + unsigned short rtnh_len; + unsigned char rtnh_flags; + unsigned char rtnh_hops; + int rtnh_ifindex; +}; + +/* rtnh_flags */ + +#define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */ +#define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */ +#define RTNH_F_ONLINK 4 /* Gateway is forced on link */ +#define RTNH_F_OFFLOAD 8 /* Nexthop is offloaded */ +#define RTNH_F_LINKDOWN 16 /* carrier-down on nexthop */ +#define RTNH_F_UNRESOLVED 32 /* The entry is unresolved (ipmr) */ +#define RTNH_F_TRAP 64 /* Nexthop is trapping packets */ + +#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | \ + RTNH_F_OFFLOAD | RTNH_F_TRAP) + +/* Macros to handle hexthops */ + +#define RTNH_ALIGNTO 4 +#define RTNH_ALIGN(len) ( ((len)+RTNH_ALIGNTO-1) & ~(RTNH_ALIGNTO-1) ) +#define RTNH_OK(rtnh,len) ((rtnh)->rtnh_len >= sizeof(struct rtnexthop) && \ + ((int)(rtnh)->rtnh_len) <= (len)) +#define RTNH_NEXT(rtnh) ((struct rtnexthop*)(((char*)(rtnh)) + RTNH_ALIGN((rtnh)->rtnh_len))) +#define RTNH_LENGTH(len) (RTNH_ALIGN(sizeof(struct rtnexthop)) + (len)) +#define RTNH_SPACE(len) RTNH_ALIGN(RTNH_LENGTH(len)) +#define RTNH_DATA(rtnh) ((struct rtattr*)(((char*)(rtnh)) + RTNH_LENGTH(0))) + +/* RTA_VIA */ +struct rtvia { + __kernel_sa_family_t rtvia_family; + __u8 rtvia_addr[]; +}; + +/* RTM_CACHEINFO */ + +struct rta_cacheinfo { + __u32 rta_clntref; + __u32 rta_lastuse; + __s32 rta_expires; + __u32 rta_error; + __u32 rta_used; + +#define RTNETLINK_HAVE_PEERINFO 1 + __u32 rta_id; + __u32 rta_ts; + __u32 rta_tsage; +}; + +/* RTM_METRICS --- array of struct rtattr with types of RTAX_* */ + +enum { + RTAX_UNSPEC, +#define RTAX_UNSPEC RTAX_UNSPEC + RTAX_LOCK, +#define RTAX_LOCK RTAX_LOCK + RTAX_MTU, +#define RTAX_MTU RTAX_MTU + RTAX_WINDOW, +#define RTAX_WINDOW RTAX_WINDOW + RTAX_RTT, +#define RTAX_RTT RTAX_RTT + RTAX_RTTVAR, +#define RTAX_RTTVAR RTAX_RTTVAR + RTAX_SSTHRESH, +#define RTAX_SSTHRESH RTAX_SSTHRESH + RTAX_CWND, +#define RTAX_CWND RTAX_CWND + RTAX_ADVMSS, +#define RTAX_ADVMSS RTAX_ADVMSS + RTAX_REORDERING, +#define RTAX_REORDERING RTAX_REORDERING + RTAX_HOPLIMIT, +#define RTAX_HOPLIMIT RTAX_HOPLIMIT + RTAX_INITCWND, +#define RTAX_INITCWND RTAX_INITCWND + RTAX_FEATURES, +#define RTAX_FEATURES RTAX_FEATURES + RTAX_RTO_MIN, +#define RTAX_RTO_MIN RTAX_RTO_MIN + RTAX_INITRWND, +#define RTAX_INITRWND RTAX_INITRWND + RTAX_QUICKACK, +#define RTAX_QUICKACK RTAX_QUICKACK + RTAX_CC_ALGO, +#define RTAX_CC_ALGO RTAX_CC_ALGO + RTAX_FASTOPEN_NO_COOKIE, +#define RTAX_FASTOPEN_NO_COOKIE RTAX_FASTOPEN_NO_COOKIE + __RTAX_MAX +}; + +#define RTAX_MAX (__RTAX_MAX - 1) + +#define RTAX_FEATURE_ECN (1 << 0) +#define RTAX_FEATURE_SACK (1 << 1) /* unused */ +#define RTAX_FEATURE_TIMESTAMP (1 << 2) /* unused */ +#define RTAX_FEATURE_ALLFRAG (1 << 3) /* unused */ +#define RTAX_FEATURE_TCP_USEC_TS (1 << 4) + +#define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | \ + RTAX_FEATURE_SACK | \ + RTAX_FEATURE_TIMESTAMP | \ + RTAX_FEATURE_ALLFRAG | \ + RTAX_FEATURE_TCP_USEC_TS) + +struct rta_session { + __u8 proto; + __u8 pad1; + __u16 pad2; + + union { + struct { + __u16 sport; + __u16 dport; + } ports; + + struct { + __u8 type; + __u8 code; + __u16 ident; + } icmpt; + + __u32 spi; + } u; +}; + +struct rta_mfc_stats { + __u64 mfcs_packets; + __u64 mfcs_bytes; + __u64 mfcs_wrong_if; +}; + +/**** + * General form of address family dependent message. + ****/ + +struct rtgenmsg { + unsigned char rtgen_family; +}; + +/***************************************************************** + * Link layer specific messages. + ****/ + +/* struct ifinfomsg + * passes link level specific information, not dependent + * on network protocol. + */ + +struct ifinfomsg { + unsigned char ifi_family; + unsigned char __ifi_pad; + unsigned short ifi_type; /* ARPHRD_* */ + int ifi_index; /* Link index */ + unsigned ifi_flags; /* IFF_* flags */ + unsigned ifi_change; /* IFF_* change mask */ +}; + +/******************************************************************** + * prefix information + ****/ + +struct prefixmsg { + unsigned char prefix_family; + unsigned char prefix_pad1; + unsigned short prefix_pad2; + int prefix_ifindex; + unsigned char prefix_type; + unsigned char prefix_len; + unsigned char prefix_flags; + unsigned char prefix_pad3; +}; + +enum +{ + PREFIX_UNSPEC, + PREFIX_ADDRESS, + PREFIX_CACHEINFO, + __PREFIX_MAX +}; + +#define PREFIX_MAX (__PREFIX_MAX - 1) + +struct prefix_cacheinfo { + __u32 preferred_time; + __u32 valid_time; +}; + + +/***************************************************************** + * Traffic control messages. + ****/ + +struct tcmsg { + unsigned char tcm_family; + unsigned char tcm__pad1; + unsigned short tcm__pad2; + int tcm_ifindex; + __u32 tcm_handle; + __u32 tcm_parent; +/* tcm_block_index is used instead of tcm_parent + * in case tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK + */ +#define tcm_block_index tcm_parent + __u32 tcm_info; +}; + +/* For manipulation of filters in shared block, tcm_ifindex is set to + * TCM_IFINDEX_MAGIC_BLOCK, and tcm_parent is aliased to tcm_block_index + * which is the block index. + */ +#define TCM_IFINDEX_MAGIC_BLOCK (0xFFFFFFFFU) + +enum { + TCA_UNSPEC, + TCA_KIND, + TCA_OPTIONS, + TCA_STATS, + TCA_XSTATS, + TCA_RATE, + TCA_FCNT, + TCA_STATS2, + TCA_STAB, + TCA_PAD, + TCA_DUMP_INVISIBLE, + TCA_CHAIN, + TCA_HW_OFFLOAD, + TCA_INGRESS_BLOCK, + TCA_EGRESS_BLOCK, + TCA_DUMP_FLAGS, + TCA_EXT_WARN_MSG, + __TCA_MAX +}; + +#define TCA_MAX (__TCA_MAX - 1) + +#define TCA_DUMP_FLAGS_TERSE (1 << 0) /* Means that in dump user gets only basic + * data necessary to identify the objects + * (handle, cookie, etc.) and stats. + */ + +#define TCA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg)))) +#define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg)) + +/******************************************************************** + * Neighbor Discovery userland options + ****/ + +struct nduseroptmsg { + unsigned char nduseropt_family; + unsigned char nduseropt_pad1; + unsigned short nduseropt_opts_len; /* Total length of options */ + int nduseropt_ifindex; + __u8 nduseropt_icmp_type; + __u8 nduseropt_icmp_code; + unsigned short nduseropt_pad2; + unsigned int nduseropt_pad3; + /* Followed by one or more ND options */ +}; + +enum { + NDUSEROPT_UNSPEC, + NDUSEROPT_SRCADDR, + __NDUSEROPT_MAX +}; + +#define NDUSEROPT_MAX (__NDUSEROPT_MAX - 1) + +#ifndef __KERNEL__ +/* RTnetlink multicast groups - backwards compatibility for userspace */ +#define RTMGRP_LINK 1 +#define RTMGRP_NOTIFY 2 +#define RTMGRP_NEIGH 4 +#define RTMGRP_TC 8 + +#define RTMGRP_IPV4_IFADDR 0x10 +#define RTMGRP_IPV4_MROUTE 0x20 +#define RTMGRP_IPV4_ROUTE 0x40 +#define RTMGRP_IPV4_RULE 0x80 + +#define RTMGRP_IPV6_IFADDR 0x100 +#define RTMGRP_IPV6_MROUTE 0x200 +#define RTMGRP_IPV6_ROUTE 0x400 +#define RTMGRP_IPV6_IFINFO 0x800 + +#define RTMGRP_DECnet_IFADDR 0x1000 +#define RTMGRP_DECnet_ROUTE 0x4000 + +#define RTMGRP_IPV6_PREFIX 0x20000 +#endif + +/* RTnetlink multicast groups */ +enum rtnetlink_groups { + RTNLGRP_NONE, +#define RTNLGRP_NONE RTNLGRP_NONE + RTNLGRP_LINK, +#define RTNLGRP_LINK RTNLGRP_LINK + RTNLGRP_NOTIFY, +#define RTNLGRP_NOTIFY RTNLGRP_NOTIFY + RTNLGRP_NEIGH, +#define RTNLGRP_NEIGH RTNLGRP_NEIGH + RTNLGRP_TC, +#define RTNLGRP_TC RTNLGRP_TC + RTNLGRP_IPV4_IFADDR, +#define RTNLGRP_IPV4_IFADDR RTNLGRP_IPV4_IFADDR + RTNLGRP_IPV4_MROUTE, +#define RTNLGRP_IPV4_MROUTE RTNLGRP_IPV4_MROUTE + RTNLGRP_IPV4_ROUTE, +#define RTNLGRP_IPV4_ROUTE RTNLGRP_IPV4_ROUTE + RTNLGRP_IPV4_RULE, +#define RTNLGRP_IPV4_RULE RTNLGRP_IPV4_RULE + RTNLGRP_IPV6_IFADDR, +#define RTNLGRP_IPV6_IFADDR RTNLGRP_IPV6_IFADDR + RTNLGRP_IPV6_MROUTE, +#define RTNLGRP_IPV6_MROUTE RTNLGRP_IPV6_MROUTE + RTNLGRP_IPV6_ROUTE, +#define RTNLGRP_IPV6_ROUTE RTNLGRP_IPV6_ROUTE + RTNLGRP_IPV6_IFINFO, +#define RTNLGRP_IPV6_IFINFO RTNLGRP_IPV6_IFINFO + RTNLGRP_DECnet_IFADDR, +#define RTNLGRP_DECnet_IFADDR RTNLGRP_DECnet_IFADDR + RTNLGRP_NOP2, + RTNLGRP_DECnet_ROUTE, +#define RTNLGRP_DECnet_ROUTE RTNLGRP_DECnet_ROUTE + RTNLGRP_DECnet_RULE, +#define RTNLGRP_DECnet_RULE RTNLGRP_DECnet_RULE + RTNLGRP_NOP4, + RTNLGRP_IPV6_PREFIX, +#define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX + RTNLGRP_IPV6_RULE, +#define RTNLGRP_IPV6_RULE RTNLGRP_IPV6_RULE + RTNLGRP_ND_USEROPT, +#define RTNLGRP_ND_USEROPT RTNLGRP_ND_USEROPT + RTNLGRP_PHONET_IFADDR, +#define RTNLGRP_PHONET_IFADDR RTNLGRP_PHONET_IFADDR + RTNLGRP_PHONET_ROUTE, +#define RTNLGRP_PHONET_ROUTE RTNLGRP_PHONET_ROUTE + RTNLGRP_DCB, +#define RTNLGRP_DCB RTNLGRP_DCB + RTNLGRP_IPV4_NETCONF, +#define RTNLGRP_IPV4_NETCONF RTNLGRP_IPV4_NETCONF + RTNLGRP_IPV6_NETCONF, +#define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF + RTNLGRP_MDB, +#define RTNLGRP_MDB RTNLGRP_MDB + RTNLGRP_MPLS_ROUTE, +#define RTNLGRP_MPLS_ROUTE RTNLGRP_MPLS_ROUTE + RTNLGRP_NSID, +#define RTNLGRP_NSID RTNLGRP_NSID + RTNLGRP_MPLS_NETCONF, +#define RTNLGRP_MPLS_NETCONF RTNLGRP_MPLS_NETCONF + RTNLGRP_IPV4_MROUTE_R, +#define RTNLGRP_IPV4_MROUTE_R RTNLGRP_IPV4_MROUTE_R + RTNLGRP_IPV6_MROUTE_R, +#define RTNLGRP_IPV6_MROUTE_R RTNLGRP_IPV6_MROUTE_R + RTNLGRP_NEXTHOP, +#define RTNLGRP_NEXTHOP RTNLGRP_NEXTHOP + RTNLGRP_BRVLAN, +#define RTNLGRP_BRVLAN RTNLGRP_BRVLAN + RTNLGRP_MCTP_IFADDR, +#define RTNLGRP_MCTP_IFADDR RTNLGRP_MCTP_IFADDR + RTNLGRP_TUNNEL, +#define RTNLGRP_TUNNEL RTNLGRP_TUNNEL + RTNLGRP_STATS, +#define RTNLGRP_STATS RTNLGRP_STATS + RTNLGRP_IPV4_MCADDR, +#define RTNLGRP_IPV4_MCADDR RTNLGRP_IPV4_MCADDR + RTNLGRP_IPV6_MCADDR, +#define RTNLGRP_IPV6_MCADDR RTNLGRP_IPV6_MCADDR + RTNLGRP_IPV6_ACADDR, +#define RTNLGRP_IPV6_ACADDR RTNLGRP_IPV6_ACADDR + __RTNLGRP_MAX +}; +#define RTNLGRP_MAX (__RTNLGRP_MAX - 1) + +/* TC action piece */ +struct tcamsg { + unsigned char tca_family; + unsigned char tca__pad1; + unsigned short tca__pad2; +}; + +enum { + TCA_ROOT_UNSPEC, + TCA_ROOT_TAB, +#define TCA_ACT_TAB TCA_ROOT_TAB +#define TCAA_MAX TCA_ROOT_TAB + TCA_ROOT_FLAGS, + TCA_ROOT_COUNT, + TCA_ROOT_TIME_DELTA, /* in msecs */ + TCA_ROOT_EXT_WARN_MSG, + __TCA_ROOT_MAX, +#define TCA_ROOT_MAX (__TCA_ROOT_MAX - 1) +}; + +#define TA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg)))) +#define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg)) +/* tcamsg flags stored in attribute TCA_ROOT_FLAGS + * + * TCA_ACT_FLAG_LARGE_DUMP_ON user->kernel to request for larger than + * TCA_ACT_MAX_PRIO actions in a dump. All dump responses will contain the + * number of actions being dumped stored in for user app's consumption in + * TCA_ROOT_COUNT + * + * TCA_ACT_FLAG_TERSE_DUMP user->kernel to request terse (brief) dump that only + * includes essential action info (kind, index, etc.) + * + */ +#define TCA_FLAG_LARGE_DUMP_ON (1 << 0) +#define TCA_ACT_FLAG_LARGE_DUMP_ON TCA_FLAG_LARGE_DUMP_ON +#define TCA_ACT_FLAG_TERSE_DUMP (1 << 1) + +/* New extended info filters for IFLA_EXT_MASK */ +#define RTEXT_FILTER_VF (1 << 0) +#define RTEXT_FILTER_BRVLAN (1 << 1) +#define RTEXT_FILTER_BRVLAN_COMPRESSED (1 << 2) +#define RTEXT_FILTER_SKIP_STATS (1 << 3) +#define RTEXT_FILTER_MRP (1 << 4) +#define RTEXT_FILTER_CFM_CONFIG (1 << 5) +#define RTEXT_FILTER_CFM_STATUS (1 << 6) +#define RTEXT_FILTER_MST (1 << 7) + +/* End of information exported to user level */ + + + +#endif /* _UAPI__LINUX_RTNETLINK_H */ diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index 887a25286441..1686861aae20 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -98,43 +98,97 @@ struct statx_timestamp { */ struct statx { /* 0x00 */ - __u32 stx_mask; /* What results were written [uncond] */ - __u32 stx_blksize; /* Preferred general I/O size [uncond] */ - __u64 stx_attributes; /* Flags conveying information about the file [uncond] */ + /* What results were written [uncond] */ + __u32 stx_mask; + + /* Preferred general I/O size [uncond] */ + __u32 stx_blksize; + + /* Flags conveying information about the file [uncond] */ + __u64 stx_attributes; + /* 0x10 */ - __u32 stx_nlink; /* Number of hard links */ - __u32 stx_uid; /* User ID of owner */ - __u32 stx_gid; /* Group ID of owner */ - __u16 stx_mode; /* File mode */ + /* Number of hard links */ + __u32 stx_nlink; + + /* User ID of owner */ + __u32 stx_uid; + + /* Group ID of owner */ + __u32 stx_gid; + + /* File mode */ + __u16 stx_mode; __u16 __spare0[1]; + /* 0x20 */ - __u64 stx_ino; /* Inode number */ - __u64 stx_size; /* File size */ - __u64 stx_blocks; /* Number of 512-byte blocks allocated */ - __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */ + /* Inode number */ + __u64 stx_ino; + + /* File size */ + __u64 stx_size; + + /* Number of 512-byte blocks allocated */ + __u64 stx_blocks; + + /* Mask to show what's supported in stx_attributes */ + __u64 stx_attributes_mask; + /* 0x40 */ - struct statx_timestamp stx_atime; /* Last access time */ - struct statx_timestamp stx_btime; /* File creation time */ - struct statx_timestamp stx_ctime; /* Last attribute change time */ - struct statx_timestamp stx_mtime; /* Last data modification time */ + /* Last access time */ + struct statx_timestamp stx_atime; + + /* File creation time */ + struct statx_timestamp stx_btime; + + /* Last attribute change time */ + struct statx_timestamp stx_ctime; + + /* Last data modification time */ + struct statx_timestamp stx_mtime; + /* 0x80 */ - __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */ + /* Device ID of special file [if bdev/cdev] */ + __u32 stx_rdev_major; __u32 stx_rdev_minor; - __u32 stx_dev_major; /* ID of device containing file [uncond] */ + + /* ID of device containing file [uncond] */ + __u32 stx_dev_major; __u32 stx_dev_minor; + /* 0x90 */ __u64 stx_mnt_id; - __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ - __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ + + /* Memory buffer alignment for direct I/O */ + __u32 stx_dio_mem_align; + + /* File offset alignment for direct I/O */ + __u32 stx_dio_offset_align; + /* 0xa0 */ - __u64 stx_subvol; /* Subvolume identifier */ - __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ - __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* Subvolume identifier */ + __u64 stx_subvol; + + /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_min; + + /* Max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; + /* 0xb0 */ - __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ - __u32 __spare1[1]; - /* 0xb8 */ - __u64 __spare3[9]; /* Spare space for future expansion */ + /* Max atomic write segment count */ + __u32 stx_atomic_write_segments_max; + + /* File offset alignment for direct I/O reads */ + __u32 stx_dio_read_offset_align; + + /* Optimised max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max_opt; + __u32 __spare2[1]; + + /* 0xc0 */ + __u64 __spare3[8]; /* Spare space for future expansion */ + /* 0x100 */ }; @@ -164,6 +218,7 @@ struct statx { #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ #define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ +#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ diff --git a/tools/include/uapi/linux/stddef.h b/tools/include/uapi/linux/stddef.h index bb6ea517efb5..c53cde425406 100644 --- a/tools/include/uapi/linux/stddef.h +++ b/tools/include/uapi/linux/stddef.h @@ -8,6 +8,13 @@ #define __always_inline __inline__ #endif +/* Not all C++ standards support type declarations inside an anonymous union */ +#ifndef __cplusplus +#define __struct_group_tag(TAG) TAG +#else +#define __struct_group_tag(TAG) +#endif + /** * __struct_group() - Create a mirrored named and anonyomous struct * @@ -20,14 +27,14 @@ * and size: one anonymous and one named. The former's members can be used * normally without sub-struct naming, and the latter can be used to * reason about the start, end, and size of the group of struct members. - * The named struct can also be explicitly tagged for layer reuse, as well - * as both having struct attributes appended. + * The named struct can also be explicitly tagged for layer reuse (C only), + * as well as both having struct attributes appended. */ #define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \ union { \ struct { MEMBERS } ATTRS; \ - struct TAG { MEMBERS } ATTRS NAME; \ - } + struct __struct_group_tag(TAG) { MEMBERS } ATTRS NAME; \ + } ATTRS /** * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union diff --git a/tools/include/uapi/linux/types.h b/tools/include/uapi/linux/types.h index 91fa51a9c31d..85aa327245c6 100644 --- a/tools/include/uapi/linux/types.h +++ b/tools/include/uapi/linux/types.h @@ -4,6 +4,8 @@ #include <asm-generic/int-ll64.h> +#ifndef __ASSEMBLER__ + /* copied from linux:include/uapi/linux/types.h */ #define __bitwise typedef __u16 __bitwise __le16; @@ -20,4 +22,5 @@ typedef __u32 __bitwise __wsum; #define __aligned_be64 __be64 __attribute__((aligned(8))) #define __aligned_le64 __le64 __attribute__((aligned(8))) +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_LINUX_TYPES_H */ diff --git a/tools/include/vdso/unaligned.h b/tools/include/vdso/unaligned.h index eee3d2a4dbe4..ff0c06b6513e 100644 --- a/tools/include/vdso/unaligned.h +++ b/tools/include/vdso/unaligned.h @@ -2,14 +2,14 @@ #ifndef __VDSO_UNALIGNED_H #define __VDSO_UNALIGNED_H -#define __get_unaligned_t(type, ptr) ({ \ - const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x; \ +#define __get_unaligned_t(type, ptr) ({ \ + const struct { type x; } __packed * __get_pptr = (typeof(__get_pptr))(ptr); \ + __get_pptr->x; \ }) -#define __put_unaligned_t(type, val, ptr) do { \ - struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x = (val); \ +#define __put_unaligned_t(type, val, ptr) do { \ + struct { type x; } __packed * __put_pptr = (typeof(__put_pptr))(ptr); \ + __put_pptr->x = (val); \ } while (0) #endif /* __VDSO_UNALIGNED_H */ |
