diff options
Diffstat (limited to 'tools')
35 files changed, 819 insertions, 195 deletions
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 97c3478ee6e7..7b7ac0f6cec9 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -35,6 +35,7 @@ #include <linux/psci.h> #include <linux/types.h> #include <asm/ptrace.h> +#include <asm/sve_context.h> #define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE @@ -102,6 +103,9 @@ struct kvm_regs { #define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */ #define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */ #define KVM_ARM_VCPU_PMU_V3 3 /* Support guest PMUv3 */ +#define KVM_ARM_VCPU_SVE 4 /* enable SVE for this CPU */ +#define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */ +#define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */ struct kvm_vcpu_init { __u32 target; @@ -226,6 +230,45 @@ struct kvm_vcpu_events { KVM_REG_ARM_FW | ((r) & 0xffff)) #define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0) +/* SVE registers */ +#define KVM_REG_ARM64_SVE (0x15 << KVM_REG_ARM_COPROC_SHIFT) + +/* Z- and P-regs occupy blocks at the following offsets within this range: */ +#define KVM_REG_ARM64_SVE_ZREG_BASE 0 +#define KVM_REG_ARM64_SVE_PREG_BASE 0x400 +#define KVM_REG_ARM64_SVE_FFR_BASE 0x600 + +#define KVM_ARM64_SVE_NUM_ZREGS __SVE_NUM_ZREGS +#define KVM_ARM64_SVE_NUM_PREGS __SVE_NUM_PREGS + +#define KVM_ARM64_SVE_MAX_SLICES 32 + +#define KVM_REG_ARM64_SVE_ZREG(n, i) \ + (KVM_REG_ARM64 | KVM_REG_ARM64_SVE | KVM_REG_ARM64_SVE_ZREG_BASE | \ + KVM_REG_SIZE_U2048 | \ + (((n) & (KVM_ARM64_SVE_NUM_ZREGS - 1)) << 5) | \ + ((i) & (KVM_ARM64_SVE_MAX_SLICES - 1))) + +#define KVM_REG_ARM64_SVE_PREG(n, i) \ + (KVM_REG_ARM64 | KVM_REG_ARM64_SVE | KVM_REG_ARM64_SVE_PREG_BASE | \ + KVM_REG_SIZE_U256 | \ + (((n) & (KVM_ARM64_SVE_NUM_PREGS - 1)) << 5) | \ + ((i) & (KVM_ARM64_SVE_MAX_SLICES - 1))) + +#define KVM_REG_ARM64_SVE_FFR(i) \ + (KVM_REG_ARM64 | KVM_REG_ARM64_SVE | KVM_REG_ARM64_SVE_FFR_BASE | \ + KVM_REG_SIZE_U256 | \ + ((i) & (KVM_ARM64_SVE_MAX_SLICES - 1))) + +#define KVM_ARM64_SVE_VQ_MIN __SVE_VQ_MIN +#define KVM_ARM64_SVE_VQ_MAX __SVE_VQ_MAX + +/* Vector lengths pseudo-register: */ +#define KVM_REG_ARM64_SVE_VLS (KVM_REG_ARM64 | KVM_REG_ARM64_SVE | \ + KVM_REG_SIZE_U512 | 0xffff) +#define KVM_ARM64_SVE_VLS_WORDS \ + ((KVM_ARM64_SVE_VQ_MAX - KVM_ARM64_SVE_VQ_MIN) / 64 + 1) + /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 26ca425f4c2c..b0f72dea8b11 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -482,6 +482,8 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_ICP_PPRI_SHIFT 16 /* pending irq priority */ #define KVM_REG_PPC_ICP_PPRI_MASK 0xff +#define KVM_REG_PPC_VP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x8d) + /* Device control API: PPC-specific devices */ #define KVM_DEV_MPIC_GRP_MISC 1 #define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */ @@ -677,4 +679,48 @@ struct kvm_ppc_cpu_char { #define KVM_XICS_PRESENTED (1ULL << 43) #define KVM_XICS_QUEUED (1ULL << 44) +/* POWER9 XIVE Native Interrupt Controller */ +#define KVM_DEV_XIVE_GRP_CTRL 1 +#define KVM_DEV_XIVE_RESET 1 +#define KVM_DEV_XIVE_EQ_SYNC 2 +#define KVM_DEV_XIVE_GRP_SOURCE 2 /* 64-bit source identifier */ +#define KVM_DEV_XIVE_GRP_SOURCE_CONFIG 3 /* 64-bit source identifier */ +#define KVM_DEV_XIVE_GRP_EQ_CONFIG 4 /* 64-bit EQ identifier */ +#define KVM_DEV_XIVE_GRP_SOURCE_SYNC 5 /* 64-bit source identifier */ + +/* Layout of 64-bit XIVE source attribute values */ +#define KVM_XIVE_LEVEL_SENSITIVE (1ULL << 0) +#define KVM_XIVE_LEVEL_ASSERTED (1ULL << 1) + +/* Layout of 64-bit XIVE source configuration attribute values */ +#define KVM_XIVE_SOURCE_PRIORITY_SHIFT 0 +#define KVM_XIVE_SOURCE_PRIORITY_MASK 0x7 +#define KVM_XIVE_SOURCE_SERVER_SHIFT 3 +#define KVM_XIVE_SOURCE_SERVER_MASK 0xfffffff8ULL +#define KVM_XIVE_SOURCE_MASKED_SHIFT 32 +#define KVM_XIVE_SOURCE_MASKED_MASK 0x100000000ULL +#define KVM_XIVE_SOURCE_EISN_SHIFT 33 +#define KVM_XIVE_SOURCE_EISN_MASK 0xfffffffe00000000ULL + +/* Layout of 64-bit EQ identifier */ +#define KVM_XIVE_EQ_PRIORITY_SHIFT 0 +#define KVM_XIVE_EQ_PRIORITY_MASK 0x7 +#define KVM_XIVE_EQ_SERVER_SHIFT 3 +#define KVM_XIVE_EQ_SERVER_MASK 0xfffffff8ULL + +/* Layout of EQ configuration values (64 bytes) */ +struct kvm_ppc_xive_eq { + __u32 flags; + __u32 qshift; + __u64 qaddr; + __u32 qtoggle; + __u32 qindex; + __u8 pad[40]; +}; + +#define KVM_XIVE_EQ_ALWAYS_NOTIFY 0x00000001 + +#define KVM_XIVE_TIMA_PAGE_OFFSET 0 +#define KVM_XIVE_ESB_PAGE_OFFSET 4 + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h index 09652eabe769..47104e5b47fd 100644 --- a/tools/arch/s390/include/uapi/asm/kvm.h +++ b/tools/arch/s390/include/uapi/asm/kvm.h @@ -153,7 +153,9 @@ struct kvm_s390_vm_cpu_subfunc { __u8 ppno[16]; /* with MSA5 */ __u8 kma[16]; /* with MSA8 */ __u8 kdsa[16]; /* with MSA9 */ - __u8 reserved[1792]; + __u8 sortl[32]; /* with STFLE.150 */ + __u8 dfltcc[32]; /* with STFLE.151 */ + __u8 reserved[1728]; }; /* kvm attributes for crypto */ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 981ff9479648..75f27ee2c263 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -344,6 +344,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ @@ -382,5 +383,7 @@ #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ #define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ +#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ +#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 58a2cd002a4b..7317438ecd9e 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -208,8 +208,8 @@ static int dump_btf_type(const struct btf *btf, __u32 id, break; } case BTF_KIND_FWD: { - const char *fwd_kind = BTF_INFO_KIND(t->info) ? "union" - : "struct"; + const char *fwd_kind = BTF_INFO_KFLAG(t->info) ? "union" + : "struct"; if (json_output) jsonw_string_field(w, "fwd_kind", fwd_kind); diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index fc495b27f0fc..26336bad0442 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -879,6 +879,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) } } + set_max_rlimit(); + obj = __bpf_object__open_xattr(&attr, bpf_flags); if (IS_ERR_OR_NULL(obj)) { p_err("failed to open object file"); @@ -958,8 +960,6 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) goto err_close_obj; } - set_max_rlimit(); - err = bpf_object__load(obj); if (err) { p_err("failed to load object file"); diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index dee7292e1df6..a87904daf103 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -832,9 +832,21 @@ __SYSCALL(__NR_io_uring_setup, sys_io_uring_setup) __SYSCALL(__NR_io_uring_enter, sys_io_uring_enter) #define __NR_io_uring_register 427 __SYSCALL(__NR_io_uring_register, sys_io_uring_register) +#define __NR_open_tree 428 +__SYSCALL(__NR_open_tree, sys_open_tree) +#define __NR_move_mount 429 +__SYSCALL(__NR_move_mount, sys_move_mount) +#define __NR_fsopen 430 +__SYSCALL(__NR_fsopen, sys_fsopen) +#define __NR_fsconfig 431 +__SYSCALL(__NR_fsconfig, sys_fsconfig) +#define __NR_fsmount 432 +__SYSCALL(__NR_fsmount, sys_fsmount) +#define __NR_fspick 433 +__SYSCALL(__NR_fspick, sys_fspick) #undef __NR_syscalls -#define __NR_syscalls 428 +#define __NR_syscalls 434 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 300f336633f2..661d73f9a919 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -649,6 +649,7 @@ struct drm_gem_open { #define DRM_CAP_PAGE_FLIP_TARGET 0x11 #define DRM_CAP_CRTC_IN_VBLANK_EVENT 0x12 #define DRM_CAP_SYNCOBJ 0x13 +#define DRM_CAP_SYNCOBJ_TIMELINE 0x14 /** DRM_IOCTL_GET_CAP ioctl argument type */ struct drm_get_cap { @@ -735,8 +736,18 @@ struct drm_syncobj_handle { __u32 pad; }; +struct drm_syncobj_transfer { + __u32 src_handle; + __u32 dst_handle; + __u64 src_point; + __u64 dst_point; + __u32 flags; + __u32 pad; +}; + #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1) +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2) /* wait for time point to become available */ struct drm_syncobj_wait { __u64 handles; /* absolute timeout */ @@ -747,12 +758,33 @@ struct drm_syncobj_wait { __u32 pad; }; +struct drm_syncobj_timeline_wait { + __u64 handles; + /* wait on specific timeline point for every handles*/ + __u64 points; + /* absolute timeout */ + __s64 timeout_nsec; + __u32 count_handles; + __u32 flags; + __u32 first_signaled; /* only valid when not waiting all */ + __u32 pad; +}; + + struct drm_syncobj_array { __u64 handles; __u32 count_handles; __u32 pad; }; +struct drm_syncobj_timeline_array { + __u64 handles; + __u64 points; + __u32 count_handles; + __u32 pad; +}; + + /* Query current scanout sequence number */ struct drm_crtc_get_sequence { __u32 crtc_id; /* requested crtc_id */ @@ -909,6 +941,11 @@ extern "C" { #define DRM_IOCTL_MODE_GET_LEASE DRM_IOWR(0xC8, struct drm_mode_get_lease) #define DRM_IOCTL_MODE_REVOKE_LEASE DRM_IOWR(0xC9, struct drm_mode_revoke_lease) +#define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait) +#define DRM_IOCTL_SYNCOBJ_QUERY DRM_IOWR(0xCB, struct drm_syncobj_timeline_array) +#define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct drm_syncobj_transfer) +#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct drm_syncobj_timeline_array) + /** * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 397810fa2d33..3a73f5316766 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -63,6 +63,28 @@ extern "C" { #define I915_RESET_UEVENT "RESET" /* + * i915_user_extension: Base class for defining a chain of extensions + * + * Many interfaces need to grow over time. In most cases we can simply + * extend the struct and have userspace pass in more data. Another option, + * as demonstrated by Vulkan's approach to providing extensions for forward + * and backward compatibility, is to use a list of optional structs to + * provide those extra details. + * + * The key advantage to using an extension chain is that it allows us to + * redefine the interface more easily than an ever growing struct of + * increasing complexity, and for large parts of that interface to be + * entirely optional. The downside is more pointer chasing; chasing across + * the __user boundary with pointers encapsulated inside u64. + */ +struct i915_user_extension { + __u64 next_extension; + __u32 name; + __u32 flags; /* All undefined bits must be zero. */ + __u32 rsvd[4]; /* Reserved for future use; must be zero. */ +}; + +/* * MOCS indexes used for GPU surfaces, defining the cacheability of the * surface data and the coherency for this data wrt. CPU vs. GPU accesses. */ @@ -99,9 +121,23 @@ enum drm_i915_gem_engine_class { I915_ENGINE_CLASS_VIDEO = 2, I915_ENGINE_CLASS_VIDEO_ENHANCE = 3, + /* should be kept compact */ + I915_ENGINE_CLASS_INVALID = -1 }; +/* + * There may be more than one engine fulfilling any role within the system. + * Each engine of a class is given a unique instance number and therefore + * any engine can be specified by its class:instance tuplet. APIs that allow + * access to any engine in the system will use struct i915_engine_class_instance + * for this identification. + */ +struct i915_engine_class_instance { + __u16 engine_class; /* see enum drm_i915_gem_engine_class */ + __u16 engine_instance; +}; + /** * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915 * @@ -319,6 +355,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_PERF_ADD_CONFIG 0x37 #define DRM_I915_PERF_REMOVE_CONFIG 0x38 #define DRM_I915_QUERY 0x39 +/* Must be kept compact -- no holes */ #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -367,6 +404,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey) #define DRM_IOCTL_I915_GEM_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_WAIT, struct drm_i915_gem_wait) #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create) +#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create_ext) #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy) #define DRM_IOCTL_I915_REG_READ DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read) #define DRM_IOCTL_I915_GET_RESET_STATS DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats) @@ -476,6 +514,7 @@ typedef struct drm_i915_irq_wait { #define I915_SCHEDULER_CAP_ENABLED (1ul << 0) #define I915_SCHEDULER_CAP_PRIORITY (1ul << 1) #define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) +#define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3) #define I915_PARAM_HUC_STATUS 42 @@ -559,6 +598,8 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_MMAP_GTT_COHERENT 52 +/* Must be kept compact -- no holes and well documented */ + typedef struct drm_i915_getparam { __s32 param; /* @@ -574,6 +615,7 @@ typedef struct drm_i915_getparam { #define I915_SETPARAM_TEX_LRU_LOG_GRANULARITY 2 #define I915_SETPARAM_ALLOW_BATCHBUFFER 3 #define I915_SETPARAM_NUM_USED_FENCES 4 +/* Must be kept compact -- no holes */ typedef struct drm_i915_setparam { int param; @@ -972,7 +1014,7 @@ struct drm_i915_gem_execbuffer2 { * struct drm_i915_gem_exec_fence *fences. */ __u64 cliprects_ptr; -#define I915_EXEC_RING_MASK (7<<0) +#define I915_EXEC_RING_MASK (0x3f) #define I915_EXEC_DEFAULT (0<<0) #define I915_EXEC_RENDER (1<<0) #define I915_EXEC_BSD (2<<0) @@ -1120,32 +1162,34 @@ struct drm_i915_gem_busy { * as busy may become idle before the ioctl is completed. * * Furthermore, if the object is busy, which engine is busy is only - * provided as a guide. There are race conditions which prevent the - * report of which engines are busy from being always accurate. - * However, the converse is not true. If the object is idle, the - * result of the ioctl, that all engines are idle, is accurate. + * provided as a guide and only indirectly by reporting its class + * (there may be more than one engine in each class). There are race + * conditions which prevent the report of which engines are busy from + * being always accurate. However, the converse is not true. If the + * object is idle, the result of the ioctl, that all engines are idle, + * is accurate. * * The returned dword is split into two fields to indicate both - * the engines on which the object is being read, and the - * engine on which it is currently being written (if any). + * the engine classess on which the object is being read, and the + * engine class on which it is currently being written (if any). * * The low word (bits 0:15) indicate if the object is being written * to by any engine (there can only be one, as the GEM implicit * synchronisation rules force writes to be serialised). Only the - * engine for the last write is reported. + * engine class (offset by 1, I915_ENGINE_CLASS_RENDER is reported as + * 1 not 0 etc) for the last write is reported. * - * The high word (bits 16:31) are a bitmask of which engines are - * currently reading from the object. Multiple engines may be + * The high word (bits 16:31) are a bitmask of which engines classes + * are currently reading from the object. Multiple engines may be * reading from the object simultaneously. * - * The value of each engine is the same as specified in the - * EXECBUFFER2 ioctl, i.e. I915_EXEC_RENDER, I915_EXEC_BSD etc. - * Note I915_EXEC_DEFAULT is a symbolic value and is mapped to - * the I915_EXEC_RENDER engine for execution, and so it is never + * The value of each engine class is the same as specified in the + * I915_CONTEXT_SET_ENGINES parameter and via perf, i.e. + * I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc. * reported as active itself. Some hardware may have parallel * execution engines, e.g. multiple media engines, which are - * mapped to the same identifier in the EXECBUFFER2 ioctl and - * so are not separately reported for busyness. + * mapped to the same class identifier and so are not separately + * reported for busyness. * * Caveat emptor: * Only the boolean result of this query is reliable; that is whether @@ -1412,65 +1456,17 @@ struct drm_i915_gem_wait { }; struct drm_i915_gem_context_create { - /* output: id of new context*/ - __u32 ctx_id; - __u32 pad; -}; - -struct drm_i915_gem_context_destroy { - __u32 ctx_id; + __u32 ctx_id; /* output: id of new context*/ __u32 pad; }; -struct drm_i915_reg_read { - /* - * Register offset. - * For 64bit wide registers where the upper 32bits don't immediately - * follow the lower 32bits, the offset of the lower 32bits must - * be specified - */ - __u64 offset; -#define I915_REG_READ_8B_WA (1ul << 0) - - __u64 val; /* Return value */ -}; -/* Known registers: - * - * Render engine timestamp - 0x2358 + 64bit - gen7+ - * - Note this register returns an invalid value if using the default - * single instruction 8byte read, in order to workaround that pass - * flag I915_REG_READ_8B_WA in offset field. - * - */ - -struct drm_i915_reset_stats { - __u32 ctx_id; - __u32 flags; - - /* All resets since boot/module reload, for all contexts */ - __u32 reset_count; - - /* Number of batches lost when active in GPU, for this context */ - __u32 batch_active; - - /* Number of batches lost pending for execution, for this context */ - __u32 batch_pending; - - __u32 pad; -}; - -struct drm_i915_gem_userptr { - __u64 user_ptr; - __u64 user_size; +struct drm_i915_gem_context_create_ext { + __u32 ctx_id; /* output: id of new context*/ __u32 flags; -#define I915_USERPTR_READ_ONLY 0x1 -#define I915_USERPTR_UNSYNCHRONIZED 0x80000000 - /** - * Returned handle for the object. - * - * Object handles are nonzero. - */ - __u32 handle; +#define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS (1u << 0) +#define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \ + (-(I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS << 1)) + __u64 extensions; }; struct drm_i915_gem_context_param { @@ -1491,6 +1487,28 @@ struct drm_i915_gem_context_param { * drm_i915_gem_context_param_sseu. */ #define I915_CONTEXT_PARAM_SSEU 0x7 + +/* + * Not all clients may want to attempt automatic recover of a context after + * a hang (for example, some clients may only submit very small incremental + * batches relying on known logical state of previous batches which will never + * recover correctly and each attempt will hang), and so would prefer that + * the context is forever banned instead. + * + * If set to false (0), after a reset, subsequent (and in flight) rendering + * from this context is discarded, and the client will need to create a new + * context to use instead. + * + * If set to true (1), the kernel will automatically attempt to recover the + * context by skipping the hanging batch and executing the next batch starting + * from the default context state (discarding the incomplete logical context + * state lost due to the reset). + * + * On creation, all new contexts are marked as recoverable. + */ +#define I915_CONTEXT_PARAM_RECOVERABLE 0x8 +/* Must be kept compact -- no holes and well documented */ + __u64 value; }; @@ -1519,8 +1537,7 @@ struct drm_i915_gem_context_param_sseu { /* * Engine class & instance to be configured or queried. */ - __u16 engine_class; - __u16 engine_instance; + struct i915_engine_class_instance engine; /* * Unused for now. Must be cleared to zero. @@ -1553,6 +1570,96 @@ struct drm_i915_gem_context_param_sseu { __u32 rsvd; }; +struct drm_i915_gem_context_create_ext_setparam { +#define I915_CONTEXT_CREATE_EXT_SETPARAM 0 + struct i915_user_extension base; + struct drm_i915_gem_context_param param; +}; + +struct drm_i915_gem_context_destroy { + __u32 ctx_id; + __u32 pad; +}; + +/* + * DRM_I915_GEM_VM_CREATE - + * + * Create a new virtual memory address space (ppGTT) for use within a context + * on the same file. Extensions can be provided to configure exactly how the + * address space is setup upon creation. + * + * The id of new VM (bound to the fd) for use with I915_CONTEXT_PARAM_VM is + * returned in the outparam @id. + * + * No flags are defined, with all bits reserved and must be zero. + * + * An extension chain maybe provided, starting with @extensions, and terminated + * by the @next_extension being 0. Currently, no extensions are defined. + * + * DRM_I915_GEM_VM_DESTROY - + * + * Destroys a previously created VM id, specified in @id. + * + * No extensions or flags are allowed currently, and so must be zero. + */ +struct drm_i915_gem_vm_control { + __u64 extensions; + __u32 flags; + __u32 vm_id; +}; + +struct drm_i915_reg_read { + /* + * Register offset. + * For 64bit wide registers where the upper 32bits don't immediately + * follow the lower 32bits, the offset of the lower 32bits must + * be specified + */ + __u64 offset; +#define I915_REG_READ_8B_WA (1ul << 0) + + __u64 val; /* Return value */ +}; + +/* Known registers: + * + * Render engine timestamp - 0x2358 + 64bit - gen7+ + * - Note this register returns an invalid value if using the default + * single instruction 8byte read, in order to workaround that pass + * flag I915_REG_READ_8B_WA in offset field. + * + */ + +struct drm_i915_reset_stats { + __u32 ctx_id; + __u32 flags; + + /* All resets since boot/module reload, for all contexts */ + __u32 reset_count; + + /* Number of batches lost when active in GPU, for this context */ + __u32 batch_active; + + /* Number of batches lost pending for execution, for this context */ + __u32 batch_pending; + + __u32 pad; +}; + +struct drm_i915_gem_userptr { + __u64 user_ptr; + __u64 user_size; + __u32 flags; +#define I915_USERPTR_READ_ONLY 0x1 +#define I915_USERPTR_UNSYNCHRONIZED 0x80000000 + /** + * Returned handle for the object. + * + * Object handles are nonzero. + */ + __u32 handle; +}; + enum drm_i915_oa_format { I915_OA_FORMAT_A13 = 1, /* HSW only */ I915_OA_FORMAT_A29, /* HSW only */ @@ -1714,6 +1821,7 @@ struct drm_i915_perf_oa_config { struct drm_i915_query_item { __u64 query_id; #define DRM_I915_QUERY_TOPOLOGY_INFO 1 +/* Must be kept compact -- no holes and well documented */ /* * When set to zero by userspace, this is filled with the size of the diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index 9310652ca4f9..63ae4a39e58b 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -83,7 +83,7 @@ struct btf_type { * is the 32 bits arrangement: */ #define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24) -#define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16) +#define BTF_INT_OFFSET(VAL) (((VAL) & 0x00ff0000) >> 16) #define BTF_INT_BITS(VAL) ((VAL) & 0x000000ff) /* Attributes stored in the BTF_INT_ENCODING */ diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h index a2f8658f1c55..1d338357df8a 100644 --- a/tools/include/uapi/linux/fcntl.h +++ b/tools/include/uapi/linux/fcntl.h @@ -91,5 +91,7 @@ #define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ #define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ +#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ + #endif /* _UAPI_LINUX_FCNTL_H */ diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h index 121e82ce296b..59c71fa8c553 100644 --- a/tools/include/uapi/linux/fs.h +++ b/tools/include/uapi/linux/fs.h @@ -320,6 +320,9 @@ struct fscrypt_key { #define SYNC_FILE_RANGE_WAIT_BEFORE 1 #define SYNC_FILE_RANGE_WRITE 2 #define SYNC_FILE_RANGE_WAIT_AFTER 4 +#define SYNC_FILE_RANGE_WRITE_AND_WAIT (SYNC_FILE_RANGE_WRITE | \ + SYNC_FILE_RANGE_WAIT_BEFORE | \ + SYNC_FILE_RANGE_WAIT_AFTER) /* * Flags for preadv2/pwritev2: diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 6d4ea4b6c922..2fe12b40d503 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -986,8 +986,13 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163 #define KVM_CAP_EXCEPTION_PAYLOAD 164 #define KVM_CAP_ARM_VM_IPA_SIZE 165 -#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 +#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 /* Obsolete */ #define KVM_CAP_HYPERV_CPUID 167 +#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 168 +#define KVM_CAP_PPC_IRQ_XIVE 169 +#define KVM_CAP_ARM_SVE 170 +#define KVM_CAP_ARM_PTRAUTH_ADDRESS 171 +#define KVM_CAP_ARM_PTRAUTH_GENERIC 172 #ifdef KVM_CAP_IRQ_ROUTING @@ -1145,6 +1150,7 @@ struct kvm_dirty_tlb { #define KVM_REG_SIZE_U256 0x0050000000000000ULL #define KVM_REG_SIZE_U512 0x0060000000000000ULL #define KVM_REG_SIZE_U1024 0x0070000000000000ULL +#define KVM_REG_SIZE_U2048 0x0080000000000000ULL struct kvm_reg_list { __u64 n; /* number of regs */ @@ -1211,6 +1217,8 @@ enum kvm_device_type { #define KVM_DEV_TYPE_ARM_VGIC_V3 KVM_DEV_TYPE_ARM_VGIC_V3 KVM_DEV_TYPE_ARM_VGIC_ITS, #define KVM_DEV_TYPE_ARM_VGIC_ITS KVM_DEV_TYPE_ARM_VGIC_ITS + KVM_DEV_TYPE_XIVE, +#define KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_MAX, }; @@ -1434,12 +1442,15 @@ struct kvm_enc_region { #define KVM_GET_NESTED_STATE _IOWR(KVMIO, 0xbe, struct kvm_nested_state) #define KVM_SET_NESTED_STATE _IOW(KVMIO, 0xbf, struct kvm_nested_state) -/* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT */ +/* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT_2 */ #define KVM_CLEAR_DIRTY_LOG _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log) /* Available with KVM_CAP_HYPERV_CPUID */ #define KVM_GET_SUPPORTED_HV_CPUID _IOWR(KVMIO, 0xc1, struct kvm_cpuid2) +/* Available with KVM_CAP_ARM_SVE */ +#define KVM_ARM_VCPU_FINALIZE _IOW(KVMIO, 0xc2, int) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h index 3f9ec42510b0..96a0240f23fe 100644 --- a/tools/include/uapi/linux/mount.h +++ b/tools/include/uapi/linux/mount.h @@ -55,4 +55,66 @@ #define MS_MGC_VAL 0xC0ED0000 #define MS_MGC_MSK 0xffff0000 +/* + * open_tree() flags. + */ +#define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */ +#define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */ + +/* + * move_mount() flags. + */ +#define MOVE_MOUNT_F_SYMLINKS 0x00000001 /* Follow symlinks on from path */ +#define MOVE_MOUNT_F_AUTOMOUNTS 0x00000002 /* Follow automounts on from path */ +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ +#define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */ +#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */ +#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ +#define MOVE_MOUNT__MASK 0x00000077 + +/* + * fsopen() flags. + */ +#define FSOPEN_CLOEXEC 0x00000001 + +/* + * fspick() flags. + */ +#define FSPICK_CLOEXEC 0x00000001 +#define FSPICK_SYMLINK_NOFOLLOW 0x00000002 +#define FSPICK_NO_AUTOMOUNT 0x00000004 +#define FSPICK_EMPTY_PATH 0x00000008 + +/* + * The type of fsconfig() call made. + */ +enum fsconfig_command { + FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */ + FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */ + FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */ + FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */ + FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */ + FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */ + FSCONFIG_CMD_CREATE = 6, /* Invoke superblock creation */ + FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ +}; + +/* + * fsmount() flags. + */ +#define FSMOUNT_CLOEXEC 0x00000001 + +/* + * Mount attributes. + */ +#define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */ +#define MOUNT_ATTR_NOSUID 0x00000002 /* Ignore suid and sgid bits */ +#define MOUNT_ATTR_NODEV 0x00000004 /* Disallow access to device special files */ +#define MOUNT_ATTR_NOEXEC 0x00000008 /* Disallow program execution */ +#define MOUNT_ATTR__ATIME 0x00000070 /* Setting on how atime should be updated */ +#define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */ +#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */ +#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */ +#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */ + #endif /* _UAPI_LINUX_MOUNT_H */ diff --git a/tools/include/uapi/linux/sched.h b/tools/include/uapi/linux/sched.h index 22627f80063e..ed4ee170bee2 100644 --- a/tools/include/uapi/linux/sched.h +++ b/tools/include/uapi/linux/sched.h @@ -10,6 +10,7 @@ #define CLONE_FS 0x00000200 /* set if fs info shared between processes */ #define CLONE_FILES 0x00000400 /* set if open files shared between processes */ #define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */ +#define CLONE_PIDFD 0x00001000 /* set if a pidfd should be placed in parent */ #define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */ #define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */ #define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */ diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 75eaf10b9e1a..03348c4d6bd4 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -11,7 +11,7 @@ #include "btf.h" #include "bpf.h" #include "libbpf.h" -#include "libbpf_util.h" +#include "libbpf_internal.h" #define max(a, b) ((a) > (b) ? (a) : (b)) #define min(a, b) ((a) < (b) ? (a) : (b)) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7e3b79d7c25f..197b574406b3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -43,7 +43,6 @@ #include "bpf.h" #include "btf.h" #include "str_error.h" -#include "libbpf_util.h" #include "libbpf_internal.h" #ifndef EM_BPF @@ -1696,7 +1695,7 @@ bpf_object__probe_caps(struct bpf_object *obj) for (i = 0; i < ARRAY_SIZE(probe_fn); i++) { ret = probe_fn[i](obj); if (ret < 0) - return ret; + pr_debug("Probe #%d failed with %d.\n", i, ret); } return 0; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 789e435b5900..f3025b4d90e1 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -21,6 +21,19 @@ #define BTF_PARAM_ENC(name, type) (name), (type) #define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size) +extern void libbpf_print(enum libbpf_print_level level, + const char *format, ...) + __attribute__((format(printf, 2, 3))); + +#define __pr(level, fmt, ...) \ +do { \ + libbpf_print(level, "libbpf: " fmt, ##__VA_ARGS__); \ +} while (0) + +#define pr_warning(fmt, ...) __pr(LIBBPF_WARN, fmt, ##__VA_ARGS__) +#define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__) +#define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__) + int libbpf__probe_raw_btf(const char *raw_types, size_t types_len, const char *str_sec, size_t str_len); diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h index da94c4cb2e4d..59c779c5790c 100644 --- a/tools/lib/bpf/libbpf_util.h +++ b/tools/lib/bpf/libbpf_util.h @@ -10,19 +10,6 @@ extern "C" { #endif -extern void libbpf_print(enum libbpf_print_level level, - const char *format, ...) - __attribute__((format(printf, 2, 3))); - -#define __pr(level, fmt, ...) \ -do { \ - libbpf_print(level, "libbpf: " fmt, ##__VA_ARGS__); \ -} while (0) - -#define pr_warning(fmt, ...) __pr(LIBBPF_WARN, fmt, ##__VA_ARGS__) -#define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__) -#define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__) - /* Use these barrier functions instead of smp_[rw]mb() when they are * used in a libbpf header file. That way they can be built into the * application that uses libbpf. diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index a3d1a302bc9c..38667b62f1fe 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -29,7 +29,7 @@ #include "bpf.h" #include "libbpf.h" -#include "libbpf_util.h" +#include "libbpf_internal.h" #include "xsk.h" #ifndef SOL_XDP diff --git a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl index c88fd32563eb..459469b7222c 100755 --- a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl +++ b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl @@ -56,7 +56,7 @@ create_table() echo "};" } -$gcc -E -dM -x c $input \ +$gcc -E -dM -x c -I $incpath/include/uapi $input \ |sed -ne 's/^#define __NR_//p' \ |sort -t' ' -k2 -nu \ |create_table diff --git a/tools/perf/arch/s390/util/machine.c b/tools/perf/arch/s390/util/machine.c index 0b2054007314..a19690a17291 100644 --- a/tools/perf/arch/s390/util/machine.c +++ b/tools/perf/arch/s390/util/machine.c @@ -5,16 +5,19 @@ #include "util.h" #include "machine.h" #include "api/fs/fs.h" +#include "debug.h" int arch__fix_module_text_start(u64 *start, const char *name) { + u64 m_start = *start; char path[PATH_MAX]; snprintf(path, PATH_MAX, "module/%.*s/sections/.text", (int)strlen(name) - 2, name + 1); - - if (sysfs__read_ull(path, (unsigned long long *)start) < 0) - return -1; + if (sysfs__read_ull(path, (unsigned long long *)start) < 0) { + pr_debug2("Using module %s start:%#lx\n", path, m_start); + *start = m_start; + } return 0; } diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 92ee0b4378d4..b4e6f9e6204a 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -349,6 +349,12 @@ 425 common io_uring_setup __x64_sys_io_uring_setup 426 common io_uring_enter __x64_sys_io_uring_enter 427 common io_uring_register __x64_sys_io_uring_register +428 common open_tree __x64_sys_open_tree +429 common move_mount __x64_sys_move_mount +430 common fsopen __x64_sys_fsopen +431 common fsconfig __x64_sys_fsconfig +432 common fsmount __x64_sys_fsmount +433 common fspick __x64_sys_fspick # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 7691980b7df1..f101576d1c72 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -161,9 +161,16 @@ next_pair: continue; } - } else + } else if (mem_start == kallsyms.vmlinux_map->end) { + /* + * Ignore aliases to _etext, i.e. to the end of the kernel text area, + * such as __indirect_thunk_end. + */ + continue; + } else { pr_debug("ERR : %#" PRIx64 ": %s not on kallsyms\n", mem_start, sym->name); + } err = -1; } diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index e0311c9750ad..9097543a818b 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -271,7 +271,7 @@ static int string_set_value(struct bt_ctf_field *field, const char *string) if (i > 0) strncpy(buffer, string, i); } - strncat(buffer + p, numstr, 4); + memcpy(buffer + p, numstr, 4); p += 3; } } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 28a9541c4835..dc7aafe45a2b 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -924,7 +924,8 @@ const char *ref_reloc_sym_names[] = {"_text", "_stext", NULL}; * symbol_name if it's not that important. */ static int machine__get_running_kernel_start(struct machine *machine, - const char **symbol_name, u64 *start) + const char **symbol_name, + u64 *start, u64 *end) { char filename[PATH_MAX]; int i, err = -1; @@ -949,6 +950,11 @@ static int machine__get_running_kernel_start(struct machine *machine, *symbol_name = name; *start = addr; + + err = kallsyms__get_function_start(filename, "_etext", &addr); + if (!err) + *end = addr; + return 0; } @@ -1441,7 +1447,7 @@ int machine__create_kernel_maps(struct machine *machine) struct dso *kernel = machine__get_kernel(machine); const char *name = NULL; struct map *map; - u64 addr = 0; + u64 start = 0, end = ~0ULL; int ret; if (kernel == NULL) @@ -1460,9 +1466,9 @@ int machine__create_kernel_maps(struct machine *machine) "continuing anyway...\n", machine->pid); } - if (!machine__get_running_kernel_start(machine, &name, &addr)) { + if (!machine__get_running_kernel_start(machine, &name, &start, &end)) { if (name && - map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map, name, addr)) { + map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map, name, start)) { machine__destroy_kernel_maps(machine); ret = -1; goto out_put; @@ -1472,16 +1478,19 @@ int machine__create_kernel_maps(struct machine *machine) * we have a real start address now, so re-order the kmaps * assume it's the last in the kmaps */ - machine__update_kernel_mmap(machine, addr, ~0ULL); + machine__update_kernel_mmap(machine, start, end); } if (machine__create_extra_kernel_maps(machine, kernel)) pr_debug("Problems creating extra kernel maps, continuing anyway...\n"); - /* update end address of the kernel map using adjacent module address */ - map = map__next(machine__kernel_map(machine)); - if (map) - machine__set_kernel_mmap(machine, addr, map->start); + if (end == ~0ULL) { + /* update end address of the kernel map using adjacent module address */ + map = map__next(machine__kernel_map(machine)); + if (map) + machine__set_kernel_mmap(machine, start, map->start); + } + out_put: dso__put(kernel); return ret; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 2310a1752983..54cf163347f7 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -647,6 +647,26 @@ static void perf_event__throttle_swap(union perf_event *event, swap_sample_id_all(event, &event->throttle + 1); } +static void perf_event__namespaces_swap(union perf_event *event, + bool sample_id_all) +{ + u64 i; + + event->namespaces.pid = bswap_32(event->namespaces.pid); + event->namespaces.tid = bswap_32(event->namespaces.tid); + event->namespaces.nr_namespaces = bswap_64(event->namespaces.nr_namespaces); + + for (i = 0; i < event->namespaces.nr_namespaces; i++) { + struct perf_ns_link_info *ns = &event->namespaces.link_info[i]; + + ns->dev = bswap_64(ns->dev); + ns->ino = bswap_64(ns->ino); + } + + if (sample_id_all) + swap_sample_id_all(event, &event->namespaces.link_info[i]); +} + static u8 revbyte(u8 b) { int rev = (b >> 4) | ((b & 0xf) << 4); @@ -887,6 +907,7 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_LOST_SAMPLES] = perf_event__all64_swap, [PERF_RECORD_SWITCH] = perf_event__switch_swap, [PERF_RECORD_SWITCH_CPU_WIDE] = perf_event__switch_swap, + [PERF_RECORD_NAMESPACES] = perf_event__namespaces_swap, [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 403045a2bbea..b413ba5b9835 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -133,7 +133,7 @@ void thread__put(struct thread *thread) } } -struct namespaces *thread__namespaces(const struct thread *thread) +static struct namespaces *__thread__namespaces(const struct thread *thread) { if (list_empty(&thread->namespaces_list)) return NULL; @@ -141,10 +141,21 @@ struct namespaces *thread__namespaces(const struct thread *thread) return list_first_entry(&thread->namespaces_list, struct namespaces, list); } +struct namespaces *thread__namespaces(const struct thread *thread) +{ + struct namespaces *ns; + + down_read((struct rw_semaphore *)&thread->namespaces_lock); + ns = __thread__namespaces(thread); + up_read((struct rw_semaphore *)&thread->namespaces_lock); + + return ns; +} + static int __thread__set_namespaces(struct thread *thread, u64 timestamp, struct namespaces_event *event) { - struct namespaces *new, *curr = thread__namespaces(thread); + struct namespaces *new, *curr = __thread__namespaces(thread); new = namespaces__new(event); if (!new) diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index a877803e4ba8..dd5d69529382 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -31,6 +31,7 @@ test_section_names test_tcpnotify_user test_libbpf test_tcp_check_syncookie_user +test_sysctl alu32 libbpf.pc libbpf.so.* diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 6e80b66d7fb1..5f6f9e7aba2a 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -278,7 +278,7 @@ static int (*bpf_skb_change_type)(void *ctx, __u32 type) = (void *) BPF_FUNC_skb_change_type; static unsigned int (*bpf_get_hash_recalc)(void *ctx) = (void *) BPF_FUNC_get_hash_recalc; -static unsigned long long (*bpf_get_current_task)(void *ctx) = +static unsigned long long (*bpf_get_current_task)(void) = (void *) BPF_FUNC_get_current_task; static int (*bpf_skb_change_tail)(void *ctx, __u32 len, __u64 flags) = (void *) BPF_FUNC_skb_change_tail; diff --git a/tools/testing/selftests/bpf/map_tests/.gitignore b/tools/testing/selftests/bpf/map_tests/.gitignore new file mode 100644 index 000000000000..45984a364647 --- /dev/null +++ b/tools/testing/selftests/bpf/map_tests/.gitignore @@ -0,0 +1 @@ +tests.h diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index 8b54adfd6264..fbd1d88a6095 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -242,12 +242,12 @@ void test_flow_dissector(void) */ err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0); - CHECK(err, "bpf_prog_attach", "err %d errno %d", err, errno); + CHECK(err, "bpf_prog_attach", "err %d errno %d\n", err, errno); tap_fd = create_tap("tap0"); - CHECK(tap_fd < 0, "create_tap", "tap_fd %d errno %d", tap_fd, errno); + CHECK(tap_fd < 0, "create_tap", "tap_fd %d errno %d\n", tap_fd, errno); err = ifup("tap0"); - CHECK(err, "ifup", "err %d errno %d", err, errno); + CHECK(err, "ifup", "err %d errno %d\n", err, errno); for (i = 0; i < ARRAY_SIZE(tests); i++) { struct bpf_flow_keys flow_keys = {}; @@ -255,7 +255,7 @@ void test_flow_dissector(void) __u32 key = 0; err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt)); - CHECK(err < 0, "tx_tap", "err %d errno %d", err, errno); + CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno); err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys); CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err); @@ -264,5 +264,6 @@ void test_flow_dissector(void) CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys); } + bpf_prog_detach(prog_fd, BPF_FLOW_DISSECTOR); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c index 781c7de343be..1b25a7e348dc 100644 --- a/tools/testing/selftests/bpf/test_lru_map.c +++ b/tools/testing/selftests/bpf/test_lru_map.c @@ -18,9 +18,11 @@ #include <sys/wait.h> #include <bpf/bpf.h> +#include <bpf/libbpf.h> #include "bpf_util.h" #include "bpf_rlimit.h" +#include "../../../include/linux/filter.h" #define LOCAL_FREE_TARGET (128) #define PERCPU_FREE_TARGET (4) @@ -40,6 +42,68 @@ static int create_map(int map_type, int map_flags, unsigned int size) return map_fd; } +static int bpf_map_lookup_elem_with_ref_bit(int fd, unsigned long long key, + void *value) +{ + struct bpf_load_program_attr prog; + struct bpf_create_map_attr map; + struct bpf_insn insns[] = { + BPF_LD_MAP_VALUE(BPF_REG_9, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, fd), + BPF_LD_IMM64(BPF_REG_3, key), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_9, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }; + __u8 data[64] = {}; + int mfd, pfd, ret, zero = 0; + __u32 retval = 0; + + memset(&map, 0, sizeof(map)); + map.map_type = BPF_MAP_TYPE_ARRAY; + map.key_size = sizeof(int); + map.value_size = sizeof(unsigned long long); + map.max_entries = 1; + + mfd = bpf_create_map_xattr(&map); + if (mfd < 0) + return -1; + + insns[0].imm = mfd; + + memset(&prog, 0, sizeof(prog)); + prog.prog_type = BPF_PROG_TYPE_SCHED_CLS; + prog.insns = insns; + prog.insns_cnt = ARRAY_SIZE(insns); + prog.license = "GPL"; + + pfd = bpf_load_program_xattr(&prog, NULL, 0); + if (pfd < 0) { + close(mfd); + return -1; + } + + ret = bpf_prog_test_run(pfd, 1, data, sizeof(data), + NULL, NULL, &retval, NULL); + if (ret < 0 || retval != 42) { + ret = -1; + } else { + assert(!bpf_map_lookup_elem(mfd, &zero, value)); + ret = 0; + } + close(pfd); + close(mfd); + return ret; +} + static int map_subset(int map0, int map1) { unsigned long long next_key = 0; @@ -87,7 +151,7 @@ static int sched_next_online(int pid, int *next_to_try) return ret; } -/* Size of the LRU amp is 2 +/* Size of the LRU map is 2 * Add key=1 (+1 key) * Add key=2 (+1 key) * Lookup Key=1 @@ -157,7 +221,7 @@ static void test_lru_sanity0(int map_type, int map_flags) * stop LRU from removing key=1 */ key = 1; - assert(!bpf_map_lookup_elem(lru_map_fd, &key, value)); + assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); assert(value[0] == 1234); key = 3; @@ -167,7 +231,8 @@ static void test_lru_sanity0(int map_type, int map_flags) /* key=2 has been removed from the LRU */ key = 2; - assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1); + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && + errno == ENOENT); assert(map_equal(lru_map_fd, expected_map_fd)); @@ -221,7 +286,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) /* Lookup 1 to tgt_free/2 */ end_key = 1 + batch_size; for (key = 1; key < end_key; key++) { - assert(!bpf_map_lookup_elem(lru_map_fd, &key, value)); + assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); assert(!bpf_map_update_elem(expected_map_fd, &key, value, BPF_NOEXIST)); } @@ -322,10 +387,11 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) end_key = 1 + batch_size; value[0] = 4321; for (key = 1; key < end_key; key++) { - assert(bpf_map_lookup_elem(lru_map_fd, &key, value)); + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && + errno == ENOENT); assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); - assert(!bpf_map_lookup_elem(lru_map_fd, &key, value)); + assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); assert(value[0] == 4321); assert(!bpf_map_update_elem(expected_map_fd, &key, value, BPF_NOEXIST)); @@ -404,7 +470,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) /* Lookup key 1 to tgt_free*3/2 */ end_key = tgt_free + batch_size; for (key = 1; key < end_key; key++) { - assert(!bpf_map_lookup_elem(lru_map_fd, &key, value)); + assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); assert(!bpf_map_update_elem(expected_map_fd, &key, value, BPF_NOEXIST)); } @@ -463,7 +529,7 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free) assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); for (key = 1; key <= tgt_free; key++) { - assert(!bpf_map_lookup_elem(lru_map_fd, &key, value)); + assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); assert(!bpf_map_update_elem(expected_map_fd, &key, value, BPF_NOEXIST)); } @@ -494,16 +560,16 @@ static void do_test_lru_sanity5(unsigned long long last_key, int map_fd) unsigned long long key, value[nr_cpus]; /* Ensure the last key inserted by previous CPU can be found */ - assert(!bpf_map_lookup_elem(map_fd, &last_key, value)); - + assert(!bpf_map_lookup_elem_with_ref_bit(map_fd, last_key, value)); value[0] = 1234; key = last_key + 1; assert(!bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST)); - assert(!bpf_map_lookup_elem(map_fd, &key, value)); + assert(!bpf_map_lookup_elem_with_ref_bit(map_fd, key, value)); /* Cannot find the last key because it was removed by LRU */ - assert(bpf_map_lookup_elem(map_fd, &last_key, value)); + assert(bpf_map_lookup_elem(map_fd, &last_key, value) == -1 && + errno == ENOENT); } /* Test map with only one element */ @@ -590,8 +656,8 @@ static void test_lru_sanity6(int map_type, int map_flags, int tgt_free) /* Make ref bit sticky for key: [1, tgt_free] */ for (stable_key = 1; stable_key <= tgt_free; stable_key++) { /* Mark the ref bit */ - assert(!bpf_map_lookup_elem(lru_map_fd, &stable_key, - value)); + assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, + stable_key, value)); } assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); @@ -612,6 +678,198 @@ static void test_lru_sanity6(int map_type, int map_flags, int tgt_free) printf("Pass\n"); } +/* Size of the LRU map is 2 + * Add key=1 (+1 key) + * Add key=2 (+1 key) + * Lookup Key=1 (datapath) + * Lookup Key=2 (syscall) + * Add Key=3 + * => Key=2 will be removed by LRU + * Iterate map. Only found key=1 and key=3 + */ +static void test_lru_sanity7(int map_type, int map_flags) +{ + unsigned long long key, value[nr_cpus]; + int lru_map_fd, expected_map_fd; + int next_cpu = 0; + + printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, + map_flags); + + assert(sched_next_online(0, &next_cpu) != -1); + + if (map_flags & BPF_F_NO_COMMON_LRU) + lru_map_fd = create_map(map_type, map_flags, 2 * nr_cpus); + else + lru_map_fd = create_map(map_type, map_flags, 2); + assert(lru_map_fd != -1); + + expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, 2); + assert(expected_map_fd != -1); + + value[0] = 1234; + + /* insert key=1 element */ + + key = 1; + assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); + assert(!bpf_map_update_elem(expected_map_fd, &key, value, + BPF_NOEXIST)); + + /* BPF_NOEXIST means: add new element if it doesn't exist */ + assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1 + /* key=1 already exists */ + && errno == EEXIST); + + /* insert key=2 element */ + + /* check that key=2 is not found */ + key = 2; + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && + errno == ENOENT); + + /* BPF_EXIST means: update existing element */ + assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 && + /* key=2 is not there */ + errno == ENOENT); + + assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); + + /* insert key=3 element */ + + /* check that key=3 is not found */ + key = 3; + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && + errno == ENOENT); + + /* check that key=1 can be found and mark the ref bit to + * stop LRU from removing key=1 + */ + key = 1; + assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); + assert(value[0] == 1234); + + /* check that key=2 can be found and do _not_ mark ref bit. + * this will be evicted on next update. + */ + key = 2; + assert(!bpf_map_lookup_elem(lru_map_fd, &key, value)); + assert(value[0] == 1234); + + key = 3; + assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); + assert(!bpf_map_update_elem(expected_map_fd, &key, value, + BPF_NOEXIST)); + + /* key=2 has been removed from the LRU */ + key = 2; + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && + errno == ENOENT); + + assert(map_equal(lru_map_fd, expected_map_fd)); + + close(expected_map_fd); + close(lru_map_fd); + + printf("Pass\n"); +} + +/* Size of the LRU map is 2 + * Add key=1 (+1 key) + * Add key=2 (+1 key) + * Lookup Key=1 (syscall) + * Lookup Key=2 (datapath) + * Add Key=3 + * => Key=1 will be removed by LRU + * Iterate map. Only found key=2 and key=3 + */ +static void test_lru_sanity8(int map_type, int map_flags) +{ + unsigned long long key, value[nr_cpus]; + int lru_map_fd, expected_map_fd; + int next_cpu = 0; + + printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, + map_flags); + + assert(sched_next_online(0, &next_cpu) != -1); + + if (map_flags & BPF_F_NO_COMMON_LRU) + lru_map_fd = create_map(map_type, map_flags, 2 * nr_cpus); + else + lru_map_fd = create_map(map_type, map_flags, 2); + assert(lru_map_fd != -1); + + expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, 2); + assert(expected_map_fd != -1); + + value[0] = 1234; + + /* insert key=1 element */ + + key = 1; + assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); + + /* BPF_NOEXIST means: add new element if it doesn't exist */ + assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1 + /* key=1 already exists */ + && errno == EEXIST); + + /* insert key=2 element */ + + /* check that key=2 is not found */ + key = 2; + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && + errno == ENOENT); + + /* BPF_EXIST means: update existing element */ + assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 && + /* key=2 is not there */ + errno == ENOENT); + + assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); + assert(!bpf_map_update_elem(expected_map_fd, &key, value, + BPF_NOEXIST)); + + /* insert key=3 element */ + + /* check that key=3 is not found */ + key = 3; + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && + errno == ENOENT); + + /* check that key=1 can be found and do _not_ mark ref bit. + * this will be evicted on next update. + */ + key = 1; + assert(!bpf_map_lookup_elem(lru_map_fd, &key, value)); + assert(value[0] == 1234); + + /* check that key=2 can be found and mark the ref bit to + * stop LRU from removing key=2 + */ + key = 2; + assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); + assert(value[0] == 1234); + + key = 3; + assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); + assert(!bpf_map_update_elem(expected_map_fd, &key, value, + BPF_NOEXIST)); + + /* key=1 has been removed from the LRU */ + key = 1; + assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && + errno == ENOENT); + + assert(map_equal(lru_map_fd, expected_map_fd)); + + close(expected_map_fd); + close(lru_map_fd); + + printf("Pass\n"); +} + int main(int argc, char **argv) { int map_types[] = {BPF_MAP_TYPE_LRU_HASH, @@ -637,6 +895,8 @@ int main(int argc, char **argv) test_lru_sanity4(map_types[t], map_flags[f], tgt_free); test_lru_sanity5(map_types[t], map_flags[f]); test_lru_sanity6(map_types[t], map_flags[f], tgt_free); + test_lru_sanity7(map_types[t], map_flags[f]); + test_lru_sanity8(map_types[t], map_flags[f]); printf("\n"); } diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 524b15dabb3c..b9171a7b3aaa 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -430,15 +430,15 @@ setup_xfrm() { veth_a_addr="${2}" veth_b_addr="${3}" - run_cmd "${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel" || return 1 - run_cmd "${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel" - run_cmd "${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel" - run_cmd "${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel" - - run_cmd "${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel" - run_cmd "${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel" - run_cmd "${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel" - run_cmd "${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel" + run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1 + run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel + run_cmd ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel + run_cmd ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel + + run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel + run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel + run_cmd ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel + run_cmd ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel } setup_xfrm4() { diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index 21159f5f3362..14fcf3104c77 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -8,6 +8,11 @@ ksft_skip=4 ret=0 test_inet_nat=true +cleanup() +{ + for i in 0 1 2; do ip netns del ns$i;done +} + nft --version > /dev/null 2>&1 if [ $? -ne 0 ];then echo "SKIP: Could not run test without nft tool" @@ -21,6 +26,13 @@ if [ $? -ne 0 ];then fi ip netns add ns0 +if [ $? -ne 0 ];then + echo "SKIP: Could not create net namespace" + exit $ksft_skip +fi + +trap cleanup EXIT + ip netns add ns1 ip netns add ns2 @@ -347,7 +359,7 @@ EOF test_masquerade6() { local family=$1 - local natflags=$1 + local natflags=$2 local lret=0 ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null @@ -392,18 +404,13 @@ EOF ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then -<<<<<<< HEAD - echo "ERROR: cannot ping ns1 from ns2 with active $family masquerading" -======= - echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerade $natflags" ->>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1 + echo "ERROR: cannot ping ns1 from ns2 with active $family masquerade $natflags" lret=1 fi # ns1 should have seen packets from ns0, due to masquerade expect="packets 1 bytes 104" for dir in "in6" "out6" ; do - cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") if [ $? -ne 0 ]; then bad_counter ns1 ns0$dir "$expect" @@ -433,38 +440,27 @@ EOF fi done -<<<<<<< HEAD - ip netns exec ns0 nft flush chain $family nat postrouting -======= ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerade $natflags (attempt 2)" lret=1 fi - ip netns exec ns0 nft flush chain ip6 nat postrouting ->>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1 + ip netns exec ns0 nft flush chain $family nat postrouting if [ $? -ne 0 ]; then echo "ERROR: Could not flush $family nat postrouting" 1>&2 lret=1 fi -<<<<<<< HEAD - test $lret -eq 0 && echo "PASS: $family IPv6 masquerade for ns2" -======= - test $lret -eq 0 && echo "PASS: IPv6 masquerade $natflags for ns2" ->>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1 + test $lret -eq 0 && echo "PASS: $family IPv6 masquerade $natflags for ns2" return $lret } test_masquerade() { -<<<<<<< HEAD local family=$1 -======= - local natflags=$1 ->>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1 + local natflags=$2 local lret=0 ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null @@ -509,11 +505,7 @@ EOF ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then -<<<<<<< HEAD - echo "ERROR: cannot ping ns1 from ns2 with active $family masquerading" -======= - echo "ERROR: cannot ping ns1 from ns2 with active ip masquere $natflags" ->>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1 + echo "ERROR: cannot ping ns1 from ns2 with active $family masquerade $natflags" lret=1 fi @@ -549,27 +541,19 @@ EOF fi done -<<<<<<< HEAD - ip netns exec ns0 nft flush chain $family nat postrouting -======= ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then echo "ERROR: cannot ping ns1 from ns2 with active ip masquerade $natflags (attempt 2)" lret=1 fi - ip netns exec ns0 nft flush chain ip nat postrouting ->>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1 + ip netns exec ns0 nft flush chain $family nat postrouting if [ $? -ne 0 ]; then echo "ERROR: Could not flush $family nat postrouting" 1>&2 lret=1 fi -<<<<<<< HEAD - test $lret -eq 0 && echo "PASS: $family IP masquerade for ns2" -======= - test $lret -eq 0 && echo "PASS: IP masquerade $natflags for ns2" ->>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1 + test $lret -eq 0 && echo "PASS: $family IP masquerade $natflags for ns2" return $lret } @@ -842,21 +826,14 @@ reset_counters $test_inet_nat && test_local_dnat inet $test_inet_nat && test_local_dnat6 inet +for flags in "" "fully-random"; do reset_counters -<<<<<<< HEAD -test_masquerade ip -test_masquerade6 ip6 +test_masquerade ip $flags +test_masquerade6 ip6 $flags reset_counters -$test_inet_nat && test_masquerade inet -$test_inet_nat && test_masquerade6 inet -======= -test_masquerade "" -test_masquerade6 "" - -reset_counters -test_masquerade "fully-random" -test_masquerade6 "fully-random" ->>>>>>> cd8dead0c39457e58ec1d36db93aedca811d48f1 +$test_inet_nat && test_masquerade inet $flags +$test_inet_nat && test_masquerade6 inet $flags +done reset_counters test_redirect ip @@ -865,6 +842,4 @@ reset_counters $test_inet_nat && test_redirect inet $test_inet_nat && test_redirect6 inet -for i in 0 1 2; do ip netns del ns$i;done - exit $ret |