diff options
Diffstat (limited to 'tools/lib')
26 files changed, 562 insertions, 253 deletions
| diff --git a/tools/lib/api/fs/cgroup.c b/tools/lib/api/fs/cgroup.c index 1573dae4259d..250629a09423 100644 --- a/tools/lib/api/fs/cgroup.c +++ b/tools/lib/api/fs/cgroup.c @@ -14,7 +14,7 @@ struct cgroupfs_cache_entry {  };  /* just cache last used one */ -static struct cgroupfs_cache_entry cached; +static struct cgroupfs_cache_entry *cached;  int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys)  { @@ -24,9 +24,9 @@ int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys)  	char *p, *path;  	char mountpoint[PATH_MAX]; -	if (!strcmp(cached.subsys, subsys)) { -		if (strlen(cached.mountpoint) < maxlen) { -			strcpy(buf, cached.mountpoint); +	if (cached && !strcmp(cached->subsys, subsys)) { +		if (strlen(cached->mountpoint) < maxlen) { +			strcpy(buf, cached->mountpoint);  			return 0;  		}  		return -1; @@ -91,8 +91,13 @@ int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys)  	free(line);  	fclose(fp); -	strncpy(cached.subsys, subsys, sizeof(cached.subsys) - 1); -	strcpy(cached.mountpoint, mountpoint); +	if (!cached) +		cached = calloc(1, sizeof(*cached)); + +	if (cached) { +		strncpy(cached->subsys, subsys, sizeof(cached->subsys) - 1); +		strcpy(cached->mountpoint, mountpoint); +	}  	if (mountpoint[0] && strlen(mountpoint) < maxlen) {  		strcpy(buf, mountpoint); diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 82f53d81a7a7..5cb0eeec2c8a 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -1,4 +1,5 @@  // SPDX-License-Identifier: GPL-2.0 +#include <assert.h>  #include <ctype.h>  #include <errno.h>  #include <limits.h> @@ -10,6 +11,7 @@  #include <sys/types.h>  #include <sys/stat.h>  #include <fcntl.h> +#include <pthread.h>  #include <unistd.h>  #include <sys/mount.h> @@ -43,7 +45,7 @@  #define BPF_FS_MAGIC           0xcafe4a11  #endif -static const char * const sysfs__fs_known_mountpoints[] = { +static const char * const sysfs__known_mountpoints[] = {  	"/sys",  	0,  }; @@ 
-86,87 +88,89 @@ static const char * const bpf_fs__known_mountpoints[] = {  };  struct fs { -	const char		*name; -	const char * const	*mounts; -	char			 path[PATH_MAX]; -	bool			 found; -	bool			 checked; -	long			 magic; -}; - -enum { -	FS__SYSFS   = 0, -	FS__PROCFS  = 1, -	FS__DEBUGFS = 2, -	FS__TRACEFS = 3, -	FS__HUGETLBFS = 4, -	FS__BPF_FS = 5, +	const char *		 const name; +	const char * const *	 const mounts; +	char			*path; +	pthread_mutex_t		 mount_mutex; +	const long		 magic;  };  #ifndef TRACEFS_MAGIC  #define TRACEFS_MAGIC 0x74726163  #endif -static struct fs fs__entries[] = { -	[FS__SYSFS] = { -		.name	= "sysfs", -		.mounts	= sysfs__fs_known_mountpoints, -		.magic	= SYSFS_MAGIC, -		.checked = false, -	}, -	[FS__PROCFS] = { -		.name	= "proc", -		.mounts	= procfs__known_mountpoints, -		.magic	= PROC_SUPER_MAGIC, -		.checked = false, -	}, -	[FS__DEBUGFS] = { -		.name	= "debugfs", -		.mounts	= debugfs__known_mountpoints, -		.magic	= DEBUGFS_MAGIC, -		.checked = false, -	}, -	[FS__TRACEFS] = { -		.name	= "tracefs", -		.mounts	= tracefs__known_mountpoints, -		.magic	= TRACEFS_MAGIC, -		.checked = false, -	}, -	[FS__HUGETLBFS] = { -		.name	= "hugetlbfs", -		.mounts = hugetlbfs__known_mountpoints, -		.magic	= HUGETLBFS_MAGIC, -		.checked = false, -	}, -	[FS__BPF_FS] = { -		.name	= "bpf", -		.mounts = bpf_fs__known_mountpoints, -		.magic	= BPF_FS_MAGIC, -		.checked = false, -	}, -}; +static void fs__init_once(struct fs *fs); +static const char *fs__mountpoint(const struct fs *fs); +static const char *fs__mount(struct fs *fs); + +#define FS(lower_name, fs_name, upper_name)		\ +static struct fs fs__##lower_name = {			\ +	.name = #fs_name,				\ +	.mounts = lower_name##__known_mountpoints,	\ +	.magic = upper_name##_MAGIC,			\ +	.mount_mutex = PTHREAD_MUTEX_INITIALIZER,	\ +};							\ +							\ +static void lower_name##_init_once(void)		\ +{							\ +	struct fs *fs = &fs__##lower_name;		\ +							\ +	fs__init_once(fs);				\ +}							\ +							\ +const char 
*lower_name##__mountpoint(void)		\ +{							\ +	static pthread_once_t init_once = PTHREAD_ONCE_INIT;	\ +	struct fs *fs = &fs__##lower_name;		\ +							\ +	pthread_once(&init_once, lower_name##_init_once);	\ +	return fs__mountpoint(fs);			\ +}							\ +							\ +const char *lower_name##__mount(void)			\ +{							\ +	const char *mountpoint = lower_name##__mountpoint();	\ +	struct fs *fs = &fs__##lower_name;		\ +							\ +	if (mountpoint)					\ +		return mountpoint;			\ +							\ +	return fs__mount(fs);				\ +}							\ +							\ +bool lower_name##__configured(void)			\ +{							\ +	return lower_name##__mountpoint() != NULL;	\ +} + +FS(sysfs, sysfs, SYSFS); +FS(procfs, procfs, PROC_SUPER); +FS(debugfs, debugfs, DEBUGFS); +FS(tracefs, tracefs, TRACEFS); +FS(hugetlbfs, hugetlbfs, HUGETLBFS); +FS(bpf_fs, bpf, BPF_FS);  static bool fs__read_mounts(struct fs *fs)  { -	bool found = false;  	char type[100];  	FILE *fp; +	char path[PATH_MAX + 1];  	fp = fopen("/proc/mounts", "r");  	if (fp == NULL) -		return NULL; +		return false; -	while (!found && -	       fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n", -		      fs->path, type) == 2) { +	while (fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n", +		      path, type) == 2) { -		if (strcmp(type, fs->name) == 0) -			found = true; +		if (strcmp(type, fs->name) == 0) { +			fs->path = strdup(path); +			fclose(fp); +			return fs->path != NULL; +		}  	} -  	fclose(fp); -	fs->checked = true; -	return fs->found = found; +	return false;  }  static int fs__valid_mount(const char *fs, long magic) @@ -188,8 +192,9 @@ static bool fs__check_mounts(struct fs *fs)  	ptr = fs->mounts;  	while (*ptr) {  		if (fs__valid_mount(*ptr, fs->magic) == 0) { -			fs->found = true; -			strcpy(fs->path, *ptr); +			fs->path = strdup(*ptr); +			if (!fs->path) +				return false;  			return true;  		}  		ptr++; @@ -227,43 +232,26 @@ static bool fs__env_override(struct fs *fs)  	if (!override_path)  		return false; -	fs->found = true; -	
fs->checked = true; -	strncpy(fs->path, override_path, sizeof(fs->path) - 1); -	fs->path[sizeof(fs->path) - 1] = '\0'; +	fs->path = strdup(override_path); +	if (!fs->path) +		return false;  	return true;  } -static const char *fs__get_mountpoint(struct fs *fs) +static void fs__init_once(struct fs *fs)  { -	if (fs__env_override(fs)) -		return fs->path; - -	if (fs__check_mounts(fs)) -		return fs->path; - -	if (fs__read_mounts(fs)) -		return fs->path; - -	return NULL; +	if (!fs__env_override(fs) && +	    !fs__check_mounts(fs) && +	    !fs__read_mounts(fs)) { +		assert(!fs->path); +	} else { +		assert(fs->path); +	}  } -static const char *fs__mountpoint(int idx) +static const char *fs__mountpoint(const struct fs *fs)  { -	struct fs *fs = &fs__entries[idx]; - -	if (fs->found) -		return (const char *)fs->path; - -	/* the mount point was already checked for the mount point -	 * but and did not exist, so return NULL to avoid scanning again. -	 * This makes the found and not found paths cost equivalent -	 * in case of multiple calls. -	 */ -	if (fs->checked) -		return NULL; - -	return fs__get_mountpoint(fs); +	return fs->path;  }  static const char *mount_overload(struct fs *fs) @@ -278,45 +266,29 @@ static const char *mount_overload(struct fs *fs)  	return getenv(upper_name) ?: *fs->mounts;  } -static const char *fs__mount(int idx) +static const char *fs__mount(struct fs *fs)  { -	struct fs *fs = &fs__entries[idx];  	const char *mountpoint; -	if (fs__mountpoint(idx)) -		return (const char *)fs->path; +	pthread_mutex_lock(&fs->mount_mutex); -	mountpoint = mount_overload(fs); +	/* Check if path found inside the mutex to avoid races with other callers of mount. */ +	mountpoint = fs__mountpoint(fs); +	if (mountpoint) +		goto out; -	if (mount(NULL, mountpoint, fs->name, 0, NULL) < 0) -		return NULL; - -	return fs__check_mounts(fs) ? 
fs->path : NULL; -} +	mountpoint = mount_overload(fs); -#define FS(name, idx)				\ -const char *name##__mountpoint(void)		\ -{						\ -	return fs__mountpoint(idx);		\ -}						\ -						\ -const char *name##__mount(void)			\ -{						\ -	return fs__mount(idx);			\ -}						\ -						\ -bool name##__configured(void)			\ -{						\ -	return name##__mountpoint() != NULL;	\ +	if (mount(NULL, mountpoint, fs->name, 0, NULL) == 0 && +	    fs__valid_mount(mountpoint, fs->magic) == 0) { +		fs->path = strdup(mountpoint); +		mountpoint = fs->path; +	} +out: +	pthread_mutex_unlock(&fs->mount_mutex); +	return mountpoint;  } -FS(sysfs,   FS__SYSFS); -FS(procfs,  FS__PROCFS); -FS(debugfs, FS__DEBUGFS); -FS(tracefs, FS__TRACEFS); -FS(hugetlbfs, FS__HUGETLBFS); -FS(bpf_fs, FS__BPF_FS); -  int filename__read_int(const char *filename, int *value)  {  	char line[64]; diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c index 7ba3e81274e8..30745f35d0d2 100644 --- a/tools/lib/api/fs/tracing_path.c +++ b/tools/lib/api/fs/tracing_path.c @@ -13,17 +13,12 @@  #include "tracing_path.h" -static char tracing_mnt[PATH_MAX]  = "/sys/kernel/debug";  static char tracing_path[PATH_MAX]        = "/sys/kernel/tracing"; -static char tracing_events_path[PATH_MAX] = "/sys/kernel/tracing/events";  static void __tracing_path_set(const char *tracing, const char *mountpoint)  { -	snprintf(tracing_mnt, sizeof(tracing_mnt), "%s", mountpoint);  	snprintf(tracing_path, sizeof(tracing_path), "%s/%s",  		 mountpoint, tracing); -	snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s", -		 mountpoint, tracing, "events");  }  static const char *tracing_path_tracefs_mount(void) @@ -149,15 +144,15 @@ int tracing_path__strerror_open_tp(int err, char *buf, size_t size,  			/* sdt markers */  			if (!strncmp(filename, "sdt_", 4)) {  				snprintf(buf, size, -					"Error:\tFile %s/%s not found.\n" +					"Error:\tFile %s/events/%s not found.\n"  					"Hint:\tSDT event cannot be directly 
recorded on.\n"  					"\tPlease first use 'perf probe %s:%s' before recording it.\n", -					tracing_events_path, filename, sys, name); +					tracing_path, filename, sys, name);  			} else {  				snprintf(buf, size, -					 "Error:\tFile %s/%s not found.\n" +					 "Error:\tFile %s/events/%s not found.\n"  					 "Hint:\tPerhaps this kernel misses some CONFIG_ setting to enable this feature?.\n", -					 tracing_events_path, filename); +					 tracing_path, filename);  			}  			break;  		} @@ -169,9 +164,9 @@ int tracing_path__strerror_open_tp(int err, char *buf, size_t size,  		break;  	case EACCES: {  		snprintf(buf, size, -			 "Error:\tNo permissions to read %s/%s\n" +			 "Error:\tNo permissions to read %s/events/%s\n"  			 "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n", -			 tracing_events_path, filename, tracing_path_mount()); +			 tracing_path, filename, tracing_path_mount());  	}  		break;  	default: diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h index d5e8cf0dada0..9fc429d2852d 100644 --- a/tools/lib/api/io.h +++ b/tools/lib/api/io.h @@ -8,6 +8,7 @@  #define __API_IO__  #include <errno.h> +#include <poll.h>  #include <stdlib.h>  #include <string.h>  #include <unistd.h> @@ -23,6 +24,8 @@ struct io {  	char *end;  	/* Currently accessed data pointer. */  	char *data; +	/* Read timeout, 0 implies no timeout. */ +	int timeout_ms;  	/* Set true on when the end of file on read error. 
*/  	bool eof;  }; @@ -35,6 +38,7 @@ static inline void io__init(struct io *io, int fd,  	io->buf = buf;  	io->end = buf;  	io->data = buf; +	io->timeout_ms = 0;  	io->eof = false;  } @@ -47,7 +51,29 @@ static inline int io__get_char(struct io *io)  		return -1;  	if (ptr == io->end) { -		ssize_t n = read(io->fd, io->buf, io->buf_len); +		ssize_t n; + +		if (io->timeout_ms != 0) { +			struct pollfd pfds[] = { +				{ +					.fd = io->fd, +					.events = POLLIN, +				}, +			}; + +			n = poll(pfds, 1, io->timeout_ms); +			if (n == 0) +				errno = ETIMEDOUT; +			if (n > 0 && !(pfds[0].revents & POLLIN)) { +				errno = EIO; +				n = -1; +			} +			if (n <= 0) { +				io->eof = true; +				return -1; +			} +		} +		n = read(io->fd, io->buf, io->buf_len);  		if (n <= 0) {  			io->eof = true; diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 128ac723c4ea..ed86b37d8024 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -572,20 +572,30 @@ int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *co  				    (void *)keys, (void *)values, count, opts);  } -int bpf_obj_pin(int fd, const char *pathname) +int bpf_obj_pin_opts(int fd, const char *pathname, const struct bpf_obj_pin_opts *opts)  { -	const size_t attr_sz = offsetofend(union bpf_attr, file_flags); +	const size_t attr_sz = offsetofend(union bpf_attr, path_fd);  	union bpf_attr attr;  	int ret; +	if (!OPTS_VALID(opts, bpf_obj_pin_opts)) +		return libbpf_err(-EINVAL); +  	memset(&attr, 0, attr_sz); +	attr.path_fd = OPTS_GET(opts, path_fd, 0);  	attr.pathname = ptr_to_u64((void *)pathname); +	attr.file_flags = OPTS_GET(opts, file_flags, 0);  	attr.bpf_fd = fd;  	ret = sys_bpf(BPF_OBJ_PIN, &attr, attr_sz);  	return libbpf_err_errno(ret);  } +int bpf_obj_pin(int fd, const char *pathname) +{ +	return bpf_obj_pin_opts(fd, pathname, NULL); +} +  int bpf_obj_get(const char *pathname)  {  	return bpf_obj_get_opts(pathname, NULL); @@ -593,7 +603,7 @@ int bpf_obj_get(const char *pathname)  int 
bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts)  { -	const size_t attr_sz = offsetofend(union bpf_attr, file_flags); +	const size_t attr_sz = offsetofend(union bpf_attr, path_fd);  	union bpf_attr attr;  	int fd; @@ -601,6 +611,7 @@ int bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts)  		return libbpf_err(-EINVAL);  	memset(&attr, 0, attr_sz); +	attr.path_fd = OPTS_GET(opts, path_fd, 0);  	attr.pathname = ptr_to_u64((void *)pathname);  	attr.file_flags = OPTS_GET(opts, file_flags, 0); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index a2c091389b18..9aa0ee473754 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -284,16 +284,30 @@ LIBBPF_API int bpf_map_update_batch(int fd, const void *keys, const void *values  				    __u32 *count,  				    const struct bpf_map_batch_opts *opts); -struct bpf_obj_get_opts { +struct bpf_obj_pin_opts {  	size_t sz; /* size of this struct for forward/backward compatibility */  	__u32 file_flags; +	int path_fd;  	size_t :0;  }; -#define bpf_obj_get_opts__last_field file_flags +#define bpf_obj_pin_opts__last_field path_fd  LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); +LIBBPF_API int bpf_obj_pin_opts(int fd, const char *pathname, +				const struct bpf_obj_pin_opts *opts); + +struct bpf_obj_get_opts { +	size_t sz; /* size of this struct for forward/backward compatibility */ + +	__u32 file_flags; +	int path_fd; + +	size_t :0; +}; +#define bpf_obj_get_opts__last_field path_fd +  LIBBPF_API int bpf_obj_get(const char *pathname);  LIBBPF_API int bpf_obj_get_opts(const char *pathname,  				const struct bpf_obj_get_opts *opts); diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 929a3baca8ef..bbab9ad9dc5a 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -77,16 +77,21 @@  /*   * Helper macros to manipulate data structures   */ -#ifndef offsetof -#define offsetof(TYPE, MEMBER)	((unsigned long)&((TYPE 
*)0)->MEMBER) -#endif -#ifndef container_of + +/* offsetof() definition that uses __builtin_offset() might not preserve field + * offset CO-RE relocation properly, so force-redefine offsetof() using + * old-school approach which works with CO-RE correctly + */ +#undef offsetof +#define offsetof(type, member)	((unsigned long)&((type *)0)->member) + +/* redefined container_of() to ensure we use the above offsetof() macro */ +#undef container_of  #define container_of(ptr, type, member)				\  	({							\  		void *__mptr = (void *)(ptr);			\  		((type *)(__mptr - offsetof(type, member)));	\  	}) -#endif  /*   * Compiler (optimization) barrier. diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 6fb3d0f9af17..be076a4041ab 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -351,6 +351,7 @@ struct pt_regs___arm64 {   * https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#risc-v-calling-conventions   */ +/* riscv provides struct user_regs_struct instead of struct pt_regs to userspace */  #define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x))  #define __PT_PARM1_REG a0  #define __PT_PARM2_REG a1 @@ -383,7 +384,7 @@ struct pt_regs___arm64 {   * https://raw.githubusercontent.com/wiki/foss-for-synopsys-dwc-arc-processors/toolchain/files/ARCv2_ABI.pdf   */ -/* arc provides struct user_pt_regs instead of struct pt_regs to userspace */ +/* arc provides struct user_regs_struct instead of struct pt_regs to userspace */  #define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x))  #define __PT_PARM1_REG scratch.r0  #define __PT_PARM2_REG scratch.r1 diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 0a2c079244b6..8484b563b53d 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1064,7 +1064,7 @@ static struct btf *btf_parse_raw(const char *path, struct btf *base_btf)  	int err = 0;  	long sz; -	f = fopen(path, "rb"); +	f = fopen(path, "rbe");  	if (!f) {  		err = -errno;  		
goto err_out; diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 580985ee5545..4d9f30bf7f01 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -2250,9 +2250,25 @@ static int btf_dump_type_data_check_overflow(struct btf_dump *d,  					     const struct btf_type *t,  					     __u32 id,  					     const void *data, -					     __u8 bits_offset) +					     __u8 bits_offset, +					     __u8 bit_sz)  { -	__s64 size = btf__resolve_size(d->btf, id); +	__s64 size; + +	if (bit_sz) { +		/* bits_offset is at most 7. bit_sz is at most 128. */ +		__u8 nr_bytes = (bits_offset + bit_sz + 7) / 8; + +		/* When bit_sz is non zero, it is called from +		 * btf_dump_struct_data() where it only cares about +		 * negative error value. +		 * Return nr_bytes in success case to make it +		 * consistent as the regular integer case below. +		 */ +		return data + nr_bytes > d->typed_dump->data_end ? -E2BIG : nr_bytes; +	} + +	size = btf__resolve_size(d->btf, id);  	if (size < 0 || size >= INT_MAX) {  		pr_warn("unexpected size [%zu] for id [%u]\n", @@ -2407,7 +2423,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d,  {  	int size, err = 0; -	size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset); +	size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset, bit_sz);  	if (size < 0)  		return size;  	err = btf_dump_type_data_check_zero(d, t, id, data, bits_offset, bit_sz); diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 83e8e3bfd8ff..cf3323fd47b8 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -703,17 +703,17 @@ static void emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo  	/* obtain fd in BPF_REG_9 */  	emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7));  	emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32)); -	/* jump to fd_array store if fd denotes module BTF */ -	emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2)); -	/* set the default value for off */ -	
emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); -	/* skip BTF fd store for vmlinux BTF */ -	emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4));  	/* load fd_array slot pointer */  	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE,  					 0, 0, 0, blob_fd_array_off(gen, btf_fd_idx))); -	/* store BTF fd in slot */ +	/* store BTF fd in slot, 0 for vmlinux */  	emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0)); +	/* jump to insn[insn_idx].off store if fd denotes module BTF */ +	emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2)); +	/* set the default value for off */ +	emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); +	/* skip BTF fd store for vmlinux BTF */ +	emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 1));  	/* store index into insn[insn_idx].off */  	emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), btf_fd_idx));  log: diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ad1ec893b41b..214f828ece6b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -117,6 +117,7 @@ static const char * const attach_type_name[] = {  	[BPF_PERF_EVENT]		= "perf_event",  	[BPF_TRACE_KPROBE_MULTI]	= "trace_kprobe_multi",  	[BPF_STRUCT_OPS]		= "struct_ops", +	[BPF_NETFILTER]			= "netfilter",  };  static const char * const link_type_name[] = { @@ -1500,16 +1501,36 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)  	return map;  } -static size_t bpf_map_mmap_sz(const struct bpf_map *map) +static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)  { -	long page_sz = sysconf(_SC_PAGE_SIZE); +	const long page_sz = sysconf(_SC_PAGE_SIZE);  	size_t map_sz; -	map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries; +	map_sz = (size_t)roundup(value_sz, 8) * max_entries;  	map_sz = roundup(map_sz, page_sz);  	return map_sz;  } +static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz) +{ +	void *mmaped; + +	if 
(!map->mmaped) +		return -EINVAL; + +	if (old_sz == new_sz) +		return 0; + +	mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); +	if (mmaped == MAP_FAILED) +		return -errno; + +	memcpy(mmaped, map->mmaped, min(old_sz, new_sz)); +	munmap(map->mmaped, old_sz); +	map->mmaped = mmaped; +	return 0; +} +  static char *internal_map_name(struct bpf_object *obj, const char *real_name)  {  	char map_name[BPF_OBJ_NAME_LEN], *p; @@ -1608,6 +1629,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,  {  	struct bpf_map_def *def;  	struct bpf_map *map; +	size_t mmap_sz;  	int err;  	map = bpf_object__add_map(obj); @@ -1642,7 +1664,8 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,  	pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",  		 map->name, map->sec_idx, map->sec_offset, def->map_flags); -	map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE, +	mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); +	map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,  			   MAP_SHARED | MAP_ANONYMOUS, -1, 0);  	if (map->mmaped == MAP_FAILED) {  		err = -errno; @@ -4329,7 +4352,7 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)  	snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);  	memset(info, 0, sizeof(*info)); -	fp = fopen(file, "r"); +	fp = fopen(file, "re");  	if (!fp) {  		err = -errno;  		pr_warn("failed to open %s: %d. No procfs support?\n", file, @@ -4392,18 +4415,17 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)  	if (!new_name)  		return libbpf_err(-errno); -	new_fd = open("/", O_RDONLY | O_CLOEXEC); +	/* +	 * Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set. +	 * This is similar to what we do in ensure_good_fd(), but without +	 * closing original FD. 
+	 */ +	new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);  	if (new_fd < 0) {  		err = -errno;  		goto err_free_new_name;  	} -	new_fd = dup3(fd, new_fd, O_CLOEXEC); -	if (new_fd < 0) { -		err = -errno; -		goto err_close_new_fd; -	} -  	err = zclose(map->fd);  	if (err) {  		err = -errno; @@ -7433,7 +7455,7 @@ int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx)  	int ret, err = 0;  	FILE *f; -	f = fopen("/proc/kallsyms", "r"); +	f = fopen("/proc/kallsyms", "re");  	if (!f) {  		err = -errno;  		pr_warn("failed to open /proc/kallsyms: %d\n", err); @@ -8294,7 +8316,10 @@ static void bpf_map__destroy(struct bpf_map *map)  	map->init_slots_sz = 0;  	if (map->mmaped) { -		munmap(map->mmaped, bpf_map_mmap_sz(map)); +		size_t mmap_sz; + +		mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); +		munmap(map->mmaped, mmap_sz);  		map->mmaped = NULL;  	} @@ -8712,7 +8737,7 @@ static const struct bpf_sec_def section_defs[] = {  	SEC_DEF("struct_ops+",		STRUCT_OPS, 0, SEC_NONE),  	SEC_DEF("struct_ops.s+",	STRUCT_OPS, 0, SEC_SLEEPABLE),  	SEC_DEF("sk_lookup",		SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE), -	SEC_DEF("netfilter",		NETFILTER, 0, SEC_NONE), +	SEC_DEF("netfilter",		NETFILTER, BPF_NETFILTER, SEC_NONE),  };  static size_t custom_sec_def_cnt; @@ -9412,10 +9437,103 @@ __u32 bpf_map__value_size(const struct bpf_map *map)  	return map->def.value_size;  } +static int map_btf_datasec_resize(struct bpf_map *map, __u32 size) +{ +	struct btf *btf; +	struct btf_type *datasec_type, *var_type; +	struct btf_var_secinfo *var; +	const struct btf_type *array_type; +	const struct btf_array *array; +	int vlen, element_sz, new_array_id; +	__u32 nr_elements; + +	/* check btf existence */ +	btf = bpf_object__btf(map->obj); +	if (!btf) +		return -ENOENT; + +	/* verify map is datasec */ +	datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map)); +	if (!btf_is_datasec(datasec_type)) { +		pr_warn("map '%s': cannot be resized, map value type is not a datasec\n", +			
bpf_map__name(map)); +		return -EINVAL; +	} + +	/* verify datasec has at least one var */ +	vlen = btf_vlen(datasec_type); +	if (vlen == 0) { +		pr_warn("map '%s': cannot be resized, map value datasec is empty\n", +			bpf_map__name(map)); +		return -EINVAL; +	} + +	/* verify last var in the datasec is an array */ +	var = &btf_var_secinfos(datasec_type)[vlen - 1]; +	var_type = btf_type_by_id(btf, var->type); +	array_type = skip_mods_and_typedefs(btf, var_type->type, NULL); +	if (!btf_is_array(array_type)) { +		pr_warn("map '%s': cannot be resized, last var must be an array\n", +			bpf_map__name(map)); +		return -EINVAL; +	} + +	/* verify request size aligns with array */ +	array = btf_array(array_type); +	element_sz = btf__resolve_size(btf, array->type); +	if (element_sz <= 0 || (size - var->offset) % element_sz != 0) { +		pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n", +			bpf_map__name(map), element_sz, size); +		return -EINVAL; +	} + +	/* create a new array based on the existing array, but with new length */ +	nr_elements = (size - var->offset) / element_sz; +	new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements); +	if (new_array_id < 0) +		return new_array_id; + +	/* adding a new btf type invalidates existing pointers to btf objects, +	 * so refresh pointers before proceeding +	 */ +	datasec_type = btf_type_by_id(btf, map->btf_value_type_id); +	var = &btf_var_secinfos(datasec_type)[vlen - 1]; +	var_type = btf_type_by_id(btf, var->type); + +	/* finally update btf info */ +	datasec_type->size = size; +	var->size = size - var->offset; +	var_type->type = new_array_id; + +	return 0; +} +  int bpf_map__set_value_size(struct bpf_map *map, __u32 size)  {  	if (map->fd >= 0)  		return libbpf_err(-EBUSY); + +	if (map->mmaped) { +		int err; +		size_t mmap_old_sz, mmap_new_sz; + +		mmap_old_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); +		mmap_new_sz = bpf_map_mmap_sz(size, 
map->def.max_entries); +		err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz); +		if (err) { +			pr_warn("map '%s': failed to resize memory-mapped region: %d\n", +				bpf_map__name(map), err); +			return err; +		} +		err = map_btf_datasec_resize(map, size); +		if (err && err != -ENOENT) { +			pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %d\n", +				bpf_map__name(map), err); +			map->btf_value_type_id = 0; +			map->btf_key_type_id = 0; +		} +	} +  	map->def.value_size = size;  	return 0;  } @@ -9441,7 +9559,7 @@ int bpf_map__set_initial_value(struct bpf_map *map,  	return 0;  } -const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize) +void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)  {  	if (!map->mmaped)  		return NULL; @@ -9957,7 +10075,7 @@ static int parse_uint_from_file(const char *file, const char *fmt)  	int err, ret;  	FILE *f; -	f = fopen(file, "r"); +	f = fopen(file, "re");  	if (!f) {  		err = -errno;  		pr_debug("failed to open '%s': %s\n", file, @@ -12693,7 +12811,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)  	for (i = 0; i < s->map_cnt; i++) {  		struct bpf_map *map = *s->maps[i].map; -		size_t mmap_sz = bpf_map_mmap_sz(map); +		size_t mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);  		int prot, map_fd = bpf_map__fd(map);  		void **mmaped = s->maps[i].mmaped; @@ -12720,8 +12838,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)  		 * as per normal clean up procedure, so we don't need to worry  		 * about it from skeleton's clean up perspective.  		 
*/ -		*mmaped = mmap(map->mmaped, mmap_sz, prot, -				MAP_SHARED | MAP_FIXED, map_fd, 0); +		*mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map_fd, 0);  		if (*mmaped == MAP_FAILED) {  			err = -errno;  			*mmaped = NULL; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 0b7362397ea3..754da73c643b 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -869,8 +869,22 @@ LIBBPF_API int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node);  /* get/set map key size */  LIBBPF_API __u32 bpf_map__key_size(const struct bpf_map *map);  LIBBPF_API int bpf_map__set_key_size(struct bpf_map *map, __u32 size); -/* get/set map value size */ +/* get map value size */  LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map); +/** + * @brief **bpf_map__set_value_size()** sets map value size. + * @param map the BPF map instance + * @return 0, on success; negative error, otherwise + * + * There is a special case for maps with associated memory-mapped regions, like + * the global data section maps (bss, data, rodata). When this function is used + * on such a map, the mapped region is resized. Afterward, an attempt is made to + * adjust the corresponding BTF info. This attempt is best-effort and can only + * succeed if the last variable of the data section map is an array. The array + * BTF type is replaced by a new BTF array type with a different length. + * Any previously existing pointers returned from bpf_map__initial_value() or + * corresponding data section skeleton pointer must be reinitialized. 
+ */  LIBBPF_API int bpf_map__set_value_size(struct bpf_map *map, __u32 size);  /* get map key/value BTF type IDs */  LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map); @@ -884,7 +898,7 @@ LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra);  LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,  					  const void *data, size_t size); -LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize); +LIBBPF_API void *bpf_map__initial_value(struct bpf_map *map, size_t *psize);  /**   * @brief **bpf_map__is_internal()** tells the caller whether or not the diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index a5aa3a383d69..7521a2fb7626 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -391,3 +391,8 @@ LIBBPF_1.2.0 {  		bpf_map_get_info_by_fd;  		bpf_prog_get_info_by_fd;  } LIBBPF_1.1.0; + +LIBBPF_1.3.0 { +	global: +		bpf_obj_pin_opts; +} LIBBPF_1.2.0; diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 6065f408a59c..9c4db90b92b6 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -38,7 +38,7 @@ static __u32 get_ubuntu_kernel_version(void)  	if (faccessat(AT_FDCWD, ubuntu_kver_file, R_OK, AT_EACCESS) != 0)  		return 0; -	f = fopen(ubuntu_kver_file, "r"); +	f = fopen(ubuntu_kver_file, "re");  	if (!f)  		return 0; @@ -180,7 +180,9 @@ static int probe_prog_load(enum bpf_prog_type prog_type,  	case BPF_PROG_TYPE_SK_REUSEPORT:  	case BPF_PROG_TYPE_FLOW_DISSECTOR:  	case BPF_PROG_TYPE_CGROUP_SYSCTL: +		break;  	case BPF_PROG_TYPE_NETFILTER: +		opts.expected_attach_type = BPF_NETFILTER;  		break;  	default:  		return -EOPNOTSUPP; diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h index 1fd2eeac5cfc..290411ddb39e 100644 --- a/tools/lib/bpf/libbpf_version.h +++ b/tools/lib/bpf/libbpf_version.h @@ -4,6 +4,6 @@  #define __LIBBPF_VERSION_H  #define LIBBPF_MAJOR_VERSION 1 -#define 
LIBBPF_MINOR_VERSION 2 +#define LIBBPF_MINOR_VERSION 3  #endif /* __LIBBPF_VERSION_H */ diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 086eef355ab3..f1a141555f08 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -466,7 +466,7 @@ static int parse_vma_segs(int pid, const char *lib_path, struct elf_seg **segs,  proceed:  	sprintf(line, "/proc/%d/maps", pid); -	f = fopen(line, "r"); +	f = fopen(line, "re");  	if (!f) {  		err = -errno;  		pr_warn("usdt: failed to open '%s' to get base addr of '%s': %d\n", @@ -954,8 +954,7 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct  	spec_map_fd = bpf_map__fd(man->specs_map);  	ip_map_fd = bpf_map__fd(man->ip_to_spec_id_map); -	/* TODO: perform path resolution similar to uprobe's */ -	fd = open(path, O_RDONLY); +	fd = open(path, O_RDONLY | O_CLOEXEC);  	if (fd < 0) {  		err = -errno;  		pr_warn("usdt: failed to open ELF binary '%s': %d\n", path, err); diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index 1229b18bcdb1..2a5a29217374 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -99,6 +99,11 @@ static int cmp_cpu(const void *a, const void *b)  	return cpu_a->cpu - cpu_b->cpu;  } +static struct perf_cpu __perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx) +{ +	return RC_CHK_ACCESS(cpus)->map[idx]; +} +  static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu *tmp_cpus)  {  	size_t payload_size = nr_cpus * sizeof(struct perf_cpu); @@ -111,8 +116,12 @@ static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu  		/* Remove dups */  		j = 0;  		for (i = 0; i < nr_cpus; i++) { -			if (i == 0 || RC_CHK_ACCESS(cpus)->map[i].cpu != RC_CHK_ACCESS(cpus)->map[i - 1].cpu) -				RC_CHK_ACCESS(cpus)->map[j++].cpu = RC_CHK_ACCESS(cpus)->map[i].cpu; +			if (i == 0 || +			    __perf_cpu_map__cpu(cpus, i).cpu != +			    __perf_cpu_map__cpu(cpus, i - 1).cpu) { +				
RC_CHK_ACCESS(cpus)->map[j++].cpu = +					__perf_cpu_map__cpu(cpus, i).cpu; +			}  		}  		perf_cpu_map__set_nr(cpus, j);  		assert(j <= nr_cpus); @@ -269,26 +278,31 @@ out:  	return cpus;  } +static int __perf_cpu_map__nr(const struct perf_cpu_map *cpus) +{ +	return RC_CHK_ACCESS(cpus)->nr; +} +  struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)  {  	struct perf_cpu result = {  		.cpu = -1  	}; -	if (cpus && idx < RC_CHK_ACCESS(cpus)->nr) -		return RC_CHK_ACCESS(cpus)->map[idx]; +	if (cpus && idx < __perf_cpu_map__nr(cpus)) +		return __perf_cpu_map__cpu(cpus, idx);  	return result;  }  int perf_cpu_map__nr(const struct perf_cpu_map *cpus)  { -	return cpus ? RC_CHK_ACCESS(cpus)->nr : 1; +	return cpus ? __perf_cpu_map__nr(cpus) : 1;  }  bool perf_cpu_map__empty(const struct perf_cpu_map *map)  { -	return map ? RC_CHK_ACCESS(map)->map[0].cpu == -1 : true; +	return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true;  }  int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu) @@ -299,10 +313,10 @@ int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)  		return -1;  	low = 0; -	high = RC_CHK_ACCESS(cpus)->nr; +	high = __perf_cpu_map__nr(cpus);  	while (low < high) {  		int idx = (low + high) / 2; -		struct perf_cpu cpu_at_idx = RC_CHK_ACCESS(cpus)->map[idx]; +		struct perf_cpu cpu_at_idx = __perf_cpu_map__cpu(cpus, idx);  		if (cpu_at_idx.cpu == cpu.cpu)  			return idx; @@ -321,6 +335,32 @@ bool perf_cpu_map__has(const struct perf_cpu_map *cpus, struct perf_cpu cpu)  	return perf_cpu_map__idx(cpus, cpu) != -1;  } +bool perf_cpu_map__equal(const struct perf_cpu_map *lhs, const struct perf_cpu_map *rhs) +{ +	int nr; + +	if (lhs == rhs) +		return true; + +	if (!lhs || !rhs) +		return false; + +	nr = __perf_cpu_map__nr(lhs); +	if (nr != __perf_cpu_map__nr(rhs)) +		return false; + +	for (int idx = 0; idx < nr; idx++) { +		if (__perf_cpu_map__cpu(lhs, idx).cpu != __perf_cpu_map__cpu(rhs, idx).cpu) +			return 
false; +	} +	return true; +} + +bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map) +{ +	return map && __perf_cpu_map__cpu(map, 0).cpu == -1; +} +  struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)  {  	struct perf_cpu result = { @@ -328,7 +368,9 @@ struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)  	};  	// cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well. -	return RC_CHK_ACCESS(map)->nr > 0 ? RC_CHK_ACCESS(map)->map[RC_CHK_ACCESS(map)->nr - 1] : result; +	return __perf_cpu_map__nr(map) > 0 +		? __perf_cpu_map__cpu(map, __perf_cpu_map__nr(map) - 1) +		: result;  }  /** Is 'b' a subset of 'a'. */ @@ -336,15 +378,15 @@ bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu  {  	if (a == b || !b)  		return true; -	if (!a || RC_CHK_ACCESS(b)->nr > RC_CHK_ACCESS(a)->nr) +	if (!a || __perf_cpu_map__nr(b) > __perf_cpu_map__nr(a))  		return false; -	for (int i = 0, j = 0; i < RC_CHK_ACCESS(a)->nr; i++) { -		if (RC_CHK_ACCESS(a)->map[i].cpu > RC_CHK_ACCESS(b)->map[j].cpu) +	for (int i = 0, j = 0; i < __perf_cpu_map__nr(a); i++) { +		if (__perf_cpu_map__cpu(a, i).cpu > __perf_cpu_map__cpu(b, j).cpu)  			return false; -		if (RC_CHK_ACCESS(a)->map[i].cpu == RC_CHK_ACCESS(b)->map[j].cpu) { +		if (__perf_cpu_map__cpu(a, i).cpu == __perf_cpu_map__cpu(b, j).cpu) {  			j++; -			if (j == RC_CHK_ACCESS(b)->nr) +			if (j == __perf_cpu_map__nr(b))  				return true;  		}  	} @@ -374,27 +416,27 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,  		return perf_cpu_map__get(other);  	} -	tmp_len = RC_CHK_ACCESS(orig)->nr + RC_CHK_ACCESS(other)->nr; +	tmp_len = __perf_cpu_map__nr(orig) + __perf_cpu_map__nr(other);  	tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));  	if (!tmp_cpus)  		return NULL;  	/* Standard merge algorithm from wikipedia */  	i = j = k = 0; -	while (i < RC_CHK_ACCESS(orig)->nr && j < RC_CHK_ACCESS(other)->nr) { -		if (RC_CHK_ACCESS(orig)->map[i].cpu <= 
RC_CHK_ACCESS(other)->map[j].cpu) { -			if (RC_CHK_ACCESS(orig)->map[i].cpu == RC_CHK_ACCESS(other)->map[j].cpu) +	while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) { +		if (__perf_cpu_map__cpu(orig, i).cpu <= __perf_cpu_map__cpu(other, j).cpu) { +			if (__perf_cpu_map__cpu(orig, i).cpu == __perf_cpu_map__cpu(other, j).cpu)  				j++; -			tmp_cpus[k++] = RC_CHK_ACCESS(orig)->map[i++]; +			tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);  		} else -			tmp_cpus[k++] = RC_CHK_ACCESS(other)->map[j++]; +			tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++);  	} -	while (i < RC_CHK_ACCESS(orig)->nr) -		tmp_cpus[k++] = RC_CHK_ACCESS(orig)->map[i++]; +	while (i < __perf_cpu_map__nr(orig)) +		tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++); -	while (j < RC_CHK_ACCESS(other)->nr) -		tmp_cpus[k++] = RC_CHK_ACCESS(other)->map[j++]; +	while (j < __perf_cpu_map__nr(other)) +		tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++);  	assert(k <= tmp_len);  	merged = cpu_map__trim_new(k, tmp_cpus); @@ -402,3 +444,38 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,  	perf_cpu_map__put(orig);  	return merged;  } + +struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig, +					     struct perf_cpu_map *other) +{ +	struct perf_cpu *tmp_cpus; +	int tmp_len; +	int i, j, k; +	struct perf_cpu_map *merged = NULL; + +	if (perf_cpu_map__is_subset(other, orig)) +		return perf_cpu_map__get(orig); +	if (perf_cpu_map__is_subset(orig, other)) +		return perf_cpu_map__get(other); + +	tmp_len = max(__perf_cpu_map__nr(orig), __perf_cpu_map__nr(other)); +	tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu)); +	if (!tmp_cpus) +		return NULL; + +	i = j = k = 0; +	while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) { +		if (__perf_cpu_map__cpu(orig, i).cpu < __perf_cpu_map__cpu(other, j).cpu) +			i++; +		else if (__perf_cpu_map__cpu(orig, i).cpu > __perf_cpu_map__cpu(other, j).cpu) +			j++; +		else { +			j++; +			tmp_cpus[k++] = 
__perf_cpu_map__cpu(orig, i++); +		} +	} +	if (k) +		merged = cpu_map__trim_new(k, tmp_cpus); +	free(tmp_cpus); +	return merged; +} diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 81e8b5fcd8ba..b8b066d0dc5e 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -36,18 +36,33 @@ void perf_evlist__init(struct perf_evlist *evlist)  static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,  					  struct perf_evsel *evsel)  { -	/* -	 * We already have cpus for evsel (via PMU sysfs) so -	 * keep it, if there's no target cpu list defined. -	 */  	if (evsel->system_wide) { +		/* System wide: set the cpu map of the evsel to all online CPUs. */  		perf_cpu_map__put(evsel->cpus);  		evsel->cpus = perf_cpu_map__new(NULL); +	} else if (evlist->has_user_cpus && evsel->is_pmu_core) { +		/* +		 * User requested CPUs on a core PMU, ensure the requested CPUs +		 * are valid by intersecting with those of the PMU. +		 */ +		perf_cpu_map__put(evsel->cpus); +		evsel->cpus = perf_cpu_map__intersect(evlist->user_requested_cpus, evsel->own_cpus);  	} else if (!evsel->own_cpus || evlist->has_user_cpus || -		   (!evsel->requires_cpu && perf_cpu_map__empty(evlist->user_requested_cpus))) { +		(!evsel->requires_cpu && perf_cpu_map__has_any_cpu(evlist->user_requested_cpus))) { +		/* +		 * The PMU didn't specify a default cpu map, this isn't a core +		 * event and the user requested CPUs or the evlist user +		 * requested CPUs have the "any CPU" (aka dummy) CPU value. In +		 * which case use the user requested CPUs rather than the PMU +		 * ones. +		 */  		perf_cpu_map__put(evsel->cpus);  		evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);  	} else if (evsel->cpus != evsel->own_cpus) { +		/* +		 * No user requested cpu map but the PMU cpu map doesn't match +		 * the evsel's. Reset it back to the PMU cpu map. 
+		 */  		perf_cpu_map__put(evsel->cpus);  		evsel->cpus = perf_cpu_map__get(evsel->own_cpus);  	} diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index a99a75d9e78f..5cd220a61962 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -41,7 +41,14 @@ struct perf_sample_id {  struct perf_evsel {  	struct list_head	 node;  	struct perf_event_attr	 attr; +	/** The commonly used cpu map of CPUs the event should be opened upon, etc. */  	struct perf_cpu_map	*cpus; +	/** +	 * The cpu map read from the PMU. For core PMUs this is the list of all +	 * CPUs the event can be opened upon. For other PMUs this is the default +	 * cpu map for opening the event on, for example, the first CPU on a +	 * socket for an uncore event. +	 */  	struct perf_cpu_map	*own_cpus;  	struct perf_thread_map	*threads;  	struct xyarray		*fd; @@ -55,9 +62,9 @@ struct perf_evsel {  	int			 nr_members;  	/*  	 * system_wide is for events that need to be on every CPU, irrespective -	 * of user requested CPUs or threads. Map propagation will set cpus to -	 * this event's own_cpus, whereby they will contribute to evlist -	 * all_cpus. +	 * of user requested CPUs or threads. The main example of this is the +	 * dummy event. Map propagation will set cpus for this event to all CPUs +	 * as software PMU events like dummy, have a CPU map that is empty.  	 */  	bool			 system_wide;  	/* @@ -65,6 +72,8 @@ struct perf_evsel {  	 * i.e. it cannot be the 'any CPU' value of -1.  	 */  	bool			 requires_cpu; +	/** Is the PMU for the event a core one? Affects the handling of own_cpus. 
*/ +	bool			 is_pmu_core;  	int			 idx;  }; diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index 3f43f770cdac..e38d859a384d 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -11,8 +11,16 @@ struct perf_cpu {  	int cpu;  }; +struct perf_cache { +	int cache_lvl; +	int cache; +}; +  struct perf_cpu_map; +/** + * perf_cpu_map__dummy_new - a map with a singular "any CPU"/dummy -1 value. + */  LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);  LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void);  LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); @@ -20,12 +28,23 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file);  LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);  LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,  						     struct perf_cpu_map *other); +LIBPERF_API struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig, +							 struct perf_cpu_map *other);  LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);  LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);  LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus); +/** + * perf_cpu_map__empty - is map either empty or the "any CPU"/dummy value. + */  LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map);  LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map);  LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu); +LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs, +				     const struct perf_cpu_map *rhs); +/** + * perf_cpu_map__has_any_cpu - Does the map contain the "any CPU"/dummy -1 value? 
+ */ +LIBPERF_API bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map);  #define perf_cpu_map__for_each_cpu(cpu, idx, cpus)		\  	for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx);	\ diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index 51b9338f4c11..ba2dcf64f4e6 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -380,7 +380,8 @@ enum {  	PERF_STAT_CONFIG_TERM__AGGR_MODE	= 0,  	PERF_STAT_CONFIG_TERM__INTERVAL		= 1,  	PERF_STAT_CONFIG_TERM__SCALE		= 2, -	PERF_STAT_CONFIG_TERM__MAX		= 3, +	PERF_STAT_CONFIG_TERM__AGGR_LEVEL	= 3, +	PERF_STAT_CONFIG_TERM__MAX		= 4,  };  struct perf_record_stat_config_entry { diff --git a/tools/lib/subcmd/exec-cmd.c b/tools/lib/subcmd/exec-cmd.c index 5dbea456973e..7739b5217cf6 100644 --- a/tools/lib/subcmd/exec-cmd.c +++ b/tools/lib/subcmd/exec-cmd.c @@ -36,38 +36,40 @@ static int is_absolute_path(const char *path)  	return path[0] == '/';  } -static const char *get_pwd_cwd(void) +static const char *get_pwd_cwd(char *buf, size_t sz)  { -	static char cwd[PATH_MAX + 1];  	char *pwd;  	struct stat cwd_stat, pwd_stat; -	if (getcwd(cwd, PATH_MAX) == NULL) +	if (getcwd(buf, sz) == NULL)  		return NULL;  	pwd = getenv("PWD"); -	if (pwd && strcmp(pwd, cwd)) { -		stat(cwd, &cwd_stat); +	if (pwd && strcmp(pwd, buf)) { +		stat(buf, &cwd_stat);  		if (!stat(pwd, &pwd_stat) &&  		    pwd_stat.st_dev == cwd_stat.st_dev &&  		    pwd_stat.st_ino == cwd_stat.st_ino) { -			strlcpy(cwd, pwd, PATH_MAX); +			strlcpy(buf, pwd, sz);  		}  	} -	return cwd; +	return buf;  } -static const char *make_nonrelative_path(const char *path) +static const char *make_nonrelative_path(char *buf, size_t sz, const char *path)  { -	static char buf[PATH_MAX + 1]; -  	if (is_absolute_path(path)) { -		if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX) +		if (strlcpy(buf, path, sz) >= sz)  			die("Too long path: %.*s", 60, path);  	} else { -		const char *cwd = get_pwd_cwd(); +		const char 
*cwd = get_pwd_cwd(buf, sz); +  		if (!cwd)  			die("Cannot determine the current working directory"); -		if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX) + +		if (strlen(cwd) + strlen(path) + 2 >= sz)  			die("Too long path: %.*s", 60, path); + +		strcat(buf, "/"); +		strcat(buf, path);  	}  	return buf;  } @@ -133,8 +135,11 @@ static void add_path(char **out, const char *path)  	if (path && *path) {  		if (is_absolute_path(path))  			astrcat(out, path); -		else -			astrcat(out, make_nonrelative_path(path)); +		else { +			char buf[PATH_MAX]; + +			astrcat(out, make_nonrelative_path(buf, sizeof(buf), path)); +		}  		astrcat(out, ":");  	} diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c index bf02d62a3b2b..67a8d6b740ea 100644 --- a/tools/lib/subcmd/help.c +++ b/tools/lib/subcmd/help.c @@ -16,6 +16,8 @@  void add_cmdname(struct cmdnames *cmds, const char *name, size_t len)  {  	struct cmdname *ent = malloc(sizeof(*ent) + len + 1); +	if (!ent) +		return;  	ent->len = len;  	memcpy(ent->name, name, len); @@ -66,6 +68,7 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)  	while (ci < cmds->cnt && ei < excludes->cnt) {  		cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);  		if (cmp < 0) { +			zfree(&cmds->names[cj]);  			cmds->names[cj++] = cmds->names[ci++];  		} else if (cmp == 0) {  			ci++; @@ -75,9 +78,12 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)  		}  	} -	while (ci < cmds->cnt) +	while (ci < cmds->cnt) { +		zfree(&cmds->names[cj]);  		cmds->names[cj++] = cmds->names[ci++]; - +	} +	for (ci = cj; ci < cmds->cnt; ci++) +		zfree(&cmds->names[ci]);  	cmds->cnt = cj;  } diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h index 41b9b942504d..8e9147358a28 100644 --- a/tools/lib/subcmd/parse-options.h +++ b/tools/lib/subcmd/parse-options.h @@ -6,10 +6,6 @@  #include <stdbool.h>  #include <stdint.h> -#ifndef NORETURN -#define NORETURN 
__attribute__((__noreturn__)) -#endif -  enum parse_opt_type {  	/* special types */  	OPTION_END, @@ -183,9 +179,9 @@ extern int parse_options_subcommand(int argc, const char **argv,  				const char *const subcommands[],  				const char *usagestr[], int flags); -extern NORETURN void usage_with_options(const char * const *usagestr, +extern __noreturn void usage_with_options(const char * const *usagestr,                                          const struct option *options); -extern NORETURN __attribute__((format(printf,3,4))) +extern __noreturn __attribute__((format(printf,3,4)))  void usage_with_options_msg(const char * const *usagestr,  			    const struct option *options,  			    const char *fmt, ...); diff --git a/tools/lib/subcmd/subcmd-util.h b/tools/lib/subcmd/subcmd-util.h index b2aec04fce8f..dfac76e35ac7 100644 --- a/tools/lib/subcmd/subcmd-util.h +++ b/tools/lib/subcmd/subcmd-util.h @@ -5,8 +5,7 @@  #include <stdarg.h>  #include <stdlib.h>  #include <stdio.h> - -#define NORETURN __attribute__((__noreturn__)) +#include <linux/compiler.h>  static inline void report(const char *prefix, const char *err, va_list params)  { @@ -15,7 +14,7 @@ static inline void report(const char *prefix, const char *err, va_list params)  	fprintf(stderr, " %s%s\n", prefix, msg);  } -static NORETURN inline void die(const char *err, ...) +static __noreturn inline void die(const char *err, ...)  {  	va_list params; | 
