1 files changed, 130 insertions, 37 deletions
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index 95cd8414f6ef..6533ea9b044c 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -19,13 +19,6 @@
 #define LCB_F_PERCPU	(1U << 4)
 #define LCB_F_MUTEX	(1U << 5)
 
-struct tstamp_data {
-	__u64 timestamp;
-	__u64 lock;
-	__u32 flags;
-	__s32 stack_id;
-};
-
 /* callstack storage  */
 struct {
 	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
@@ -107,6 +100,20 @@ struct {
 	__uint(max_entries, 1);
 } cgroup_filter SEC(".maps");
 
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(long));
+	__uint(value_size, sizeof(__u8));
+	__uint(max_entries, 1);
+} slab_filter SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(long));
+	__uint(value_size, sizeof(struct slab_cache_data));
+	__uint(max_entries, 1);
+} slab_caches SEC(".maps");
+
 struct rw_semaphore___old {
 	struct task_struct *owner;
 } __attribute__((preserve_access_index));
@@ -123,22 +130,30 @@ struct mm_struct___new {
 	struct rw_semaphore mmap_lock;
 } __attribute__((preserve_access_index));
 
+extern struct kmem_cache *bpf_get_kmem_cache(u64 addr) __ksym __weak;
+
 /* control flags */
+const volatile int has_cpu;
+const volatile int has_task;
+const volatile int has_type;
+const volatile int has_addr;
+const volatile int has_cgroup;
+const volatile int has_slab;
+const volatile int needs_callstack;
+const volatile int stack_skip;
+const volatile int lock_owner;
+const volatile int use_cgroup_v2;
+
+/* determine the key of lock stat */
+const volatile int aggr_mode;
+
 int enabled;
-int has_cpu;
-int has_task;
-int has_type;
-int has_addr;
-int has_cgroup;
-int needs_callstack;
-int stack_skip;
-int lock_owner;
-
-int use_cgroup_v2;
+
 int perf_subsys_id = -1;
 
-/* determine the key of lock stat */
-int aggr_mode;
+__u64 end_ts;
+
+__u32 slab_cache_id;
 
 /* error stat */
 int task_fail;
@@ -206,7 +221,7 @@ static inline int can_record(u64 *ctx)
 		__u64 addr = ctx[0];
 
 		ok = bpf_map_lookup_elem(&addr_filter, &addr);
-		if (!ok)
+		if (!ok && !has_slab)
 			return 0;
 	}
 
@@ -219,6 +234,17 @@ static inline int can_record(u64 *ctx)
 			return 0;
 	}
 
+	if (has_slab && bpf_get_kmem_cache) {
+		__u8 *ok;
+		__u64 addr = ctx[0];
+		long kmem_cache_addr;
+
+		kmem_cache_addr = (long)bpf_get_kmem_cache(addr);
+		ok = bpf_map_lookup_elem(&slab_filter, &kmem_cache_addr);
+		if (!ok)
+			return 0;
+	}
+
 	return 1;
 }
 
@@ -289,6 +315,7 @@ static inline __u32 check_lock_type(__u64 lock, __u32 flags)
 	struct task_struct *curr;
 	struct mm_struct___old *mm_old;
 	struct mm_struct___new *mm_new;
+	struct sighand_struct *sighand;
 
 	switch (flags) {
 	case LCB_F_READ:  /* rwsem */
@@ -310,7 +337,9 @@ static inline __u32 check_lock_type(__u64 lock, __u32 flags)
 		break;
 	case LCB_F_SPIN:  /* spinlock */
 		curr = bpf_get_current_task_btf();
-		if (&curr->sighand->siglock == (void *)lock)
+		sighand = curr->sighand;
+
+		if (sighand && &sighand->siglock == (void *)lock)
 			return LCD_F_SIGHAND_LOCK;
 		break;
 	default:
@@ -325,8 +354,7 @@ static inline struct tstamp_data *get_tstamp_elem(__u32 flags)
 	struct tstamp_data *pelem;
 
 	/* Use per-cpu array map for spinlock and rwlock */
-	if (flags == (LCB_F_SPIN | LCB_F_READ) || flags == LCB_F_SPIN ||
-	    flags == (LCB_F_SPIN | LCB_F_WRITE)) {
+	if ((flags & (LCB_F_SPIN | LCB_F_MUTEX)) == LCB_F_SPIN) {
 		__u32 idx = 0;
 
 		pelem = bpf_map_lookup_elem(&tstamp_cpu, &idx);
@@ -441,11 +469,8 @@ int contention_end(u64 *ctx)
 
 	duration = bpf_ktime_get_ns() - pelem->timestamp;
 	if ((__s64)duration < 0) {
-		pelem->lock = 0;
-		if (need_delete)
-			bpf_map_delete_elem(&tstamp, &pid);
 		__sync_fetch_and_add(&time_fail, 1);
-		return 0;
+		goto out;
 	}
 
 	switch (aggr_mode) {
@@ -479,11 +504,8 @@ int contention_end(u64 *ctx)
 	data = bpf_map_lookup_elem(&lock_stat, &key);
 	if (!data) {
 		if (data_map_full) {
-			pelem->lock = 0;
-			if (need_delete)
-				bpf_map_delete_elem(&tstamp, &pid);
 			__sync_fetch_and_add(&data_fail, 1);
-			return 0;
+			goto out;
 		}
 
 		struct contention_data first = {
@@ -495,21 +517,45 @@ int contention_end(u64 *ctx)
 		};
 		int err;
 
-		if (aggr_mode == LOCK_AGGR_ADDR)
-			first.flags |= check_lock_type(pelem->lock, pelem->flags);
+		if (aggr_mode == LOCK_AGGR_ADDR) {
+			first.flags |= check_lock_type(pelem->lock,
+						       pelem->flags & LCB_F_TYPE_MASK);
+
+			/* Check if it's from a slab object */
+			if (bpf_get_kmem_cache) {
+				struct kmem_cache *s;
+				struct slab_cache_data *d;
+
+				s = bpf_get_kmem_cache(pelem->lock);
+				if (s != NULL) {
+					/*
+					 * Save the ID of the slab cache in the flags
+					 * (instead of full address) to reduce the
+					 * space in the contention_data.
+					 */
+					d = bpf_map_lookup_elem(&slab_caches, &s);
+					if (d != NULL)
+						first.flags |= d->id;
+				}
+			}
+		}
 
 		err = bpf_map_update_elem(&lock_stat, &key, &first, BPF_NOEXIST);
 		if (err < 0) {
+			if (err == -EEXIST) {
+				/* it lost the race, try to get it again */
+				data = bpf_map_lookup_elem(&lock_stat, &key);
+				if (data != NULL)
+					goto found;
+			}
 			if (err == -E2BIG)
 				data_map_full = 1;
 			__sync_fetch_and_add(&data_fail, 1);
 		}
-		pelem->lock = 0;
-		if (need_delete)
-			bpf_map_delete_elem(&tstamp, &pid);
-		return 0;
+		goto out;
 	}
 
+found:
 	__sync_fetch_and_add(&data->total_time, duration);
 	__sync_fetch_and_add(&data->count, 1);
 
@@ -519,6 +565,7 @@ int contention_end(u64 *ctx)
 	if (data->min_time > duration)
 		data->min_time = duration;
 
+out:
 	pelem->lock = 0;
 	if (need_delete)
 		bpf_map_delete_elem(&tstamp, &pid);
@@ -559,4 +606,50 @@ int BPF_PROG(collect_lock_syms)
 	return 0;
 }
 
+SEC("raw_tp/bpf_test_finish")
+int BPF_PROG(end_timestamp)
+{
+	end_ts = bpf_ktime_get_ns();
+	return 0;
+}
+
+/*
+ * bpf_iter__kmem_cache added recently so old kernels don't have it in the
+ * vmlinux.h.  But we cannot add it here since it will cause a compiler error
+ * due to redefinition of the struct on later kernels.
+ *
+ * So it uses a CO-RE trick to access the member only if it has the type.
+ * This will support both old and new kernels without compiler errors.
+ */
+struct bpf_iter__kmem_cache___new {
+	struct kmem_cache *s;
+} __attribute__((preserve_access_index));
+
+SEC("iter/kmem_cache")
+int slab_cache_iter(void *ctx)
+{
+	struct kmem_cache *s = NULL;
+	struct slab_cache_data d;
+	const char *nameptr;
+
+	if (bpf_core_type_exists(struct bpf_iter__kmem_cache)) {
+		struct bpf_iter__kmem_cache___new *iter = ctx;
+
+		s = iter->s;
+	}
+
+	if (s == NULL)
+		return 0;
+
+	nameptr = s->name;
+	bpf_probe_read_kernel_str(d.name, sizeof(d.name), nameptr);
+
+	d.id = ++slab_cache_id << LCB_F_SLAB_ID_SHIFT;
+	if (d.id >= LCB_F_SLAB_ID_END)
+		return 0;
+
+	bpf_map_update_elem(&slab_caches, &s, &d, BPF_NOEXIST);
+	return 0;
+}
+
 char LICENSE[] SEC("license") = "Dual BSD/GPL";