summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/Makefile1
-rw-r--r--kernel/bpf/bpf_local_storage.c8
-rw-r--r--kernel/bpf/btf.c394
-rw-r--r--kernel/bpf/core.c28
-rw-r--r--kernel/bpf/cpumap.c2
-rw-r--r--kernel/bpf/cpumask.c479
-rw-r--r--kernel/bpf/devmap.c16
-rw-r--r--kernel/bpf/hashtab.c4
-rw-r--r--kernel/bpf/helpers.c203
-rw-r--r--kernel/bpf/memalloc.c5
-rw-r--r--kernel/bpf/offload.c419
-rw-r--r--kernel/bpf/preload/bpf_preload_kern.c6
-rw-r--r--kernel/bpf/preload/iterators/Makefile12
-rw-r--r--kernel/bpf/preload/iterators/README5
-rw-r--r--kernel/bpf/preload/iterators/iterators.lskel-big-endian.h419
-rw-r--r--kernel/bpf/preload/iterators/iterators.lskel-little-endian.h (renamed from kernel/bpf/preload/iterators/iterators.lskel.h)0
-rw-r--r--kernel/bpf/syscall.c106
-rw-r--r--kernel/bpf/verifier.c1293
-rw-r--r--kernel/cgroup/rstat.c4
-rw-r--r--kernel/exit.c9
-rw-r--r--kernel/kexec_core.c3
-rw-r--r--kernel/kprobes.c27
-rw-r--r--kernel/livepatch/core.c72
-rw-r--r--kernel/module/kallsyms.c13
-rw-r--r--kernel/module/main.c3
-rw-r--r--kernel/params.c3
-rw-r--r--kernel/printk/index.c2
-rw-r--r--kernel/printk/internal.h45
-rw-r--r--kernel/printk/printk.c308
-rw-r--r--kernel/sched/topology.c95
-rw-r--r--kernel/trace/Kconfig27
-rw-r--r--kernel/trace/bpf_trace.c157
-rw-r--r--kernel/trace/ftrace.c111
-rw-r--r--kernel/trace/kprobe_event_gen_test.c2
-rw-r--r--kernel/trace/ring_buffer.c51
-rw-r--r--kernel/trace/synth_event_gen_test.c2
-rw-r--r--kernel/trace/trace.c168
-rw-r--r--kernel/trace/trace.h6
-rw-r--r--kernel/trace/trace_eprobe.c12
-rw-r--r--kernel/trace/trace_events.c13
-rw-r--r--kernel/trace/trace_events_filter.c93
-rw-r--r--kernel/trace/trace_events_hist.c126
-rw-r--r--kernel/trace/trace_events_synth.c90
-rw-r--r--kernel/trace/trace_osnoise.c2
-rw-r--r--kernel/trace/trace_probe.c2
-rw-r--r--kernel/trace/trace_probe.h1
-rw-r--r--kernel/trace/trace_seq.c23
-rw-r--r--kernel/trace/trace_synth.h1
-rw-r--r--kernel/tracepoint.c4
49 files changed, 3919 insertions, 956 deletions
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 3a12e6b400a2..02242614dcc7 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_DEBUG_INFO_BTF) += sysfs_btf.o
endif
ifeq ($(CONFIG_BPF_JIT),y)
obj-$(CONFIG_BPF_SYSCALL) += bpf_struct_ops.o
+obj-$(CONFIG_BPF_SYSCALL) += cpumask.o
obj-${CONFIG_BPF_LSM} += bpf_lsm.o
endif
obj-$(CONFIG_BPF_PRELOAD) += preload/
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index b39a46e8fb08..35f4138a54dc 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -568,8 +568,8 @@ static struct bpf_local_storage_map *__bpf_local_storage_map_alloc(union bpf_att
nbuckets = max_t(u32, 2, nbuckets);
smap->bucket_log = ilog2(nbuckets);
- smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
- GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT);
+ smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets),
+ nbuckets, GFP_USER | __GFP_NOWARN);
if (!smap->buckets) {
bpf_map_area_free(smap);
return ERR_PTR(-ENOMEM);
@@ -580,8 +580,8 @@ static struct bpf_local_storage_map *__bpf_local_storage_map_alloc(union bpf_att
raw_spin_lock_init(&smap->buckets[i].lock);
}
- smap->elem_size =
- sizeof(struct bpf_local_storage_elem) + attr->value_size;
+ smap->elem_size = offsetof(struct bpf_local_storage_elem,
+ sdata.data[attr->value_size]);
return smap;
}
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index b7017cae6fd1..fa22ec79ac0e 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -337,6 +337,12 @@ const char *btf_type_str(const struct btf_type *t)
#define BTF_SHOW_NAME_SIZE 80
/*
+ * The suffix of a type that indicates it cannot alias another type when
+ * comparing BTF IDs for kfunc invocations.
+ */
+#define NOCAST_ALIAS_SUFFIX "___init"
+
+/*
* Common data to all BTF show operations. Private show functions can add
* their own data to a structure containing a struct btf_show and consult it
* in the show callback. See btf_type_show() below.
@@ -1397,12 +1403,18 @@ __printf(4, 5) static void __btf_verifier_log_type(struct btf_verifier_env *env,
if (!bpf_verifier_log_needed(log))
return;
- /* btf verifier prints all types it is processing via
- * btf_verifier_log_type(..., fmt = NULL).
- * Skip those prints for in-kernel BTF verification.
- */
- if (log->level == BPF_LOG_KERNEL && !fmt)
- return;
+ if (log->level == BPF_LOG_KERNEL) {
+ /* btf verifier prints all types it is processing via
+ * btf_verifier_log_type(..., fmt = NULL).
+ * Skip those prints for in-kernel BTF verification.
+ */
+ if (!fmt)
+ return;
+
+ /* Skip logging when loading module BTF with mismatches permitted */
+ if (env->btf->base_btf && IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH))
+ return;
+ }
__btf_verifier_log(log, "[%u] %s %s%s",
env->log_type_id,
@@ -1441,8 +1453,15 @@ static void btf_verifier_log_member(struct btf_verifier_env *env,
if (!bpf_verifier_log_needed(log))
return;
- if (log->level == BPF_LOG_KERNEL && !fmt)
- return;
+ if (log->level == BPF_LOG_KERNEL) {
+ if (!fmt)
+ return;
+
+ /* Skip logging when loading module BTF with mismatches permitted */
+ if (env->btf->base_btf && IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH))
+ return;
+ }
+
/* The CHECK_META phase already did a btf dump.
*
* If member is logged again, it must hit an error in
@@ -3228,7 +3247,7 @@ struct btf_field_info {
struct {
const char *node_name;
u32 value_btf_id;
- } list_head;
+ } graph_root;
};
};
@@ -3305,12 +3324,14 @@ static const char *btf_find_decl_tag_value(const struct btf *btf,
return NULL;
}
-static int btf_find_list_head(const struct btf *btf, const struct btf_type *pt,
- const struct btf_type *t, int comp_idx,
- u32 off, int sz, struct btf_field_info *info)
+static int
+btf_find_graph_root(const struct btf *btf, const struct btf_type *pt,
+ const struct btf_type *t, int comp_idx, u32 off,
+ int sz, struct btf_field_info *info,
+ enum btf_field_type head_type)
{
+ const char *node_field_name;
const char *value_type;
- const char *list_node;
s32 id;
if (!__btf_type_is_struct(t))
@@ -3320,26 +3341,32 @@ static int btf_find_list_head(const struct btf *btf, const struct btf_type *pt,
value_type = btf_find_decl_tag_value(btf, pt, comp_idx, "contains:");
if (!value_type)
return -EINVAL;
- list_node = strstr(value_type, ":");
- if (!list_node)
+ node_field_name = strstr(value_type, ":");
+ if (!node_field_name)
return -EINVAL;
- value_type = kstrndup(value_type, list_node - value_type, GFP_KERNEL | __GFP_NOWARN);
+ value_type = kstrndup(value_type, node_field_name - value_type, GFP_KERNEL | __GFP_NOWARN);
if (!value_type)
return -ENOMEM;
id = btf_find_by_name_kind(btf, value_type, BTF_KIND_STRUCT);
kfree(value_type);
if (id < 0)
return id;
- list_node++;
- if (str_is_empty(list_node))
+ node_field_name++;
+ if (str_is_empty(node_field_name))
return -EINVAL;
- info->type = BPF_LIST_HEAD;
+ info->type = head_type;
info->off = off;
- info->list_head.value_btf_id = id;
- info->list_head.node_name = list_node;
+ info->graph_root.value_btf_id = id;
+ info->graph_root.node_name = node_field_name;
return BTF_FIELD_FOUND;
}
+#define field_mask_test_name(field_type, field_type_str) \
+ if (field_mask & field_type && !strcmp(name, field_type_str)) { \
+ type = field_type; \
+ goto end; \
+ }
+
static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask,
int *align, int *sz)
{
@@ -3363,18 +3390,11 @@ static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask,
goto end;
}
}
- if (field_mask & BPF_LIST_HEAD) {
- if (!strcmp(name, "bpf_list_head")) {
- type = BPF_LIST_HEAD;
- goto end;
- }
- }
- if (field_mask & BPF_LIST_NODE) {
- if (!strcmp(name, "bpf_list_node")) {
- type = BPF_LIST_NODE;
- goto end;
- }
- }
+ field_mask_test_name(BPF_LIST_HEAD, "bpf_list_head");
+ field_mask_test_name(BPF_LIST_NODE, "bpf_list_node");
+ field_mask_test_name(BPF_RB_ROOT, "bpf_rb_root");
+ field_mask_test_name(BPF_RB_NODE, "bpf_rb_node");
+
/* Only return BPF_KPTR when all other types with matchable names fail */
if (field_mask & BPF_KPTR) {
type = BPF_KPTR_REF;
@@ -3387,6 +3407,8 @@ end:
return type;
}
+#undef field_mask_test_name
+
static int btf_find_struct_field(const struct btf *btf,
const struct btf_type *t, u32 field_mask,
struct btf_field_info *info, int info_cnt)
@@ -3419,6 +3441,7 @@ static int btf_find_struct_field(const struct btf *btf,
case BPF_SPIN_LOCK:
case BPF_TIMER:
case BPF_LIST_NODE:
+ case BPF_RB_NODE:
ret = btf_find_struct(btf, member_type, off, sz, field_type,
idx < info_cnt ? &info[idx] : &tmp);
if (ret < 0)
@@ -3432,8 +3455,11 @@ static int btf_find_struct_field(const struct btf *btf,
return ret;
break;
case BPF_LIST_HEAD:
- ret = btf_find_list_head(btf, t, member_type, i, off, sz,
- idx < info_cnt ? &info[idx] : &tmp);
+ case BPF_RB_ROOT:
+ ret = btf_find_graph_root(btf, t, member_type,
+ i, off, sz,
+ idx < info_cnt ? &info[idx] : &tmp,
+ field_type);
if (ret < 0)
return ret;
break;
@@ -3480,6 +3506,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
case BPF_SPIN_LOCK:
case BPF_TIMER:
case BPF_LIST_NODE:
+ case BPF_RB_NODE:
ret = btf_find_struct(btf, var_type, off, sz, field_type,
idx < info_cnt ? &info[idx] : &tmp);
if (ret < 0)
@@ -3493,8 +3520,11 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
return ret;
break;
case BPF_LIST_HEAD:
- ret = btf_find_list_head(btf, var, var_type, -1, off, sz,
- idx < info_cnt ? &info[idx] : &tmp);
+ case BPF_RB_ROOT:
+ ret = btf_find_graph_root(btf, var, var_type,
+ -1, off, sz,
+ idx < info_cnt ? &info[idx] : &tmp,
+ field_type);
if (ret < 0)
return ret;
break;
@@ -3596,21 +3626,25 @@ end_btf:
return ret;
}
-static int btf_parse_list_head(const struct btf *btf, struct btf_field *field,
- struct btf_field_info *info)
+static int btf_parse_graph_root(const struct btf *btf,
+ struct btf_field *field,
+ struct btf_field_info *info,
+ const char *node_type_name,
+ size_t node_type_align)
{
const struct btf_type *t, *n = NULL;
const struct btf_member *member;
u32 offset;
int i;
- t = btf_type_by_id(btf, info->list_head.value_btf_id);
+ t = btf_type_by_id(btf, info->graph_root.value_btf_id);
/* We've already checked that value_btf_id is a struct type. We
* just need to figure out the offset of the list_node, and
* verify its type.
*/
for_each_member(i, t, member) {
- if (strcmp(info->list_head.node_name, __btf_name_by_offset(btf, member->name_off)))
+ if (strcmp(info->graph_root.node_name,
+ __btf_name_by_offset(btf, member->name_off)))
continue;
/* Invalid BTF, two members with same name */
if (n)
@@ -3618,24 +3652,38 @@ static int btf_parse_list_head(const struct btf *btf, struct btf_field *field,
n = btf_type_by_id(btf, member->type);
if (!__btf_type_is_struct(n))
return -EINVAL;
- if (strcmp("bpf_list_node", __btf_name_by_offset(btf, n->name_off)))
+ if (strcmp(node_type_name, __btf_name_by_offset(btf, n->name_off)))
return -EINVAL;
offset = __btf_member_bit_offset(n, member);
if (offset % 8)
return -EINVAL;
offset /= 8;
- if (offset % __alignof__(struct bpf_list_node))
+ if (offset % node_type_align)
return -EINVAL;
- field->list_head.btf = (struct btf *)btf;
- field->list_head.value_btf_id = info->list_head.value_btf_id;
- field->list_head.node_offset = offset;
+ field->graph_root.btf = (struct btf *)btf;
+ field->graph_root.value_btf_id = info->graph_root.value_btf_id;
+ field->graph_root.node_offset = offset;
}
if (!n)
return -ENOENT;
return 0;
}
+static int btf_parse_list_head(const struct btf *btf, struct btf_field *field,
+ struct btf_field_info *info)
+{
+ return btf_parse_graph_root(btf, field, info, "bpf_list_node",
+ __alignof__(struct bpf_list_node));
+}
+
+static int btf_parse_rb_root(const struct btf *btf, struct btf_field *field,
+ struct btf_field_info *info)
+{
+ return btf_parse_graph_root(btf, field, info, "bpf_rb_node",
+ __alignof__(struct bpf_rb_node));
+}
+
struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t,
u32 field_mask, u32 value_size)
{
@@ -3698,7 +3746,13 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
if (ret < 0)
goto end;
break;
+ case BPF_RB_ROOT:
+ ret = btf_parse_rb_root(btf, &rec->fields[i], &info_arr[i]);
+ if (ret < 0)
+ goto end;
+ break;
case BPF_LIST_NODE:
+ case BPF_RB_NODE:
break;
default:
ret = -EFAULT;
@@ -3707,8 +3761,33 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
rec->cnt++;
}
- /* bpf_list_head requires bpf_spin_lock */
- if (btf_record_has_field(rec, BPF_LIST_HEAD) && rec->spin_lock_off < 0) {
+ /* bpf_{list_head, rb_root} require bpf_spin_lock */
+ if ((btf_record_has_field(rec, BPF_LIST_HEAD) ||
+ btf_record_has_field(rec, BPF_RB_ROOT)) && rec->spin_lock_off < 0) {
+ ret = -EINVAL;
+ goto end;
+ }
+
+ /* need collection identity for non-owning refs before allowing this
+ *
+ * Consider a node type w/ both list and rb_node fields:
+ * struct node {
+ * struct bpf_list_node l;
+ * struct bpf_rb_node r;
+ * }
+ *
+ * Used like so:
+ * struct node *n = bpf_obj_new(....);
+ * bpf_list_push_front(&list_head, &n->l);
+ * bpf_rbtree_remove(&rb_root, &n->r);
+ *
+ * It should not be possible to rbtree_remove the node since it hasn't
+ * been added to a tree. But push_front converts n to a non-owning
+ * reference, and rbtree_remove accepts the non-owning reference to
+ * a type w/ bpf_rb_node field.
+ */
+ if (btf_record_has_field(rec, BPF_LIST_NODE) &&
+ btf_record_has_field(rec, BPF_RB_NODE)) {
ret = -EINVAL;
goto end;
}
@@ -3719,62 +3798,76 @@ end:
return ERR_PTR(ret);
}
+#define GRAPH_ROOT_MASK (BPF_LIST_HEAD | BPF_RB_ROOT)
+#define GRAPH_NODE_MASK (BPF_LIST_NODE | BPF_RB_NODE)
+
int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
{
int i;
- /* There are two owning types, kptr_ref and bpf_list_head. The former
- * only supports storing kernel types, which can never store references
- * to program allocated local types, atleast not yet. Hence we only need
- * to ensure that bpf_list_head ownership does not form cycles.
+ /* There are three types that signify ownership of some other type:
+ * kptr_ref, bpf_list_head, bpf_rb_root.
+ * kptr_ref only supports storing kernel types, which can't store
+ * references to program allocated local types.
+ *
+ * Hence we only need to ensure that bpf_{list_head,rb_root} ownership
+ * does not form cycles.
*/
- if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & BPF_LIST_HEAD))
+ if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & GRAPH_ROOT_MASK))
return 0;
for (i = 0; i < rec->cnt; i++) {
struct btf_struct_meta *meta;
u32 btf_id;
- if (!(rec->fields[i].type & BPF_LIST_HEAD))
+ if (!(rec->fields[i].type & GRAPH_ROOT_MASK))
continue;
- btf_id = rec->fields[i].list_head.value_btf_id;
+ btf_id = rec->fields[i].graph_root.value_btf_id;
meta = btf_find_struct_meta(btf, btf_id);
if (!meta)
return -EFAULT;
- rec->fields[i].list_head.value_rec = meta->record;
+ rec->fields[i].graph_root.value_rec = meta->record;
- if (!(rec->field_mask & BPF_LIST_NODE))
+ /* We need to set value_rec for all root types, but no need
+ * to check ownership cycle for a type unless it's also a
+ * node type.
+ */
+ if (!(rec->field_mask & GRAPH_NODE_MASK))
continue;
/* We need to ensure ownership acyclicity among all types. The
* proper way to do it would be to topologically sort all BTF
* IDs based on the ownership edges, since there can be multiple
- * bpf_list_head in a type. Instead, we use the following
- * reasoning:
+ * bpf_{list_head,rb_root} in a type. Instead, we use the
+ * following reasoning:
*
* - A type can only be owned by another type in user BTF if it
- * has a bpf_list_node.
+ * has a bpf_{list,rb}_node. Let's call these node types.
* - A type can only _own_ another type in user BTF if it has a
- * bpf_list_head.
+ * bpf_{list_head,rb_root}. Let's call these root types.
*
- * We ensure that if a type has both bpf_list_head and
- * bpf_list_node, its element types cannot be owning types.
+ * We ensure that if a type is both a root and node, its
+ * element types cannot be root types.
*
* To ensure acyclicity:
*
- * When A only has bpf_list_head, ownership chain can be:
+ * When A is a root type but not a node, its ownership
+ * chain can be:
* A -> B -> C
* Where:
- * - B has both bpf_list_head and bpf_list_node.
- * - C only has bpf_list_node.
+ * - A is a root, e.g. has bpf_rb_root.
+ * - B is both a root and node, e.g. has bpf_rb_node and
+ * bpf_list_head.
+ * - C is only a node, e.g. has bpf_list_node
*
- * When A has both bpf_list_head and bpf_list_node, some other
- * type already owns it in the BTF domain, hence it can not own
- * another owning type through any of the bpf_list_head edges.
+ * When A is both a root and node, some other type already
+ * owns it in the BTF domain, hence it can not own
+ * another root type through any of the ownership edges.
* A -> B
* Where:
- * - B only has bpf_list_node.
+ * - A is both a root and node.
+ * - B is only a node.
*/
- if (meta->record->field_mask & BPF_LIST_HEAD)
+ if (meta->record->field_mask & GRAPH_ROOT_MASK)
return -ELOOP;
}
return 0;
@@ -5236,6 +5329,8 @@ static const char *alloc_obj_fields[] = {
"bpf_spin_lock",
"bpf_list_head",
"bpf_list_node",
+ "bpf_rb_root",
+ "bpf_rb_node",
};
static struct btf_struct_metas *
@@ -5309,7 +5404,8 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
type = &tab->types[tab->cnt];
type->btf_id = i;
- record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE, t->size);
+ record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE |
+ BPF_RB_ROOT | BPF_RB_NODE, t->size);
/* The record cannot be unset, treat it as an error if so */
if (IS_ERR_OR_NULL(record)) {
ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT;
@@ -5573,6 +5669,7 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
if (!ctx_struct)
/* should not happen */
return NULL;
+again:
ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_struct->name_off);
if (!ctx_tname) {
/* should not happen */
@@ -5586,8 +5683,16 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
* int socket_filter_bpf_prog(struct __sk_buff *skb)
* { // no fields of skb are ever used }
*/
- if (strcmp(ctx_tname, tname))
- return NULL;
+ if (strcmp(ctx_tname, tname)) {
+ /* bpf_user_pt_regs_t is a typedef, so resolve it to
+ * underlying struct and check name again
+ */
+ if (!btf_type_is_modifier(ctx_struct))
+ return NULL;
+ while (btf_type_is_modifier(ctx_struct))
+ ctx_struct = btf_type_by_id(btf_vmlinux, ctx_struct->type);
+ goto again;
+ }
return ctx_type;
}
@@ -6433,6 +6538,18 @@ static int __get_type_size(struct btf *btf, u32 btf_id,
return -EINVAL;
}
+static u8 __get_type_fmodel_flags(const struct btf_type *t)
+{
+ u8 flags = 0;
+
+ if (__btf_type_is_struct(t))
+ flags |= BTF_FMODEL_STRUCT_ARG;
+ if (btf_type_is_signed_int(t))
+ flags |= BTF_FMODEL_SIGNED_ARG;
+
+ return flags;
+}
+
int btf_distill_func_proto(struct bpf_verifier_log *log,
struct btf *btf,
const struct btf_type *func,
@@ -6453,6 +6570,7 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
m->arg_flags[i] = 0;
}
m->ret_size = 8;
+ m->ret_flags = 0;
m->nr_args = MAX_BPF_FUNC_REG_ARGS;
return 0;
}
@@ -6472,6 +6590,7 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
return -EINVAL;
}
m->ret_size = ret;
+ m->ret_flags = __get_type_fmodel_flags(t);
for (i = 0; i < nargs; i++) {
if (i == nargs - 1 && args[i].type == 0) {
@@ -6496,7 +6615,7 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
return -EINVAL;
}
m->arg_size[i] = ret;
- m->arg_flags[i] = __btf_type_is_struct(t) ? BTF_FMODEL_STRUCT_ARG : 0;
+ m->arg_flags[i] = __get_type_fmodel_flags(t);
}
m->nr_args = nargs;
return 0;
@@ -7260,11 +7379,14 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
}
btf = btf_parse_module(mod->name, mod->btf_data, mod->btf_data_size);
if (IS_ERR(btf)) {
- pr_warn("failed to validate module [%s] BTF: %ld\n",
- mod->name, PTR_ERR(btf));
kfree(btf_mod);
- if (!IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH))
+ if (!IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH)) {
+ pr_warn("failed to validate module [%s] BTF: %ld\n",
+ mod->name, PTR_ERR(btf));
err = PTR_ERR(btf);
+ } else {
+ pr_warn_once("Kernel module BTF mismatch detected, BTF debug info may be unavailable for some modules\n");
+ }
goto out;
}
err = btf_alloc_id(btf);
@@ -8210,3 +8332,119 @@ out:
}
return err;
}
+
+bool btf_nested_type_is_trusted(struct bpf_verifier_log *log,
+ const struct bpf_reg_state *reg,
+ int off)
+{
+ struct btf *btf = reg->btf;
+ const struct btf_type *walk_type, *safe_type;
+ const char *tname;
+ char safe_tname[64];
+ long ret, safe_id;
+ const struct btf_member *member, *m_walk = NULL;
+ u32 i;
+ const char *walk_name;
+
+ walk_type = btf_type_by_id(btf, reg->btf_id);
+ if (!walk_type)
+ return false;
+
+ tname = btf_name_by_offset(btf, walk_type->name_off);
+
+ ret = snprintf(safe_tname, sizeof(safe_tname), "%s__safe_fields", tname);
+ if (ret < 0)
+ return false;
+
+ safe_id = btf_find_by_name_kind(btf, safe_tname, BTF_INFO_KIND(walk_type->info));
+ if (safe_id < 0)
+ return false;
+
+ safe_type = btf_type_by_id(btf, safe_id);
+ if (!safe_type)
+ return false;
+
+ for_each_member(i, walk_type, member) {
+ u32 moff;
+
+ /* We're looking for the PTR_TO_BTF_ID member in the struct
+ * type we're walking which matches the specified offset.
+ * Below, we'll iterate over the fields in the safe variant of
+ * the struct and see if any of them has a matching type /
+ * name.
+ */
+ moff = __btf_member_bit_offset(walk_type, member) / 8;
+ if (off == moff) {
+ m_walk = member;
+ break;
+ }
+ }
+ if (m_walk == NULL)
+ return false;
+
+ walk_name = __btf_name_by_offset(btf, m_walk->name_off);
+ for_each_member(i, safe_type, member) {
+ const char *m_name = __btf_name_by_offset(btf, member->name_off);
+
+ /* If we match on both type and name, the field is considered trusted. */
+ if (m_walk->type == member->type && !strcmp(walk_name, m_name))
+ return true;
+ }
+
+ return false;
+}
+
+bool btf_type_ids_nocast_alias(struct bpf_verifier_log *log,
+ const struct btf *reg_btf, u32 reg_id,
+ const struct btf *arg_btf, u32 arg_id)
+{
+ const char *reg_name, *arg_name, *search_needle;
+ const struct btf_type *reg_type, *arg_type;
+ int reg_len, arg_len, cmp_len;
+ size_t pattern_len = sizeof(NOCAST_ALIAS_SUFFIX) - sizeof(char);
+
+ reg_type = btf_type_by_id(reg_btf, reg_id);
+ if (!reg_type)
+ return false;
+
+ arg_type = btf_type_by_id(arg_btf, arg_id);
+ if (!arg_type)
+ return false;
+
+ reg_name = btf_name_by_offset(reg_btf, reg_type->name_off);
+ arg_name = btf_name_by_offset(arg_btf, arg_type->name_off);
+
+ reg_len = strlen(reg_name);
+ arg_len = strlen(arg_name);
+
+ /* Exactly one of the two type names may be suffixed with ___init, so
+ * if the strings are the same size, they can't possibly be no-cast
+ * aliases of one another. If you have two of the same type names, e.g.
+ * they're both nf_conn___init, it would be improper to return true
+ * because they are _not_ no-cast aliases, they are the same type.
+ */
+ if (reg_len == arg_len)
+ return false;
+
+ /* Either of the two names must be the other name, suffixed with ___init. */
+ if ((reg_len != arg_len + pattern_len) &&
+ (arg_len != reg_len + pattern_len))
+ return false;
+
+ if (reg_len < arg_len) {
+ search_needle = strstr(arg_name, NOCAST_ALIAS_SUFFIX);
+ cmp_len = reg_len;
+ } else {
+ search_needle = strstr(reg_name, NOCAST_ALIAS_SUFFIX);
+ cmp_len = arg_len;
+ }
+
+ if (!search_needle)
+ return false;
+
+ /* ___init suffix must come at the end of the name */
+ if (*(search_needle + pattern_len) != '\0')
+ return false;
+
+ return !strncmp(reg_name, arg_name, cmp_len);
+}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ba3fff17e2f9..b297e9f60ca1 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -34,7 +34,9 @@
#include <linux/log2.h>
#include <linux/bpf_verifier.h>
#include <linux/nodemask.h>
+#include <linux/nospec.h>
#include <linux/bpf_mem_alloc.h>
+#include <linux/memcontrol.h>
#include <asm/barrier.h>
#include <asm/unaligned.h>
@@ -87,7 +89,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags)
{
- gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
+ gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags);
struct bpf_prog_aux *aux;
struct bpf_prog *fp;
@@ -96,12 +98,12 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
if (fp == NULL)
return NULL;
- aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT | gfp_extra_flags);
+ aux = kzalloc(sizeof(*aux), bpf_memcg_flags(GFP_KERNEL | gfp_extra_flags));
if (aux == NULL) {
vfree(fp);
return NULL;
}
- fp->active = alloc_percpu_gfp(int, GFP_KERNEL_ACCOUNT | gfp_extra_flags);
+ fp->active = alloc_percpu_gfp(int, bpf_memcg_flags(GFP_KERNEL | gfp_extra_flags));
if (!fp->active) {
vfree(fp);
kfree(aux);
@@ -126,7 +128,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
{
- gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
+ gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags);
struct bpf_prog *prog;
int cpu;
@@ -159,7 +161,7 @@ int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)
prog->aux->jited_linfo = kvcalloc(prog->aux->nr_linfo,
sizeof(*prog->aux->jited_linfo),
- GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
+ bpf_memcg_flags(GFP_KERNEL | __GFP_NOWARN));
if (!prog->aux->jited_linfo)
return -ENOMEM;
@@ -234,7 +236,7 @@ void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
gfp_t gfp_extra_flags)
{
- gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags;
+ gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags);
struct bpf_prog *fp;
u32 pages;
@@ -1910,9 +1912,7 @@ out:
* reuse preexisting logic from Spectre v1 mitigation that
* happens to produce the required code on x86 for v4 as well.
*/
-#ifdef CONFIG_X86
barrier_nospec();
-#endif
CONT;
#define LDST(SIZEOP, SIZE) \
STX_MEM_##SIZEOP: \
@@ -2096,6 +2096,14 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
if (fp->kprobe_override)
return false;
+ /* XDP programs inserted into maps are not guaranteed to run on
+ * a particular netdev (and can run outside driver context entirely
+ * in the case of devmap and cpumap). Until device checks
+ * are implemented, prohibit adding dev-bound programs to program maps.
+ */
+ if (bpf_prog_is_dev_bound(fp->aux))
+ return false;
+
spin_lock(&map->owner.lock);
if (!map->owner.type) {
/* There's no owner yet where we could check for
@@ -2182,7 +2190,7 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
* valid program, which in this case would simply not
* be JITed, but falls back to the interpreter.
*/
- if (!bpf_prog_is_dev_bound(fp->aux)) {
+ if (!bpf_prog_is_offloaded(fp->aux)) {
*err = bpf_prog_alloc_jited_linfo(fp);
if (*err)
return fp;
@@ -2554,7 +2562,7 @@ static void bpf_prog_free_deferred(struct work_struct *work)
bpf_free_used_maps(aux);
bpf_free_used_btfs(aux);
if (bpf_prog_is_dev_bound(aux))
- bpf_prog_offload_destroy(aux->prog);
+ bpf_prog_dev_bound_destroy(aux->prog);
#ifdef CONFIG_PERF_EVENTS
if (aux->prog->has_callchain_buf)
put_callchain_buffers();
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index e0b2d016f0bf..d2110c1f6fa6 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -361,7 +361,7 @@ static int cpu_map_kthread_run(void *data)
/* Support running another XDP prog on this CPU */
nframes = cpu_map_bpf_prog_run(rcpu, frames, xdp_n, &stats, &list);
if (nframes) {
- m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, skbs);
+ m = kmem_cache_alloc_bulk(skbuff_cache, gfp, nframes, skbs);
if (unlikely(m == 0)) {
for (i = 0; i < nframes; i++)
skbs[i] = NULL; /* effect: xdp_return_frame */
diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c
new file mode 100644
index 000000000000..52b981512a35
--- /dev/null
+++ b/kernel/bpf/cpumask.c
@@ -0,0 +1,479 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2023 Meta, Inc */
+#include <linux/bpf.h>
+#include <linux/bpf_mem_alloc.h>
+#include <linux/btf.h>
+#include <linux/btf_ids.h>
+#include <linux/cpumask.h>
+
+/**
+ * struct bpf_cpumask - refcounted BPF cpumask wrapper structure
+ * @cpumask: The actual cpumask embedded in the struct.
+ * @usage: Object reference counter. When the refcount goes to 0, the
+ * memory is released back to the BPF allocator, which provides
+ * RCU safety.
+ *
+ * Note that we explicitly embed a cpumask_t rather than a cpumask_var_t. This
+ * is done to avoid confusing the verifier due to the typedef of cpumask_var_t
+ * changing depending on whether CONFIG_CPUMASK_OFFSTACK is defined or not. See
+ * the details in <linux/cpumask.h>. The consequence is that this structure is
+ * likely a bit larger than it needs to be when CONFIG_CPUMASK_OFFSTACK is
+ * defined due to embedding the whole NR_CPUS-size bitmap, but the extra memory
+ * overhead is minimal. For the more typical case of CONFIG_CPUMASK_OFFSTACK
+ * not being defined, the structure is the same size regardless.
+ */
+struct bpf_cpumask {
+ cpumask_t cpumask;
+ refcount_t usage;
+};
+
+static struct bpf_mem_alloc bpf_cpumask_ma;
+
+static bool cpu_valid(u32 cpu)
+{
+ return cpu < nr_cpu_ids;
+}
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global kfuncs as their definitions will be in BTF");
+
+/**
+ * bpf_cpumask_create() - Create a mutable BPF cpumask.
+ *
+ * Allocates a cpumask that can be queried, mutated, acquired, and released by
+ * a BPF program. The cpumask returned by this function must either be embedded
+ * in a map as a kptr, or freed with bpf_cpumask_release().
+ *
+ * bpf_cpumask_create() allocates memory using the BPF memory allocator, and
+ * will not block. It may return NULL if no memory is available.
+ */
+__bpf_kfunc struct bpf_cpumask *bpf_cpumask_create(void)
+{
+ struct bpf_cpumask *cpumask;
+
+ /* cpumask must be the first element so struct bpf_cpumask can be cast to struct cpumask. */
+ BUILD_BUG_ON(offsetof(struct bpf_cpumask, cpumask) != 0);
+
+ cpumask = bpf_mem_alloc(&bpf_cpumask_ma, sizeof(*cpumask));
+ if (!cpumask)
+ return NULL;
+
+ memset(cpumask, 0, sizeof(*cpumask));
+ refcount_set(&cpumask->usage, 1);
+
+ return cpumask;
+}
+
+/**
+ * bpf_cpumask_acquire() - Acquire a reference to a BPF cpumask.
+ * @cpumask: The BPF cpumask being acquired. The cpumask must be a trusted
+ * pointer.
+ *
+ * Acquires a reference to a BPF cpumask. The cpumask returned by this function
+ * must either be embedded in a map as a kptr, or freed with
+ * bpf_cpumask_release().
+ */
+__bpf_kfunc struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask)
+{
+ refcount_inc(&cpumask->usage);
+ return cpumask;
+}
+
+/**
+ * bpf_cpumask_kptr_get() - Attempt to acquire a reference to a BPF cpumask
+ * stored in a map.
+ * @cpumaskp: A pointer to a BPF cpumask map value.
+ *
+ * Attempts to acquire a reference to a BPF cpumask stored in a map value. The
+ * cpumask returned by this function must either be embedded in a map as a
+ * kptr, or freed with bpf_cpumask_release(). This function may return NULL if
+ * no BPF cpumask was found in the specified map value.
+ */
+__bpf_kfunc struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumaskp)
+{
+ struct bpf_cpumask *cpumask;
+
+ /* The BPF memory allocator frees memory backing its caches in an RCU
+ * callback. Thus, we can safely use RCU to ensure that the cpumask is
+ * safe to read.
+ */
+ rcu_read_lock();
+
+ cpumask = READ_ONCE(*cpumaskp);
+ if (cpumask && !refcount_inc_not_zero(&cpumask->usage))
+ cpumask = NULL;
+
+ rcu_read_unlock();
+ return cpumask;
+}
+
+/**
+ * bpf_cpumask_release() - Release a previously acquired BPF cpumask.
+ * @cpumask: The cpumask being released.
+ *
+ * Releases a previously acquired reference to a BPF cpumask. When the final
+ * reference of the BPF cpumask has been released, it is subsequently freed in
+ * an RCU callback in the BPF memory allocator.
+ */
+__bpf_kfunc void bpf_cpumask_release(struct bpf_cpumask *cpumask)
+{
+ if (!cpumask)
+ return;
+
+ if (refcount_dec_and_test(&cpumask->usage)) {
+ migrate_disable();
+ bpf_mem_free(&bpf_cpumask_ma, cpumask);
+ migrate_enable();
+ }
+}
+
+/**
+ * bpf_cpumask_first() - Get the index of the first nonzero bit in the cpumask.
+ * @cpumask: The cpumask being queried.
+ *
+ * Find the index of the first nonzero bit of the cpumask. A struct bpf_cpumask
+ * pointer may be safely passed to this function.
+ */
+__bpf_kfunc u32 bpf_cpumask_first(const struct cpumask *cpumask)
+{
+ return cpumask_first(cpumask);
+}
+
+/**
+ * bpf_cpumask_first_zero() - Get the index of the first unset bit in the
+ * cpumask.
+ * @cpumask: The cpumask being queried.
+ *
+ * Find the index of the first unset bit of the cpumask. A struct bpf_cpumask
+ * pointer may be safely passed to this function.
+ */
+__bpf_kfunc u32 bpf_cpumask_first_zero(const struct cpumask *cpumask)
+{
+ return cpumask_first_zero(cpumask);
+}
+
+/**
+ * bpf_cpumask_set_cpu() - Set a bit for a CPU in a BPF cpumask.
+ * @cpu: The CPU to be set in the cpumask.
+ * @cpumask: The BPF cpumask in which a bit is being set.
+ */
+__bpf_kfunc void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask)
+{
+ if (!cpu_valid(cpu))
+ return;
+
+ cpumask_set_cpu(cpu, (struct cpumask *)cpumask);
+}
+
+/**
+ * bpf_cpumask_clear_cpu() - Clear a bit for a CPU in a BPF cpumask.
+ * @cpu: The CPU to be cleared from the cpumask.
+ * @cpumask: The BPF cpumask in which a bit is being cleared.
+ */
+__bpf_kfunc void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask)
+{
+ if (!cpu_valid(cpu))
+ return;
+
+ cpumask_clear_cpu(cpu, (struct cpumask *)cpumask);
+}
+
+/**
+ * bpf_cpumask_test_cpu() - Test whether a CPU is set in a cpumask.
+ * @cpu: The CPU being queried for.
+ * @cpumask: The cpumask being queried for containing a CPU.
+ *
+ * Return:
+ * * true - @cpu is set in the cpumask
+ * * false - @cpu was not set in the cpumask, or @cpu is an invalid cpu.
+ */
+__bpf_kfunc bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask)
+{
+ if (!cpu_valid(cpu))
+ return false;
+
+ return cpumask_test_cpu(cpu, cpumask);
+}
+
+/**
+ * bpf_cpumask_test_and_set_cpu() - Atomically test and set a CPU in a BPF cpumask.
+ * @cpu: The CPU being set and queried for.
+ * @cpumask: The BPF cpumask being set and queried for containing a CPU.
+ *
+ * Return:
+ * * true - @cpu was already set in the cpumask before this call
+ * * false - @cpu was not set in the cpumask, or @cpu is invalid.
+ */
+__bpf_kfunc bool bpf_cpumask_test_and_set_cpu(u32 cpu, struct bpf_cpumask *cpumask)
+{
+ if (!cpu_valid(cpu))
+ return false;
+
+ return cpumask_test_and_set_cpu(cpu, (struct cpumask *)cpumask);
+}
+
+/**
+ * bpf_cpumask_test_and_clear_cpu() - Atomically test and clear a CPU in a BPF
+ * cpumask.
+ * @cpu: The CPU being cleared and queried for.
+ * @cpumask: The BPF cpumask being cleared and queried for containing a CPU.
+ *
+ * Return:
+ * * true - @cpu was set in the cpumask before this call
+ * * false - @cpu was not set in the cpumask, or @cpu is invalid.
+ */
+__bpf_kfunc bool bpf_cpumask_test_and_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask)
+{
+ if (!cpu_valid(cpu))
+ return false;
+
+ return cpumask_test_and_clear_cpu(cpu, (struct cpumask *)cpumask);
+}
+
+/**
+ * bpf_cpumask_setall() - Set all of the bits in a BPF cpumask.
+ * @cpumask: The BPF cpumask having all of its bits set.
+ */
+__bpf_kfunc void bpf_cpumask_setall(struct bpf_cpumask *cpumask)
+{
+ cpumask_setall((struct cpumask *)cpumask);
+}
+
+/**
+ * bpf_cpumask_clear() - Clear all of the bits in a BPF cpumask.
+ * @cpumask: The BPF cpumask being cleared.
+ */
+__bpf_kfunc void bpf_cpumask_clear(struct bpf_cpumask *cpumask)
+{
+ cpumask_clear((struct cpumask *)cpumask);
+}
+
+/**
+ * bpf_cpumask_and() - AND two cpumasks and store the result.
+ * @dst: The BPF cpumask where the result is being stored.
+ * @src1: The first input.
+ * @src2: The second input.
+ *
+ * Return:
+ * * true - @dst has at least one bit set following the operation
+ * * false - @dst is empty following the operation
+ *
+ * struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
+ */
+__bpf_kfunc bool bpf_cpumask_and(struct bpf_cpumask *dst,
+ const struct cpumask *src1,
+ const struct cpumask *src2)
+{
+ return cpumask_and((struct cpumask *)dst, src1, src2);
+}
+
+/**
+ * bpf_cpumask_or() - OR two cpumasks and store the result.
+ * @dst: The BPF cpumask where the result is being stored.
+ * @src1: The first input.
+ * @src2: The second input.
+ *
+ * struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
+ */
+__bpf_kfunc void bpf_cpumask_or(struct bpf_cpumask *dst,
+ const struct cpumask *src1,
+ const struct cpumask *src2)
+{
+ cpumask_or((struct cpumask *)dst, src1, src2);
+}
+
+/**
+ * bpf_cpumask_xor() - XOR two cpumasks and store the result.
+ * @dst: The BPF cpumask where the result is being stored.
+ * @src1: The first input.
+ * @src2: The second input.
+ *
+ * struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
+ */
+__bpf_kfunc void bpf_cpumask_xor(struct bpf_cpumask *dst,
+ const struct cpumask *src1,
+ const struct cpumask *src2)
+{
+ cpumask_xor((struct cpumask *)dst, src1, src2);
+}
+
+/**
+ * bpf_cpumask_equal() - Check two cpumasks for equality.
+ * @src1: The first input.
+ * @src2: The second input.
+ *
+ * Return:
+ * * true - @src1 and @src2 have the same bits set.
+ * * false - @src1 and @src2 differ in at least one bit.
+ *
+ * struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
+ */
+__bpf_kfunc bool bpf_cpumask_equal(const struct cpumask *src1, const struct cpumask *src2)
+{
+ return cpumask_equal(src1, src2);
+}
+
+/**
+ * bpf_cpumask_intersects() - Check two cpumasks for overlap.
+ * @src1: The first input.
+ * @src2: The second input.
+ *
+ * Return:
+ * * true - @src1 and @src2 have at least one of the same bits set.
+ * * false - @src1 and @src2 don't have any of the same bits set.
+ *
+ * struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
+ */
+__bpf_kfunc bool bpf_cpumask_intersects(const struct cpumask *src1, const struct cpumask *src2)
+{
+ return cpumask_intersects(src1, src2);
+}
+
+/**
+ * bpf_cpumask_subset() - Check if a cpumask is a subset of another.
+ * @src1: The first cpumask being checked as a subset.
+ * @src2: The second cpumask being checked as a superset.
+ *
+ * Return:
+ * * true - All of the bits of @src1 are set in @src2.
+ * * false - At least one bit in @src1 is not set in @src2.
+ *
+ * struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
+ */
+__bpf_kfunc bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2)
+{
+ return cpumask_subset(src1, src2);
+}
+
+/**
+ * bpf_cpumask_empty() - Check if a cpumask is empty.
+ * @cpumask: The cpumask being checked.
+ *
+ * Return:
+ * * true - None of the bits in @cpumask are set.
+ * * false - At least one bit in @cpumask is set.
+ *
+ * A struct bpf_cpumask pointer may be safely passed to @cpumask.
+ */
+__bpf_kfunc bool bpf_cpumask_empty(const struct cpumask *cpumask)
+{
+ return cpumask_empty(cpumask);
+}
+
+/**
+ * bpf_cpumask_full() - Check if a cpumask has all bits set.
+ * @cpumask: The cpumask being checked.
+ *
+ * Return:
+ * * true - All of the bits in @cpumask are set.
+ * * false - At least one bit in @cpumask is cleared.
+ *
+ * A struct bpf_cpumask pointer may be safely passed to @cpumask.
+ */
+__bpf_kfunc bool bpf_cpumask_full(const struct cpumask *cpumask)
+{
+ return cpumask_full(cpumask);
+}
+
+/**
+ * bpf_cpumask_copy() - Copy the contents of a cpumask into a BPF cpumask.
+ * @dst: The BPF cpumask being copied into.
+ * @src: The cpumask being copied.
+ *
+ * A struct bpf_cpumask pointer may be safely passed to @src.
+ */
+__bpf_kfunc void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src)
+{
+ cpumask_copy((struct cpumask *)dst, src);
+}
+
+/**
+ * bpf_cpumask_any() - Return a random set CPU from a cpumask.
+ * @cpumask: The cpumask being queried.
+ *
+ * Return:
+ * * A random set bit within [0, num_cpus) if at least one bit is set.
+ * * >= num_cpus if no bit is set.
+ *
+ * A struct bpf_cpumask pointer may be safely passed to @cpumask.
+ */
+__bpf_kfunc u32 bpf_cpumask_any(const struct cpumask *cpumask)
+{
+ return cpumask_any(cpumask);
+}
+
+/**
+ * bpf_cpumask_any_and() - Return a random set CPU from the AND of two
+ * cpumasks.
+ * @src1: The first cpumask.
+ * @src2: The second cpumask.
+ *
+ * Return:
+ * * A random set bit within [0, num_cpus) if at least one bit is set.
+ * * >= num_cpus if no bit is set.
+ *
+ * struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
+ */
+__bpf_kfunc u32 bpf_cpumask_any_and(const struct cpumask *src1, const struct cpumask *src2)
+{
+ return cpumask_any_and(src1, src2);
+}
+
+__diag_pop();
+
+BTF_SET8_START(cpumask_kfunc_btf_ids)
+BTF_ID_FLAGS(func, bpf_cpumask_create, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_cpumask_first, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_set_cpu, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_clear_cpu, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_test_cpu, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_test_and_set_cpu, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_test_and_clear_cpu, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_setall, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_clear, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_and, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_or, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_xor, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_equal, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_intersects, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_subset, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_empty, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_full, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_any, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_any_and, KF_TRUSTED_ARGS)
+BTF_SET8_END(cpumask_kfunc_btf_ids)
+
+static const struct btf_kfunc_id_set cpumask_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &cpumask_kfunc_btf_ids,
+};
+
+BTF_ID_LIST(cpumask_dtor_ids)
+BTF_ID(struct, bpf_cpumask)
+BTF_ID(func, bpf_cpumask_release)
+
+static int __init cpumask_kfunc_init(void)
+{
+ int ret;
+ const struct btf_id_dtor_kfunc cpumask_dtors[] = {
+ {
+ .btf_id = cpumask_dtor_ids[0],
+ .kfunc_btf_id = cpumask_dtor_ids[1]
+ },
+ };
+
+ ret = bpf_mem_alloc_init(&bpf_cpumask_ma, 0, false);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &cpumask_kfunc_set);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &cpumask_kfunc_set);
+ return ret ?: register_btf_id_dtor_kfuncs(cpumask_dtors,
+ ARRAY_SIZE(cpumask_dtors),
+ THIS_MODULE);
+}
+
+late_initcall(cpumask_kfunc_init);
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index d01e4c55b376..2675fefc6cb6 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -474,7 +474,11 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
{
int err;
- if (!dev->netdev_ops->ndo_xdp_xmit)
+ if (!(dev->xdp_features & NETDEV_XDP_ACT_NDO_XMIT))
+ return -EOPNOTSUPP;
+
+ if (unlikely(!(dev->xdp_features & NETDEV_XDP_ACT_NDO_XMIT_SG) &&
+ xdp_frame_has_frags(xdpf)))
return -EOPNOTSUPP;
err = xdp_ok_fwd_dev(dev, xdp_get_frame_len(xdpf));
@@ -532,8 +536,14 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf,
static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf)
{
- if (!obj ||
- !obj->dev->netdev_ops->ndo_xdp_xmit)
+ if (!obj)
+ return false;
+
+ if (!(obj->dev->xdp_features & NETDEV_XDP_ACT_NDO_XMIT))
+ return false;
+
+ if (unlikely(!(obj->dev->xdp_features & NETDEV_XDP_ACT_NDO_XMIT_SG) &&
+ xdp_frame_has_frags(xdpf)))
return false;
if (xdp_ok_fwd_dev(obj->dev, xdp_get_frame_len(xdpf)))
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 66bded144377..5dfcb5ad0d06 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1004,8 +1004,6 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
l_new = ERR_PTR(-ENOMEM);
goto dec_count;
}
- check_and_init_map_value(&htab->map,
- l_new->key + round_up(key_size, 8));
}
memcpy(l_new->key, key, key_size);
@@ -1592,6 +1590,7 @@ static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
else
copy_map_value(map, value, l->key +
roundup_key_size);
+ /* Zeroing special fields in the temp buffer */
check_and_init_map_value(map, value);
}
@@ -1792,6 +1791,7 @@ again_nocopy:
true);
else
copy_map_value(map, dst_val, value);
+ /* Zeroing special fields in the temp buffer */
check_and_init_map_value(map, dst_val);
}
if (do_delete) {
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index af30c6cbd65d..5b278a38ae58 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -756,19 +756,20 @@ static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
/* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary
* arguments representation.
*/
-#define MAX_BPRINTF_BUF_LEN 512
+#define MAX_BPRINTF_BIN_ARGS 512
/* Support executing three nested bprintf helper calls on a given CPU */
#define MAX_BPRINTF_NEST_LEVEL 3
struct bpf_bprintf_buffers {
- char tmp_bufs[MAX_BPRINTF_NEST_LEVEL][MAX_BPRINTF_BUF_LEN];
+ char bin_args[MAX_BPRINTF_BIN_ARGS];
+ char buf[MAX_BPRINTF_BUF];
};
-static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs);
+
+static DEFINE_PER_CPU(struct bpf_bprintf_buffers[MAX_BPRINTF_NEST_LEVEL], bpf_bprintf_bufs);
static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);
-static int try_get_fmt_tmp_buf(char **tmp_buf)
+static int try_get_buffers(struct bpf_bprintf_buffers **bufs)
{
- struct bpf_bprintf_buffers *bufs;
int nest_level;
preempt_disable();
@@ -778,18 +779,19 @@ static int try_get_fmt_tmp_buf(char **tmp_buf)
preempt_enable();
return -EBUSY;
}
- bufs = this_cpu_ptr(&bpf_bprintf_bufs);
- *tmp_buf = bufs->tmp_bufs[nest_level - 1];
+ *bufs = this_cpu_ptr(&bpf_bprintf_bufs[nest_level - 1]);
return 0;
}
-void bpf_bprintf_cleanup(void)
+void bpf_bprintf_cleanup(struct bpf_bprintf_data *data)
{
- if (this_cpu_read(bpf_bprintf_nest_level)) {
- this_cpu_dec(bpf_bprintf_nest_level);
- preempt_enable();
- }
+ if (!data->bin_args && !data->buf)
+ return;
+ if (WARN_ON_ONCE(this_cpu_read(bpf_bprintf_nest_level) == 0))
+ return;
+ this_cpu_dec(bpf_bprintf_nest_level);
+ preempt_enable();
}
/*
@@ -798,18 +800,20 @@ void bpf_bprintf_cleanup(void)
* Returns a negative value if fmt is an invalid format string or 0 otherwise.
*
* This can be used in two ways:
- * - Format string verification only: when bin_args is NULL
+ * - Format string verification only: when data->get_bin_args is false
* - Arguments preparation: in addition to the above verification, it writes in
- * bin_args a binary representation of arguments usable by bstr_printf where
- * pointers from BPF have been sanitized.
+ * data->bin_args a binary representation of arguments usable by bstr_printf
+ * where pointers from BPF have been sanitized.
*
* In argument preparation mode, if 0 is returned, safe temporary buffers are
* allocated and bpf_bprintf_cleanup should be called to free them after use.
*/
int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
- u32 **bin_args, u32 num_args)
+ u32 num_args, struct bpf_bprintf_data *data)
{
+ bool get_buffers = (data->get_bin_args && num_args) || data->get_buf;
char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end;
+ struct bpf_bprintf_buffers *buffers = NULL;
size_t sizeof_cur_arg, sizeof_cur_ip;
int err, i, num_spec = 0;
u64 cur_arg;
@@ -820,14 +824,19 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
return -EINVAL;
fmt_size = fmt_end - fmt;
- if (bin_args) {
- if (num_args && try_get_fmt_tmp_buf(&tmp_buf))
- return -EBUSY;
+ if (get_buffers && try_get_buffers(&buffers))
+ return -EBUSY;
- tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN;
- *bin_args = (u32 *)tmp_buf;
+ if (data->get_bin_args) {
+ if (num_args)
+ tmp_buf = buffers->bin_args;
+ tmp_buf_end = tmp_buf + MAX_BPRINTF_BIN_ARGS;
+ data->bin_args = (u32 *)tmp_buf;
}
+ if (data->get_buf)
+ data->buf = buffers->buf;
+
for (i = 0; i < fmt_size; i++) {
if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
err = -EINVAL;
@@ -1021,31 +1030,33 @@ nocopy_fmt:
err = 0;
out:
if (err)
- bpf_bprintf_cleanup();
+ bpf_bprintf_cleanup(data);
return err;
}
BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
- const void *, data, u32, data_len)
+ const void *, args, u32, data_len)
{
+ struct bpf_bprintf_data data = {
+ .get_bin_args = true,
+ };
int err, num_args;
- u32 *bin_args;
if (data_len % 8 || data_len > MAX_BPRINTF_VARARGS * 8 ||
- (data_len && !data))
+ (data_len && !args))
return -EINVAL;
num_args = data_len / 8;
/* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we
* can safely give an unbounded size.
*/
- err = bpf_bprintf_prepare(fmt, UINT_MAX, data, &bin_args, num_args);
+ err = bpf_bprintf_prepare(fmt, UINT_MAX, args, num_args, &data);
if (err < 0)
return err;
- err = bstr_printf(str, str_size, fmt, bin_args);
+ err = bstr_printf(str, str_size, fmt, data.bin_args);
- bpf_bprintf_cleanup();
+ bpf_bprintf_cleanup(&data);
return err + 1;
}
@@ -1745,12 +1756,12 @@ unlock:
while (head != orig_head) {
void *obj = head;
- obj -= field->list_head.node_offset;
+ obj -= field->graph_root.node_offset;
head = head->next;
/* The contained type can also have resources, including a
* bpf_list_head which needs to be freed.
*/
- bpf_obj_free_fields(field->list_head.value_rec, obj);
+ bpf_obj_free_fields(field->graph_root.value_rec, obj);
/* bpf_mem_free requires migrate_disable(), since we can be
* called from map free path as well apart from BPF program (as
* part of map ops doing bpf_obj_free_fields).
@@ -1761,11 +1772,51 @@ unlock:
}
}
+/* Like rbtree_postorder_for_each_entry_safe, but 'pos' and 'n' are
+ * 'rb_node *', so field name of rb_node within containing struct is not
+ * needed.
+ *
+ * Since bpf_rb_tree's node type has a corresponding struct btf_field with
+ * graph_root.node_offset, it's not necessary to know field name
+ * or type of node struct
+ */
+#define bpf_rbtree_postorder_for_each_entry_safe(pos, n, root) \
+ for (pos = rb_first_postorder(root); \
+ pos && ({ n = rb_next_postorder(pos); 1; }); \
+ pos = n)
+
+void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
+ struct bpf_spin_lock *spin_lock)
+{
+ struct rb_root_cached orig_root, *root = rb_root;
+ struct rb_node *pos, *n;
+ void *obj;
+
+ BUILD_BUG_ON(sizeof(struct rb_root_cached) > sizeof(struct bpf_rb_root));
+ BUILD_BUG_ON(__alignof__(struct rb_root_cached) > __alignof__(struct bpf_rb_root));
+
+ __bpf_spin_lock_irqsave(spin_lock);
+ orig_root = *root;
+ *root = RB_ROOT_CACHED;
+ __bpf_spin_unlock_irqrestore(spin_lock);
+
+ bpf_rbtree_postorder_for_each_entry_safe(pos, n, &orig_root.rb_root) {
+ obj = pos;
+ obj -= field->graph_root.node_offset;
+
+ bpf_obj_free_fields(field->graph_root.value_rec, obj);
+
+ migrate_disable();
+ bpf_mem_free(&bpf_global_ma, obj);
+ migrate_enable();
+ }
+}
+
__diag_push();
__diag_ignore_all("-Wmissing-prototypes",
"Global functions as their definitions will be in vmlinux BTF");
-void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
+__bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
{
struct btf_struct_meta *meta = meta__ign;
u64 size = local_type_id__k;
@@ -1779,7 +1830,7 @@ void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
return p;
}
-void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
+__bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
{
struct btf_struct_meta *meta = meta__ign;
void *p = p__alloc;
@@ -1800,12 +1851,12 @@ static void __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *hea
tail ? list_add_tail(n, h) : list_add(n, h);
}
-void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node)
+__bpf_kfunc void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node)
{
return __bpf_list_add(node, head, false);
}
-void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node)
+__bpf_kfunc void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node)
{
return __bpf_list_add(node, head, true);
}
@@ -1823,23 +1874,73 @@ static struct bpf_list_node *__bpf_list_del(struct bpf_list_head *head, bool tai
return (struct bpf_list_node *)n;
}
-struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head)
+__bpf_kfunc struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head)
{
return __bpf_list_del(head, false);
}
-struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
+__bpf_kfunc struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
{
return __bpf_list_del(head, true);
}
+__bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
+ struct bpf_rb_node *node)
+{
+ struct rb_root_cached *r = (struct rb_root_cached *)root;
+ struct rb_node *n = (struct rb_node *)node;
+
+ rb_erase_cached(n, r);
+ RB_CLEAR_NODE(n);
+ return (struct bpf_rb_node *)n;
+}
+
+/* Need to copy rbtree_add_cached's logic here because our 'less' is a BPF
+ * program
+ */
+static void __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
+ void *less)
+{
+ struct rb_node **link = &((struct rb_root_cached *)root)->rb_root.rb_node;
+ bpf_callback_t cb = (bpf_callback_t)less;
+ struct rb_node *parent = NULL;
+ bool leftmost = true;
+
+ while (*link) {
+ parent = *link;
+ if (cb((uintptr_t)node, (uintptr_t)parent, 0, 0, 0)) {
+ link = &parent->rb_left;
+ } else {
+ link = &parent->rb_right;
+ leftmost = false;
+ }
+ }
+
+ rb_link_node((struct rb_node *)node, parent, link);
+ rb_insert_color_cached((struct rb_node *)node,
+ (struct rb_root_cached *)root, leftmost);
+}
+
+__bpf_kfunc void bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
+ bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b))
+{
+ __bpf_rbtree_add(root, node, (void *)less);
+}
+
+__bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)
+{
+ struct rb_root_cached *r = (struct rb_root_cached *)root;
+
+ return (struct bpf_rb_node *)rb_first_cached(r);
+}
+
/**
* bpf_task_acquire - Acquire a reference to a task. A task acquired by this
* kfunc which is not stored in a map as a kptr, must be released by calling
* bpf_task_release().
* @p: The task on which a reference is being acquired.
*/
-struct task_struct *bpf_task_acquire(struct task_struct *p)
+__bpf_kfunc struct task_struct *bpf_task_acquire(struct task_struct *p)
{
return get_task_struct(p);
}
@@ -1850,7 +1951,7 @@ struct task_struct *bpf_task_acquire(struct task_struct *p)
* released by calling bpf_task_release().
* @p: The task on which a reference is being acquired.
*/
-struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p)
+__bpf_kfunc struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p)
{
/* For the time being this function returns NULL, as it's not currently
* possible to safely acquire a reference to a task with RCU protection
@@ -1902,7 +2003,7 @@ struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p)
* be released by calling bpf_task_release().
* @pp: A pointer to a task kptr on which a reference is being acquired.
*/
-struct task_struct *bpf_task_kptr_get(struct task_struct **pp)
+__bpf_kfunc struct task_struct *bpf_task_kptr_get(struct task_struct **pp)
{
/* We must return NULL here until we have clarity on how to properly
* leverage RCU for ensuring a task's lifetime. See the comment above
@@ -1915,7 +2016,7 @@ struct task_struct *bpf_task_kptr_get(struct task_struct **pp)
* bpf_task_release - Release the reference acquired on a task.
* @p: The task on which a reference is being released.
*/
-void bpf_task_release(struct task_struct *p)
+__bpf_kfunc void bpf_task_release(struct task_struct *p)
{
if (!p)
return;
@@ -1930,7 +2031,7 @@ void bpf_task_release(struct task_struct *p)
* calling bpf_cgroup_release().
* @cgrp: The cgroup on which a reference is being acquired.
*/
-struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp)
+__bpf_kfunc struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp)
{
cgroup_get(cgrp);
return cgrp;
@@ -1942,7 +2043,7 @@ struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp)
* be released by calling bpf_cgroup_release().
* @cgrpp: A pointer to a cgroup kptr on which a reference is being acquired.
*/
-struct cgroup *bpf_cgroup_kptr_get(struct cgroup **cgrpp)
+__bpf_kfunc struct cgroup *bpf_cgroup_kptr_get(struct cgroup **cgrpp)
{
struct cgroup *cgrp;
@@ -1974,7 +2075,7 @@ struct cgroup *bpf_cgroup_kptr_get(struct cgroup **cgrpp)
* drops to 0.
* @cgrp: The cgroup on which a reference is being released.
*/
-void bpf_cgroup_release(struct cgroup *cgrp)
+__bpf_kfunc void bpf_cgroup_release(struct cgroup *cgrp)
{
if (!cgrp)
return;
@@ -1989,7 +2090,7 @@ void bpf_cgroup_release(struct cgroup *cgrp)
* @cgrp: The cgroup for which we're performing a lookup.
* @level: The level of ancestor to look up.
*/
-struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level)
+__bpf_kfunc struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level)
{
struct cgroup *ancestor;
@@ -2008,7 +2109,7 @@ struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level)
* stored in a map, or released with bpf_task_release().
* @pid: The pid of the task being looked up.
*/
-struct task_struct *bpf_task_from_pid(s32 pid)
+__bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid)
{
struct task_struct *p;
@@ -2021,22 +2122,22 @@ struct task_struct *bpf_task_from_pid(s32 pid)
return p;
}
-void *bpf_cast_to_kern_ctx(void *obj)
+__bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj)
{
return obj;
}
-void *bpf_rdonly_cast(void *obj__ign, u32 btf_id__k)
+__bpf_kfunc void *bpf_rdonly_cast(void *obj__ign, u32 btf_id__k)
{
return obj__ign;
}
-void bpf_rcu_read_lock(void)
+__bpf_kfunc void bpf_rcu_read_lock(void)
{
rcu_read_lock();
}
-void bpf_rcu_read_unlock(void)
+__bpf_kfunc void bpf_rcu_read_unlock(void)
{
rcu_read_unlock();
}
@@ -2057,6 +2158,10 @@ BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_task_acquire_not_zero, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE)
+BTF_ID_FLAGS(func, bpf_rbtree_add)
+BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL)
+
#ifdef CONFIG_CGROUPS
BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cgroup_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 1db156405b68..5fcdacbb8439 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -143,7 +143,7 @@ static void *__alloc(struct bpf_mem_cache *c, int node)
return obj;
}
- return kmalloc_node(c->unit_size, flags, node);
+ return kmalloc_node(c->unit_size, flags | __GFP_ZERO, node);
}
static struct mem_cgroup *get_memcg(const struct bpf_mem_cache *c)
@@ -395,7 +395,8 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
unit_size = size;
#ifdef CONFIG_MEMCG_KMEM
- objcg = get_obj_cgroup_from_current();
+ if (memcg_bpf_enabled())
+ objcg = get_obj_cgroup_from_current();
#endif
for_each_possible_cpu(cpu) {
c = per_cpu_ptr(pc, cpu);
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 190d9f9dc987..0c85e06f7ea7 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -41,7 +41,7 @@ struct bpf_offload_dev {
struct bpf_offload_netdev {
struct rhash_head l;
struct net_device *netdev;
- struct bpf_offload_dev *offdev;
+ struct bpf_offload_dev *offdev; /* NULL when bound-only */
struct list_head progs;
struct list_head maps;
struct list_head offdev_netdevs;
@@ -56,7 +56,6 @@ static const struct rhashtable_params offdevs_params = {
};
static struct rhashtable offdevs;
-static bool offdevs_inited;
static int bpf_dev_offload_check(struct net_device *netdev)
{
@@ -72,58 +71,218 @@ bpf_offload_find_netdev(struct net_device *netdev)
{
lockdep_assert_held(&bpf_devs_lock);
- if (!offdevs_inited)
- return NULL;
return rhashtable_lookup_fast(&offdevs, &netdev, offdevs_params);
}
-int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
+static int __bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev,
+ struct net_device *netdev)
{
struct bpf_offload_netdev *ondev;
- struct bpf_prog_offload *offload;
int err;
- if (attr->prog_type != BPF_PROG_TYPE_SCHED_CLS &&
- attr->prog_type != BPF_PROG_TYPE_XDP)
- return -EINVAL;
+ ondev = kzalloc(sizeof(*ondev), GFP_KERNEL);
+ if (!ondev)
+ return -ENOMEM;
- if (attr->prog_flags)
- return -EINVAL;
+ ondev->netdev = netdev;
+ ondev->offdev = offdev;
+ INIT_LIST_HEAD(&ondev->progs);
+ INIT_LIST_HEAD(&ondev->maps);
+
+ err = rhashtable_insert_fast(&offdevs, &ondev->l, offdevs_params);
+ if (err) {
+ netdev_warn(netdev, "failed to register for BPF offload\n");
+ goto err_free;
+ }
+
+ if (offdev)
+ list_add(&ondev->offdev_netdevs, &offdev->netdevs);
+ return 0;
+
+err_free:
+ kfree(ondev);
+ return err;
+}
+
+static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
+{
+ struct bpf_prog_offload *offload = prog->aux->offload;
+
+ if (offload->dev_state)
+ offload->offdev->ops->destroy(prog);
+
+ list_del_init(&offload->offloads);
+ kfree(offload);
+ prog->aux->offload = NULL;
+}
+
+static int bpf_map_offload_ndo(struct bpf_offloaded_map *offmap,
+ enum bpf_netdev_command cmd)
+{
+ struct netdev_bpf data = {};
+ struct net_device *netdev;
+
+ ASSERT_RTNL();
+
+ data.command = cmd;
+ data.offmap = offmap;
+ /* Caller must make sure netdev is valid */
+ netdev = offmap->netdev;
+
+ return netdev->netdev_ops->ndo_bpf(netdev, &data);
+}
+
+static void __bpf_map_offload_destroy(struct bpf_offloaded_map *offmap)
+{
+ WARN_ON(bpf_map_offload_ndo(offmap, BPF_OFFLOAD_MAP_FREE));
+ /* Make sure BPF_MAP_GET_NEXT_ID can't find this dead map */
+ bpf_map_free_id(&offmap->map);
+ list_del_init(&offmap->offloads);
+ offmap->netdev = NULL;
+}
+
+static void __bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
+ struct net_device *netdev)
+{
+ struct bpf_offload_netdev *ondev, *altdev = NULL;
+ struct bpf_offloaded_map *offmap, *mtmp;
+ struct bpf_prog_offload *offload, *ptmp;
+
+ ASSERT_RTNL();
+
+ ondev = rhashtable_lookup_fast(&offdevs, &netdev, offdevs_params);
+ if (WARN_ON(!ondev))
+ return;
+
+ WARN_ON(rhashtable_remove_fast(&offdevs, &ondev->l, offdevs_params));
+
+ /* Try to move the objects to another netdev of the device */
+ if (offdev) {
+ list_del(&ondev->offdev_netdevs);
+ altdev = list_first_entry_or_null(&offdev->netdevs,
+ struct bpf_offload_netdev,
+ offdev_netdevs);
+ }
+
+ if (altdev) {
+ list_for_each_entry(offload, &ondev->progs, offloads)
+ offload->netdev = altdev->netdev;
+ list_splice_init(&ondev->progs, &altdev->progs);
+
+ list_for_each_entry(offmap, &ondev->maps, offloads)
+ offmap->netdev = altdev->netdev;
+ list_splice_init(&ondev->maps, &altdev->maps);
+ } else {
+ list_for_each_entry_safe(offload, ptmp, &ondev->progs, offloads)
+ __bpf_prog_offload_destroy(offload->prog);
+ list_for_each_entry_safe(offmap, mtmp, &ondev->maps, offloads)
+ __bpf_map_offload_destroy(offmap);
+ }
+
+ WARN_ON(!list_empty(&ondev->progs));
+ WARN_ON(!list_empty(&ondev->maps));
+ kfree(ondev);
+}
+
+static int __bpf_prog_dev_bound_init(struct bpf_prog *prog, struct net_device *netdev)
+{
+ struct bpf_offload_netdev *ondev;
+ struct bpf_prog_offload *offload;
+ int err;
offload = kzalloc(sizeof(*offload), GFP_USER);
if (!offload)
return -ENOMEM;
offload->prog = prog;
+ offload->netdev = netdev;
- offload->netdev = dev_get_by_index(current->nsproxy->net_ns,
- attr->prog_ifindex);
- err = bpf_dev_offload_check(offload->netdev);
- if (err)
- goto err_maybe_put;
-
- down_write(&bpf_devs_lock);
ondev = bpf_offload_find_netdev(offload->netdev);
if (!ondev) {
- err = -EINVAL;
- goto err_unlock;
+ if (bpf_prog_is_offloaded(prog->aux)) {
+ err = -EINVAL;
+ goto err_free;
+ }
+
+ /* When only binding to the device, explicitly
+ * create an entry in the hashtable.
+ */
+ err = __bpf_offload_dev_netdev_register(NULL, offload->netdev);
+ if (err)
+ goto err_free;
+ ondev = bpf_offload_find_netdev(offload->netdev);
}
offload->offdev = ondev->offdev;
prog->aux->offload = offload;
list_add_tail(&offload->offloads, &ondev->progs);
- dev_put(offload->netdev);
- up_write(&bpf_devs_lock);
return 0;
-err_unlock:
- up_write(&bpf_devs_lock);
-err_maybe_put:
- if (offload->netdev)
- dev_put(offload->netdev);
+err_free:
kfree(offload);
return err;
}
+int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr)
+{
+ struct net_device *netdev;
+ int err;
+
+ if (attr->prog_type != BPF_PROG_TYPE_SCHED_CLS &&
+ attr->prog_type != BPF_PROG_TYPE_XDP)
+ return -EINVAL;
+
+ if (attr->prog_flags & ~BPF_F_XDP_DEV_BOUND_ONLY)
+ return -EINVAL;
+
+ if (attr->prog_type == BPF_PROG_TYPE_SCHED_CLS &&
+ attr->prog_flags & BPF_F_XDP_DEV_BOUND_ONLY)
+ return -EINVAL;
+
+ netdev = dev_get_by_index(current->nsproxy->net_ns, attr->prog_ifindex);
+ if (!netdev)
+ return -EINVAL;
+
+ err = bpf_dev_offload_check(netdev);
+ if (err)
+ goto out;
+
+ prog->aux->offload_requested = !(attr->prog_flags & BPF_F_XDP_DEV_BOUND_ONLY);
+
+ down_write(&bpf_devs_lock);
+ err = __bpf_prog_dev_bound_init(prog, netdev);
+ up_write(&bpf_devs_lock);
+
+out:
+ dev_put(netdev);
+ return err;
+}
+
+int bpf_prog_dev_bound_inherit(struct bpf_prog *new_prog, struct bpf_prog *old_prog)
+{
+ int err;
+
+ if (!bpf_prog_is_dev_bound(old_prog->aux))
+ return 0; /* old_prog is not device-bound: nothing to inherit */
+
+ if (bpf_prog_is_offloaded(old_prog->aux))
+ return -EINVAL; /* binding is only inherited from non-offloaded programs */
+
+ new_prog->aux->dev_bound = old_prog->aux->dev_bound;
+ new_prog->aux->offload_requested = old_prog->aux->offload_requested;
+
+ down_write(&bpf_devs_lock);
+ if (!old_prog->aux->offload) { /* binding already gone, e.g. cleared by __bpf_prog_offload_destroy() */
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = __bpf_prog_dev_bound_init(new_prog, old_prog->aux->offload->netdev);
+
+out:
+ up_write(&bpf_devs_lock);
+ return err;
+}
+
int bpf_prog_offload_verifier_prep(struct bpf_prog *prog)
{
struct bpf_prog_offload *offload;
@@ -209,24 +368,25 @@ bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
up_read(&bpf_devs_lock);
}
-static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
+void bpf_prog_dev_bound_destroy(struct bpf_prog *prog)
{
- struct bpf_prog_offload *offload = prog->aux->offload;
-
- if (offload->dev_state)
- offload->offdev->ops->destroy(prog);
-
- list_del_init(&offload->offloads);
- kfree(offload);
- prog->aux->offload = NULL;
-}
+ struct bpf_offload_netdev *ondev;
+ struct net_device *netdev;
-void bpf_prog_offload_destroy(struct bpf_prog *prog)
-{
+ rtnl_lock(); /* __bpf_offload_dev_netdev_unregister() below does ASSERT_RTNL() */
down_write(&bpf_devs_lock);
- if (prog->aux->offload)
+ if (prog->aux->offload) {
+ list_del_init(&prog->aux->offload->offloads);
+
+ netdev = prog->aux->offload->netdev; /* saved first: the call below frees the offload state */
__bpf_prog_offload_destroy(prog);
+
+ ondev = bpf_offload_find_netdev(netdev);
+ if (!ondev->offdev && list_empty(&ondev->progs)) /* bound-only entry with no programs left */
+ __bpf_offload_dev_netdev_unregister(NULL, netdev);
+ }
up_write(&bpf_devs_lock);
+ rtnl_unlock();
}
static int bpf_prog_offload_translate(struct bpf_prog *prog)
@@ -340,22 +500,6 @@ int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
const struct bpf_prog_ops bpf_offload_prog_ops = {
};
-static int bpf_map_offload_ndo(struct bpf_offloaded_map *offmap,
- enum bpf_netdev_command cmd)
-{
- struct netdev_bpf data = {};
- struct net_device *netdev;
-
- ASSERT_RTNL();
-
- data.command = cmd;
- data.offmap = offmap;
- /* Caller must make sure netdev is valid */
- netdev = offmap->netdev;
-
- return netdev->netdev_ops->ndo_bpf(netdev, &data);
-}
-
struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
{
struct net *net = current->nsproxy->net_ns;
@@ -405,15 +549,6 @@ err_unlock:
return ERR_PTR(err);
}
-static void __bpf_map_offload_destroy(struct bpf_offloaded_map *offmap)
-{
- WARN_ON(bpf_map_offload_ndo(offmap, BPF_OFFLOAD_MAP_FREE));
- /* Make sure BPF_MAP_GET_NEXT_ID can't find this dead map */
- bpf_map_free_id(&offmap->map, true);
- list_del_init(&offmap->offloads);
- offmap->netdev = NULL;
-}
-
void bpf_map_offload_map_free(struct bpf_map *map)
{
struct bpf_offloaded_map *offmap = map_to_offmap(map);
@@ -573,12 +708,28 @@ bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev)
}
EXPORT_SYMBOL_GPL(bpf_offload_dev_match);
+bool bpf_prog_dev_bound_match(const struct bpf_prog *lhs, const struct bpf_prog *rhs)
+{
+ bool ret;
+
+ if (bpf_prog_is_offloaded(lhs->aux) != bpf_prog_is_offloaded(rhs->aux))
+ return false; /* exactly one of the two is offloaded */
+
+ down_read(&bpf_devs_lock);
+ ret = lhs->aux->offload && rhs->aux->offload && /* match: both still bound, to the same non-NULL netdev */
+ lhs->aux->offload->netdev &&
+ lhs->aux->offload->netdev == rhs->aux->offload->netdev;
+ up_read(&bpf_devs_lock);
+
+ return ret;
+}
+
bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map)
{
struct bpf_offloaded_map *offmap;
bool ret;
- if (!bpf_map_is_dev_bound(map))
+ if (!bpf_map_is_offloaded(map))
return bpf_map_offload_neutral(map);
offmap = map_to_offmap(map);
@@ -592,32 +743,11 @@ bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map)
int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev,
struct net_device *netdev)
{
- struct bpf_offload_netdev *ondev;
int err;
- ondev = kzalloc(sizeof(*ondev), GFP_KERNEL);
- if (!ondev)
- return -ENOMEM;
-
- ondev->netdev = netdev;
- ondev->offdev = offdev;
- INIT_LIST_HEAD(&ondev->progs);
- INIT_LIST_HEAD(&ondev->maps);
-
down_write(&bpf_devs_lock);
- err = rhashtable_insert_fast(&offdevs, &ondev->l, offdevs_params);
- if (err) {
- netdev_warn(netdev, "failed to register for BPF offload\n");
- goto err_unlock_free;
- }
-
- list_add(&ondev->offdev_netdevs, &offdev->netdevs);
- up_write(&bpf_devs_lock);
- return 0;
-
-err_unlock_free:
+ err = __bpf_offload_dev_netdev_register(offdev, netdev);
up_write(&bpf_devs_lock);
- kfree(ondev);
return err;
}
EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_register);
@@ -625,43 +755,8 @@ EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_register);
void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
struct net_device *netdev)
{
- struct bpf_offload_netdev *ondev, *altdev;
- struct bpf_offloaded_map *offmap, *mtmp;
- struct bpf_prog_offload *offload, *ptmp;
-
- ASSERT_RTNL();
-
down_write(&bpf_devs_lock);
- ondev = rhashtable_lookup_fast(&offdevs, &netdev, offdevs_params);
- if (WARN_ON(!ondev))
- goto unlock;
-
- WARN_ON(rhashtable_remove_fast(&offdevs, &ondev->l, offdevs_params));
- list_del(&ondev->offdev_netdevs);
-
- /* Try to move the objects to another netdev of the device */
- altdev = list_first_entry_or_null(&offdev->netdevs,
- struct bpf_offload_netdev,
- offdev_netdevs);
- if (altdev) {
- list_for_each_entry(offload, &ondev->progs, offloads)
- offload->netdev = altdev->netdev;
- list_splice_init(&ondev->progs, &altdev->progs);
-
- list_for_each_entry(offmap, &ondev->maps, offloads)
- offmap->netdev = altdev->netdev;
- list_splice_init(&ondev->maps, &altdev->maps);
- } else {
- list_for_each_entry_safe(offload, ptmp, &ondev->progs, offloads)
- __bpf_prog_offload_destroy(offload->prog);
- list_for_each_entry_safe(offmap, mtmp, &ondev->maps, offloads)
- __bpf_map_offload_destroy(offmap);
- }
-
- WARN_ON(!list_empty(&ondev->progs));
- WARN_ON(!list_empty(&ondev->maps));
- kfree(ondev);
-unlock:
+ __bpf_offload_dev_netdev_unregister(offdev, netdev);
up_write(&bpf_devs_lock);
}
EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister);
@@ -670,18 +765,6 @@ struct bpf_offload_dev *
bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops, void *priv)
{
struct bpf_offload_dev *offdev;
- int err;
-
- down_write(&bpf_devs_lock);
- if (!offdevs_inited) {
- err = rhashtable_init(&offdevs, &offdevs_params);
- if (err) {
- up_write(&bpf_devs_lock);
- return ERR_PTR(err);
- }
- offdevs_inited = true;
- }
- up_write(&bpf_devs_lock);
offdev = kzalloc(sizeof(*offdev), GFP_KERNEL);
if (!offdev)
@@ -707,3 +790,67 @@ void *bpf_offload_dev_priv(struct bpf_offload_dev *offdev)
return offdev->priv;
}
EXPORT_SYMBOL_GPL(bpf_offload_dev_priv);
+
+void bpf_dev_bound_netdev_unregister(struct net_device *dev)
+{
+ struct bpf_offload_netdev *ondev;
+
+ ASSERT_RTNL();
+
+ down_write(&bpf_devs_lock);
+ ondev = bpf_offload_find_netdev(dev);
+ if (ondev && !ondev->offdev) /* offdev is NULL only for bound-only entries; others are left alone here */
+ __bpf_offload_dev_netdev_unregister(NULL, ondev->netdev);
+ up_write(&bpf_devs_lock);
+}
+
+int bpf_dev_bound_kfunc_check(struct bpf_verifier_log *log,
+ struct bpf_prog_aux *prog_aux)
+{
+ if (!bpf_prog_is_dev_bound(prog_aux)) {
+ bpf_log(log, "metadata kfuncs require device-bound program\n");
+ return -EINVAL;
+ }
+
+ if (bpf_prog_is_offloaded(prog_aux)) {
+ bpf_log(log, "metadata kfuncs can't be offloaded\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id)
+{
+ const struct xdp_metadata_ops *ops;
+ void *p = NULL; /* stays NULL when the binding, the ops, or a matching kfunc is missing */
+
+ /* bpf_devs_lock is taken per call, not across the resolution of
+ * several kfuncs, so we can race with unregister_netdevice().
+ * We rely on the bpf_prog_dev_bound_match() check at attach
+ * to render this program unusable.
+ */
+ down_read(&bpf_devs_lock);
+ if (!prog->aux->offload)
+ goto out;
+
+ ops = prog->aux->offload->netdev->xdp_metadata_ops;
+ if (!ops)
+ goto out;
+
+ if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_TIMESTAMP))
+ p = ops->xmo_rx_timestamp;
+ else if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_HASH))
+ p = ops->xmo_rx_hash;
+out:
+ up_read(&bpf_devs_lock);
+
+ return p;
+}
+
+static int __init bpf_offload_init(void)
+{
+ return rhashtable_init(&offdevs, &offdevs_params);
+}
+
+late_initcall(bpf_offload_init);
diff --git a/kernel/bpf/preload/bpf_preload_kern.c b/kernel/bpf/preload/bpf_preload_kern.c
index 5106b5372f0c..b56f9f3314fd 100644
--- a/kernel/bpf/preload/bpf_preload_kern.c
+++ b/kernel/bpf/preload/bpf_preload_kern.c
@@ -3,7 +3,11 @@
#include <linux/init.h>
#include <linux/module.h>
#include "bpf_preload.h"
-#include "iterators/iterators.lskel.h"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#include "iterators/iterators.lskel-little-endian.h"
+#else
+#include "iterators/iterators.lskel-big-endian.h"
+#endif
static struct bpf_link *maps_link, *progs_link;
static struct iterators_bpf *skel;
diff --git a/kernel/bpf/preload/iterators/Makefile b/kernel/bpf/preload/iterators/Makefile
index 6762b1260f2f..8937dc6bc8d0 100644
--- a/kernel/bpf/preload/iterators/Makefile
+++ b/kernel/bpf/preload/iterators/Makefile
@@ -35,20 +35,22 @@ endif
.PHONY: all clean
-all: iterators.lskel.h
+all: iterators.lskel-little-endian.h
+
+big: iterators.lskel-big-endian.h
clean:
$(call msg,CLEAN)
$(Q)rm -rf $(OUTPUT) iterators
-iterators.lskel.h: $(OUTPUT)/iterators.bpf.o | $(BPFTOOL)
+iterators.lskel-%.h: $(OUTPUT)/%/iterators.bpf.o | $(BPFTOOL)
$(call msg,GEN-SKEL,$@)
$(Q)$(BPFTOOL) gen skeleton -L $< > $@
-
-$(OUTPUT)/iterators.bpf.o: iterators.bpf.c $(BPFOBJ) | $(OUTPUT)
+$(OUTPUT)/%/iterators.bpf.o: iterators.bpf.c $(BPFOBJ) | $(OUTPUT)
$(call msg,BPF,$@)
- $(Q)$(CLANG) -g -O2 -target bpf $(INCLUDES) \
+ $(Q)mkdir -p $(@D)
+ $(Q)$(CLANG) -g -O2 -target bpf -m$* $(INCLUDES) \
-c $(filter %.c,$^) -o $@ && \
$(LLVM_STRIP) -g $@
diff --git a/kernel/bpf/preload/iterators/README b/kernel/bpf/preload/iterators/README
index 7fd6d39a9ad2..98e7c90ea012 100644
--- a/kernel/bpf/preload/iterators/README
+++ b/kernel/bpf/preload/iterators/README
@@ -1,4 +1,7 @@
WARNING:
-If you change "iterators.bpf.c" do "make -j" in this directory to rebuild "iterators.skel.h".
+If you change "iterators.bpf.c" do "make -j" in this directory to
+rebuild "iterators.lskel-little-endian.h". Then, on a big-endian
+machine, do "make -j big" in this directory to rebuild
+"iterators.lskel-big-endian.h". Commit both resulting headers.
Make sure to have clang 10 installed.
See Documentation/bpf/bpf_devel_QA.rst
diff --git a/kernel/bpf/preload/iterators/iterators.lskel-big-endian.h b/kernel/bpf/preload/iterators/iterators.lskel-big-endian.h
new file mode 100644
index 000000000000..ebdc6c0cdb70
--- /dev/null
+++ b/kernel/bpf/preload/iterators/iterators.lskel-big-endian.h
@@ -0,0 +1,419 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* THIS FILE IS AUTOGENERATED BY BPFTOOL! */
+#ifndef __ITERATORS_BPF_SKEL_H__
+#define __ITERATORS_BPF_SKEL_H__
+
+#include <bpf/skel_internal.h>
+
+struct iterators_bpf {
+ struct bpf_loader_ctx ctx;
+ struct {
+ struct bpf_map_desc rodata;
+ } maps;
+ struct {
+ struct bpf_prog_desc dump_bpf_map;
+ struct bpf_prog_desc dump_bpf_prog;
+ } progs;
+ struct {
+ int dump_bpf_map_fd;
+ int dump_bpf_prog_fd;
+ } links;
+};
+
+static inline int
+iterators_bpf__dump_bpf_map__attach(struct iterators_bpf *skel)
+{
+ int prog_fd = skel->progs.dump_bpf_map.prog_fd;
+ int fd = skel_link_create(prog_fd, 0, BPF_TRACE_ITER);
+
+ if (fd > 0)
+ skel->links.dump_bpf_map_fd = fd;
+ return fd;
+}
+
+static inline int
+iterators_bpf__dump_bpf_prog__attach(struct iterators_bpf *skel)
+{
+ int prog_fd = skel->progs.dump_bpf_prog.prog_fd;
+ int fd = skel_link_create(prog_fd, 0, BPF_TRACE_ITER);
+
+ if (fd > 0)
+ skel->links.dump_bpf_prog_fd = fd;
+ return fd;
+}
+
+static inline int
+iterators_bpf__attach(struct iterators_bpf *skel)
+{
+ int ret = 0;
+
+ ret = ret < 0 ? ret : iterators_bpf__dump_bpf_map__attach(skel);
+ ret = ret < 0 ? ret : iterators_bpf__dump_bpf_prog__attach(skel);
+ return ret < 0 ? ret : 0;
+}
+
+static inline void
+iterators_bpf__detach(struct iterators_bpf *skel)
+{
+ skel_closenz(skel->links.dump_bpf_map_fd);
+ skel_closenz(skel->links.dump_bpf_prog_fd);
+}
+static void
+iterators_bpf__destroy(struct iterators_bpf *skel)
+{
+ if (!skel)
+ return;
+ iterators_bpf__detach(skel);
+ skel_closenz(skel->progs.dump_bpf_map.prog_fd);
+ skel_closenz(skel->progs.dump_bpf_prog.prog_fd);
+ skel_closenz(skel->maps.rodata.map_fd);
+ skel_free(skel);
+}
+static inline struct iterators_bpf *
+iterators_bpf__open(void)
+{
+ struct iterators_bpf *skel;
+
+ skel = skel_alloc(sizeof(*skel));
+ if (!skel)
+ goto cleanup;
+ skel->ctx.sz = (void *)&skel->links - (void *)skel;
+ return skel;
+cleanup:
+ iterators_bpf__destroy(skel);
+ return NULL;
+}
+
+static inline int
+iterators_bpf__load(struct iterators_bpf *skel)
+{
+ struct bpf_load_and_run_opts opts = {};
+ int err;
+
+ opts.ctx = (struct bpf_loader_ctx *)skel;
+ opts.data_sz = 6008;
+ opts.data = (void *)"\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xeb\x9f\x01\0\
+\0\0\0\x18\0\0\0\0\0\0\x04\x1c\0\0\x04\x1c\0\0\x05\x18\0\0\0\0\x02\0\0\0\0\0\0\
+\x02\0\0\0\x01\x04\0\0\x02\0\0\0\x10\0\0\0\x13\0\0\0\x03\0\0\0\0\0\0\0\x18\0\0\
+\0\x04\0\0\0\x40\0\0\0\0\x02\0\0\0\0\0\0\x08\0\0\0\0\x02\0\0\0\0\0\0\x0d\0\0\0\
+\0\x0d\0\0\x01\0\0\0\x06\0\0\0\x1c\0\0\0\x01\0\0\0\x20\x01\0\0\0\0\0\0\x04\x01\
+\0\0\x20\0\0\0\x24\x0c\0\0\x01\0\0\0\x05\0\0\0\xc2\x04\0\0\x03\0\0\0\x18\0\0\0\
+\xd0\0\0\0\x09\0\0\0\0\0\0\0\xd4\0\0\0\x0b\0\0\0\x40\0\0\0\xdf\0\0\0\x0b\0\0\0\
+\x80\0\0\0\0\x02\0\0\0\0\0\0\x0a\0\0\0\xe7\x07\0\0\0\0\0\0\0\0\0\0\xf0\x08\0\0\
+\0\0\0\0\x0c\0\0\0\xf6\x01\0\0\0\0\0\0\x08\0\0\0\x40\0\0\x01\xb3\x04\0\0\x03\0\
+\0\0\x18\0\0\x01\xbb\0\0\0\x0e\0\0\0\0\0\0\x01\xbe\0\0\0\x11\0\0\0\x20\0\0\x01\
+\xc3\0\0\0\x0e\0\0\0\xa0\0\0\x01\xcf\x08\0\0\0\0\0\0\x0f\0\0\x01\xd5\x01\0\0\0\
+\0\0\0\x04\0\0\0\x20\0\0\x01\xe2\x01\0\0\0\0\0\0\x01\x01\0\0\x08\0\0\0\0\x03\0\
+\0\0\0\0\0\0\0\0\0\x10\0\0\0\x12\0\0\0\x10\0\0\x01\xe7\x01\0\0\0\0\0\0\x04\0\0\
+\0\x20\0\0\0\0\x02\0\0\0\0\0\0\x14\0\0\x02\x4b\x04\0\0\x02\0\0\0\x10\0\0\0\x13\
+\0\0\0\x03\0\0\0\0\0\0\x02\x5e\0\0\0\x15\0\0\0\x40\0\0\0\0\x02\0\0\0\0\0\0\x18\
+\0\0\0\0\x0d\0\0\x01\0\0\0\x06\0\0\0\x1c\0\0\0\x13\0\0\x02\x63\x0c\0\0\x01\0\0\
+\0\x16\0\0\x02\xaf\x04\0\0\x01\0\0\0\x08\0\0\x02\xb8\0\0\0\x19\0\0\0\0\0\0\0\0\
+\x02\0\0\0\0\0\0\x1a\0\0\x03\x09\x04\0\0\x06\0\0\0\x38\0\0\x01\xbb\0\0\0\x0e\0\
+\0\0\0\0\0\x01\xbe\0\0\0\x11\0\0\0\x20\0\0\x03\x16\0\0\0\x1b\0\0\0\xc0\0\0\x03\
+\x27\0\0\0\x15\0\0\x01\0\0\0\x03\x30\0\0\0\x1d\0\0\x01\x40\0\0\x03\x3a\0\0\0\
+\x1e\0\0\x01\x80\0\0\0\0\x02\0\0\0\0\0\0\x1c\0\0\0\0\x0a\0\0\0\0\0\0\x10\0\0\0\
+\0\x02\0\0\0\0\0\0\x1f\0\0\0\0\x02\0\0\0\0\0\0\x20\0\0\x03\x84\x04\0\0\x02\0\0\
+\0\x08\0\0\x03\x92\0\0\0\x0e\0\0\0\0\0\0\x03\x9b\0\0\0\x0e\0\0\0\x20\0\0\x03\
+\x3a\x04\0\0\x03\0\0\0\x18\0\0\x03\xa5\0\0\0\x1b\0\0\0\0\0\0\x03\xad\0\0\0\x21\
+\0\0\0\x40\0\0\x03\xb3\0\0\0\x23\0\0\0\x80\0\0\0\0\x02\0\0\0\0\0\0\x22\0\0\0\0\
+\x02\0\0\0\0\0\0\x24\0\0\x03\xb7\x04\0\0\x01\0\0\0\x04\0\0\x03\xc2\0\0\0\x0e\0\
+\0\0\0\0\0\x04\x2b\x04\0\0\x01\0\0\0\x04\0\0\x04\x34\0\0\0\x0e\0\0\0\0\0\0\0\0\
+\x03\0\0\0\0\0\0\0\0\0\0\x1c\0\0\0\x12\0\0\0\x23\0\0\x04\xaa\x0e\0\0\0\0\0\0\
+\x25\0\0\0\0\0\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\x1c\0\0\0\x12\0\0\0\x0e\0\0\x04\
+\xbe\x0e\0\0\0\0\0\0\x27\0\0\0\0\0\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\x1c\0\0\0\x12\
+\0\0\0\x20\0\0\x04\xd4\x0e\0\0\0\0\0\0\x29\0\0\0\0\0\0\0\0\x03\0\0\0\0\0\0\0\0\
+\0\0\x1c\0\0\0\x12\0\0\0\x11\0\0\x04\xe9\x0e\0\0\0\0\0\0\x2b\0\0\0\0\0\0\0\0\
+\x03\0\0\0\0\0\0\0\0\0\0\x10\0\0\0\x12\0\0\0\x04\0\0\x05\0\x0e\0\0\0\0\0\0\x2d\
+\0\0\0\x01\0\0\x05\x08\x0f\0\0\x04\0\0\0\x62\0\0\0\x26\0\0\0\0\0\0\0\x23\0\0\0\
+\x28\0\0\0\x23\0\0\0\x0e\0\0\0\x2a\0\0\0\x31\0\0\0\x20\0\0\0\x2c\0\0\0\x51\0\0\
+\0\x11\0\0\x05\x10\x0f\0\0\x01\0\0\0\x04\0\0\0\x2e\0\0\0\0\0\0\0\x04\0\x62\x70\
+\x66\x5f\x69\x74\x65\x72\x5f\x5f\x62\x70\x66\x5f\x6d\x61\x70\0\x6d\x65\x74\x61\
+\0\x6d\x61\x70\0\x63\x74\x78\0\x69\x6e\x74\0\x64\x75\x6d\x70\x5f\x62\x70\x66\
+\x5f\x6d\x61\x70\0\x69\x74\x65\x72\x2f\x62\x70\x66\x5f\x6d\x61\x70\0\x30\x3a\
+\x30\0\x2f\x68\x6f\x6d\x65\x2f\x69\x69\x69\x2f\x6c\x69\x6e\x75\x78\x2d\x6b\x65\
+\x72\x6e\x65\x6c\x2d\x74\x6f\x6f\x6c\x63\x68\x61\x69\x6e\x2f\x73\x72\x63\x2f\
+\x6c\x69\x6e\x75\x78\x2f\x6b\x65\x72\x6e\x65\x6c\x2f\x62\x70\x66\x2f\x70\x72\
+\x65\x6c\x6f\x61\x64\x2f\x69\x74\x65\x72\x61\x74\x6f\x72\x73\x2f\x69\x74\x65\
+\x72\x61\x74\x6f\x72\x73\x2e\x62\x70\x66\x2e\x63\0\x09\x73\x74\x72\x75\x63\x74\
+\x20\x73\x65\x71\x5f\x66\x69\x6c\x65\x20\x2a\x73\x65\x71\x20\x3d\x20\x63\x74\
+\x78\x2d\x3e\x6d\x65\x74\x61\x2d\x3e\x73\x65\x71\x3b\0\x62\x70\x66\x5f\x69\x74\
+\x65\x72\x5f\x6d\x65\x74\x61\0\x73\x65\x71\0\x73\x65\x73\x73\x69\x6f\x6e\x5f\
+\x69\x64\0\x73\x65\x71\x5f\x6e\x75\x6d\0\x73\x65\x71\x5f\x66\x69\x6c\x65\0\x5f\
+\x5f\x75\x36\x34\0\x75\x6e\x73\x69\x67\x6e\x65\x64\x20\x6c\x6f\x6e\x67\x20\x6c\
+\x6f\x6e\x67\0\x30\x3a\x31\0\x09\x73\x74\x72\x75\x63\x74\x20\x62\x70\x66\x5f\
+\x6d\x61\x70\x20\x2a\x6d\x61\x70\x20\x3d\x20\x63\x74\x78\x2d\x3e\x6d\x61\x70\
+\x3b\0\x09\x69\x66\x20\x28\x21\x6d\x61\x70\x29\0\x30\x3a\x32\0\x09\x5f\x5f\x75\
+\x36\x34\x20\x73\x65\x71\x5f\x6e\x75\x6d\x20\x3d\x20\x63\x74\x78\x2d\x3e\x6d\
+\x65\x74\x61\x2d\x3e\x73\x65\x71\x5f\x6e\x75\x6d\x3b\0\x09\x69\x66\x20\x28\x73\
+\x65\x71\x5f\x6e\x75\x6d\x20\x3d\x3d\x20\x30\x29\0\x09\x09\x42\x50\x46\x5f\x53\
+\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x20\x20\x69\
+\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
+\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x5c\x6e\x22\x29\x3b\0\x62\x70\x66\
+\x5f\x6d\x61\x70\0\x69\x64\0\x6e\x61\x6d\x65\0\x6d\x61\x78\x5f\x65\x6e\x74\x72\
+\x69\x65\x73\0\x5f\x5f\x75\x33\x32\0\x75\x6e\x73\x69\x67\x6e\x65\x64\x20\x69\
+\x6e\x74\0\x63\x68\x61\x72\0\x5f\x5f\x41\x52\x52\x41\x59\x5f\x53\x49\x5a\x45\
+\x5f\x54\x59\x50\x45\x5f\x5f\0\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\
+\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x25\x34\x75\x20\x25\x2d\x31\x36\x73\
+\x25\x36\x64\x5c\x6e\x22\x2c\x20\x6d\x61\x70\x2d\x3e\x69\x64\x2c\x20\x6d\x61\
+\x70\x2d\x3e\x6e\x61\x6d\x65\x2c\x20\x6d\x61\x70\x2d\x3e\x6d\x61\x78\x5f\x65\
+\x6e\x74\x72\x69\x65\x73\x29\x3b\0\x7d\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\
+\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x70\x72\x6f\x67\0\x64\x75\x6d\x70\x5f\
+\x62\x70\x66\x5f\x70\x72\x6f\x67\0\x69\x74\x65\x72\x2f\x62\x70\x66\x5f\x70\x72\
+\x6f\x67\0\x09\x73\x74\x72\x75\x63\x74\x20\x62\x70\x66\x5f\x70\x72\x6f\x67\x20\
+\x2a\x70\x72\x6f\x67\x20\x3d\x20\x63\x74\x78\x2d\x3e\x70\x72\x6f\x67\x3b\0\x09\
+\x69\x66\x20\x28\x21\x70\x72\x6f\x67\x29\0\x62\x70\x66\x5f\x70\x72\x6f\x67\0\
+\x61\x75\x78\0\x09\x61\x75\x78\x20\x3d\x20\x70\x72\x6f\x67\x2d\x3e\x61\x75\x78\
+\x3b\0\x09\x09\x42\x50\x46\x5f\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\
+\x65\x71\x2c\x20\x22\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\
+\x20\x20\x20\x20\x20\x20\x20\x20\x61\x74\x74\x61\x63\x68\x65\x64\x5c\x6e\x22\
+\x29\x3b\0\x62\x70\x66\x5f\x70\x72\x6f\x67\x5f\x61\x75\x78\0\x61\x74\x74\x61\
+\x63\x68\x5f\x66\x75\x6e\x63\x5f\x6e\x61\x6d\x65\0\x64\x73\x74\x5f\x70\x72\x6f\
+\x67\0\x66\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\x62\x74\x66\0\x09\x42\x50\x46\x5f\
+\x53\x45\x51\x5f\x50\x52\x49\x4e\x54\x46\x28\x73\x65\x71\x2c\x20\x22\x25\x34\
+\x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\x25\x73\x5c\x6e\x22\x2c\x20\x61\
+\x75\x78\x2d\x3e\x69\x64\x2c\0\x30\x3a\x34\0\x30\x3a\x35\0\x09\x69\x66\x20\x28\
+\x21\x62\x74\x66\x29\0\x62\x70\x66\x5f\x66\x75\x6e\x63\x5f\x69\x6e\x66\x6f\0\
+\x69\x6e\x73\x6e\x5f\x6f\x66\x66\0\x74\x79\x70\x65\x5f\x69\x64\0\x30\0\x73\x74\
+\x72\x69\x6e\x67\x73\0\x74\x79\x70\x65\x73\0\x68\x64\x72\0\x62\x74\x66\x5f\x68\
+\x65\x61\x64\x65\x72\0\x73\x74\x72\x5f\x6c\x65\x6e\0\x09\x74\x79\x70\x65\x73\
+\x20\x3d\x20\x62\x74\x66\x2d\x3e\x74\x79\x70\x65\x73\x3b\0\x09\x62\x70\x66\x5f\
+\x70\x72\x6f\x62\x65\x5f\x72\x65\x61\x64\x5f\x6b\x65\x72\x6e\x65\x6c\x28\x26\
+\x74\x2c\x20\x73\x69\x7a\x65\x6f\x66\x28\x74\x29\x2c\x20\x74\x79\x70\x65\x73\
+\x20\x2b\x20\x62\x74\x66\x5f\x69\x64\x29\x3b\0\x09\x73\x74\x72\x20\x3d\x20\x62\
+\x74\x66\x2d\x3e\x73\x74\x72\x69\x6e\x67\x73\x3b\0\x62\x74\x66\x5f\x74\x79\x70\
+\x65\0\x6e\x61\x6d\x65\x5f\x6f\x66\x66\0\x09\x6e\x61\x6d\x65\x5f\x6f\x66\x66\
+\x20\x3d\x20\x42\x50\x46\x5f\x43\x4f\x52\x45\x5f\x52\x45\x41\x44\x28\x74\x2c\
+\x20\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x29\x3b\0\x30\x3a\x32\x3a\x30\0\x09\x69\
+\x66\x20\x28\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x20\x3e\x3d\x20\x62\x74\x66\x2d\
+\x3e\x68\x64\x72\x2e\x73\x74\x72\x5f\x6c\x65\x6e\x29\0\x09\x72\x65\x74\x75\x72\
+\x6e\x20\x73\x74\x72\x20\x2b\x20\x6e\x61\x6d\x65\x5f\x6f\x66\x66\x3b\0\x30\x3a\
+\x33\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\
+\x74\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x6d\x61\x70\x2e\x5f\x5f\x5f\x66\x6d\
+\x74\x2e\x31\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\x2e\x5f\x5f\
+\x5f\x66\x6d\x74\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\x2e\x5f\
+\x5f\x5f\x66\x6d\x74\x2e\x32\0\x4c\x49\x43\x45\x4e\x53\x45\0\x2e\x72\x6f\x64\
+\x61\x74\x61\0\x6c\x69\x63\x65\x6e\x73\x65\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\x09\x4c\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x02\0\0\0\x04\0\0\0\x62\0\0\0\
+\x01\0\0\0\x80\0\0\0\0\0\0\0\0\x69\x74\x65\x72\x61\x74\x6f\x72\x2e\x72\x6f\x64\
+\x61\x74\x61\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x2f\0\0\0\0\0\0\0\0\0\0\0\0\x20\
+\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
+\x20\x20\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x0a\0\x25\x34\x75\x20\x25\
+\x2d\x31\x36\x73\x25\x36\x64\x0a\0\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\
+\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x61\x74\x74\x61\x63\x68\x65\x64\
+\x0a\0\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\x25\x73\x0a\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\x47\x50\x4c\0\0\0\0\0\x79\x21\0\0\0\0\0\0\x79\x62\0\0\
+\0\0\0\0\x79\x71\0\x08\0\0\0\0\x15\x70\0\x1a\0\0\0\0\x79\x12\0\x10\0\0\0\0\x55\
+\x10\0\x08\0\0\0\0\xbf\x4a\0\0\0\0\0\0\x07\x40\0\0\xff\xff\xff\xe8\xbf\x16\0\0\
+\0\0\0\0\x18\x26\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xb7\x30\0\0\0\0\0\x23\xb7\x50\0\0\
+\0\0\0\0\x85\0\0\0\0\0\0\x7e\x61\x17\0\0\0\0\0\0\x7b\xa1\xff\xe8\0\0\0\0\xb7\
+\x10\0\0\0\0\0\x04\xbf\x27\0\0\0\0\0\0\x0f\x21\0\0\0\0\0\0\x7b\xa2\xff\xf0\0\0\
+\0\0\x61\x17\0\x14\0\0\0\0\x7b\xa1\xff\xf8\0\0\0\0\xbf\x4a\0\0\0\0\0\0\x07\x40\
+\0\0\xff\xff\xff\xe8\xbf\x16\0\0\0\0\0\0\x18\x26\0\0\0\0\0\0\0\0\0\0\0\0\0\x23\
+\xb7\x30\0\0\0\0\0\x0e\xb7\x50\0\0\0\0\0\x18\x85\0\0\0\0\0\0\x7e\xb7\0\0\0\0\0\
+\0\0\x95\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x07\0\0\0\0\0\0\0\x42\0\0\0\x9a\0\x01\x3c\
+\x1e\0\0\0\x01\0\0\0\x42\0\0\0\x9a\0\x01\x3c\x24\0\0\0\x02\0\0\0\x42\0\0\x01\
+\x0d\0\x01\x44\x1d\0\0\0\x03\0\0\0\x42\0\0\x01\x2e\0\x01\x4c\x06\0\0\0\x04\0\0\
+\0\x42\0\0\x01\x3d\0\x01\x40\x1d\0\0\0\x05\0\0\0\x42\0\0\x01\x62\0\x01\x58\x06\
+\0\0\0\x07\0\0\0\x42\0\0\x01\x75\0\x01\x5c\x03\0\0\0\x0e\0\0\0\x42\0\0\x01\xfb\
+\0\x01\x64\x02\0\0\0\x1e\0\0\0\x42\0\0\x02\x49\0\x01\x6c\x01\0\0\0\0\0\0\0\x02\
+\0\0\0\x3e\0\0\0\0\0\0\0\x08\0\0\0\x08\0\0\0\x3e\0\0\0\0\0\0\0\x10\0\0\0\x02\0\
+\0\x01\x09\0\0\0\0\0\0\0\x20\0\0\0\x08\0\0\x01\x39\0\0\0\0\0\0\0\x70\0\0\0\x0d\
+\0\0\0\x3e\0\0\0\0\0\0\0\x80\0\0\0\x0d\0\0\x01\x09\0\0\0\0\0\0\0\xa0\0\0\0\x0d\
+\0\0\x01\x39\0\0\0\0\0\0\0\x1a\0\0\0\x20\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\
+\x6d\x61\x70\0\0\0\0\0\0\0\0\0\0\0\x1c\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\
+\x01\0\0\0\x10\0\0\0\0\0\0\0\0\0\0\0\x09\0\0\0\x01\0\0\0\0\0\0\0\x07\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x10\0\0\0\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\
+\x62\x70\x66\x5f\x6d\x61\x70\0\0\0\0\0\0\0\0\x47\x50\x4c\0\0\0\0\0\x79\x21\0\0\
+\0\0\0\0\x79\x62\0\0\0\0\0\0\x79\x11\0\x08\0\0\0\0\x15\x10\0\x3b\0\0\0\0\x79\
+\x71\0\0\0\0\0\0\x79\x12\0\x10\0\0\0\0\x55\x10\0\x08\0\0\0\0\xbf\x4a\0\0\0\0\0\
+\0\x07\x40\0\0\xff\xff\xff\xd0\xbf\x16\0\0\0\0\0\0\x18\x26\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\x31\xb7\x30\0\0\0\0\0\x20\xb7\x50\0\0\0\0\0\0\x85\0\0\0\0\0\0\x7e\x7b\
+\xa6\xff\xc8\0\0\0\0\x61\x17\0\0\0\0\0\0\x7b\xa1\xff\xd0\0\0\0\0\xb7\x30\0\0\0\
+\0\0\x04\xbf\x97\0\0\0\0\0\0\x0f\x93\0\0\0\0\0\0\x79\x17\0\x28\0\0\0\0\x79\x87\
+\0\x30\0\0\0\0\x15\x80\0\x18\0\0\0\0\xb7\x20\0\0\0\0\0\0\x0f\x12\0\0\0\0\0\0\
+\x61\x11\0\x04\0\0\0\0\x79\x38\0\x08\0\0\0\0\x67\x10\0\0\0\0\0\x03\x0f\x31\0\0\
+\0\0\0\0\x79\x68\0\0\0\0\0\0\xbf\x1a\0\0\0\0\0\0\x07\x10\0\0\xff\xff\xff\xf8\
+\xb7\x20\0\0\0\0\0\x08\x85\0\0\0\0\0\0\x71\xb7\x10\0\0\0\0\0\0\x79\x3a\xff\xf8\
+\0\0\0\0\x0f\x31\0\0\0\0\0\0\xbf\x1a\0\0\0\0\0\0\x07\x10\0\0\xff\xff\xff\xf4\
+\xb7\x20\0\0\0\0\0\x04\x85\0\0\0\0\0\0\x71\xb7\x30\0\0\0\0\0\x04\x61\x1a\xff\
+\xf4\0\0\0\0\x61\x28\0\x10\0\0\0\0\x3d\x12\0\x02\0\0\0\0\x0f\x61\0\0\0\0\0\0\
+\xbf\x96\0\0\0\0\0\0\x7b\xa9\xff\xd8\0\0\0\0\x79\x17\0\x18\0\0\0\0\x7b\xa1\xff\
+\xe0\0\0\0\0\x79\x17\0\x20\0\0\0\0\x79\x11\0\0\0\0\0\0\x0f\x13\0\0\0\0\0\0\x7b\
+\xa1\xff\xe8\0\0\0\0\xbf\x4a\0\0\0\0\0\0\x07\x40\0\0\xff\xff\xff\xd0\x79\x1a\
+\xff\xc8\0\0\0\0\x18\x26\0\0\0\0\0\0\0\0\0\0\0\0\0\x51\xb7\x30\0\0\0\0\0\x11\
+\xb7\x50\0\0\0\0\0\x20\x85\0\0\0\0\0\0\x7e\xb7\0\0\0\0\0\0\0\x95\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\x17\0\0\0\0\0\0\0\x42\0\0\0\x9a\0\x01\x80\x1e\0\0\0\x01\0\0\0\
+\x42\0\0\0\x9a\0\x01\x80\x24\0\0\0\x02\0\0\0\x42\0\0\x02\x7f\0\x01\x88\x1f\0\0\
+\0\x03\0\0\0\x42\0\0\x02\xa3\0\x01\x94\x06\0\0\0\x04\0\0\0\x42\0\0\x02\xbc\0\
+\x01\xa0\x0e\0\0\0\x05\0\0\0\x42\0\0\x01\x3d\0\x01\x84\x1d\0\0\0\x06\0\0\0\x42\
+\0\0\x01\x62\0\x01\xa4\x06\0\0\0\x08\0\0\0\x42\0\0\x02\xce\0\x01\xa8\x03\0\0\0\
+\x10\0\0\0\x42\0\0\x03\x3e\0\x01\xb0\x02\0\0\0\x17\0\0\0\x42\0\0\x03\x79\0\x01\
+\x04\x06\0\0\0\x1a\0\0\0\x42\0\0\x03\x3e\0\x01\xb0\x02\0\0\0\x1b\0\0\0\x42\0\0\
+\x03\xca\0\x01\x10\x0f\0\0\0\x1c\0\0\0\x42\0\0\x03\xdf\0\x01\x14\x2d\0\0\0\x1e\
+\0\0\0\x42\0\0\x04\x16\0\x01\x0c\x0d\0\0\0\x20\0\0\0\x42\0\0\x03\x3e\0\x01\xb0\
+\x02\0\0\0\x21\0\0\0\x42\0\0\x03\xdf\0\x01\x14\x02\0\0\0\x24\0\0\0\x42\0\0\x04\
+\x3d\0\x01\x18\x0d\0\0\0\x27\0\0\0\x42\0\0\x03\x3e\0\x01\xb0\x02\0\0\0\x28\0\0\
+\0\x42\0\0\x04\x3d\0\x01\x18\x0d\0\0\0\x2b\0\0\0\x42\0\0\x04\x3d\0\x01\x18\x0d\
+\0\0\0\x2c\0\0\0\x42\0\0\x04\x6b\0\x01\x1c\x1b\0\0\0\x2d\0\0\0\x42\0\0\x04\x6b\
+\0\x01\x1c\x06\0\0\0\x2e\0\0\0\x42\0\0\x04\x8e\0\x01\x24\x0d\0\0\0\x30\0\0\0\
+\x42\0\0\x03\x3e\0\x01\xb0\x02\0\0\0\x3f\0\0\0\x42\0\0\x02\x49\0\x01\xc0\x01\0\
+\0\0\0\0\0\0\x14\0\0\0\x3e\0\0\0\0\0\0\0\x08\0\0\0\x08\0\0\0\x3e\0\0\0\0\0\0\0\
+\x10\0\0\0\x14\0\0\x01\x09\0\0\0\0\0\0\0\x20\0\0\0\x18\0\0\0\x3e\0\0\0\0\0\0\0\
+\x28\0\0\0\x08\0\0\x01\x39\0\0\0\0\0\0\0\x80\0\0\0\x1a\0\0\0\x3e\0\0\0\0\0\0\0\
+\x90\0\0\0\x1a\0\0\x01\x09\0\0\0\0\0\0\0\xa8\0\0\0\x1a\0\0\x03\x71\0\0\0\0\0\0\
+\0\xb0\0\0\0\x1a\0\0\x03\x75\0\0\0\0\0\0\0\xc0\0\0\0\x1f\0\0\x03\xa3\0\0\0\0\0\
+\0\0\xd8\0\0\0\x20\0\0\x01\x09\0\0\0\0\0\0\0\xf0\0\0\0\x20\0\0\0\x3e\0\0\0\0\0\
+\0\x01\x18\0\0\0\x24\0\0\0\x3e\0\0\0\0\0\0\x01\x50\0\0\0\x1a\0\0\x01\x09\0\0\0\
+\0\0\0\x01\x60\0\0\0\x20\0\0\x04\x65\0\0\0\0\0\0\x01\x88\0\0\0\x1a\0\0\x01\x39\
+\0\0\0\0\0\0\x01\x98\0\0\0\x1a\0\0\x04\xa6\0\0\0\0\0\0\x01\xa0\0\0\0\x18\0\0\0\
+\x3e\0\0\0\0\0\0\0\x1a\0\0\0\x41\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x64\x75\x6d\x70\x5f\x62\x70\x66\x5f\x70\x72\
+\x6f\x67\0\0\0\0\0\0\0\0\0\0\x1c\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\x01\0\
+\0\0\x10\0\0\0\0\0\0\0\0\0\0\0\x19\0\0\0\x01\0\0\0\0\0\0\0\x12\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\x10\0\0\0\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x62\x70\
+\x66\x5f\x70\x72\x6f\x67\0\0\0\0\0\0\0";
+ opts.insns_sz = 2216;
+ opts.insns = (void *)"\
+\xbf\x61\0\0\0\0\0\0\xbf\x1a\0\0\0\0\0\0\x07\x10\0\0\xff\xff\xff\x78\xb7\x20\0\
+\0\0\0\0\x88\xb7\x30\0\0\0\0\0\0\x85\0\0\0\0\0\0\x71\x05\0\0\x14\0\0\0\0\x61\
+\x1a\xff\x78\0\0\0\0\xd5\x10\0\x01\0\0\0\0\x85\0\0\0\0\0\0\xa8\x61\x1a\xff\x7c\
+\0\0\0\0\xd5\x10\0\x01\0\0\0\0\x85\0\0\0\0\0\0\xa8\x61\x1a\xff\x80\0\0\0\0\xd5\
+\x10\0\x01\0\0\0\0\x85\0\0\0\0\0\0\xa8\x61\x1a\xff\x84\0\0\0\0\xd5\x10\0\x01\0\
+\0\0\0\x85\0\0\0\0\0\0\xa8\x18\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x61\x10\0\0\0\0\
+\0\0\xd5\x10\0\x02\0\0\0\0\xbf\x91\0\0\0\0\0\0\x85\0\0\0\0\0\0\xa8\xbf\x07\0\0\
+\0\0\0\0\x95\0\0\0\0\0\0\0\x61\x06\0\x08\0\0\0\0\x18\x16\0\0\0\0\0\0\0\0\0\0\0\
+\0\x0e\x68\x63\x10\0\0\0\0\0\0\x61\x06\0\x0c\0\0\0\0\x18\x16\0\0\0\0\0\0\0\0\0\
+\0\0\0\x0e\x64\x63\x10\0\0\0\0\0\0\x79\x06\0\x10\0\0\0\0\x18\x16\0\0\0\0\0\0\0\
+\0\0\0\0\0\x0e\x58\x7b\x10\0\0\0\0\0\0\x18\x06\0\0\0\0\0\0\0\0\0\0\0\0\x05\0\
+\x18\x16\0\0\0\0\0\0\0\0\0\0\0\0\x0e\x50\x7b\x10\0\0\0\0\0\0\xb7\x10\0\0\0\0\0\
+\x12\x18\x26\0\0\0\0\0\0\0\0\0\0\0\0\x0e\x50\xb7\x30\0\0\0\0\0\x1c\x85\0\0\0\0\
+\0\0\xa6\xbf\x70\0\0\0\0\0\0\xc5\x70\xff\xd4\0\0\0\0\x63\xa7\xff\x78\0\0\0\0\
+\x61\x0a\xff\x78\0\0\0\0\x18\x16\0\0\0\0\0\0\0\0\0\0\0\0\x0e\xa0\x63\x10\0\0\0\
+\0\0\0\x61\x06\0\x1c\0\0\0\0\x15\0\0\x03\0\0\0\0\x18\x16\0\0\0\0\0\0\0\0\0\0\0\
+\0\x0e\x7c\x63\x10\0\0\0\0\0\0\xb7\x10\0\0\0\0\0\0\x18\x26\0\0\0\0\0\0\0\0\0\0\
+\0\0\x0e\x70\xb7\x30\0\0\0\0\0\x48\x85\0\0\0\0\0\0\xa6\xbf\x70\0\0\0\0\0\0\xc5\
+\x70\xff\xc3\0\0\0\0\x18\x16\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x63\x17\0\0\0\0\0\0\
+\x79\x36\0\x20\0\0\0\0\x15\x30\0\x08\0\0\0\0\x18\x16\0\0\0\0\0\0\0\0\0\0\0\0\
+\x0e\xb8\xb7\x20\0\0\0\0\0\x62\x61\x06\0\x04\0\0\0\0\x45\0\0\x02\0\0\0\x01\x85\
+\0\0\0\0\0\0\x94\x05\0\0\x01\0\0\0\0\x85\0\0\0\0\0\0\x71\x18\x26\0\0\0\0\0\0\0\
+\0\0\0\0\0\0\0\x61\x02\0\0\0\0\0\0\x18\x16\0\0\0\0\0\0\0\0\0\0\0\0\x0f\x28\x63\
+\x10\0\0\0\0\0\0\x18\x06\0\0\0\0\0\0\0\0\0\0\0\0\x0f\x20\x18\x16\0\0\0\0\0\0\0\
+\0\0\0\0\0\x0f\x30\x7b\x10\0\0\0\0\0\0\x18\x06\0\0\0\0\0\0\0\0\0\0\0\0\x0e\xb8\
+\x18\x16\0\0\0\0\0\0\0\0\0\0\0\0\x0f\x38\x7b\x10\0\0\0\0\0\0\xb7\x10\0\0\0\0\0\
+\x02\x18\x26\0\0\0\0\0\0\0\0\0\0\0\0\x0f\x28\xb7\x30\0\0\0\0\0\x20\x85\0\0\0\0\
+\0\0\xa6\xbf\x70\0\0\0\0\0\0\xc5\x70\xff\x9f\0\0\0\0\x18\x26\0\0\0\0\0\0\0\0\0\
+\0\0\0\0\0\x61\x02\0\0\0\0\0\0\x18\x16\0\0\0\0\0\0\0\0\0\0\0\0\x0f\x48\x63\x10\
+\0\0\0\0\0\0\xb7\x10\0\0\0\0\0\x16\x18\x26\0\0\0\0\0\0\0\0\0\0\0\0\x0f\x48\xb7\
+\x30\0\0\0\0\0\x04\x85\0\0\0\0\0\0\xa6\xbf\x70\0\0\0\0\0\0\xc5\x70\xff\x92\0\0\
+\0\0\x18\x06\0\0\0\0\0\0\0\0\0\0\0\0\x0f\x50\x18\x16\0\0\0\0\0\0\0\0\0\0\0\0\
+\x11\x70\x7b\x10\0\0\0\0\0\0\x18\x06\0\0\0\0\0\0\0\0\0\0\0\0\x0f\x58\x18\x16\0\
+\0\0\0\0\0\0\0\0\0\0\0\x11\x68\x7b\x10\0\0\0\0\0\0\x18\x06\0\0\0\0\0\0\0\0\0\0\
+\0\0\x10\x58\x18\x16\0\0\0\0\0\0\0\0\0\0\0\0\x11\xb0\x7b\x10\0\0\0\0\0\0\x18\
+\x06\0\0\0\0\0\0\0\0\0\0\0\0\x10\x60\x18\x16\0\0\0\0\0\0\0\0\0\0\0\0\x11\xc0\
+\x7b\x10\0\0\0\0\0\0\x18\x06\0\0\0\0\0\0\0\0\0\0\0\0\x10\xf0\x18\x16\0\0\0\0\0\
+\0\0\0\0\0\0\0\x11\xe0\x7b\x10\0\0\0\0\0\0\x18\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
+\x18\x16\0\0\0\0\0\0\0\0\0\0\0\0\x11\xd8\x7b\x10\0\0\0\0\0\0\x61\x06\0\x08\0\0\
+\0\0\x18\x16\0\0\0\0\0\0\0\0\0\0\0\0\x11\x78\x63\x10\0\0\0\0\0\0\x61\x06\0\x0c\
+\0\0\0\0\x18\x16\0\0\0\0\0\0\0\0\0\0\0\0\x11\x7c\x63\x10\0\0\0\0\0\0\x79\x06\0\
+\x10\0\0\0\0\x18\x16\0\0\0\0\0\0\0\0\0\0\0\0\x11\x80\x7b\x10\0\0\0\0\0\0\x61\
+\x0a\xff\x78\0\0\0\0\x18\x16\0\0\0\0\0\0\0\0\0\0\0\0\x11\xa8\x63\x10\0\0\0\0\0\
+\0\x18\x16\0\0\0\0\0\0\0\0\0\0\0\0\x11\xf0\xb7\x20\0\0\0\0\0\x11\xb7\x30\0\0\0\
+\0\0\x0c\xb7\x40\0\0\0\0\0\0\x85\0\0\0\0\0\0\xa7\xbf\x70\0\0\0\0\0\0\xc5\x70\
+\xff\x5c\0\0\0\0\x18\x06\0\0\0\0\0\0\0\0\0\0\0\0\x11\x60\x63\x07\0\x6c\0\0\0\0\
+\x77\x70\0\0\0\0\0\x20\x63\x07\0\x70\0\0\0\0\xb7\x10\0\0\0\0\0\x05\x18\x26\0\0\
+\0\0\0\0\0\0\0\0\0\0\x11\x60\xb7\x30\0\0\0\0\0\x8c\x85\0\0\0\0\0\0\xa6\xbf\x70\
+\0\0\0\0\0\0\x18\x06\0\0\0\0\0\0\0\0\0\0\0\0\x11\xd0\x61\x10\0\0\0\0\0\0\xd5\
+\x10\0\x02\0\0\0\0\xbf\x91\0\0\0\0\0\0\x85\0\0\0\0\0\0\xa8\xc5\x70\xff\x4a\0\0\
+\0\0\x63\xa7\xff\x80\0\0\0\0\x18\x06\0\0\0\0\0\0\0\0\0\0\0\0\x12\x08\x18\x16\0\
+\0\0\0\0\0\0\0\0\0\0\0\x16\xe0\x7b\x10\0\0\0\0\0\0\x18\x06\0\0\0\0\0\0\0\0\0\0\
+\0\0\x12\x10\x18\x16\0\0\0\0\0\0\0\0\0\0\0\0\x16\xd8\x7b\x10\0\0\0\0\0\0\x18\
+\x06\0\0\0\0\0\0\0\0\0\0\0\0\x14\x18\x18\x16\0\0\0\0\0\0\0\0\0\0\0\0\x17\x20\
+\x7b\x10\0\0\0\0\0\0\x18\x06\0\0\0\0\0\0\0\0\0\0\0\0\x14\x20\x18\x16\0\0\0\0\0\
+\0\0\0\0\0\0\0\x17\x30\x7b\x10\0\0\0\0\0\0\x18\x06\0\0\0\0\0\0\0\0\0\0\0\0\x15\
+\xb0\x18\x16\0\0\0\0\0\0\0\0\0\0\0\0\x17\x50\x7b\x10\0\0\0\0\0\0\x18\x06\0\0\0\
+\0\0\0\0\0\0\0\0\0\0\0\x18\x16\0\0\0\0\0\0\0\0\0\0\0\0\x17\x48\x7b\x10\0\0\0\0\
+\0\0\x61\x06\0\x08\0\0\0\0\x18\x16\0\0\0\0\0\0\0\0\0\0\0\0\x16\xe8\x63\x10\0\0\
+\0\0\0\0\x61\x06\0\x0c\0\0\0\0\x18\x16\0\0\0\0\0\0\0\0\0\0\0\0\x16\xec\x63\x10\
+\0\0\0\0\0\0\x79\x06\0\x10\0\0\0\0\x18\x16\0\0\0\0\0\0\0\0\0\0\0\0\x16\xf0\x7b\
+\x10\0\0\0\0\0\0\x61\x0a\xff\x78\0\0\0\0\x18\x16\0\0\0\0\0\0\0\0\0\0\0\0\x17\
+\x18\x63\x10\0\0\0\0\0\0\x18\x16\0\0\0\0\0\0\0\0\0\0\0\0\x17\x60\xb7\x20\0\0\0\
+\0\0\x12\xb7\x30\0\0\0\0\0\x0c\xb7\x40\0\0\0\0\0\0\x85\0\0\0\0\0\0\xa7\xbf\x70\
+\0\0\0\0\0\0\xc5\x70\xff\x13\0\0\0\0\x18\x06\0\0\0\0\0\0\0\0\0\0\0\0\x16\xd0\
+\x63\x07\0\x6c\0\0\0\0\x77\x70\0\0\0\0\0\x20\x63\x07\0\x70\0\0\0\0\xb7\x10\0\0\
+\0\0\0\x05\x18\x26\0\0\0\0\0\0\0\0\0\0\0\0\x16\xd0\xb7\x30\0\0\0\0\0\x8c\x85\0\
+\0\0\0\0\0\xa6\xbf\x70\0\0\0\0\0\0\x18\x06\0\0\0\0\0\0\0\0\0\0\0\0\x17\x40\x61\
+\x10\0\0\0\0\0\0\xd5\x10\0\x02\0\0\0\0\xbf\x91\0\0\0\0\0\0\x85\0\0\0\0\0\0\xa8\
+\xc5\x70\xff\x01\0\0\0\0\x63\xa7\xff\x84\0\0\0\0\x61\x1a\xff\x78\0\0\0\0\xd5\
+\x10\0\x02\0\0\0\0\xbf\x91\0\0\0\0\0\0\x85\0\0\0\0\0\0\xa8\x61\x0a\xff\x80\0\0\
+\0\0\x63\x60\0\x28\0\0\0\0\x61\x0a\xff\x84\0\0\0\0\x63\x60\0\x2c\0\0\0\0\x18\
+\x16\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x61\x01\0\0\0\0\0\0\x63\x60\0\x18\0\0\0\0\xb7\
+\0\0\0\0\0\0\0\x95\0\0\0\0\0\0\0";
+ err = bpf_load_and_run(&opts);
+ if (err < 0)
+ return err;
+ return 0;
+}
+
+static inline struct iterators_bpf *
+iterators_bpf__open_and_load(void)
+{
+ struct iterators_bpf *skel;
+
+ skel = iterators_bpf__open();
+ if (!skel)
+ return NULL;
+ if (iterators_bpf__load(skel)) {
+ iterators_bpf__destroy(skel);
+ return NULL;
+ }
+ return skel;
+}
+
+__attribute__((unused)) static void
+iterators_bpf__assert(struct iterators_bpf *s __attribute__((unused)))
+{
+#ifdef __cplusplus
+#define _Static_assert static_assert
+#endif
+#ifdef __cplusplus
+#undef _Static_assert
+#endif
+}
+
+#endif /* __ITERATORS_BPF_SKEL_H__ */
diff --git a/kernel/bpf/preload/iterators/iterators.lskel.h b/kernel/bpf/preload/iterators/iterators.lskel-little-endian.h
index 70f236a82fe1..70f236a82fe1 100644
--- a/kernel/bpf/preload/iterators/iterators.lskel.h
+++ b/kernel/bpf/preload/iterators/iterators.lskel-little-endian.h
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ecca9366c7a6..e3fcdc9836a6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -181,7 +181,7 @@ static int bpf_map_update_value(struct bpf_map *map, struct file *map_file,
int err;
/* Need to create a kthread, thus must support schedule */
- if (bpf_map_is_dev_bound(map)) {
+ if (bpf_map_is_offloaded(map)) {
return bpf_map_offload_update_elem(map, key, value, flags);
} else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
@@ -238,7 +238,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
void *ptr;
int err;
- if (bpf_map_is_dev_bound(map))
+ if (bpf_map_is_offloaded(map))
return bpf_map_offload_lookup_elem(map, key, value);
bpf_disable_instrumentation();
@@ -309,7 +309,7 @@ static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
* __GFP_RETRY_MAYFAIL to avoid such situations.
*/
- const gfp_t gfp = __GFP_NOWARN | __GFP_ZERO | __GFP_ACCOUNT;
+ gfp_t gfp = bpf_memcg_flags(__GFP_NOWARN | __GFP_ZERO);
unsigned int flags = 0;
unsigned long align = 1;
void *area;
@@ -390,7 +390,7 @@ static int bpf_map_alloc_id(struct bpf_map *map)
return id > 0 ? 0 : id;
}
-void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
+void bpf_map_free_id(struct bpf_map *map)
{
unsigned long flags;
@@ -402,18 +402,12 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
if (!map->id)
return;
- if (do_idr_lock)
- spin_lock_irqsave(&map_idr_lock, flags);
- else
- __acquire(&map_idr_lock);
+ spin_lock_irqsave(&map_idr_lock, flags);
idr_remove(&map_idr, map->id);
map->id = 0;
- if (do_idr_lock)
- spin_unlock_irqrestore(&map_idr_lock, flags);
- else
- __release(&map_idr_lock);
+ spin_unlock_irqrestore(&map_idr_lock, flags);
}
#ifdef CONFIG_MEMCG_KMEM
@@ -424,7 +418,8 @@ static void bpf_map_save_memcg(struct bpf_map *map)
* So we have to check map->objcg for being NULL each time it's
* being used.
*/
- map->objcg = get_obj_cgroup_from_current();
+ if (memcg_bpf_enabled())
+ map->objcg = get_obj_cgroup_from_current();
}
static void bpf_map_release_memcg(struct bpf_map *map)
@@ -470,6 +465,21 @@ void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
return ptr;
}
+void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size,
+ gfp_t flags)
+{
+ struct mem_cgroup *memcg, *old_memcg;
+ void *ptr;
+
+ memcg = bpf_map_get_memcg(map);
+ old_memcg = set_active_memcg(memcg);
+ ptr = kvcalloc(n, size, flags | __GFP_ACCOUNT);
+ set_active_memcg(old_memcg);
+ mem_cgroup_put(memcg);
+
+ return ptr;
+}
+
void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
size_t align, gfp_t flags)
{
@@ -527,9 +537,6 @@ void btf_record_free(struct btf_record *rec)
return;
for (i = 0; i < rec->cnt; i++) {
switch (rec->fields[i].type) {
- case BPF_SPIN_LOCK:
- case BPF_TIMER:
- break;
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
if (rec->fields[i].kptr.module)
@@ -538,7 +545,11 @@ void btf_record_free(struct btf_record *rec)
break;
case BPF_LIST_HEAD:
case BPF_LIST_NODE:
- /* Nothing to release for bpf_list_head */
+ case BPF_RB_ROOT:
+ case BPF_RB_NODE:
+ case BPF_SPIN_LOCK:
+ case BPF_TIMER:
+ /* Nothing to release */
break;
default:
WARN_ON_ONCE(1);
@@ -571,9 +582,6 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
new_rec->cnt = 0;
for (i = 0; i < rec->cnt; i++) {
switch (fields[i].type) {
- case BPF_SPIN_LOCK:
- case BPF_TIMER:
- break;
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
btf_get(fields[i].kptr.btf);
@@ -584,7 +592,11 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
break;
case BPF_LIST_HEAD:
case BPF_LIST_NODE:
- /* Nothing to acquire for bpf_list_head */
+ case BPF_RB_ROOT:
+ case BPF_RB_NODE:
+ case BPF_SPIN_LOCK:
+ case BPF_TIMER:
+ /* Nothing to acquire */
break;
default:
ret = -EFAULT;
@@ -664,7 +676,13 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
continue;
bpf_list_head_free(field, field_ptr, obj + rec->spin_lock_off);
break;
+ case BPF_RB_ROOT:
+ if (WARN_ON_ONCE(rec->spin_lock_off < 0))
+ continue;
+ bpf_rb_root_free(field, field_ptr, obj + rec->spin_lock_off);
+ break;
case BPF_LIST_NODE:
+ case BPF_RB_NODE:
break;
default:
WARN_ON_ONCE(1);
@@ -706,13 +724,13 @@ static void bpf_map_put_uref(struct bpf_map *map)
}
/* decrement map refcnt and schedule it for freeing via workqueue
- * (unrelying map implementation ops->map_free() might sleep)
+ * (underlying map implementation ops->map_free() might sleep)
*/
-static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
+void bpf_map_put(struct bpf_map *map)
{
if (atomic64_dec_and_test(&map->refcnt)) {
/* bpf_map_free_id() must be called first */
- bpf_map_free_id(map, do_idr_lock);
+ bpf_map_free_id(map);
btf_put(map->btf);
INIT_WORK(&map->work, bpf_map_free_deferred);
/* Avoid spawning kworkers, since they all might contend
@@ -721,11 +739,6 @@ static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
queue_work(system_unbound_wq, &map->work);
}
}
-
-void bpf_map_put(struct bpf_map *map)
-{
- __bpf_map_put(map, true);
-}
EXPORT_SYMBOL_GPL(bpf_map_put);
void bpf_map_put_with_uref(struct bpf_map *map)
@@ -1005,7 +1018,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
return -EINVAL;
map->record = btf_parse_fields(btf, value_type,
- BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD,
+ BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
+ BPF_RB_ROOT,
map->value_size);
if (!IS_ERR_OR_NULL(map->record)) {
int i;
@@ -1053,6 +1067,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
}
break;
case BPF_LIST_HEAD:
+ case BPF_RB_ROOT:
if (map->map_type != BPF_MAP_TYPE_HASH &&
map->map_type != BPF_MAP_TYPE_LRU_HASH &&
map->map_type != BPF_MAP_TYPE_ARRAY) {
@@ -1483,7 +1498,7 @@ static int map_delete_elem(union bpf_attr *attr, bpfptr_t uattr)
goto err_put;
}
- if (bpf_map_is_dev_bound(map)) {
+ if (bpf_map_is_offloaded(map)) {
err = bpf_map_offload_delete_elem(map, key);
goto out;
} else if (IS_FD_PROG_ARRAY(map) ||
@@ -1547,7 +1562,7 @@ static int map_get_next_key(union bpf_attr *attr)
if (!next_key)
goto free_key;
- if (bpf_map_is_dev_bound(map)) {
+ if (bpf_map_is_offloaded(map)) {
err = bpf_map_offload_get_next_key(map, key, next_key);
goto out;
}
@@ -1605,7 +1620,7 @@ int generic_map_delete_batch(struct bpf_map *map,
map->key_size))
break;
- if (bpf_map_is_dev_bound(map)) {
+ if (bpf_map_is_offloaded(map)) {
err = bpf_map_offload_delete_elem(map, key);
break;
}
@@ -1851,7 +1866,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_LRU_HASH ||
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
- if (!bpf_map_is_dev_bound(map)) {
+ if (!bpf_map_is_offloaded(map)) {
bpf_disable_instrumentation();
rcu_read_lock();
err = map->ops->map_lookup_and_delete_elem(map, key, value, attr->flags);
@@ -1944,7 +1959,7 @@ static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
if (!ops)
return -EINVAL;
- if (!bpf_prog_is_dev_bound(prog->aux))
+ if (!bpf_prog_is_offloaded(prog->aux))
prog->aux->ops = ops;
else
prog->aux->ops = &bpf_offload_prog_ops;
@@ -2245,7 +2260,7 @@ bool bpf_prog_get_ok(struct bpf_prog *prog,
if (prog->type != *attach_type)
return false;
- if (bpf_prog_is_dev_bound(prog->aux) && !attach_drv)
+ if (bpf_prog_is_offloaded(prog->aux) && !attach_drv)
return false;
return true;
@@ -2481,7 +2496,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
BPF_F_TEST_STATE_FREQ |
BPF_F_SLEEPABLE |
BPF_F_TEST_RND_HI32 |
- BPF_F_XDP_HAS_FRAGS))
+ BPF_F_XDP_HAS_FRAGS |
+ BPF_F_XDP_DEV_BOUND_ONLY))
return -EINVAL;
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
@@ -2565,7 +2581,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
prog->aux->attach_btf = attach_btf;
prog->aux->attach_btf_id = attr->attach_btf_id;
prog->aux->dst_prog = dst_prog;
- prog->aux->offload_requested = !!attr->prog_ifindex;
+ prog->aux->dev_bound = !!attr->prog_ifindex;
prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS;
@@ -2589,7 +2605,14 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
prog->gpl_compatible = is_gpl ? 1 : 0;
if (bpf_prog_is_dev_bound(prog->aux)) {
- err = bpf_prog_offload_init(prog, attr);
+ err = bpf_prog_dev_bound_init(prog, attr);
+ if (err)
+ goto free_prog_sec;
+ }
+
+ if (type == BPF_PROG_TYPE_EXT && dst_prog &&
+ bpf_prog_is_dev_bound(dst_prog->aux)) {
+ err = bpf_prog_dev_bound_inherit(prog, dst_prog);
if (err)
goto free_prog_sec;
}
@@ -3987,7 +4010,7 @@ static int bpf_prog_get_info_by_fd(struct file *file,
return -EFAULT;
}
- if (bpf_prog_is_dev_bound(prog->aux)) {
+ if (bpf_prog_is_offloaded(prog->aux)) {
err = bpf_prog_offload_info_fill(&info, prog);
if (err)
return err;
@@ -4215,7 +4238,7 @@ static int bpf_map_get_info_by_fd(struct file *file,
}
info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
- if (bpf_map_is_dev_bound(map)) {
+ if (bpf_map_is_offloaded(map)) {
err = bpf_map_offload_info_fill(&info, map);
if (err)
return err;
@@ -5309,7 +5332,6 @@ static struct ctl_table bpf_syscall_table[] = {
{
.procname = "bpf_stats_enabled",
.data = &bpf_stats_enabled_key.key,
- .maxlen = sizeof(bpf_stats_enabled_key),
.mode = 0644,
.proc_handler = bpf_stats_handler,
},
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7ee218827259..272563a0b770 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -190,6 +190,10 @@ struct bpf_verifier_stack_elem {
static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
+static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
+static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
+static int ref_set_non_owning(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg);
static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
{
@@ -255,6 +259,7 @@ struct bpf_call_arg_meta {
int mem_size;
u64 msize_max_value;
int ref_obj_id;
+ int dynptr_id;
int map_uid;
int func_id;
struct btf *btf;
@@ -456,6 +461,11 @@ static bool type_is_ptr_alloc_obj(u32 type)
return base_type(type) == PTR_TO_BTF_ID && type_flag(type) & MEM_ALLOC;
}
+static bool type_is_non_owning_ref(u32 type)
+{
+ return type_is_ptr_alloc_obj(type) && type_flag(type) & NON_OWN_REF;
+}
+
static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
{
struct btf_record *rec = NULL;
@@ -638,31 +648,57 @@ static void print_liveness(struct bpf_verifier_env *env,
verbose(env, "D");
}
-static int get_spi(s32 off)
+static int __get_spi(s32 off)
{
return (-off - 1) / BPF_REG_SIZE;
}
+static struct bpf_func_state *func(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg)
+{
+ struct bpf_verifier_state *cur = env->cur_state;
+
+ return cur->frame[reg->frameno];
+}
+
static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
{
- int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
+ int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
- /* We need to check that slots between [spi - nr_slots + 1, spi] are
- * within [0, allocated_stack).
- *
- * Please note that the spi grows downwards. For example, a dynptr
- * takes the size of two stack slots; the first slot will be at
- * spi and the second slot will be at spi - 1.
- */
- return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
+ /* We need to check that slots between [spi - nr_slots + 1, spi] are
+ * within [0, allocated_stack).
+ *
+ * Please note that the spi grows downwards. For example, a dynptr
+ * takes the size of two stack slots; the first slot will be at
+ * spi and the second slot will be at spi - 1.
+ */
+ return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
}
-static struct bpf_func_state *func(struct bpf_verifier_env *env,
- const struct bpf_reg_state *reg)
+static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
- struct bpf_verifier_state *cur = env->cur_state;
+ int off, spi;
- return cur->frame[reg->frameno];
+ if (!tnum_is_const(reg->var_off)) {
+ verbose(env, "dynptr has to be at a constant offset\n");
+ return -EINVAL;
+ }
+
+ off = reg->off + reg->var_off.value;
+ if (off % BPF_REG_SIZE) {
+ verbose(env, "cannot pass in dynptr at an offset=%d\n", off);
+ return -EINVAL;
+ }
+
+ spi = __get_spi(off);
+ if (spi < 1) {
+ verbose(env, "cannot pass in dynptr at an offset=%d\n", off);
+ return -EINVAL;
+ }
+
+ if (!is_spi_bounds_valid(func(env, reg), spi, BPF_DYNPTR_NR_SLOTS))
+ return -ERANGE;
+ return spi;
}
static const char *kernel_type_name(const struct btf* btf, u32 id)
@@ -727,37 +763,58 @@ static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
static void __mark_dynptr_reg(struct bpf_reg_state *reg,
enum bpf_dynptr_type type,
- bool first_slot);
+ bool first_slot, int dynptr_id);
static void __mark_reg_not_init(const struct bpf_verifier_env *env,
struct bpf_reg_state *reg);
-static void mark_dynptr_stack_regs(struct bpf_reg_state *sreg1,
+static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
+ struct bpf_reg_state *sreg1,
struct bpf_reg_state *sreg2,
enum bpf_dynptr_type type)
{
- __mark_dynptr_reg(sreg1, type, true);
- __mark_dynptr_reg(sreg2, type, false);
+ int id = ++env->id_gen;
+
+ __mark_dynptr_reg(sreg1, type, true, id);
+ __mark_dynptr_reg(sreg2, type, false, id);
}
-static void mark_dynptr_cb_reg(struct bpf_reg_state *reg,
+static void mark_dynptr_cb_reg(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg,
enum bpf_dynptr_type type)
{
- __mark_dynptr_reg(reg, type, true);
+ __mark_dynptr_reg(reg, type, true, ++env->id_gen);
}
+static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
+ struct bpf_func_state *state, int spi);
static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
enum bpf_arg_type arg_type, int insn_idx)
{
struct bpf_func_state *state = func(env, reg);
enum bpf_dynptr_type type;
- int spi, i, id;
-
- spi = get_spi(reg->off);
-
- if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
- return -EINVAL;
+ int spi, i, id, err;
+
+ spi = dynptr_get_spi(env, reg);
+ if (spi < 0)
+ return spi;
+
+ /* We cannot assume both spi and spi - 1 belong to the same dynptr,
+ * hence we need to call destroy_if_dynptr_stack_slot once for each of
+ * them. Consider the following example:
+ * [d1][d1][d2][d2]
+ * spi 3 2 1 0
+ * Marking spi = 2 should lead to destruction of both d1 and d2. In
+ * case they do belong to the same dynptr, the second call won't see
+ * slot_type as STACK_DYNPTR and will simply skip destruction.
+ */
+ err = destroy_if_dynptr_stack_slot(env, state, spi);
+ if (err)
+ return err;
+ err = destroy_if_dynptr_stack_slot(env, state, spi - 1);
+ if (err)
+ return err;
for (i = 0; i < BPF_REG_SIZE; i++) {
state->stack[spi].slot_type[i] = STACK_DYNPTR;
@@ -768,7 +825,7 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_
if (type == BPF_DYNPTR_TYPE_INVALID)
return -EINVAL;
- mark_dynptr_stack_regs(&state->stack[spi].spilled_ptr,
+ mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr,
&state->stack[spi - 1].spilled_ptr, type);
if (dynptr_type_refcounted(type)) {
@@ -781,6 +838,9 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_
state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
}
+ state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
+ state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
+
return 0;
}
@@ -789,10 +849,9 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re
struct bpf_func_state *state = func(env, reg);
int spi, i;
- spi = get_spi(reg->off);
-
- if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
- return -EINVAL;
+ spi = dynptr_get_spi(env, reg);
+ if (spi < 0)
+ return spi;
for (i = 0; i < BPF_REG_SIZE; i++) {
state->stack[spi].slot_type[i] = STACK_INVALID;
@@ -805,43 +864,133 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re
__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
+
+ /* Why do we need to set REG_LIVE_WRITTEN for STACK_INVALID slot?
+ *
+ * While we don't allow reading STACK_INVALID, it is still possible to
+ * do <8 byte writes marking some but not all slots as STACK_MISC. Then,
+ * helpers or insns can do partial read of that part without failing,
+ * but check_stack_range_initialized, check_stack_read_var_off, and
+ * check_stack_read_fixed_off will do mark_reg_read for all 8-bytes of
+ * the slot conservatively. Hence we need to prevent those liveness
+ * marking walks.
+ *
+ * This was not a problem before because STACK_INVALID is only set by
+ * default (where the default reg state has its reg->parent as NULL), or
+ * in clean_live_states after REG_LIVE_DONE (at which point
+ * mark_reg_read won't walk reg->parent chain), but not randomly during
+ * verifier state exploration (like we did above). Hence, for our case
+ * parentage chain will still be live (i.e. reg->parent may be
+ * non-NULL), while earlier reg->parent was NULL, so we need
+ * REG_LIVE_WRITTEN to screen off read marker propagation when it is
+ * done later on reads or by mark_dynptr_read, which would otherwise
+ * unnecessarily mark registers in verifier state.
+ */
+ state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
+ state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
+
return 0;
}
-static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+static void __mark_reg_unknown(const struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg);
+
+static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
+ struct bpf_func_state *state, int spi)
{
- struct bpf_func_state *state = func(env, reg);
- int spi, i;
+ struct bpf_func_state *fstate;
+ struct bpf_reg_state *dreg;
+ int i, dynptr_id;
- if (reg->type == CONST_PTR_TO_DYNPTR)
- return false;
+ /* We always ensure that STACK_DYNPTR is never set partially,
+ * hence just checking for slot_type[0] is enough. This is
+ * different for STACK_SPILL, where it may be only set for
+ * 1 byte, so code has to use is_spilled_reg.
+ */
+ if (state->stack[spi].slot_type[0] != STACK_DYNPTR)
+ return 0;
- spi = get_spi(reg->off);
- if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
- return true;
+ /* Reposition spi to first slot */
+ if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
+ spi = spi + 1;
+ if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
+ verbose(env, "cannot overwrite referenced dynptr\n");
+ return -EINVAL;
+ }
+
+ mark_stack_slot_scratched(env, spi);
+ mark_stack_slot_scratched(env, spi - 1);
+
+ /* Writing partially to one dynptr stack slot destroys both. */
for (i = 0; i < BPF_REG_SIZE; i++) {
- if (state->stack[spi].slot_type[i] == STACK_DYNPTR ||
- state->stack[spi - 1].slot_type[i] == STACK_DYNPTR)
- return false;
+ state->stack[spi].slot_type[i] = STACK_INVALID;
+ state->stack[spi - 1].slot_type[i] = STACK_INVALID;
}
+ dynptr_id = state->stack[spi].spilled_ptr.id;
+ /* Invalidate any slices associated with this dynptr */
+ bpf_for_each_reg_in_vstate(env->cur_state, fstate, dreg, ({
+ /* Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM */
+ if (dreg->type != (PTR_TO_MEM | PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM)
+ continue;
+ if (dreg->dynptr_id == dynptr_id) {
+ if (!env->allow_ptr_leaks)
+ __mark_reg_not_init(env, dreg);
+ else
+ __mark_reg_unknown(env, dreg);
+ }
+ }));
+
+ /* Do not release reference state, we are destroying dynptr on stack,
+ * not using some helper to release it. Just reset register.
+ */
+ __mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
+ __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
+
+ /* Same reason as unmark_stack_slots_dynptr above */
+ state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
+ state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
+
+ return 0;
+}
+
+static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+ int spi)
+{
+ if (reg->type == CONST_PTR_TO_DYNPTR)
+ return false;
+
+ /* For -ERANGE (i.e. spi not falling into allocated stack slots), we
+ * will do check_mem_access to check and update stack bounds later, so
+ * return true for that case.
+ */
+ if (spi < 0)
+ return spi == -ERANGE;
+ /* We allow overwriting existing unreferenced STACK_DYNPTR slots, see
+ * mark_stack_slots_dynptr which calls destroy_if_dynptr_stack_slot to
+ * ensure dynptr objects at the slots we are touching are completely
+ * destructed before we reinitialize them for a new one. For referenced
+ * ones, destroy_if_dynptr_stack_slot returns an error early instead of
+ * delaying it until the end where the user will get "Unreleased
+ * reference" error.
+ */
return true;
}
-static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+ int spi)
{
struct bpf_func_state *state = func(env, reg);
- int spi;
int i;
/* This already represents first slot of initialized bpf_dynptr */
if (reg->type == CONST_PTR_TO_DYNPTR)
return true;
- spi = get_spi(reg->off);
- if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
- !state->stack[spi].spilled_ptr.dynptr.first_slot)
+ if (spi < 0)
+ return false;
+ if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
return false;
for (i = 0; i < BPF_REG_SIZE; i++) {
@@ -868,7 +1017,9 @@ static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg
if (reg->type == CONST_PTR_TO_DYNPTR) {
return reg->dynptr.type == dynptr_type;
} else {
- spi = get_spi(reg->off);
+ spi = dynptr_get_spi(env, reg);
+ if (spi < 0)
+ return false;
return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
}
}
@@ -931,6 +1082,8 @@ static void print_verifier_state(struct bpf_verifier_env *env,
verbose_a("id=%d", reg->id);
if (reg->ref_obj_id)
verbose_a("ref_obj_id=%d", reg->ref_obj_id);
+ if (type_is_non_owning_ref(reg->type))
+ verbose_a("%s", "non_own_ref");
if (t != SCALAR_VALUE)
verbose_a("off=%d", reg->off);
if (type_is_pkt_pointer(t))
@@ -1404,9 +1557,11 @@ static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
*/
static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
- /* Clear id, off, and union(map_ptr, range) */
+ /* Clear off and union(map_ptr, range) */
memset(((u8 *)reg) + sizeof(reg->type), 0,
offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
+ reg->id = 0;
+ reg->ref_obj_id = 0;
___mark_reg_known(reg, imm);
}
@@ -1447,7 +1602,7 @@ static void mark_reg_known_zero(struct bpf_verifier_env *env,
}
static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
- bool first_slot)
+ bool first_slot, int dynptr_id)
{
/* reg->type has no meaning for STACK_DYNPTR, but when we set reg for
* callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
@@ -1455,6 +1610,8 @@ static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type ty
*/
__mark_reg_known_zero(reg);
reg->type = CONST_PTR_TO_DYNPTR;
+ /* Give each dynptr a unique id to uniquely associate slices to it. */
+ reg->id = dynptr_id;
reg->dynptr.type = type;
reg->dynptr.first_slot = first_slot;
}
@@ -1486,6 +1643,16 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
reg->type &= ~PTR_MAYBE_NULL;
}
+static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno,
+ struct btf_field_graph_root *ds_head)
+{
+ __mark_reg_known_zero(&regs[regno]);
+ regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC;
+ regs[regno].btf = ds_head->btf;
+ regs[regno].btf_id = ds_head->value_btf_id;
+ regs[regno].off = ds_head->node_offset;
+}
+
static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
{
return type_is_pkt_pointer(reg->type);
@@ -1752,11 +1919,13 @@ static void __mark_reg_unknown(const struct bpf_verifier_env *env,
struct bpf_reg_state *reg)
{
/*
- * Clear type, id, off, and union(map_ptr, range) and
+ * Clear type, off, and union(map_ptr, range) and
* padding between 'type' and union
*/
memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
reg->type = SCALAR_VALUE;
+ reg->id = 0;
+ reg->ref_obj_id = 0;
reg->var_off = tnum_unknown;
reg->frameno = 0;
reg->precise = !env->bpf_capable;
@@ -2185,6 +2354,12 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
return -EINVAL;
}
+ if (bpf_dev_bound_kfunc_id(func_id)) {
+ err = bpf_dev_bound_kfunc_check(&env->log, prog_aux);
+ if (err)
+ return err;
+ }
+
desc = &tab->descs[tab->nr_descs++];
desc->func_id = func_id;
desc->imm = call_imm;
@@ -2386,6 +2561,32 @@ static int mark_reg_read(struct bpf_verifier_env *env,
return 0;
}
+static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+{
+ struct bpf_func_state *state = func(env, reg);
+ int spi, ret;
+
+ /* For CONST_PTR_TO_DYNPTR, it must have already been done by
+ * check_reg_arg in check_helper_call and mark_btf_func_reg_size in
+ * check_kfunc_call.
+ */
+ if (reg->type == CONST_PTR_TO_DYNPTR)
+ return 0;
+ spi = dynptr_get_spi(env, reg);
+ if (spi < 0)
+ return spi;
+ /* Caller ensures dynptr is valid and initialized, which means spi is in
+ * bounds and spi is the first dynptr slot. Simply mark stack slot as
+ * read.
+ */
+ ret = mark_reg_read(env, &state->stack[spi].spilled_ptr,
+ state->stack[spi].spilled_ptr.parent, REG_LIVE_READ64);
+ if (ret)
+ return ret;
+ return mark_reg_read(env, &state->stack[spi - 1].spilled_ptr,
+ state->stack[spi - 1].spilled_ptr.parent, REG_LIVE_READ64);
+}
+
/* This function is supposed to be used by the following 32-bit optimization
* code only. It returns TRUE if the source or destination register operates
* on 64-bit, otherwise return FALSE.
@@ -3272,6 +3473,11 @@ static void save_register_state(struct bpf_func_state *state,
scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
}
+static bool is_bpf_st_mem(struct bpf_insn *insn)
+{
+ return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM;
+}
+
/* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
* stack boundary and alignment are checked in check_mem_access()
*/
@@ -3283,8 +3489,9 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
{
struct bpf_func_state *cur; /* state of the current function */
int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
- u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
+ struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
struct bpf_reg_state *reg = NULL;
+ u32 dst_reg = insn->dst_reg;
err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
if (err)
@@ -3318,6 +3525,10 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
}
+ err = destroy_if_dynptr_stack_slot(env, state, spi);
+ if (err)
+ return err;
+
mark_stack_slot_scratched(env, spi);
if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
!register_is_null(reg) && env->bpf_capable) {
@@ -3333,6 +3544,13 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
return err;
}
save_register_state(state, spi, reg, size);
+ } else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
+ insn->imm != 0 && env->bpf_capable) {
+ struct bpf_reg_state fake_reg = {};
+
+ __mark_reg_known(&fake_reg, (u32)insn->imm);
+ fake_reg.type = SCALAR_VALUE;
+ save_register_state(state, spi, &fake_reg, size);
} else if (reg && is_spillable_regtype(reg->type)) {
/* register containing pointer is being spilled into stack */
if (size != BPF_REG_SIZE) {
@@ -3367,7 +3585,8 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
/* when we zero initialize stack slots mark them as such */
- if (reg && register_is_null(reg)) {
+ if ((reg && register_is_null(reg)) ||
+ (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
/* backtracking doesn't work for STACK_ZERO yet. */
err = mark_chain_precision(env, value_regno);
if (err)
@@ -3412,6 +3631,7 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
int min_off, max_off;
int i, err;
struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
+ struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
bool writing_zero = false;
/* set if the fact that we're writing a zero is used to let any
* stack slots remain STACK_ZERO
@@ -3424,13 +3644,22 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
max_off = ptr_reg->smax_value + off + size;
if (value_regno >= 0)
value_reg = &cur->regs[value_regno];
- if (value_reg && register_is_null(value_reg))
+ if ((value_reg && register_is_null(value_reg)) ||
+ (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0))
writing_zero = true;
err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE));
if (err)
return err;
+ for (i = min_off; i < max_off; i++) {
+ int spi;
+
+ spi = __get_spi(i);
+ err = destroy_if_dynptr_stack_slot(env, state, spi);
+ if (err)
+ return err;
+ }
/* Variable offset writes destroy any spilled pointers in range. */
for (i = min_off; i < max_off; i++) {
@@ -4770,6 +4999,25 @@ static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
return 0;
}
+#define BTF_TYPE_SAFE_NESTED(__type) __PASTE(__type, __safe_fields)
+
+BTF_TYPE_SAFE_NESTED(struct task_struct) {
+ const cpumask_t *cpus_ptr;
+};
+
+static bool nested_ptr_is_trusted(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg,
+ int off)
+{
+ /* If its parent is not trusted, it can't regain its trusted status. */
+ if (!is_trusted_reg(reg))
+ return false;
+
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_NESTED(struct task_struct));
+
+ return btf_nested_type_is_trusted(&env->log, reg, off);
+}
+
static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
struct bpf_reg_state *regs,
int regno, int off, int size,
@@ -4841,7 +5089,8 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
return -EACCES;
}
- if (type_is_alloc(reg->type) && !reg->ref_obj_id) {
+ if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) &&
+ !reg->ref_obj_id) {
verbose(env, "verifier internal error: ref_obj_id for allocated object must be non-zero\n");
return -EFAULT;
}
@@ -4858,10 +5107,17 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
if (type_flag(reg->type) & PTR_UNTRUSTED)
flag |= PTR_UNTRUSTED;
- /* By default any pointer obtained from walking a trusted pointer is
- * no longer trusted except the rcu case below.
+ /* By default any pointer obtained from walking a trusted pointer is no
+ * longer trusted, unless the field being accessed has explicitly been
+ * marked as inheriting its parent's state of trust.
+ *
+ * An RCU-protected pointer can also be deemed trusted if we are in an
+ * RCU read region. This case is handled below.
*/
- flag &= ~PTR_TRUSTED;
+ if (nested_ptr_is_trusted(env, reg, off))
+ flag |= PTR_TRUSTED;
+ else
+ flag &= ~PTR_TRUSTED;
if (flag & MEM_RCU) {
/* Mark value register as MEM_RCU only if it is protected by
@@ -5458,6 +5714,31 @@ static int check_stack_range_initialized(
}
if (meta && meta->raw_mode) {
+ /* Ensure we won't be overwriting dynptrs when simulating byte
+ * by byte access in check_helper_call using meta.access_size.
+ * This would be a problem if we have a helper in the future
+ * which takes:
+ *
+ * helper(uninit_mem, len, dynptr)
+ *
+ * Now, uninit_mem may overlap with dynptr pointer. Hence, it
+ * may end up writing to dynptr itself when touching memory from
+ * arg 1. This can be relaxed on a case by case basis for known
+ * safe cases, but reject due to the possibility of aliasing by
+ * default.
+ */
+ for (i = min_off; i < max_off + access_size; i++) {
+ int stack_off = -i - 1;
+
+ spi = __get_spi(i);
+ /* raw_mode may write past allocated_stack */
+ if (state->allocated_stack <= stack_off)
+ continue;
+ if (state->stack[spi].slot_type[stack_off % BPF_REG_SIZE] == STACK_DYNPTR) {
+ verbose(env, "potential write to dynptr at off=%d disallowed\n", i);
+ return -EACCES;
+ }
+ }
meta->access_size = access_size;
meta->regno = regno;
return 0;
@@ -5799,9 +6080,7 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
cur->active_lock.ptr = btf;
cur->active_lock.id = reg->id;
} else {
- struct bpf_func_state *fstate = cur_func(env);
void *ptr;
- int i;
if (map)
ptr = map;
@@ -5817,25 +6096,11 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
verbose(env, "bpf_spin_unlock of different lock\n");
return -EINVAL;
}
- cur->active_lock.ptr = NULL;
- cur->active_lock.id = 0;
- for (i = fstate->acquired_refs - 1; i >= 0; i--) {
- int err;
+ invalidate_non_owning_refs(env);
- /* Complain on error because this reference state cannot
- * be freed before this point, as bpf_spin_lock critical
- * section does not allow functions that release the
- * allocated object immediately.
- */
- if (!fstate->refs[i].release_on_unlock)
- continue;
- err = release_reference(env, fstate->refs[i].id);
- if (err) {
- verbose(env, "failed to release release_on_unlock reference");
- return err;
- }
- }
+ cur->active_lock.ptr = NULL;
+ cur->active_lock.id = 0;
}
return 0;
}
@@ -5945,6 +6210,7 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno,
enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta)
{
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+ int spi = 0;
/* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an
* ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*):
@@ -5955,12 +6221,14 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno,
}
/* CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
* check_func_arg_reg_off's logic. We only need to check offset
- * alignment for PTR_TO_STACK.
+ * and its alignment for PTR_TO_STACK.
*/
- if (reg->type == PTR_TO_STACK && (reg->off % BPF_REG_SIZE)) {
- verbose(env, "cannot pass in dynptr at an offset=%d\n", reg->off);
- return -EINVAL;
+ if (reg->type == PTR_TO_STACK) {
+ spi = dynptr_get_spi(env, reg);
+ if (spi < 0 && spi != -ERANGE)
+ return spi;
}
+
/* MEM_UNINIT - Points to memory that is an appropriate candidate for
* constructing a mutable bpf_dynptr object.
*
@@ -5977,7 +6245,7 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno,
* to.
*/
if (arg_type & MEM_UNINIT) {
- if (!is_dynptr_reg_valid_uninit(env, reg)) {
+ if (!is_dynptr_reg_valid_uninit(env, reg, spi)) {
verbose(env, "Dynptr has to be an uninitialized dynptr\n");
return -EINVAL;
}
@@ -5992,13 +6260,15 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno,
meta->uninit_dynptr_regno = regno;
} else /* MEM_RDONLY and None case from above */ {
+ int err;
+
/* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) {
verbose(env, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n");
return -EINVAL;
}
- if (!is_dynptr_reg_valid_init(env, reg)) {
+ if (!is_dynptr_reg_valid_init(env, reg, spi)) {
verbose(env,
"Expected an initialized dynptr as arg #%d\n",
regno);
@@ -6025,6 +6295,10 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno,
err_extra, regno);
return -EINVAL;
}
+
+ err = mark_dynptr_read(env, reg);
+ if (err)
+ return err;
}
return 0;
}
@@ -6294,6 +6568,23 @@ found:
return 0;
}
+static struct btf_field *
+reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields)
+{
+ struct btf_field *field;
+ struct btf_record *rec;
+
+ rec = reg_btf_record(reg);
+ if (!rec)
+ return NULL;
+
+ field = btf_record_find(rec, off, fields);
+ if (!field)
+ return NULL;
+
+ return field;
+}
+
int check_func_arg_reg_off(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg, int regno,
enum bpf_arg_type arg_type)
@@ -6315,6 +6606,18 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
*/
if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
return 0;
+
+ if ((type_is_ptr_alloc_obj(type) || type_is_non_owning_ref(type)) && reg->off) {
+ if (reg_find_field_offset(reg, reg->off, BPF_GRAPH_NODE_OR_ROOT))
+ return __check_ptr_off_reg(env, reg, regno, true);
+
+ verbose(env, "R%d must have zero offset when passed to release func\n",
+ regno);
+ verbose(env, "No graph node or root found at R%d type:%s off:%d\n", regno,
+ kernel_type_name(reg->btf, reg->btf_id), reg->off);
+ return -EINVAL;
+ }
+
/* Doing check_ptr_off_reg check for the offset will catch this
* because fixed_off_ok is false, but checking here allows us
* to give the user a better error message.
@@ -6349,6 +6652,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
case PTR_TO_BTF_ID | PTR_TRUSTED:
case PTR_TO_BTF_ID | MEM_RCU:
case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED:
+ case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
/* When referenced PTR_TO_BTF_ID is passed to release function,
* its fixed offset must be 0. In the other cases, fixed offset
* can be non-zero. This was already checked above. So pass
@@ -6362,15 +6666,29 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
}
}
-static u32 dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
struct bpf_func_state *state = func(env, reg);
int spi;
if (reg->type == CONST_PTR_TO_DYNPTR)
- return reg->ref_obj_id;
+ return reg->id;
+ spi = dynptr_get_spi(env, reg);
+ if (spi < 0)
+ return spi;
+ return state->stack[spi].spilled_ptr.id;
+}
- spi = get_spi(reg->off);
+static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+{
+ struct bpf_func_state *state = func(env, reg);
+ int spi;
+
+ if (reg->type == CONST_PTR_TO_DYNPTR)
+ return reg->ref_obj_id;
+ spi = dynptr_get_spi(env, reg);
+ if (spi < 0)
+ return spi;
return state->stack[spi].spilled_ptr.ref_obj_id;
}
@@ -6444,9 +6762,8 @@ skip_type_check:
* PTR_TO_STACK.
*/
if (reg->type == PTR_TO_STACK) {
- spi = get_spi(reg->off);
- if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
- !state->stack[spi].spilled_ptr.ref_obj_id) {
+ spi = dynptr_get_spi(env, reg);
+ if (spi < 0 || !state->stack[spi].spilled_ptr.ref_obj_id) {
verbose(env, "arg %d is an unacquired reference\n", regno);
return -EINVAL;
}
@@ -6547,6 +6864,10 @@ skip_type_check:
meta->ret_btf_id = reg->btf_id;
break;
case ARG_PTR_TO_SPIN_LOCK:
+ if (in_rbtree_lock_required_cb(env)) {
+ verbose(env, "can't spin_{lock,unlock} in rbtree cb\n");
+ return -EACCES;
+ }
if (meta->func_id == BPF_FUNC_spin_lock) {
err = process_spin_lock(env, regno, true);
if (err)
@@ -7098,6 +7419,17 @@ static int release_reference(struct bpf_verifier_env *env,
return 0;
}
+static void invalidate_non_owning_refs(struct bpf_verifier_env *env)
+{
+ struct bpf_func_state *unused;
+ struct bpf_reg_state *reg;
+
+ bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
+ if (type_is_non_owning_ref(reg->type))
+ __mark_reg_unknown(env, reg);
+ }));
+}
+
static void clear_caller_saved_regs(struct bpf_verifier_env *env,
struct bpf_reg_state *regs)
{
@@ -7119,6 +7451,8 @@ static int set_callee_state(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee, int insn_idx);
+static bool is_callback_calling_kfunc(u32 btf_id);
+
static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx, int subprog,
set_callee_state_fn set_callee_state_cb)
@@ -7173,10 +7507,18 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
* interested in validating only BPF helpers that can call subprogs as
* callbacks
*/
- if (set_callee_state_cb != set_callee_state && !is_callback_calling_function(insn->imm)) {
- verbose(env, "verifier bug: helper %s#%d is not marked as callback-calling\n",
- func_id_name(insn->imm), insn->imm);
- return -EFAULT;
+ if (set_callee_state_cb != set_callee_state) {
+ if (bpf_pseudo_kfunc_call(insn) &&
+ !is_callback_calling_kfunc(insn->imm)) {
+ verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
+ func_id_name(insn->imm), insn->imm);
+ return -EFAULT;
+ } else if (!bpf_pseudo_kfunc_call(insn) &&
+ !is_callback_calling_function(insn->imm)) { /* helper */
+ verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n",
+ func_id_name(insn->imm), insn->imm);
+ return -EFAULT;
+ }
}
if (insn->code == (BPF_JMP | BPF_CALL) &&
@@ -7428,7 +7770,7 @@ static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
* callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
*/
__mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
- mark_dynptr_cb_reg(&callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
+ mark_dynptr_cb_reg(env, &callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
/* unused */
@@ -7441,6 +7783,63 @@ static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
return 0;
}
+static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
+ struct bpf_func_state *caller,
+ struct bpf_func_state *callee,
+ int insn_idx)
+{
+ /* void bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
+ * bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b));
+ *
+ * 'struct bpf_rb_node *node' arg to bpf_rbtree_add is the same PTR_TO_BTF_ID w/ offset
+ * that 'less' callback args will be receiving. However, 'node' arg was release_reference'd
+ * by this point, so look at 'root'
+ */
+ struct btf_field *field;
+
+ field = reg_find_field_offset(&caller->regs[BPF_REG_1], caller->regs[BPF_REG_1].off,
+ BPF_RB_ROOT);
+ if (!field || !field->graph_root.value_btf_id)
+ return -EFAULT;
+
+ mark_reg_graph_node(callee->regs, BPF_REG_1, &field->graph_root);
+ ref_set_non_owning(env, &callee->regs[BPF_REG_1]);
+ mark_reg_graph_node(callee->regs, BPF_REG_2, &field->graph_root);
+ ref_set_non_owning(env, &callee->regs[BPF_REG_2]);
+
+ __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
+ __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+ __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+ callee->in_callback_fn = true;
+ callee->callback_ret_range = tnum_range(0, 1);
+ return 0;
+}
+
+static bool is_rbtree_lock_required_kfunc(u32 btf_id);
+
+/* Are we currently verifying the callback for a rbtree helper that must
+ * be called with lock held? If so, no need to complain about unreleased
+ * lock
+ */
+static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
+{
+ struct bpf_verifier_state *state = env->cur_state;
+ struct bpf_insn *insn = env->prog->insnsi;
+ struct bpf_func_state *callee;
+ int kfunc_btf_id;
+
+ if (!state->curframe)
+ return false;
+
+ callee = state->frame[state->curframe];
+
+ if (!callee->in_callback_fn)
+ return false;
+
+ kfunc_btf_id = insn[callee->callsite].imm;
+ return is_rbtree_lock_required_kfunc(kfunc_btf_id);
+}
+
static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
{
struct bpf_verifier_state *state = env->cur_state;
@@ -7633,6 +8032,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
struct bpf_map *fmt_map = fmt_reg->map_ptr;
+ struct bpf_bprintf_data data = {};
int err, fmt_map_off, num_args;
u64 fmt_addr;
char *fmt;
@@ -7657,7 +8057,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
/* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
* can focus on validating the format specifiers.
*/
- err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, num_args);
+ err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data);
if (err < 0)
verbose(env, "Invalid format string\n");
@@ -7933,13 +8333,32 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
if (arg_type_is_dynptr(fn->arg_type[i])) {
struct bpf_reg_state *reg = &regs[BPF_REG_1 + i];
+ int id, ref_obj_id;
+
+ if (meta.dynptr_id) {
+ verbose(env, "verifier internal error: meta.dynptr_id already set\n");
+ return -EFAULT;
+ }
if (meta.ref_obj_id) {
verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
return -EFAULT;
}
- meta.ref_obj_id = dynptr_ref_obj_id(env, reg);
+ id = dynptr_id(env, reg);
+ if (id < 0) {
+ verbose(env, "verifier internal error: failed to obtain dynptr id\n");
+ return id;
+ }
+
+ ref_obj_id = dynptr_ref_obj_id(env, reg);
+ if (ref_obj_id < 0) {
+ verbose(env, "verifier internal error: failed to obtain dynptr ref_obj_id\n");
+ return ref_obj_id;
+ }
+
+ meta.dynptr_id = id;
+ meta.ref_obj_id = ref_obj_id;
break;
}
}
@@ -8095,6 +8514,9 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
return -EFAULT;
}
+ if (is_dynptr_ref_function(func_id))
+ regs[BPF_REG_0].dynptr_id = meta.dynptr_id;
+
if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) {
/* For release_reference() */
regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
@@ -8186,6 +8608,7 @@ struct bpf_kfunc_call_arg_meta {
bool r0_rdonly;
u32 ret_btf_id;
u64 r0_size;
+ u32 subprogno;
struct {
u64 value;
bool found;
@@ -8197,6 +8620,9 @@ struct bpf_kfunc_call_arg_meta {
struct {
struct btf_field *field;
} arg_list_head;
+ struct {
+ struct btf_field *field;
+ } arg_rbtree_root;
};
static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
@@ -8308,12 +8734,16 @@ enum {
KF_ARG_DYNPTR_ID,
KF_ARG_LIST_HEAD_ID,
KF_ARG_LIST_NODE_ID,
+ KF_ARG_RB_ROOT_ID,
+ KF_ARG_RB_NODE_ID,
};
BTF_ID_LIST(kf_arg_btf_ids)
BTF_ID(struct, bpf_dynptr_kern)
BTF_ID(struct, bpf_list_head)
BTF_ID(struct, bpf_list_node)
+BTF_ID(struct, bpf_rb_root)
+BTF_ID(struct, bpf_rb_node)
static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
const struct btf_param *arg, int type)
@@ -8347,6 +8777,28 @@ static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param
return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID);
}
+static bool is_kfunc_arg_rbtree_root(const struct btf *btf, const struct btf_param *arg)
+{
+ return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_ROOT_ID);
+}
+
+static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_param *arg)
+{
+ return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID);
+}
+
+static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
+ const struct btf_param *arg)
+{
+ const struct btf_type *t;
+
+ t = btf_type_resolve_func_ptr(btf, arg->type, NULL);
+ if (!t)
+ return false;
+
+ return true;
+}
+
/* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
const struct btf *btf,
@@ -8406,6 +8858,9 @@ enum kfunc_ptr_arg_type {
KF_ARG_PTR_TO_BTF_ID, /* Also covers reg2btf_ids conversions */
KF_ARG_PTR_TO_MEM,
KF_ARG_PTR_TO_MEM_SIZE, /* Size derived from next argument, skip it */
+ KF_ARG_PTR_TO_CALLBACK,
+ KF_ARG_PTR_TO_RB_ROOT,
+ KF_ARG_PTR_TO_RB_NODE,
};
enum special_kfunc_type {
@@ -8419,6 +8874,9 @@ enum special_kfunc_type {
KF_bpf_rdonly_cast,
KF_bpf_rcu_read_lock,
KF_bpf_rcu_read_unlock,
+ KF_bpf_rbtree_remove,
+ KF_bpf_rbtree_add,
+ KF_bpf_rbtree_first,
};
BTF_SET_START(special_kfunc_set)
@@ -8430,6 +8888,9 @@ BTF_ID(func, bpf_list_pop_front)
BTF_ID(func, bpf_list_pop_back)
BTF_ID(func, bpf_cast_to_kern_ctx)
BTF_ID(func, bpf_rdonly_cast)
+BTF_ID(func, bpf_rbtree_remove)
+BTF_ID(func, bpf_rbtree_add)
+BTF_ID(func, bpf_rbtree_first)
BTF_SET_END(special_kfunc_set)
BTF_ID_LIST(special_kfunc_list)
@@ -8443,6 +8904,9 @@ BTF_ID(func, bpf_cast_to_kern_ctx)
BTF_ID(func, bpf_rdonly_cast)
BTF_ID(func, bpf_rcu_read_lock)
BTF_ID(func, bpf_rcu_read_unlock)
+BTF_ID(func, bpf_rbtree_remove)
+BTF_ID(func, bpf_rbtree_add)
+BTF_ID(func, bpf_rbtree_first)
static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
{
@@ -8504,6 +8968,12 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
if (is_kfunc_arg_list_node(meta->btf, &args[argno]))
return KF_ARG_PTR_TO_LIST_NODE;
+ if (is_kfunc_arg_rbtree_root(meta->btf, &args[argno]))
+ return KF_ARG_PTR_TO_RB_ROOT;
+
+ if (is_kfunc_arg_rbtree_node(meta->btf, &args[argno]))
+ return KF_ARG_PTR_TO_RB_NODE;
+
if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
if (!btf_type_is_struct(ref_t)) {
verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n",
@@ -8513,6 +8983,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
return KF_ARG_PTR_TO_BTF_ID;
}
+ if (is_kfunc_arg_callback(env, meta->btf, &args[argno]))
+ return KF_ARG_PTR_TO_CALLBACK;
+
if (argno + 1 < nargs && is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]))
arg_mem_size = true;
@@ -8551,9 +9024,37 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
reg_ref_id = *reg2btf_ids[base_type(reg->type)];
}
- if (is_kfunc_trusted_args(meta) || (is_kfunc_release(meta) && reg->ref_obj_id))
+ /* Enforce strict type matching for calls to kfuncs that are acquiring
+ * or releasing a reference, or are no-cast aliases. We do _not_
+ * enforce strict matching for plain KF_TRUSTED_ARGS kfuncs by default,
+ * as we want to enable BPF programs to pass types that are bitwise
+ * equivalent without forcing them to explicitly cast with something
+ * like bpf_cast_to_kern_ctx().
+ *
+ * For example, say we had a type like the following:
+ *
+ * struct bpf_cpumask {
+ * cpumask_t cpumask;
+ * refcount_t usage;
+ * };
+ *
+ * Note that as specified in <linux/cpumask.h>, cpumask_t is typedef'ed
+ * to a struct cpumask, so it would be safe to pass a struct
+ * bpf_cpumask * to a kfunc expecting a struct cpumask *.
+ *
+ * The philosophy here is similar to how we allow scalars of different
+ * types to be passed to kfuncs as long as the size is the same. The
+ * only difference here is that we're simply allowing
+ * btf_struct_ids_match() to walk the struct at the 0th offset, and
+ * resolve types.
+ */
+ if (is_kfunc_acquire(meta) ||
+ (is_kfunc_release(meta) && reg->ref_obj_id) ||
+ btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id))
strict_type_match = true;
+ WARN_ON_ONCE(is_kfunc_trusted_args(meta) && reg->off);
+
reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id);
reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
if (!btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match)) {
@@ -8599,38 +9100,54 @@ static int process_kf_arg_ptr_to_kptr(struct bpf_verifier_env *env,
return 0;
}
-static int ref_set_release_on_unlock(struct bpf_verifier_env *env, u32 ref_obj_id)
+static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
- struct bpf_func_state *state = cur_func(env);
+ struct bpf_verifier_state *state = env->cur_state;
+
+ if (!state->active_lock.ptr) {
+ verbose(env, "verifier internal error: ref_set_non_owning w/o active lock\n");
+ return -EFAULT;
+ }
+
+ if (type_flag(reg->type) & NON_OWN_REF) {
+ verbose(env, "verifier internal error: NON_OWN_REF already set\n");
+ return -EFAULT;
+ }
+
+ reg->type |= NON_OWN_REF;
+ return 0;
+}
+
+static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_obj_id)
+{
+ struct bpf_func_state *state, *unused;
struct bpf_reg_state *reg;
int i;
- /* bpf_spin_lock only allows calling list_push and list_pop, no BPF
- * subprogs, no global functions. This means that the references would
- * not be released inside the critical section but they may be added to
- * the reference state, and the acquired_refs are never copied out for a
- * different frame as BPF to BPF calls don't work in bpf_spin_lock
- * critical sections.
- */
+ state = cur_func(env);
+
if (!ref_obj_id) {
- verbose(env, "verifier internal error: ref_obj_id is zero for release_on_unlock\n");
+ verbose(env, "verifier internal error: ref_obj_id is zero for "
+ "owning -> non-owning conversion\n");
return -EFAULT;
}
+
for (i = 0; i < state->acquired_refs; i++) {
- if (state->refs[i].id == ref_obj_id) {
- if (state->refs[i].release_on_unlock) {
- verbose(env, "verifier internal error: expected false release_on_unlock");
- return -EFAULT;
+ if (state->refs[i].id != ref_obj_id)
+ continue;
+
+ /* Clear ref_obj_id here so release_reference doesn't clobber
+ * the whole reg
+ */
+ bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
+ if (reg->ref_obj_id == ref_obj_id) {
+ reg->ref_obj_id = 0;
+ ref_set_non_owning(env, reg);
}
- state->refs[i].release_on_unlock = true;
- /* Now mark everyone sharing same ref_obj_id as untrusted */
- bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
- if (reg->ref_obj_id == ref_obj_id)
- reg->type |= PTR_UNTRUSTED;
- }));
- return 0;
- }
+ }));
+ return 0;
}
+
verbose(env, "verifier internal error: ref state missing for ref_obj_id\n");
return -EFAULT;
}
@@ -8716,101 +9233,226 @@ static bool is_bpf_list_api_kfunc(u32 btf_id)
btf_id == special_kfunc_list[KF_bpf_list_pop_back];
}
-static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
- struct bpf_reg_state *reg, u32 regno,
- struct bpf_kfunc_call_arg_meta *meta)
+static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
+{
+ return btf_id == special_kfunc_list[KF_bpf_rbtree_add] ||
+ btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
+ btf_id == special_kfunc_list[KF_bpf_rbtree_first];
+}
+
+static bool is_bpf_graph_api_kfunc(u32 btf_id)
+{
+ return is_bpf_list_api_kfunc(btf_id) || is_bpf_rbtree_api_kfunc(btf_id);
+}
+
+static bool is_callback_calling_kfunc(u32 btf_id)
+{
+ return btf_id == special_kfunc_list[KF_bpf_rbtree_add];
+}
+
+static bool is_rbtree_lock_required_kfunc(u32 btf_id)
+{
+ return is_bpf_rbtree_api_kfunc(btf_id);
+}
+
+static bool check_kfunc_is_graph_root_api(struct bpf_verifier_env *env,
+ enum btf_field_type head_field_type,
+ u32 kfunc_btf_id)
+{
+ bool ret;
+
+ switch (head_field_type) {
+ case BPF_LIST_HEAD:
+ ret = is_bpf_list_api_kfunc(kfunc_btf_id);
+ break;
+ case BPF_RB_ROOT:
+ ret = is_bpf_rbtree_api_kfunc(kfunc_btf_id);
+ break;
+ default:
+ verbose(env, "verifier internal error: unexpected graph root argument type %s\n",
+ btf_field_type_name(head_field_type));
+ return false;
+ }
+
+ if (!ret)
+ verbose(env, "verifier internal error: %s head arg for unknown kfunc\n",
+ btf_field_type_name(head_field_type));
+ return ret;
+}
+
+static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
+ enum btf_field_type node_field_type,
+ u32 kfunc_btf_id)
{
+ bool ret;
+
+ switch (node_field_type) {
+ case BPF_LIST_NODE:
+ ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front] ||
+ kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back]);
+ break;
+ case BPF_RB_NODE:
+ ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
+ kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add]);
+ break;
+ default:
+ verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
+ btf_field_type_name(node_field_type));
+ return false;
+ }
+
+ if (!ret)
+ verbose(env, "verifier internal error: %s node arg for unknown kfunc\n",
+ btf_field_type_name(node_field_type));
+ return ret;
+}
+
+static int
+__process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, u32 regno,
+ struct bpf_kfunc_call_arg_meta *meta,
+ enum btf_field_type head_field_type,
+ struct btf_field **head_field)
+{
+ const char *head_type_name;
struct btf_field *field;
struct btf_record *rec;
- u32 list_head_off;
+ u32 head_off;
- if (meta->btf != btf_vmlinux || !is_bpf_list_api_kfunc(meta->func_id)) {
- verbose(env, "verifier internal error: bpf_list_head argument for unknown kfunc\n");
+ if (meta->btf != btf_vmlinux) {
+ verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
return -EFAULT;
}
+ if (!check_kfunc_is_graph_root_api(env, head_field_type, meta->func_id))
+ return -EFAULT;
+
+ head_type_name = btf_field_type_name(head_field_type);
if (!tnum_is_const(reg->var_off)) {
verbose(env,
- "R%d doesn't have constant offset. bpf_list_head has to be at the constant offset\n",
- regno);
+ "R%d doesn't have constant offset. %s has to be at the constant offset\n",
+ regno, head_type_name);
return -EINVAL;
}
rec = reg_btf_record(reg);
- list_head_off = reg->off + reg->var_off.value;
- field = btf_record_find(rec, list_head_off, BPF_LIST_HEAD);
+ head_off = reg->off + reg->var_off.value;
+ field = btf_record_find(rec, head_off, head_field_type);
if (!field) {
- verbose(env, "bpf_list_head not found at offset=%u\n", list_head_off);
+ verbose(env, "%s not found at offset=%u\n", head_type_name, head_off);
return -EINVAL;
}
/* All functions require bpf_list_head to be protected using a bpf_spin_lock */
if (check_reg_allocation_locked(env, reg)) {
- verbose(env, "bpf_spin_lock at off=%d must be held for bpf_list_head\n",
- rec->spin_lock_off);
+ verbose(env, "bpf_spin_lock at off=%d must be held for %s\n",
+ rec->spin_lock_off, head_type_name);
return -EINVAL;
}
- if (meta->arg_list_head.field) {
- verbose(env, "verifier internal error: repeating bpf_list_head arg\n");
+ if (*head_field) {
+ verbose(env, "verifier internal error: repeating %s arg\n", head_type_name);
return -EFAULT;
}
- meta->arg_list_head.field = field;
+ *head_field = field;
return 0;
}
-static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
+static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
struct bpf_reg_state *reg, u32 regno,
struct bpf_kfunc_call_arg_meta *meta)
{
+ return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_LIST_HEAD,
+ &meta->arg_list_head.field);
+}
+
+static int process_kf_arg_ptr_to_rbtree_root(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, u32 regno,
+ struct bpf_kfunc_call_arg_meta *meta)
+{
+ return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_RB_ROOT,
+ &meta->arg_rbtree_root.field);
+}
+
+static int
+__process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, u32 regno,
+ struct bpf_kfunc_call_arg_meta *meta,
+ enum btf_field_type head_field_type,
+ enum btf_field_type node_field_type,
+ struct btf_field **node_field)
+{
+ const char *node_type_name;
const struct btf_type *et, *t;
struct btf_field *field;
- struct btf_record *rec;
- u32 list_node_off;
+ u32 node_off;
- if (meta->btf != btf_vmlinux ||
- (meta->func_id != special_kfunc_list[KF_bpf_list_push_front] &&
- meta->func_id != special_kfunc_list[KF_bpf_list_push_back])) {
- verbose(env, "verifier internal error: bpf_list_node argument for unknown kfunc\n");
+ if (meta->btf != btf_vmlinux) {
+ verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
return -EFAULT;
}
+ if (!check_kfunc_is_graph_node_api(env, node_field_type, meta->func_id))
+ return -EFAULT;
+
+ node_type_name = btf_field_type_name(node_field_type);
if (!tnum_is_const(reg->var_off)) {
verbose(env,
- "R%d doesn't have constant offset. bpf_list_node has to be at the constant offset\n",
- regno);
+ "R%d doesn't have constant offset. %s has to be at the constant offset\n",
+ regno, node_type_name);
return -EINVAL;
}
- rec = reg_btf_record(reg);
- list_node_off = reg->off + reg->var_off.value;
- field = btf_record_find(rec, list_node_off, BPF_LIST_NODE);
- if (!field || field->offset != list_node_off) {
- verbose(env, "bpf_list_node not found at offset=%u\n", list_node_off);
+ node_off = reg->off + reg->var_off.value;
+ field = reg_find_field_offset(reg, node_off, node_field_type);
+ if (!field || field->offset != node_off) {
+ verbose(env, "%s not found at offset=%u\n", node_type_name, node_off);
return -EINVAL;
}
- field = meta->arg_list_head.field;
+ field = *node_field;
- et = btf_type_by_id(field->list_head.btf, field->list_head.value_btf_id);
+ et = btf_type_by_id(field->graph_root.btf, field->graph_root.value_btf_id);
t = btf_type_by_id(reg->btf, reg->btf_id);
- if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->list_head.btf,
- field->list_head.value_btf_id, true)) {
- verbose(env, "operation on bpf_list_head expects arg#1 bpf_list_node at offset=%d "
+ if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->graph_root.btf,
+ field->graph_root.value_btf_id, true)) {
+ verbose(env, "operation on %s expects arg#1 %s at offset=%d "
"in struct %s, but arg is at offset=%d in struct %s\n",
- field->list_head.node_offset, btf_name_by_offset(field->list_head.btf, et->name_off),
- list_node_off, btf_name_by_offset(reg->btf, t->name_off));
+ btf_field_type_name(head_field_type),
+ btf_field_type_name(node_field_type),
+ field->graph_root.node_offset,
+ btf_name_by_offset(field->graph_root.btf, et->name_off),
+ node_off, btf_name_by_offset(reg->btf, t->name_off));
return -EINVAL;
}
- if (list_node_off != field->list_head.node_offset) {
- verbose(env, "arg#1 offset=%d, but expected bpf_list_node at offset=%d in struct %s\n",
- list_node_off, field->list_head.node_offset,
- btf_name_by_offset(field->list_head.btf, et->name_off));
+ if (node_off != field->graph_root.node_offset) {
+ verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n",
+ node_off, btf_field_type_name(node_field_type),
+ field->graph_root.node_offset,
+ btf_name_by_offset(field->graph_root.btf, et->name_off));
return -EINVAL;
}
- /* Set arg#1 for expiration after unlock */
- return ref_set_release_on_unlock(env, reg->ref_obj_id);
+
+ return 0;
+}
+
+static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, u32 regno,
+ struct bpf_kfunc_call_arg_meta *meta)
+{
+ return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
+ BPF_LIST_HEAD, BPF_LIST_NODE,
+ &meta->arg_list_head.field);
+}
+
+static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, u32 regno,
+ struct bpf_kfunc_call_arg_meta *meta)
+{
+ return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
+ BPF_RB_ROOT, BPF_RB_NODE,
+ &meta->arg_rbtree_root.field);
}
static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta)
@@ -8896,6 +9538,12 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
return -EINVAL;
}
+ if (is_kfunc_trusted_args(meta) &&
+ (register_is_null(reg) || type_may_be_null(reg->type))) {
+ verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i);
+ return -EACCES;
+ }
+
if (reg->ref_obj_id) {
if (is_kfunc_release(meta) && meta->ref_obj_id) {
verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
@@ -8941,8 +9589,11 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
case KF_ARG_PTR_TO_DYNPTR:
case KF_ARG_PTR_TO_LIST_HEAD:
case KF_ARG_PTR_TO_LIST_NODE:
+ case KF_ARG_PTR_TO_RB_ROOT:
+ case KF_ARG_PTR_TO_RB_NODE:
case KF_ARG_PTR_TO_MEM:
case KF_ARG_PTR_TO_MEM_SIZE:
+ case KF_ARG_PTR_TO_CALLBACK:
/* Trusted by default */
break;
default:
@@ -9019,6 +9670,20 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
if (ret < 0)
return ret;
break;
+ case KF_ARG_PTR_TO_RB_ROOT:
+ if (reg->type != PTR_TO_MAP_VALUE &&
+ reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
+ verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
+ return -EINVAL;
+ }
+ if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
+ verbose(env, "allocated object must be referenced\n");
+ return -EINVAL;
+ }
+ ret = process_kf_arg_ptr_to_rbtree_root(env, reg, regno, meta);
+ if (ret < 0)
+ return ret;
+ break;
case KF_ARG_PTR_TO_LIST_NODE:
if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
verbose(env, "arg#%d expected pointer to allocated object\n", i);
@@ -9032,6 +9697,31 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
if (ret < 0)
return ret;
break;
+ case KF_ARG_PTR_TO_RB_NODE:
+ if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_remove]) {
+ if (!type_is_non_owning_ref(reg->type) || reg->ref_obj_id) {
+ verbose(env, "rbtree_remove node input must be non-owning ref\n");
+ return -EINVAL;
+ }
+ if (in_rbtree_lock_required_cb(env)) {
+ verbose(env, "rbtree_remove not allowed in rbtree cb\n");
+ return -EINVAL;
+ }
+ } else {
+ if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
+ verbose(env, "arg#%d expected pointer to allocated object\n", i);
+ return -EINVAL;
+ }
+ if (!reg->ref_obj_id) {
+ verbose(env, "allocated object must be referenced\n");
+ return -EINVAL;
+ }
+ }
+
+ ret = process_kf_arg_ptr_to_rbtree_node(env, reg, regno, meta);
+ if (ret < 0)
+ return ret;
+ break;
case KF_ARG_PTR_TO_BTF_ID:
/* Only base_type is checked, further checks are done here */
if ((base_type(reg->type) != PTR_TO_BTF_ID ||
@@ -9067,6 +9757,9 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
/* Skip next '__sz' argument */
i++;
break;
+ case KF_ARG_PTR_TO_CALLBACK:
+ meta->subprogno = reg->subprogno;
+ break;
}
}
@@ -9083,11 +9776,11 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx_p)
{
const struct btf_type *t, *func, *func_proto, *ptr_type;
+ u32 i, nargs, func_id, ptr_type_id, release_ref_obj_id;
struct bpf_reg_state *regs = cur_regs(env);
const char *func_name, *ptr_type_name;
bool sleepable, rcu_lock, rcu_unlock;
struct bpf_kfunc_call_arg_meta meta;
- u32 i, nargs, func_id, ptr_type_id;
int err, insn_idx = *insn_idx_p;
const struct btf_param *args;
const struct btf_type *ret_t;
@@ -9182,6 +9875,35 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
}
+ if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front] ||
+ meta.func_id == special_kfunc_list[KF_bpf_list_push_back] ||
+ meta.func_id == special_kfunc_list[KF_bpf_rbtree_add]) {
+ release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
+ err = ref_convert_owning_non_owning(env, release_ref_obj_id);
+ if (err) {
+ verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n",
+ func_name, func_id);
+ return err;
+ }
+
+ err = release_reference(env, release_ref_obj_id);
+ if (err) {
+ verbose(env, "kfunc %s#%d reference has not been acquired before\n",
+ func_name, func_id);
+ return err;
+ }
+ }
+
+ if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add]) {
+ err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
+ set_rbtree_add_callback_state);
+ if (err) {
+ verbose(env, "kfunc %s#%d failed callback verification\n",
+ func_name, func_id);
+ return err;
+ }
+ }
+
for (i = 0; i < CALLER_SAVED_REGS; i++)
mark_reg_not_init(env, regs, caller_saved[i]);
@@ -9246,11 +9968,12 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
struct btf_field *field = meta.arg_list_head.field;
- mark_reg_known_zero(env, regs, BPF_REG_0);
- regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
- regs[BPF_REG_0].btf = field->list_head.btf;
- regs[BPF_REG_0].btf_id = field->list_head.value_btf_id;
- regs[BPF_REG_0].off = field->list_head.node_offset;
+ mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
+ } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
+ meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
+ struct btf_field *field = meta.arg_rbtree_root.field;
+
+ mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
} else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
@@ -9316,7 +10039,13 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (is_kfunc_ret_null(&meta))
regs[BPF_REG_0].id = id;
regs[BPF_REG_0].ref_obj_id = id;
+ } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
+ ref_set_non_owning(env, &regs[BPF_REG_0]);
}
+
+ if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove])
+ invalidate_non_owning_refs(env);
+
if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
regs[BPF_REG_0].id = ++env->id_gen;
} /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
@@ -11502,8 +12231,10 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
*/
if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0)))
return;
- if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL) && WARN_ON_ONCE(reg->off))
+ if (!(type_is_ptr_alloc_obj(reg->type) || type_is_non_owning_ref(reg->type)) &&
+ WARN_ON_ONCE(reg->off))
return;
+
if (is_null) {
reg->type = SCALAR_VALUE;
/* We don't need id and ref_obj_id from this point
@@ -12969,6 +13700,13 @@ static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
{
unsigned int i;
+ /* either both IDs should be set or both should be zero */
+ if (!!old_id != !!cur_id)
+ return false;
+
+ if (old_id == 0) /* cur_id == 0 as well */
+ return true;
+
for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
if (!idmap[i].old) {
/* Reached an empty slot; haven't seen this id before */
@@ -13080,79 +13818,74 @@ next:
}
}
+static bool regs_exact(const struct bpf_reg_state *rold,
+ const struct bpf_reg_state *rcur,
+ struct bpf_id_pair *idmap)
+{
+ return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
+ check_ids(rold->id, rcur->id, idmap) &&
+ check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
+}
+
/* Returns true if (rold safe implies rcur safe) */
static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
{
- bool equal;
-
if (!(rold->live & REG_LIVE_READ))
/* explored state didn't use this */
return true;
-
- equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
-
if (rold->type == NOT_INIT)
/* explored state can't have used this */
return true;
if (rcur->type == NOT_INIT)
return false;
+
+ /* Enforce that register types have to match exactly, including their
+ * modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general
+ * rule.
+ *
+ * One can make a point that using a pointer register as unbounded
+ * SCALAR would be technically acceptable, but this could lead to
+ * pointer leaks because scalars are allowed to leak while pointers
+ * are not. We could make this safe in special cases if root is
+ * calling us, but it's probably not worth the hassle.
+ *
+ * Also, register types that are *not* MAYBE_NULL could technically be
+ * safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE
+ * is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point
+ * to the same map).
+ * However, if the old MAYBE_NULL register then got NULL checked,
+ * doing so could have affected others with the same id, and we can't
+ * check for that because we lost the id when we converted to
+ * a non-MAYBE_NULL variant.
+ * So, as a general rule we don't allow mixing MAYBE_NULL and
+ * non-MAYBE_NULL registers as well.
+ */
+ if (rold->type != rcur->type)
+ return false;
+
switch (base_type(rold->type)) {
case SCALAR_VALUE:
- if (equal)
+ if (regs_exact(rold, rcur, idmap))
return true;
if (env->explore_alu_limits)
return false;
- if (rcur->type == SCALAR_VALUE) {
- if (!rold->precise)
- return true;
- /* new val must satisfy old val knowledge */
- return range_within(rold, rcur) &&
- tnum_in(rold->var_off, rcur->var_off);
- } else {
- /* We're trying to use a pointer in place of a scalar.
- * Even if the scalar was unbounded, this could lead to
- * pointer leaks because scalars are allowed to leak
- * while pointers are not. We could make this safe in
- * special cases if root is calling us, but it's
- * probably not worth the hassle.
- */
- return false;
- }
+ if (!rold->precise)
+ return true;
+ /* new val must satisfy old val knowledge */
+ return range_within(rold, rcur) &&
+ tnum_in(rold->var_off, rcur->var_off);
case PTR_TO_MAP_KEY:
case PTR_TO_MAP_VALUE:
- /* a PTR_TO_MAP_VALUE could be safe to use as a
- * PTR_TO_MAP_VALUE_OR_NULL into the same map.
- * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
- * checked, doing so could have affected others with the same
- * id, and we can't check for that because we lost the id when
- * we converted to a PTR_TO_MAP_VALUE.
- */
- if (type_may_be_null(rold->type)) {
- if (!type_may_be_null(rcur->type))
- return false;
- if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
- return false;
- /* Check our ids match any regs they're supposed to */
- return check_ids(rold->id, rcur->id, idmap);
- }
-
/* If the new min/max/var_off satisfy the old ones and
* everything else matches, we are OK.
- * 'id' is not compared, since it's only used for maps with
- * bpf_spin_lock inside map element and in such cases if
- * the rest of the prog is valid for one map element then
- * it's valid for all map elements regardless of the key
- * used in bpf_map_lookup()
*/
- return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
+ return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
range_within(rold, rcur) &&
tnum_in(rold->var_off, rcur->var_off) &&
check_ids(rold->id, rcur->id, idmap);
case PTR_TO_PACKET_META:
case PTR_TO_PACKET:
- if (rcur->type != rold->type)
- return false;
/* We must have at least as much range as the old ptr
* did, so that any accesses which were safe before are
* still safe. This is true even if old range < old off,
@@ -13167,7 +13900,7 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
if (rold->off != rcur->off)
return false;
/* id relations must be preserved */
- if (rold->id && !check_ids(rold->id, rcur->id, idmap))
+ if (!check_ids(rold->id, rcur->id, idmap))
return false;
/* new val must satisfy old val knowledge */
return range_within(rold, rcur) &&
@@ -13176,15 +13909,10 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
/* two stack pointers are equal only if they're pointing to
* the same stack frame, since fp-8 in foo != fp-8 in bar
*/
- return equal && rold->frameno == rcur->frameno;
+ return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno;
default:
- /* Only valid matches are exact, which memcmp() */
- return equal;
+ return regs_exact(rold, rcur, idmap);
}
-
- /* Shouldn't get here; if we do, say it's not safe */
- WARN_ON_ONCE(1);
- return false;
}
static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
@@ -13231,10 +13959,9 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
return false;
if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
continue;
- if (!is_spilled_reg(&old->stack[spi]))
- continue;
- if (!regsafe(env, &old->stack[spi].spilled_ptr,
- &cur->stack[spi].spilled_ptr, idmap))
+ /* Both old and cur have the same slot_type */
+ switch (old->stack[spi].slot_type[BPF_REG_SIZE - 1]) {
+ case STACK_SPILL:
/* when explored and current stack slot are both storing
* spilled registers, check that stored pointers types
* are the same as well.
@@ -13245,17 +13972,48 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
* such verifier states are not equivalent.
* return false to continue verification of this path
*/
+ if (!regsafe(env, &old->stack[spi].spilled_ptr,
+ &cur->stack[spi].spilled_ptr, idmap))
+ return false;
+ break;
+ case STACK_DYNPTR:
+ {
+ const struct bpf_reg_state *old_reg, *cur_reg;
+
+ old_reg = &old->stack[spi].spilled_ptr;
+ cur_reg = &cur->stack[spi].spilled_ptr;
+ if (old_reg->dynptr.type != cur_reg->dynptr.type ||
+ old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot ||
+ !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
+ return false;
+ break;
+ }
+ case STACK_MISC:
+ case STACK_ZERO:
+ case STACK_INVALID:
+ continue;
+ /* Ensure that new unhandled slot types return false by default */
+ default:
return false;
+ }
}
return true;
}
-static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
+static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur,
+ struct bpf_id_pair *idmap)
{
+ int i;
+
if (old->acquired_refs != cur->acquired_refs)
return false;
- return !memcmp(old->refs, cur->refs,
- sizeof(*old->refs) * old->acquired_refs);
+
+ for (i = 0; i < old->acquired_refs; i++) {
+ if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap))
+ return false;
+ }
+
+ return true;
}
/* compare two verifier states
@@ -13297,7 +14055,7 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat
if (!stacksafe(env, old, cur, env->idmap_scratch))
return false;
- if (!refsafe(old, cur))
+ if (!refsafe(old, cur, env->idmap_scratch))
return false;
return true;
@@ -13834,7 +14592,7 @@ static int do_check(struct bpf_verifier_env *env)
env->prev_log_len = env->log.len_used;
}
- if (bpf_prog_is_dev_bound(env->prog->aux)) {
+ if (bpf_prog_is_offloaded(env->prog->aux)) {
err = bpf_prog_offload_verify_insn(env, env->insn_idx,
env->prev_insn_idx);
if (err)
@@ -13985,7 +14743,7 @@ static int do_check(struct bpf_verifier_env *env)
if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) ||
(insn->src_reg == BPF_PSEUDO_CALL) ||
(insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
- (insn->off != 0 || !is_bpf_list_api_kfunc(insn->imm)))) {
+ (insn->off != 0 || !is_bpf_graph_api_kfunc(insn->imm)))) {
verbose(env, "function calls are not allowed while holding a lock\n");
return -EINVAL;
}
@@ -14021,7 +14779,8 @@ static int do_check(struct bpf_verifier_env *env)
return -EINVAL;
}
- if (env->cur_state->active_lock.ptr) {
+ if (env->cur_state->active_lock.ptr &&
+ !in_rbtree_lock_required_cb(env)) {
verbose(env, "bpf_spin_unlock is missing\n");
return -EINVAL;
}
@@ -14283,9 +15042,10 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
{
enum bpf_prog_type prog_type = resolve_prog_type(prog);
- if (btf_record_has_field(map->record, BPF_LIST_HEAD)) {
+ if (btf_record_has_field(map->record, BPF_LIST_HEAD) ||
+ btf_record_has_field(map->record, BPF_RB_ROOT)) {
if (is_tracing_prog_type(prog_type)) {
- verbose(env, "tracing progs cannot use bpf_list_head yet\n");
+ verbose(env, "tracing progs cannot use bpf_{list_head,rb_root} yet\n");
return -EINVAL;
}
}
@@ -14314,7 +15074,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
}
}
- if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
+ if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) &&
!bpf_offload_prog_map_match(prog, map)) {
verbose(env, "offload device mismatch between prog and map\n");
return -EINVAL;
@@ -14795,7 +15555,7 @@ static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
unsigned int orig_prog_len = env->prog->len;
int err;
- if (bpf_prog_is_dev_bound(env->prog->aux))
+ if (bpf_prog_is_offloaded(env->prog->aux))
bpf_prog_offload_remove_insns(env, off, cnt);
err = bpf_remove_insns(env->prog, off, cnt);
@@ -14876,7 +15636,7 @@ static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
else
continue;
- if (bpf_prog_is_dev_bound(env->prog->aux))
+ if (bpf_prog_is_offloaded(env->prog->aux))
bpf_prog_offload_replace_insn(env, i, &ja);
memcpy(insn, &ja, sizeof(ja));
@@ -15063,7 +15823,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
}
}
- if (bpf_prog_is_dev_bound(env->prog->aux))
+ if (bpf_prog_is_offloaded(env->prog->aux))
return 0;
insn = env->prog->insnsi + delta;
@@ -15463,7 +16223,7 @@ static int fixup_call_args(struct bpf_verifier_env *env)
int err = 0;
if (env->prog->jit_requested &&
- !bpf_prog_is_dev_bound(env->prog->aux)) {
+ !bpf_prog_is_offloaded(env->prog->aux)) {
err = jit_subprogs(env);
if (err == 0)
return 0;
@@ -15507,12 +16267,25 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
struct bpf_insn *insn_buf, int insn_idx, int *cnt)
{
const struct bpf_kfunc_desc *desc;
+ void *xdp_kfunc;
if (!insn->imm) {
verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
return -EINVAL;
}
+ *cnt = 0;
+
+ if (bpf_dev_bound_kfunc_id(insn->imm)) {
+ xdp_kfunc = bpf_dev_bound_resolve_kfunc(env->prog, insn->imm);
+ if (xdp_kfunc) {
+ insn->imm = BPF_CALL_IMM(xdp_kfunc);
+ return 0;
+ }
+
+ /* fall back to the default kfunc when not supported by the netdev */
+ }
+
/* insn->imm has the btf func_id. Replace it with
* an address (relative to __bpf_call_base).
*/
@@ -15523,7 +16296,6 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return -EFAULT;
}
- *cnt = 0;
insn->imm = desc->imm;
if (insn->off)
return 0;
@@ -16449,7 +17221,7 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
}
if (st_ops->check_member) {
- int err = st_ops->check_member(t, member);
+ int err = st_ops->check_member(t, member, prog);
if (err) {
verbose(env, "attach to unsupported member %s of struct %s\n",
@@ -16530,6 +17302,12 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
if (tgt_prog) {
struct bpf_prog_aux *aux = tgt_prog->aux;
+ if (bpf_prog_is_dev_bound(prog->aux) &&
+ !bpf_prog_dev_bound_match(prog, tgt_prog)) {
+ bpf_log(log, "Target program bound device mismatch");
+ return -EINVAL;
+ }
+
for (i = 0; i < aux->func_info_cnt; i++)
if (aux->func_info[i].type_id == btf_id) {
subprog = i;
@@ -16751,6 +17529,24 @@ BTF_ID(func, rcu_read_unlock_strict)
#endif
BTF_SET_END(btf_id_deny)
+static bool can_be_sleepable(struct bpf_prog *prog)
+{
+ if (prog->type == BPF_PROG_TYPE_TRACING) {
+ switch (prog->expected_attach_type) {
+ case BPF_TRACE_FENTRY:
+ case BPF_TRACE_FEXIT:
+ case BPF_MODIFY_RETURN:
+ case BPF_TRACE_ITER:
+ return true;
+ default:
+ return false;
+ }
+ }
+ return prog->type == BPF_PROG_TYPE_LSM ||
+ prog->type == BPF_PROG_TYPE_KPROBE /* only for uprobes */ ||
+ prog->type == BPF_PROG_TYPE_STRUCT_OPS;
+}
+
static int check_attach_btf_id(struct bpf_verifier_env *env)
{
struct bpf_prog *prog = env->prog;
@@ -16769,9 +17565,8 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
return -EINVAL;
}
- if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
- prog->type != BPF_PROG_TYPE_LSM && prog->type != BPF_PROG_TYPE_KPROBE) {
- verbose(env, "Only fentry/fexit/fmod_ret, lsm, and kprobe/uprobe programs can be sleepable\n");
+ if (prog->aux->sleepable && !can_be_sleepable(prog)) {
+ verbose(env, "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n");
return -EINVAL;
}
@@ -16950,7 +17745,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
if (ret < 0)
goto skip_full_check;
- if (bpf_prog_is_dev_bound(env->prog->aux)) {
+ if (bpf_prog_is_offloaded(env->prog->aux)) {
ret = bpf_prog_offload_verifier_prep(env->prog);
if (ret)
goto skip_full_check;
@@ -16963,7 +17758,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
ret = do_check_subprogs(env);
ret = ret ?: do_check_main(env);
- if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
+ if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux))
ret = bpf_prog_offload_finalize(env);
skip_full_check:
@@ -16998,7 +17793,7 @@ skip_full_check:
/* do 32-bit optimization after insn patching has done so those patched
* insns could be handled correctly.
*/
- if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
+ if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) {
ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
: false;
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 793ecff29038..831f1f472bb8 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -26,7 +26,7 @@ static struct cgroup_rstat_cpu *cgroup_rstat_cpu(struct cgroup *cgrp, int cpu)
* rstat_cpu->updated_children list. See the comment on top of
* cgroup_rstat_cpu definition for details.
*/
-void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
+__bpf_kfunc void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
{
raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
unsigned long flags;
@@ -231,7 +231,7 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)
*
* This function may block.
*/
-void cgroup_rstat_flush(struct cgroup *cgrp)
+__bpf_kfunc void cgroup_rstat_flush(struct cgroup *cgrp)
{
might_sleep();
diff --git a/kernel/exit.c b/kernel/exit.c
index bccfa4218356..f2afdb0add7c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1905,7 +1905,14 @@ bool thread_group_exited(struct pid *pid)
}
EXPORT_SYMBOL(thread_group_exited);
-__weak void abort(void)
+/*
+ * This needs to be __function_aligned as GCC implicitly makes any
+ * implementation of abort() cold and drops alignment specified by
+ * -falign-functions=N.
+ *
+ * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88345#c11
+ */
+__weak __function_aligned void abort(void)
{
BUG();
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 969e8f52f7da..b1cf259854ca 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -6,6 +6,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/btf.h>
#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/file.h>
@@ -975,7 +976,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
}
STACK_FRAME_NON_STANDARD(__crash_kexec);
-void crash_kexec(struct pt_regs *regs)
+__bpf_kfunc void crash_kexec(struct pt_regs *regs)
{
int old_cpu, this_cpu;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1c18ecf9f98b..00e177de91cc 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -458,7 +458,7 @@ static inline int kprobe_optready(struct kprobe *p)
}
/* Return true if the kprobe is disarmed. Note: p must be on hash list */
-static inline bool kprobe_disarmed(struct kprobe *p)
+bool kprobe_disarmed(struct kprobe *p)
{
struct optimized_kprobe *op;
@@ -555,17 +555,15 @@ static void do_unoptimize_kprobes(void)
/* See comment in do_optimize_kprobes() */
lockdep_assert_cpus_held();
- /* Unoptimization must be done anytime */
- if (list_empty(&unoptimizing_list))
- return;
+ if (!list_empty(&unoptimizing_list))
+ arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
- arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
- /* Loop on 'freeing_list' for disarming */
+ /* Loop on 'freeing_list' for disarming and removing from kprobe hash list */
list_for_each_entry_safe(op, tmp, &freeing_list, list) {
/* Switching from detour code to origin */
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
- /* Disarm probes if marked disabled */
- if (kprobe_disabled(&op->kp))
+ /* Disarm probes if marked disabled and not gone */
+ if (kprobe_disabled(&op->kp) && !kprobe_gone(&op->kp))
arch_disarm_kprobe(&op->kp);
if (kprobe_unused(&op->kp)) {
/*
@@ -662,7 +660,7 @@ void wait_for_kprobe_optimizer(void)
mutex_unlock(&kprobe_mutex);
}
-static bool optprobe_queued_unopt(struct optimized_kprobe *op)
+bool optprobe_queued_unopt(struct optimized_kprobe *op)
{
struct optimized_kprobe *_op;
@@ -797,14 +795,13 @@ static void kill_optimized_kprobe(struct kprobe *p)
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
if (kprobe_unused(p)) {
- /* Enqueue if it is unused */
- list_add(&op->list, &freeing_list);
/*
- * Remove unused probes from the hash list. After waiting
- * for synchronization, this probe is reclaimed.
- * (reclaiming is done by do_free_cleaned_kprobes().)
+ * Unused kprobe is on unoptimizing or freeing list. We move it
+ * to freeing_list and let the kprobe_optimizer() remove it from
+ * the kprobe hash list and free it.
*/
- hlist_del_rcu(&op->kp.hlist);
+ if (optprobe_queued_unopt(op))
+ list_move(&op->list, &freeing_list);
}
/* Don't touch the code, because it is already freed. */
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 201f0c0482fb..4bd2d5e10f20 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -118,7 +118,6 @@ static struct klp_object *klp_find_object(struct klp_patch *patch,
}
struct klp_find_arg {
- const char *objname;
const char *name;
unsigned long addr;
unsigned long count;
@@ -148,15 +147,9 @@ static int klp_find_callback(void *data, const char *name,
{
struct klp_find_arg *args = data;
- if ((mod && !args->objname) || (!mod && args->objname))
- return 0;
-
if (strcmp(args->name, name))
return 0;
- if (args->objname && strcmp(args->objname, mod->name))
- return 0;
-
return klp_match_callback(data, addr);
}
@@ -164,7 +157,6 @@ static int klp_find_object_symbol(const char *objname, const char *name,
unsigned long sympos, unsigned long *addr)
{
struct klp_find_arg args = {
- .objname = objname,
.name = name,
.addr = 0,
.count = 0,
@@ -172,7 +164,7 @@ static int klp_find_object_symbol(const char *objname, const char *name,
};
if (objname)
- module_kallsyms_on_each_symbol(klp_find_callback, &args);
+ module_kallsyms_on_each_symbol(objname, klp_find_callback, &args);
else
kallsyms_on_each_match_symbol(klp_match_callback, name, &args);
@@ -268,6 +260,14 @@ static int klp_resolve_symbols(Elf_Shdr *sechdrs, const char *strtab,
return 0;
}
+void __weak clear_relocate_add(Elf_Shdr *sechdrs,
+ const char *strtab,
+ unsigned int symindex,
+ unsigned int relsec,
+ struct module *me)
+{
+}
+
/*
* At a high-level, there are two types of klp relocation sections: those which
* reference symbols which live in vmlinux; and those which reference symbols
@@ -291,10 +291,10 @@ static int klp_resolve_symbols(Elf_Shdr *sechdrs, const char *strtab,
* the to-be-patched module to be loaded and patched sometime *after* the
* klp module is loaded.
*/
-int klp_apply_section_relocs(struct module *pmod, Elf_Shdr *sechdrs,
- const char *shstrtab, const char *strtab,
- unsigned int symndx, unsigned int secndx,
- const char *objname)
+static int klp_write_section_relocs(struct module *pmod, Elf_Shdr *sechdrs,
+ const char *shstrtab, const char *strtab,
+ unsigned int symndx, unsigned int secndx,
+ const char *objname, bool apply)
{
int cnt, ret;
char sec_objname[MODULE_NAME_LEN];
@@ -316,11 +316,26 @@ int klp_apply_section_relocs(struct module *pmod, Elf_Shdr *sechdrs,
if (strcmp(objname ? objname : "vmlinux", sec_objname))
return 0;
- ret = klp_resolve_symbols(sechdrs, strtab, symndx, sec, sec_objname);
- if (ret)
- return ret;
+ if (apply) {
+ ret = klp_resolve_symbols(sechdrs, strtab, symndx,
+ sec, sec_objname);
+ if (ret)
+ return ret;
+
+ return apply_relocate_add(sechdrs, strtab, symndx, secndx, pmod);
+ }
+
+ clear_relocate_add(sechdrs, strtab, symndx, secndx, pmod);
+ return 0;
+}
- return apply_relocate_add(sechdrs, strtab, symndx, secndx, pmod);
+int klp_apply_section_relocs(struct module *pmod, Elf_Shdr *sechdrs,
+ const char *shstrtab, const char *strtab,
+ unsigned int symndx, unsigned int secndx,
+ const char *objname)
+{
+ return klp_write_section_relocs(pmod, sechdrs, shstrtab, strtab, symndx,
+ secndx, objname, true);
}
/*
@@ -769,8 +784,9 @@ static int klp_init_func(struct klp_object *obj, struct klp_func *func)
func->old_sympos ? func->old_sympos : 1);
}
-static int klp_apply_object_relocs(struct klp_patch *patch,
- struct klp_object *obj)
+static int klp_write_object_relocs(struct klp_patch *patch,
+ struct klp_object *obj,
+ bool apply)
{
int i, ret;
struct klp_modinfo *info = patch->mod->klp_info;
@@ -781,10 +797,10 @@ static int klp_apply_object_relocs(struct klp_patch *patch,
if (!(sec->sh_flags & SHF_RELA_LIVEPATCH))
continue;
- ret = klp_apply_section_relocs(patch->mod, info->sechdrs,
+ ret = klp_write_section_relocs(patch->mod, info->sechdrs,
info->secstrings,
patch->mod->core_kallsyms.strtab,
- info->symndx, i, obj->name);
+ info->symndx, i, obj->name, apply);
if (ret)
return ret;
}
@@ -792,6 +808,18 @@ static int klp_apply_object_relocs(struct klp_patch *patch,
return 0;
}
+static int klp_apply_object_relocs(struct klp_patch *patch,
+ struct klp_object *obj)
+{
+ return klp_write_object_relocs(patch, obj, true);
+}
+
+static void klp_clear_object_relocs(struct klp_patch *patch,
+ struct klp_object *obj)
+{
+ klp_write_object_relocs(patch, obj, false);
+}
+
/* parts of the initialization that is done only when the object is loaded */
static int klp_init_object_loaded(struct klp_patch *patch,
struct klp_object *obj)
@@ -1179,7 +1207,7 @@ static void klp_cleanup_module_patches_limited(struct module *mod,
klp_unpatch_object(obj);
klp_post_unpatch_callback(obj);
-
+ klp_clear_object_relocs(patch, obj);
klp_free_object_loaded(obj);
break;
}
diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c
index 4523f99b0358..ab2376a1be88 100644
--- a/kernel/module/kallsyms.c
+++ b/kernel/module/kallsyms.c
@@ -494,7 +494,8 @@ unsigned long module_kallsyms_lookup_name(const char *name)
return ret;
}
-int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
+int module_kallsyms_on_each_symbol(const char *modname,
+ int (*fn)(void *, const char *,
struct module *, unsigned long),
void *data)
{
@@ -509,6 +510,9 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
if (mod->state == MODULE_STATE_UNFORMED)
continue;
+ if (modname && strcmp(modname, mod->name))
+ continue;
+
/* Use rcu_dereference_sched() to remain compliant with the sparse tool */
preempt_disable();
kallsyms = rcu_dereference_sched(mod->kallsyms);
@@ -525,6 +529,13 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
if (ret != 0)
goto out;
}
+
+ /*
+ * The given module has been found, so the subsequent modules do not
+ * need to be compared.
+ */
+ if (modname)
+ break;
}
out:
mutex_unlock(&module_mutex);
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 4ac3fe43e6c8..d3be89de706d 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -17,6 +17,7 @@
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/kernel_read_file.h>
+#include <linux/kstrtox.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/elf.h>
@@ -2675,7 +2676,7 @@ static int unknown_module_param_cb(char *param, char *val, const char *modname,
int ret;
if (strcmp(param, "async_probe") == 0) {
- if (strtobool(val, &mod->async_probe_requested))
+ if (kstrtobool(val, &mod->async_probe_requested))
mod->async_probe_requested = true;
return 0;
}
diff --git a/kernel/params.c b/kernel/params.c
index 14d66070757b..6e34ca89ebae 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -4,6 +4,7 @@
*/
#include <linux/kernel.h>
+#include <linux/kstrtox.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/module.h>
@@ -310,7 +311,7 @@ int param_set_bool(const char *val, const struct kernel_param *kp)
if (!val) val = "1";
/* One of =[yYnN01] */
- return strtobool(val, kp->arg);
+ return kstrtobool(val, kp->arg);
}
EXPORT_SYMBOL(param_set_bool);
diff --git a/kernel/printk/index.c b/kernel/printk/index.c
index c85be186a783..a6b27526baaf 100644
--- a/kernel/printk/index.c
+++ b/kernel/printk/index.c
@@ -145,7 +145,7 @@ static void pi_create_file(struct module *mod)
#ifdef CONFIG_MODULES
static void pi_remove_file(struct module *mod)
{
- debugfs_remove(debugfs_lookup(pi_get_module_name(mod), dfs_index));
+ debugfs_lookup_and_remove(pi_get_module_name(mod), dfs_index);
}
static int pi_module_notify(struct notifier_block *nb, unsigned long op,
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index d947ca6c84f9..2a17704136f1 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -14,6 +14,21 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
#ifdef CONFIG_PRINTK
+#ifdef CONFIG_PRINTK_CALLER
+#define PRINTK_PREFIX_MAX 48
+#else
+#define PRINTK_PREFIX_MAX 32
+#endif
+
+/*
+ * the maximum size of a formatted record (i.e. with prefix added
+ * per line and dropped messages or in extended message format)
+ */
+#define PRINTK_MESSAGE_MAX 2048
+
+/* the maximum size allowed to be reserved for a record */
+#define PRINTKRB_RECORD_MAX 1024
+
/* Flags for a single printk record. */
enum printk_info_flags {
LOG_NEWLINE = 2, /* text ended with a newline */
@@ -48,6 +63,10 @@ u16 printk_parse_prefix(const char *text, int *level,
enum printk_info_flags *flags);
#else
+#define PRINTK_PREFIX_MAX 0
+#define PRINTK_MESSAGE_MAX 0
+#define PRINTKRB_RECORD_MAX 0
+
/*
* In !PRINTK builds we still export console_sem
* semaphore and some of console functions (console_unlock()/etc.), so
@@ -58,3 +77,29 @@ u16 printk_parse_prefix(const char *text, int *level,
static inline bool printk_percpu_data_ready(void) { return false; }
#endif /* CONFIG_PRINTK */
+
+/**
+ * struct printk_buffers - Buffers to read/format/output printk messages.
+ * @outbuf: After formatting, contains text to output.
+ * @scratchbuf: Used as temporary ringbuffer reading and string-print space.
+ */
+struct printk_buffers {
+ char outbuf[PRINTK_MESSAGE_MAX];
+ char scratchbuf[PRINTKRB_RECORD_MAX];
+};
+
+/**
+ * struct printk_message - Container for a prepared printk message.
+ * @pbufs: printk buffers used to prepare the message.
+ * @outbuf_len: The length of prepared text in @pbufs->outbuf to output. This
+ * does not count the terminator. A value of 0 means there is
+ * nothing to output and this record should be skipped.
+ * @seq: The sequence number of the record used for @pbufs->outbuf.
+ * @dropped: The number of dropped records from reading @seq.
+ */
+struct printk_message {
+ struct printk_buffers *pbufs;
+ unsigned int outbuf_len;
+ u64 seq;
+ unsigned long dropped;
+};
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 94f136b25f6a..fd0c9f913940 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -466,21 +466,6 @@ static struct latched_seq clear_seq = {
.val[1] = 0,
};
-#ifdef CONFIG_PRINTK_CALLER
-#define PREFIX_MAX 48
-#else
-#define PREFIX_MAX 32
-#endif
-
-/* the maximum size of a formatted record (i.e. with prefix added per line) */
-#define CONSOLE_LOG_MAX 1024
-
-/* the maximum size for a dropped text message */
-#define DROPPED_TEXT_MAX 64
-
-/* the maximum size allowed to be reserved for a record */
-#define LOG_LINE_MAX (CONSOLE_LOG_MAX - PREFIX_MAX)
-
#define LOG_LEVEL(v) ((v) & 0x07)
#define LOG_FACILITY(v) ((v) >> 3 & 0xff)
@@ -711,16 +696,15 @@ out:
return len;
}
+static bool printk_get_next_message(struct printk_message *pmsg, u64 seq,
+ bool is_extended, bool may_supress);
+
/* /dev/kmsg - userspace message inject/listen interface */
struct devkmsg_user {
atomic64_t seq;
struct ratelimit_state rs;
struct mutex lock;
- char buf[CONSOLE_EXT_LOG_MAX];
-
- struct printk_info info;
- char text_buf[CONSOLE_EXT_LOG_MAX];
- struct printk_record record;
+ struct printk_buffers pbufs;
};
static __printf(3, 4) __cold
@@ -746,7 +730,7 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
size_t len = iov_iter_count(from);
ssize_t ret = len;
- if (!user || len > LOG_LINE_MAX)
+ if (!user || len > PRINTKRB_RECORD_MAX)
return -EINVAL;
/* Ignore when user logging is disabled. */
@@ -802,8 +786,10 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
struct devkmsg_user *user = file->private_data;
- struct printk_record *r = &user->record;
- size_t len;
+ char *outbuf = &user->pbufs.outbuf[0];
+ struct printk_message pmsg = {
+ .pbufs = &user->pbufs,
+ };
ssize_t ret;
if (!user)
@@ -813,7 +799,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
if (ret)
return ret;
- if (!prb_read_valid(prb, atomic64_read(&user->seq), r)) {
+ if (!printk_get_next_message(&pmsg, atomic64_read(&user->seq), true, false)) {
if (file->f_flags & O_NONBLOCK) {
ret = -EAGAIN;
goto out;
@@ -830,36 +816,31 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
* This pairs with __wake_up_klogd:A.
*/
ret = wait_event_interruptible(log_wait,
- prb_read_valid(prb,
- atomic64_read(&user->seq), r)); /* LMM(devkmsg_read:A) */
+ printk_get_next_message(&pmsg, atomic64_read(&user->seq), true,
+ false)); /* LMM(devkmsg_read:A) */
if (ret)
goto out;
}
- if (r->info->seq != atomic64_read(&user->seq)) {
+ if (pmsg.dropped) {
/* our last seen message is gone, return error and reset */
- atomic64_set(&user->seq, r->info->seq);
+ atomic64_set(&user->seq, pmsg.seq);
ret = -EPIPE;
goto out;
}
- len = info_print_ext_header(user->buf, sizeof(user->buf), r->info);
- len += msg_print_ext_body(user->buf + len, sizeof(user->buf) - len,
- &r->text_buf[0], r->info->text_len,
- &r->info->dev_info);
-
- atomic64_set(&user->seq, r->info->seq + 1);
+ atomic64_set(&user->seq, pmsg.seq + 1);
- if (len > count) {
+ if (pmsg.outbuf_len > count) {
ret = -EINVAL;
goto out;
}
- if (copy_to_user(buf, user->buf, len)) {
+ if (copy_to_user(buf, outbuf, pmsg.outbuf_len)) {
ret = -EFAULT;
goto out;
}
- ret = len;
+ ret = pmsg.outbuf_len;
out:
mutex_unlock(&user->lock);
return ret;
@@ -953,9 +934,6 @@ static int devkmsg_open(struct inode *inode, struct file *file)
mutex_init(&user->lock);
- prb_rec_init_rd(&user->record, &user->info,
- &user->text_buf[0], sizeof(user->text_buf));
-
atomic64_set(&user->seq, prb_first_valid_seq(prb));
file->private_data = user;
@@ -1150,7 +1128,7 @@ static unsigned int __init add_to_rb(struct printk_ringbuffer *rb,
return prb_record_text_space(&e);
}
-static char setup_text_buf[LOG_LINE_MAX] __initdata;
+static char setup_text_buf[PRINTKRB_RECORD_MAX] __initdata;
void __init setup_log_buf(int early)
{
@@ -1416,7 +1394,7 @@ static size_t record_print_text(struct printk_record *r, bool syslog,
size_t text_len = r->info->text_len;
size_t buf_size = r->text_buf_size;
char *text = r->text_buf;
- char prefix[PREFIX_MAX];
+ char prefix[PRINTK_PREFIX_MAX];
bool truncated = false;
size_t prefix_len;
size_t line_len;
@@ -1515,7 +1493,7 @@ static size_t get_record_print_text_size(struct printk_info *info,
unsigned int line_count,
bool syslog, bool time)
{
- char prefix[PREFIX_MAX];
+ char prefix[PRINTK_PREFIX_MAX];
size_t prefix_len;
prefix_len = info_print_prefix(info, syslog, time, prefix);
@@ -1581,11 +1559,11 @@ static int syslog_print(char __user *buf, int size)
int len = 0;
u64 seq;
- text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL);
+ text = kmalloc(PRINTK_MESSAGE_MAX, GFP_KERNEL);
if (!text)
return -ENOMEM;
- prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX);
+ prb_rec_init_rd(&r, &info, text, PRINTK_MESSAGE_MAX);
mutex_lock(&syslog_lock);
@@ -1686,7 +1664,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
u64 seq;
bool time;
- text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL);
+ text = kmalloc(PRINTK_MESSAGE_MAX, GFP_KERNEL);
if (!text)
return -ENOMEM;
@@ -1698,7 +1676,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
seq = find_first_fitting_seq(latched_seq_read_nolock(&clear_seq), -1,
size, true, time);
- prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX);
+ prb_rec_init_rd(&r, &info, text, PRINTK_MESSAGE_MAX);
len = 0;
prb_for_each_record(seq, prb, seq, &r) {
@@ -2013,27 +1991,6 @@ static int console_trylock_spinning(void)
}
/*
- * Call the specified console driver, asking it to write out the specified
- * text and length. If @dropped_text is non-NULL and any records have been
- * dropped, a dropped message will be written out first.
- */
-static void call_console_driver(struct console *con, const char *text, size_t len,
- char *dropped_text)
-{
- size_t dropped_len;
-
- if (con->dropped && dropped_text) {
- dropped_len = snprintf(dropped_text, DROPPED_TEXT_MAX,
- "** %lu printk messages dropped **\n",
- con->dropped);
- con->dropped = 0;
- con->write(con, dropped_text, dropped_len);
- }
-
- con->write(con, text, len);
-}
-
-/*
* Recursion is tracked separately on each CPU. If NMIs are supported, an
* additional NMI context per CPU is also separately tracked. Until per-CPU
* is available, a separate "early tracking" is performed.
@@ -2243,8 +2200,8 @@ int vprintk_store(int facility, int level,
reserve_size = vsnprintf(&prefix_buf[0], sizeof(prefix_buf), fmt, args2) + 1;
va_end(args2);
- if (reserve_size > LOG_LINE_MAX)
- reserve_size = LOG_LINE_MAX;
+ if (reserve_size > PRINTKRB_RECORD_MAX)
+ reserve_size = PRINTKRB_RECORD_MAX;
/* Extract log level or control flags. */
if (facility == 0)
@@ -2258,7 +2215,7 @@ int vprintk_store(int facility, int level,
if (flags & LOG_CONT) {
prb_rec_init_wr(&r, reserve_size);
- if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) {
+ if (prb_reserve_in_last(&e, prb, &r, caller_id, PRINTKRB_RECORD_MAX)) {
text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size,
facility, &flags, fmt, args);
r.info->text_len += text_len;
@@ -2389,8 +2346,6 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre
#else /* CONFIG_PRINTK */
-#define CONSOLE_LOG_MAX 0
-#define DROPPED_TEXT_MAX 0
#define printk_time false
#define prb_read_valid(rb, seq, r) false
@@ -2414,10 +2369,6 @@ static ssize_t msg_print_ext_body(char *buf, size_t size,
struct dev_printk_info *dev_info) { return 0; }
static void console_lock_spinning_enable(void) { }
static int console_lock_spinning_disable_and_check(int cookie) { return 0; }
-static void call_console_driver(struct console *con, const char *text, size_t len,
- char *dropped_text)
-{
-}
static bool suppress_message_printing(int level) { return false; }
static bool pr_flush(int timeout_ms, bool reset_on_progress) { return true; }
static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; }
@@ -2744,16 +2695,136 @@ static void __console_unlock(void)
}
/*
- * Print one record for the given console. The record printed is whatever
- * record is the next available record for the given console.
+ * Prepend the message in @pmsg->pbufs->outbuf with a "dropped message". This
+ * is achieved by shifting the existing message over and inserting the dropped
+ * message.
+ *
+ * @pmsg is the printk message to prepend.
*
- * @text is a buffer of size CONSOLE_LOG_MAX.
+ * @dropped is the dropped count to report in the dropped message.
*
- * If extended messages should be printed, @ext_text is a buffer of size
- * CONSOLE_EXT_LOG_MAX. Otherwise @ext_text must be NULL.
+ * If the message text in @pmsg->pbufs->outbuf does not have enough space for
+ * the dropped message, the message text will be sufficiently truncated.
*
- * If dropped messages should be printed, @dropped_text is a buffer of size
- * DROPPED_TEXT_MAX. Otherwise @dropped_text must be NULL.
+ * If @pmsg->pbufs->outbuf is modified, @pmsg->outbuf_len is updated.
+ */
+#ifdef CONFIG_PRINTK
+static void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped)
+{
+ struct printk_buffers *pbufs = pmsg->pbufs;
+ const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf);
+ const size_t outbuf_sz = sizeof(pbufs->outbuf);
+ char *scratchbuf = &pbufs->scratchbuf[0];
+ char *outbuf = &pbufs->outbuf[0];
+ size_t len;
+
+ len = scnprintf(scratchbuf, scratchbuf_sz,
+ "** %lu printk messages dropped **\n", dropped);
+
+ /*
+ * Make sure outbuf is sufficiently large before prepending.
+ * Keep at least the prefix when the message must be truncated.
+ * It is a rather theoretical problem when someone tries to
+ * use a minimalist buffer.
+ */
+ if (WARN_ON_ONCE(len + PRINTK_PREFIX_MAX >= outbuf_sz))
+ return;
+
+ if (pmsg->outbuf_len + len >= outbuf_sz) {
+ /* Truncate the message, but keep it terminated. */
+ pmsg->outbuf_len = outbuf_sz - (len + 1);
+ outbuf[pmsg->outbuf_len] = 0;
+ }
+
+ memmove(outbuf + len, outbuf, pmsg->outbuf_len + 1);
+ memcpy(outbuf, scratchbuf, len);
+ pmsg->outbuf_len += len;
+}
+#else
+#define console_prepend_dropped(pmsg, dropped)
+#endif /* CONFIG_PRINTK */
+
+/*
+ * Read and format the specified record (or a later record if the specified
+ * record is not available).
+ *
+ * @pmsg will contain the formatted result. @pmsg->pbufs must point to a
+ * struct printk_buffers.
+ *
+ * @seq is the record to read and format. If it is not available, the next
+ * valid record is read.
+ *
+ * @is_extended specifies if the message should be formatted for extended
+ * console output.
+ *
+ * @may_suppress specifies if records may be skipped based on loglevel.
+ *
+ * Returns false if no record is available. Otherwise true and all fields
+ * of @pmsg are valid. (See the documentation of struct printk_message
+ * for information about the @pmsg fields.)
+ */
+static bool printk_get_next_message(struct printk_message *pmsg, u64 seq,
+ bool is_extended, bool may_suppress)
+{
+ static int panic_console_dropped;
+
+ struct printk_buffers *pbufs = pmsg->pbufs;
+ const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf);
+ const size_t outbuf_sz = sizeof(pbufs->outbuf);
+ char *scratchbuf = &pbufs->scratchbuf[0];
+ char *outbuf = &pbufs->outbuf[0];
+ struct printk_info info;
+ struct printk_record r;
+ size_t len = 0;
+
+ /*
+ * Formatting extended messages requires a separate buffer, so use the
+ * scratch buffer to read in the ringbuffer text.
+ *
+ * Formatting normal messages is done in-place, so read the ringbuffer
+ * text directly into the output buffer.
+ */
+ if (is_extended)
+ prb_rec_init_rd(&r, &info, scratchbuf, scratchbuf_sz);
+ else
+ prb_rec_init_rd(&r, &info, outbuf, outbuf_sz);
+
+ if (!prb_read_valid(prb, seq, &r))
+ return false;
+
+ pmsg->seq = r.info->seq;
+ pmsg->dropped = r.info->seq - seq;
+
+ /*
+ * Check for dropped messages in panic here so that printk
+ * suppression can occur as early as possible if necessary.
+ */
+ if (pmsg->dropped &&
+ panic_in_progress() &&
+ panic_console_dropped++ > 10) {
+ suppress_panic_printk = 1;
+ pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n");
+ }
+
+ /* Skip record that has level above the console loglevel. */
+ if (may_suppress && suppress_message_printing(r.info->level))
+ goto out;
+
+ if (is_extended) {
+ len = info_print_ext_header(outbuf, outbuf_sz, r.info);
+ len += msg_print_ext_body(outbuf + len, outbuf_sz - len,
+ &r.text_buf[0], r.info->text_len, &r.info->dev_info);
+ } else {
+ len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time);
+ }
+out:
+ pmsg->outbuf_len = len;
+ return true;
+}
+
+/*
+ * Print one record for the given console. The record printed is whatever
+ * record is the next available record for the given console.
*
* @handover will be set to true if a printk waiter has taken over the
* console_lock, in which case the caller is no longer holding both the
@@ -2766,46 +2837,33 @@ static void __console_unlock(void)
*
* Requires the console_lock and the SRCU read lock.
*/
-static bool console_emit_next_record(struct console *con, char *text, char *ext_text,
- char *dropped_text, bool *handover, int cookie)
+static bool console_emit_next_record(struct console *con, bool *handover, int cookie)
{
- static int panic_console_dropped;
- struct printk_info info;
- struct printk_record r;
- unsigned long flags;
- char *write_text;
- size_t len;
+ static struct printk_buffers pbufs;
- prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX);
+ bool is_extended = console_srcu_read_flags(con) & CON_EXTENDED;
+ char *outbuf = &pbufs.outbuf[0];
+ struct printk_message pmsg = {
+ .pbufs = &pbufs,
+ };
+ unsigned long flags;
*handover = false;
- if (!prb_read_valid(prb, con->seq, &r))
+ if (!printk_get_next_message(&pmsg, con->seq, is_extended, true))
return false;
- if (con->seq != r.info->seq) {
- con->dropped += r.info->seq - con->seq;
- con->seq = r.info->seq;
- if (panic_in_progress() && panic_console_dropped++ > 10) {
- suppress_panic_printk = 1;
- pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n");
- }
- }
+ con->dropped += pmsg.dropped;
- /* Skip record that has level above the console loglevel. */
- if (suppress_message_printing(r.info->level)) {
- con->seq++;
+ /* Skip messages of formatted length 0. */
+ if (pmsg.outbuf_len == 0) {
+ con->seq = pmsg.seq + 1;
goto skip;
}
- if (ext_text) {
- write_text = ext_text;
- len = info_print_ext_header(ext_text, CONSOLE_EXT_LOG_MAX, r.info);
- len += msg_print_ext_body(ext_text + len, CONSOLE_EXT_LOG_MAX - len,
- &r.text_buf[0], r.info->text_len, &r.info->dev_info);
- } else {
- write_text = text;
- len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time);
+ if (con->dropped && !is_extended) {
+ console_prepend_dropped(&pmsg, con->dropped);
+ con->dropped = 0;
}
/*
@@ -2821,11 +2879,15 @@ static bool console_emit_next_record(struct console *con, char *text, char *ext_
printk_safe_enter_irqsave(flags);
console_lock_spinning_enable();
- stop_critical_timings(); /* don't trace print latency */
- call_console_driver(con, write_text, len, dropped_text);
+ /* Do not trace print latency. */
+ stop_critical_timings();
+
+ /* Write everything out to the hardware. */
+ con->write(con, outbuf, pmsg.outbuf_len);
+
start_critical_timings();
- con->seq++;
+ con->seq = pmsg.seq + 1;
*handover = console_lock_spinning_disable_and_check(cookie);
printk_safe_exit_irqrestore(flags);
@@ -2858,9 +2920,6 @@ skip:
*/
static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover)
{
- static char dropped_text[DROPPED_TEXT_MAX];
- static char ext_text[CONSOLE_EXT_LOG_MAX];
- static char text[CONSOLE_LOG_MAX];
bool any_usable = false;
struct console *con;
bool any_progress;
@@ -2880,16 +2939,7 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove
continue;
any_usable = true;
- if (console_srcu_read_flags(con) & CON_EXTENDED) {
- /* Extended consoles do not print "dropped messages". */
- progress = console_emit_next_record(con, &text[0],
- &ext_text[0], NULL,
- handover, cookie);
- } else {
- progress = console_emit_next_record(con, &text[0],
- NULL, &dropped_text[0],
- handover, cookie);
- }
+ progress = console_emit_next_record(con, handover, cookie);
/*
* If a handover has occurred, the SRCU read lock
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index d93c3379e901..051aaf65c749 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -3,6 +3,8 @@
* Scheduler topology setup/handling methods
*/
+#include <linux/bsearch.h>
+
DEFINE_MUTEX(sched_domains_mutex);
/* Protected by sched_domains_mutex: */
@@ -2067,6 +2069,99 @@ unlock:
return found;
}
+struct __cmp_key {
+ const struct cpumask *cpus;
+ struct cpumask ***masks;
+ int node;
+ int cpu;
+ int w;
+};
+
+static int hop_cmp(const void *a, const void *b)
+{
+ struct cpumask **prev_hop, **cur_hop = *(struct cpumask ***)b;
+ struct __cmp_key *k = (struct __cmp_key *)a;
+
+ if (cpumask_weight_and(k->cpus, cur_hop[k->node]) <= k->cpu)
+ return 1;
+
+ if (b == k->masks) {
+ k->w = 0;
+ return 0;
+ }
+
+ prev_hop = *((struct cpumask ***)b - 1);
+ k->w = cpumask_weight_and(k->cpus, prev_hop[k->node]);
+ if (k->w <= k->cpu)
+ return 0;
+
+ return -1;
+}
+
+/*
+ * sched_numa_find_nth_cpu() - given the NUMA topology, find the Nth cpu of
+ * @cpus, taking cpus in order of increasing hop distance from @node.
+ * cpus: cpumask to find a cpu from
+ * cpu: index (N) of the cpu to find; node: NUMA node to measure distance from
+ *
+ * returns: cpu, or nr_cpu_ids when nothing found.
+ */
+int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node)
+{
+ struct __cmp_key k = { .cpus = cpus, .node = node, .cpu = cpu };
+ struct cpumask ***hop_masks;
+ int hop, ret = nr_cpu_ids;
+
+ rcu_read_lock();
+
+ k.masks = rcu_dereference(sched_domains_numa_masks);
+ if (!k.masks)
+ goto unlock;
+
+ hop_masks = bsearch(&k, k.masks, sched_domains_numa_levels, sizeof(k.masks[0]), hop_cmp);
+ hop = hop_masks - k.masks;
+
+ ret = hop ?
+ cpumask_nth_and_andnot(cpu - k.w, cpus, k.masks[hop][node], k.masks[hop-1][node]) :
+ cpumask_nth_and(cpu, cpus, k.masks[0][node]);
+unlock:
+ rcu_read_unlock();
+ return ret;
+}
+EXPORT_SYMBOL_GPL(sched_numa_find_nth_cpu);
+
+/**
+ * sched_numa_hop_mask() - Get the cpumask of CPUs at most @hops hops away from
+ * @node
+ * @node: The node to count hops from.
+ * @hops: Include CPUs up to that many hops away. 0 means local node.
+ *
+ * Return: On success, a pointer to a cpumask of CPUs at most @hops away from
+ * @node, an error value otherwise.
+ *
+ * Requires rcu_lock to be held. Returned cpumask is only valid within that
+ * read-side section, copy it if required beyond that.
+ *
+ * Note that not all hops are equal in distance; see sched_init_numa() for how
+ * distances and masks are handled.
+ * Also note that this is a reflection of sched_domains_numa_masks, which may change
+ * during the lifetime of the system (offline nodes are taken out of the masks).
+ */
+const struct cpumask *sched_numa_hop_mask(unsigned int node, unsigned int hops)
+{
+ struct cpumask ***masks;
+
+ if (node >= nr_node_ids || hops >= sched_domains_numa_levels)
+ return ERR_PTR(-EINVAL);
+
+ masks = rcu_dereference(sched_domains_numa_masks);
+ if (!masks)
+ return ERR_PTR(-EBUSY);
+
+ return masks[hops][node];
+}
+EXPORT_SYMBOL_GPL(sched_numa_hop_mask);
+
#endif /* CONFIG_NUMA */
static int __sdt_alloc(const struct cpumask *cpu_map)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d7043043f59c..a856d4a34c67 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -42,6 +42,9 @@ config HAVE_DYNAMIC_FTRACE_WITH_REGS
config HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
bool
+config HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS
+ bool
+
config HAVE_DYNAMIC_FTRACE_WITH_ARGS
bool
help
@@ -239,7 +242,7 @@ config DYNAMIC_FTRACE
enabled, and the functions not enabled will not affect
performance of the system.
- See the files in /sys/kernel/debug/tracing:
+ See the files in /sys/kernel/tracing:
available_filter_functions
set_ftrace_filter
set_ftrace_notrace
@@ -257,6 +260,10 @@ config DYNAMIC_FTRACE_WITH_DIRECT_CALLS
depends on DYNAMIC_FTRACE_WITH_REGS
depends on HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+config DYNAMIC_FTRACE_WITH_CALL_OPS
+ def_bool y
+ depends on HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS
+
config DYNAMIC_FTRACE_WITH_ARGS
def_bool y
depends on DYNAMIC_FTRACE
@@ -299,7 +306,7 @@ config STACK_TRACER
select KALLSYMS
help
This special tracer records the maximum stack footprint of the
- kernel and displays it in /sys/kernel/debug/tracing/stack_trace.
+ kernel and displays it in /sys/kernel/tracing/stack_trace.
This tracer works by hooking into every function call that the
kernel executes, and keeping a maximum stack depth value and
@@ -339,7 +346,7 @@ config IRQSOFF_TRACER
disabled by default and can be runtime (re-)started
via:
- echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
+ echo 0 > /sys/kernel/tracing/tracing_max_latency
(Note that kernel size and overhead increase with this option
enabled. This option and the preempt-off timing option can be
@@ -363,7 +370,7 @@ config PREEMPT_TRACER
disabled by default and can be runtime (re-)started
via:
- echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
+ echo 0 > /sys/kernel/tracing/tracing_max_latency
(Note that kernel size and overhead increase with this option
enabled. This option and the irqs-off timing option can be
@@ -515,7 +522,7 @@ config TRACER_SNAPSHOT
Allow tracing users to take snapshot of the current buffer using the
ftrace interface, e.g.:
- echo 1 > /sys/kernel/debug/tracing/snapshot
+ echo 1 > /sys/kernel/tracing/snapshot
cat snapshot
config TRACER_SNAPSHOT_PER_CPU_SWAP
@@ -527,7 +534,7 @@ config TRACER_SNAPSHOT_PER_CPU_SWAP
full swap (all buffers). If this is set, then the following is
allowed:
- echo 1 > /sys/kernel/debug/tracing/per_cpu/cpu2/snapshot
+ echo 1 > /sys/kernel/tracing/per_cpu/cpu2/snapshot
After which, only the tracing buffer for CPU 2 was swapped with
the main tracing buffer, and the other CPU buffers remain the same.
@@ -574,7 +581,7 @@ config PROFILE_ANNOTATED_BRANCHES
This tracer profiles all likely and unlikely macros
in the kernel. It will display the results in:
- /sys/kernel/debug/tracing/trace_stat/branch_annotated
+ /sys/kernel/tracing/trace_stat/branch_annotated
Note: this will add a significant overhead; only turn this
on if you need to profile the system's use of these macros.
@@ -587,7 +594,7 @@ config PROFILE_ALL_BRANCHES
taken in the kernel is recorded whether it hit or miss.
The results will be displayed in:
- /sys/kernel/debug/tracing/trace_stat/branch_all
+ /sys/kernel/tracing/trace_stat/branch_all
This option also enables the likely/unlikely profiler.
@@ -638,8 +645,8 @@ config BLK_DEV_IO_TRACE
Tracing also is possible using the ftrace interface, e.g.:
echo 1 > /sys/block/sda/sda1/trace/enable
- echo blk > /sys/kernel/debug/tracing/current_tracer
- cat /sys/kernel/debug/tracing/trace_pipe
+ echo blk > /sys/kernel/tracing/current_tracer
+ cat /sys/kernel/tracing/trace_pipe
If unsure, say N.
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b8ac8b09c86f..e8da032bb6fc 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -369,8 +369,6 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
return &bpf_probe_write_user_proto;
}
-static DEFINE_RAW_SPINLOCK(trace_printk_lock);
-
#define MAX_TRACE_PRINTK_VARARGS 3
#define BPF_TRACE_PRINTK_SIZE 1024
@@ -378,23 +376,22 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
u64, arg2, u64, arg3)
{
u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
- u32 *bin_args;
- static char buf[BPF_TRACE_PRINTK_SIZE];
- unsigned long flags;
+ struct bpf_bprintf_data data = {
+ .get_bin_args = true,
+ .get_buf = true,
+ };
int ret;
- ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args,
- MAX_TRACE_PRINTK_VARARGS);
+ ret = bpf_bprintf_prepare(fmt, fmt_size, args,
+ MAX_TRACE_PRINTK_VARARGS, &data);
if (ret < 0)
return ret;
- raw_spin_lock_irqsave(&trace_printk_lock, flags);
- ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
+ ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args);
- trace_bpf_trace_printk(buf);
- raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
+ trace_bpf_trace_printk(data.buf);
- bpf_bprintf_cleanup();
+ bpf_bprintf_cleanup(&data);
return ret;
}
@@ -427,30 +424,29 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
return &bpf_trace_printk_proto;
}
-BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data,
+BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args,
u32, data_len)
{
- static char buf[BPF_TRACE_PRINTK_SIZE];
- unsigned long flags;
+ struct bpf_bprintf_data data = {
+ .get_bin_args = true,
+ .get_buf = true,
+ };
int ret, num_args;
- u32 *bin_args;
if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
- (data_len && !data))
+ (data_len && !args))
return -EINVAL;
num_args = data_len / 8;
- ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
+ ret = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data);
if (ret < 0)
return ret;
- raw_spin_lock_irqsave(&trace_printk_lock, flags);
- ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
+ ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args);
- trace_bpf_trace_printk(buf);
- raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
+ trace_bpf_trace_printk(data.buf);
- bpf_bprintf_cleanup();
+ bpf_bprintf_cleanup(&data);
return ret;
}
@@ -472,23 +468,25 @@ const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void)
}
BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
- const void *, data, u32, data_len)
+ const void *, args, u32, data_len)
{
+ struct bpf_bprintf_data data = {
+ .get_bin_args = true,
+ };
int err, num_args;
- u32 *bin_args;
if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
- (data_len && !data))
+ (data_len && !args))
return -EINVAL;
num_args = data_len / 8;
- err = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
+ err = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data);
if (err < 0)
return err;
- seq_bprintf(m, fmt, bin_args);
+ seq_bprintf(m, fmt, data.bin_args);
- bpf_bprintf_cleanup();
+ bpf_bprintf_cleanup(&data);
return seq_has_overflowed(m) ? -EOVERFLOW : 0;
}
@@ -1237,7 +1235,7 @@ __diag_ignore_all("-Wmissing-prototypes",
* Return: a bpf_key pointer with a valid key pointer if the key is found, a
* NULL pointer otherwise.
*/
-struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags)
+__bpf_kfunc struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags)
{
key_ref_t key_ref;
struct bpf_key *bkey;
@@ -1286,7 +1284,7 @@ struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags)
* Return: a bpf_key pointer with an invalid key pointer set from the
* pre-determined ID on success, a NULL pointer otherwise
*/
-struct bpf_key *bpf_lookup_system_key(u64 id)
+__bpf_kfunc struct bpf_key *bpf_lookup_system_key(u64 id)
{
struct bpf_key *bkey;
@@ -1310,7 +1308,7 @@ struct bpf_key *bpf_lookup_system_key(u64 id)
* Decrement the reference count of the key inside *bkey*, if the pointer
* is valid, and free *bkey*.
*/
-void bpf_key_put(struct bpf_key *bkey)
+__bpf_kfunc void bpf_key_put(struct bpf_key *bkey)
{
if (bkey->has_ref)
key_put(bkey->key);
@@ -1330,7 +1328,7 @@ void bpf_key_put(struct bpf_key *bkey)
*
* Return: 0 on success, a negative value on error.
*/
-int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr,
+__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr,
struct bpf_dynptr_kern *sig_ptr,
struct bpf_key *trusted_keyring)
{
@@ -2686,69 +2684,77 @@ static void symbols_swap_r(void *a, void *b, int size, const void *priv)
}
}
-struct module_addr_args {
- unsigned long *addrs;
- u32 addrs_cnt;
+struct modules_array {
struct module **mods;
int mods_cnt;
int mods_cap;
};
-static int module_callback(void *data, const char *name,
- struct module *mod, unsigned long addr)
+static int add_module(struct modules_array *arr, struct module *mod)
{
- struct module_addr_args *args = data;
struct module **mods;
- /* We iterate all modules symbols and for each we:
- * - search for it in provided addresses array
- * - if found we check if we already have the module pointer stored
- * (we iterate modules sequentially, so we can check just the last
- * module pointer)
- * - take module reference and store it
- */
- if (!bsearch(&addr, args->addrs, args->addrs_cnt, sizeof(addr),
- bpf_kprobe_multi_addrs_cmp))
- return 0;
-
- if (args->mods && args->mods[args->mods_cnt - 1] == mod)
- return 0;
-
- if (args->mods_cnt == args->mods_cap) {
- args->mods_cap = max(16, args->mods_cap * 3 / 2);
- mods = krealloc_array(args->mods, args->mods_cap, sizeof(*mods), GFP_KERNEL);
+ if (arr->mods_cnt == arr->mods_cap) {
+ arr->mods_cap = max(16, arr->mods_cap * 3 / 2);
+ mods = krealloc_array(arr->mods, arr->mods_cap, sizeof(*mods), GFP_KERNEL);
if (!mods)
return -ENOMEM;
- args->mods = mods;
+ arr->mods = mods;
}
- if (!try_module_get(mod))
- return -EINVAL;
-
- args->mods[args->mods_cnt] = mod;
- args->mods_cnt++;
+ arr->mods[arr->mods_cnt] = mod;
+ arr->mods_cnt++;
return 0;
}
+/* Report whether @mod is already stored in @arr. */
+static bool has_module(struct modules_array *arr, struct module *mod)
+{
+	int idx = arr->mods_cnt;
+
+	/* Walk from the most recently stored entry back to the first. */
+	while (idx-- > 0) {
+		if (arr->mods[idx] == mod)
+			return true;
+	}
+	return false;
+}
+
static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u32 addrs_cnt)
{
- struct module_addr_args args = {
- .addrs = addrs,
- .addrs_cnt = addrs_cnt,
- };
- int err;
+ struct modules_array arr = {};
+ u32 i, err = 0;
+
+ for (i = 0; i < addrs_cnt; i++) {
+ struct module *mod;
+
+ preempt_disable();
+ mod = __module_address(addrs[i]);
+ /* Either no module or it's already stored */
+ if (!mod || has_module(&arr, mod)) {
+ preempt_enable();
+ continue;
+ }
+ if (!try_module_get(mod))
+ err = -EINVAL;
+ preempt_enable();
+ if (err)
+ break;
+ err = add_module(&arr, mod);
+ if (err) {
+ module_put(mod);
+ break;
+ }
+ }
/* We return either err < 0 in case of error, ... */
- err = module_kallsyms_on_each_symbol(module_callback, &args);
if (err) {
- kprobe_multi_put_modules(args.mods, args.mods_cnt);
- kfree(args.mods);
+ kprobe_multi_put_modules(arr.mods, arr.mods_cnt);
+ kfree(arr.mods);
return err;
}
/* or number of modules found if everything is ok. */
- *mods = args.mods;
- return args.mods_cnt;
+ *mods = arr.mods;
+ return arr.mods_cnt;
}
int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
@@ -2861,13 +2867,6 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
bpf_kprobe_multi_cookie_cmp,
bpf_kprobe_multi_cookie_swap,
link);
- } else {
- /*
- * We need to sort addrs array even if there are no cookies
- * provided, to allow bsearch in get_modules_for_addrs.
- */
- sort(addrs, cnt, sizeof(*addrs),
- bpf_kprobe_multi_addrs_cmp, NULL);
}
err = get_modules_for_addrs(&link->mods, addrs, cnt);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 750aa3f08b25..29baa97d0d53 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -125,6 +125,33 @@ struct ftrace_ops global_ops;
void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs);
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+/*
+ * Stub used to invoke the list ops without requiring a separate trampoline.
+ */
+const struct ftrace_ops ftrace_list_ops = {
+ .func = ftrace_ops_list_func,
+ .flags = FTRACE_OPS_FL_STUB,
+};
+
+/* Callback installed in ftrace_nop_ops below; deliberately has no effect. */
+static void ftrace_ops_nop_func(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op,
+ struct ftrace_regs *fregs)
+{
+ /* do nothing */
+}
+
+/*
+ * Stub used when a call site is disabled. May be called transiently by threads
+ * which have made it into ftrace_caller but haven't yet recovered the ops at
+ * the point the call site is disabled.
+ */
+const struct ftrace_ops ftrace_nop_ops = {
+ .func = ftrace_ops_nop_func,
+ .flags = FTRACE_OPS_FL_STUB,
+};
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS */
+
static inline void ftrace_ops_init(struct ftrace_ops *ops)
{
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -1819,6 +1846,18 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
* if rec count is zero.
*/
}
+
+ /*
+ * If the rec has a single associated ops, and ops->func can be
+ * called directly, allow the call site to call via the ops.
+ */
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS) &&
+ ftrace_rec_count(rec) == 1 &&
+ ftrace_ops_get_func(ops) == ops->func)
+ rec->flags |= FTRACE_FL_CALL_OPS;
+ else
+ rec->flags &= ~FTRACE_FL_CALL_OPS;
+
count++;
/* Must match FTRACE_UPDATE_CALLS in ftrace_modify_all_code() */
@@ -2113,8 +2152,9 @@ void ftrace_bug(int failed, struct dyn_ftrace *rec)
struct ftrace_ops *ops = NULL;
pr_info("ftrace record flags: %lx\n", rec->flags);
- pr_cont(" (%ld)%s", ftrace_rec_count(rec),
- rec->flags & FTRACE_FL_REGS ? " R" : " ");
+ pr_cont(" (%ld)%s%s", ftrace_rec_count(rec),
+ rec->flags & FTRACE_FL_REGS ? " R" : " ",
+ rec->flags & FTRACE_FL_CALL_OPS ? " O" : " ");
if (rec->flags & FTRACE_FL_TRAMP_EN) {
ops = ftrace_find_tramp_ops_any(rec);
if (ops) {
@@ -2182,6 +2222,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
* want the direct enabled (it will be done via the
* direct helper). But if DIRECT_EN is set, and
* the count is not one, we need to clear it.
+ *
*/
if (ftrace_rec_count(rec) == 1) {
if (!(rec->flags & FTRACE_FL_DIRECT) !=
@@ -2190,6 +2231,19 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
} else if (rec->flags & FTRACE_FL_DIRECT_EN) {
flag |= FTRACE_FL_DIRECT;
}
+
+ /*
+ * Ops calls are special, as count matters.
+ * As with direct calls, they must only be enabled when count
+ * is one, otherwise they'll be handled via the list ops.
+ */
+ if (ftrace_rec_count(rec) == 1) {
+ if (!(rec->flags & FTRACE_FL_CALL_OPS) !=
+ !(rec->flags & FTRACE_FL_CALL_OPS_EN))
+ flag |= FTRACE_FL_CALL_OPS;
+ } else if (rec->flags & FTRACE_FL_CALL_OPS_EN) {
+ flag |= FTRACE_FL_CALL_OPS;
+ }
}
/* If the state of this record hasn't changed, then do nothing */
@@ -2234,6 +2288,21 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
rec->flags &= ~FTRACE_FL_DIRECT_EN;
}
}
+
+ if (flag & FTRACE_FL_CALL_OPS) {
+ if (ftrace_rec_count(rec) == 1) {
+ if (rec->flags & FTRACE_FL_CALL_OPS)
+ rec->flags |= FTRACE_FL_CALL_OPS_EN;
+ else
+ rec->flags &= ~FTRACE_FL_CALL_OPS_EN;
+ } else {
+ /*
+ * Can only call directly if there's
+ * only one set of associated ops.
+ */
+ rec->flags &= ~FTRACE_FL_CALL_OPS_EN;
+ }
+ }
}
/*
@@ -2263,7 +2332,8 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
* and REGS states. The _EN flags must be disabled though.
*/
rec->flags &= ~(FTRACE_FL_ENABLED | FTRACE_FL_TRAMP_EN |
- FTRACE_FL_REGS_EN | FTRACE_FL_DIRECT_EN);
+ FTRACE_FL_REGS_EN | FTRACE_FL_DIRECT_EN |
+ FTRACE_FL_CALL_OPS_EN);
}
ftrace_bug_type = FTRACE_BUG_NOP;
@@ -2436,6 +2506,25 @@ ftrace_find_tramp_ops_new(struct dyn_ftrace *rec)
return NULL;
}
+/*
+ * Walk ftrace_ops_list looking for the ops for which hash_contains_ip()
+ * reports a match on rec->ip. Returns that ops when exactly one matches,
+ * or NULL when none matches or more than one does.
+ */
+struct ftrace_ops *
+ftrace_find_unique_ops(struct dyn_ftrace *rec)
+{
+ struct ftrace_ops *op, *found = NULL;
+ unsigned long ip = rec->ip;
+
+ do_for_each_ftrace_op(op, ftrace_ops_list) {
+
+ if (hash_contains_ip(ip, op->func_hash)) {
+ /* A second match means the ops is not unique. */
+ if (found)
+ return NULL;
+ found = op;
+ }
+
+ } while_for_each_ftrace_op(op);
+
+ return found;
+}
+
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
/* Protected by rcu_tasks for reading, and direct_mutex for writing */
static struct ftrace_hash *direct_functions = EMPTY_HASH;
@@ -3785,11 +3874,12 @@ static int t_show(struct seq_file *m, void *v)
if (iter->flags & FTRACE_ITER_ENABLED) {
struct ftrace_ops *ops;
- seq_printf(m, " (%ld)%s%s%s",
+ seq_printf(m, " (%ld)%s%s%s%s",
ftrace_rec_count(rec),
rec->flags & FTRACE_FL_REGS ? " R" : " ",
rec->flags & FTRACE_FL_IPMODIFY ? " I" : " ",
- rec->flags & FTRACE_FL_DIRECT ? " D" : " ");
+ rec->flags & FTRACE_FL_DIRECT ? " D" : " ",
+ rec->flags & FTRACE_FL_CALL_OPS ? " O" : " ");
if (rec->flags & FTRACE_FL_TRAMP_EN) {
ops = ftrace_find_tramp_ops_any(rec);
if (ops) {
@@ -3805,6 +3895,15 @@ static int t_show(struct seq_file *m, void *v)
} else {
add_trampoline_func(m, NULL, rec);
}
+ if (rec->flags & FTRACE_FL_CALL_OPS_EN) {
+ ops = ftrace_find_unique_ops(rec);
+ if (ops) {
+ seq_printf(m, "\tops: %pS (%pS)",
+ ops, ops->func);
+ } else {
+ seq_puts(m, "\tops: ERROR!");
+ }
+ }
if (rec->flags & FTRACE_FL_DIRECT) {
unsigned long direct;
@@ -8345,7 +8444,7 @@ int ftrace_lookup_symbols(const char **sorted_syms, size_t cnt, unsigned long *a
found_all = kallsyms_on_each_symbol(kallsyms_callback, &args);
if (found_all)
return 0;
- found_all = module_kallsyms_on_each_symbol(kallsyms_callback, &args);
+ found_all = module_kallsyms_on_each_symbol(NULL, kallsyms_callback, &args);
return found_all ? 0 : -ESRCH;
}
diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c
index c736487fc0e4..4850fdfe27f1 100644
--- a/kernel/trace/kprobe_event_gen_test.c
+++ b/kernel/trace/kprobe_event_gen_test.c
@@ -21,7 +21,7 @@
* Then:
*
* # insmod kernel/trace/kprobe_event_gen_test.ko
- * # cat /sys/kernel/debug/tracing/trace
+ * # cat /sys/kernel/tracing/trace
*
* You should see many instances of the "gen_kprobe_test" and
* "gen_kretprobe_test" events in the trace buffer.
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index c366a0a9ddba..af50d931b020 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1581,19 +1581,6 @@ static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
}
/**
- * rb_check_list - make sure a pointer to a list has the last bits zero
- */
-static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
- struct list_head *list)
-{
- if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev))
- return 1;
- if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next))
- return 1;
- return 0;
-}
-
-/**
* rb_check_pages - integrity check of buffer pages
* @cpu_buffer: CPU buffer with pages to test
*
@@ -1602,36 +1589,27 @@ static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
*/
static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
{
- struct list_head *head = cpu_buffer->pages;
- struct buffer_page *bpage, *tmp;
-
- /* Reset the head page if it exists */
- if (cpu_buffer->head_page)
- rb_set_head_page(cpu_buffer);
-
- rb_head_page_deactivate(cpu_buffer);
+ struct list_head *head = rb_list_head(cpu_buffer->pages);
+ struct list_head *tmp;
- if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
- return -1;
- if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
+ if (RB_WARN_ON(cpu_buffer,
+ rb_list_head(rb_list_head(head->next)->prev) != head))
return -1;
- if (rb_check_list(cpu_buffer, head))
+ if (RB_WARN_ON(cpu_buffer,
+ rb_list_head(rb_list_head(head->prev)->next) != head))
return -1;
- list_for_each_entry_safe(bpage, tmp, head, list) {
+ for (tmp = rb_list_head(head->next); tmp != head; tmp = rb_list_head(tmp->next)) {
if (RB_WARN_ON(cpu_buffer,
- bpage->list.next->prev != &bpage->list))
+ rb_list_head(rb_list_head(tmp->next)->prev) != tmp))
return -1;
+
if (RB_WARN_ON(cpu_buffer,
- bpage->list.prev->next != &bpage->list))
- return -1;
- if (rb_check_list(cpu_buffer, &bpage->list))
+ rb_list_head(rb_list_head(tmp->prev)->next) != tmp))
return -1;
}
- rb_head_page_activate(cpu_buffer);
-
return 0;
}
@@ -2886,7 +2864,7 @@ rb_check_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
sched_clock_stable() ? "" :
"If you just came from a suspend/resume,\n"
"please switch to the trace global clock:\n"
- " echo global > /sys/kernel/debug/tracing/trace_clock\n"
+ " echo global > /sys/kernel/tracing/trace_clock\n"
"or add trace_clock=global to the kernel command line\n");
}
@@ -5626,11 +5604,16 @@ EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
*/
void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu, void *data)
{
- struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+ struct ring_buffer_per_cpu *cpu_buffer;
struct buffer_data_page *bpage = data;
struct page *page = virt_to_page(bpage);
unsigned long flags;
+ if (!buffer || !buffer->buffers || !buffer->buffers[cpu])
+ return;
+
+ cpu_buffer = buffer->buffers[cpu];
+
/* If the page is still in use someplace else, we can't reuse it */
if (page_ref_count(page) > 1)
goto out;
diff --git a/kernel/trace/synth_event_gen_test.c b/kernel/trace/synth_event_gen_test.c
index 8d77526892f4..8dfe85499d4a 100644
--- a/kernel/trace/synth_event_gen_test.c
+++ b/kernel/trace/synth_event_gen_test.c
@@ -22,7 +22,7 @@
* Then:
*
* # insmod kernel/trace/synth_event_gen_test.ko
- * # cat /sys/kernel/debug/tracing/trace
+ * # cat /sys/kernel/tracing/trace
*
* You should see several events in the trace buffer -
* "create_synth_test", "empty_synth_test", and several instances of
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 54a163ae4815..45551c7b4c36 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -49,6 +49,8 @@
#include <linux/irq_work.h>
#include <linux/workqueue.h>
+#include <asm/setup.h> /* COMMAND_LINE_SIZE */
+
#include "trace.h"
#include "trace_output.h"
@@ -186,6 +188,12 @@ static char *default_bootup_tracer;
static bool allocate_snapshot;
static bool snapshot_at_boot;
+static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
+static int boot_instance_index;
+
+static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
+static int boot_snapshot_index;
+
static int __init set_cmdline_ftrace(char *str)
{
strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
@@ -222,9 +230,22 @@ __setup("traceoff_on_warning", stop_trace_on_warning);
static int __init boot_alloc_snapshot(char *str)
{
- allocate_snapshot = true;
- /* We also need the main ring buffer expanded */
- ring_buffer_expanded = true;
+ char *slot = boot_snapshot_info + boot_snapshot_index;
+ int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
+ int ret;
+
+ if (str[0] == '=') {
+ str++;
+ if (strlen(str) >= left)
+ return -1;
+
+ ret = snprintf(slot, left, "%s\t", str);
+ boot_snapshot_index += ret;
+ } else {
+ allocate_snapshot = true;
+ /* We also need the main ring buffer expanded */
+ ring_buffer_expanded = true;
+ }
return 1;
}
__setup("alloc_snapshot", boot_alloc_snapshot);
@@ -239,6 +260,23 @@ static int __init boot_snapshot(char *str)
__setup("ftrace_boot_snapshot", boot_snapshot);
+/*
+ * Handler for the "trace_instance=" boot parameter: append @str followed by
+ * a tab to boot_instance_info and advance boot_instance_index by the length
+ * snprintf() reports. Returns -1 when @str does not fit in the remaining
+ * space, 1 otherwise.
+ */
+static int __init boot_instance(char *str)
+{
+	int used = boot_instance_index;
+	int room = sizeof(boot_instance_info) - used;
+
+	if (strlen(str) >= room)
+		return -1;
+
+	boot_instance_index += snprintf(boot_instance_info + used, room, "%s\t", str);
+
+	return 1;
+}
+__setup("trace_instance=", boot_instance);
+
+
static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
static int __init set_trace_boot_options(char *str)
@@ -1001,13 +1039,8 @@ __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *ev
ring_buffer_unlock_commit(buffer);
}
-/**
- * __trace_puts - write a constant string into the trace buffer.
- * @ip: The address of the caller
- * @str: The constant string to write
- * @size: The size of the string.
- */
-int __trace_puts(unsigned long ip, const char *str, int size)
+int __trace_array_puts(struct trace_array *tr, unsigned long ip,
+ const char *str, int size)
{
struct ring_buffer_event *event;
struct trace_buffer *buffer;
@@ -1015,7 +1048,7 @@ int __trace_puts(unsigned long ip, const char *str, int size)
unsigned int trace_ctx;
int alloc;
- if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
+ if (!(tr->trace_flags & TRACE_ITER_PRINTK))
return 0;
if (unlikely(tracing_selftest_running || tracing_disabled))
@@ -1024,7 +1057,7 @@ int __trace_puts(unsigned long ip, const char *str, int size)
alloc = sizeof(*entry) + size + 2; /* possible \n added */
trace_ctx = tracing_gen_ctx();
- buffer = global_trace.array_buffer.buffer;
+ buffer = tr->array_buffer.buffer;
ring_buffer_nest_start(buffer);
event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
trace_ctx);
@@ -1046,11 +1079,23 @@ int __trace_puts(unsigned long ip, const char *str, int size)
entry->buf[size] = '\0';
__buffer_unlock_commit(buffer, event);
- ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
+ ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
out:
ring_buffer_nest_end(buffer);
return size;
}
+EXPORT_SYMBOL_GPL(__trace_array_puts);
+
+/**
+ * __trace_puts - write a constant string into the trace buffer.
+ * @ip: The address of the caller
+ * @str: The constant string to write
+ * @size: The size of the string.
+ *
+ * Wrapper around __trace_array_puts() that targets global_trace.
+ *
+ * Return: the value returned by __trace_array_puts().
+ */
+int __trace_puts(unsigned long ip, const char *str, int size)
+{
+ return __trace_array_puts(&global_trace, ip, str, size);
+}
EXPORT_SYMBOL_GPL(__trace_puts);
/**
@@ -1142,7 +1187,7 @@ void tracing_snapshot_instance(struct trace_array *tr)
*
* Note, make sure to allocate the snapshot with either
* a tracing_snapshot_alloc(), or by doing it manually
- * with: echo 1 > /sys/kernel/debug/tracing/snapshot
+ * with: echo 1 > /sys/kernel/tracing/snapshot
*
* If the snapshot buffer is not allocated, it will stop tracing.
* Basically making a permanent snapshot.
@@ -5601,7 +5646,7 @@ static const char readme_msg[] =
#ifdef CONFIG_HIST_TRIGGERS
"\t s:[synthetic/]<event> <field> [<field>]\n"
#endif
- "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]\n"
+ "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
"\t -:[<group>/][<event>]\n"
#ifdef CONFIG_KPROBE_EVENTS
"\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
@@ -5618,7 +5663,7 @@ static const char readme_msg[] =
"\t $stack<index>, $stack, $retval, $comm,\n"
#endif
"\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
- "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
+ "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
"\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
"\t symstr, <type>\\[<array-size>\\]\n"
#ifdef CONFIG_HIST_TRIGGERS
@@ -5760,7 +5805,7 @@ static const char readme_msg[] =
#ifdef CONFIG_SYNTH_EVENTS
" events/synthetic_events\t- Create/append/remove/show synthetic events\n"
"\t Write into this file to define/undefine new synthetic events.\n"
- "\t example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
+ "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
#endif
#endif
;
@@ -9225,10 +9270,6 @@ static int allocate_trace_buffers(struct trace_array *tr, int size)
}
tr->allocated_snapshot = allocate_snapshot;
- /*
- * Only the top level trace array gets its snapshot allocated
- * from the kernel command line.
- */
allocate_snapshot = false;
#endif
@@ -10144,6 +10185,79 @@ out:
return ret;
}
+#ifdef CONFIG_TRACER_MAX_TRACE
+/*
+ * Decide whether the instance called @name was requested on the kernel
+ * command line to have a snapshot buffer allocated.
+ *
+ * boot_snapshot_info holds the requested names, each followed by a tab (see
+ * boot_alloc_snapshot()). Returns true when @name is one of them, false when
+ * it is not, when no names were recorded, or when the temporary search
+ * string cannot be allocated.
+ */
+__init static bool tr_needs_alloc_snapshot(const char *name)
+{
+	char *test;
+	int len = strlen(name);
+	bool ret;
+
+	/* Nothing was recorded by "alloc_snapshot=<name>". */
+	if (!boot_snapshot_index)
+		return false;
+
+	/* The first entry has no leading tab, so compare it directly. */
+	if (strncmp(name, boot_snapshot_info, len) == 0 &&
+	    boot_snapshot_info[len] == '\t')
+		return true;
+
+	/* Room for a leading tab, a trailing tab and the terminating NUL. */
+	test = kmalloc(len + 3, GFP_KERNEL);
+	if (!test)
+		return false;
+
+	/*
+	 * Every later entry is delimited by a tab on both sides. The instance
+	 * needs a snapshot when the delimited name IS present in the list.
+	 */
+	sprintf(test, "\t%s\t", name);
+	ret = strstr(boot_snapshot_info, test) != NULL;
+	kfree(test);
+	return ret;
+}
+
+/*
+ * Set allocate_snapshot when tr_needs_alloc_snapshot() says the instance
+ * called @name should get a snapshot buffer; otherwise do nothing.
+ */
+__init static void do_allocate_snapshot(const char *name)
+{
+ if (!tr_needs_alloc_snapshot(name))
+ return;
+
+ /*
+ * When allocate_snapshot is set, the next call to
+ * allocate_trace_buffers() (called by trace_array_get_by_name())
+ * will allocate the snapshot buffer. That will also clear
+ * this flag.
+ */
+ allocate_snapshot = true;
+}
+#else
+static inline void do_allocate_snapshot(const char *name) { }
+#endif
+
+/*
+ * Create the trace instances recorded in boot_instance_info and enable the
+ * events listed for each of them.
+ *
+ * boot_instance_info is a tab-separated list of entries; each entry is an
+ * instance name optionally followed by comma-separated event names.
+ */
+__init static void enable_instances(void)
+{
+	struct trace_array *tr;
+	char *curr_str;
+	char *str;
+	char *tok;
+
+	/* A tab is always appended */
+	boot_instance_info[boot_instance_index - 1] = '\0';
+	str = boot_instance_info;
+
+	while ((curr_str = strsep(&str, "\t"))) {
+
+		/* First comma-separated token is the instance name. */
+		tok = strsep(&curr_str, ",");
+
+		if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
+			do_allocate_snapshot(tok);
+
+		tr = trace_array_get_by_name(tok);
+		if (!tr) {
+			/*
+			 * strsep() has advanced curr_str past the name (it is
+			 * NULL when no events follow), so report tok instead.
+			 */
+			pr_warn("Failed to create instance buffer %s\n", tok);
+			continue;
+		}
+		/* Allow user space to delete it */
+		trace_array_put(tr);
+
+		/* Remaining tokens are events to enable on this instance. */
+		while ((tok = strsep(&curr_str, ","))) {
+			early_enable_events(tr, tok, true);
+		}
+	}
+}
+
__init static int tracer_alloc_buffers(void)
{
int ring_buf_size;
@@ -10277,10 +10391,19 @@ out:
void __init ftrace_boot_snapshot(void)
{
+ struct trace_array *tr;
+
if (snapshot_at_boot) {
tracing_snapshot();
internal_trace_puts("** Boot snapshot taken **\n");
}
+
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+ if (tr == &global_trace)
+ continue;
+ trace_array_puts(tr, "** Boot snapshot taken **\n");
+ tracing_snapshot_instance(tr);
+ }
}
void __init early_trace_init(void)
@@ -10302,6 +10425,9 @@ void __init early_trace_init(void)
void __init trace_init(void)
{
trace_event_init();
+
+ if (boot_instance_index)
+ enable_instances();
}
__init static void clear_boot_tracer(void)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 085a31b978a5..f3aae2be1d53 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -113,6 +113,10 @@ enum trace_type {
#define MEM_FAIL(condition, fmt, ...) \
DO_ONCE_LITE_IF(condition, pr_err, "ERROR: " fmt, ##__VA_ARGS__)
+#define HIST_STACKTRACE_DEPTH 16
+#define HIST_STACKTRACE_SIZE (HIST_STACKTRACE_DEPTH * sizeof(unsigned long))
+#define HIST_STACKTRACE_SKIP 5
+
/*
* syscalls are special, and need special handling, this is why
* they are not included in trace_entries.h
@@ -1331,6 +1335,8 @@ DECLARE_PER_CPU(int, trace_buffered_event_cnt);
void trace_buffered_event_disable(void);
void trace_buffered_event_enable(void);
+void early_enable_events(struct trace_array *tr, char *buf, bool disable_first);
+
static inline void
__trace_event_discard_commit(struct trace_buffer *buffer,
struct ring_buffer_event *event)
diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
index 352b65e2b910..594ac1d086aa 100644
--- a/kernel/trace/trace_eprobe.c
+++ b/kernel/trace/trace_eprobe.c
@@ -923,17 +923,13 @@ static int trace_eprobe_parse_filter(struct trace_eprobe *ep, int argc, const ch
p = ep->filter_str;
for (i = 0; i < argc; i++) {
- ret = snprintf(p, len, "%s ", argv[i]);
- if (ret < 0)
- goto error;
- if (ret > len) {
- ret = -E2BIG;
- goto error;
- }
+ if (i)
+ ret = snprintf(p, len, " %s", argv[i]);
+ else
+ ret = snprintf(p, len, "%s", argv[i]);
p += ret;
len -= ret;
}
- p[-1] = '\0';
/*
* Ensure the filter string can be parsed correctly. Note, this
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 6a942fa275c7..654ffa40457a 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2281,8 +2281,6 @@ create_new_subsystem(const char *name)
if (!system->name)
goto out_free;
- system->filter = NULL;
-
system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
if (!system->filter)
goto out_free;
@@ -2843,7 +2841,7 @@ static __init int setup_trace_triggers(char *str)
if (!trigger)
break;
bootup_triggers[i].event = strsep(&trigger, ".");
- bootup_triggers[i].trigger = strsep(&trigger, ".");
+ bootup_triggers[i].trigger = trigger;
if (!bootup_triggers[i].trigger)
break;
}
@@ -3771,10 +3769,9 @@ static __init int event_trace_memsetup(void)
return 0;
}
-static __init void
-early_enable_events(struct trace_array *tr, bool disable_first)
+__init void
+early_enable_events(struct trace_array *tr, char *buf, bool disable_first)
{
- char *buf = bootup_event_buf;
char *token;
int ret;
@@ -3827,7 +3824,7 @@ static __init int event_trace_enable(void)
*/
__trace_early_add_events(tr);
- early_enable_events(tr, false);
+ early_enable_events(tr, bootup_event_buf, false);
trace_printk_start_comm();
@@ -3855,7 +3852,7 @@ static __init int event_trace_enable_again(void)
if (!tr)
return -ENODEV;
- early_enable_events(tr, true);
+ early_enable_events(tr, bootup_event_buf, true);
return 0;
}
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index e095c3b3a50d..1dad64267878 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -64,6 +64,7 @@ enum filter_pred_fn {
FILTER_PRED_FN_PCHAR_USER,
FILTER_PRED_FN_PCHAR,
FILTER_PRED_FN_CPU,
+ FILTER_PRED_FN_FUNCTION,
FILTER_PRED_FN_,
FILTER_PRED_TEST_VISITED,
};
@@ -71,6 +72,7 @@ enum filter_pred_fn {
struct filter_pred {
enum filter_pred_fn fn_num;
u64 val;
+ u64 val2;
struct regex regex;
unsigned short *ops;
struct ftrace_event_field *field;
@@ -103,6 +105,7 @@ struct filter_pred {
C(INVALID_FILTER, "Meaningless filter expression"), \
C(IP_FIELD_ONLY, "Only 'ip' field is supported for function trace"), \
C(INVALID_VALUE, "Invalid value (did you forget quotes)?"), \
+ C(NO_FUNCTION, "Function not found"), \
C(ERRNO, "Error"), \
C(NO_FILTER, "No filter found")
@@ -876,6 +879,17 @@ static int filter_pred_comm(struct filter_pred *pred, void *event)
return cmp ^ pred->not;
}
+/*
+ * Filter predicate for functions.
+ *
+ * Loads an unsigned long from @event at byte offset pred->offset and tests
+ * whether it lies in the half-open range [pred->val, pred->val2).
+ *
+ * Returns the result of that range test when pred->op is OP_EQ, and its
+ * logical negation for any other op.
+ */
+static int filter_pred_function(struct filter_pred *pred, void *event)
+{
+ unsigned long *addr = (unsigned long *)(event + pred->offset);
+ unsigned long start = (unsigned long)pred->val;
+ unsigned long end = (unsigned long)pred->val2;
+ int ret = *addr >= start && *addr < end; /* start inclusive, end exclusive */
+
+ return pred->op == OP_EQ ? ret : !ret;
+}
+
/*
* regex_match_foo - Basic regex callbacks
*
@@ -1335,6 +1349,8 @@ static int filter_pred_fn_call(struct filter_pred *pred, void *event)
return filter_pred_pchar(pred, event);
case FILTER_PRED_FN_CPU:
return filter_pred_cpu(pred, event);
+ case FILTER_PRED_FN_FUNCTION:
+ return filter_pred_function(pred, event);
case FILTER_PRED_TEST_VISITED:
return test_pred_visited_fn(pred, event);
default:
@@ -1350,8 +1366,13 @@ static int parse_pred(const char *str, void *data,
struct trace_event_call *call = data;
struct ftrace_event_field *field;
struct filter_pred *pred = NULL;
+ unsigned long offset;
+ unsigned long size;
+ unsigned long ip;
char num_buf[24]; /* Big enough to hold an address */
char *field_name;
+ char *name;
+ bool function = false;
bool ustring = false;
char q;
u64 val;
@@ -1393,6 +1414,12 @@ static int parse_pred(const char *str, void *data,
i += len;
}
+ /* See if the field is a kernel function name */
+ if ((len = str_has_prefix(str + i, ".function"))) {
+ function = true;
+ i += len;
+ }
+
while (isspace(str[i]))
i++;
@@ -1423,7 +1450,71 @@ static int parse_pred(const char *str, void *data,
pred->offset = field->offset;
pred->op = op;
- if (ftrace_event_is_function(call)) {
+ if (function) {
+ /* The field must be the same size as long */
+ if (field->size != sizeof(long)) {
+ parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i);
+ goto err_free;
+ }
+
+ /* Function only works with '==' or '!=' and an unquoted string */
+ switch (op) {
+ case OP_NE:
+ case OP_EQ:
+ break;
+ default:
+ parse_error(pe, FILT_ERR_INVALID_OP, pos + i);
+ goto err_free;
+ }
+
+ if (isdigit(str[i])) {
+ /* We allow 0xDEADBEEF */
+ while (isalnum(str[i]))
+ i++;
+
+ len = i - s;
+ /* 0xfeedfacedeadbeef is 18 chars max */
+ if (len >= sizeof(num_buf)) {
+ parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i);
+ goto err_free;
+ }
+
+ strncpy(num_buf, str + s, len);
+ num_buf[len] = 0;
+
+ ret = kstrtoul(num_buf, 0, &ip);
+ if (ret) {
+ parse_error(pe, FILT_ERR_INVALID_VALUE, pos + i);
+ goto err_free;
+ }
+ } else {
+ s = i;
+ for (; str[i] && !isspace(str[i]); i++)
+ ;
+
+ len = i - s;
+ name = kmemdup_nul(str + s, len, GFP_KERNEL);
+ if (!name)
+ goto err_mem;
+ ip = kallsyms_lookup_name(name);
+ kfree(name);
+ if (!ip) {
+ parse_error(pe, FILT_ERR_NO_FUNCTION, pos + i);
+ goto err_free;
+ }
+ }
+
+ /* Now find the function start and end address */
+ if (!kallsyms_lookup_size_offset(ip, &size, &offset)) {
+ parse_error(pe, FILT_ERR_NO_FUNCTION, pos + i);
+ goto err_free;
+ }
+
+ pred->fn_num = FILTER_PRED_FN_FUNCTION;
+ pred->val = ip - offset;
+ pred->val2 = pred->val + size;
+
+ } else if (ftrace_event_is_function(call)) {
/*
* Perf does things different with function events.
* It only allows an "ip" field, and expects a string.
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 5edbf6b1da3f..89877a18f933 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -135,6 +135,7 @@ enum hist_field_fn {
HIST_FIELD_FN_DIV_NOT_POWER2,
HIST_FIELD_FN_DIV_MULT_SHIFT,
HIST_FIELD_FN_EXECNAME,
+ HIST_FIELD_FN_STACK,
};
/*
@@ -480,10 +481,6 @@ DEFINE_HIST_FIELD_FN(u8);
#define for_each_hist_key_field(i, hist_data) \
for ((i) = (hist_data)->n_vals; (i) < (hist_data)->n_fields; (i)++)
-#define HIST_STACKTRACE_DEPTH 16
-#define HIST_STACKTRACE_SIZE (HIST_STACKTRACE_DEPTH * sizeof(unsigned long))
-#define HIST_STACKTRACE_SKIP 5
-
#define HITCOUNT_IDX 0
#define HIST_KEY_SIZE_MAX (MAX_FILTER_STR_VAL + HIST_STACKTRACE_SIZE)
@@ -1360,7 +1357,12 @@ static const char *hist_field_name(struct hist_field *field,
field_name = field->name;
} else if (field->flags & HIST_FIELD_FL_TIMESTAMP)
field_name = "common_timestamp";
- else if (field->flags & HIST_FIELD_FL_HITCOUNT)
+ else if (field->flags & HIST_FIELD_FL_STACKTRACE) {
+ if (field->field)
+ field_name = field->field->name;
+ else
+ field_name = "stacktrace";
+ } else if (field->flags & HIST_FIELD_FL_HITCOUNT)
field_name = "hitcount";
if (field_name == NULL)
@@ -1718,6 +1720,8 @@ static const char *get_hist_field_flags(struct hist_field *hist_field)
flags_str = "percent";
else if (hist_field->flags & HIST_FIELD_FL_GRAPH)
flags_str = "graph";
+ else if (hist_field->flags & HIST_FIELD_FL_STACKTRACE)
+ flags_str = "stacktrace";
return flags_str;
}
@@ -1979,7 +1983,14 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
}
if (flags & HIST_FIELD_FL_STACKTRACE) {
- hist_field->fn_num = HIST_FIELD_FN_NOP;
+ if (field)
+ hist_field->fn_num = HIST_FIELD_FN_STACK;
+ else
+ hist_field->fn_num = HIST_FIELD_FN_NOP;
+ hist_field->size = HIST_STACKTRACE_SIZE;
+ hist_field->type = kstrdup_const("unsigned long[]", GFP_KERNEL);
+ if (!hist_field->type)
+ goto free;
goto out;
}
@@ -2312,6 +2323,8 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
*flags |= HIST_FIELD_FL_EXECNAME;
else if (strcmp(modifier, "syscall") == 0)
*flags |= HIST_FIELD_FL_SYSCALL;
+ else if (strcmp(modifier, "stacktrace") == 0)
+ *flags |= HIST_FIELD_FL_STACKTRACE;
else if (strcmp(modifier, "log2") == 0)
*flags |= HIST_FIELD_FL_LOG2;
else if (strcmp(modifier, "usecs") == 0)
@@ -2351,6 +2364,8 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
hist_data->enable_timestamps = true;
if (*flags & HIST_FIELD_FL_TIMESTAMP_USECS)
hist_data->attrs->ts_in_usecs = true;
+ } else if (strcmp(field_name, "stacktrace") == 0) {
+ *flags |= HIST_FIELD_FL_STACKTRACE;
} else if (strcmp(field_name, "common_cpu") == 0)
*flags |= HIST_FIELD_FL_CPU;
else if (strcmp(field_name, "hitcount") == 0)
@@ -3111,6 +3126,9 @@ static inline void __update_field_vars(struct tracing_map_elt *elt,
unsigned int i, j, var_idx;
u64 var_val;
+ /* Make sure stacktrace can fit in the string variable length */
+ BUILD_BUG_ON((HIST_STACKTRACE_DEPTH + 1) * sizeof(long) >= STR_VAR_LEN_MAX);
+
for (i = 0, j = field_var_str_start; i < n_field_vars; i++) {
struct field_var *field_var = field_vars[i];
struct hist_field *var = field_var->var;
@@ -3119,13 +3137,26 @@ static inline void __update_field_vars(struct tracing_map_elt *elt,
var_val = hist_fn_call(val, elt, buffer, rbe, rec);
var_idx = var->var.idx;
- if (val->flags & HIST_FIELD_FL_STRING) {
+ if (val->flags & (HIST_FIELD_FL_STRING |
+ HIST_FIELD_FL_STACKTRACE)) {
char *str = elt_data->field_var_str[j++];
char *val_str = (char *)(uintptr_t)var_val;
unsigned int size;
- size = min(val->size, STR_VAR_LEN_MAX);
- strscpy(str, val_str, size);
+ if (val->flags & HIST_FIELD_FL_STRING) {
+ size = min(val->size, STR_VAR_LEN_MAX);
+ strscpy(str, val_str, size);
+ } else {
+ char *stack_start = str + sizeof(unsigned long);
+ int e;
+
+ e = stack_trace_save((void *)stack_start,
+ HIST_STACKTRACE_DEPTH,
+ HIST_STACKTRACE_SKIP);
+ if (e < HIST_STACKTRACE_DEPTH - 1)
+ ((unsigned long *)stack_start)[e] = 0;
+ *((unsigned long *)str) = e;
+ }
var_val = (u64)(uintptr_t)str;
}
tracing_map_set_var(elt, var_idx, var_val);
@@ -3824,7 +3855,8 @@ static void save_field_var(struct hist_trigger_data *hist_data,
{
hist_data->field_vars[hist_data->n_field_vars++] = field_var;
- if (field_var->val->flags & HIST_FIELD_FL_STRING)
+ /* Stack traces are saved in the string storage too */
+ if (field_var->val->flags & (HIST_FIELD_FL_STRING | HIST_FIELD_FL_STACKTRACE))
hist_data->n_field_var_str++;
}
@@ -3849,6 +3881,9 @@ static int check_synth_field(struct synth_event *event,
&& field->is_dynamic)
return 0;
+ if (strstr(hist_field->type, "long[") && field->is_stack)
+ return 0;
+
if (strcmp(field->type, hist_field->type) != 0) {
if (field->size != hist_field->size ||
(!field->is_string && field->is_signed != hist_field->is_signed))
@@ -4103,7 +4138,8 @@ static int action_create(struct hist_trigger_data *hist_data,
}
hist_data->save_vars[hist_data->n_save_vars++] = field_var;
- if (field_var->val->flags & HIST_FIELD_FL_STRING)
+ if (field_var->val->flags &
+ (HIST_FIELD_FL_STRING | HIST_FIELD_FL_STACKTRACE))
hist_data->n_save_var_str++;
kfree(param);
}
@@ -4242,6 +4278,19 @@ static u64 hist_field_execname(struct hist_field *hist_field,
return (u64)(unsigned long)(elt_data->comm);
}
+/*
+ * Return the address of the data that an event field points at.
+ *
+ * Reads a 32-bit word from @event at hist_field->field->offset, treats its
+ * low 16 bits as a byte offset from the start of @event, and returns the
+ * resulting address widened to u64. @elt, @buffer and @rbe are not used.
+ *
+ * NOTE(review): the upper 16 bits of the word are ignored here; trace_stack()
+ * stores a length there for synthetic stack fields -- confirm that this is
+ * the layout this helper is meant to decode.
+ */
+static u64 hist_field_stack(struct hist_field *hist_field,
+ struct tracing_map_elt *elt,
+ struct trace_buffer *buffer,
+ struct ring_buffer_event *rbe,
+ void *event)
+{
+ u32 str_item = *(u32 *)(event + hist_field->field->offset);
+ int str_loc = str_item & 0xffff; /* low 16 bits: offset into the event */
+ char *addr = (char *)(event + str_loc);
+
+ return (u64)(unsigned long)addr;
+}
+
static u64 hist_fn_call(struct hist_field *hist_field,
struct tracing_map_elt *elt,
struct trace_buffer *buffer,
@@ -4305,6 +4354,8 @@ static u64 hist_fn_call(struct hist_field *hist_field,
return div_by_mult_and_shift(hist_field, elt, buffer, rbe, event);
case HIST_FIELD_FN_EXECNAME:
return hist_field_execname(hist_field, elt, buffer, rbe, event);
+ case HIST_FIELD_FN_STACK:
+ return hist_field_stack(hist_field, elt, buffer, rbe, event);
default:
return 0;
}
@@ -4351,7 +4402,8 @@ static int create_var_field(struct hist_trigger_data *hist_data,
if (!ret && hist_data->fields[val_idx]->flags & HIST_FIELD_FL_EXECNAME)
update_var_execname(hist_data->fields[val_idx]);
- if (!ret && hist_data->fields[val_idx]->flags & HIST_FIELD_FL_STRING)
+ if (!ret && hist_data->fields[val_idx]->flags &
+ (HIST_FIELD_FL_STRING | HIST_FIELD_FL_STACKTRACE))
hist_data->fields[val_idx]->var_str_idx = hist_data->n_var_str++;
return ret;
@@ -5092,7 +5144,8 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data,
if (hist_field->flags & HIST_FIELD_FL_VAR) {
var_idx = hist_field->var.idx;
- if (hist_field->flags & HIST_FIELD_FL_STRING) {
+ if (hist_field->flags &
+ (HIST_FIELD_FL_STRING | HIST_FIELD_FL_STACKTRACE)) {
unsigned int str_start, var_str_idx, idx;
char *str, *val_str;
unsigned int size;
@@ -5105,9 +5158,20 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data,
str = elt_data->field_var_str[idx];
val_str = (char *)(uintptr_t)hist_val;
- size = min(hist_field->size, STR_VAR_LEN_MAX);
- strscpy(str, val_str, size);
-
+ if (hist_field->flags & HIST_FIELD_FL_STRING) {
+ size = min(hist_field->size, STR_VAR_LEN_MAX);
+ strscpy(str, val_str, size);
+ } else {
+ char *stack_start = str + sizeof(unsigned long);
+ int e;
+
+ e = stack_trace_save((void *)stack_start,
+ HIST_STACKTRACE_DEPTH,
+ HIST_STACKTRACE_SKIP);
+ if (e < HIST_STACKTRACE_DEPTH - 1)
+ ((unsigned long *)stack_start)[e] = 0;
+ *((unsigned long *)str) = e;
+ }
hist_val = (u64)(uintptr_t)str;
}
tracing_map_set_var(elt, var_idx, hist_val);
@@ -5193,8 +5257,17 @@ static void event_hist_trigger(struct event_trigger_data *data,
if (key_field->flags & HIST_FIELD_FL_STACKTRACE) {
memset(entries, 0, HIST_STACKTRACE_SIZE);
- stack_trace_save(entries, HIST_STACKTRACE_DEPTH,
- HIST_STACKTRACE_SKIP);
+ if (key_field->field) {
+ unsigned long *stack, n_entries;
+
+ field_contents = hist_fn_call(key_field, elt, buffer, rbe, rec);
+ stack = (unsigned long *)(long)field_contents;
+ n_entries = *stack;
+ memcpy(entries, ++stack, n_entries * sizeof(unsigned long));
+ } else {
+ stack_trace_save(entries, HIST_STACKTRACE_DEPTH,
+ HIST_STACKTRACE_SKIP);
+ }
key = entries;
} else {
field_contents = hist_fn_call(key_field, elt, buffer, rbe, rec);
@@ -5297,7 +5370,10 @@ static void hist_trigger_print_key(struct seq_file *m,
seq_printf(m, "%s: %-30s[%3llu]", field_name,
syscall_name, uval);
} else if (key_field->flags & HIST_FIELD_FL_STACKTRACE) {
- seq_puts(m, "stacktrace:\n");
+ if (key_field->field)
+ seq_printf(m, "%s.stacktrace", key_field->field->name);
+ else
+ seq_puts(m, "stacktrace:\n");
hist_trigger_stacktrace_print(m,
key + key_field->offset,
HIST_STACKTRACE_DEPTH);
@@ -5842,7 +5918,8 @@ static void hist_field_print(struct seq_file *m, struct hist_field *hist_field)
if (hist_field->flags) {
if (!(hist_field->flags & HIST_FIELD_FL_VAR_REF) &&
- !(hist_field->flags & HIST_FIELD_FL_EXPR)) {
+ !(hist_field->flags & HIST_FIELD_FL_EXPR) &&
+ !(hist_field->flags & HIST_FIELD_FL_STACKTRACE)) {
const char *flags = get_hist_field_flags(hist_field);
if (flags)
@@ -5875,9 +5952,12 @@ static int event_hist_trigger_print(struct seq_file *m,
if (i > hist_data->n_vals)
seq_puts(m, ",");
- if (field->flags & HIST_FIELD_FL_STACKTRACE)
- seq_puts(m, "stacktrace");
- else
+ if (field->flags & HIST_FIELD_FL_STACKTRACE) {
+ if (field->field)
+ seq_printf(m, "%s.stacktrace", field->field->name);
+ else
+ seq_puts(m, "stacktrace");
+ } else
hist_field_print(m, field);
}
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 67592eed0be8..70bddb25d9c0 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -173,6 +173,14 @@ static int synth_field_is_string(char *type)
return false;
}
+/*
+ * Report whether a synthetic field type string is treated as a stack.
+ *
+ * The test is a plain substring match: any @type containing "long[" counts,
+ * regardless of what precedes it or of the array size that follows.
+ *
+ * Returns true if @type matches, false otherwise.
+ */
+static int synth_field_is_stack(char *type)
+{
+ if (strstr(type, "long[") != NULL)
+ return true;
+
+ return false;
+}
+
static int synth_field_string_size(char *type)
{
char buf[4], *end, *start;
@@ -248,6 +256,8 @@ static int synth_field_size(char *type)
size = sizeof(gfp_t);
else if (synth_field_is_string(type))
size = synth_field_string_size(type);
+ else if (synth_field_is_stack(type))
+ size = 0;
return size;
}
@@ -292,6 +302,8 @@ static const char *synth_field_fmt(char *type)
fmt = "%x";
else if (synth_field_is_string(type))
fmt = "%.*s";
+ else if (synth_field_is_stack(type))
+ fmt = "%s";
return fmt;
}
@@ -371,6 +383,23 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
i == se->n_fields - 1 ? "" : " ");
n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
}
+ } else if (se->fields[i]->is_stack) {
+ u32 offset, data_offset, len;
+ unsigned long *p, *end;
+
+ offset = (u32)entry->fields[n_u64];
+ data_offset = offset & 0xffff;
+ len = offset >> 16;
+
+ p = (void *)entry + data_offset;
+ end = (void *)p + len - (sizeof(long) - 1);
+
+ trace_seq_printf(s, "%s=STACK:\n", se->fields[i]->name);
+
+ for (; *p && p < end; p++)
+ trace_seq_printf(s, "=> %pS\n", (void *)*p);
+ n_u64++;
+
} else {
struct trace_print_flags __flags[] = {
__def_gfpflag_names, {-1, NULL} };
@@ -416,8 +445,7 @@ static unsigned int trace_string(struct synth_trace_event *entry,
if (is_dynamic) {
u32 data_offset;
- data_offset = offsetof(typeof(*entry), fields);
- data_offset += event->n_u64 * sizeof(u64);
+ data_offset = struct_size(entry, fields, event->n_u64);
data_offset += data_size;
len = kern_fetch_store_strlen((unsigned long)str_val);
@@ -447,6 +475,43 @@ static unsigned int trace_string(struct synth_trace_event *entry,
return len;
}
+/*
+ * Copy a zero-terminated array of longs into the dynamic area of a
+ * synthetic event entry and record where it was placed.
+ *
+ * @entry: synthetic event record being filled in
+ * @event: synthetic event description; event->n_u64 sizes the fixed
+ * fields[] array that precedes the dynamic area
+ * @stack: array to copy; scanned from index 0 for the first zero element,
+ * looking at no more than HIST_STACKTRACE_DEPTH elements
+ * @data_size: byte offset past the fixed fields at which to place the copy
+ * @n_u64: index of the fields[] slot to fill; incremented on return
+ *
+ * The slot receives a u32 holding the byte offset of the copy from the start
+ * of @entry in its low bits, OR-ed with the copied length shifted left by 16.
+ *
+ * Returns the number of bytes copied, including the zero element when one
+ * was included.
+ *
+ * NOTE(review): the visible caller in trace_event_raw_event_synth() sizes the
+ * reservation for a stack field as stack[0] * sizeof(unsigned long), i.e. it
+ * reads the first word as an entry count, whereas the scan below starts at
+ * stack[0] and may append a terminator. Confirm that the reserved space
+ * covers the bytes copied here.
+ */
+static unsigned int trace_stack(struct synth_trace_event *entry,
+ struct synth_event *event,
+ long *stack,
+ unsigned int data_size,
+ unsigned int *n_u64)
+{
+ unsigned int len;
+ u32 data_offset;
+ void *data_loc;
+
+ data_offset = struct_size(entry, fields, event->n_u64);
+ data_offset += data_size;
+
+ /* Count the leading non-zero elements, capped at the maximum depth */
+ for (len = 0; len < HIST_STACKTRACE_DEPTH; len++) {
+ if (!stack[len])
+ break;
+ }
+
+ /* Include the zero'd element if it fits */
+ if (len < HIST_STACKTRACE_DEPTH)
+ len++;
+
+ len *= sizeof(long); /* from here on len is a byte count */
+
+ /* Find the dynamic section to copy the stack into. */
+ data_loc = (void *)entry + data_offset;
+ memcpy(data_loc, stack, len);
+
+ /* Fill in the field that holds the offset/len combo */
+ data_offset |= len << 16;
+ *(u32 *)&entry->fields[*n_u64] = data_offset;
+
+ (*n_u64)++;
+
+ return len;
+}
+
static notrace void trace_event_raw_event_synth(void *__data,
u64 *var_ref_vals,
unsigned int *var_ref_idx)
@@ -473,7 +538,12 @@ static notrace void trace_event_raw_event_synth(void *__data,
val_idx = var_ref_idx[field_pos];
str_val = (char *)(long)var_ref_vals[val_idx];
- len = kern_fetch_store_strlen((unsigned long)str_val);
+ if (event->dynamic_fields[i]->is_stack) {
+ len = *((unsigned long *)str_val);
+ len *= sizeof(unsigned long);
+ } else {
+ len = kern_fetch_store_strlen((unsigned long)str_val);
+ }
fields_size += len;
}
@@ -499,6 +569,12 @@ static notrace void trace_event_raw_event_synth(void *__data,
event->fields[i]->is_dynamic,
data_size, &n_u64);
data_size += len; /* only dynamic string increments */
+ } else if (event->fields[i]->is_stack) {
+ long *stack = (long *)(long)var_ref_vals[val_idx];
+
+ len = trace_stack(entry, event, stack,
+ data_size, &n_u64);
+ data_size += len;
} else {
struct synth_field *field = event->fields[i];
u64 val = var_ref_vals[val_idx];
@@ -561,6 +637,9 @@ static int __set_synth_event_print_fmt(struct synth_event *event,
event->fields[i]->is_dynamic)
pos += snprintf(buf + pos, LEN_OR_ZERO,
", __get_str(%s)", event->fields[i]->name);
+ else if (event->fields[i]->is_stack)
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ ", __get_stacktrace(%s)", event->fields[i]->name);
else
pos += snprintf(buf + pos, LEN_OR_ZERO,
", REC->%s", event->fields[i]->name);
@@ -697,7 +776,8 @@ static struct synth_field *parse_synth_field(int argc, char **argv,
ret = -EINVAL;
goto free;
} else if (size == 0) {
- if (synth_field_is_string(field->type)) {
+ if (synth_field_is_string(field->type) ||
+ synth_field_is_stack(field->type)) {
char *type;
len = sizeof("__data_loc ") + strlen(field->type) + 1;
@@ -728,6 +808,8 @@ static struct synth_field *parse_synth_field(int argc, char **argv,
if (synth_field_is_string(field->type))
field->is_string = true;
+ else if (synth_field_is_stack(field->type))
+ field->is_stack = true;
field->is_signed = synth_field_signed(field->type);
out:
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 210e1f168392..04f0fdae19a1 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -1539,7 +1539,7 @@ static void osnoise_sleep(void)
wake_time = ktime_add_us(ktime_get(), interval);
__set_current_state(TASK_INTERRUPTIBLE);
- while (schedule_hrtimeout_range(&wake_time, 0, HRTIMER_MODE_ABS)) {
+ while (schedule_hrtimeout(&wake_time, HRTIMER_MODE_ABS)) {
if (kthread_should_stop())
break;
}
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 01ebabbbe8c9..11008c098727 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -50,6 +50,7 @@ DEFINE_BASIC_PRINT_TYPE_FUNC(x8, u8, "0x%x")
DEFINE_BASIC_PRINT_TYPE_FUNC(x16, u16, "0x%x")
DEFINE_BASIC_PRINT_TYPE_FUNC(x32, u32, "0x%x")
DEFINE_BASIC_PRINT_TYPE_FUNC(x64, u64, "0x%Lx")
+DEFINE_BASIC_PRINT_TYPE_FUNC(char, u8, "'%c'")
int PRINT_TYPE_FUNC_NAME(symbol)(struct trace_seq *s, void *data, void *ent)
{
@@ -95,6 +96,7 @@ static const struct fetch_type probe_fetch_types[] = {
ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0),
ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0),
ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0),
+ ASSIGN_FETCH_TYPE_ALIAS(char, u8, u8, 0),
ASSIGN_FETCH_TYPE_ALIAS(symbol, ADDR_FETCH_TYPE, ADDR_FETCH_TYPE, 0),
ASSIGN_FETCH_TYPE_END
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 23acfd1c3812..b4f99553411e 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -166,6 +166,7 @@ DECLARE_BASIC_PRINT_TYPE_FUNC(x16);
DECLARE_BASIC_PRINT_TYPE_FUNC(x32);
DECLARE_BASIC_PRINT_TYPE_FUNC(x64);
+DECLARE_BASIC_PRINT_TYPE_FUNC(char);
DECLARE_BASIC_PRINT_TYPE_FUNC(string);
DECLARE_BASIC_PRINT_TYPE_FUNC(symbol);
diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c
index 9c90b3a7dce2..e5e299260d0c 100644
--- a/kernel/trace/trace_seq.c
+++ b/kernel/trace/trace_seq.c
@@ -403,3 +403,26 @@ int trace_seq_hex_dump(struct trace_seq *s, const char *prefix_str,
return 1;
}
EXPORT_SYMBOL(trace_seq_hex_dump);
+
+/**
+ * trace_seq_acquire - reserve @len bytes of the seq buffer for direct output
+ * @s: trace sequence descriptor
+ * @len: number of bytes to reserve
+ *
+ * Reserves @len bytes of the trace_seq buffer so that the caller can fill
+ * them in directly. It may be called several times within a single trace
+ * output function.
+ *
+ * If fewer than @len bytes are left in the buffer, a one-time warning is
+ * issued and nothing is committed.
+ *
+ * Returns the address obtained from trace_seq_buffer_ptr() before the
+ * commit, i.e. the start of the reserved region.
+ *
+ * NOTE(review): the same pointer is returned when the reservation fails, so
+ * a caller cannot tell the two cases apart from the return value -- confirm
+ * that callers guarantee @len bytes are available.
+ */
+char *trace_seq_acquire(struct trace_seq *s, unsigned int len)
+{
+ char *ret = trace_seq_buffer_ptr(s);
+
+ /* Only advance the buffer when the whole request fits */
+ if (!WARN_ON_ONCE(seq_buf_buffer_left(&s->seq) < len))
+ seq_buf_commit(&s->seq, len);
+
+ return ret;
+}
+EXPORT_SYMBOL(trace_seq_acquire);
diff --git a/kernel/trace/trace_synth.h b/kernel/trace/trace_synth.h
index b29595fe3ac5..43f6fb6078db 100644
--- a/kernel/trace/trace_synth.h
+++ b/kernel/trace/trace_synth.h
@@ -18,6 +18,7 @@ struct synth_field {
bool is_signed;
bool is_string;
bool is_dynamic;
+ bool is_stack;
};
struct synth_event {
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index f23144af5743..8d1507dd0724 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -571,8 +571,8 @@ static void for_each_tracepoint_range(
bool trace_module_has_bad_taint(struct module *mod)
{
return mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP) |
- (1 << TAINT_UNSIGNED_MODULE) |
- (1 << TAINT_TEST));
+ (1 << TAINT_UNSIGNED_MODULE) | (1 << TAINT_TEST) |
+ (1 << TAINT_LIVEPATCH));
}
static BLOCKING_NOTIFIER_HEAD(tracepoint_notify_list);