summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/arena.c2
-rw-r--r--kernel/bpf/bpf_cgrp_storage.c2
-rw-r--r--kernel/bpf/btf.c2
-rw-r--r--kernel/bpf/ringbuf.c4
-rw-r--r--kernel/bpf/syscall.c43
-rw-r--r--kernel/bpf/verifier.c31
-rw-r--r--kernel/cgroup/dmem.c50
-rw-r--r--kernel/trace/fprobe.c12
-rw-r--r--kernel/trace/ftrace.c36
-rw-r--r--kernel/trace/trace_events.c11
-rw-r--r--kernel/trace/trace_functions.c6
11 files changed, 101 insertions, 98 deletions
diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c
index 870aeb51d70a..095a9554e1de 100644
--- a/kernel/bpf/arena.c
+++ b/kernel/bpf/arena.c
@@ -39,7 +39,7 @@
*/
/* number of bytes addressable by LDX/STX insn with 16-bit 'off' field */
-#define GUARD_SZ (1ull << sizeof_field(struct bpf_insn, off) * 8)
+#define GUARD_SZ round_up(1ull << sizeof_field(struct bpf_insn, off) * 8, PAGE_SIZE << 1)
#define KERN_VM_SZ (SZ_4G + GUARD_SZ)
struct bpf_arena {
diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c
index d5dc65bb1755..54ff2a85d4c0 100644
--- a/kernel/bpf/bpf_cgrp_storage.c
+++ b/kernel/bpf/bpf_cgrp_storage.c
@@ -153,7 +153,7 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
static void cgroup_storage_map_free(struct bpf_map *map)
{
- bpf_local_storage_map_free(map, &cgroup_cache, NULL);
+ bpf_local_storage_map_free(map, &cgroup_cache, &bpf_cgrp_storage_busy);
}
/* *gfp_flags* is a hidden argument provided by the verifier */
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 9de6acddd479..c3223e0db2f5 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6507,6 +6507,8 @@ static const struct bpf_raw_tp_null_args raw_tp_null_args[] = {
/* rxrpc */
{ "rxrpc_recvdata", 0x1 },
{ "rxrpc_resend", 0x10 },
+ /* skb */
+ {"kfree_skb", 0x1000},
/* sunrpc */
{ "xs_stream_read_data", 0x1 },
/* ... from xprt_cong_event event class */
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index e1cfe890e0be..1499d8caa9a3 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -268,8 +268,6 @@ static int ringbuf_map_mmap_kern(struct bpf_map *map, struct vm_area_struct *vma
/* allow writable mapping for the consumer_pos only */
if (vma->vm_pgoff != 0 || vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EPERM;
- } else {
- vm_flags_clear(vma, VM_MAYWRITE);
}
/* remap_vmalloc_range() checks size and offset constraints */
return remap_vmalloc_range(vma, rb_map->rb,
@@ -289,8 +287,6 @@ static int ringbuf_map_mmap_user(struct bpf_map *map, struct vm_area_struct *vma
* position, and the ring buffer data itself.
*/
return -EPERM;
- } else {
- vm_flags_clear(vma, VM_MAYWRITE);
}
/* remap_vmalloc_range() checks size and offset constraints */
return remap_vmalloc_range(vma, rb_map->rb, vma->vm_pgoff + RINGBUF_PGOFF);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index c420edbfb7c8..e1e42e918ba7 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1035,7 +1035,7 @@ static const struct vm_operations_struct bpf_map_default_vmops = {
static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct bpf_map *map = filp->private_data;
- int err;
+ int err = 0;
if (!map->ops->map_mmap || !IS_ERR_OR_NULL(map->record))
return -ENOTSUPP;
@@ -1059,24 +1059,33 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
err = -EACCES;
goto out;
}
+ bpf_map_write_active_inc(map);
}
+out:
+ mutex_unlock(&map->freeze_mutex);
+ if (err)
+ return err;
/* set default open/close callbacks */
vma->vm_ops = &bpf_map_default_vmops;
vma->vm_private_data = map;
vm_flags_clear(vma, VM_MAYEXEC);
+ /* If mapping is read-only, then disallow potentially re-mapping with
+ * PROT_WRITE by dropping VM_MAYWRITE flag. This VM_MAYWRITE clearing
+ * means that as far as BPF map's memory-mapped VMAs are concerned,
+ * VM_WRITE and VM_MAYWRITE and equivalent, if one of them is set,
+ * both should be set, so we can forget about VM_MAYWRITE and always
+ * check just VM_WRITE
+ */
if (!(vma->vm_flags & VM_WRITE))
- /* disallow re-mapping with PROT_WRITE */
vm_flags_clear(vma, VM_MAYWRITE);
err = map->ops->map_mmap(map, vma);
- if (err)
- goto out;
+ if (err) {
+ if (vma->vm_flags & VM_WRITE)
+ bpf_map_write_active_dec(map);
+ }
- if (vma->vm_flags & VM_MAYWRITE)
- bpf_map_write_active_inc(map);
-out:
- mutex_unlock(&map->freeze_mutex);
return err;
}
@@ -1968,8 +1977,6 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
return err;
}
-#define MAP_LOOKUP_RETRIES 3
-
int generic_map_lookup_batch(struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
@@ -1979,8 +1986,8 @@ int generic_map_lookup_batch(struct bpf_map *map,
void __user *values = u64_to_user_ptr(attr->batch.values);
void __user *keys = u64_to_user_ptr(attr->batch.keys);
void *buf, *buf_prevkey, *prev_key, *key, *value;
- int err, retry = MAP_LOOKUP_RETRIES;
u32 value_size, cp, max_count;
+ int err;
if (attr->batch.elem_flags & ~BPF_F_LOCK)
return -EINVAL;
@@ -2026,14 +2033,8 @@ int generic_map_lookup_batch(struct bpf_map *map,
err = bpf_map_copy_value(map, key, value,
attr->batch.elem_flags);
- if (err == -ENOENT) {
- if (retry) {
- retry--;
- continue;
- }
- err = -EINTR;
- break;
- }
+ if (err == -ENOENT)
+ goto next_key;
if (err)
goto free_buf;
@@ -2048,12 +2049,12 @@ int generic_map_lookup_batch(struct bpf_map *map,
goto free_buf;
}
+ cp++;
+next_key:
if (!prev_key)
prev_key = buf_prevkey;
swap(prev_key, key);
- retry = MAP_LOOKUP_RETRIES;
- cp++;
cond_resched();
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9971c03adfd5..60611df77957 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1501,6 +1501,8 @@ static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, enum r
struct bpf_reference_state *s;
s = acquire_reference_state(env, insn_idx);
+ if (!s)
+ return -ENOMEM;
s->type = type;
s->id = id;
s->ptr = ptr;
@@ -9149,10 +9151,11 @@ static int check_reg_const_str(struct bpf_verifier_env *env,
return 0;
}
-/* Returns constant key value if possible, else negative error */
-static s64 get_constant_map_key(struct bpf_verifier_env *env,
+/* Returns constant key value in `value` if possible, else negative error */
+static int get_constant_map_key(struct bpf_verifier_env *env,
struct bpf_reg_state *key,
- u32 key_size)
+ u32 key_size,
+ s64 *value)
{
struct bpf_func_state *state = func(env, key);
struct bpf_reg_state *reg;
@@ -9179,8 +9182,10 @@ static s64 get_constant_map_key(struct bpf_verifier_env *env,
/* First handle precisely tracked STACK_ZERO */
for (i = off; i >= 0 && stype[i] == STACK_ZERO; i--)
zero_size++;
- if (zero_size >= key_size)
+ if (zero_size >= key_size) {
+ *value = 0;
return 0;
+ }
/* Check that stack contains a scalar spill of expected size */
if (!is_spilled_scalar_reg(&state->stack[spi]))
@@ -9203,9 +9208,12 @@ static s64 get_constant_map_key(struct bpf_verifier_env *env,
if (err < 0)
return err;
- return reg->var_off.value;
+ *value = reg->var_off.value;
+ return 0;
}
+static bool can_elide_value_nullness(enum bpf_map_type type);
+
static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
struct bpf_call_arg_meta *meta,
const struct bpf_func_proto *fn,
@@ -9354,9 +9362,16 @@ skip_type_check:
err = check_helper_mem_access(env, regno, key_size, BPF_READ, false, NULL);
if (err)
return err;
- meta->const_map_key = get_constant_map_key(env, reg, key_size);
- if (meta->const_map_key < 0 && meta->const_map_key != -EOPNOTSUPP)
- return meta->const_map_key;
+ if (can_elide_value_nullness(meta->map_ptr->map_type)) {
+ err = get_constant_map_key(env, reg, key_size, &meta->const_map_key);
+ if (err < 0) {
+ meta->const_map_key = -1;
+ if (err == -EOPNOTSUPP)
+ err = 0;
+ else
+ return err;
+ }
+ }
break;
case ARG_PTR_TO_MAP_VALUE:
if (type_may_be_null(arg_type) && register_is_null(reg))
diff --git a/kernel/cgroup/dmem.c b/kernel/cgroup/dmem.c
index fbe34299673d..10b63433f057 100644
--- a/kernel/cgroup/dmem.c
+++ b/kernel/cgroup/dmem.c
@@ -220,60 +220,32 @@ dmem_cgroup_calculate_protection(struct dmem_cgroup_pool_state *limit_pool,
struct dmem_cgroup_pool_state *test_pool)
{
struct page_counter *climit;
- struct cgroup_subsys_state *css, *next_css;
+ struct cgroup_subsys_state *css;
struct dmemcg_state *dmemcg_iter;
- struct dmem_cgroup_pool_state *pool, *parent_pool;
- bool found_descendant;
+ struct dmem_cgroup_pool_state *pool, *found_pool;
climit = &limit_pool->cnt;
rcu_read_lock();
- parent_pool = pool = limit_pool;
- css = &limit_pool->cs->css;
- /*
- * This logic is roughly equivalent to css_foreach_descendant_pre,
- * except we also track the parent pool to find out which pool we need
- * to calculate protection values for.
- *
- * We can stop the traversal once we find test_pool among the
- * descendants since we don't really care about any others.
- */
- while (pool != test_pool) {
- next_css = css_next_child(NULL, css);
- if (next_css) {
- parent_pool = pool;
- } else {
- while (css != &limit_pool->cs->css) {
- next_css = css_next_child(css, css->parent);
- if (next_css)
- break;
- css = css->parent;
- parent_pool = pool_parent(parent_pool);
- }
- /*
- * We can only hit this when test_pool is not a
- * descendant of limit_pool.
- */
- if (WARN_ON_ONCE(css == &limit_pool->cs->css))
- break;
- }
- css = next_css;
-
- found_descendant = false;
+ css_for_each_descendant_pre(css, &limit_pool->cs->css) {
dmemcg_iter = container_of(css, struct dmemcg_state, css);
+ found_pool = NULL;
list_for_each_entry_rcu(pool, &dmemcg_iter->pools, css_node) {
- if (pool_parent(pool) == parent_pool) {
- found_descendant = true;
+ if (pool->region == limit_pool->region) {
+ found_pool = pool;
break;
}
}
- if (!found_descendant)
+ if (!found_pool)
continue;
page_counter_calculate_protection(
- climit, &pool->cnt, true);
+ climit, &found_pool->cnt, true);
+
+ if (found_pool == test_pool)
+ break;
}
rcu_read_unlock();
}
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 2560b312ad57..33082c4e8154 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -403,13 +403,12 @@ static void fprobe_graph_remove_ips(unsigned long *addrs, int num)
lockdep_assert_held(&fprobe_mutex);
fprobe_graph_active--;
- if (!fprobe_graph_active) {
- /* Q: should we unregister it ? */
+ /* Q: should we unregister it ? */
+ if (!fprobe_graph_active)
unregister_ftrace_graph(&fprobe_graph_ops);
- return;
- }
- ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0);
+ if (num)
+ ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0);
}
static int symbols_cmp(const void *a, const void *b)
@@ -679,8 +678,7 @@ int unregister_fprobe(struct fprobe *fp)
}
del_fprobe_hash(fp);
- if (count)
- fprobe_graph_remove_ips(addrs, count);
+ fprobe_graph_remove_ips(addrs, count);
kfree_rcu(hlist_array, rcu);
fp->hlist_array = NULL;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 728ecda6e8d4..6b0c25761ccb 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3220,15 +3220,22 @@ static struct ftrace_hash *copy_hash(struct ftrace_hash *src)
* The filter_hash updates uses just the append_hash() function
* and the notrace_hash does not.
*/
-static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash)
+static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash,
+ int size_bits)
{
struct ftrace_func_entry *entry;
int size;
int i;
- /* An empty hash does everything */
- if (ftrace_hash_empty(*hash))
- return 0;
+ if (*hash) {
+ /* An empty hash does everything */
+ if (ftrace_hash_empty(*hash))
+ return 0;
+ } else {
+ *hash = alloc_ftrace_hash(size_bits);
+ if (!*hash)
+ return -ENOMEM;
+ }
/* If new_hash has everything make hash have everything */
if (ftrace_hash_empty(new_hash)) {
@@ -3292,16 +3299,18 @@ static int intersect_hash(struct ftrace_hash **hash, struct ftrace_hash *new_has
/* Return a new hash that has a union of all @ops->filter_hash entries */
static struct ftrace_hash *append_hashes(struct ftrace_ops *ops)
{
- struct ftrace_hash *new_hash;
+ struct ftrace_hash *new_hash = NULL;
struct ftrace_ops *subops;
+ int size_bits;
int ret;
- new_hash = alloc_ftrace_hash(ops->func_hash->filter_hash->size_bits);
- if (!new_hash)
- return NULL;
+ if (ops->func_hash->filter_hash)
+ size_bits = ops->func_hash->filter_hash->size_bits;
+ else
+ size_bits = FTRACE_HASH_DEFAULT_BITS;
list_for_each_entry(subops, &ops->subop_list, list) {
- ret = append_hash(&new_hash, subops->func_hash->filter_hash);
+ ret = append_hash(&new_hash, subops->func_hash->filter_hash, size_bits);
if (ret < 0) {
free_ftrace_hash(new_hash);
return NULL;
@@ -3310,7 +3319,8 @@ static struct ftrace_hash *append_hashes(struct ftrace_ops *ops)
if (ftrace_hash_empty(new_hash))
break;
}
- return new_hash;
+ /* Can't return NULL as that means this failed */
+ return new_hash ? : EMPTY_HASH;
}
/* Make @ops trace evenything except what all its subops do not trace */
@@ -3505,7 +3515,8 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int
filter_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->filter_hash);
if (!filter_hash)
return -ENOMEM;
- ret = append_hash(&filter_hash, subops->func_hash->filter_hash);
+ ret = append_hash(&filter_hash, subops->func_hash->filter_hash,
+ size_bits);
if (ret < 0) {
free_ftrace_hash(filter_hash);
return ret;
@@ -5707,6 +5718,9 @@ __ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove)
return -ENOENT;
free_hash_entry(hash, entry);
return 0;
+ } else if (__ftrace_lookup_ip(hash, ip) != NULL) {
+ /* Already exists */
+ return 0;
}
entry = add_hash_entry(hash, ip);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 4cb275316e51..513de9ceb80e 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1591,6 +1591,13 @@ s_next(struct seq_file *m, void *v, loff_t *pos)
return iter;
#endif
+ /*
+ * The iter is allocated in s_start() and passed via the 'v'
+ * parameter. To stop the iterator, NULL must be returned. But
+ * the return value is what the 'v' parameter in s_stop() receives
+ * and frees. Free iter here as it will no longer be used.
+ */
+ kfree(iter);
return NULL;
}
@@ -1667,9 +1674,9 @@ static int s_show(struct seq_file *m, void *v)
}
#endif
-static void s_stop(struct seq_file *m, void *p)
+static void s_stop(struct seq_file *m, void *v)
{
- kfree(p);
+ kfree(v);
t_stop(m, NULL);
}
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index d358c9935164..df56f9b76010 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -216,7 +216,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
parent_ip = function_get_true_parent_ip(parent_ip, fregs);
- trace_ctx = tracing_gen_ctx();
+ trace_ctx = tracing_gen_ctx_dec();
data = this_cpu_ptr(tr->array_buffer.data);
if (!atomic_read(&data->disabled))
@@ -321,7 +321,6 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
struct trace_array *tr = op->private;
struct trace_array_cpu *data;
unsigned int trace_ctx;
- unsigned long flags;
int bit;
if (unlikely(!tr->function_enabled))
@@ -347,8 +346,7 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
if (is_repeat_check(tr, last_info, ip, parent_ip))
goto out;
- local_save_flags(flags);
- trace_ctx = tracing_gen_ctx_flags(flags);
+ trace_ctx = tracing_gen_ctx_dec();
process_repeats(tr, ip, parent_ip, last_info, trace_ctx);
trace_function(tr, ip, parent_ip, trace_ctx);