diff options
Diffstat (limited to 'kernel/bpf/task_iter.c')
| -rw-r--r-- | kernel/bpf/task_iter.c | 302 |
1 files changed, 254 insertions, 48 deletions
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index c4ab9d6cdbe9..98d9b4c0daff 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -5,9 +5,10 @@ #include <linux/namei.h> #include <linux/pid_namespace.h> #include <linux/fs.h> -#include <linux/fdtable.h> #include <linux/filter.h> +#include <linux/bpf_mem_alloc.h> #include <linux/btf_ids.h> +#include <linux/mm_types.h> #include "mmap_unlock_work.h" static const char * const iter_task_type_names[] = { @@ -35,16 +36,13 @@ static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_comm u32 *tid, bool skip_if_dup_files) { - struct task_struct *task, *next_task; + struct task_struct *task; struct pid *pid; - u32 saved_tid; + u32 next_tid; if (!*tid) { /* The first time, the iterator calls this function. */ pid = find_pid_ns(common->pid, common->ns); - if (!pid) - return NULL; - task = get_pid_task(pid, PIDTYPE_TGID); if (!task) return NULL; @@ -66,44 +64,25 @@ static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_comm return task; } - pid = find_pid_ns(common->pid_visiting, common->ns); - if (!pid) - return NULL; - - task = get_pid_task(pid, PIDTYPE_PID); + task = find_task_by_pid_ns(common->pid_visiting, common->ns); if (!task) return NULL; retry: - if (!pid_alive(task)) { - put_task_struct(task); - return NULL; - } - - next_task = next_thread(task); - put_task_struct(task); - if (!next_task) + task = __next_thread(task); + if (!task) return NULL; - saved_tid = *tid; - *tid = __task_pid_nr_ns(next_task, PIDTYPE_PID, common->ns); - if (!*tid || *tid == common->pid) { - /* Run out of tasks of a process. The tasks of a - * thread_group are linked as circular linked list. - */ - *tid = saved_tid; - return NULL; - } - - get_task_struct(next_task); - common->pid_visiting = *tid; + next_tid = __task_pid_nr_ns(task, PIDTYPE_PID, common->ns); + if (!next_tid) + goto retry; - if (skip_if_dup_files && task->files == task->group_leader->files) { - task = next_task; + if (skip_if_dup_files && task->files == task->group_leader->files) goto retry; - } - return next_task; + *tid = common->pid_visiting = next_tid; + get_task_struct(task); + return task; } static struct task_struct *task_seq_get_next(struct bpf_iter_seq_task_common *common, @@ -119,7 +98,7 @@ static struct task_struct *task_seq_get_next(struct bpf_iter_seq_task_common *co rcu_read_lock(); pid = find_pid_ns(common->pid, common->ns); if (pid) { - task = get_pid_task(pid, PIDTYPE_TGID); + task = get_pid_task(pid, PIDTYPE_PID); *tid = common->pid; } rcu_read_unlock(); @@ -281,6 +260,7 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info) u32 saved_tid = info->tid; struct task_struct *curr_task; unsigned int curr_fd = info->fd; + struct file *f; /* If this function returns a non-NULL file object, * it held a reference to the task/file. @@ -305,23 +285,14 @@ again: curr_fd = 0; } - rcu_read_lock(); - for (;; curr_fd++) { - struct file *f; - f = task_lookup_next_fd_rcu(curr_task, &curr_fd); - if (!f) - break; - if (!get_file_rcu(f)) - continue; - + f = fget_task_next(curr_task, &curr_fd); + if (f) { /* set info->fd */ info->fd = curr_fd; - rcu_read_unlock(); return f; } /* the current task is done, go to the next task */ - rcu_read_unlock(); put_task_struct(curr_task); if (info->common.type == BPF_TASK_ITER_TID) { @@ -724,7 +695,7 @@ static struct bpf_iter_reg task_reg_info = { .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__task, task), - PTR_TO_BTF_ID_OR_NULL }, + PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED }, }, .seq_info = &task_seq_info, .fill_link_info = bpf_iter_fill_link_info, @@ -823,6 +794,241 @@ const struct bpf_func_proto bpf_find_vma_proto = { .arg5_type = ARG_ANYTHING, }; +struct bpf_iter_task_vma_kern_data { + struct task_struct *task; + struct mm_struct *mm; + struct mmap_unlock_irq_work *work; + struct vma_iterator vmi; +}; + +struct bpf_iter_task_vma { + /* opaque iterator state; having __u64 here allows to preserve correct + * alignment requirements in vmlinux.h, generated from BTF + */ + __u64 __opaque[1]; +} __attribute__((aligned(8))); + +/* Non-opaque version of bpf_iter_task_vma */ +struct bpf_iter_task_vma_kern { + struct bpf_iter_task_vma_kern_data *data; +} __attribute__((aligned(8))); + +__bpf_kfunc_start_defs(); + +__bpf_kfunc int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it, + struct task_struct *task, u64 addr) +{ + struct bpf_iter_task_vma_kern *kit = (void *)it; + bool irq_work_busy = false; + int err; + + BUILD_BUG_ON(sizeof(struct bpf_iter_task_vma_kern) != sizeof(struct bpf_iter_task_vma)); + BUILD_BUG_ON(__alignof__(struct bpf_iter_task_vma_kern) != __alignof__(struct bpf_iter_task_vma)); + + /* is_iter_reg_valid_uninit guarantees that kit hasn't been initialized + * before, so non-NULL kit->data doesn't point to previously + * bpf_mem_alloc'd bpf_iter_task_vma_kern_data + */ + kit->data = bpf_mem_alloc(&bpf_global_ma, sizeof(struct bpf_iter_task_vma_kern_data)); + if (!kit->data) + return -ENOMEM; + + kit->data->task = get_task_struct(task); + kit->data->mm = task->mm; + if (!kit->data->mm) { + err = -ENOENT; + goto err_cleanup_iter; + } + + /* kit->data->work == NULL is valid after bpf_mmap_unlock_get_irq_work */ + irq_work_busy = bpf_mmap_unlock_get_irq_work(&kit->data->work); + if (irq_work_busy || !mmap_read_trylock(kit->data->mm)) { + err = -EBUSY; + goto err_cleanup_iter; + } + + vma_iter_init(&kit->data->vmi, kit->data->mm, addr); + return 0; + +err_cleanup_iter: + if (kit->data->task) + put_task_struct(kit->data->task); + bpf_mem_free(&bpf_global_ma, kit->data); + /* NULL kit->data signals failed bpf_iter_task_vma initialization */ + kit->data = NULL; + return err; +} + +__bpf_kfunc struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it) +{ + struct bpf_iter_task_vma_kern *kit = (void *)it; + + if (!kit->data) /* bpf_iter_task_vma_new failed */ + return NULL; + return vma_next(&kit->data->vmi); +} + +__bpf_kfunc void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it) +{ + struct bpf_iter_task_vma_kern *kit = (void *)it; + + if (kit->data) { + bpf_mmap_unlock_mm(kit->data->work, kit->data->mm); + put_task_struct(kit->data->task); + bpf_mem_free(&bpf_global_ma, kit->data); + } +} + +__bpf_kfunc_end_defs(); + +#ifdef CONFIG_CGROUPS + +struct bpf_iter_css_task { + __u64 __opaque[1]; +} __attribute__((aligned(8))); + +struct bpf_iter_css_task_kern { + struct css_task_iter *css_it; +} __attribute__((aligned(8))); + +__bpf_kfunc_start_defs(); + +__bpf_kfunc int bpf_iter_css_task_new(struct bpf_iter_css_task *it, + struct cgroup_subsys_state *css, unsigned int flags) +{ + struct bpf_iter_css_task_kern *kit = (void *)it; + + BUILD_BUG_ON(sizeof(struct bpf_iter_css_task_kern) != sizeof(struct bpf_iter_css_task)); + BUILD_BUG_ON(__alignof__(struct bpf_iter_css_task_kern) != + __alignof__(struct bpf_iter_css_task)); + kit->css_it = NULL; + switch (flags) { + case CSS_TASK_ITER_PROCS | CSS_TASK_ITER_THREADED: + case CSS_TASK_ITER_PROCS: + case 0: + break; + default: + return -EINVAL; + } + + kit->css_it = bpf_mem_alloc(&bpf_global_ma, sizeof(struct css_task_iter)); + if (!kit->css_it) + return -ENOMEM; + css_task_iter_start(css, flags, kit->css_it); + return 0; +} + +__bpf_kfunc struct task_struct *bpf_iter_css_task_next(struct bpf_iter_css_task *it) +{ + struct bpf_iter_css_task_kern *kit = (void *)it; + + if (!kit->css_it) + return NULL; + return css_task_iter_next(kit->css_it); +} + +__bpf_kfunc void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it) +{ + struct bpf_iter_css_task_kern *kit = (void *)it; + + if (!kit->css_it) + return; + css_task_iter_end(kit->css_it); + bpf_mem_free(&bpf_global_ma, kit->css_it); +} + +__bpf_kfunc_end_defs(); + +#endif /* CONFIG_CGROUPS */ + +struct bpf_iter_task { + __u64 __opaque[3]; +} __attribute__((aligned(8))); + +struct bpf_iter_task_kern { + struct task_struct *task; + struct task_struct *pos; + unsigned int flags; +} __attribute__((aligned(8))); + +enum { + /* all process in the system */ + BPF_TASK_ITER_ALL_PROCS, + /* all threads in the system */ + BPF_TASK_ITER_ALL_THREADS, + /* all threads of a specific process */ + BPF_TASK_ITER_PROC_THREADS +}; + +__bpf_kfunc_start_defs(); + +__bpf_kfunc int bpf_iter_task_new(struct bpf_iter_task *it, + struct task_struct *task__nullable, unsigned int flags) +{ + struct bpf_iter_task_kern *kit = (void *)it; + + BUILD_BUG_ON(sizeof(struct bpf_iter_task_kern) > sizeof(struct bpf_iter_task)); + BUILD_BUG_ON(__alignof__(struct bpf_iter_task_kern) != + __alignof__(struct bpf_iter_task)); + + kit->pos = NULL; + + switch (flags) { + case BPF_TASK_ITER_ALL_THREADS: + case BPF_TASK_ITER_ALL_PROCS: + break; + case BPF_TASK_ITER_PROC_THREADS: + if (!task__nullable) + return -EINVAL; + break; + default: + return -EINVAL; + } + + if (flags == BPF_TASK_ITER_PROC_THREADS) + kit->task = task__nullable; + else + kit->task = &init_task; + kit->pos = kit->task; + kit->flags = flags; + return 0; +} + +__bpf_kfunc struct task_struct *bpf_iter_task_next(struct bpf_iter_task *it) +{ + struct bpf_iter_task_kern *kit = (void *)it; + struct task_struct *pos; + unsigned int flags; + + flags = kit->flags; + pos = kit->pos; + + if (!pos) + return pos; + + if (flags == BPF_TASK_ITER_ALL_PROCS) + goto get_next_task; + + kit->pos = __next_thread(kit->pos); + if (kit->pos || flags == BPF_TASK_ITER_PROC_THREADS) + return pos; + +get_next_task: + kit->task = next_task(kit->task); + if (kit->task == &init_task) + kit->pos = NULL; + else + kit->pos = kit->task; + + return pos; +} + +__bpf_kfunc void bpf_iter_task_destroy(struct bpf_iter_task *it) +{ +} + +__bpf_kfunc_end_defs(); + DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work); static void do_mmap_read_unlock(struct irq_work *entry) |
