diff options
Diffstat (limited to 'kernel/bpf/inode.c')
| -rw-r--r-- | kernel/bpf/inode.c | 395 |
1 files changed, 340 insertions, 55 deletions
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 4f841e16779e..9f866a010dad 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -20,6 +20,7 @@ #include <linux/filter.h> #include <linux/bpf.h> #include <linux/bpf_trace.h> +#include <linux/kstrtox.h> #include "preload/bpf_preload.h" enum bpf_type { @@ -98,9 +99,9 @@ static const struct inode_operations bpf_prog_iops = { }; static const struct inode_operations bpf_map_iops = { }; static const struct inode_operations bpf_link_iops = { }; -static struct inode *bpf_get_inode(struct super_block *sb, - const struct inode *dir, - umode_t mode) +struct inode *bpf_get_inode(struct super_block *sb, + const struct inode *dir, + umode_t mode) { struct inode *inode; @@ -118,11 +119,9 @@ static struct inode *bpf_get_inode(struct super_block *sb, return ERR_PTR(-ENOSPC); inode->i_ino = get_next_ino(); - inode->i_atime = current_time(inode); - inode->i_mtime = inode->i_atime; - inode->i_ctime = inode->i_atime; + simple_inode_init_ts(inode); - inode_init_owner(&init_user_ns, inode, dir, mode); + inode_init_owner(&nop_mnt_idmap, inode, dir, mode); return inode; } @@ -145,21 +144,19 @@ static int bpf_inode_type(const struct inode *inode, enum bpf_type *type) static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode, struct inode *dir) { - d_instantiate(dentry, inode); - dget(dentry); + d_make_persistent(dentry, inode); - dir->i_mtime = current_time(dir); - dir->i_ctime = dir->i_mtime; + inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); } -static int bpf_mkdir(struct user_namespace *mnt_userns, struct inode *dir, - struct dentry *dentry, umode_t mode) +static struct dentry *bpf_mkdir(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, umode_t mode) { struct inode *inode; inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR); if (IS_ERR(inode)) - return PTR_ERR(inode); + return ERR_CAST(inode); inode->i_op = &bpf_dir_iops; inode->i_fop = &simple_dir_operations; @@ -168,7 +165,7 @@ static int bpf_mkdir(struct user_namespace *mnt_userns, struct inode *dir, inc_nlink(dir); bpf_dentry_finalize(dentry, inode, dir); - return 0; + return NULL; } struct map_iter { @@ -382,7 +379,7 @@ bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags) return simple_lookup(dir, dentry, flags); } -static int bpf_symlink(struct user_namespace *mnt_userns, struct inode *dir, +static int bpf_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *target) { char *link = kstrdup(target, GFP_USER | __GFP_NOWARN); @@ -422,20 +419,16 @@ static int bpf_iter_link_pin_kernel(struct dentry *parent, struct dentry *dentry; int ret; - inode_lock(parent->d_inode); - dentry = lookup_one_len(name, parent, strlen(name)); - if (IS_ERR(dentry)) { - inode_unlock(parent->d_inode); + dentry = simple_start_creating(parent, name); + if (IS_ERR(dentry)) return PTR_ERR(dentry); - } ret = bpf_mkobj_ops(dentry, mode, link, &bpf_link_iops, &bpf_iter_fops); - dput(dentry); - inode_unlock(parent->d_inode); + simple_done_creating(dentry); return ret; } -static int bpf_obj_do_pin(const char __user *pathname, void *raw, +static int bpf_obj_do_pin(int path_fd, const char __user *pathname, void *raw, enum bpf_type type) { struct dentry *dentry; @@ -444,22 +437,21 @@ static int bpf_obj_do_pin(const char __user *pathname, void *raw, umode_t mode; int ret; - dentry = user_path_create(AT_FDCWD, pathname, &path, 0); + dentry = start_creating_user_path(path_fd, pathname, &path, 0); if (IS_ERR(dentry)) return PTR_ERR(dentry); - mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); - - ret = security_path_mknod(&path, dentry, mode, 0); - if (ret) - goto out; - dir = d_inode(path.dentry); if (dir->i_op != &bpf_dir_iops) { ret = -EPERM; goto out; } + mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); + ret = security_path_mknod(&path, dentry, mode, 0); + if (ret) + goto out; + switch (type) { case BPF_TYPE_PROG: ret = vfs_mkobj(dentry, mode, bpf_mkprog, raw); @@ -474,11 +466,11 @@ static int bpf_obj_do_pin(const char __user *pathname, void *raw, ret = -EPERM; } out: - done_path_create(&path, dentry); + end_creating_path(&path, dentry); return ret; } -int bpf_obj_pin_user(u32 ufd, const char __user *pathname) +int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname) { enum bpf_type type; void *raw; @@ -488,14 +480,14 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname) if (IS_ERR(raw)) return PTR_ERR(raw); - ret = bpf_obj_do_pin(pathname, raw, type); + ret = bpf_obj_do_pin(path_fd, pathname, raw, type); if (ret != 0) bpf_any_put(raw, type); return ret; } -static void *bpf_obj_do_get(const char __user *pathname, +static void *bpf_obj_do_get(int path_fd, const char __user *pathname, enum bpf_type *type, int flags) { struct inode *inode; @@ -503,7 +495,7 @@ static void *bpf_obj_do_get(const char __user *pathname, void *raw; int ret; - ret = user_path_at(AT_FDCWD, pathname, LOOKUP_FOLLOW, &path); + ret = user_path_at(path_fd, pathname, LOOKUP_FOLLOW, &path); if (ret) return ERR_PTR(ret); @@ -527,7 +519,7 @@ out: return ERR_PTR(ret); } -int bpf_obj_get_user(const char __user *pathname, int flags) +int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags) { enum bpf_type type = BPF_TYPE_UNSPEC; int f_flags; @@ -538,7 +530,7 @@ int bpf_obj_get_user(const char __user *pathname, int flags) if (f_flags < 0) return f_flags; - raw = bpf_obj_do_get(pathname, &type, f_flags); + raw = bpf_obj_do_get(path_fd, pathname, &type, f_flags); if (IS_ERR(raw)) return PTR_ERR(raw); @@ -559,7 +551,7 @@ int bpf_obj_get_user(const char __user *pathname, int flags) static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type) { struct bpf_prog *prog; - int ret = inode_permission(&init_user_ns, inode, MAY_READ); + int ret = inode_permission(&nop_mnt_idmap, inode, MAY_READ); if (ret) return ERR_PTR(ret); @@ -598,19 +590,187 @@ struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type typ } EXPORT_SYMBOL(bpf_prog_get_type_path); +struct bpffs_btf_enums { + const struct btf *btf; + const struct btf_type *cmd_t; + const struct btf_type *map_t; + const struct btf_type *prog_t; + const struct btf_type *attach_t; +}; + +static int find_bpffs_btf_enums(struct bpffs_btf_enums *info) +{ + const struct btf *btf; + const struct btf_type *t; + const char *name; + int i, n; + + memset(info, 0, sizeof(*info)); + + btf = bpf_get_btf_vmlinux(); + if (IS_ERR(btf)) + return PTR_ERR(btf); + if (!btf) + return -ENOENT; + + info->btf = btf; + + for (i = 1, n = btf_nr_types(btf); i < n; i++) { + t = btf_type_by_id(btf, i); + if (!btf_type_is_enum(t)) + continue; + + name = btf_name_by_offset(btf, t->name_off); + if (!name) + continue; + + if (strcmp(name, "bpf_cmd") == 0) + info->cmd_t = t; + else if (strcmp(name, "bpf_map_type") == 0) + info->map_t = t; + else if (strcmp(name, "bpf_prog_type") == 0) + info->prog_t = t; + else if (strcmp(name, "bpf_attach_type") == 0) + info->attach_t = t; + else + continue; + + if (info->cmd_t && info->map_t && info->prog_t && info->attach_t) + return 0; + } + + return -ESRCH; +} + +static bool find_btf_enum_const(const struct btf *btf, const struct btf_type *enum_t, + const char *prefix, const char *str, int *value) +{ + const struct btf_enum *e; + const char *name; + int i, n, pfx_len = strlen(prefix); + + *value = 0; + + if (!btf || !enum_t) + return false; + + for (i = 0, n = btf_vlen(enum_t); i < n; i++) { + e = &btf_enum(enum_t)[i]; + + name = btf_name_by_offset(btf, e->name_off); + if (!name || strncasecmp(name, prefix, pfx_len) != 0) + continue; + + /* match symbolic name case insensitive and ignoring prefix */ + if (strcasecmp(name + pfx_len, str) == 0) { + *value = e->val; + return true; + } + } + + return false; +} + +static void seq_print_delegate_opts(struct seq_file *m, + const char *opt_name, + const struct btf *btf, + const struct btf_type *enum_t, + const char *prefix, + u64 delegate_msk, u64 any_msk) +{ + const struct btf_enum *e; + bool first = true; + const char *name; + u64 msk; + int i, n, pfx_len = strlen(prefix); + + delegate_msk &= any_msk; /* clear unknown bits */ + + if (delegate_msk == 0) + return; + + seq_printf(m, ",%s", opt_name); + if (delegate_msk == any_msk) { + seq_printf(m, "=any"); + return; + } + + if (btf && enum_t) { + for (i = 0, n = btf_vlen(enum_t); i < n; i++) { + e = &btf_enum(enum_t)[i]; + name = btf_name_by_offset(btf, e->name_off); + if (!name || strncasecmp(name, prefix, pfx_len) != 0) + continue; + msk = 1ULL << e->val; + if (delegate_msk & msk) { + /* emit lower-case name without prefix */ + seq_putc(m, first ? '=' : ':'); + name += pfx_len; + while (*name) { + seq_putc(m, tolower(*name)); + name++; + } + + delegate_msk &= ~msk; + first = false; + } + } + } + if (delegate_msk) + seq_printf(m, "%c0x%llx", first ? '=' : ':', delegate_msk); +} + /* * Display the mount options in /proc/mounts. */ static int bpf_show_options(struct seq_file *m, struct dentry *root) { - umode_t mode = d_inode(root)->i_mode & S_IALLUGO & ~S_ISVTX; - + struct inode *inode = d_inode(root); + umode_t mode = inode->i_mode & S_IALLUGO & ~S_ISVTX; + struct bpf_mount_opts *opts = root->d_sb->s_fs_info; + u64 mask; + + if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID)) + seq_printf(m, ",uid=%u", + from_kuid_munged(&init_user_ns, inode->i_uid)); + if (!gid_eq(inode->i_gid, GLOBAL_ROOT_GID)) + seq_printf(m, ",gid=%u", + from_kgid_munged(&init_user_ns, inode->i_gid)); if (mode != S_IRWXUGO) seq_printf(m, ",mode=%o", mode); + + if (opts->delegate_cmds || opts->delegate_maps || + opts->delegate_progs || opts->delegate_attachs) { + struct bpffs_btf_enums info; + + /* ignore errors, fallback to hex */ + (void)find_bpffs_btf_enums(&info); + + mask = (1ULL << __MAX_BPF_CMD) - 1; + seq_print_delegate_opts(m, "delegate_cmds", + info.btf, info.cmd_t, "BPF_", + opts->delegate_cmds, mask); + + mask = (1ULL << __MAX_BPF_MAP_TYPE) - 1; + seq_print_delegate_opts(m, "delegate_maps", + info.btf, info.map_t, "BPF_MAP_TYPE_", + opts->delegate_maps, mask); + + mask = (1ULL << __MAX_BPF_PROG_TYPE) - 1; + seq_print_delegate_opts(m, "delegate_progs", + info.btf, info.prog_t, "BPF_PROG_TYPE_", + opts->delegate_progs, mask); + + mask = (1ULL << __MAX_BPF_ATTACH_TYPE) - 1; + seq_print_delegate_opts(m, "delegate_attachs", + info.btf, info.attach_t, "BPF_", + opts->delegate_attachs, mask); + } + return 0; } -static void bpf_free_inode(struct inode *inode) +static void bpf_destroy_inode(struct inode *inode) { enum bpf_type type; @@ -621,31 +781,41 @@ static void bpf_free_inode(struct inode *inode) free_inode_nonrcu(inode); } -static const struct super_operations bpf_super_ops = { +const struct super_operations bpf_super_ops = { .statfs = simple_statfs, - .drop_inode = generic_delete_inode, + .drop_inode = inode_just_drop, .show_options = bpf_show_options, - .free_inode = bpf_free_inode, + .destroy_inode = bpf_destroy_inode, }; enum { + OPT_UID, + OPT_GID, OPT_MODE, + OPT_DELEGATE_CMDS, + OPT_DELEGATE_MAPS, + OPT_DELEGATE_PROGS, + OPT_DELEGATE_ATTACHS, }; static const struct fs_parameter_spec bpf_fs_parameters[] = { + fsparam_u32 ("uid", OPT_UID), + fsparam_u32 ("gid", OPT_GID), fsparam_u32oct ("mode", OPT_MODE), + fsparam_string ("delegate_cmds", OPT_DELEGATE_CMDS), + fsparam_string ("delegate_maps", OPT_DELEGATE_MAPS), + fsparam_string ("delegate_progs", OPT_DELEGATE_PROGS), + fsparam_string ("delegate_attachs", OPT_DELEGATE_ATTACHS), {} }; -struct bpf_mount_opts { - umode_t mode; -}; - static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param) { - struct bpf_mount_opts *opts = fc->fs_private; + struct bpf_mount_opts *opts = fc->s_fs_info; struct fs_parse_result result; - int opt; + kuid_t uid; + kgid_t gid; + int opt, err; opt = fs_parse(fc, bpf_fs_parameters, param, &result); if (opt < 0) { @@ -666,12 +836,104 @@ static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param) } switch (opt) { + case OPT_UID: + uid = make_kuid(current_user_ns(), result.uint_32); + if (!uid_valid(uid)) + goto bad_value; + + /* + * The requested uid must be representable in the + * filesystem's idmapping. + */ + if (!kuid_has_mapping(fc->user_ns, uid)) + goto bad_value; + + opts->uid = uid; + break; + case OPT_GID: + gid = make_kgid(current_user_ns(), result.uint_32); + if (!gid_valid(gid)) + goto bad_value; + + /* + * The requested gid must be representable in the + * filesystem's idmapping. + */ + if (!kgid_has_mapping(fc->user_ns, gid)) + goto bad_value; + + opts->gid = gid; + break; case OPT_MODE: opts->mode = result.uint_32 & S_IALLUGO; break; + case OPT_DELEGATE_CMDS: + case OPT_DELEGATE_MAPS: + case OPT_DELEGATE_PROGS: + case OPT_DELEGATE_ATTACHS: { + struct bpffs_btf_enums info; + const struct btf_type *enum_t; + const char *enum_pfx; + u64 *delegate_msk, msk = 0; + char *p, *str; + int val; + + /* ignore errors, fallback to hex */ + (void)find_bpffs_btf_enums(&info); + + switch (opt) { + case OPT_DELEGATE_CMDS: + delegate_msk = &opts->delegate_cmds; + enum_t = info.cmd_t; + enum_pfx = "BPF_"; + break; + case OPT_DELEGATE_MAPS: + delegate_msk = &opts->delegate_maps; + enum_t = info.map_t; + enum_pfx = "BPF_MAP_TYPE_"; + break; + case OPT_DELEGATE_PROGS: + delegate_msk = &opts->delegate_progs; + enum_t = info.prog_t; + enum_pfx = "BPF_PROG_TYPE_"; + break; + case OPT_DELEGATE_ATTACHS: + delegate_msk = &opts->delegate_attachs; + enum_t = info.attach_t; + enum_pfx = "BPF_"; + break; + default: + return -EINVAL; + } + + str = param->string; + while ((p = strsep(&str, ":"))) { + if (strcmp(p, "any") == 0) { + msk |= ~0ULL; + } else if (find_btf_enum_const(info.btf, enum_t, enum_pfx, p, &val)) { + msk |= 1ULL << val; + } else { + err = kstrtou64(p, 0, &msk); + if (err) + return err; + } + } + + /* Setting delegation mount options requires privileges */ + if (msk && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + *delegate_msk |= msk; + break; + } + default: + /* ignore unknown mount options */ + break; } return 0; +bad_value: + return invalfc(fc, "Bad value for '%s'", param->key); } struct bpf_preload_ops *bpf_preload_ops; @@ -743,10 +1005,14 @@ out: static int bpf_fill_super(struct super_block *sb, struct fs_context *fc) { static const struct tree_descr bpf_rfiles[] = { { "" } }; - struct bpf_mount_opts *opts = fc->fs_private; + struct bpf_mount_opts *opts = sb->s_fs_info; struct inode *inode; int ret; + /* Mounting an instance of BPF FS requires privileges */ + if (fc->user_ns != &init_user_ns && !capable(CAP_SYS_ADMIN)) + return -EPERM; + ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles); if (ret) return ret; @@ -754,6 +1020,8 @@ static int bpf_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_op = &bpf_super_ops; inode = sb->s_root->d_inode; + inode->i_uid = opts->uid; + inode->i_gid = opts->gid; inode->i_op = &bpf_dir_iops; inode->i_mode &= ~S_IALLUGO; populate_bpffs(sb->s_root); @@ -768,7 +1036,7 @@ static int bpf_get_tree(struct fs_context *fc) static void bpf_free_fc(struct fs_context *fc) { - kfree(fc->fs_private); + kfree(fc->s_fs_info); } static const struct fs_context_operations bpf_context_ops = { @@ -789,18 +1057,35 @@ static int bpf_init_fs_context(struct fs_context *fc) return -ENOMEM; opts->mode = S_IRWXUGO; + opts->uid = current_fsuid(); + opts->gid = current_fsgid(); - fc->fs_private = opts; + /* start out with no BPF token delegation enabled */ + opts->delegate_cmds = 0; + opts->delegate_maps = 0; + opts->delegate_progs = 0; + opts->delegate_attachs = 0; + + fc->s_fs_info = opts; fc->ops = &bpf_context_ops; return 0; } +static void bpf_kill_super(struct super_block *sb) +{ + struct bpf_mount_opts *opts = sb->s_fs_info; + + kill_anon_super(sb); + kfree(opts); +} + static struct file_system_type bpf_fs_type = { .owner = THIS_MODULE, .name = "bpf", .init_fs_context = bpf_init_fs_context, .parameters = bpf_fs_parameters, - .kill_sb = kill_litter_super, + .kill_sb = bpf_kill_super, + .fs_flags = FS_USERNS_MOUNT, }; static int __init bpf_init(void) |
