diff options
Diffstat (limited to 'fs/tracefs')
| -rw-r--r-- | fs/tracefs/Makefile | 1 | ||||
| -rw-r--r-- | fs/tracefs/event_inode.c | 914 | ||||
| -rw-r--r-- | fs/tracefs/inode.c | 618 | ||||
| -rw-r--r-- | fs/tracefs/internal.h | 79 |
4 files changed, 1394 insertions, 218 deletions
diff --git a/fs/tracefs/Makefile b/fs/tracefs/Makefile index 7c35a282b484..73c56da8e284 100644 --- a/fs/tracefs/Makefile +++ b/fs/tracefs/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only tracefs-objs := inode.o +tracefs-objs += event_inode.o obj-$(CONFIG_TRACING) += tracefs.o diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c new file mode 100644 index 000000000000..61cbdafa2411 --- /dev/null +++ b/fs/tracefs/event_inode.c @@ -0,0 +1,914 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * event_inode.c - part of tracefs, a pseudo file system for activating tracing + * + * Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt <rostedt@goodmis.org> + * Copyright (C) 2020-23 VMware Inc, author: Ajay Kaher <akaher@vmware.com> + * Copyright (C) 2023 Google, author: Steven Rostedt <rostedt@goodmis.org> + * + * eventfs is used to dynamically create inodes and dentries based on the + * meta data provided by the tracing system. + * + * eventfs stores the meta-data of files/dirs and holds off on creating + * inodes/dentries of the files. When accessed, the eventfs will create the + * inodes/dentries in a just-in-time (JIT) manner. The eventfs will clean up + * and delete the inodes/dentries when they are no longer referenced. + */ +#include <linux/fsnotify.h> +#include <linux/fs.h> +#include <linux/namei.h> +#include <linux/workqueue.h> +#include <linux/security.h> +#include <linux/tracefs.h> +#include <linux/kref.h> +#include <linux/delay.h> +#include "internal.h" + +/* + * eventfs_mutex protects the eventfs_inode (ei) dentry. Any access + * to the ei->dentry must be done under this mutex and after checking + * if ei->is_freed is not set. When ei->is_freed is set, the dentry + * is on its way to being freed after the last dput() is made on it. + */ +static DEFINE_MUTEX(eventfs_mutex); + +/* Choose something "unique" ;-) */ +#define EVENTFS_FILE_INODE_INO 0x12c4e37 + +struct eventfs_root_inode { + struct eventfs_inode ei; + struct dentry *events_dir; +}; + +static struct eventfs_root_inode *get_root_inode(struct eventfs_inode *ei) +{ + WARN_ON_ONCE(!ei->is_events); + return container_of(ei, struct eventfs_root_inode, ei); +} + +/* Just try to make something consistent and unique */ +static int eventfs_dir_ino(struct eventfs_inode *ei) +{ + if (!ei->ino) { + ei->ino = get_next_ino(); + /* Must not have the file inode number */ + if (ei->ino == EVENTFS_FILE_INODE_INO) + ei->ino = get_next_ino(); + } + + return ei->ino; +} + +/* + * The eventfs_inode (ei) itself is protected by SRCU. It is released from + * its parent's list and will have is_freed set (under eventfs_mutex). + * After the SRCU grace period is over and the last dput() is called + * the ei is freed. + */ +DEFINE_STATIC_SRCU(eventfs_srcu); + +/* Mode is unsigned short, use the upper bits for flags */ +enum { + EVENTFS_SAVE_MODE = BIT(16), + EVENTFS_SAVE_UID = BIT(17), + EVENTFS_SAVE_GID = BIT(18), +}; + +#define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1) + +static void free_ei_rcu(struct rcu_head *rcu) +{ + struct eventfs_inode *ei = container_of(rcu, struct eventfs_inode, rcu); + struct eventfs_root_inode *rei; + + kfree(ei->entry_attrs); + kfree_const(ei->name); + if (ei->is_events) { + rei = get_root_inode(ei); + kfree(rei); + } else { + kfree(ei); + } +} + +/* + * eventfs_inode reference count management. + * + * NOTE! We count only references from dentries, in the + * form 'dentry->d_fsdata'. There are also references from + * directory inodes ('ti->private'), but the dentry reference + * count is always a superset of the inode reference count. + */ +static void release_ei(struct kref *ref) +{ + struct eventfs_inode *ei = container_of(ref, struct eventfs_inode, kref); + const struct eventfs_entry *entry; + + WARN_ON_ONCE(!ei->is_freed); + + for (int i = 0; i < ei->nr_entries; i++) { + entry = &ei->entries[i]; + if (entry->release) + entry->release(entry->name, ei->data); + } + + call_srcu(&eventfs_srcu, &ei->rcu, free_ei_rcu); +} + +static inline void put_ei(struct eventfs_inode *ei) +{ + if (ei) + kref_put(&ei->kref, release_ei); +} + +static inline void free_ei(struct eventfs_inode *ei) +{ + if (ei) { + ei->is_freed = 1; + put_ei(ei); + } +} + +/* + * Called when creation of an ei fails, do not call release() functions. + */ +static inline void cleanup_ei(struct eventfs_inode *ei) +{ + if (ei) { + /* Set nr_entries to 0 to prevent release() function being called */ + ei->nr_entries = 0; + free_ei(ei); + } +} + +static inline struct eventfs_inode *get_ei(struct eventfs_inode *ei) +{ + if (ei) + kref_get(&ei->kref); + return ei; +} + +static struct dentry *eventfs_root_lookup(struct inode *dir, + struct dentry *dentry, + unsigned int flags); +static int eventfs_iterate(struct file *file, struct dir_context *ctx); + +static void update_attr(struct eventfs_attr *attr, struct iattr *iattr) +{ + unsigned int ia_valid = iattr->ia_valid; + + if (ia_valid & ATTR_MODE) { + attr->mode = (attr->mode & ~EVENTFS_MODE_MASK) | + (iattr->ia_mode & EVENTFS_MODE_MASK) | + EVENTFS_SAVE_MODE; + } + if (ia_valid & ATTR_UID) { + attr->mode |= EVENTFS_SAVE_UID; + attr->uid = iattr->ia_uid; + } + if (ia_valid & ATTR_GID) { + attr->mode |= EVENTFS_SAVE_GID; + attr->gid = iattr->ia_gid; + } +} + +static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry, + struct iattr *iattr) +{ + const struct eventfs_entry *entry; + struct eventfs_inode *ei; + const char *name; + int ret; + + mutex_lock(&eventfs_mutex); + ei = dentry->d_fsdata; + if (ei->is_freed) { + /* Do not allow changes if the event is about to be removed. */ + mutex_unlock(&eventfs_mutex); + return -ENODEV; + } + + /* Preallocate the children mode array if necessary */ + if (!(dentry->d_inode->i_mode & S_IFDIR)) { + if (!ei->entry_attrs) { + ei->entry_attrs = kcalloc(ei->nr_entries, sizeof(*ei->entry_attrs), + GFP_NOFS); + if (!ei->entry_attrs) { + ret = -ENOMEM; + goto out; + } + } + } + + ret = simple_setattr(idmap, dentry, iattr); + if (ret < 0) + goto out; + + /* + * If this is a dir, then update the ei cache, only the file + * mode is saved in the ei->m_children, and the ownership is + * determined by the parent directory. + */ + if (dentry->d_inode->i_mode & S_IFDIR) { + /* Just use the inode permissions for the events directory */ + if (!ei->is_events) + update_attr(&ei->attr, iattr); + + } else { + name = dentry->d_name.name; + + for (int i = 0; i < ei->nr_entries; i++) { + entry = &ei->entries[i]; + if (strcmp(name, entry->name) == 0) { + update_attr(&ei->entry_attrs[i], iattr); + break; + } + } + } + out: + mutex_unlock(&eventfs_mutex); + return ret; +} + +static const struct inode_operations eventfs_dir_inode_operations = { + .lookup = eventfs_root_lookup, + .setattr = eventfs_set_attr, +}; + +static const struct inode_operations eventfs_file_inode_operations = { + .setattr = eventfs_set_attr, +}; + +static const struct file_operations eventfs_file_operations = { + .read = generic_read_dir, + .iterate_shared = eventfs_iterate, + .llseek = generic_file_llseek, +}; + +static void eventfs_set_attrs(struct eventfs_inode *ei, bool update_uid, kuid_t uid, + bool update_gid, kgid_t gid, int level) +{ + struct eventfs_inode *ei_child; + + /* Update events/<system>/<event> */ + if (WARN_ON_ONCE(level > 3)) + return; + + if (update_uid) { + ei->attr.mode &= ~EVENTFS_SAVE_UID; + ei->attr.uid = uid; + } + + if (update_gid) { + ei->attr.mode &= ~EVENTFS_SAVE_GID; + ei->attr.gid = gid; + } + + list_for_each_entry(ei_child, &ei->children, list) { + eventfs_set_attrs(ei_child, update_uid, uid, update_gid, gid, level + 1); + } + + if (!ei->entry_attrs) + return; + + for (int i = 0; i < ei->nr_entries; i++) { + if (update_uid) { + ei->entry_attrs[i].mode &= ~EVENTFS_SAVE_UID; + ei->entry_attrs[i].uid = uid; + } + if (update_gid) { + ei->entry_attrs[i].mode &= ~EVENTFS_SAVE_GID; + ei->entry_attrs[i].gid = gid; + } + } + +} + +/* + * On a remount of tracefs, if UID or GID options are set, then + * the mount point inode permissions should be used. + * Reset the saved permission flags appropriately. + */ +void eventfs_remount(struct tracefs_inode *ti, bool update_uid, bool update_gid) +{ + struct eventfs_inode *ei = ti->private; + + /* Only the events directory does the updates */ + if (!ei || !ei->is_events || ei->is_freed) + return; + + eventfs_set_attrs(ei, update_uid, ti->vfs_inode.i_uid, + update_gid, ti->vfs_inode.i_gid, 0); +} + +static void update_inode_attr(struct inode *inode, umode_t mode, + struct eventfs_attr *attr, struct eventfs_root_inode *rei) +{ + if (attr && attr->mode & EVENTFS_SAVE_MODE) + inode->i_mode = attr->mode & EVENTFS_MODE_MASK; + else + inode->i_mode = mode; + + if (attr && attr->mode & EVENTFS_SAVE_UID) + inode->i_uid = attr->uid; + else + inode->i_uid = rei->ei.attr.uid; + + if (attr && attr->mode & EVENTFS_SAVE_GID) + inode->i_gid = attr->gid; + else + inode->i_gid = rei->ei.attr.gid; +} + +static struct inode *eventfs_get_inode(struct dentry *dentry, struct eventfs_attr *attr, + umode_t mode, struct eventfs_inode *ei) +{ + struct eventfs_root_inode *rei; + struct eventfs_inode *pei; + struct tracefs_inode *ti; + struct inode *inode; + + inode = tracefs_get_inode(dentry->d_sb); + if (!inode) + return NULL; + + ti = get_tracefs(inode); + ti->private = ei; + ti->flags |= TRACEFS_EVENT_INODE; + + /* Find the top dentry that holds the "events" directory */ + do { + dentry = dentry->d_parent; + /* Directories always have d_fsdata */ + pei = dentry->d_fsdata; + } while (!pei->is_events); + + rei = get_root_inode(pei); + + update_inode_attr(inode, mode, attr, rei); + + return inode; +} + +/** + * lookup_file - look up a file in the tracefs filesystem + * @parent_ei: Pointer to the eventfs_inode that represents parent of the file + * @dentry: the dentry to look up + * @mode: the permission that the file should have. + * @attr: saved attributes changed by user + * @data: something that the caller will want to get to later on. + * @fop: struct file_operations that should be used for this file. + * + * This function creates a dentry that represents a file in the eventsfs_inode + * directory. The inode.i_private pointer will point to @data in the open() + * call. + */ +static struct dentry *lookup_file(struct eventfs_inode *parent_ei, + struct dentry *dentry, + umode_t mode, + struct eventfs_attr *attr, + void *data, + const struct file_operations *fop) +{ + struct inode *inode; + + if (!(mode & S_IFMT)) + mode |= S_IFREG; + + if (WARN_ON_ONCE(!S_ISREG(mode))) + return ERR_PTR(-EIO); + + /* Only directories have ti->private set to an ei, not files */ + inode = eventfs_get_inode(dentry, attr, mode, NULL); + if (unlikely(!inode)) + return ERR_PTR(-ENOMEM); + + inode->i_op = &eventfs_file_inode_operations; + inode->i_fop = fop; + inode->i_private = data; + + /* All files will have the same inode number */ + inode->i_ino = EVENTFS_FILE_INODE_INO; + + // Files have their parent's ei as their fsdata + dentry->d_fsdata = get_ei(parent_ei); + + d_add(dentry, inode); + return NULL; +}; + +/** + * lookup_dir_entry - look up a dir in the tracefs filesystem + * @dentry: the directory to look up + * @pei: Pointer to the parent eventfs_inode if available + * @ei: the eventfs_inode that represents the directory to create + * + * This function will look up a dentry for a directory represented by + * a eventfs_inode. + */ +static struct dentry *lookup_dir_entry(struct dentry *dentry, + struct eventfs_inode *pei, struct eventfs_inode *ei) +{ + struct inode *inode; + umode_t mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; + + inode = eventfs_get_inode(dentry, &ei->attr, mode, ei); + if (unlikely(!inode)) + return ERR_PTR(-ENOMEM); + + inode->i_op = &eventfs_dir_inode_operations; + inode->i_fop = &eventfs_file_operations; + + /* All directories will have the same inode number */ + inode->i_ino = eventfs_dir_ino(ei); + + dentry->d_fsdata = get_ei(ei); + + d_add(dentry, inode); + return NULL; +} + +static inline struct eventfs_inode *init_ei(struct eventfs_inode *ei, const char *name) +{ + ei->name = kstrdup_const(name, GFP_KERNEL); + if (!ei->name) + return NULL; + kref_init(&ei->kref); + return ei; +} + +static inline struct eventfs_inode *alloc_ei(const char *name) +{ + struct eventfs_inode *ei = kzalloc(sizeof(*ei), GFP_KERNEL); + struct eventfs_inode *result; + + if (!ei) + return NULL; + + result = init_ei(ei, name); + if (!result) + kfree(ei); + + return result; +} + +static inline struct eventfs_inode *alloc_root_ei(const char *name) +{ + struct eventfs_root_inode *rei = kzalloc(sizeof(*rei), GFP_KERNEL); + struct eventfs_inode *ei; + + if (!rei) + return NULL; + + rei->ei.is_events = 1; + ei = init_ei(&rei->ei, name); + if (!ei) + kfree(rei); + + return ei; +} + +/** + * eventfs_d_release - dentry is going away + * @dentry: dentry which has the reference to remove. + * + * Remove the association between a dentry from an eventfs_inode. + */ +void eventfs_d_release(struct dentry *dentry) +{ + put_ei(dentry->d_fsdata); +} + +/** + * lookup_file_dentry - create a dentry for a file of an eventfs_inode + * @dentry: The parent dentry under which the new file's dentry will be created + * @ei: the eventfs_inode that the file will be created under + * @idx: the index into the entry_attrs[] of the @ei + * @mode: The mode of the file. + * @data: The data to use to set the inode of the file with on open() + * @fops: The fops of the file to be created. + * + * This function creates a dentry for a file associated with an + * eventfs_inode @ei. It uses the entry attributes specified by @idx, + * if available. The file will have the specified @mode and its inode will be + * set up with @data upon open. The file operations will be set to @fops. + * + * Return: Returns a pointer to the newly created file's dentry or an error + * pointer. + */ +static struct dentry * +lookup_file_dentry(struct dentry *dentry, + struct eventfs_inode *ei, int idx, + umode_t mode, void *data, + const struct file_operations *fops) +{ + struct eventfs_attr *attr = NULL; + + if (ei->entry_attrs) + attr = &ei->entry_attrs[idx]; + + return lookup_file(ei, dentry, mode, attr, data, fops); +} + +/** + * eventfs_root_lookup - lookup routine to create file/dir + * @dir: in which a lookup is being done + * @dentry: file/dir dentry + * @flags: Just passed to simple_lookup() + * + * Used to create dynamic file/dir with-in @dir, search with-in @ei + * list, if @dentry found go ahead and create the file/dir + */ + +static struct dentry *eventfs_root_lookup(struct inode *dir, + struct dentry *dentry, + unsigned int flags) +{ + struct eventfs_inode *ei_child; + struct tracefs_inode *ti; + struct eventfs_inode *ei; + const char *name = dentry->d_name.name; + struct dentry *result = NULL; + + ti = get_tracefs(dir); + if (WARN_ON_ONCE(!(ti->flags & TRACEFS_EVENT_INODE))) + return ERR_PTR(-EIO); + + mutex_lock(&eventfs_mutex); + + ei = ti->private; + if (!ei || ei->is_freed) + goto out; + + list_for_each_entry(ei_child, &ei->children, list) { + if (strcmp(ei_child->name, name) != 0) + continue; + /* A child is freed and removed from the list at the same time */ + if (WARN_ON_ONCE(ei_child->is_freed)) + goto out; + result = lookup_dir_entry(dentry, ei, ei_child); + goto out; + } + + for (int i = 0; i < ei->nr_entries; i++) { + void *data; + umode_t mode; + const struct file_operations *fops; + const struct eventfs_entry *entry = &ei->entries[i]; + + if (strcmp(name, entry->name) != 0) + continue; + + data = ei->data; + if (entry->callback(name, &mode, &data, &fops) <= 0) + goto out; + + result = lookup_file_dentry(dentry, ei, i, mode, data, fops); + goto out; + } + out: + mutex_unlock(&eventfs_mutex); + return result; +} + +/* + * Walk the children of a eventfs_inode to fill in getdents(). + */ +static int eventfs_iterate(struct file *file, struct dir_context *ctx) +{ + const struct file_operations *fops; + struct inode *f_inode = file_inode(file); + const struct eventfs_entry *entry; + struct eventfs_inode *ei_child; + struct tracefs_inode *ti; + struct eventfs_inode *ei; + const char *name; + umode_t mode; + int idx; + int ret = -EINVAL; + int ino; + int i, r, c; + + if (!dir_emit_dots(file, ctx)) + return 0; + + ti = get_tracefs(f_inode); + if (!(ti->flags & TRACEFS_EVENT_INODE)) + return -EINVAL; + + c = ctx->pos - 2; + + idx = srcu_read_lock(&eventfs_srcu); + + mutex_lock(&eventfs_mutex); + ei = READ_ONCE(ti->private); + if (ei && ei->is_freed) + ei = NULL; + mutex_unlock(&eventfs_mutex); + + if (!ei) + goto out; + + /* + * Need to create the dentries and inodes to have a consistent + * inode number. + */ + ret = 0; + + /* Start at 'c' to jump over already read entries */ + for (i = c; i < ei->nr_entries; i++, ctx->pos++) { + void *cdata = ei->data; + + entry = &ei->entries[i]; + name = entry->name; + + mutex_lock(&eventfs_mutex); + /* If ei->is_freed then just bail here, nothing more to do */ + if (ei->is_freed) { + mutex_unlock(&eventfs_mutex); + goto out; + } + r = entry->callback(name, &mode, &cdata, &fops); + mutex_unlock(&eventfs_mutex); + if (r <= 0) + continue; + + ino = EVENTFS_FILE_INODE_INO; + + if (!dir_emit(ctx, name, strlen(name), ino, DT_REG)) + goto out; + } + + /* Subtract the skipped entries above */ + c -= min((unsigned int)c, (unsigned int)ei->nr_entries); + + list_for_each_entry_srcu(ei_child, &ei->children, list, + srcu_read_lock_held(&eventfs_srcu)) { + + if (c > 0) { + c--; + continue; + } + + ctx->pos++; + + if (ei_child->is_freed) + continue; + + name = ei_child->name; + + ino = eventfs_dir_ino(ei_child); + + if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR)) + goto out_dec; + } + ret = 1; + out: + srcu_read_unlock(&eventfs_srcu, idx); + + return ret; + + out_dec: + /* Incremented ctx->pos without adding something, reset it */ + ctx->pos--; + goto out; +} + +/** + * eventfs_create_dir - Create the eventfs_inode for this directory + * @name: The name of the directory to create. + * @parent: The eventfs_inode of the parent directory. + * @entries: A list of entries that represent the files under this directory + * @size: The number of @entries + * @data: The default data to pass to the files (an entry may override it). + * + * This function creates the descriptor to represent a directory in the + * eventfs. This descriptor is an eventfs_inode, and it is returned to be + * used to create other children underneath. + * + * The @entries is an array of eventfs_entry structures which has: + * const char *name + * eventfs_callback callback; + * + * The name is the name of the file, and the callback is a pointer to a function + * that will be called when the file is reference (either by lookup or by + * reading a directory). The callback is of the prototype: + * + * int callback(const char *name, umode_t *mode, void **data, + * const struct file_operations **fops); + * + * When a file needs to be created, this callback will be called with + * name = the name of the file being created (so that the same callback + * may be used for multiple files). + * mode = a place to set the file's mode + * data = A pointer to @data, and the callback may replace it, which will + * cause the file created to pass the new data to the open() call. + * fops = the fops to use for the created file. + * + * NB. @callback is called while holding internal locks of the eventfs + * system. The callback must not call any code that might also call into + * the tracefs or eventfs system or it will risk creating a deadlock. + */ +struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent, + const struct eventfs_entry *entries, + int size, void *data) +{ + struct eventfs_inode *ei; + + if (!parent) + return ERR_PTR(-EINVAL); + + ei = alloc_ei(name); + if (!ei) + return ERR_PTR(-ENOMEM); + + ei->entries = entries; + ei->nr_entries = size; + ei->data = data; + INIT_LIST_HEAD(&ei->children); + INIT_LIST_HEAD(&ei->list); + + mutex_lock(&eventfs_mutex); + if (!parent->is_freed) + list_add_tail(&ei->list, &parent->children); + mutex_unlock(&eventfs_mutex); + + /* Was the parent freed? */ + if (list_empty(&ei->list)) { + cleanup_ei(ei); + ei = ERR_PTR(-EBUSY); + } + return ei; +} + +/** + * eventfs_create_events_dir - create the top level events directory + * @name: The name of the top level directory to create. + * @parent: Parent dentry for this file in the tracefs directory. + * @entries: A list of entries that represent the files under this directory + * @size: The number of @entries + * @data: The default data to pass to the files (an entry may override it). + * + * This function creates the top of the trace event directory. + * + * See eventfs_create_dir() for use of @entries. + */ +struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent, + const struct eventfs_entry *entries, + int size, void *data) +{ + struct dentry *dentry; + struct eventfs_root_inode *rei; + struct eventfs_inode *ei; + struct tracefs_inode *ti; + struct inode *inode; + kuid_t uid; + kgid_t gid; + + if (security_locked_down(LOCKDOWN_TRACEFS)) + return NULL; + + dentry = tracefs_start_creating(name, parent); + if (IS_ERR(dentry)) + return ERR_CAST(dentry); + + ei = alloc_root_ei(name); + if (!ei) + goto fail; + + inode = tracefs_get_inode(dentry->d_sb); + if (unlikely(!inode)) + goto fail; + + // Note: we have a ref to the dentry from tracefs_start_creating() + rei = get_root_inode(ei); + rei->events_dir = dentry; + + ei->entries = entries; + ei->nr_entries = size; + ei->data = data; + + /* Save the ownership of this directory */ + uid = d_inode(dentry->d_parent)->i_uid; + gid = d_inode(dentry->d_parent)->i_gid; + + /* + * The ei->attr will be used as the default values for the + * files beneath this directory. + */ + ei->attr.uid = uid; + ei->attr.gid = gid; + + INIT_LIST_HEAD(&ei->children); + INIT_LIST_HEAD(&ei->list); + + ti = get_tracefs(inode); + ti->flags |= TRACEFS_EVENT_INODE; + ti->private = ei; + + inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; + inode->i_uid = uid; + inode->i_gid = gid; + inode->i_op = &eventfs_dir_inode_operations; + inode->i_fop = &eventfs_file_operations; + + dentry->d_fsdata = get_ei(ei); + + /* + * Keep all eventfs directories with i_nlink == 1. + * Due to the dynamic nature of the dentry creations and not + * wanting to add a pointer to the parent eventfs_inode in the + * eventfs_inode structure, keeping the i_nlink in sync with the + * number of directories would cause too much complexity for + * something not worth much. Keeping directory links at 1 + * tells userspace not to trust the link number. + */ + d_make_persistent(dentry, inode); + /* The dentry of the "events" parent does keep track though */ + inc_nlink(dentry->d_parent->d_inode); + fsnotify_mkdir(dentry->d_parent->d_inode, dentry); + tracefs_end_creating(dentry); + + return ei; + + fail: + cleanup_ei(ei); + tracefs_failed_creating(dentry); + return ERR_PTR(-ENOMEM); +} + +/** + * eventfs_remove_rec - remove eventfs dir or file from list + * @ei: eventfs_inode to be removed. + * @level: prevent recursion from going more than 3 levels deep. + * + * This function recursively removes eventfs_inodes which + * contains info of files and/or directories. + */ +static void eventfs_remove_rec(struct eventfs_inode *ei, int level) +{ + struct eventfs_inode *ei_child; + + /* + * Check recursion depth. It should never be greater than 3: + * 0 - events/ + * 1 - events/group/ + * 2 - events/group/event/ + * 3 - events/group/event/file + */ + if (WARN_ON_ONCE(level > 3)) + return; + + /* search for nested folders or files */ + list_for_each_entry(ei_child, &ei->children, list) + eventfs_remove_rec(ei_child, level + 1); + + list_del_rcu(&ei->list); + free_ei(ei); +} + +/** + * eventfs_remove_dir - remove eventfs dir or file from list + * @ei: eventfs_inode to be removed. + * + * This function acquire the eventfs_mutex lock and call eventfs_remove_rec() + */ +void eventfs_remove_dir(struct eventfs_inode *ei) +{ + if (!ei) + return; + + mutex_lock(&eventfs_mutex); + eventfs_remove_rec(ei, 0); + mutex_unlock(&eventfs_mutex); +} + +/** + * eventfs_remove_events_dir - remove the top level eventfs directory + * @ei: the event_inode returned by eventfs_create_events_dir(). + * + * This function removes the events main directory + */ +void eventfs_remove_events_dir(struct eventfs_inode *ei) +{ + struct eventfs_root_inode *rei; + struct dentry *dentry; + + rei = get_root_inode(ei); + dentry = rei->events_dir; + if (!dentry) + return; + + rei->events_dir = NULL; + eventfs_remove_dir(ei); + + /* + * Matches the dget() done by tracefs_start_creating() + * in eventfs_create_events_dir() when it the dentry was + * created. In other words, it's a normal dentry that + * sticks around while the other ei->dentry are created + * and destroyed dynamically. + */ + d_invalidate(dentry); + d_make_discardable(dentry); +} diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index da85b3979195..d9d8932a7b9c 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -11,23 +11,65 @@ #include <linux/module.h> #include <linux/fs.h> -#include <linux/mount.h> +#include <linux/fs_context.h> +#include <linux/fs_parser.h> #include <linux/kobject.h> #include <linux/namei.h> #include <linux/tracefs.h> #include <linux/fsnotify.h> #include <linux/security.h> #include <linux/seq_file.h> -#include <linux/parser.h> #include <linux/magic.h> #include <linux/slab.h> +#include "internal.h" #define TRACEFS_DEFAULT_MODE 0700 +static struct kmem_cache *tracefs_inode_cachep __ro_after_init; static struct vfsmount *tracefs_mount; static int tracefs_mount_count; static bool tracefs_registered; +/* + * Keep track of all tracefs_inodes in order to update their + * flags if necessary on a remount. + */ +static DEFINE_SPINLOCK(tracefs_inode_lock); +static LIST_HEAD(tracefs_inodes); + +static struct inode *tracefs_alloc_inode(struct super_block *sb) +{ + struct tracefs_inode *ti; + unsigned long flags; + + ti = alloc_inode_sb(sb, tracefs_inode_cachep, GFP_KERNEL); + if (!ti) + return NULL; + + spin_lock_irqsave(&tracefs_inode_lock, flags); + list_add_rcu(&ti->list, &tracefs_inodes); + spin_unlock_irqrestore(&tracefs_inode_lock, flags); + + return &ti->vfs_inode; +} + +static void tracefs_free_inode(struct inode *inode) +{ + struct tracefs_inode *ti = get_tracefs(inode); + + kmem_cache_free(tracefs_inode_cachep, ti); +} + +static void tracefs_destroy_inode(struct inode *inode) +{ + struct tracefs_inode *ti = get_tracefs(inode); + unsigned long flags; + + spin_lock_irqsave(&tracefs_inode_lock, flags); + list_del_rcu(&ti->list); + spin_unlock_irqrestore(&tracefs_inode_lock, flags); +} + static ssize_t default_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -67,16 +109,26 @@ static char *get_dname(struct dentry *dentry) return name; } -static int tracefs_syscall_mkdir(struct user_namespace *mnt_userns, - struct inode *inode, struct dentry *dentry, - umode_t mode) +static struct dentry *tracefs_syscall_mkdir(struct mnt_idmap *idmap, + struct inode *inode, struct dentry *dentry, + umode_t mode) { + struct tracefs_inode *ti; char *name; int ret; name = get_dname(dentry); if (!name) - return -ENOMEM; + return ERR_PTR(-ENOMEM); + + /* + * This is a new directory that does not take the default of + * the rootfs. It becomes the default permissions for all the + * files and directories underneath it. + */ + ti = get_tracefs(inode); + ti->flags |= TRACEFS_INSTANCE_INODE; + ti->private = inode; /* * The mkdir call can call the generic functions that create @@ -89,7 +141,7 @@ static int tracefs_syscall_mkdir(struct user_namespace *mnt_userns, kfree(name); - return ret; + return ERR_PTR(ret); } static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry) @@ -121,23 +173,112 @@ static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry) return ret; } -static const struct inode_operations tracefs_dir_inode_operations = { +static void set_tracefs_inode_owner(struct inode *inode) +{ + struct tracefs_inode *ti = get_tracefs(inode); + struct inode *root_inode = ti->private; + kuid_t uid; + kgid_t gid; + + uid = root_inode->i_uid; + gid = root_inode->i_gid; + + /* + * If the root is not the mount point, then check the root's + * permissions. If it was never set, then default to the + * mount point. + */ + if (root_inode != d_inode(root_inode->i_sb->s_root)) { + struct tracefs_inode *rti; + + rti = get_tracefs(root_inode); + root_inode = d_inode(root_inode->i_sb->s_root); + + if (!(rti->flags & TRACEFS_UID_PERM_SET)) + uid = root_inode->i_uid; + + if (!(rti->flags & TRACEFS_GID_PERM_SET)) + gid = root_inode->i_gid; + } + + /* + * If this inode has never been referenced, then update + * the permissions to the superblock. + */ + if (!(ti->flags & TRACEFS_UID_PERM_SET)) + inode->i_uid = uid; + + if (!(ti->flags & TRACEFS_GID_PERM_SET)) + inode->i_gid = gid; +} + +static int tracefs_permission(struct mnt_idmap *idmap, + struct inode *inode, int mask) +{ + set_tracefs_inode_owner(inode); + return generic_permission(idmap, inode, mask); +} + +static int tracefs_getattr(struct mnt_idmap *idmap, + const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) +{ + struct inode *inode = d_backing_inode(path->dentry); + + set_tracefs_inode_owner(inode); + generic_fillattr(idmap, request_mask, inode, stat); + return 0; +} + +static int tracefs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + struct iattr *attr) +{ + unsigned int ia_valid = attr->ia_valid; + struct inode *inode = d_inode(dentry); + struct tracefs_inode *ti = get_tracefs(inode); + + if (ia_valid & ATTR_UID) + ti->flags |= TRACEFS_UID_PERM_SET; + + if (ia_valid & ATTR_GID) + ti->flags |= TRACEFS_GID_PERM_SET; + + return simple_setattr(idmap, dentry, attr); +} + +static const struct inode_operations tracefs_instance_dir_inode_operations = { .lookup = simple_lookup, .mkdir = tracefs_syscall_mkdir, .rmdir = tracefs_syscall_rmdir, + .permission = tracefs_permission, + .getattr = tracefs_getattr, + .setattr = tracefs_setattr, }; -static struct inode *tracefs_get_inode(struct super_block *sb) +static const struct inode_operations tracefs_dir_inode_operations = { + .lookup = simple_lookup, + .permission = tracefs_permission, + .getattr = tracefs_getattr, + .setattr = tracefs_setattr, +}; + +static const struct inode_operations tracefs_file_inode_operations = { + .permission = tracefs_permission, + .getattr = tracefs_getattr, + .setattr = tracefs_setattr, +}; + +struct inode *tracefs_get_inode(struct super_block *sb) { struct inode *inode = new_inode(sb); if (inode) { inode->i_ino = get_next_ino(); - inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + simple_inode_init_ts(inode); } return inode; } -struct tracefs_mount_opts { +struct tracefs_fs_info { kuid_t uid; kgid_t gid; umode_t mode; @@ -149,138 +290,42 @@ enum { Opt_uid, Opt_gid, Opt_mode, - Opt_err }; -static const match_table_t tokens = { - {Opt_uid, "uid=%u"}, - {Opt_gid, "gid=%u"}, - {Opt_mode, "mode=%o"}, - {Opt_err, NULL} +static const struct fs_parameter_spec tracefs_param_specs[] = { + fsparam_gid ("gid", Opt_gid), + fsparam_u32oct ("mode", Opt_mode), + fsparam_uid ("uid", Opt_uid), + {} }; -struct tracefs_fs_info { - struct tracefs_mount_opts mount_opts; -}; - -static void change_gid(struct dentry *dentry, kgid_t gid) -{ - if (!dentry->d_inode) - return; - dentry->d_inode->i_gid = gid; -} - -/* - * Taken from d_walk, but without he need for handling renames. - * Nothing can be renamed while walking the list, as tracefs - * does not support renames. This is only called when mounting - * or remounting the file system, to set all the files to - * the given gid. - */ -static void set_gid(struct dentry *parent, kgid_t gid) +static int tracefs_parse_param(struct fs_context *fc, struct fs_parameter *param) { - struct dentry *this_parent; - struct list_head *next; - - this_parent = parent; - spin_lock(&this_parent->d_lock); - - change_gid(this_parent, gid); -repeat: - next = this_parent->d_subdirs.next; -resume: - while (next != &this_parent->d_subdirs) { - struct list_head *tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, d_child); - next = tmp->next; - - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - - change_gid(dentry, gid); - - if (!list_empty(&dentry->d_subdirs)) { - spin_unlock(&this_parent->d_lock); - spin_release(&dentry->d_lock.dep_map, _RET_IP_); - this_parent = dentry; - spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); - goto repeat; - } - spin_unlock(&dentry->d_lock); - } + struct tracefs_fs_info *opts = fc->s_fs_info; + struct fs_parse_result result; + int opt; + + opt = fs_parse(fc, tracefs_param_specs, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_uid: + opts->uid = result.uid; + break; + case Opt_gid: + opts->gid = result.gid; + break; + case Opt_mode: + opts->mode = result.uint_32 & S_IALLUGO; + break; /* - * All done at this level ... ascend and resume the search. + * We might like to report bad mount options here; + * but traditionally tracefs has ignored all mount options */ - rcu_read_lock(); -ascend: - if (this_parent != parent) { - struct dentry *child = this_parent; - this_parent = child->d_parent; - - spin_unlock(&child->d_lock); - spin_lock(&this_parent->d_lock); - - /* go into the first sibling still alive */ - do { - next = child->d_child.next; - if (next == &this_parent->d_subdirs) - goto ascend; - child = list_entry(next, struct dentry, d_child); - } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)); - rcu_read_unlock(); - goto resume; } - rcu_read_unlock(); - spin_unlock(&this_parent->d_lock); - return; -} -static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) -{ - substring_t args[MAX_OPT_ARGS]; - int option; - int token; - kuid_t uid; - kgid_t gid; - char *p; - - opts->opts = 0; - opts->mode = TRACEFS_DEFAULT_MODE; - - while ((p = strsep(&data, ",")) != NULL) { - if (!*p) - continue; - - token = match_token(p, tokens, args); - switch (token) { - case Opt_uid: - if (match_int(&args[0], &option)) - return -EINVAL; - uid = make_kuid(current_user_ns(), option); - if (!uid_valid(uid)) - return -EINVAL; - opts->uid = uid; - break; - case Opt_gid: - if (match_int(&args[0], &option)) - return -EINVAL; - gid = make_kgid(current_user_ns(), option); - if (!gid_valid(gid)) - return -EINVAL; - opts->gid = gid; - break; - case Opt_mode: - if (match_octal(&args[0], &option)) - return -EINVAL; - opts->mode = option & S_IALLUGO; - break; - /* - * We might like to report bad mount options here; - * but traditionally tracefs has ignored all mount options - */ - } - - opts->opts |= BIT(token); - } + opts->opts |= BIT(opt); return 0; } @@ -289,117 +334,215 @@ static int tracefs_apply_options(struct super_block *sb, bool remount) { struct tracefs_fs_info *fsi = sb->s_fs_info; struct inode *inode = d_inode(sb->s_root); - struct tracefs_mount_opts *opts = &fsi->mount_opts; + struct tracefs_inode *ti; + bool update_uid, update_gid; + umode_t tmp_mode; /* * On remount, only reset mode/uid/gid if they were provided as mount * options. */ - if (!remount || opts->opts & BIT(Opt_mode)) { - inode->i_mode &= ~S_IALLUGO; - inode->i_mode |= opts->mode; + if (!remount || fsi->opts & BIT(Opt_mode)) { + tmp_mode = READ_ONCE(inode->i_mode) & ~S_IALLUGO; + tmp_mode |= fsi->mode; + WRITE_ONCE(inode->i_mode, tmp_mode); } - if (!remount || opts->opts & BIT(Opt_uid)) - inode->i_uid = opts->uid; + if (!remount || fsi->opts & BIT(Opt_uid)) + inode->i_uid = fsi->uid; + + if (!remount || fsi->opts & BIT(Opt_gid)) + inode->i_gid = fsi->gid; + + if (remount && (fsi->opts & BIT(Opt_uid) || fsi->opts & BIT(Opt_gid))) { + + update_uid = fsi->opts & BIT(Opt_uid); + update_gid = fsi->opts & BIT(Opt_gid); + + rcu_read_lock(); + list_for_each_entry_rcu(ti, &tracefs_inodes, list) { + if (update_uid) { + ti->flags &= ~TRACEFS_UID_PERM_SET; + ti->vfs_inode.i_uid = fsi->uid; + } + + if (update_gid) { + ti->flags &= ~TRACEFS_GID_PERM_SET; + ti->vfs_inode.i_gid = fsi->gid; + } - if (!remount || opts->opts & BIT(Opt_gid)) { - /* Set all the group ids to the mount option */ - set_gid(sb->s_root, opts->gid); + /* + * Note, the above ti->vfs_inode updates are + * used in eventfs_remount() so they must come + * before calling it. + */ + if (ti->flags & TRACEFS_EVENT_INODE) + eventfs_remount(ti, update_uid, update_gid); + } + rcu_read_unlock(); } return 0; } -static int tracefs_remount(struct super_block *sb, int *flags, char *data) +static int tracefs_reconfigure(struct fs_context *fc) { - int err; - struct tracefs_fs_info *fsi = sb->s_fs_info; + struct super_block *sb = fc->root->d_sb; + struct tracefs_fs_info *sb_opts = sb->s_fs_info; + struct tracefs_fs_info *new_opts = fc->s_fs_info; - sync_filesystem(sb); - err = tracefs_parse_options(data, &fsi->mount_opts); - if (err) - goto fail; + if (!new_opts) + return 0; - tracefs_apply_options(sb, true); + sync_filesystem(sb); + /* structure copy of new mount options to sb */ + *sb_opts = *new_opts; -fail: - return err; + return tracefs_apply_options(sb, true); } static int tracefs_show_options(struct seq_file *m, struct dentry *root) { struct tracefs_fs_info *fsi = root->d_sb->s_fs_info; - struct tracefs_mount_opts *opts = &fsi->mount_opts; - if (!uid_eq(opts->uid, GLOBAL_ROOT_UID)) + if (!uid_eq(fsi->uid, GLOBAL_ROOT_UID)) seq_printf(m, ",uid=%u", - from_kuid_munged(&init_user_ns, opts->uid)); - if (!gid_eq(opts->gid, GLOBAL_ROOT_GID)) + from_kuid_munged(&init_user_ns, fsi->uid)); + if (!gid_eq(fsi->gid, GLOBAL_ROOT_GID)) seq_printf(m, ",gid=%u", - from_kgid_munged(&init_user_ns, opts->gid)); - if (opts->mode != TRACEFS_DEFAULT_MODE) - seq_printf(m, ",mode=%o", opts->mode); + from_kgid_munged(&init_user_ns, fsi->gid)); + if (fsi->mode != TRACEFS_DEFAULT_MODE) + seq_printf(m, ",mode=%o", fsi->mode); return 0; } +static int tracefs_drop_inode(struct inode *inode) +{ + struct tracefs_inode *ti = get_tracefs(inode); + + /* + * This inode is being freed and cannot be used for + * eventfs. Clear the flag so that it doesn't call into + * eventfs during the remount flag updates. The eventfs_inode + * gets freed after an RCU cycle, so the content will still + * be safe if the iteration is going on now. + */ + ti->flags &= ~TRACEFS_EVENT_INODE; + + return 1; +} + static const struct super_operations tracefs_super_operations = { + .alloc_inode = tracefs_alloc_inode, + .free_inode = tracefs_free_inode, + .destroy_inode = tracefs_destroy_inode, + .drop_inode = tracefs_drop_inode, .statfs = simple_statfs, - .remount_fs = tracefs_remount, .show_options = tracefs_show_options, }; -static int trace_fill_super(struct super_block *sb, void *data, int silent) +/* + * It would be cleaner if eventfs had its own dentry ops. + * + * Note that d_revalidate is called potentially under RCU, + * so it can't take the eventfs mutex etc. It's fine - if + * we open a file just as it's marked dead, things will + * still work just fine, and just see the old stale case. + */ +static void tracefs_d_release(struct dentry *dentry) +{ + if (dentry->d_fsdata) + eventfs_d_release(dentry); +} + +static int tracefs_d_revalidate(struct inode *inode, const struct qstr *name, + struct dentry *dentry, unsigned int flags) +{ + struct eventfs_inode *ei = dentry->d_fsdata; + + return !(ei && ei->is_freed); +} + +static int tracefs_d_delete(const struct dentry *dentry) +{ + /* + * We want to keep eventfs dentries around but not tracefs + * ones. eventfs dentries have content in d_fsdata. + * Use d_fsdata to determine if it's a eventfs dentry or not. + */ + return dentry->d_fsdata == NULL; +} + +static const struct dentry_operations tracefs_dentry_operations = { + .d_revalidate = tracefs_d_revalidate, + .d_release = tracefs_d_release, + .d_delete = tracefs_d_delete, +}; + +static int tracefs_fill_super(struct super_block *sb, struct fs_context *fc) { static const struct tree_descr trace_files[] = {{""}}; - struct tracefs_fs_info *fsi; int err; - fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL); - sb->s_fs_info = fsi; - if (!fsi) { - err = -ENOMEM; - goto fail; - } - - err = tracefs_parse_options(data, &fsi->mount_opts); + err = simple_fill_super(sb, TRACEFS_MAGIC, trace_files); if (err) - goto fail; - - err = simple_fill_super(sb, TRACEFS_MAGIC, trace_files); - if (err) - goto fail; + return err; sb->s_op = &tracefs_super_operations; - - tracefs_apply_options(sb, false); + set_default_d_op(sb, &tracefs_dentry_operations); return 0; +} + +static int tracefs_get_tree(struct fs_context *fc) +{ + int err = get_tree_single(fc, tracefs_fill_super); -fail: - kfree(fsi); - sb->s_fs_info = NULL; - return err; + if (err) + return err; + + return tracefs_reconfigure(fc); } -static struct dentry *trace_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) +static void tracefs_free_fc(struct fs_context *fc) { - return mount_single(fs_type, flags, data, trace_fill_super); + kfree(fc->s_fs_info); +} + +static const struct fs_context_operations tracefs_context_ops = { + .free = tracefs_free_fc, + .parse_param = tracefs_parse_param, + .get_tree = tracefs_get_tree, + .reconfigure = tracefs_reconfigure, +}; + +static int tracefs_init_fs_context(struct fs_context *fc) +{ + struct tracefs_fs_info *fsi; + + fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL); + if (!fsi) + return -ENOMEM; + + fsi->mode = TRACEFS_DEFAULT_MODE; + + fc->s_fs_info = fsi; + fc->ops = &tracefs_context_ops; + return 0; } static struct file_system_type trace_fs_type = { .owner = THIS_MODULE, .name = "tracefs", - .mount = trace_mount, - .kill_sb = kill_litter_super, + .init_fs_context = tracefs_init_fs_context, + .parameters = tracefs_param_specs, + .kill_sb = kill_anon_super, }; MODULE_ALIAS_FS("tracefs"); -static struct dentry *start_creating(const char *name, struct dentry *parent) +struct dentry *tracefs_start_creating(const char *name, struct dentry *parent) { struct dentry *dentry; int error; @@ -419,36 +562,44 @@ static struct dentry *start_creating(const char *name, struct dentry *parent) if (!parent) parent = tracefs_mount->mnt_root; - inode_lock(d_inode(parent)); - if (unlikely(IS_DEADDIR(d_inode(parent)))) - dentry = ERR_PTR(-ENOENT); - else - dentry = lookup_one_len(name, parent, strlen(name)); - if (!IS_ERR(dentry) && d_inode(dentry)) { - dput(dentry); - dentry = ERR_PTR(-EEXIST); - } - - if (IS_ERR(dentry)) { - inode_unlock(d_inode(parent)); + dentry = simple_start_creating(parent, name); + if (IS_ERR(dentry)) simple_release_fs(&tracefs_mount, &tracefs_mount_count); - } return dentry; } -static struct dentry *failed_creating(struct dentry *dentry) +struct dentry *tracefs_failed_creating(struct dentry *dentry) { - inode_unlock(d_inode(dentry->d_parent)); - dput(dentry); + simple_done_creating(dentry); simple_release_fs(&tracefs_mount, &tracefs_mount_count); return NULL; } -static struct dentry *end_creating(struct dentry *dentry) +struct dentry *tracefs_end_creating(struct dentry *dentry) { - inode_unlock(d_inode(dentry->d_parent)); - return dentry; + simple_done_creating(dentry); + return dentry; // borrowed +} + +/* Find the inode that this will use for default */ +static struct inode *instance_inode(struct dentry *parent, struct inode *inode) +{ + struct tracefs_inode *ti; + + /* If parent is NULL then use root inode */ + if (!parent) + return d_inode(inode->i_sb->s_root); + + /* Find the inode that is flagged as an instance or the root inode */ + while (!IS_ROOT(parent)) { + ti = get_tracefs(d_inode(parent)); + if (ti->flags & TRACEFS_INSTANCE_INODE) + break; + parent = parent->d_parent; + } + + return d_inode(parent); } /** @@ -481,6 +632,7 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops) { + struct tracefs_inode *ti; struct dentry *dentry; struct inode *inode; @@ -490,29 +642,34 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode, if (!(mode & S_IFMT)) mode |= S_IFREG; BUG_ON(!S_ISREG(mode)); - dentry = start_creating(name, parent); + dentry = tracefs_start_creating(name, parent); if (IS_ERR(dentry)) return NULL; inode = tracefs_get_inode(dentry->d_sb); if (unlikely(!inode)) - return failed_creating(dentry); + return tracefs_failed_creating(dentry); + + ti = get_tracefs(inode); + ti->private = instance_inode(parent, inode); inode->i_mode = mode; + inode->i_op = &tracefs_file_inode_operations; inode->i_fop = fops ? fops : &tracefs_file_operations; inode->i_private = data; inode->i_uid = d_inode(dentry->d_parent)->i_uid; inode->i_gid = d_inode(dentry->d_parent)->i_gid; - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); fsnotify_create(d_inode(dentry->d_parent), dentry); - return end_creating(dentry); + return tracefs_end_creating(dentry); } static struct dentry *__create_dir(const char *name, struct dentry *parent, const struct inode_operations *ops) { - struct dentry *dentry = start_creating(name, parent); + struct tracefs_inode *ti; + struct dentry *dentry = tracefs_start_creating(name, parent); struct inode *inode; if (IS_ERR(dentry)) @@ -520,7 +677,7 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent, inode = tracefs_get_inode(dentry->d_sb); if (unlikely(!inode)) - return failed_creating(dentry); + return tracefs_failed_creating(dentry); /* Do not set bits for OTH */ inode->i_mode = S_IFDIR | S_IRWXU | S_IRUSR| S_IRGRP | S_IXUSR | S_IXGRP; @@ -529,12 +686,15 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent, inode->i_uid = d_inode(dentry->d_parent)->i_uid; inode->i_gid = d_inode(dentry->d_parent)->i_gid; + ti = get_tracefs(inode); + ti->private = instance_inode(parent, inode); + /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); inc_nlink(d_inode(dentry->d_parent)); fsnotify_mkdir(d_inode(dentry->d_parent), dentry); - return end_creating(dentry); + return tracefs_end_creating(dentry); } /** @@ -556,7 +716,10 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent, */ struct dentry *tracefs_create_dir(const char *name, struct dentry *parent) { - return __create_dir(name, parent, &simple_dir_inode_operations); + if (security_locked_down(LOCKDOWN_TRACEFS)) + return NULL; + + return __create_dir(name, parent, &tracefs_dir_inode_operations); } /** @@ -587,7 +750,7 @@ __init struct dentry *tracefs_create_instance_dir(const char *name, if (WARN_ON(tracefs_ops.mkdir || tracefs_ops.rmdir)) return NULL; - dentry = __create_dir(name, parent, &tracefs_dir_inode_operations); + dentry = __create_dir(name, parent, &tracefs_instance_dir_inode_operations); if (!dentry) return NULL; @@ -628,10 +791,29 @@ bool tracefs_initialized(void) return tracefs_registered; } +static void init_once(void *foo) +{ + struct tracefs_inode *ti = (struct tracefs_inode *) foo; + + /* inode_init_once() calls memset() on the vfs_inode portion */ + inode_init_once(&ti->vfs_inode); + + /* Zero out the rest */ + memset_after(ti, 0, vfs_inode); +} + static int __init tracefs_init(void) { int retval; + tracefs_inode_cachep = kmem_cache_create("tracefs_inode_cache", + sizeof(struct tracefs_inode), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_ACCOUNT), + init_once); + if (!tracefs_inode_cachep) + return -ENOMEM; + retval = sysfs_create_mount_point(kernel_kobj, "tracing"); if (retval) return -EINVAL; diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h new file mode 100644 index 000000000000..d83c2a25f288 --- /dev/null +++ b/fs/tracefs/internal.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TRACEFS_INTERNAL_H +#define _TRACEFS_INTERNAL_H + +enum { + TRACEFS_EVENT_INODE = BIT(1), + TRACEFS_GID_PERM_SET = BIT(2), + TRACEFS_UID_PERM_SET = BIT(3), + TRACEFS_INSTANCE_INODE = BIT(4), +}; + +struct tracefs_inode { + struct inode vfs_inode; + /* The below gets initialized with memset_after(ti, 0, vfs_inode) */ + struct list_head list; + unsigned long flags; + void *private; +}; + +/* + * struct eventfs_attr - cache the mode and ownership of a eventfs entry + * @mode: saved mode plus flags of what is saved + * @uid: saved uid if changed + * @gid: saved gid if changed + */ +struct eventfs_attr { + int mode; + kuid_t uid; + kgid_t gid; +}; + +/* + * struct eventfs_inode - hold the properties of the eventfs directories. + * @list: link list into the parent directory + * @rcu: Union with @list for freeing + * @children: link list into the child eventfs_inode + * @entries: the array of entries representing the files in the directory + * @name: the name of the directory to create + * @entry_attrs: Saved mode and ownership of the @d_children + * @data: The private data to pass to the callbacks + * @attr: Saved mode and ownership of eventfs_inode itself + * @is_freed: Flag set if the eventfs is on its way to be freed + * Note if is_freed is set, then dentry is corrupted. + * @is_events: Flag set for only the top level "events" directory + * @nr_entries: The number of items in @entries + * @ino: The saved inode number + */ +struct eventfs_inode { + union { + struct list_head list; + struct rcu_head rcu; + }; + struct list_head children; + const struct eventfs_entry *entries; + const char *name; + struct eventfs_attr *entry_attrs; + void *data; + struct eventfs_attr attr; + struct kref kref; + unsigned int is_freed:1; + unsigned int is_events:1; + unsigned int nr_entries:30; + unsigned int ino; +}; + +static inline struct tracefs_inode *get_tracefs(const struct inode *inode) +{ + return container_of(inode, struct tracefs_inode, vfs_inode); +} + +struct dentry *tracefs_start_creating(const char *name, struct dentry *parent); +struct dentry *tracefs_end_creating(struct dentry *dentry); +struct dentry *tracefs_failed_creating(struct dentry *dentry); +struct inode *tracefs_get_inode(struct super_block *sb); + +void eventfs_remount(struct tracefs_inode *ti, bool update_uid, bool update_gid); +void eventfs_d_release(struct dentry *dentry); + +#endif /* _TRACEFS_INTERNAL_H */ |
