From c576e0fcd6188d0edb50b0fb83f853433ef4819b Mon Sep 17 00:00:00 2001 From: Matthew Bobrowski Date: Sun, 8 Aug 2021 15:24:33 +1000 Subject: kernel/pid.c: remove static qualifier from pidfd_create() With the idea of returning pidfds from the fanotify API, we need to expose a mechanism for creating pidfds. We drop the static qualifier from pidfd_create() and add its declaration to linux/pid.h so that the pidfd_create() helper can be called from other kernel subsystems i.e. fanotify. Link: https://lore.kernel.org/r/0c68653ec32f1b7143301f0231f7ed14062fd82b.1628398044.git.repnop@google.com Signed-off-by: Matthew Bobrowski Acked-by: Christian Brauner Signed-off-by: Jan Kara --- include/linux/pid.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pid.h b/include/linux/pid.h index fa10acb8d6a4..af308e15f174 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -78,6 +78,7 @@ struct file; extern struct pid *pidfd_pid(const struct file *file); struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags); +int pidfd_create(struct pid *pid, unsigned int flags); static inline struct pid *get_pid(struct pid *pid) { -- cgit From 0aca67bb7f0d8c997dfef8ff0bfeb0afb361f0e6 Mon Sep 17 00:00:00 2001 From: Matthew Bobrowski Date: Sun, 8 Aug 2021 15:25:58 +1000 Subject: fanotify: introduce a generic info record copying helper The copy_info_records_to_user() helper allows for the separation of info record copying routines/conditionals from copy_event_to_user(), which reduces the overall clutter within this function. This becomes especially true as we start introducing additional info records in the future i.e. struct fanotify_event_info_pidfd. On success, this helper returns the total amount of bytes that have been copied into the user supplied buffer and on error, a negative value is returned to the caller. The newly defined macro FANOTIFY_INFO_MODES can be used to obtain info record types that have been enabled for a specific notification group. This macro becomes useful in the subsequent patch when the FAN_REPORT_PIDFD initialization flag is introduced. Link: https://lore.kernel.org/r/8872947dfe12ce8ae6e9a7f2d49ea29bc8006af0.1628398044.git.repnop@google.com Signed-off-by: Matthew Bobrowski Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fanotify.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index a16dbeced152..10a7e26ddba6 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -27,6 +27,8 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */ #define FANOTIFY_FID_BITS (FAN_REPORT_FID | FAN_REPORT_DFID_NAME) +#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS) + /* * fanotify_init() flags that require CAP_SYS_ADMIN. * We do not allow unprivileged groups to request permission events. -- cgit From af579beb666aefb17e9a335c12c788c92932baf1 Mon Sep 17 00:00:00 2001 From: Matthew Bobrowski Date: Sun, 8 Aug 2021 15:26:25 +1000 Subject: fanotify: add pidfd support to the fanotify API Introduce a new flag FAN_REPORT_PIDFD for fanotify_init(2) which allows userspace applications to control whether a pidfd information record containing a pidfd is to be returned alongside the generic event metadata for each event. If FAN_REPORT_PIDFD is enabled for a notification group, an additional struct fanotify_event_info_pidfd object type will be supplied alongside the generic struct fanotify_event_metadata for a single event. This functionality is analogous to that of FAN_REPORT_FID in terms of how the event structure is supplied to a userspace application. Usage of FAN_REPORT_PIDFD with FAN_REPORT_FID/FAN_REPORT_DFID_NAME is permitted, and in this case a struct fanotify_event_info_pidfd object will likely follow any struct fanotify_event_info_fid object. Currently, the usage of the FAN_REPORT_TID flag is not permitted along with FAN_REPORT_PIDFD as the pidfd API currently only supports the creation of pidfds for thread-group leaders. Additionally, usage of the FAN_REPORT_PIDFD flag is limited to privileged processes only i.e. event listeners that are running with the CAP_SYS_ADMIN capability. Attempting to supply the FAN_REPORT_TID initialization flags with FAN_REPORT_PIDFD or creating a notification group without CAP_SYS_ADMIN will result with -EINVAL being returned to the caller. In the event of a pidfd creation error, there are two types of error values that can be reported back to the listener. There is FAN_NOPIDFD, which will be reported in cases where the process responsible for generating the event has terminated prior to the event listener being able to read the event. Then there is FAN_EPIDFD, which will be reported when a more generic pidfd creation error has occurred when fanotify calls pidfd_create(). Link: https://lore.kernel.org/r/5f9e09cff7ed62bfaa51c1369e0f7ea5f16a91aa.1628398044.git.repnop@google.com Signed-off-by: Matthew Bobrowski Signed-off-by: Jan Kara --- include/linux/fanotify.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 10a7e26ddba6..eec3b7c40811 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -27,7 +27,7 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */ #define FANOTIFY_FID_BITS (FAN_REPORT_FID | FAN_REPORT_DFID_NAME) -#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS) +#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD) /* * fanotify_init() flags that require CAP_SYS_ADMIN. @@ -37,6 +37,7 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */ */ #define FANOTIFY_ADMIN_INIT_FLAGS (FANOTIFY_PERM_CLASSES | \ FAN_REPORT_TID | \ + FAN_REPORT_PIDFD | \ FAN_UNLIMITED_QUEUE | \ FAN_UNLIMITED_MARKS) -- cgit From ec44610fe2b86daef70f3f53f47d2a2542d7094f Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 10 Aug 2021 18:12:19 +0300 Subject: fsnotify: count all objects with attached connectors Rename s_fsnotify_inode_refs to s_fsnotify_connectors and count all objects with attached connectors, not only inodes with attached connectors. This will be used to optimize fsnotify() calls on sb without any type of marks. Link: https://lore.kernel.org/r/20210810151220.285179-4-amir73il@gmail.com Signed-off-by: Amir Goldstein Reviewed-by: Matthew Bobrowski Signed-off-by: Jan Kara --- include/linux/fs.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 640574294216..bea8ec5c726c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1507,8 +1507,11 @@ struct super_block { /* Number of inodes with nlink == 0 but still referenced */ atomic_long_t s_remove_count; - /* Pending fsnotify inode refs */ - atomic_long_t s_fsnotify_inode_refs; + /* + * Number of inode/mount/sb objects that are being watched, note that + * inodes objects are currently double-accounted. + */ + atomic_long_t s_fsnotify_connectors; /* Being remounted read-only */ int s_readonly_remount; -- cgit From e43de7f0862b8598cd1ef440e3b4701cd107ea40 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 10 Aug 2021 18:12:20 +0300 Subject: fsnotify: optimize the case of no marks of any type Add a simple check in the inline helpers to avoid calling fsnotify() and __fsnotify_parent() in case there are no marks of any type (inode/sb/mount) for an inode's sb, so there can be no objects of any type interested in the event. Link: https://lore.kernel.org/r/20210810151220.285179-5-amir73il@gmail.com Reviewed-by: Matthew Bobrowski Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index f8acddcf54fb..12d3a7d308ab 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -30,6 +30,9 @@ static inline void fsnotify_name(struct inode *dir, __u32 mask, struct inode *child, const struct qstr *name, u32 cookie) { + if (atomic_long_read(&dir->i_sb->s_fsnotify_connectors) == 0) + return; + fsnotify(mask, child, FSNOTIFY_EVENT_INODE, dir, name, NULL, cookie); } @@ -41,6 +44,9 @@ static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry, static inline void fsnotify_inode(struct inode *inode, __u32 mask) { + if (atomic_long_read(&inode->i_sb->s_fsnotify_connectors) == 0) + return; + if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; @@ -53,6 +59,9 @@ static inline int fsnotify_parent(struct dentry *dentry, __u32 mask, { struct inode *inode = d_inode(dentry); + if (atomic_long_read(&inode->i_sb->s_fsnotify_connectors) == 0) + return 0; + if (S_ISDIR(inode->i_mode)) { mask |= FS_ISDIR; -- cgit