From 9dd813c15b2c101168808d4f5941a29985758973 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 14 Mar 2017 12:31:02 +0100 Subject: fsnotify: Move mark list head from object into dedicated structure Currently notification marks are attached to object (inode or vfsmnt) by a hlist_head in the object. The list is also protected by a spinlock in the object. So while there is any mark attached to the list of marks, the object must be pinned in memory (and thus e.g. last iput() deleting inode cannot happen). Also for list iteration in fsnotify() to work, we must hold fsnotify_mark_srcu lock so that mark itself and mark->obj_list.next cannot get freed. Thus we are required to wait for response to fanotify events from userspace process with fsnotify_mark_srcu lock held. That causes issues when userspace process is buggy and does not reply to some event - basically the whole notification subsystem gets eventually stuck. So to be able to drop fsnotify_mark_srcu lock while waiting for response, we have to pin the mark in memory and make sure it stays in the object list (as removing the mark waiting for response could lead to lost notification events for groups later in the list). However we don't want inode reclaim to block on such mark as that would lead to system just locking up elsewhere. This commit is the first in the series that paves way towards solving these conflicting lifetime needs. Instead of anchoring the list of marks directly in the object, we anchor it in a dedicated structure (fsnotify_mark_connector) and just point to that structure from the object. The following commits will also add spinlock protecting the list and object pointer to the structure. Reviewed-by: Miklos Szeredi Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- kernel/auditsc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'kernel/auditsc.c') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index d6a8de5f8fa3..bf7b7ca295d0 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include "audit.h" @@ -1596,7 +1597,8 @@ static inline void handle_one(const struct inode *inode) struct audit_tree_refs *p; struct audit_chunk *chunk; int count; - if (likely(hlist_empty(&inode->i_fsnotify_marks))) + if (likely(!inode->i_fsnotify_marks || + hlist_empty(&inode->i_fsnotify_marks->list))) return; context = current->audit_context; p = context->trees; @@ -1639,7 +1641,8 @@ retry: seq = read_seqbegin(&rename_lock); for(;;) { struct inode *inode = d_backing_inode(d); - if (inode && unlikely(!hlist_empty(&inode->i_fsnotify_marks))) { + if (inode && unlikely(inode->i_fsnotify_marks && + !hlist_empty(&inode->i_fsnotify_marks->list))) { struct audit_chunk *chunk; chunk = audit_tree_lookup(inode); if (chunk) { -- cgit From 08991e83b7286635167bab40927665a90fb00d81 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Feb 2017 09:21:58 +0100 Subject: fsnotify: Free fsnotify_mark_connector when there is no mark attached Currently we free fsnotify_mark_connector structure only when inode / vfsmount is getting freed. This can however impose noticeable memory overhead when marks get attached to inodes only temporarily. So free the connector structure once the last mark is detached from the object. Since notification infrastructure can be working with the connector under the protection of fsnotify_mark_srcu, we have to be careful and free the fsnotify_mark_connector only after SRCU period passes. Reviewed-by: Miklos Szeredi Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- kernel/auditsc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'kernel/auditsc.c') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index bf7b7ca295d0..d383c33540af 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1597,8 +1597,7 @@ static inline void handle_one(const struct inode *inode) struct audit_tree_refs *p; struct audit_chunk *chunk; int count; - if (likely(!inode->i_fsnotify_marks || - hlist_empty(&inode->i_fsnotify_marks->list))) + if (likely(!inode->i_fsnotify_marks)) return; context = current->audit_context; p = context->trees; @@ -1641,8 +1640,7 @@ retry: seq = read_seqbegin(&rename_lock); for(;;) { struct inode *inode = d_backing_inode(d); - if (inode && unlikely(inode->i_fsnotify_marks && - !hlist_empty(&inode->i_fsnotify_marks->list))) { + if (inode && unlikely(inode->i_fsnotify_marks)) { struct audit_chunk *chunk; chunk = audit_tree_lookup(inode); if (chunk) { -- cgit