From 6d73c9ce0285adff18b54a5acadfbbbf8ba40dd5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 Oct 2023 13:41:06 -0400 Subject: get rid of __dget() fold into the sole remaining caller Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/dcache.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index c82ae731df9a..b8f1b54a1492 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -948,11 +948,6 @@ static inline void __dget_dlock(struct dentry *dentry) dentry->d_lockref.count++; } -static inline void __dget(struct dentry *dentry) -{ - lockref_get(&dentry->d_lockref); -} - struct dentry *dget_parent(struct dentry *dentry) { int gotref; @@ -1002,7 +997,7 @@ static struct dentry * __d_find_any_alias(struct inode *inode) if (hlist_empty(&inode->i_dentry)) return NULL; alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); - __dget(alias); + lockref_get(&alias->d_lockref); return alias; } -- cgit From 8219cb58feddcf28909072015f4e17e29f68c41a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 10 Nov 2023 14:32:05 -0500 Subject: kill d_{is,set}_fallthru() Introduced in 2015 and never had any in-tree users... Signed-off-by: Al Viro --- fs/dcache.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index b8f1b54a1492..9f5b2b5c1e6d 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -344,7 +344,7 @@ static inline void __d_set_inode_and_type(struct dentry *dentry, dentry->d_inode = inode; flags = READ_ONCE(dentry->d_flags); - flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); + flags &= ~DCACHE_ENTRY_TYPE; flags |= type_flags; smp_store_release(&dentry->d_flags, flags); } @@ -353,7 +353,7 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry) { unsigned flags = READ_ONCE(dentry->d_flags); - flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); + flags &= ~DCACHE_ENTRY_TYPE; WRITE_ONCE(dentry->d_flags, flags); dentry->d_inode = NULL; if (dentry->d_flags & DCACHE_LRU_LIST) @@ -1936,22 +1936,6 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) } EXPORT_SYMBOL(d_set_d_op); - -/* - * d_set_fallthru - Mark a dentry as falling through to a lower layer - * @dentry - The dentry to mark - * - * Mark a dentry as falling through to the lower layer (as set with - * d_pin_lower()). This flag may be recorded on the medium. - */ -void d_set_fallthru(struct dentry *dentry) -{ - spin_lock(&dentry->d_lock); - dentry->d_flags |= DCACHE_FALLTHRU; - spin_unlock(&dentry->d_lock); -} -EXPORT_SYMBOL(d_set_fallthru); - static unsigned d_flags_for_inode(struct inode *inode) { unsigned add_flags = DCACHE_REGULAR_TYPE; -- cgit From da549bdd15c295c24b2ee7ffe7ad0f3877fa8a87 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Nov 2023 02:00:39 -0500 Subject: dentry: switch the lists of children to hlist Saves a pointer per struct dentry and actually makes the things less clumsy. Cleaned the d_walk() and dcache_readdir() a bit by use of hlist_for_... iterators. A couple of new helpers - d_first_child() and d_next_sibling(), to make the expressions less awful. 
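(The two helpers mentioned above are added outside the fs/dcache.c diff shown here, so they do not appear below. A sketch consistent with the hlist-based fields this patch introduces - assuming the helpers sit next to struct dentry in the dcache header - would be:

static inline struct dentry *d_first_child(const struct dentry *dentry)
{
	/* first entry on the parent's ->d_children hlist, or NULL if empty */
	return hlist_entry_safe(dentry->d_children.first, struct dentry, d_sib);
}

static inline struct dentry *d_next_sibling(const struct dentry *dentry)
{
	/* next entry linked through ->d_sib, or NULL at the end of the list */
	return hlist_entry_safe(dentry->d_sib.next, struct dentry, d_sib);
}

hlist_entry_safe() maps an empty list or end-of-list to NULL, which is what lets d_first_child() feed straight into the hlist_for_each_entry_from() loop that d_walk() uses in the diff below.)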
Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/dcache.c | 76 ++++++++++++++++++++++++++++++------------------------------- 1 file changed, 37 insertions(+), 39 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index c82ae731df9a..59f76c9a15d1 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -51,8 +51,8 @@ * - d_lru * - d_count * - d_unhashed() - * - d_parent and d_subdirs - * - childrens' d_child and d_parent + * - d_parent and d_chilren + * - childrens' d_sib and d_parent * - d_u.d_alias, d_inode * * Ordering: @@ -537,7 +537,7 @@ void d_drop(struct dentry *dentry) } EXPORT_SYMBOL(d_drop); -static inline void dentry_unlist(struct dentry *dentry, struct dentry *parent) +static inline void dentry_unlist(struct dentry *dentry) { struct dentry *next; /* @@ -545,12 +545,12 @@ static inline void dentry_unlist(struct dentry *dentry, struct dentry *parent) * attached to the dentry tree */ dentry->d_flags |= DCACHE_DENTRY_KILLED; - if (unlikely(list_empty(&dentry->d_child))) + if (unlikely(hlist_unhashed(&dentry->d_sib))) return; - __list_del_entry(&dentry->d_child); + __hlist_del(&dentry->d_sib); /* * Cursors can move around the list of children. While we'd been - * a normal list member, it didn't matter - ->d_child.next would've + * a normal list member, it didn't matter - ->d_sib.next would've * been updated. However, from now on it won't be and for the * things like d_walk() it might end up with a nasty surprise. * Normally d_walk() doesn't care about cursors moving around - @@ -558,20 +558,20 @@ static inline void dentry_unlist(struct dentry *dentry, struct dentry *parent) * of its own, we get through it without ever unlocking the parent. * There is one exception, though - if we ascend from a child that * gets killed as soon as we unlock it, the next sibling is found - * using the value left in its ->d_child.next. And if _that_ + * using the value left in its ->d_sib.next. And if _that_ * pointed to a cursor, and cursor got moved (e.g. by lseek()) * before d_walk() regains parent->d_lock, we'll end up skipping * everything the cursor had been moved past. * - * Solution: make sure that the pointer left behind in ->d_child.next + * Solution: make sure that the pointer left behind in ->d_sib.next * points to something that won't be moving around. I.e. skip the * cursors. 
*/ - while (dentry->d_child.next != &parent->d_subdirs) { - next = list_entry(dentry->d_child.next, struct dentry, d_child); + while (dentry->d_sib.next) { + next = hlist_entry(dentry->d_sib.next, struct dentry, d_sib); if (likely(!(next->d_flags & DCACHE_DENTRY_CURSOR))) break; - dentry->d_child.next = next->d_child.next; + dentry->d_sib.next = next->d_sib.next; } } @@ -600,7 +600,7 @@ static void __dentry_kill(struct dentry *dentry) } /* if it was on the hash then remove it */ __d_drop(dentry); - dentry_unlist(dentry, parent); + dentry_unlist(dentry); if (parent) spin_unlock(&parent->d_lock); if (dentry->d_inode) @@ -1348,8 +1348,7 @@ enum d_walk_ret { static void d_walk(struct dentry *parent, void *data, enum d_walk_ret (*enter)(void *, struct dentry *)) { - struct dentry *this_parent; - struct list_head *next; + struct dentry *this_parent, *dentry; unsigned seq = 0; enum d_walk_ret ret; bool retry = true; @@ -1371,13 +1370,9 @@ again: break; } repeat: - next = this_parent->d_subdirs.next; + dentry = d_first_child(this_parent); resume: - while (next != &this_parent->d_subdirs) { - struct list_head *tmp = next; - struct dentry *dentry = list_entry(tmp, struct dentry, d_child); - next = tmp->next; - + hlist_for_each_entry_from(dentry, d_sib) { if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR)) continue; @@ -1398,7 +1393,7 @@ resume: continue; } - if (!list_empty(&dentry->d_subdirs)) { + if (!hlist_empty(&dentry->d_children)) { spin_unlock(&this_parent->d_lock); spin_release(&dentry->d_lock.dep_map, _RET_IP_); this_parent = dentry; @@ -1413,24 +1408,23 @@ resume: rcu_read_lock(); ascend: if (this_parent != parent) { - struct dentry *child = this_parent; - this_parent = child->d_parent; + dentry = this_parent; + this_parent = dentry->d_parent; - spin_unlock(&child->d_lock); + spin_unlock(&dentry->d_lock); spin_lock(&this_parent->d_lock); /* might go back up the wrong parent if we have had a rename. */ if (need_seqretry(&rename_lock, seq)) goto rename_retry; /* go into the first sibling still alive */ - do { - next = child->d_child.next; - if (next == &this_parent->d_subdirs) - goto ascend; - child = list_entry(next, struct dentry, d_child); - } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)); - rcu_read_unlock(); - goto resume; + hlist_for_each_entry_continue(dentry, d_sib) { + if (likely(!(dentry->d_flags & DCACHE_DENTRY_KILLED))) { + rcu_read_unlock(); + goto resume; + } + } + goto ascend; } if (need_seqretry(&rename_lock, seq)) goto rename_retry; @@ -1530,7 +1524,7 @@ out: * Search the dentry child list of the specified parent, * and move any unused dentries to the end of the unused * list for prune_dcache(). We descend to the next level - * whenever the d_subdirs list is non-empty and continue + * whenever the d_children list is non-empty and continue * searching. 
* * It returns zero iff there are no unused children, @@ -1657,7 +1651,7 @@ EXPORT_SYMBOL(shrink_dcache_parent); static enum d_walk_ret umount_check(void *_data, struct dentry *dentry) { /* it has busy descendents; complain about those instead */ - if (!list_empty(&dentry->d_subdirs)) + if (!hlist_empty(&dentry->d_children)) return D_WALK_CONTINUE; /* root with refcount 1 is fine */ @@ -1814,9 +1808,9 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) dentry->d_fsdata = NULL; INIT_HLIST_BL_NODE(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); - INIT_LIST_HEAD(&dentry->d_subdirs); + INIT_HLIST_HEAD(&dentry->d_children); INIT_HLIST_NODE(&dentry->d_u.d_alias); - INIT_LIST_HEAD(&dentry->d_child); + INIT_HLIST_NODE(&dentry->d_sib); d_set_d_op(dentry, dentry->d_sb->s_d_op); if (dentry->d_op && dentry->d_op->d_init) { @@ -1855,7 +1849,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) */ __dget_dlock(parent); dentry->d_parent = parent; - list_add(&dentry->d_child, &parent->d_subdirs); + hlist_add_head(&dentry->d_sib, &parent->d_children); spin_unlock(&parent->d_lock); return dentry; @@ -2993,11 +2987,15 @@ static void __d_move(struct dentry *dentry, struct dentry *target, } else { target->d_parent = old_parent; swap_names(dentry, target); - list_move(&target->d_child, &target->d_parent->d_subdirs); + if (!hlist_unhashed(&target->d_sib)) + __hlist_del(&target->d_sib); + hlist_add_head(&target->d_sib, &target->d_parent->d_children); __d_rehash(target); fsnotify_update_flags(target); } - list_move(&dentry->d_child, &dentry->d_parent->d_subdirs); + if (!hlist_unhashed(&dentry->d_sib)) + __hlist_del(&dentry->d_sib); + hlist_add_head(&dentry->d_sib, &dentry->d_parent->d_children); __d_rehash(dentry); fsnotify_update_flags(dentry); fscrypt_handle_d_move(dentry); -- cgit From 3fcf535626a44e107e2d536a2c43da0fb5bde121 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Nov 2023 15:21:33 -0500 Subject: centralize killing dentry from shrink list new helper unifying identical bits of shrink_dentry_list() and shring_dcache_for_umount() Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/dcache.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 59f76c9a15d1..bb862a304e1b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1174,10 +1174,18 @@ out: return false; } +static inline void shrink_kill(struct dentry *victim, struct list_head *list) +{ + struct dentry *parent = victim->d_parent; + if (parent != victim) + __dput_to_list(parent, list); + __dentry_kill(victim); +} + void shrink_dentry_list(struct list_head *list) { while (!list_empty(list)) { - struct dentry *dentry, *parent; + struct dentry *dentry; dentry = list_entry(list->prev, struct dentry, d_lru); spin_lock(&dentry->d_lock); @@ -1195,10 +1203,7 @@ void shrink_dentry_list(struct list_head *list) } rcu_read_unlock(); d_shrink_del(dentry); - parent = dentry->d_parent; - if (parent != dentry) - __dput_to_list(parent, list); - __dentry_kill(dentry); + shrink_kill(dentry, list); } } @@ -1629,17 +1634,13 @@ void shrink_dcache_parent(struct dentry *parent) data.victim = NULL; d_walk(parent, &data, select_collect2); if (data.victim) { - struct dentry *parent; spin_lock(&data.victim->d_lock); if (!shrink_lock_dentry(data.victim)) { spin_unlock(&data.victim->d_lock); rcu_read_unlock(); } else { rcu_read_unlock(); - parent = data.victim->d_parent; - if (parent != data.victim) - __dput_to_list(parent, 
&data.dispose); - __dentry_kill(data.victim); + shrink_kill(data.victim, &data.dispose); } } if (!list_empty(&data.dispose)) -- cgit From cd9f84f35c2eaaf4da7e111021b604662326d8aa Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 30 Oct 2023 13:57:12 -0400 Subject: shrink_dentry_list(): no need to check that dentry refcount is marked dead ... we won't see DCACHE_MAY_FREE on anything that is *not* dead and checking d_flags is just as cheap as checking refcount. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/dcache.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index bb862a304e1b..c1025921f8d3 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1191,11 +1191,10 @@ void shrink_dentry_list(struct list_head *list) spin_lock(&dentry->d_lock); rcu_read_lock(); if (!shrink_lock_dentry(dentry)) { - bool can_free = false; + bool can_free; rcu_read_unlock(); d_shrink_del(dentry); - if (dentry->d_lockref.count < 0) - can_free = dentry->d_flags & DCACHE_MAY_FREE; + can_free = dentry->d_flags & DCACHE_MAY_FREE; spin_unlock(&dentry->d_lock); if (can_free) dentry_free(dentry); -- cgit From 15220fbf187100e1cc1ad553b7d21633bdc27e76 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 30 Oct 2023 00:02:14 -0400 Subject: fast_dput(): having ->d_delete() is not reason to delay refcount decrement ->d_delete() is a way for filesystem to tell that dentry is not worth keeping cached. It is not guaranteed to be called every time a dentry has refcount drop down to zero; it is not guaranteed to be called before dentry gets evicted. In other words, it is not suitable for any kind of keeping track of dentry state. None of the in-tree filesystems attempt to use it that way, fortunately. So the contortions done by fast_dput() (as well as dentry_kill()) are not warranted. fast_dput() certainly should treat having ->d_delete() instance as "can't assume we'll be keeping it", but that's not different from the way we treat e.g. DCACHE_DONTCACHE (which is rather similar to making ->d_delete() returns true when called). Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/dcache.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index c1025921f8d3..00c19041adf3 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -768,15 +768,7 @@ static inline bool fast_dput(struct dentry *dentry) unsigned int d_flags; /* - * If we have a d_op->d_delete() operation, we sould not - * let the dentry count go to zero, so use "put_or_lock". - */ - if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) - return lockref_put_or_lock(&dentry->d_lockref); - - /* - * .. otherwise, we can try to just decrement the - * lockref optimistically. + * try to decrement the lockref optimistically. */ ret = lockref_put_return(&dentry->d_lockref); @@ -830,7 +822,7 @@ static inline bool fast_dput(struct dentry *dentry) */ smp_rmb(); d_flags = READ_ONCE(dentry->d_flags); - d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | + d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | DCACHE_OP_DELETE | DCACHE_DISCONNECTED | DCACHE_DONTCACHE; /* Nothing to do? Dropping the reference was all we needed? */ -- cgit From 504e08cebe1d4e1efe25f915234f646e74a364a8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 1 Nov 2023 01:08:54 -0400 Subject: fast_dput(): handle underflows gracefully If refcount is less than 1, we should just warn, unlock dentry and return true, so that the caller doesn't try to do anything else. 
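In plain, non-diff form (reconstructed from the hunk further down, purely for readability), the "lockref_put_return() failed" handling in fast_dput() becomes:

	if (unlikely(ret < 0)) {
		spin_lock(&dentry->d_lock);
		/* underflow: refcount was already <= 0 - warn and bail out */
		if (WARN_ON_ONCE(dentry->d_lockref.count <= 0)) {
			spin_unlock(&dentry->d_lock);
			return true;
		}
		/* otherwise do the decrement by hand and rejoin the locked slow path */
		dentry->d_lockref.count--;
		goto locked;
	}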
Taking care of that leaves the rest of "lockref_put_return() has failed" case equivalent to "decrement refcount and rejoin the normal slow path after the point where we grab ->d_lock". NOTE: lockref_put_return() is strictly a fastpath thing - unlike the rest of lockref primitives, it does not contain a fallback. Caller (and it looks like fast_dput() is the only legitimate one in the entire kernel) has to do that itself. Reasons for lockref_put_return() failures: * ->d_lock held by somebody * refcount <= 0 * ... or an architecture not supporting lockref use of cmpxchg - sparc, anything non-SMP, config with spinlock debugging... We could add a fallback, but it would be a clumsy API - we'd have to distinguish between: (1) refcount > 1 - decremented, lock not held on return (2) refcount < 1 - left alone, probably no sense to hold the lock (3) refcount is 1, no cmphxcg - decremented, lock held on return (4) refcount is 1, cmphxcg supported - decremented, lock *NOT* held on return. We want to return with no lock held in case (4); that's the whole point of that thing. We very much do not want to have the fallback in case (3) return without a lock, since the caller might have to retake it in that case. So it wouldn't be more convenient than doing the fallback in the caller and it would be very easy to screw up, especially since the test coverage would suck - no way to test (3) and (4) on the same kernel build. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/dcache.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 00c19041adf3..9edabc7e2e64 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -779,12 +779,12 @@ static inline bool fast_dput(struct dentry *dentry) */ if (unlikely(ret < 0)) { spin_lock(&dentry->d_lock); - if (dentry->d_lockref.count > 1) { - dentry->d_lockref.count--; + if (WARN_ON_ONCE(dentry->d_lockref.count <= 0)) { spin_unlock(&dentry->d_lock); return true; } - return false; + dentry->d_lockref.count--; + goto locked; } /* @@ -842,6 +842,7 @@ static inline bool fast_dput(struct dentry *dentry) * else could have killed it and marked it dead. Either way, we * don't need to do anything else. */ +locked: if (dentry->d_lockref.count) { spin_unlock(&dentry->d_lock); return true; -- cgit From 15f23734a1def3b22ba790bbf9f0d01949aae231 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 30 Oct 2023 00:11:58 -0400 Subject: fast_dput(): new rules for refcount By now there is only one place in entire fast_dput() where we return false; that happens after refcount had been decremented and found (under ->d_lock) to be zero. In that case, just prior to returning false to caller, fast_dput() forcibly changes the refcount from 0 to 1. Lift that resetting refcount to 1 into the callers; later in the series it will be massaged out of existence. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/dcache.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 9edabc7e2e64..a00e9ba22480 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -847,13 +847,6 @@ locked: spin_unlock(&dentry->d_lock); return true; } - - /* - * Re-get the reference we optimistically dropped. We hold the - * lock, and we just tested that it was zero, so we can just - * set it to 1. 
- */ - dentry->d_lockref.count = 1; return false; } @@ -896,6 +889,7 @@ void dput(struct dentry *dentry) } /* Slow case: now with the dentry lock held */ + dentry->d_lockref.count = 1; rcu_read_unlock(); if (likely(retain_dentry(dentry))) { @@ -930,6 +924,7 @@ void dput_to_list(struct dentry *dentry, struct list_head *list) return; } rcu_read_unlock(); + dentry->d_lockref.count = 1; if (!retain_dentry(dentry)) __dput_to_list(dentry, list); spin_unlock(&dentry->d_lock); -- cgit From 6511f6be777f3b06e904c7407fd9f2a39dae8b67 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 30 Oct 2023 00:43:48 -0400 Subject: __dput_to_list(): do decrement of refcount in the callers ... and rename it to to_shrink_list(), seeing that it no longer does dropping any references Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/dcache.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index a00e9ba22480..0718b3895c12 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -902,16 +902,13 @@ void dput(struct dentry *dentry) } EXPORT_SYMBOL(dput); -static void __dput_to_list(struct dentry *dentry, struct list_head *list) +static void to_shrink_list(struct dentry *dentry, struct list_head *list) __must_hold(&dentry->d_lock) { - if (dentry->d_flags & DCACHE_SHRINK_LIST) { - /* let the owner of the list it's on deal with it */ - --dentry->d_lockref.count; - } else { + if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { if (dentry->d_flags & DCACHE_LRU_LIST) d_lru_del(dentry); - if (!--dentry->d_lockref.count) + if (!dentry->d_lockref.count) d_shrink_add(dentry, list); } } @@ -925,8 +922,10 @@ void dput_to_list(struct dentry *dentry, struct list_head *list) } rcu_read_unlock(); dentry->d_lockref.count = 1; - if (!retain_dentry(dentry)) - __dput_to_list(dentry, list); + if (!retain_dentry(dentry)) { + --dentry->d_lockref.count; + to_shrink_list(dentry, list); + } spin_unlock(&dentry->d_lock); } @@ -1165,8 +1164,10 @@ out: static inline void shrink_kill(struct dentry *victim, struct list_head *list) { struct dentry *parent = victim->d_parent; - if (parent != victim) - __dput_to_list(parent, list); + if (parent != victim) { + --parent->d_lockref.count; + to_shrink_list(parent, list); + } __dentry_kill(victim); } -- cgit From e9d130d05077e71b1224ad96e419f5f5512b8574 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 Oct 2023 19:09:11 -0400 Subject: make retain_dentry() neutral with respect to refcounting retain_dentry() used to decrement refcount if and only if it returned true. Lift those decrements into the callers. 
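The resulting calling convention, illustrated with the dput() caller as it looks once the hunks below are applied (a sketch; dput_to_list() follows the same pattern on both of its branches):

	if (likely(retain_dentry(dentry))) {
		/* keeping the dentry cached; the pending reference is still
		 * ours to drop, since retain_dentry() no longer touches it */
		dentry->d_lockref.count--;
		spin_unlock(&dentry->d_lock);
		return;
	}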
Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/dcache.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 0718b3895c12..2e74f3f2ce2e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -680,7 +680,6 @@ static inline bool retain_dentry(struct dentry *dentry) return false; /* retain; LRU fodder */ - dentry->d_lockref.count--; if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) d_lru_add(dentry); else if (unlikely(!(dentry->d_flags & DCACHE_REFERENCED))) @@ -744,6 +743,8 @@ got_locks: } else if (likely(!retain_dentry(dentry))) { __dentry_kill(dentry); return parent; + } else { + dentry->d_lockref.count--; } /* we are keeping it, after all */ if (inode) @@ -893,6 +894,7 @@ void dput(struct dentry *dentry) rcu_read_unlock(); if (likely(retain_dentry(dentry))) { + dentry->d_lockref.count--; spin_unlock(&dentry->d_lock); return; } @@ -925,6 +927,8 @@ void dput_to_list(struct dentry *dentry, struct list_head *list) if (!retain_dentry(dentry)) { --dentry->d_lockref.count; to_shrink_list(dentry, list); + } else { + --dentry->d_lockref.count; } spin_unlock(&dentry->d_lock); } -- cgit From ee0c82503dcd0d14cc1ad53da18d32a04f612c4c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 Oct 2023 18:38:27 -0400 Subject: __dentry_kill(): get consistent rules for victim's refcount Currently we call it with refcount equal to 1 when called from dentry_kill(); all other callers have it equal to 0. Make it always be called with zero refcount; on this step we just decrement it before the calls in dentry_kill(). That is safe, since all places that care about the value of refcount either do that under ->d_lock or hold a reference to dentry in question. Either is sufficient to prevent observing a dentry immediately prior to __dentry_kill() getting called from dentry_kill(). Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/dcache.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 2e74f3f2ce2e..b527db8e5901 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -729,6 +729,7 @@ static struct dentry *dentry_kill(struct dentry *dentry) goto slow_positive; } } + dentry->d_lockref.count--; __dentry_kill(dentry); return parent; @@ -741,6 +742,7 @@ got_locks: if (unlikely(dentry->d_lockref.count != 1)) { dentry->d_lockref.count--; } else if (likely(!retain_dentry(dentry))) { + dentry->d_lockref.count--; __dentry_kill(dentry); return parent; } else { -- cgit From b06c684d3984ef64e792ec3e89889c96cab97b5e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 31 Oct 2023 00:23:35 -0400 Subject: dentry_kill(): don't bother with retain_dentry() on slow path We have already checked it and dentry used to look not worthy of keeping. The only hard obstacle to evicting dentry is non-zero refcount; everything else is advisory - e.g. memory pressure could evict any dentry found with refcount zero. On the slow path in dentry_kill() we had dropped and regained ->d_lock; we must recheck the refcount, but everything else is not worth bothering with. Note that filesystem can not count upon ->d_delete() being called for dentry - not even once. Again, memory pressure (as well as d_prune_aliases(), or attempted rmdir() of ancestor, or...) will not call ->d_delete() at all. So from the correctness point of view we are fine doing the check only once. And it makes things simpler down the road. 
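(Since ->d_delete() is advisory, an instance can only express a caching preference; it cannot be used to track dentry state. The simplest kind of instance - along the lines of the always_delete_dentry() helper libfs offers to filesystems that never want unused dentries cached - is just:

static int example_d_delete(const struct dentry *dentry)
{
	/* returning 1 asks dput() to try evicting the dentry once unused;
	 * as explained above, there is still no guarantee this hook runs
	 * before the dentry gets evicted by other means */
	return 1;
}

The name example_d_delete is illustrative only.)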
Signed-off-by: Al Viro --- fs/dcache.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index b527db8e5901..80992e49561c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -739,14 +739,10 @@ slow_positive: spin_lock(&dentry->d_lock); parent = lock_parent(dentry); got_locks: - if (unlikely(dentry->d_lockref.count != 1)) { - dentry->d_lockref.count--; - } else if (likely(!retain_dentry(dentry))) { - dentry->d_lockref.count--; + dentry->d_lockref.count--; + if (likely(dentry->d_lockref.count == 0)) { __dentry_kill(dentry); return parent; - } else { - dentry->d_lockref.count--; } /* we are keeping it, after all */ if (inode) -- cgit From 2f42f1eb9093834b635991c70d0273fbe249eabf Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 30 Oct 2023 01:09:50 -0400 Subject: Call retain_dentry() with refcount 0 Instead of bumping it from 0 to 1, calling retain_dentry(), then decrementing it back to 0 (with ->d_lock held all the way through), just leave refcount at 0 through all of that. It will have a visible effect for ->d_delete() - now it can be called with refcount 0 instead of 1 and it can no longer play silly buggers with dropping/regaining ->d_lock. Not that any in-tree instances tried to (it's pretty hard to get right). Any out-of-tree ones will have to adjust (assuming they need any changes). Note that we do not need to extend rcu-critical area here - we have verified that refcount is non-negative after having grabbed ->d_lock, so nobody will be able to free dentry until they get into __dentry_kill(), which won't happen until they manage to grab ->d_lock. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/dcache.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 80992e49561c..8ce0fe70f303 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -888,15 +888,14 @@ void dput(struct dentry *dentry) } /* Slow case: now with the dentry lock held */ - dentry->d_lockref.count = 1; rcu_read_unlock(); if (likely(retain_dentry(dentry))) { - dentry->d_lockref.count--; spin_unlock(&dentry->d_lock); return; } + dentry->d_lockref.count = 1; dentry = dentry_kill(dentry); } } @@ -921,13 +920,8 @@ void dput_to_list(struct dentry *dentry, struct list_head *list) return; } rcu_read_unlock(); - dentry->d_lockref.count = 1; - if (!retain_dentry(dentry)) { - --dentry->d_lockref.count; + if (!retain_dentry(dentry)) to_shrink_list(dentry, list); - } else { - --dentry->d_lockref.count; - } spin_unlock(&dentry->d_lock); } -- cgit From f05441c7e16e6a720cf24e08999661a76ff77478 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 31 Oct 2023 00:45:40 -0400 Subject: fold the call of retain_dentry() into fast_dput() Calls of retain_dentry() happen immediately after getting false from fast_dput() and getting true from retain_dentry() is treated the same way as non-zero refcount would be treated by fast_dput() - unlock dentry and bugger off. Doing that in fast_dput() itself is simpler. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/dcache.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 8ce0fe70f303..b69ff3a0b30f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -757,6 +757,8 @@ got_locks: * Try to do a lockless dput(), and return whether that was successful. * * If unsuccessful, we return false, having already taken the dentry lock. 
+ * In that case refcount is guaranteed to be zero and we have already + * decided that it's not worth keeping around. * * The caller needs to hold the RCU read lock, so that the dentry is * guaranteed to stay around even if the refcount goes down to zero! @@ -842,7 +844,7 @@ static inline bool fast_dput(struct dentry *dentry) * don't need to do anything else. */ locked: - if (dentry->d_lockref.count) { + if (dentry->d_lockref.count || retain_dentry(dentry)) { spin_unlock(&dentry->d_lock); return true; } @@ -889,12 +891,6 @@ void dput(struct dentry *dentry) /* Slow case: now with the dentry lock held */ rcu_read_unlock(); - - if (likely(retain_dentry(dentry))) { - spin_unlock(&dentry->d_lock); - return; - } - dentry->d_lockref.count = 1; dentry = dentry_kill(dentry); } @@ -920,8 +916,7 @@ void dput_to_list(struct dentry *dentry, struct list_head *list) return; } rcu_read_unlock(); - if (!retain_dentry(dentry)) - to_shrink_list(dentry, list); + to_shrink_list(dentry, list); spin_unlock(&dentry->d_lock); } -- cgit From 339e9e13530ba750fe0868c5e51bc23be07fd1f1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 31 Oct 2023 01:23:47 -0400 Subject: don't try to cut corners in shrink_lock_dentry() That is to say, do *not* treat the ->d_inode or ->d_parent changes as "it's hard, return false; somebody must have grabbed it, so even if has zero refcount, we don't need to bother killing it - final dput() from whoever grabbed it would've done everything". First of all, that is not guaranteed. It might have been dropped by shrink_kill() handling of victim's parent, which would've found it already on a shrink list (ours) and decided that they don't need to put it on their shrink list. What's more, dentry_kill() is doing pretty much the same thing, cutting its own set of corners (it assumes that dentry can't go from positive to negative, so its inode can change but only once and only in one direction). Doing that right allows to get rid of that not-quite-duplication and removes the only reason for re-incrementing refcount before the call of dentry_kill(). Replacement is called lock_for_kill(); called under rcu_read_lock and with ->d_lock held. If it returns false, dentry has non-zero refcount and the same locks are held. If it returns true, dentry has zero refcount and all locks required by __dentry_kill() are taken. Part of __lock_parent() had been lifted into lock_parent() to allow its reuse. Now it's called with rcu_read_lock already held and dentry already unlocked. Note that this is not the final change - locking requirements for __dentry_kill() are going to change later in the series and the set of locks taken by lock_for_kill() will be adjusted. Both lock_parent() and __lock_parent() will be gone once that happens. 
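A sketch of the caller pattern this contract implies (shrink_dentry_list() in the diff below is the concrete in-tree example):

	spin_lock(&dentry->d_lock);
	rcu_read_lock();
	if (lock_for_kill(dentry)) {
		/* refcount is 0 and every lock __dentry_kill() needs is held */
		rcu_read_unlock();
		__dentry_kill(dentry);
	} else {
		/* somebody grabbed it: non-zero refcount, only ->d_lock held */
		rcu_read_unlock();
		spin_unlock(&dentry->d_lock);
	}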
Signed-off-by: Al Viro --- fs/dcache.c | 159 +++++++++++++++++++++++++----------------------------------- 1 file changed, 66 insertions(+), 93 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index b69ff3a0b30f..a7f99d46c41b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -625,8 +625,6 @@ static void __dentry_kill(struct dentry *dentry) static struct dentry *__lock_parent(struct dentry *dentry) { struct dentry *parent; - rcu_read_lock(); - spin_unlock(&dentry->d_lock); again: parent = READ_ONCE(dentry->d_parent); spin_lock(&parent->d_lock); @@ -642,7 +640,6 @@ again: spin_unlock(&parent->d_lock); goto again; } - rcu_read_unlock(); if (parent != dentry) spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); else @@ -657,7 +654,64 @@ static inline struct dentry *lock_parent(struct dentry *dentry) return NULL; if (likely(spin_trylock(&parent->d_lock))) return parent; - return __lock_parent(dentry); + rcu_read_lock(); + spin_unlock(&dentry->d_lock); + parent = __lock_parent(dentry); + rcu_read_unlock(); + return parent; +} + +/* + * Lock a dentry for feeding it to __dentry_kill(). + * Called under rcu_read_lock() and dentry->d_lock; the former + * guarantees that nothing we access will be freed under us. + * Note that dentry is *not* protected from concurrent dentry_kill(), + * d_delete(), etc. + * + * Return false if dentry is busy. Otherwise, return true and have + * that dentry's inode and parent both locked. + */ + +static bool lock_for_kill(struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + struct dentry *parent = dentry->d_parent; + + if (unlikely(dentry->d_lockref.count)) + return false; + + if (inode && unlikely(!spin_trylock(&inode->i_lock))) + goto slow; + if (dentry == parent) + return true; + if (likely(spin_trylock(&parent->d_lock))) + return true; + + if (inode) + spin_unlock(&inode->i_lock); +slow: + spin_unlock(&dentry->d_lock); + + for (;;) { + if (inode) + spin_lock(&inode->i_lock); + parent = __lock_parent(dentry); + if (likely(inode == dentry->d_inode)) + break; + if (inode) + spin_unlock(&inode->i_lock); + inode = dentry->d_inode; + spin_unlock(&dentry->d_lock); + if (parent) + spin_unlock(&parent->d_lock); + } + if (likely(!dentry->d_lockref.count)) + return true; + if (inode) + spin_unlock(&inode->i_lock); + if (parent) + spin_unlock(&parent->d_lock); + return false; } static inline bool retain_dentry(struct dentry *dentry) @@ -710,45 +764,16 @@ EXPORT_SYMBOL(d_mark_dontcache); static struct dentry *dentry_kill(struct dentry *dentry) __releases(dentry->d_lock) { - struct inode *inode = dentry->d_inode; - struct dentry *parent = NULL; - if (inode && unlikely(!spin_trylock(&inode->i_lock))) - goto slow_positive; - - if (!IS_ROOT(dentry)) { - parent = dentry->d_parent; - if (unlikely(!spin_trylock(&parent->d_lock))) { - parent = __lock_parent(dentry); - if (likely(inode || !dentry->d_inode)) - goto got_locks; - /* negative that became positive */ - if (parent) - spin_unlock(&parent->d_lock); - inode = dentry->d_inode; - goto slow_positive; - } - } dentry->d_lockref.count--; - __dentry_kill(dentry); - return parent; - -slow_positive: - spin_unlock(&dentry->d_lock); - spin_lock(&inode->i_lock); - spin_lock(&dentry->d_lock); - parent = lock_parent(dentry); -got_locks: - dentry->d_lockref.count--; - if (likely(dentry->d_lockref.count == 0)) { + rcu_read_lock(); + if (likely(lock_for_kill(dentry))) { + struct dentry *parent = dentry->d_parent; + rcu_read_unlock(); __dentry_kill(dentry); - return parent; + return parent != dentry ? 
parent : NULL; } - /* we are keeping it, after all */ - if (inode) - spin_unlock(&inode->i_lock); - if (parent) - spin_unlock(&parent->d_lock); + rcu_read_unlock(); spin_unlock(&dentry->d_lock); return NULL; } @@ -1100,58 +1125,6 @@ restart: } EXPORT_SYMBOL(d_prune_aliases); -/* - * Lock a dentry from shrink list. - * Called under rcu_read_lock() and dentry->d_lock; the former - * guarantees that nothing we access will be freed under us. - * Note that dentry is *not* protected from concurrent dentry_kill(), - * d_delete(), etc. - * - * Return false if dentry has been disrupted or grabbed, leaving - * the caller to kick it off-list. Otherwise, return true and have - * that dentry's inode and parent both locked. - */ -static bool shrink_lock_dentry(struct dentry *dentry) -{ - struct inode *inode; - struct dentry *parent; - - if (dentry->d_lockref.count) - return false; - - inode = dentry->d_inode; - if (inode && unlikely(!spin_trylock(&inode->i_lock))) { - spin_unlock(&dentry->d_lock); - spin_lock(&inode->i_lock); - spin_lock(&dentry->d_lock); - if (unlikely(dentry->d_lockref.count)) - goto out; - /* changed inode means that somebody had grabbed it */ - if (unlikely(inode != dentry->d_inode)) - goto out; - } - - parent = dentry->d_parent; - if (IS_ROOT(dentry) || likely(spin_trylock(&parent->d_lock))) - return true; - - spin_unlock(&dentry->d_lock); - spin_lock(&parent->d_lock); - if (unlikely(parent != dentry->d_parent)) { - spin_unlock(&parent->d_lock); - spin_lock(&dentry->d_lock); - goto out; - } - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - if (likely(!dentry->d_lockref.count)) - return true; - spin_unlock(&parent->d_lock); -out: - if (inode) - spin_unlock(&inode->i_lock); - return false; -} - static inline void shrink_kill(struct dentry *victim, struct list_head *list) { struct dentry *parent = victim->d_parent; @@ -1170,7 +1143,7 @@ void shrink_dentry_list(struct list_head *list) dentry = list_entry(list->prev, struct dentry, d_lru); spin_lock(&dentry->d_lock); rcu_read_lock(); - if (!shrink_lock_dentry(dentry)) { + if (!lock_for_kill(dentry)) { bool can_free; rcu_read_unlock(); d_shrink_del(dentry); @@ -1614,7 +1587,7 @@ void shrink_dcache_parent(struct dentry *parent) d_walk(parent, &data, select_collect2); if (data.victim) { spin_lock(&data.victim->d_lock); - if (!shrink_lock_dentry(data.victim)) { + if (!lock_for_kill(data.victim)) { spin_unlock(&data.victim->d_lock); rcu_read_unlock(); } else { -- cgit From 5e7a5c8d17d94216aed205cb0f20cd32adfd4487 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 31 Oct 2023 01:41:22 -0400 Subject: fold dentry_kill() into dput() Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/dcache.c | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index a7f99d46c41b..5284b02747cd 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -756,28 +756,6 @@ void d_mark_dontcache(struct inode *inode) } EXPORT_SYMBOL(d_mark_dontcache); -/* - * Finish off a dentry we've decided to kill. - * dentry->d_lock must be held, returns with it unlocked. - * Returns dentry requiring refcount drop, or NULL if we're done. - */ -static struct dentry *dentry_kill(struct dentry *dentry) - __releases(dentry->d_lock) -{ - - dentry->d_lockref.count--; - rcu_read_lock(); - if (likely(lock_for_kill(dentry))) { - struct dentry *parent = dentry->d_parent; - rcu_read_unlock(); - __dentry_kill(dentry); - return parent != dentry ? 
parent : NULL; - } - rcu_read_unlock(); - spin_unlock(&dentry->d_lock); - return NULL; -} - /* * Try to do a lockless dput(), and return whether that was successful. * @@ -915,9 +893,18 @@ void dput(struct dentry *dentry) } /* Slow case: now with the dentry lock held */ - rcu_read_unlock(); - dentry->d_lockref.count = 1; - dentry = dentry_kill(dentry); + if (likely(lock_for_kill(dentry))) { + struct dentry *parent = dentry->d_parent; + rcu_read_unlock(); + __dentry_kill(dentry); + if (dentry == parent) + return; + dentry = parent; + } else { + rcu_read_unlock(); + spin_unlock(&dentry->d_lock); + return; + } } } EXPORT_SYMBOL(dput); -- cgit From c2e5e29f3fdaff6f57795be068ce525c60954fd0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 30 Oct 2023 01:06:06 -0400 Subject: to_shrink_list(): call only if refcount is 0 The only thing it does if refcount is not zero is d_lru_del(); no point, IMO, seeing that plain dput() does nothing of that sort... Note that 2 of 3 current callers are guaranteed that refcount is 0. Acked-by: Christian Brauner Signed-off-by: Al Viro --- fs/dcache.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 5284b02747cd..704676bf06fd 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -915,8 +915,7 @@ __must_hold(&dentry->d_lock) if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { if (dentry->d_flags & DCACHE_LRU_LIST) d_lru_del(dentry); - if (!dentry->d_lockref.count) - d_shrink_add(dentry, list); + d_shrink_add(dentry, list); } } @@ -1115,10 +1114,8 @@ EXPORT_SYMBOL(d_prune_aliases); static inline void shrink_kill(struct dentry *victim, struct list_head *list) { struct dentry *parent = victim->d_parent; - if (parent != victim) { - --parent->d_lockref.count; + if (parent != victim && !--parent->d_lockref.count) to_shrink_list(parent, list); - } __dentry_kill(victim); } -- cgit From f5c8a8a4b6c90f2160de6b580fa672b4afc15061 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 30 Oct 2023 21:07:34 -0400 Subject: switch select_collect{,2}() to use of to_shrink_list() Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/dcache.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 704676bf06fd..f68fe7c863e0 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1495,13 +1495,9 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry) if (dentry->d_flags & DCACHE_SHRINK_LIST) { data->found++; - } else { - if (dentry->d_flags & DCACHE_LRU_LIST) - d_lru_del(dentry); - if (!dentry->d_lockref.count) { - d_shrink_add(dentry, &data->dispose); - data->found++; - } + } else if (!dentry->d_lockref.count) { + to_shrink_list(dentry, &data->dispose); + data->found++; } /* * We can return to the caller if we have found some (this @@ -1522,17 +1518,13 @@ static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry) if (data->start == dentry) goto out; - if (dentry->d_flags & DCACHE_SHRINK_LIST) { - if (!dentry->d_lockref.count) { + if (!dentry->d_lockref.count) { + if (dentry->d_flags & DCACHE_SHRINK_LIST) { rcu_read_lock(); data->victim = dentry; return D_WALK_QUIT; } - } else { - if (dentry->d_flags & DCACHE_LRU_LIST) - d_lru_del(dentry); - if (!dentry->d_lockref.count) - d_shrink_add(dentry, &data->dispose); + to_shrink_list(dentry, &data->dispose); } /* * We can return to the caller if we have found some (this -- cgit From b4cc0734d25746d402db3baa6082d19109bca951 Mon Sep 17 00:00:00 2001 From: Al 
Viro Date: Fri, 3 Nov 2023 14:46:14 -0400 Subject: d_prune_aliases(): use a shrink list Instead of dropping aliases one by one, restarting, etc., just collect them into a shrink list and kill them off in one pass. We don't really need the restarts - one alias can't pin another (directory has only one alias, and couldn't be its own ancestor anyway), so collecting everything that is not busy and taking it out would take care of everything evictable that had been there as we entered the function. And new aliases added while we'd been dropping old ones could just as easily have appeared right as we return to caller... Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/dcache.c | 30 +++++------------------------- 1 file changed, 5 insertions(+), 25 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index f68fe7c863e0..a3cc612a80d5 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -647,20 +647,6 @@ again: return parent; } -static inline struct dentry *lock_parent(struct dentry *dentry) -{ - struct dentry *parent = dentry->d_parent; - if (IS_ROOT(dentry)) - return NULL; - if (likely(spin_trylock(&parent->d_lock))) - return parent; - rcu_read_lock(); - spin_unlock(&dentry->d_lock); - parent = __lock_parent(dentry); - rcu_read_unlock(); - return parent; -} - /* * Lock a dentry for feeding it to __dentry_kill(). * Called under rcu_read_lock() and dentry->d_lock; the former @@ -1090,24 +1076,18 @@ struct dentry *d_find_alias_rcu(struct inode *inode) */ void d_prune_aliases(struct inode *inode) { + LIST_HEAD(dispose); struct dentry *dentry; -restart: + spin_lock(&inode->i_lock); hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { spin_lock(&dentry->d_lock); - if (!dentry->d_lockref.count) { - struct dentry *parent = lock_parent(dentry); - if (likely(!dentry->d_lockref.count)) { - __dentry_kill(dentry); - dput(parent); - goto restart; - } - if (parent) - spin_unlock(&parent->d_lock); - } + if (!dentry->d_lockref.count) + to_shrink_list(dentry, &dispose); spin_unlock(&dentry->d_lock); } spin_unlock(&inode->i_lock); + shrink_dentry_list(&dispose); } EXPORT_SYMBOL(d_prune_aliases); -- cgit From 1c18edd1b7a068e07fed7f00e059f22ed67c04c9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Nov 2023 16:14:08 -0500 Subject: __dentry_kill(): new locking scheme Currently we enter __dentry_kill() with parent (along with the victim dentry and victim's inode) held locked. Then we mark dentry refcount as dead call ->d_prune() remove dentry from hash remove it from the parent's list of children unlock the parent, don't need it from that point on detach dentry from inode, unlock dentry and drop the inode (via ->d_iput()) call ->d_release() regain the lock on dentry check if it's on a shrink list (in which case freeing its empty husk has to be left to shrink_dentry_list()) or not (in which case we can free it ourselves). In the former case, mark it as an empty husk, so that shrink_dentry_list() would know it can free the sucker. drop the lock on dentry ... and usually the caller proceeds to drop a reference on the parent, possibly retaking the lock on it. That is painful for a bunch of reasons, starting with the need to take locks out of order, but not limited to that - the parent of positive dentry can change if we drop its ->d_lock, so getting these locks has to be done with care. Moreover, as soon as dentry is out of the parent's list of children, shrink_dcache_for_umount() won't see it anymore, making it appear as if the parent is inexplicably busy. 
We do work around that by having shrink_dentry_list() decrement the parent's refcount first and put it on shrink list to be evicted once we are done with __dentry_kill() of child, but that may in some cases lead to ->d_iput() on child called after the parent got killed. That doesn't happen in cases where in-tree ->d_iput() instances might want to look at the parent, but that's brittle as hell. Solution: do removal from the parent's list of children in the very end of __dentry_kill(). As the result, the callers do not need to lock the parent and by the time we really need the parent locked, dentry is negative and is guaranteed not to be moved around. It does mean that ->d_prune() will be called with parent not locked. It also means that we might see dentries in process of being torn down while going through the parent's list of children; those dentries will be unhashed, negative and with refcount marked dead. In practice, that's enough for in-tree code that looks through the list of children to do the right thing as-is. Out-of-tree code might need to be adjusted. Calling conventions: __dentry_kill(dentry) is called with dentry->d_lock held, along with ->i_lock of its inode (if any). It either returns the parent (locked, with refcount decremented to 0) or NULL (if there'd been no parent or if refcount decrement for parent hadn't reached 0). lock_for_kill() is adjusted for new requirements - it doesn't touch the parent's ->d_lock at all. Callers adjusted. Note that for dput() we don't need to bother with fast_dput() for the parent - we just need to check retain_dentry() for it, since its ->d_lock is still held since the moment when __dentry_kill() had taken it to remove the victim from the list of children. The kludge with early decrement of parent's refcount in shrink_dentry_list() is no longer needed - shrink_dcache_for_umount() sees the half-killed dentries in the list of children for as long as they are pinning the parent. They are easily recognized and accounted for by select_collect(), so we know we are not done yet. As the result, we always have the expected ordering for ->d_iput()/->d_release() vs. __dentry_kill() of the parent, no exceptions. Moreover, the current rules for shrink lists (one must make sure that shrink_dcache_for_umount() won't happen while any dentries from the superblock in question are on any shrink lists) are gone - shrink_dcache_for_umount() will do the right thing in all cases, taking such dentries out. Their empty husks (memory occupied by struct dentry itself + its external name, if any) will remain on the shrink lists, but they are no obstacles to filesystem shutdown. And such husks will get freed as soon as shrink_dentry_list() of the list they are on gets to them. Reviewed-by: Christian Brauner Signed-off-by: Al Viro --- fs/dcache.c | 129 ++++++++++++++++++++++-------------------------------------- 1 file changed, 48 insertions(+), 81 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index a3cc612a80d5..c795154ffa3a 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -575,12 +575,10 @@ static inline void dentry_unlist(struct dentry *dentry) } } -static void __dentry_kill(struct dentry *dentry) +static struct dentry *__dentry_kill(struct dentry *dentry) { struct dentry *parent = NULL; bool can_free = true; - if (!IS_ROOT(dentry)) - parent = dentry->d_parent; /* * The dentry is now unrecoverably dead to the world. 
@@ -600,9 +598,6 @@ static void __dentry_kill(struct dentry *dentry) } /* if it was on the hash then remove it */ __d_drop(dentry); - dentry_unlist(dentry); - if (parent) - spin_unlock(&parent->d_lock); if (dentry->d_inode) dentry_unlink_inode(dentry); else @@ -611,7 +606,14 @@ static void __dentry_kill(struct dentry *dentry) if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); - spin_lock(&dentry->d_lock); + cond_resched(); + /* now that it's negative, ->d_parent is stable */ + if (!IS_ROOT(dentry)) { + parent = dentry->d_parent; + spin_lock(&parent->d_lock); + } + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + dentry_unlist(dentry); if (dentry->d_flags & DCACHE_SHRINK_LIST) { dentry->d_flags |= DCACHE_MAY_FREE; can_free = false; @@ -619,31 +621,10 @@ static void __dentry_kill(struct dentry *dentry) spin_unlock(&dentry->d_lock); if (likely(can_free)) dentry_free(dentry); - cond_resched(); -} - -static struct dentry *__lock_parent(struct dentry *dentry) -{ - struct dentry *parent; -again: - parent = READ_ONCE(dentry->d_parent); - spin_lock(&parent->d_lock); - /* - * We can't blindly lock dentry until we are sure - * that we won't violate the locking order. - * Any changes of dentry->d_parent must have - * been done with parent->d_lock held, so - * spin_lock() above is enough of a barrier - * for checking if it's still our child. - */ - if (unlikely(parent != dentry->d_parent)) { + if (parent && --parent->d_lockref.count) { spin_unlock(&parent->d_lock); - goto again; + return NULL; } - if (parent != dentry) - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - else - parent = NULL; return parent; } @@ -655,48 +636,32 @@ again: * d_delete(), etc. * * Return false if dentry is busy. Otherwise, return true and have - * that dentry's inode and parent both locked. + * that dentry's inode locked. 
*/ static bool lock_for_kill(struct dentry *dentry) { struct inode *inode = dentry->d_inode; - struct dentry *parent = dentry->d_parent; if (unlikely(dentry->d_lockref.count)) return false; - if (inode && unlikely(!spin_trylock(&inode->i_lock))) - goto slow; - if (dentry == parent) - return true; - if (likely(spin_trylock(&parent->d_lock))) + if (!inode || likely(spin_trylock(&inode->i_lock))) return true; - if (inode) - spin_unlock(&inode->i_lock); -slow: - spin_unlock(&dentry->d_lock); - - for (;;) { - if (inode) - spin_lock(&inode->i_lock); - parent = __lock_parent(dentry); + do { + spin_unlock(&dentry->d_lock); + spin_lock(&inode->i_lock); + spin_lock(&dentry->d_lock); if (likely(inode == dentry->d_inode)) break; - if (inode) - spin_unlock(&inode->i_lock); + spin_unlock(&inode->i_lock); inode = dentry->d_inode; - spin_unlock(&dentry->d_lock); - if (parent) - spin_unlock(&parent->d_lock); - } + } while (inode); if (likely(!dentry->d_lockref.count)) return true; if (inode) spin_unlock(&inode->i_lock); - if (parent) - spin_unlock(&parent->d_lock); return false; } @@ -869,29 +834,27 @@ locked: */ void dput(struct dentry *dentry) { - while (dentry) { - might_sleep(); - - rcu_read_lock(); - if (likely(fast_dput(dentry))) { - rcu_read_unlock(); + if (!dentry) + return; + might_sleep(); + rcu_read_lock(); + if (likely(fast_dput(dentry))) { + rcu_read_unlock(); + return; + } + while (lock_for_kill(dentry)) { + rcu_read_unlock(); + dentry = __dentry_kill(dentry); + if (!dentry) return; - } - - /* Slow case: now with the dentry lock held */ - if (likely(lock_for_kill(dentry))) { - struct dentry *parent = dentry->d_parent; - rcu_read_unlock(); - __dentry_kill(dentry); - if (dentry == parent) - return; - dentry = parent; - } else { - rcu_read_unlock(); + if (retain_dentry(dentry)) { spin_unlock(&dentry->d_lock); return; } + rcu_read_lock(); } + rcu_read_unlock(); + spin_unlock(&dentry->d_lock); } EXPORT_SYMBOL(dput); @@ -1091,12 +1054,16 @@ void d_prune_aliases(struct inode *inode) } EXPORT_SYMBOL(d_prune_aliases); -static inline void shrink_kill(struct dentry *victim, struct list_head *list) +static inline void shrink_kill(struct dentry *victim) { - struct dentry *parent = victim->d_parent; - if (parent != victim && !--parent->d_lockref.count) - to_shrink_list(parent, list); - __dentry_kill(victim); + do { + rcu_read_unlock(); + victim = __dentry_kill(victim); + rcu_read_lock(); + } while (victim && lock_for_kill(victim)); + rcu_read_unlock(); + if (victim) + spin_unlock(&victim->d_lock); } void shrink_dentry_list(struct list_head *list) @@ -1117,9 +1084,8 @@ void shrink_dentry_list(struct list_head *list) dentry_free(dentry); continue; } - rcu_read_unlock(); d_shrink_del(dentry); - shrink_kill(dentry, list); + shrink_kill(dentry); } } @@ -1478,6 +1444,8 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry) } else if (!dentry->d_lockref.count) { to_shrink_list(dentry, &data->dispose); data->found++; + } else if (dentry->d_lockref.count < 0) { + data->found++; } /* * We can return to the caller if we have found some (this @@ -1547,8 +1515,7 @@ void shrink_dcache_parent(struct dentry *parent) spin_unlock(&data.victim->d_lock); rcu_read_unlock(); } else { - rcu_read_unlock(); - shrink_kill(data.victim, &data.dispose); + shrink_kill(data.victim); } } if (!list_empty(&data.dispose)) -- cgit From 6367b491c17a34b28aece294bddfda1a36ec0377 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 23 Nov 2023 17:33:21 -0500 Subject: retain_dentry(): introduce a trimmed-down lockless variant 
fast_dput() contains a small piece of code, preceded by scary comments about 5 times longer than it. What is actually done there is a trimmed-down subset of retain_dentry() - in some situations we can tell that retain_dentry() would have returned true without ever needing ->d_lock and that's what that code checks. If these checks come true fast_dput() can declare that we are done, without bothering with ->d_lock; otherwise it has to take the lock and do full variant of retain_dentry() checks. Trimmed-down variant of the checks is hard to follow and it's asking for trouble - if we ever decide to change the rules in retain_dentry(), we'll have to remember to update that code. It turns out that an equivalent variant of these checks more obviously parallel to retain_dentry() is not just possible, but easy to unify with retain_dentry() itself, passing it a new boolean argument ('locked') to distinguish between the full semantics and trimmed down one. Note that in lockless case true is returned only when locked variant would have returned true without ever needing the lock; false means "punt to the locking path and recheck there". Signed-off-by: Al Viro --- fs/dcache.c | 95 ++++++++++++++++++++++++++++++------------------------------- 1 file changed, 47 insertions(+), 48 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index c795154ffa3a..b212a65ed190 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -665,30 +665,57 @@ static bool lock_for_kill(struct dentry *dentry) return false; } -static inline bool retain_dentry(struct dentry *dentry) +/* + * Decide if dentry is worth retaining. Usually this is called with dentry + * locked; if not locked, we are more limited and might not be able to tell + * without a lock. False in this case means "punt to locked path and recheck". + * + * In case we aren't locked, these predicates are not "stable". However, it is + * sufficient that at some point after we dropped the reference the dentry was + * hashed and the flags had the proper value. Other dentry users may have + * re-gotten a reference to the dentry and change that, but our work is done - + * we can leave the dentry around with a zero refcount. + */ +static inline bool retain_dentry(struct dentry *dentry, bool locked) { - WARN_ON(d_in_lookup(dentry)); + unsigned int d_flags; - /* Unreachable? Get rid of it */ + smp_rmb(); + d_flags = READ_ONCE(dentry->d_flags); + + // Unreachable? Nobody would be able to look it up, no point retaining if (unlikely(d_unhashed(dentry))) return false; - if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) + // Same if it's disconnected + if (unlikely(d_flags & DCACHE_DISCONNECTED)) return false; - if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) { - if (dentry->d_op->d_delete(dentry)) + // ->d_delete() might tell us not to bother, but that requires + // ->d_lock; can't decide without it + if (unlikely(d_flags & DCACHE_OP_DELETE)) { + if (!locked || dentry->d_op->d_delete(dentry)) return false; } - if (unlikely(dentry->d_flags & DCACHE_DONTCACHE)) + // Explicitly told not to bother + if (unlikely(d_flags & DCACHE_DONTCACHE)) return false; - /* retain; LRU fodder */ - if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) + // At this point it looks like we ought to keep it. We also might + // need to do something - put it on LRU if it wasn't there already + // and mark it referenced if it was on LRU, but not marked yet. + // Unfortunately, both actions require ->d_lock, so in lockless + // case we'd have to punt rather than doing those. 
+        if (unlikely(!(d_flags & DCACHE_LRU_LIST))) {
+                if (!locked)
+                        return false;
                 d_lru_add(dentry);
-        else if (unlikely(!(dentry->d_flags & DCACHE_REFERENCED)))
+        } else if (unlikely(!(d_flags & DCACHE_REFERENCED))) {
+                if (!locked)
+                        return false;
                 dentry->d_flags |= DCACHE_REFERENCED;
+        }
         return true;
 }
 
@@ -720,7 +747,6 @@ EXPORT_SYMBOL(d_mark_dontcache);
 static inline bool fast_dput(struct dentry *dentry)
 {
         int ret;
-        unsigned int d_flags;
 
         /*
          * try to decrement the lockref optimistically.
@@ -749,45 +775,18 @@ static inline bool fast_dput(struct dentry *dentry)
                 return true;
 
         /*
-         * Careful, careful. The reference count went down
-         * to zero, but we don't hold the dentry lock, so
-         * somebody else could get it again, and do another
-         * dput(), and we need to not race with that.
-         *
-         * However, there is a very special and common case
-         * where we don't care, because there is nothing to
-         * do: the dentry is still hashed, it does not have
-         * a 'delete' op, and it's referenced and already on
-         * the LRU list.
-         *
-         * NOTE! Since we aren't locked, these values are
-         * not "stable". However, it is sufficient that at
-         * some point after we dropped the reference the
-         * dentry was hashed and the flags had the proper
-         * value. Other dentry users may have re-gotten
-         * a reference to the dentry and change that, but
-         * our work is done - we can leave the dentry
-         * around with a zero refcount.
-         *
-         * Nevertheless, there are two cases that we should kill
-         * the dentry anyway.
-         * 1. free disconnected dentries as soon as their refcount
-         *    reached zero.
-         * 2. free dentries if they should not be cached.
+         * Can we decide that decrement of refcount is all we needed without
+         * taking the lock?  There's a very common case when it's all we need -
+         * dentry looks like it ought to be retained and there's nothing else
+         * to do.
          */
-        smp_rmb();
-        d_flags = READ_ONCE(dentry->d_flags);
-        d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | DCACHE_OP_DELETE |
-                        DCACHE_DISCONNECTED | DCACHE_DONTCACHE;
-
-        /* Nothing to do? Dropping the reference was all we needed? */
-        if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry))
+        if (retain_dentry(dentry, false))
                 return true;
 
         /*
-         * Not the fast normal case? Get the lock. We've already decremented
-         * the refcount, but we'll need to re-check the situation after
-         * getting the lock.
+         * Either not worth retaining or we can't tell without the lock.
+         * Get the lock, then.  We've already decremented the refcount to 0,
+         * but we'll need to re-check the situation after getting the lock.
          */
         spin_lock(&dentry->d_lock);
 
@@ -798,7 +797,7 @@ static inline bool fast_dput(struct dentry *dentry)
          * don't need to do anything else.
          */
 locked:
-        if (dentry->d_lockref.count || retain_dentry(dentry)) {
+        if (dentry->d_lockref.count || retain_dentry(dentry, true)) {
                 spin_unlock(&dentry->d_lock);
                 return true;
         }
@@ -847,7 +846,7 @@ void dput(struct dentry *dentry)
                 dentry = __dentry_kill(dentry);
                 if (!dentry)
                         return;
-                if (retain_dentry(dentry)) {
+                if (retain_dentry(dentry, true)) {
                         spin_unlock(&dentry->d_lock);
                         return;
                 }
-- cgit

From f2824db1b49f947ba6e208ddf02edf4b1391480a Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Sat, 18 Nov 2023 16:42:43 -0500
Subject: kill d_instantate_anon(), fold __d_instantiate_anon() into remaining caller

now that the only user of d_instantiate_anon() is gone...

[braino fix folded - kudos to Dan Carpenter]

Signed-off-by: Al Viro
---
 fs/dcache.c | 90 ++++++++++++++++++++++++-------------------------------------
 1 file changed, 35 insertions(+), 55 deletions(-)

(limited to 'fs/dcache.c')

diff --git a/fs/dcache.c b/fs/dcache.c
index 9f5b2b5c1e6d..6c520eda131f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2054,75 +2054,55 @@ struct dentry *d_make_root(struct inode *root_inode)
 }
 EXPORT_SYMBOL(d_make_root);
 
-static struct dentry *__d_instantiate_anon(struct dentry *dentry,
-                                           struct inode *inode,
-                                           bool disconnected)
-{
-        struct dentry *res;
-        unsigned add_flags;
-
-        security_d_instantiate(dentry, inode);
-        spin_lock(&inode->i_lock);
-        res = __d_find_any_alias(inode);
-        if (res) {
-                spin_unlock(&inode->i_lock);
-                dput(dentry);
-                goto out_iput;
-        }
-
-        /* attach a disconnected dentry */
-        add_flags = d_flags_for_inode(inode);
-
-        if (disconnected)
-                add_flags |= DCACHE_DISCONNECTED;
-
-        spin_lock(&dentry->d_lock);
-        __d_set_inode_and_type(dentry, inode, add_flags);
-        hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
-        if (!disconnected) {
-                hlist_bl_lock(&dentry->d_sb->s_roots);
-                hlist_bl_add_head(&dentry->d_hash, &dentry->d_sb->s_roots);
-                hlist_bl_unlock(&dentry->d_sb->s_roots);
-        }
-        spin_unlock(&dentry->d_lock);
-        spin_unlock(&inode->i_lock);
-
-        return dentry;
-
- out_iput:
-        iput(inode);
-        return res;
-}
-
-struct dentry *d_instantiate_anon(struct dentry *dentry, struct inode *inode)
-{
-        return __d_instantiate_anon(dentry, inode, true);
-}
-EXPORT_SYMBOL(d_instantiate_anon);
-
 static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected)
 {
-        struct dentry *tmp;
-        struct dentry *res;
+        struct super_block *sb;
+        struct dentry *new, *res;
 
         if (!inode)
                 return ERR_PTR(-ESTALE);
         if (IS_ERR(inode))
                 return ERR_CAST(inode);
 
-        res = d_find_any_alias(inode);
+        sb = inode->i_sb;
+
+        res = d_find_any_alias(inode);  /* existing alias? */
         if (res)
-                goto out_iput;
+                goto out;
 
-        tmp = d_alloc_anon(inode->i_sb);
-        if (!tmp) {
+        new = d_alloc_anon(sb);
+        if (!new) {
                 res = ERR_PTR(-ENOMEM);
-                goto out_iput;
+                goto out;
         }
 
-        return __d_instantiate_anon(tmp, inode, disconnected);
+        security_d_instantiate(new, inode);
+        spin_lock(&inode->i_lock);
+        res = __d_find_any_alias(inode);        /* recheck under lock */
+        if (likely(!res)) {     /* still no alias, attach a disconnected dentry */
+                unsigned add_flags = d_flags_for_inode(inode);
+
+                if (disconnected)
+                        add_flags |= DCACHE_DISCONNECTED;
+
+                spin_lock(&new->d_lock);
+                __d_set_inode_and_type(new, inode, add_flags);
+                hlist_add_head(&new->d_u.d_alias, &inode->i_dentry);
+                if (!disconnected) {
+                        hlist_bl_lock(&sb->s_roots);
+                        hlist_bl_add_head(&new->d_hash, &sb->s_roots);
+                        hlist_bl_unlock(&sb->s_roots);
+                }
+                spin_unlock(&new->d_lock);
+                spin_unlock(&inode->i_lock);
+                inode = NULL; /* consumed by new->d_inode */
+                res = new;
+        } else {
+                spin_unlock(&inode->i_lock);
+                dput(new);
+        }
-out_iput:
+ out:
         iput(inode);
         return res;
 }
-- cgit

From 9024b4c96576162a13e58003b0d58e93ebe3ac33 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Sun, 12 Nov 2023 00:04:46 -0500
Subject: d_alloc_pseudo(): move setting ->d_op there from the (sole) caller

Signed-off-by: Al Viro
---
 fs/dcache.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'fs/dcache.c')

diff --git a/fs/dcache.c b/fs/dcache.c
index 6c520eda131f..5947556b6e90 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1890,9 +1890,15 @@ struct dentry *d_alloc_cursor(struct dentry * parent)
  */
 struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
 {
+        static const struct dentry_operations anon_ops = {
+                .d_dname = simple_dname
+        };
         struct dentry *dentry = __d_alloc(sb, name);
-        if (likely(dentry))
+        if (likely(dentry)) {
                 dentry->d_flags |= DCACHE_NORCU;
+                if (!sb->s_d_op)
+                        d_set_d_op(dentry, &anon_ops);
+        }
         return dentry;
 }
-- cgit

From 57851607326a2beef21e67f83f4f53a90df8445a Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Sun, 12 Nov 2023 21:38:48 -0500
Subject: get rid of DCACHE_GENOCIDE

... now that we never call d_genocide() other than from kill_litter_super()

Signed-off-by: Al Viro
---
 fs/dcache.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'fs/dcache.c')

diff --git a/fs/dcache.c b/fs/dcache.c
index 5947556b6e90..8473c8f0ce22 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3198,10 +3198,7 @@ static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry)
                 if (d_unhashed(dentry) || !dentry->d_inode)
                         return D_WALK_SKIP;
 
-                if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
-                        dentry->d_flags |= DCACHE_GENOCIDE;
-                        dentry->d_lockref.count--;
-                }
+                dentry->d_lockref.count--;
         }
         return D_WALK_CONTINUE;
 }
-- cgit

From f9f677c5f723394170fa838b856602476150948d Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Sun, 12 Nov 2023 00:38:02 -0500
Subject: d_alloc_parallel(): in-lookup hash insertion doesn't need an RCU variant

We only search in the damn thing under hlist_bl_lock(); RCU variant
of insertion was, IIRC, pretty much cargo-culted - mea culpa...

Signed-off-by: Al Viro
---
 fs/dcache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/dcache.c')

diff --git a/fs/dcache.c b/fs/dcache.c
index 8473c8f0ce22..82d086185703 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2692,7 +2692,7 @@ retry:
         /* we can't take ->d_lock here; it's OK, though. */
         new->d_flags |= DCACHE_PAR_LOOKUP;
         new->d_wait = wq;
-        hlist_bl_add_head_rcu(&new->d_u.d_in_lookup_hash, b);
+        hlist_bl_add_head(&new->d_u.d_in_lookup_hash, b);
         hlist_bl_unlock(b);
         return new;
 mismatch:
-- cgit

From ef69f0506d8f3a250ac5baa96746e17ae22c67b5 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Thu, 23 Nov 2023 18:11:00 -0500
Subject: __d_unalias() doesn't use inode argument

... and hasn't since 2015.

Signed-off-by: Al Viro
---
 fs/dcache.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs/dcache.c')

diff --git a/fs/dcache.c b/fs/dcache.c
index 82d086185703..74b2d073ebbf 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3045,8 +3045,7 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
  * Note: If ever the locking in lock_rename() changes, then please
  * remember to update this too...
  */
-static int __d_unalias(struct inode *inode,
-                struct dentry *dentry, struct dentry *alias)
+static int __d_unalias(struct dentry *dentry, struct dentry *alias)
 {
         struct mutex *m1 = NULL;
         struct rw_semaphore *m2 = NULL;
@@ -3127,7 +3126,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
                                         inode->i_sb->s_id);
                 } else if (!IS_ROOT(new)) {
                         struct dentry *old_parent = dget(new->d_parent);
-                        int err = __d_unalias(inode, dentry, new);
+                        int err = __d_unalias(dentry, new);
                         write_sequnlock(&rename_lock);
                         if (err) {
                                 dput(new);
-- cgit

From 1b327b5ac57cf83e3d015de45d0142852f475375 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Fri, 10 Nov 2023 14:07:43 -0500
Subject: kill DCACHE_MAY_FREE

With the new ordering in __dentry_kill() it has become redundant -
it's set if and only if both DCACHE_DENTRY_KILLED and DCACHE_SHRINK_LIST
are set.

We set it in __dentry_kill(), after having set DCACHE_DENTRY_KILLED
with the only condition being that DCACHE_SHRINK_LIST is there;
all of that is done without dropping ->d_lock and the only place
that checks that flag (shrink_dentry_list()) does so under ->d_lock,
after having found the victim on its shrink list.  Since
DCACHE_SHRINK_LIST is set only when placing dentry into shrink list
and removed only by shrink_dentry_list() itself, a check for
DCACHE_DENTRY_KILLED in there would be equivalent to check for
DCACHE_MAY_FREE.

Signed-off-by: Al Viro
---
 fs/dcache.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'fs/dcache.c')

diff --git a/fs/dcache.c b/fs/dcache.c
index 475ef1edba03..523cbf0780f0 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -614,10 +614,8 @@ static struct dentry *__dentry_kill(struct dentry *dentry)
         }
         spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
         dentry_unlist(dentry);
-        if (dentry->d_flags & DCACHE_SHRINK_LIST) {
-                dentry->d_flags |= DCACHE_MAY_FREE;
+        if (dentry->d_flags & DCACHE_SHRINK_LIST)
                 can_free = false;
-        }
         spin_unlock(&dentry->d_lock);
         if (likely(can_free))
                 dentry_free(dentry);
@@ -1072,7 +1070,7 @@ void shrink_dentry_list(struct list_head *list)
                         bool can_free;
                         rcu_read_unlock();
                         d_shrink_del(dentry);
-                        can_free = dentry->d_flags & DCACHE_MAY_FREE;
+                        can_free = dentry->d_flags & DCACHE_DENTRY_KILLED;
                         spin_unlock(&dentry->d_lock);
                         if (can_free)
                                 dentry_free(dentry);
-- cgit

From 1b6ae9f6e6c3e3c35aad0f11b116a81780b8aa03 Mon Sep 17 00:00:00 2001
From: Vegard Nossum
Date: Mon, 6 Nov 2023 14:44:17 +0100
Subject: dcache: remove unnecessary NULL check in dget_dlock()

dget_dlock() requires dentry->d_lock to be held when called, yet
contains a NULL check for dentry.

An audit of all calls to dget_dlock() shows that it is never called
with a NULL pointer (as spin_lock()/spin_unlock() would crash in these
cases):

    $ git grep -W '\<dget_dlock\>'

    arch/powerpc/platforms/cell/spufs/inode.c-      spin_lock(&dentry->d_lock);
    arch/powerpc/platforms/cell/spufs/inode.c-      if (simple_positive(dentry)) {
    arch/powerpc/platforms/cell/spufs/inode.c:              dget_dlock(dentry);
    fs/autofs/expire.c-      spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
    fs/autofs/expire.c-      if (simple_positive(child)) {
    fs/autofs/expire.c:              dget_dlock(child);
    fs/autofs/root.c:        dget_dlock(active);
    fs/autofs/root.c-        spin_unlock(&active->d_lock);
    fs/autofs/root.c:        dget_dlock(expiring);
    fs/autofs/root.c-        spin_unlock(&expiring->d_lock);
    fs/ceph/dir.c-           if (!spin_trylock(&dentry->d_lock))
    fs/ceph/dir.c-                   continue;
    [...]
    fs/ceph/dir.c:           dget_dlock(dentry);
    fs/ceph/mds_client.c-            spin_lock(&alias->d_lock);
    [...]
    fs/ceph/mds_client.c:            dn = dget_dlock(alias);
    fs/configfs/inode.c-     spin_lock(&dentry->d_lock);
    fs/configfs/inode.c-     if (simple_positive(dentry)) {
    fs/configfs/inode.c:             dget_dlock(dentry);
    fs/libfs.c:              found = dget_dlock(d);
    fs/libfs.c-              spin_unlock(&d->d_lock);
    fs/libfs.c:              found = dget_dlock(child);
    fs/libfs.c-              spin_unlock(&child->d_lock);
    fs/libfs.c:              child = dget_dlock(d);
    fs/libfs.c-              spin_unlock(&d->d_lock);
    fs/ocfs2/dcache.c:       dget_dlock(dentry);
    fs/ocfs2/dcache.c-       spin_unlock(&dentry->d_lock);
    include/linux/dcache.h:static inline struct dentry *dget_dlock(struct dentry *dentry)

After taking out the NULL check, dget_dlock() becomes almost identical
to __dget_dlock(); the only difference is that dget_dlock() returns the
dentry that was passed in.  These are static inline helpers, so we can
rely on the compiler to discard unused return values.

We can therefore also remove __dget_dlock() and replace calls to it by
dget_dlock().

Also fix up and improve the kerneldoc comments while we're at it.

Al Viro pointed out that we can also clean up some of the callers to
make use of the returned value and provided a bit more info for the
kerneldoc.

While preparing v2 I also noticed that the tabs used in the kerneldoc
comments were causing the kerneldoc to get parsed incorrectly so I also
fixed this up (including for d_unhashed, which is otherwise unrelated).

Testing: x86 defconfig build + boot; make htmldocs for the kerneldoc
warning.  objdump shows there are code generation changes.

Link: https://lore.kernel.org/all/20231022164520.915013-1-vegard.nossum@oracle.com/
Cc: Alexander Viro
Cc: Christian Brauner
Cc: linux-fsdevel@vger.kernel.org
Cc: Nick Piggin
Cc: Waiman Long
Cc: linux-doc@vger.kernel.org
Signed-off-by: Vegard Nossum
Signed-off-by: Al Viro
---
 fs/dcache.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

(limited to 'fs/dcache.c')

diff --git a/fs/dcache.c b/fs/dcache.c
index 523cbf0780f0..bb18ea91d717 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -877,12 +877,6 @@ void dput_to_list(struct dentry *dentry, struct list_head *list)
         spin_unlock(&dentry->d_lock);
 }
 
-/* This must be called with d_lock held */
-static inline void __dget_dlock(struct dentry *dentry)
-{
-        dentry->d_lockref.count++;
-}
-
 struct dentry *dget_parent(struct dentry *dentry)
 {
         int gotref;
@@ -964,7 +958,7 @@ static struct dentry *__d_find_alias(struct inode *inode)
         hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
                 spin_lock(&alias->d_lock);
                 if (!d_unhashed(alias)) {
-                        __dget_dlock(alias);
+                        dget_dlock(alias);
                         spin_unlock(&alias->d_lock);
                         return alias;
                 }
@@ -1569,8 +1563,7 @@ static enum d_walk_ret find_submount(void *_data, struct dentry *dentry)
 {
         struct dentry **victim = _data;
         if (d_mountpoint(dentry)) {
-                __dget_dlock(dentry);
-                *victim = dentry;
+                *victim = dget_dlock(dentry);
                 return D_WALK_QUIT;
         }
         return D_WALK_CONTINUE;
@@ -1715,8 +1708,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
          * don't need child lock because it is not subject
          * to concurrency here
          */
-        __dget_dlock(parent);
-        dentry->d_parent = parent;
+        dentry->d_parent = dget_dlock(parent);
         hlist_add_head(&dentry->d_sib, &parent->d_children);
         spin_unlock(&parent->d_lock);
 
@@ -2683,7 +2675,7 @@ struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode)
                         spin_unlock(&alias->d_lock);
                         alias = NULL;
                 } else {
-                        __dget_dlock(alias);
+                        dget_dlock(alias);
                         __d_rehash(alias);
                         spin_unlock(&alias->d_lock);
                 }
-- cgit
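
For reference: the fs/dcache.c-limited view above does not include the
include/linux/dcache.h side of this last patch.  Going by the commit message
alone (the NULL check is dropped and dget_dlock() now returns the dentry it
was given, replacing __dget_dlock()), the surviving helper presumably ends up
looking roughly like the sketch below; this is a reconstruction for
illustration, not the actual hunk from the commit.

static inline struct dentry *dget_dlock(struct dentry *dentry)
{
        /* caller is expected to already hold dentry->d_lock; no NULL check */
        dentry->d_lockref.count++;
        return dentry;
}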