summaryrefslogtreecommitdiff
path: root/fs/file_table.c
diff options
context:
space:
mode:
authorChristian Brauner <brauner@kernel.org>2023-09-29 08:45:59 +0200
committerChristian Brauner <brauner@kernel.org>2023-10-19 11:02:48 +0200
commit0ede61d8589cc2d93aa78230d74ac58b5b8d0244 (patch)
tree8c1b4e637a2bfcf49828b4e07bbdbe4060382f75 /fs/file_table.c
parent93faf426e3cc000c95f1a5d3510b77ce99adac52 (diff)
file: convert to SLAB_TYPESAFE_BY_RCU
In recent discussions around some performance improvements in the file handling area we discussed switching the file cache to rely on SLAB_TYPESAFE_BY_RCU which allows us to get rid of call_rcu() based freeing for files completely. This is a pretty sensitive change overall but it might actually be worth doing. The main downside is the subtlety. The other one is that we should really wait for Jann's patch to land that enables KASAN to handle SLAB_TYPESAFE_BY_RCU UAFs. Currently it doesn't but a patch for this exists. With SLAB_TYPESAFE_BY_RCU objects may be freed and reused multiple times which requires a few changes. So it isn't sufficient anymore to just acquire a reference to the file in question under rcu using atomic_long_inc_not_zero() since the file might have already been recycled and someone else might have bumped the reference. In other words, callers might see reference count bumps from newer users. For this reason it is necessary to verify that the pointer is the same before and after the reference count increment. This pattern can be seen in get_file_rcu() and __files_get_rcu(). In addition, it isn't possible to access or check fields in struct file without first aqcuiring a reference on it. Not doing that was always very dodgy and it was only usable for non-pointer data in struct file. With SLAB_TYPESAFE_BY_RCU it is necessary that callers first acquire a reference under rcu or they must hold the files_lock of the fdtable. Failing to do either one of this is a bug. Thanks to Jann for pointing out that we need to ensure memory ordering between reallocations and pointer check by ensuring that all subsequent loads have a dependency on the second load in get_file_rcu() and providing a fixup that was folded into this patch. Cc: Jann Horn <jannh@google.com> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Christian Brauner <brauner@kernel.org>
Diffstat (limited to 'fs/file_table.c')
-rw-r--r--fs/file_table.c40
1 files changed, 23 insertions, 17 deletions
diff --git a/fs/file_table.c b/fs/file_table.c
index e68e97d4f00a..a79a80031343 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -65,33 +65,33 @@ static void file_free_rcu(struct rcu_head *head)
{
struct file *f = container_of(head, struct file, f_rcuhead);
- put_cred(f->f_cred);
- if (unlikely(f->f_mode & FMODE_BACKING))
- kfree(backing_file(f));
- else
- kmem_cache_free(filp_cachep, f);
+ kfree(backing_file(f));
}
static inline void file_free(struct file *f)
{
security_file_free(f);
- if (unlikely(f->f_mode & FMODE_BACKING))
- path_put(backing_file_real_path(f));
if (likely(!(f->f_mode & FMODE_NOACCOUNT)))
percpu_counter_dec(&nr_files);
- call_rcu(&f->f_rcuhead, file_free_rcu);
+ put_cred(f->f_cred);
+ if (unlikely(f->f_mode & FMODE_BACKING)) {
+ path_put(backing_file_real_path(f));
+ call_rcu(&f->f_rcuhead, file_free_rcu);
+ } else {
+ kmem_cache_free(filp_cachep, f);
+ }
}
void release_empty_file(struct file *f)
{
WARN_ON_ONCE(f->f_mode & (FMODE_BACKING | FMODE_OPENED));
- /* Uhm, we better find out who grabs references to an unopened file. */
- WARN_ON_ONCE(atomic_long_cmpxchg(&f->f_count, 1, 0) != 1);
- security_file_free(f);
- put_cred(f->f_cred);
- if (likely(!(f->f_mode & FMODE_NOACCOUNT)))
- percpu_counter_dec(&nr_files);
- kmem_cache_free(filp_cachep, f);
+ if (atomic_long_dec_and_test(&f->f_count)) {
+ security_file_free(f);
+ put_cred(f->f_cred);
+ if (likely(!(f->f_mode & FMODE_NOACCOUNT)))
+ percpu_counter_dec(&nr_files);
+ kmem_cache_free(filp_cachep, f);
+ }
}
/*
@@ -176,7 +176,6 @@ static int init_file(struct file *f, int flags, const struct cred *cred)
return error;
}
- atomic_long_set(&f->f_count, 1);
rwlock_init(&f->f_owner.lock);
spin_lock_init(&f->f_lock);
mutex_init(&f->f_pos_lock);
@@ -184,6 +183,12 @@ static int init_file(struct file *f, int flags, const struct cred *cred)
f->f_mode = OPEN_FMODE(flags);
/* f->f_version: 0 */
+ /*
+ * We're SLAB_TYPESAFE_BY_RCU so initialize f_count last. While
+ * fget-rcu pattern users need to be able to handle spurious
+ * refcount bumps we should reinitialize the reused file first.
+ */
+ atomic_long_set(&f->f_count, 1);
return 0;
}
@@ -483,7 +488,8 @@ EXPORT_SYMBOL(__fput_sync);
void __init files_init(void)
{
filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
- SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT, NULL);
+ SLAB_TYPESAFE_BY_RCU | SLAB_HWCACHE_ALIGN |
+ SLAB_PANIC | SLAB_ACCOUNT, NULL);
percpu_counter_init(&nr_files, 0, GFP_KERNEL);
}