diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-16 09:14:02 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-16 09:14:02 +0200 |
commit | 3352633ce6b221d64bf40644d412d9670e7d56e3 (patch) | |
tree | f74add2d0a46ac33034955c1fa8dcff1cedd7dc0 /include/linux/fs.h | |
parent | 2775df6e5e324be9dc375f7db2c8d3042df72bbf (diff) | |
parent | 24a988f75c8a5f16ef935c51039700e985767eb9 (diff) |
Merge tag 'vfs-6.12.file' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs file updates from Christian Brauner:
"This is the work to cleanup and shrink struct file significantly.
Right now, (focusing on x86) struct file is 232 bytes. After this
series struct file will be 184 bytes aka 3 cacheline and a spare 8
bytes for future extensions at the end of the struct.
With struct file being as ubiquitous as it is this should make a
difference for file heavy workloads and allow further optimizations in
the future.
- struct fown_struct was embedded into struct file letting it take up
32 bytes in total when really it shouldn't even be embedded in
struct file in the first place. Instead, actual users of struct
fown_struct now allocate the struct on demand. This frees up 24
bytes.
- Move struct file_ra_state into the union containg the cleanup hooks
and move f_iocb_flags out of the union. This closes a 4 byte hole
we created earlier and brings struct file to 192 bytes. Which means
struct file is 3 cachelines and we managed to shrink it by 40
bytes.
- Reorder struct file so that nothing crosses a cacheline.
I suspect that in the future we will end up reordering some members
to mitigate false sharing issues or just because someone does
actually provide really good perf data.
- Shrinking struct file to 192 bytes is only part of the work.
Files use a slab that is SLAB_TYPESAFE_BY_RCU and when a kmem cache
is created with SLAB_TYPESAFE_BY_RCU the free pointer must be
located outside of the object because the cache doesn't know what
part of the memory can safely be overwritten as it may be needed to
prevent object recycling.
That has the consequence that SLAB_TYPESAFE_BY_RCU may end up
adding a new cacheline.
So this also contains work to add a new kmem_cache_create_rcu()
function that allows the caller to specify an offset where the
freelist pointer is supposed to be placed. Thus avoiding the
implicit addition of a fourth cacheline.
- And finally this removes the f_version member in struct file.
The f_version member isn't particularly well-defined. It is mainly
used as a cookie to detect concurrent seeks when iterating
directories. But it is also abused by some subsystems for
completely unrelated things.
It is mostly a directory and filesystem specific thing that doesn't
really need to live in struct file and with its wonky semantics it
really lacks a specific function.
For pipes, f_version is (ab)used to defer poll notifications until
a write has happened. And struct pipe_inode_info is used by
multiple struct files in their ->private_data so there's no chance
of pushing that down into file->private_data without introducing
another pointer indirection.
But pipes don't rely on f_pos_lock so this adds a union into struct
file encompassing f_pos_lock and a pipe specific f_pipe member that
pipes can use. This union of course can be extended to other file
types and is similar to what we do in struct inode already"
* tag 'vfs-6.12.file' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (26 commits)
fs: remove f_version
pipe: use f_pipe
fs: add f_pipe
ubifs: store cookie in private data
ufs: store cookie in private data
udf: store cookie in private data
proc: store cookie in private data
ocfs2: store cookie in private data
input: remove f_version abuse
ext4: store cookie in private data
ext2: store cookie in private data
affs: store cookie in private data
fs: add generic_llseek_cookie()
fs: use must_set_pos()
fs: add must_set_pos()
fs: add vfs_setpos_cookie()
s390: remove unused f_version
ceph: remove unused f_version
adi: remove unused f_version
mm: Removed @freeptr_offset to prevent doc warning
...
Diffstat (limited to 'include/linux/fs.h')
-rw-r--r-- | include/linux/fs.h | 106 |
1 files changed, 66 insertions, 40 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h index e8d078e98379..0df3e5f0dd2b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -963,6 +963,7 @@ static inline unsigned imajor(const struct inode *inode) } struct fown_struct { + struct file *file; /* backpointer for security modules */ rwlock_t lock; /* protects pid, uid, euid fields */ struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */ @@ -1002,52 +1003,69 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) index < ra->start + ra->size); } -/* - * f_{lock,count,pos_lock} members can be highly contended and share - * the same cacheline. f_{lock,mode} are very frequently used together - * and so share the same cacheline as well. The read-mostly - * f_{path,inode,op} are kept on a separate cacheline. +/** + * struct file - Represents a file + * @f_count: reference count + * @f_lock: Protects f_ep, f_flags. Must not be taken from IRQ context. + * @f_mode: FMODE_* flags often used in hotpaths + * @f_op: file operations + * @f_mapping: Contents of a cacheable, mappable object. + * @private_data: filesystem or driver specific data + * @f_inode: cached inode + * @f_flags: file flags + * @f_iocb_flags: iocb flags + * @f_cred: stashed credentials of creator/opener + * @f_path: path of the file + * @f_pos_lock: lock protecting file position + * @f_pipe: specific to pipes + * @f_pos: file position + * @f_security: LSM security context of this file + * @f_owner: file owner + * @f_wb_err: writeback error + * @f_sb_err: per sb writeback errors + * @f_ep: link of all epoll hooks for this file + * @f_task_work: task work entry point + * @f_llist: work queue entrypoint + * @f_ra: file's readahead state + * @f_freeptr: Pointer used by SLAB_TYPESAFE_BY_RCU file cache (don't touch.) */ struct file { + atomic_long_t f_count; + spinlock_t f_lock; + fmode_t f_mode; + const struct file_operations *f_op; + struct address_space *f_mapping; + void *private_data; + struct inode *f_inode; + unsigned int f_flags; + unsigned int f_iocb_flags; + const struct cred *f_cred; + /* --- cacheline 1 boundary (64 bytes) --- */ + struct path f_path; union { - /* fput() uses task work when closing and freeing file (default). */ - struct callback_head f_task_work; - /* fput() must use workqueue (most kernel threads). */ - struct llist_node f_llist; - unsigned int f_iocb_flags; + /* regular files (with FMODE_ATOMIC_POS) and directories */ + struct mutex f_pos_lock; + /* pipes */ + u64 f_pipe; }; - - /* - * Protects f_ep, f_flags. - * Must not be taken from IRQ context. - */ - spinlock_t f_lock; - fmode_t f_mode; - atomic_long_t f_count; - struct mutex f_pos_lock; - loff_t f_pos; - unsigned int f_flags; - struct fown_struct f_owner; - const struct cred *f_cred; - struct file_ra_state f_ra; - struct path f_path; - struct inode *f_inode; /* cached value */ - const struct file_operations *f_op; - - u64 f_version; + loff_t f_pos; #ifdef CONFIG_SECURITY - void *f_security; + void *f_security; #endif - /* needed for tty driver, and maybe others */ - void *private_data; - + /* --- cacheline 2 boundary (128 bytes) --- */ + struct fown_struct *f_owner; + errseq_t f_wb_err; + errseq_t f_sb_err; #ifdef CONFIG_EPOLL - /* Used by fs/eventpoll.c to link all the hooks to this file */ - struct hlist_head *f_ep; -#endif /* #ifdef CONFIG_EPOLL */ - struct address_space *f_mapping; - errseq_t f_wb_err; - errseq_t f_sb_err; /* for syncfs */ + struct hlist_head *f_ep; +#endif + union { + struct callback_head f_task_work; + struct llist_node f_llist; + struct file_ra_state f_ra; + freeptr_t f_freeptr; + }; + /* --- cacheline 3 boundary (192 bytes) --- */ } __randomize_layout __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ @@ -1092,6 +1110,12 @@ struct file_lease; #define OFFT_OFFSET_MAX type_max(off_t) #endif +int file_f_owner_allocate(struct file *file); +static inline struct fown_struct *file_f_owner(const struct file *file) +{ + return READ_ONCE(file->f_owner); +} + extern void send_sigio(struct fown_struct *fown, int fd, int band); static inline struct inode *file_inode(const struct file *f) @@ -1140,7 +1164,7 @@ extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force extern int f_setown(struct file *filp, int who, int force); extern void f_delown(struct file *filp); extern pid_t f_getown(struct file *filp); -extern int send_sigurg(struct fown_struct *fown); +extern int send_sigurg(struct file *file); /* * sb->s_flags. Note that these mirror the equivalent MS_* flags where @@ -3207,6 +3231,8 @@ extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence); extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, int whence, loff_t maxsize, loff_t eof); +loff_t generic_llseek_cookie(struct file *file, loff_t offset, int whence, + u64 *cookie); extern loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size); extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t); |