diff options
Diffstat (limited to 'include/linux/fs.h')
-rw-r--r-- | include/linux/fs.h | 156 |
1 file changed, 123 insertions, 33 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h index 016b0fe1536e..2ec4807d4ea8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -80,7 +80,7 @@ struct fsnotify_mark_connector; struct fsnotify_sb_info; struct fs_context; struct fs_parameter_spec; -struct fileattr; +struct file_kattr; struct iomap_ops; extern void __init inode_init(void); @@ -399,7 +399,9 @@ struct readahead_control; { IOCB_WAITQ, "WAITQ" }, \ { IOCB_NOIO, "NOIO" }, \ { IOCB_ALLOC_CACHE, "ALLOC_CACHE" }, \ - { IOCB_DIO_CALLER_COMP, "CALLER_COMP" } + { IOCB_DIO_CALLER_COMP, "CALLER_COMP" }, \ + { IOCB_AIO_RW, "AIO_RW" }, \ + { IOCB_HAS_METADATA, "AIO_HAS_METADATA" } struct kiocb { struct file *ki_filp; @@ -408,6 +410,7 @@ struct kiocb { void *private; int ki_flags; u16 ki_ioprio; /* See linux/ioprio.h */ + u8 ki_write_stream; union { /* * Only used for async buffered reads, where it denotes the @@ -433,7 +436,6 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb) } struct address_space_operations { - int (*writepage)(struct page *page, struct writeback_control *wbc); int (*read_folio)(struct file *, struct folio *); /* Write back some dirty pages from this mapping. 
*/ @@ -444,10 +446,10 @@ struct address_space_operations { void (*readahead)(struct readahead_control *); - int (*write_begin)(struct file *, struct address_space *mapping, + int (*write_begin)(const struct kiocb *, struct address_space *mapping, loff_t pos, unsigned len, struct folio **foliop, void **fsdata); - int (*write_end)(struct file *, struct address_space *mapping, + int (*write_end)(const struct kiocb *, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct folio *folio, void *fsdata); @@ -837,7 +839,7 @@ static inline void inode_fake_hash(struct inode *inode) } /* - * inode->i_mutex nesting subclasses for the lock validator: + * inode->i_rwsem nesting subclasses for the lock validator: * * 0: the object of the current VFS operation * 1: parent @@ -867,6 +869,11 @@ static inline void inode_lock(struct inode *inode) down_write(&inode->i_rwsem); } +static inline __must_check int inode_lock_killable(struct inode *inode) +{ + return down_write_killable(&inode->i_rwsem); +} + static inline void inode_unlock(struct inode *inode) { up_write(&inode->i_rwsem); @@ -877,6 +884,11 @@ static inline void inode_lock_shared(struct inode *inode) down_read(&inode->i_rwsem); } +static inline __must_check int inode_lock_shared_killable(struct inode *inode) +{ + return down_read_killable(&inode->i_rwsem); +} + static inline void inode_unlock_shared(struct inode *inode) { up_read(&inode->i_rwsem); @@ -979,7 +991,7 @@ static inline loff_t i_size_read(const struct inode *inode) /* * NOTE: unlike i_size_read(), i_size_write() does need locking around it - * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount + * (normally i_rwsem), otherwise on 32bit/SMP an update of i_size_seqcount * can be lost, resulting in subsequent i_size_read() calls spinning forever. 
*/ static inline void i_size_write(struct inode *inode, loff_t i_size) @@ -1240,7 +1252,6 @@ extern int send_sigurg(struct file *file); /* These sb flags are internal to the kernel */ #define SB_DEAD BIT(21) #define SB_DYING BIT(24) -#define SB_SUBMOUNT BIT(26) #define SB_FORCE BIT(27) #define SB_NOSEC BIT(28) #define SB_BORN BIT(29) @@ -1307,6 +1318,7 @@ struct sb_writers { unsigned short frozen; /* Is sb frozen? */ int freeze_kcount; /* How many kernel freeze requests? */ int freeze_ucount; /* How many userspace freeze requests? */ + const void *freeze_owner; /* Owner of the freeze */ struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; }; @@ -1390,6 +1402,7 @@ struct super_block { char s_sysfs_name[UUID_STRING_LEN + 1]; unsigned int s_max_links; + unsigned int s_d_flags; /* default d_flags for dentries */ /* * The next field is for VFS *only*. No filesystems have any business @@ -1403,7 +1416,7 @@ struct super_block { */ const char *s_subtype; - const struct dentry_operations *s_d_op; /* default d_op for dentries */ + const struct dentry_operations *__s_d_op; /* default d_op for dentries */ struct shrinker *s_shrink; /* per-sb shrinker handle */ @@ -1780,7 +1793,7 @@ static inline void __sb_end_write(struct super_block *sb, int level) static inline void __sb_start_write(struct super_block *sb, int level) { - percpu_down_read(sb->s_writers.rw_sem + level - 1); + percpu_down_read_freezable(sb->s_writers.rw_sem + level - 1, true); } static inline bool __sb_start_write_trylock(struct super_block *sb, int level) @@ -1911,7 +1924,7 @@ static inline void sb_end_intwrite(struct super_block *sb) * freeze protection should be the outermost lock. In particular, we have: * * sb_start_write - * -> i_mutex (write path, truncate, directory ops, ...) + * -> i_rwsem (write path, truncate, directory ops, ...) 
* -> s_umount (freeze_super, thaw_super) */ static inline void sb_start_write(struct super_block *sb) @@ -1994,20 +2007,20 @@ int vfs_unlink(struct mnt_idmap *, struct inode *, struct dentry *, /** * struct renamedata - contains all information required for renaming * @old_mnt_idmap: idmap of the old mount the inode was found from - * @old_dir: parent of source + * @old_parent: parent of source * @old_dentry: source * @new_mnt_idmap: idmap of the new mount the inode was found from - * @new_dir: parent of destination + * @new_parent: parent of destination * @new_dentry: destination * @delegated_inode: returns an inode needing a delegation break * @flags: rename flags */ struct renamedata { struct mnt_idmap *old_mnt_idmap; - struct inode *old_dir; + struct dentry *old_parent; struct dentry *old_dentry; struct mnt_idmap *new_mnt_idmap; - struct inode *new_dir; + struct dentry *new_parent; struct dentry *new_dentry; struct inode **delegated_inode; unsigned int flags; @@ -2071,8 +2084,18 @@ typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64, struct dir_context { filldir_t actor; loff_t pos; + /* + * Filesystems MUST NOT MODIFY count, but may use as a hint: + * 0 unknown + * > 0 space in buffer (assume at least one entry) + * INT_MAX unlimited + */ + int count; }; +/* If OR-ed with d_type, pending signals are not checked */ +#define FILLDIR_FLAG_NOINTR 0x1000 + /* * These flags let !MMU mmap() govern direct device mapping vs immediate * copying more easily for MAP_PRIVATE, especially for ROM filesystems. 
@@ -2169,6 +2192,7 @@ struct file_operations { int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags); int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *, unsigned int poll_flags); + int (*mmap_prepare)(struct vm_area_desc *); } __randomize_layout; /* Supports async buffered reads */ @@ -2233,16 +2257,41 @@ struct inode_operations { int (*set_acl)(struct mnt_idmap *, struct dentry *, struct posix_acl *, int); int (*fileattr_set)(struct mnt_idmap *idmap, - struct dentry *dentry, struct fileattr *fa); - int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa); + struct dentry *dentry, struct file_kattr *fa); + int (*fileattr_get)(struct dentry *dentry, struct file_kattr *fa); struct offset_ctx *(*get_offset_ctx)(struct inode *inode); } ____cacheline_aligned; -static inline int call_mmap(struct file *file, struct vm_area_struct *vma) +/* Did the driver provide valid mmap hook configuration? */ +static inline bool can_mmap_file(struct file *file) { + bool has_mmap = file->f_op->mmap; + bool has_mmap_prepare = file->f_op->mmap_prepare; + + /* Hooks are mutually exclusive. 
*/ + if (WARN_ON_ONCE(has_mmap && has_mmap_prepare)) + return false; + if (!has_mmap && !has_mmap_prepare) + return false; + + return true; +} + +int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma); + +static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (file->f_op->mmap_prepare) + return compat_vma_mmap_prepare(file, vma); + return file->f_op->mmap(file, vma); } +static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc) +{ + return file->f_op->mmap_prepare(desc); +} + extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, @@ -2269,6 +2318,7 @@ extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, * @FREEZE_HOLDER_KERNEL: kernel wants to freeze or thaw filesystem * @FREEZE_HOLDER_USERSPACE: userspace wants to freeze or thaw filesystem * @FREEZE_MAY_NEST: whether nesting freeze and thaw requests is allowed + * @FREEZE_EXCL: a freeze that can only be undone by the owner * * Indicate who the owner of the freeze or thaw request is and whether * the freeze needs to be exclusive or can nest. 
@@ -2282,6 +2332,7 @@ enum freeze_holder { FREEZE_HOLDER_KERNEL = (1U << 0), FREEZE_HOLDER_USERSPACE = (1U << 1), FREEZE_MAY_NEST = (1U << 2), + FREEZE_EXCL = (1U << 3), }; struct super_operations { @@ -2295,9 +2346,9 @@ struct super_operations { void (*evict_inode) (struct inode *); void (*put_super) (struct super_block *); int (*sync_fs)(struct super_block *sb, int wait); - int (*freeze_super) (struct super_block *, enum freeze_holder who); + int (*freeze_super) (struct super_block *, enum freeze_holder who, const void *owner); int (*freeze_fs) (struct super_block *); - int (*thaw_super) (struct super_block *, enum freeze_holder who); + int (*thaw_super) (struct super_block *, enum freeze_holder who, const void *owner); int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); @@ -2316,6 +2367,15 @@ struct super_operations { struct shrink_control *); long (*free_cached_objects)(struct super_block *, struct shrink_control *); + /* + * If a filesystem can support graceful removal of a device and + * continue read-write operations, implement this callback. + * + * Return 0 if the filesystem can continue read-write. + * Non-zero return value or no such callback means the fs will be shutdown + * as usual. + */ + int (*remove_bdev)(struct super_block *sb, struct block_device *bdev); void (*shutdown)(struct super_block *sb); }; @@ -2344,6 +2404,7 @@ struct super_operations { #define S_CASEFOLD (1 << 15) /* Casefolded file */ #define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */ #define S_KERNEL_FILE (1 << 17) /* File is in use by the kernel (eg. 
fs/cachefiles) */ +#define S_ANON_INODE (1 << 19) /* Inode is an anonymous inode */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -2400,6 +2461,7 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ (inode)->i_rdev == WHITEOUT_DEV) +#define IS_ANON_FILE(inode) ((inode)->i_flags & S_ANON_INODE) static inline bool HAS_UNMAPPED_ID(struct mnt_idmap *idmap, struct inode *inode) @@ -2705,8 +2767,10 @@ extern int unregister_filesystem(struct file_system_type *); extern int vfs_statfs(const struct path *, struct kstatfs *); extern int user_statfs(const char __user *, struct kstatfs *); extern int fd_statfs(int, struct kstatfs *); -int freeze_super(struct super_block *super, enum freeze_holder who); -int thaw_super(struct super_block *super, enum freeze_holder who); +int freeze_super(struct super_block *super, enum freeze_holder who, + const void *freeze_owner); +int thaw_super(struct super_block *super, enum freeze_holder who, + const void *freeze_owner); extern __printf(2, 3) int super_setup_bdi_name(struct super_block *sb, char *fmt, ...); extern int super_setup_bdi(struct super_block *sb); @@ -2813,7 +2877,7 @@ struct file *dentry_open_nonotify(const struct path *path, int flags, const struct cred *cred); struct file *dentry_create(const struct path *path, int flags, umode_t mode, const struct cred *cred); -struct path *backing_file_user_path(struct file *f); +struct path *backing_file_user_path(const struct file *f); /* * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file @@ -2825,14 +2889,14 @@ struct path *backing_file_user_path(struct file *f); * by fstat() on that same fd. 
*/ /* Get the path to display in /proc/<pid>/maps */ -static inline const struct path *file_user_path(struct file *f) +static inline const struct path *file_user_path(const struct file *f) { if (unlikely(f->f_mode & FMODE_BACKING)) return backing_file_user_path(f); return &f->f_path; } /* Get the inode whose inode number to display in /proc/<pid>/maps */ -static inline const struct inode *file_user_inode(struct file *f) +static inline const struct inode *file_user_inode(const struct file *f) { if (unlikely(f->f_mode & FMODE_BACKING)) return d_inode(backing_file_user_path(f)->dentry); @@ -3213,6 +3277,22 @@ static inline bool is_dot_dotdot(const char *name, size_t len) (len == 1 || (len == 2 && name[1] == '.')); } +/** + * name_contains_dotdot - check if a file name contains ".." path components + * + * Search for ".." surrounded by either '/' or start/end of string. + */ +static inline bool name_contains_dotdot(const char *name) +{ + size_t name_len; + + name_len = strlen(name); + return strcmp(name, "..") == 0 || + strncmp(name, "../", 3) == 0 || + strstr(name, "/../") != NULL || + (name_len >= 3 && strcmp(name + name_len - 3, "/..") == 0); +} + #include <linux/err.h> /* needed for stackable file system support */ @@ -3341,8 +3421,10 @@ extern void inode_add_lru(struct inode *inode); extern int sb_set_blocksize(struct super_block *, int); extern int sb_min_blocksize(struct super_block *, int); -extern int generic_file_mmap(struct file *, struct vm_area_struct *); -extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); +int generic_file_mmap(struct file *, struct vm_area_struct *); +int generic_file_mmap_prepare(struct vm_area_desc *desc); +int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); +int generic_file_readonly_mmap_prepare(struct vm_area_desc *desc); extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); int generic_write_checks_count(struct kiocb *iocb, loff_t *count); extern int 
generic_write_check_limits(struct file *file, loff_t pos, @@ -3475,7 +3557,8 @@ void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *); void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); void generic_fill_statx_atomic_writes(struct kstat *stat, unsigned int unit_min, - unsigned int unit_max); + unsigned int unit_max, + unsigned int unit_max_opt); extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int); void __inode_add_bytes(struct inode *inode, loff_t bytes); @@ -3515,9 +3598,11 @@ extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern void drop_super(struct super_block *sb); extern void drop_super_exclusive(struct super_block *sb); -extern void iterate_supers(void (*)(struct super_block *, void *), void *); +extern void iterate_supers(void (*f)(struct super_block *, void *), void *arg); extern void iterate_supers_type(struct file_system_type *, void (*)(struct super_block *, void *), void *); +void filesystems_freeze(void); +void filesystems_thaw(void); extern int dcache_dir_open(struct inode *, struct file *); extern int dcache_dir_close(struct inode *, struct file *); @@ -3541,17 +3626,21 @@ extern int simple_rename(struct mnt_idmap *, struct inode *, unsigned int); extern void simple_recursive_removal(struct dentry *, void (*callback)(struct dentry *)); +extern void locked_recursive_removal(struct dentry *, + void (*callback)(struct dentry *)); extern int noop_fsync(struct file *, loff_t, loff_t, int); extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter); extern int simple_empty(struct dentry *); -extern int simple_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, - struct folio **foliop, void **fsdata); +extern int simple_write_begin(const struct kiocb *iocb, + struct 
address_space *mapping, + loff_t pos, unsigned len, + struct folio **foliop, void **fsdata); extern const struct address_space_operations ram_aops; extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); +struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name, + const struct inode *context_inode); extern int simple_nosetlease(struct file *, int, struct file_lease **, void **); -extern const struct dentry_operations simple_dentry_operations; extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags); extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); @@ -3565,6 +3654,7 @@ extern int simple_fill_super(struct super_block *, unsigned long, const struct tree_descr *); extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); extern void simple_release_fs(struct vfsmount **mount, int *count); +struct dentry *simple_start_creating(struct dentry *, const char *); extern ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, const void *from, size_t available); |