summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-08-28 11:04:18 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-08-28 11:04:18 -0700
commit511fb5bafed197ff76d9adf5448de67f1d0558ae (patch)
tree6683ae0e7b62caa9488040d71705768a306f37dd /include
parentde16588a7737b12e63ec646d72b45befb2b1f8f7 (diff)
parentcd4284cfd3e11c7a49e4808f76f53284d47d04dd (diff)
Merge tag 'v6.6-vfs.super' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull superblock updates from Christian Brauner: "This contains the super rework that was ready for this cycle. The first part changes the order of how we open block devices and allocate superblocks, contains various cleanups, simplifications, and a new mechanism to wait on superblock state changes. This unblocks work to ultimately limit the number of writers to a block device. Jan has already scheduled follow-up work that will be ready for v6.7 and allows us to restrict the number of writers to a given block device. That series builds on this work right here. The second part contains filesystem freezing updates. Overview: The generic superblock changes are roughly organized as follows (ignoring additional minor cleanups): (1) Removal of the bd_super member from struct block_device. This was a very odd back pointer to struct super_block with unclear rules. For all relevant places we have other means to get the same information so just get rid of this. (2) Simplify rules for superblock cleanup. Roughly, everything that is allocated during fs_context initialization and that's stored in fs_context->s_fs_info needs to be cleaned up by the fs_context->free() implementation before the superblock allocation function has been called successfully. After sget_fc() returned, fs_context->s_fs_info has been transferred to sb->s_fs_info at which point sb->kill_sb() is fully responsible for cleanup. Adhering to these rules means that cleanup of sb->s_fs_info in fill_super() is to be avoided as it's brittle and inconsistent. Cleanup shouldn't be duplicated between sb->put_super() as sb->put_super() is only called if sb->s_root has been set aka when the filesystem has been successfully born (SB_BORN). That complexity should be avoided. This also means that block devices are to be closed in sb->kill_sb() instead of sb->put_super(). More details in the lower section. 
(3) Make it possible to look up or create a superblock before opening block devices. There's a subtle dependency on (2) as some filesystems did rely on fill_super() to be called in order to correctly clean up sb->s_fs_info. All these filesystems have been fixed. (4) Switch most filesystems to follow the same logic as the generic mount code now does as outlined in (3). (5) Use the superblock as the holder of the block device. We can now easily go back from block device to owning superblock. (6) Export and extend the generic fs_holder_ops and use them as holder ops everywhere and remove the filesystem specific holder ops. (7) Call from the block layer up into the filesystem layer when the block device is removed, allowing to shut down the filesystem without risk of deadlocks. (8) Get rid of get_super(). We can now easily go back from the block device to owning superblock and can call up from the block layer into the filesystem layer when the device is removed. So no need to wade through all registered superblocks to find the owning superblock anymore." Link: https://lore.kernel.org/lkml/20230824-prall-intakt-95dbffdee4a0@brauner/ * tag 'v6.6-vfs.super' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (47 commits) super: use higher-level helper for {freeze,thaw} super: wait until we passed kill super super: wait for nascent superblocks super: make locking naming consistent super: use locking helpers fs: simplify invalidate_inodes fs: remove get_super block: call into the file system for ioctl BLKFLSBUF block: call into the file system for bdev_mark_dead block: consolidate __invalidate_device and fsync_bdev block: drop the "busy inodes on changed media" log message dasd: also call __invalidate_device when setting the device offline amiflop: don't call fsync_bdev in FDFMTBEG floppy: call disk_force_media_change when changing the format block: simplify the disk_force_media_change interface nbd: call blk_mark_disk_dead in nbd_clear_sock_ioctl xfs use fs_holder_ops for 
the log and RT devices xfs: drop s_umount over opening the log and RT devices ext4: use fs_holder_ops for the log device ext4: drop s_umount over opening the log device ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/blk_types.h1
-rw-r--r--include/linux/blkdev.h15
-rw-r--r--include/linux/fs.h18
-rw-r--r--include/linux/fs_context.h2
4 files changed, 24 insertions, 12 deletions
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 0bad62cca3d0..d5c5e59ddbd2 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -52,7 +52,6 @@ struct block_device {
atomic_t bd_openers;
spinlock_t bd_size_lock; /* for bd_inode->i_size updates */
struct inode * bd_inode; /* will die */
- struct super_block * bd_super;
void * bd_claiming;
void * bd_holder;
const struct blk_holder_ops *bd_holder_ops;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 87d94be7825a..83ce87354e9a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -750,7 +750,8 @@ static inline int bdev_read_only(struct block_device *bdev)
}
bool set_capacity_and_notify(struct gendisk *disk, sector_t size);
-bool disk_force_media_change(struct gendisk *disk, unsigned int events);
+void disk_force_media_change(struct gendisk *disk);
+void bdev_mark_dead(struct block_device *bdev, bool surprise);
void add_disk_randomness(struct gendisk *disk) __latent_entropy;
void rand_initialize_disk(struct gendisk *disk);
@@ -809,7 +810,6 @@ int __register_blkdev(unsigned int major, const char *name,
void unregister_blkdev(unsigned int major, const char *name);
bool disk_check_media_change(struct gendisk *disk);
-int __invalidate_device(struct block_device *bdev, bool kill_dirty);
void set_capacity(struct gendisk *disk, sector_t size);
#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
@@ -1460,9 +1460,16 @@ void blkdev_show(struct seq_file *seqf, off_t offset);
#endif
struct blk_holder_ops {
- void (*mark_dead)(struct block_device *bdev);
+ void (*mark_dead)(struct block_device *bdev, bool surprise);
+
+ /*
+ * Sync the file system mounted on the block device.
+ */
+ void (*sync)(struct block_device *bdev);
};
+extern const struct blk_holder_ops fs_holder_ops;
+
/*
* Return the correct open flags for blkdev_get_by_* for super block flags
* as stored in sb->s_flags.
@@ -1521,8 +1528,6 @@ static inline int early_lookup_bdev(const char *pathname, dev_t *dev)
}
#endif /* CONFIG_BLOCK */
-int fsync_bdev(struct block_device *bdev);
-
int freeze_bdev(struct block_device *bdev);
int thaw_bdev(struct block_device *bdev);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0ffb996c2af3..4e270f3ed58e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1095,6 +1095,8 @@ extern int send_sigurg(struct fown_struct *fown);
#define SB_LAZYTIME BIT(25) /* Update the on-disk [acm]times lazily */
/* These sb flags are internal to the kernel */
+#define SB_DEAD BIT(21)
+#define SB_DYING BIT(24)
#define SB_SUBMOUNT BIT(26)
#define SB_FORCE BIT(27)
#define SB_NOSEC BIT(28)
@@ -1147,7 +1149,8 @@ enum {
#define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)
struct sb_writers {
- int frozen; /* Is sb frozen? */
+ unsigned short frozen; /* Is sb frozen? */
+ unsigned short freeze_holders; /* Who froze fs? */
struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS];
};
@@ -1982,6 +1985,10 @@ extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
struct file *dst_file, loff_t dst_pos,
loff_t len, unsigned int remap_flags);
+enum freeze_holder {
+ FREEZE_HOLDER_KERNEL = (1U << 0),
+ FREEZE_HOLDER_USERSPACE = (1U << 1),
+};
struct super_operations {
struct inode *(*alloc_inode)(struct super_block *sb);
@@ -1994,9 +2001,9 @@ struct super_operations {
void (*evict_inode) (struct inode *);
void (*put_super) (struct super_block *);
int (*sync_fs)(struct super_block *sb, int wait);
- int (*freeze_super) (struct super_block *);
+ int (*freeze_super) (struct super_block *, enum freeze_holder who);
int (*freeze_fs) (struct super_block *);
- int (*thaw_super) (struct super_block *);
+ int (*thaw_super) (struct super_block *, enum freeze_holder who);
int (*unfreeze_fs) (struct super_block *);
int (*statfs) (struct dentry *, struct kstatfs *);
int (*remount_fs) (struct super_block *, int *, char *);
@@ -2382,8 +2389,8 @@ extern int unregister_filesystem(struct file_system_type *);
extern int vfs_statfs(const struct path *, struct kstatfs *);
extern int user_statfs(const char __user *, struct kstatfs *);
extern int fd_statfs(int, struct kstatfs *);
-extern int freeze_super(struct super_block *super);
-extern int thaw_super(struct super_block *super);
+int freeze_super(struct super_block *super, enum freeze_holder who);
+int thaw_super(struct super_block *super, enum freeze_holder who);
extern __printf(2, 3)
int super_setup_bdi_name(struct super_block *sb, char *fmt, ...);
extern int super_setup_bdi(struct super_block *sb);
@@ -3055,7 +3062,6 @@ extern int vfs_readlink(struct dentry *, char __user *, int);
extern struct file_system_type *get_filesystem(struct file_system_type *fs);
extern void put_filesystem(struct file_system_type *fs);
extern struct file_system_type *get_fs_type(const char *name);
-extern struct super_block *get_super(struct block_device *);
extern struct super_block *get_active_super(struct block_device *bdev);
extern void drop_super(struct super_block *sb);
extern void drop_super_exclusive(struct super_block *sb);
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index a33a3b1d9016..96332db693d5 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -156,6 +156,8 @@ extern int get_tree_keyed(struct fs_context *fc,
struct fs_context *fc),
void *key);
+int setup_bdev_super(struct super_block *sb, int sb_flags,
+ struct fs_context *fc);
extern int get_tree_bdev(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc));