summaryrefslogtreecommitdiff
path: root/fs/ext4/super.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/super.c')
-rw-r--r--fs/ext4/super.c6143
1 files changed, 3934 insertions, 2209 deletions
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d61a70e2193a..87205660c5d0 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/super.c
*
@@ -38,11 +39,15 @@
#include <linux/log2.h>
#include <linux/crc16.h>
#include <linux/dax.h>
-#include <linux/cleancache.h>
#include <linux/uaccess.h>
-
+#include <linux/iversion.h>
+#include <linux/unicode.h>
+#include <linux/part_stat.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
+#include <linux/fsnotify.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include "ext4.h"
#include "ext4_extents.h" /* Needed for trace points definition */
@@ -56,89 +61,230 @@
#include <trace/events/ext4.h>
static struct ext4_lazy_init *ext4_li_info;
-static struct mutex ext4_li_mtx;
+static DEFINE_MUTEX(ext4_li_mtx);
static struct ratelimit_state ext4_mount_msg_ratelimit;
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
unsigned long journal_devnum);
static int ext4_show_options(struct seq_file *seq, struct dentry *root);
-static int ext4_commit_super(struct super_block *sb, int sync);
-static void ext4_mark_recovery_complete(struct super_block *sb,
+static void ext4_update_super(struct super_block *sb);
+static int ext4_commit_super(struct super_block *sb);
+static int ext4_mark_recovery_complete(struct super_block *sb,
struct ext4_super_block *es);
-static void ext4_clear_journal_err(struct super_block *sb,
- struct ext4_super_block *es);
+static int ext4_clear_journal_err(struct super_block *sb,
+ struct ext4_super_block *es);
static int ext4_sync_fs(struct super_block *sb, int wait);
-static int ext4_remount(struct super_block *sb, int *flags, char *data);
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static int ext4_unfreeze(struct super_block *sb);
static int ext4_freeze(struct super_block *sb);
-static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data);
static inline int ext2_feature_set_ok(struct super_block *sb);
static inline int ext3_feature_set_ok(struct super_block *sb);
-static int ext4_feature_set_ok(struct super_block *sb, int readonly);
-static void ext4_destroy_lazyinit_thread(void);
static void ext4_unregister_li_request(struct super_block *sb);
static void ext4_clear_request_list(void);
static struct inode *ext4_get_journal_inode(struct super_block *sb,
unsigned int journal_inum);
+static int ext4_validate_options(struct fs_context *fc);
+static int ext4_check_opt_consistency(struct fs_context *fc,
+ struct super_block *sb);
+static void ext4_apply_options(struct fs_context *fc, struct super_block *sb);
+static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param);
+static int ext4_get_tree(struct fs_context *fc);
+static int ext4_reconfigure(struct fs_context *fc);
+static void ext4_fc_free(struct fs_context *fc);
+static int ext4_init_fs_context(struct fs_context *fc);
+static void ext4_kill_sb(struct super_block *sb);
+static const struct fs_parameter_spec ext4_param_specs[];
/*
* Lock ordering
*
- * Note the difference between i_mmap_sem (EXT4_I(inode)->i_mmap_sem) and
- * i_mmap_rwsem (inode->i_mmap_rwsem)!
- *
* page fault path:
- * mmap_sem -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
- * page lock -> i_data_sem (rw)
+ * mmap_lock -> sb_start_pagefault -> invalidate_lock (r) -> transaction start
+ * -> page lock -> i_data_sem (rw)
*
* buffered write path:
- * sb_start_write -> i_mutex -> mmap_sem
+ * sb_start_write -> i_mutex -> mmap_lock
* sb_start_write -> i_mutex -> transaction start -> page lock ->
* i_data_sem (rw)
*
* truncate:
- * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
- * i_mmap_rwsem (w) -> page lock
- * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
- * transaction start -> i_data_sem (rw)
+ * sb_start_write -> i_mutex -> invalidate_lock (w) -> i_mmap_rwsem (w) ->
+ * page lock
+ * sb_start_write -> i_mutex -> invalidate_lock (w) -> transaction start ->
+ * i_data_sem (rw)
*
* direct IO:
- * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) -> mmap_sem
- * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) ->
- * transaction start -> i_data_sem (rw)
+ * sb_start_write -> i_mutex -> mmap_lock
+ * sb_start_write -> i_mutex -> transaction start -> i_data_sem (rw)
*
* writepages:
* transaction start -> page lock(s) -> i_data_sem (rw)
*/
+static const struct fs_context_operations ext4_context_ops = {
+ .parse_param = ext4_parse_param,
+ .get_tree = ext4_get_tree,
+ .reconfigure = ext4_reconfigure,
+ .free = ext4_fc_free,
+};
+
+
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
static struct file_system_type ext2_fs_type = {
- .owner = THIS_MODULE,
- .name = "ext2",
- .mount = ext4_mount,
- .kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV,
+ .owner = THIS_MODULE,
+ .name = "ext2",
+ .init_fs_context = ext4_init_fs_context,
+ .parameters = ext4_param_specs,
+ .kill_sb = ext4_kill_sb,
+ .fs_flags = FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("ext2");
MODULE_ALIAS("ext2");
-#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
+#define IS_EXT2_SB(sb) ((sb)->s_type == &ext2_fs_type)
#else
#define IS_EXT2_SB(sb) (0)
#endif
static struct file_system_type ext3_fs_type = {
- .owner = THIS_MODULE,
- .name = "ext3",
- .mount = ext4_mount,
- .kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV,
+ .owner = THIS_MODULE,
+ .name = "ext3",
+ .init_fs_context = ext4_init_fs_context,
+ .parameters = ext4_param_specs,
+ .kill_sb = ext4_kill_sb,
+ .fs_flags = FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("ext3");
MODULE_ALIAS("ext3");
-#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
+#define IS_EXT3_SB(sb) ((sb)->s_type == &ext3_fs_type)
+
+
+static inline void __ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
+ bh_end_io_t *end_io, bool simu_fail)
+{
+ if (simu_fail) {
+ clear_buffer_uptodate(bh);
+ unlock_buffer(bh);
+ return;
+ }
+
+ /*
+ * buffer's verified bit is no longer valid after reading from
+ * disk again due to write out error, clear it to make sure we
+ * recheck the buffer contents.
+ */
+ clear_buffer_verified(bh);
+
+ bh->b_end_io = end_io ? end_io : end_buffer_read_sync;
+ get_bh(bh);
+ submit_bh(REQ_OP_READ | op_flags, bh);
+}
+
+void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags,
+ bh_end_io_t *end_io, bool simu_fail)
+{
+ BUG_ON(!buffer_locked(bh));
+
+ if (ext4_buffer_uptodate(bh)) {
+ unlock_buffer(bh);
+ return;
+ }
+ __ext4_read_bh(bh, op_flags, end_io, simu_fail);
+}
+
+int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
+ bh_end_io_t *end_io, bool simu_fail)
+{
+ BUG_ON(!buffer_locked(bh));
+
+ if (ext4_buffer_uptodate(bh)) {
+ unlock_buffer(bh);
+ return 0;
+ }
+
+ __ext4_read_bh(bh, op_flags, end_io, simu_fail);
+
+ wait_on_buffer(bh);
+ if (buffer_uptodate(bh))
+ return 0;
+ return -EIO;
+}
+
+int ext4_read_bh_lock(struct buffer_head *bh, blk_opf_t op_flags, bool wait)
+{
+ lock_buffer(bh);
+ if (!wait) {
+ ext4_read_bh_nowait(bh, op_flags, NULL, false);
+ return 0;
+ }
+ return ext4_read_bh(bh, op_flags, NULL, false);
+}
+
+/*
+ * This works like __bread_gfp() except it uses ERR_PTR for error
+ * returns. Currently with sb_bread it's impossible to distinguish
+ * between ENOMEM and EIO situations (since both result in a NULL
+ * return.
+ */
+static struct buffer_head *__ext4_sb_bread_gfp(struct super_block *sb,
+ sector_t block,
+ blk_opf_t op_flags, gfp_t gfp)
+{
+ struct buffer_head *bh;
+ int ret;
+
+ bh = sb_getblk_gfp(sb, block, gfp);
+ if (bh == NULL)
+ return ERR_PTR(-ENOMEM);
+ if (ext4_buffer_uptodate(bh))
+ return bh;
+
+ ret = ext4_read_bh_lock(bh, REQ_META | op_flags, true);
+ if (ret) {
+ put_bh(bh);
+ return ERR_PTR(ret);
+ }
+ return bh;
+}
+
+struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block,
+ blk_opf_t op_flags)
+{
+ gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_mapping,
+ ~__GFP_FS) | __GFP_MOVABLE;
+
+ return __ext4_sb_bread_gfp(sb, block, op_flags, gfp);
+}
+
+struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
+ sector_t block)
+{
+ gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_mapping,
+ ~__GFP_FS);
+
+ return __ext4_sb_bread_gfp(sb, block, 0, gfp);
+}
+
+struct buffer_head *ext4_sb_bread_nofail(struct super_block *sb,
+ sector_t block)
+{
+ gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_mapping,
+ ~__GFP_FS) | __GFP_MOVABLE | __GFP_NOFAIL;
+
+ return __ext4_sb_bread_gfp(sb, block, 0, gfp);
+}
+
+void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block)
+{
+ struct buffer_head *bh = bdev_getblk(sb->s_bdev, block,
+ sb->s_blocksize, GFP_NOWAIT);
+
+ if (likely(bh)) {
+ if (trylock_buffer(bh))
+ ext4_read_bh_nowait(bh, REQ_RAHEAD, NULL, false);
+ brelse(bh);
+ }
+}
static int ext4_verify_csum_type(struct super_block *sb,
struct ext4_super_block *es)
@@ -149,14 +295,12 @@ static int ext4_verify_csum_type(struct super_block *sb,
return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
}
-static __le32 ext4_superblock_csum(struct super_block *sb,
- struct ext4_super_block *es)
+__le32 ext4_superblock_csum(struct ext4_super_block *es)
{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
int offset = offsetof(struct ext4_super_block, s_checksum);
__u32 csum;
- csum = ext4_chksum(sbi, ~0, (char *)es, offset);
+ csum = ext4_chksum(~0, (char *)es, offset);
return cpu_to_le32(csum);
}
@@ -164,40 +308,20 @@ static __le32 ext4_superblock_csum(struct super_block *sb,
static int ext4_superblock_csum_verify(struct super_block *sb,
struct ext4_super_block *es)
{
- if (!ext4_has_metadata_csum(sb))
+ if (!ext4_has_feature_metadata_csum(sb))
return 1;
- return es->s_checksum == ext4_superblock_csum(sb, es);
+ return es->s_checksum == ext4_superblock_csum(es);
}
void ext4_superblock_csum_set(struct super_block *sb)
{
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
- if (!ext4_has_metadata_csum(sb))
+ if (!ext4_has_feature_metadata_csum(sb))
return;
- es->s_checksum = ext4_superblock_csum(sb, es);
-}
-
-void *ext4_kvmalloc(size_t size, gfp_t flags)
-{
- void *ret;
-
- ret = kmalloc(size, flags | __GFP_NOWARN);
- if (!ret)
- ret = __vmalloc(size, flags, PAGE_KERNEL);
- return ret;
-}
-
-void *ext4_kvzalloc(size_t size, gfp_t flags)
-{
- void *ret;
-
- ret = kzalloc(size, flags | __GFP_NOWARN);
- if (!ret)
- ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
- return ret;
+ es->s_checksum = ext4_superblock_csum(es);
}
ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
@@ -235,9 +359,9 @@ __u32 ext4_free_group_clusters(struct super_block *sb,
__u32 ext4_free_inodes_count(struct super_block *sb,
struct ext4_group_desc *bg)
{
- return le16_to_cpu(bg->bg_free_inodes_count_lo) |
+ return le16_to_cpu(READ_ONCE(bg->bg_free_inodes_count_lo)) |
(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
- (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
+ (__u32)le16_to_cpu(READ_ONCE(bg->bg_free_inodes_count_hi)) << 16 : 0);
}
__u32 ext4_used_dirs_count(struct super_block *sb,
@@ -291,9 +415,9 @@ void ext4_free_group_clusters_set(struct super_block *sb,
void ext4_free_inodes_set(struct super_block *sb,
struct ext4_group_desc *bg, __u32 count)
{
- bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
+ WRITE_ONCE(bg->bg_free_inodes_count_lo, cpu_to_le16((__u16)count));
if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
- bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
+ WRITE_ONCE(bg->bg_free_inodes_count_hi, cpu_to_le16(count >> 16));
}
void ext4_used_dirs_set(struct super_block *sb,
@@ -312,80 +436,234 @@ void ext4_itable_unused_set(struct super_block *sb,
bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
}
-
-static void __save_error_info(struct super_block *sb, const char *func,
- unsigned int line)
+static void __ext4_update_tstamp(__le32 *lo, __u8 *hi, time64_t now)
{
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+ now = clamp_val(now, 0, (1ull << 40) - 1);
- EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
- if (bdev_read_only(sb->s_bdev))
- return;
- es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
- es->s_last_error_time = cpu_to_le32(get_seconds());
- strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
- es->s_last_error_line = cpu_to_le32(line);
- if (!es->s_first_error_time) {
- es->s_first_error_time = es->s_last_error_time;
- strncpy(es->s_first_error_func, func,
- sizeof(es->s_first_error_func));
- es->s_first_error_line = cpu_to_le32(line);
- es->s_first_error_ino = es->s_last_error_ino;
- es->s_first_error_block = es->s_last_error_block;
- }
- /*
- * Start the daily error reporting function if it hasn't been
- * started already
- */
- if (!es->s_error_count)
- mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
- le32_add_cpu(&es->s_error_count, 1);
+ *lo = cpu_to_le32(lower_32_bits(now));
+ *hi = upper_32_bits(now);
}
-static void save_error_info(struct super_block *sb, const char *func,
- unsigned int line)
+static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
{
- __save_error_info(sb, func, line);
- ext4_commit_super(sb, 1);
+ return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo);
}
+#define ext4_update_tstamp(es, tstamp) \
+ __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi, \
+ ktime_get_real_seconds())
+#define ext4_get_tstamp(es, tstamp) \
+ __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
/*
- * The del_gendisk() function uninitializes the disk-specific data
- * structures, including the bdi structure, without telling anyone
- * else. Once this happens, any attempt to call mark_buffer_dirty()
- * (for example, by ext4_commit_super), will cause a kernel OOPS.
- * This is a kludge to prevent these oops until we can put in a proper
- * hook in del_gendisk() to inform the VFS and file system layers.
+ * The ext4_maybe_update_superblock() function checks and updates the
+ * superblock if needed.
+ *
+ * This function is designed to update the on-disk superblock only under
+ * certain conditions to prevent excessive disk writes and unnecessary
+ * waking of the disk from sleep. The superblock will be updated if:
+ * 1. More than sbi->s_sb_update_sec (def: 1 hour) has passed since the last
+ * superblock update
+ * 2. More than sbi->s_sb_update_kb (def: 16MB) kbs have been written since the
+ * last superblock update.
+ *
+ * @sb: The superblock
*/
-static int block_device_ejected(struct super_block *sb)
+static void ext4_maybe_update_superblock(struct super_block *sb)
{
- struct inode *bd_inode = sb->s_bdev->bd_inode;
- struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+ journal_t *journal = sbi->s_journal;
+ time64_t now;
+ __u64 last_update;
+ __u64 lifetime_write_kbytes;
+ __u64 diff_size;
+
+ if (ext4_emergency_state(sb) || sb_rdonly(sb) ||
+ !(sb->s_flags & SB_ACTIVE) || !journal ||
+ journal->j_flags & JBD2_UNMOUNT)
+ return;
+
+ now = ktime_get_real_seconds();
+ last_update = ext4_get_tstamp(es, s_wtime);
+
+ if (likely(now - last_update < sbi->s_sb_update_sec))
+ return;
+
+ lifetime_write_kbytes = sbi->s_kbytes_written +
+ ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
+ sbi->s_sectors_written_start) >> 1);
- return bdi->dev == NULL;
+ /* Get the number of kilobytes not written to disk to account
+ * for statistics and compare with a multiple of 16 MB. This
+ * is used to determine when the next superblock commit should
+ * occur (i.e. not more often than once per 16MB if there was
+ * less written in an hour).
+ */
+ diff_size = lifetime_write_kbytes - le64_to_cpu(es->s_kbytes_written);
+
+ if (diff_size > sbi->s_sb_update_kb)
+ schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
}
static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
{
struct super_block *sb = journal->j_private;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- int error = is_journal_aborted(journal);
- struct ext4_journal_cb_entry *jce;
BUG_ON(txn->t_state == T_FINISHED);
ext4_process_freed_data(sb, txn->t_tid);
+ ext4_maybe_update_superblock(sb);
+}
+
+static bool ext4_journalled_writepage_needs_redirty(struct jbd2_inode *jinode,
+ struct folio *folio)
+{
+ struct buffer_head *bh, *head;
+ struct journal_head *jh;
- spin_lock(&sbi->s_md_lock);
- while (!list_empty(&txn->t_private_list)) {
- jce = list_entry(txn->t_private_list.next,
- struct ext4_journal_cb_entry, jce_list);
- list_del_init(&jce->jce_list);
- spin_unlock(&sbi->s_md_lock);
- jce->jce_func(sb, jce, error);
- spin_lock(&sbi->s_md_lock);
+ bh = head = folio_buffers(folio);
+ do {
+ /*
+ * We have to redirty a page in these cases:
+ * 1) If buffer is dirty, it means the page was dirty because it
+ * contains a buffer that needs checkpointing. So the dirty bit
+ * needs to be preserved so that checkpointing writes the buffer
+ * properly.
+ * 2) If buffer is not part of the committing transaction
+ * (we may have just accidentally come across this buffer because
+ * inode range tracking is not exact) or if the currently running
+ * transaction already contains this buffer as well, dirty bit
+ * needs to be preserved so that the buffer gets writeprotected
+ * properly on running transaction's commit.
+ */
+ jh = bh2jh(bh);
+ if (buffer_dirty(bh) ||
+ (jh && (jh->b_transaction != jinode->i_transaction ||
+ jh->b_next_transaction)))
+ return true;
+ } while ((bh = bh->b_this_page) != head);
+
+ return false;
+}
+
+static int ext4_journalled_submit_inode_data_buffers(struct jbd2_inode *jinode)
+{
+ struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = LONG_MAX,
+ .range_start = jinode->i_dirty_start,
+ .range_end = jinode->i_dirty_end,
+ };
+ struct folio *folio = NULL;
+ int error;
+
+ /*
+ * writeback_iter() already checks for dirty pages and calls
+ * folio_clear_dirty_for_io(), which we want to write protect the
+ * folios.
+ *
+ * However, we may have to redirty a folio sometimes.
+ */
+ while ((folio = writeback_iter(mapping, &wbc, folio, &error))) {
+ if (ext4_journalled_writepage_needs_redirty(jinode, folio))
+ folio_redirty_for_writepage(&wbc, folio);
+ folio_unlock(folio);
}
- spin_unlock(&sbi->s_md_lock);
+
+ return error;
+}
+
+static int ext4_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
+{
+ int ret;
+
+ if (ext4_should_journal_data(jinode->i_vfs_inode))
+ ret = ext4_journalled_submit_inode_data_buffers(jinode);
+ else
+ ret = ext4_normal_submit_inode_data_buffers(jinode);
+ return ret;
+}
+
+static int ext4_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
+{
+ int ret = 0;
+
+ if (!ext4_should_journal_data(jinode->i_vfs_inode))
+ ret = jbd2_journal_finish_inode_data_buffers(jinode);
+
+ return ret;
+}
+
+static bool system_going_down(void)
+{
+ return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
+ || system_state == SYSTEM_RESTART;
+}
+
+struct ext4_err_translation {
+ int code;
+ int errno;
+};
+
+#define EXT4_ERR_TRANSLATE(err) { .code = EXT4_ERR_##err, .errno = err }
+
+static struct ext4_err_translation err_translation[] = {
+ EXT4_ERR_TRANSLATE(EIO),
+ EXT4_ERR_TRANSLATE(ENOMEM),
+ EXT4_ERR_TRANSLATE(EFSBADCRC),
+ EXT4_ERR_TRANSLATE(EFSCORRUPTED),
+ EXT4_ERR_TRANSLATE(ENOSPC),
+ EXT4_ERR_TRANSLATE(ENOKEY),
+ EXT4_ERR_TRANSLATE(EROFS),
+ EXT4_ERR_TRANSLATE(EFBIG),
+ EXT4_ERR_TRANSLATE(EEXIST),
+ EXT4_ERR_TRANSLATE(ERANGE),
+ EXT4_ERR_TRANSLATE(EOVERFLOW),
+ EXT4_ERR_TRANSLATE(EBUSY),
+ EXT4_ERR_TRANSLATE(ENOTDIR),
+ EXT4_ERR_TRANSLATE(ENOTEMPTY),
+ EXT4_ERR_TRANSLATE(ESHUTDOWN),
+ EXT4_ERR_TRANSLATE(EFAULT),
+};
+
+static int ext4_errno_to_code(int errno)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(err_translation); i++)
+ if (err_translation[i].errno == errno)
+ return err_translation[i].code;
+ return EXT4_ERR_UNKNOWN;
+}
+
+static void save_error_info(struct super_block *sb, int error,
+ __u32 ino, __u64 block,
+ const char *func, unsigned int line)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ /* We default to EFSCORRUPTED error... */
+ if (error == 0)
+ error = EFSCORRUPTED;
+
+ spin_lock(&sbi->s_error_lock);
+ sbi->s_add_error_count++;
+ sbi->s_last_error_code = error;
+ sbi->s_last_error_line = line;
+ sbi->s_last_error_ino = ino;
+ sbi->s_last_error_block = block;
+ sbi->s_last_error_func = func;
+ sbi->s_last_error_time = ktime_get_real_seconds();
+ if (!sbi->s_first_error_time) {
+ sbi->s_first_error_code = error;
+ sbi->s_first_error_line = line;
+ sbi->s_first_error_ino = ino;
+ sbi->s_first_error_block = block;
+ sbi->s_first_error_func = func;
+ sbi->s_first_error_time = sbi->s_last_error_time;
+ }
+ spin_unlock(&sbi->s_error_lock);
}
/* Deal with the reporting of failure conditions on a filesystem such as
@@ -401,36 +679,125 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
* We'll just use the jbd2_journal_abort() error code to record an error in
* the journal instead. On recovery, the journal will complain about
* that error until we've noted it down and cleared it.
+ *
+ * If force_ro is set, we unconditionally force the filesystem into an
+ * ABORT|READONLY state, unless the error response on the fs has been set to
+ * panic in which case we take the easy way out and panic immediately. This is
+ * used to deal with unrecoverable failures such as journal IO errors or ENOMEM
+ * at a critical moment in log management.
*/
-
-static void ext4_handle_error(struct super_block *sb)
+static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
+ __u32 ino, __u64 block,
+ const char *func, unsigned int line)
{
- if (sb->s_flags & MS_RDONLY)
- return;
+ journal_t *journal = EXT4_SB(sb)->s_journal;
+ bool continue_fs = !force_ro && test_opt(sb, ERRORS_CONT);
- if (!test_opt(sb, ERRORS_CONT)) {
- journal_t *journal = EXT4_SB(sb)->s_journal;
+ EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+ if (test_opt(sb, WARN_ON_ERROR))
+ WARN_ON_ONCE(1);
- EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
- if (journal)
- jbd2_journal_abort(journal, -EIO);
- }
- if (test_opt(sb, ERRORS_RO)) {
- ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
+ if (!continue_fs && !ext4_emergency_ro(sb) && journal)
+ jbd2_journal_abort(journal, -error);
+
+ if (!bdev_read_only(sb->s_bdev)) {
+ save_error_info(sb, error, ino, block, func, line);
/*
- * Make sure updated value of ->s_mount_flags will be visible
- * before ->s_flags update
+ * In case the fs should keep running, we need to writeout
+ * superblock through the journal. Due to lock ordering
+ * constraints, it may not be safe to do it right here so we
+ * defer superblock flushing to a workqueue. We just need to be
+ * careful when the journal is already shutting down. If we get
+ * here in that case, just update the sb directly as the last
+ * transaction won't commit anyway.
*/
- smp_wmb();
- sb->s_flags |= MS_RDONLY;
+ if (continue_fs && journal &&
+ !ext4_test_mount_flag(sb, EXT4_MF_JOURNAL_DESTROY))
+ schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
+ else
+ ext4_commit_super(sb);
}
- if (test_opt(sb, ERRORS_PANIC)) {
- if (EXT4_SB(sb)->s_journal &&
- !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
- return;
+
+ /*
+ * We force ERRORS_RO behavior when system is rebooting. Otherwise we
+ * could panic during 'reboot -f' as the underlying device got already
+ * disabled.
+ */
+ if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
panic("EXT4-fs (device %s): panic forced after error\n",
sb->s_id);
}
+
+ if (ext4_emergency_ro(sb) || continue_fs)
+ return;
+
+ ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
+ /*
+ * We don't set SB_RDONLY because that requires sb->s_umount
+ * semaphore and setting it without proper remount procedure is
+ * confusing code such as freeze_super() leading to deadlocks
+ * and other problems.
+ */
+ set_bit(EXT4_FLAGS_EMERGENCY_RO, &EXT4_SB(sb)->s_ext4_flags);
+}
+
+static void update_super_work(struct work_struct *work)
+{
+ struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
+ s_sb_upd_work);
+ journal_t *journal = sbi->s_journal;
+ handle_t *handle;
+
+ /*
+ * If the journal is still running, we have to write out superblock
+ * through the journal to avoid collisions of other journalled sb
+ * updates.
+ *
+ * We use directly jbd2 functions here to avoid recursing back into
+ * ext4 error handling code during handling of previous errors.
+ */
+ if (!ext4_emergency_state(sbi->s_sb) &&
+ !sb_rdonly(sbi->s_sb) && journal) {
+ struct buffer_head *sbh = sbi->s_sbh;
+ bool call_notify_err = false;
+
+ handle = jbd2_journal_start(journal, 1);
+ if (IS_ERR(handle))
+ goto write_directly;
+ if (jbd2_journal_get_write_access(handle, sbh)) {
+ jbd2_journal_stop(handle);
+ goto write_directly;
+ }
+
+ if (sbi->s_add_error_count > 0)
+ call_notify_err = true;
+
+ ext4_update_super(sbi->s_sb);
+ if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
+ ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to "
+ "superblock detected");
+ clear_buffer_write_io_error(sbh);
+ set_buffer_uptodate(sbh);
+ }
+
+ if (jbd2_journal_dirty_metadata(handle, sbh)) {
+ jbd2_journal_stop(handle);
+ goto write_directly;
+ }
+ jbd2_journal_stop(handle);
+
+ if (call_notify_err)
+ ext4_notify_error_sysfs(sbi);
+
+ return;
+ }
+write_directly:
+ /*
+ * Write through journal failed. Write sb directly to get error info
+ * out and hope for the best.
+ */
+ ext4_commit_super(sbi->s_sb);
+ ext4_notify_error_sysfs(sbi);
}
#define ext4_error_ratelimit(sb) \
@@ -438,14 +805,16 @@ static void ext4_handle_error(struct super_block *sb)
"EXT4-fs error")
void __ext4_error(struct super_block *sb, const char *function,
- unsigned int line, const char *fmt, ...)
+ unsigned int line, bool force_ro, int error, __u64 block,
+ const char *fmt, ...)
{
struct va_format vaf;
va_list args;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
+ if (unlikely(ext4_emergency_state(sb)))
return;
+ trace_ext4_error(sb, function, line);
if (ext4_error_ratelimit(sb)) {
va_start(args, fmt);
vaf.fmt = fmt;
@@ -455,23 +824,22 @@ void __ext4_error(struct super_block *sb, const char *function,
sb->s_id, function, line, current->comm, &vaf);
va_end(args);
}
- save_error_info(sb, function, line);
- ext4_handle_error(sb);
+ fsnotify_sb_error(sb, NULL, error ? error : EFSCORRUPTED);
+
+ ext4_handle_error(sb, force_ro, error, 0, block, function, line);
}
void __ext4_error_inode(struct inode *inode, const char *function,
- unsigned int line, ext4_fsblk_t block,
+ unsigned int line, ext4_fsblk_t block, int error,
const char *fmt, ...)
{
va_list args;
struct va_format vaf;
- struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_emergency_state(inode->i_sb)))
return;
- es->s_last_error_ino = cpu_to_le32(inode->i_ino);
- es->s_last_error_block = cpu_to_le64(block);
+ trace_ext4_error(inode->i_sb, function, line);
if (ext4_error_ratelimit(inode->i_sb)) {
va_start(args, fmt);
vaf.fmt = fmt;
@@ -488,8 +856,10 @@ void __ext4_error_inode(struct inode *inode, const char *function,
current->comm, &vaf);
va_end(args);
}
- save_error_info(inode->i_sb, function, line);
- ext4_handle_error(inode->i_sb);
+ fsnotify_sb_error(inode->i_sb, inode, error ? error : EFSCORRUPTED);
+
+ ext4_handle_error(inode->i_sb, false, error, inode->i_ino, block,
+ function, line);
}
void __ext4_error_file(struct file *file, const char *function,
@@ -498,15 +868,13 @@ void __ext4_error_file(struct file *file, const char *function,
{
va_list args;
struct va_format vaf;
- struct ext4_super_block *es;
struct inode *inode = file_inode(file);
char pathname[80], *path;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_emergency_state(inode->i_sb)))
return;
- es = EXT4_SB(inode->i_sb)->s_es;
- es->s_last_error_ino = cpu_to_le32(inode->i_ino);
+ trace_ext4_error(inode->i_sb, function, line);
if (ext4_error_ratelimit(inode->i_sb)) {
path = file_path(file, pathname, sizeof(pathname));
if (IS_ERR(path))
@@ -528,8 +896,10 @@ void __ext4_error_file(struct file *file, const char *function,
current->comm, path, &vaf);
va_end(args);
}
- save_error_info(inode->i_sb, function, line);
- ext4_handle_error(inode->i_sb);
+ fsnotify_sb_error(inode->i_sb, inode, EFSCORRUPTED);
+
+ ext4_handle_error(inode->i_sb, false, EFSCORRUPTED, inode->i_ino, block,
+ function, line);
}
const char *ext4_decode_error(struct super_block *sb, int errno,
@@ -581,14 +951,13 @@ void __ext4_std_error(struct super_block *sb, const char *function,
char nbuf[16];
const char *errstr;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
+ if (unlikely(ext4_emergency_state(sb)))
return;
/* Special case: if the error is EROFS, and we're not already
* inside a transaction, then there's really no point in logging
* an error. */
- if (errno == -EROFS && journal_current_handle() == NULL &&
- (sb->s_flags & MS_RDONLY))
+ if (errno == -EROFS && journal_current_handle() == NULL && sb_rdonly(sb))
return;
if (ext4_error_ratelimit(sb)) {
@@ -596,57 +965,9 @@ void __ext4_std_error(struct super_block *sb, const char *function,
printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
sb->s_id, function, line, errstr);
}
+ fsnotify_sb_error(sb, NULL, errno ? errno : EFSCORRUPTED);
- save_error_info(sb, function, line);
- ext4_handle_error(sb);
-}
-
-/*
- * ext4_abort is a much stronger failure handler than ext4_error. The
- * abort function may be used to deal with unrecoverable failures such
- * as journal IO errors or ENOMEM at a critical moment in log management.
- *
- * We unconditionally force the filesystem into an ABORT|READONLY state,
- * unless the error response on the fs has been set to panic in which
- * case we take the easy way out and panic immediately.
- */
-
-void __ext4_abort(struct super_block *sb, const char *function,
- unsigned int line, const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
-
- if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
- return;
-
- save_error_info(sb, function, line);
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: %pV\n",
- sb->s_id, function, line, &vaf);
- va_end(args);
-
- if ((sb->s_flags & MS_RDONLY) == 0) {
- ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
- EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
- /*
- * Make sure updated value of ->s_mount_flags will be visible
- * before ->s_flags update
- */
- smp_wmb();
- sb->s_flags |= MS_RDONLY;
- if (EXT4_SB(sb)->s_journal)
- jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
- save_error_info(sb, function, line);
- }
- if (test_opt(sb, ERRORS_PANIC)) {
- if (EXT4_SB(sb)->s_journal &&
- !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
- return;
- panic("EXT4-fs panic from previous error\n");
- }
+ ext4_handle_error(sb, false, -errno, 0, 0, function, line);
}
void __ext4_msg(struct super_block *sb,
@@ -655,19 +976,29 @@ void __ext4_msg(struct super_block *sb,
struct va_format vaf;
va_list args;
- if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs"))
- return;
+ if (sb) {
+ atomic_inc(&EXT4_SB(sb)->s_msg_count);
+ if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state),
+ "EXT4-fs"))
+ return;
+ }
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
- printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
+ if (sb)
+ printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
+ else
+ printk("%sEXT4-fs: %pV\n", prefix, &vaf);
va_end(args);
}
-#define ext4_warning_ratelimit(sb) \
- ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), \
- "EXT4-fs warning")
+static int ext4_warning_ratelimit(struct super_block *sb)
+{
+ atomic_inc(&EXT4_SB(sb)->s_warning_count);
+ return ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
+ "EXT4-fs warning");
+}
void __ext4_warning(struct super_block *sb, const char *function,
unsigned int line, const char *fmt, ...)
@@ -713,15 +1044,11 @@ __acquires(bitlock)
{
struct va_format vaf;
va_list args;
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
+ if (unlikely(ext4_emergency_state(sb)))
return;
- es->s_last_error_ino = cpu_to_le32(ino);
- es->s_last_error_block = cpu_to_le64(block);
- __save_error_info(sb, function, line);
-
+ trace_ext4_error(sb, function, line);
if (ext4_error_ratelimit(sb)) {
va_start(args, fmt);
vaf.fmt = fmt;
@@ -738,12 +1065,18 @@ __acquires(bitlock)
}
if (test_opt(sb, ERRORS_CONT)) {
- ext4_commit_super(sb, 0);
+ if (test_opt(sb, WARN_ON_ERROR))
+ WARN_ON_ONCE(1);
+ EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+ if (!bdev_read_only(sb->s_bdev)) {
+ save_error_info(sb, EFSCORRUPTED, ino, block, function,
+ line);
+ schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
+ }
return;
}
-
ext4_unlock_group(sb, grp);
- ext4_handle_error(sb);
+ ext4_handle_error(sb, false, EFSCORRUPTED, ino, block, function, line);
/*
* We only get here in the ERRORS_RO case; relocking the group
* may be dangerous, but nothing bad will happen since the
@@ -759,6 +1092,38 @@ __acquires(bitlock)
return;
}
+void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
+ ext4_group_t group,
+ unsigned int flags)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+ struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
+ int ret;
+
+ if (!grp || !gdp)
+ return;
+ if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) {
+ ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
+ &grp->bb_state);
+ if (!ret)
+ percpu_counter_sub(&sbi->s_freeclusters_counter,
+ grp->bb_free);
+ }
+
+ if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) {
+ ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
+ &grp->bb_state);
+ if (!ret && gdp) {
+ int count;
+
+ count = ext4_free_inodes_count(sb, gdp);
+ percpu_counter_sub(&sbi->s_freeinodes_counter,
+ count);
+ }
+ }
+}
+
void ext4_update_dynamic_rev(struct super_block *sb)
{
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
@@ -784,43 +1149,6 @@ void ext4_update_dynamic_rev(struct super_block *sb)
*/
}
-/*
- * Open the external journal device
- */
-static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
-{
- struct block_device *bdev;
- char b[BDEVNAME_SIZE];
-
- bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
- if (IS_ERR(bdev))
- goto fail;
- return bdev;
-
-fail:
- ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
- __bdevname(dev, b), PTR_ERR(bdev));
- return NULL;
-}
-
-/*
- * Release the journal device
- */
-static void ext4_blkdev_put(struct block_device *bdev)
-{
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
-}
-
-static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
-{
- struct block_device *bdev;
- bdev = sbi->journal_bdev;
- if (bdev) {
- ext4_blkdev_put(bdev);
- sbi->journal_bdev = NULL;
- }
-}
-
static inline struct inode *orphan_list_entry(struct list_head *l)
{
return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
@@ -847,67 +1175,165 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
#ifdef CONFIG_QUOTA
static int ext4_quota_off(struct super_block *sb, int type);
-static inline void ext4_quota_off_umount(struct super_block *sb)
+static inline void ext4_quotas_off(struct super_block *sb, int type)
{
- int type;
+ BUG_ON(type > EXT4_MAXQUOTAS);
/* Use our quota_off function to clear inode flags etc. */
- for (type = 0; type < EXT4_MAXQUOTAS; type++)
+ for (type--; type >= 0; type--)
ext4_quota_off(sb, type);
}
+
+/*
+ * This is a helper function which is used in the mount/remount
+ * codepaths (which holds s_umount) to fetch the quota file name.
+ */
+static inline char *get_qf_name(struct super_block *sb,
+ struct ext4_sb_info *sbi,
+ int type)
+{
+ return rcu_dereference_protected(sbi->s_qf_names[type],
+ lockdep_is_held(&sb->s_umount));
+}
#else
-static inline void ext4_quota_off_umount(struct super_block *sb)
+static inline void ext4_quotas_off(struct super_block *sb, int type)
{
}
#endif
+static int ext4_percpu_param_init(struct ext4_sb_info *sbi)
+{
+ ext4_fsblk_t block;
+ int err;
+
+ block = ext4_count_free_clusters(sbi->s_sb);
+ ext4_free_blocks_count_set(sbi->s_es, EXT4_C2B(sbi, block));
+ err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
+ GFP_KERNEL);
+ if (!err) {
+ unsigned long freei = ext4_count_free_inodes(sbi->s_sb);
+ sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
+ err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
+ GFP_KERNEL);
+ }
+ if (!err)
+ err = percpu_counter_init(&sbi->s_dirs_counter,
+ ext4_count_dirs(sbi->s_sb), GFP_KERNEL);
+ if (!err)
+ err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
+ GFP_KERNEL);
+ if (!err)
+ err = percpu_counter_init(&sbi->s_sra_exceeded_retry_limit, 0,
+ GFP_KERNEL);
+ if (!err)
+ err = percpu_init_rwsem(&sbi->s_writepages_rwsem);
+
+ if (err)
+ ext4_msg(sbi->s_sb, KERN_ERR, "insufficient memory");
+
+ return err;
+}
+
+static void ext4_percpu_param_destroy(struct ext4_sb_info *sbi)
+{
+ percpu_counter_destroy(&sbi->s_freeclusters_counter);
+ percpu_counter_destroy(&sbi->s_freeinodes_counter);
+ percpu_counter_destroy(&sbi->s_dirs_counter);
+ percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
+ percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
+ percpu_free_rwsem(&sbi->s_writepages_rwsem);
+}
+
+static void ext4_group_desc_free(struct ext4_sb_info *sbi)
+{
+ struct buffer_head **group_desc;
+ int i;
+
+ rcu_read_lock();
+ group_desc = rcu_dereference(sbi->s_group_desc);
+ for (i = 0; i < sbi->s_gdb_count; i++)
+ brelse(group_desc[i]);
+ kvfree(group_desc);
+ rcu_read_unlock();
+}
+
+static void ext4_flex_groups_free(struct ext4_sb_info *sbi)
+{
+ struct flex_groups **flex_groups;
+ int i;
+
+ rcu_read_lock();
+ flex_groups = rcu_dereference(sbi->s_flex_groups);
+ if (flex_groups) {
+ for (i = 0; i < sbi->s_flex_groups_allocated; i++)
+ kvfree(flex_groups[i]);
+ kvfree(flex_groups);
+ }
+ rcu_read_unlock();
+}
+
static void ext4_put_super(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
int aborted = 0;
- int i, err;
+ int err;
+
+ /*
+ * Unregister sysfs before destroying jbd2 journal.
+ * Since we could still access attr_journal_task attribute via sysfs
+ * path which could have sbi->s_journal->j_task as NULL
+ * Unregister sysfs before flush sbi->s_sb_upd_work.
+ * Since user may read /proc/fs/ext4/xx/mb_groups during umount, If
+ * read metadata verify failed then will queue error work.
+ * update_super_work will call start_this_handle may trigger
+ * BUG_ON.
+ */
+ ext4_unregister_sysfs(sb);
+
+ if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs unmount"))
+ ext4_msg(sb, KERN_INFO, "unmounting filesystem %pU.",
+ &sb->s_uuid);
ext4_unregister_li_request(sb);
- ext4_quota_off_umount(sb);
+ ext4_quotas_off(sb, EXT4_MAXQUOTAS);
- flush_workqueue(sbi->rsv_conversion_wq);
destroy_workqueue(sbi->rsv_conversion_wq);
+ ext4_release_orphan_info(sb);
if (sbi->s_journal) {
aborted = is_journal_aborted(sbi->s_journal);
- err = jbd2_journal_destroy(sbi->s_journal);
- sbi->s_journal = NULL;
- if ((err < 0) && !aborted)
- ext4_abort(sb, "Couldn't clean up the journal");
- }
+ err = ext4_journal_destroy(sbi, sbi->s_journal);
+ if ((err < 0) && !aborted) {
+ ext4_abort(sb, -err, "Couldn't clean up the journal");
+ }
+ } else
+ flush_work(&sbi->s_sb_upd_work);
- ext4_unregister_sysfs(sb);
ext4_es_unregister_shrinker(sbi);
- del_timer_sync(&sbi->s_err_report);
+ timer_shutdown_sync(&sbi->s_err_report);
ext4_release_system_zone(sb);
ext4_mb_release(sb);
ext4_ext_release(sb);
- if (!(sb->s_flags & MS_RDONLY) && !aborted) {
- ext4_clear_feature_journal_needs_recovery(sb);
- es->s_state = cpu_to_le16(sbi->s_mount_state);
+ if (!ext4_emergency_state(sb) && !sb_rdonly(sb)) {
+ if (!aborted) {
+ ext4_clear_feature_journal_needs_recovery(sb);
+ ext4_clear_feature_orphan_present(sb);
+ es->s_state = cpu_to_le16(sbi->s_mount_state);
+ }
+ ext4_commit_super(sb);
}
- if (!(sb->s_flags & MS_RDONLY))
- ext4_commit_super(sb, 1);
- for (i = 0; i < sbi->s_gdb_count; i++)
- brelse(sbi->s_group_desc[i]);
- kvfree(sbi->s_group_desc);
- kvfree(sbi->s_flex_groups);
- percpu_counter_destroy(&sbi->s_freeclusters_counter);
- percpu_counter_destroy(&sbi->s_freeinodes_counter);
- percpu_counter_destroy(&sbi->s_dirs_counter);
- percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
- percpu_free_rwsem(&sbi->s_journal_flag_rwsem);
+ ext4_group_desc_free(sbi);
+ ext4_flex_groups_free(sbi);
+
+ WARN_ON_ONCE(!(sbi->s_mount_state & EXT4_ERROR_FS) &&
+ percpu_counter_sum(&sbi->s_dirtyclusters_counter));
+ ext4_percpu_param_destroy(sbi);
#ifdef CONFIG_QUOTA
- for (i = 0; i < EXT4_MAXQUOTAS; i++)
- kfree(sbi->s_qf_names[i]);
+ for (int i = 0; i < EXT4_MAXQUOTAS; i++)
+ kfree(get_qf_name(sb, sbi, i));
#endif
/* Debugging code just in case the in-memory inode orphan list
@@ -916,30 +1342,28 @@ static void ext4_put_super(struct super_block *sb)
* in-memory list had better be clean by this point. */
if (!list_empty(&sbi->s_orphan))
dump_orphan_list(sb, sbi);
- J_ASSERT(list_empty(&sbi->s_orphan));
+ ASSERT(list_empty(&sbi->s_orphan));
sync_blockdev(sb->s_bdev);
invalidate_bdev(sb->s_bdev);
- if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
+ if (sbi->s_journal_bdev_file) {
/*
* Invalidate the journal device's buffers. We don't want them
* floating about in memory - the physical journal device may
* hotswapped, and it breaks the `ro-after' testing code.
*/
- sync_blockdev(sbi->journal_bdev);
- invalidate_bdev(sbi->journal_bdev);
- ext4_blkdev_remove(sbi);
- }
- if (sbi->s_ea_inode_cache) {
- ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
- sbi->s_ea_inode_cache = NULL;
- }
- if (sbi->s_ea_block_cache) {
- ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
- sbi->s_ea_block_cache = NULL;
+ sync_blockdev(file_bdev(sbi->s_journal_bdev_file));
+ invalidate_bdev(file_bdev(sbi->s_journal_bdev_file));
}
- if (sbi->s_mmp_tsk)
- kthread_stop(sbi->s_mmp_tsk);
+
+ ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
+ sbi->s_ea_inode_cache = NULL;
+
+ ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
+ sbi->s_ea_block_cache = NULL;
+
+ ext4_stop_mmpd(sbi);
+
brelse(sbi->s_sbh);
sb->s_fs_info = NULL;
/*
@@ -948,9 +1372,12 @@ static void ext4_put_super(struct super_block *sb)
*/
kobject_put(&sbi->s_kobj);
wait_for_completion(&sbi->s_kobj_unregister);
- if (sbi->s_chksum_driver)
- crypto_free_shash(sbi->s_chksum_driver);
kfree(sbi->s_blockgroup_lock);
+ fs_put_dax(sbi->s_daxdev, NULL);
+ fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
+#if IS_ENABLED(CONFIG_UNICODE)
+ utf8_unload(sb->s_encoding);
+#endif
kfree(sbi);
}
@@ -963,24 +1390,27 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
{
struct ext4_inode_info *ei;
- ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
+ ei = alloc_inode_sb(sb, ext4_inode_cachep, GFP_NOFS);
if (!ei)
return NULL;
- ei->vfs_inode.i_version = 1;
+ inode_set_iversion(&ei->vfs_inode, 1);
+ ei->i_flags = 0;
+ ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
spin_lock_init(&ei->i_raw_lock);
- INIT_LIST_HEAD(&ei->i_prealloc_list);
- spin_lock_init(&ei->i_prealloc_lock);
+ ei->i_prealloc_node = RB_ROOT;
+ atomic_set(&ei->i_prealloc_active, 0);
+ rwlock_init(&ei->i_prealloc_lock);
ext4_es_init_tree(&ei->i_es_tree);
rwlock_init(&ei->i_es_lock);
INIT_LIST_HEAD(&ei->i_es_list);
ei->i_es_all_nr = 0;
ei->i_es_shk_nr = 0;
ei->i_es_shrink_lblk = 0;
+ ei->i_es_seq = 0;
ei->i_reserved_data_blocks = 0;
- ei->i_da_metadata_calc_len = 0;
- ei->i_da_metadata_calc_last_lblock = 0;
spin_lock_init(&(ei->i_block_reservation_lock));
+ ext4_init_pending_tree(&ei->i_pending_tree);
#ifdef CONFIG_QUOTA
ei->i_reserved_quota = 0;
memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
@@ -990,57 +1420,83 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
spin_lock_init(&ei->i_completed_io_lock);
ei->i_sync_tid = 0;
ei->i_datasync_tid = 0;
- atomic_set(&ei->i_unwritten, 0);
INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
+ ext4_fc_init_inode(&ei->vfs_inode);
+ spin_lock_init(&ei->i_fc_lock);
return &ei->vfs_inode;
}
static int ext4_drop_inode(struct inode *inode)
{
- int drop = generic_drop_inode(inode);
+ int drop = inode_generic_drop(inode);
+
+ if (!drop)
+ drop = fscrypt_drop_inode(inode);
trace_ext4_drop_inode(inode, drop);
return drop;
}
-static void ext4_i_callback(struct rcu_head *head)
+static void ext4_free_in_core_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
+ fscrypt_free_inode(inode);
+ if (!list_empty(&(EXT4_I(inode)->i_fc_list))) {
+ pr_warn("%s: inode %ld still in fc list",
+ __func__, inode->i_ino);
+ }
kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
}
static void ext4_destroy_inode(struct inode *inode)
{
- if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
+ if (ext4_inode_orphan_tracked(inode)) {
ext4_msg(inode->i_sb, KERN_ERR,
- "Inode %lu (%p): orphan list check failed!",
+ "Inode %lu (%p): inode tracked as orphan!",
inode->i_ino, EXT4_I(inode));
print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
EXT4_I(inode), sizeof(struct ext4_inode_info),
true);
dump_stack();
}
- call_rcu(&inode->i_rcu, ext4_i_callback);
+
+ if (!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ERROR_FS) &&
+ WARN_ON_ONCE(EXT4_I(inode)->i_reserved_data_blocks))
+ ext4_msg(inode->i_sb, KERN_ERR,
+ "Inode %lu (%p): i_reserved_data_blocks (%u) not cleared!",
+ inode->i_ino, EXT4_I(inode),
+ EXT4_I(inode)->i_reserved_data_blocks);
+}
+
+static void ext4_shutdown(struct super_block *sb)
+{
+ ext4_force_shutdown(sb, EXT4_GOING_FLAGS_NOLOGFLUSH);
}
static void init_once(void *foo)
{
- struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
+ struct ext4_inode_info *ei = foo;
INIT_LIST_HEAD(&ei->i_orphan);
init_rwsem(&ei->xattr_sem);
init_rwsem(&ei->i_data_sem);
- init_rwsem(&ei->i_mmap_sem);
inode_init_once(&ei->vfs_inode);
+ ext4_fc_init_inode(&ei->vfs_inode);
+#ifdef CONFIG_FS_ENCRYPTION
+ ei->i_crypt_info = NULL;
+#endif
+#ifdef CONFIG_FS_VERITY
+ ei->i_verity_info = NULL;
+#endif
}
static int __init init_inodecache(void)
{
- ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
- sizeof(struct ext4_inode_info),
- 0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
- init_once);
+ ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache",
+ sizeof(struct ext4_inode_info), 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
+ offsetof(struct ext4_inode_info, i_data),
+ sizeof_field(struct ext4_inode_info, i_data),
+ init_once);
if (ext4_inode_cachep == NULL)
return -ENOMEM;
return 0;
@@ -1058,20 +1514,20 @@ static void destroy_inodecache(void)
void ext4_clear_inode(struct inode *inode)
{
+ ext4_fc_del(inode);
invalidate_inode_buffers(inode);
clear_inode(inode);
- dquot_drop(inode);
ext4_discard_preallocations(inode);
ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
+ dquot_drop(inode);
if (EXT4_I(inode)->jinode) {
jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
EXT4_I(inode)->jinode);
jbd2_free_inode(EXT4_I(inode)->jinode);
EXT4_I(inode)->jinode = NULL;
}
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
- fscrypt_put_encryption_info(inode, NULL);
-#endif
+ fscrypt_put_encryption_info(inode);
+ fsverity_cleanup_inode(inode);
}
static struct inode *ext4_nfs_get_inode(struct super_block *sb,
@@ -1079,20 +1535,11 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb,
{
struct inode *inode;
- if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
- return ERR_PTR(-ESTALE);
- if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
- return ERR_PTR(-ESTALE);
-
- /* iget isn't really right if the inode is currently unallocated!!
- *
- * ext4_read_inode will return a bad_inode if the inode had been
- * deleted, so we should be safe.
- *
+ /*
* Currently we don't know the generation for parent directory, so
* a generation of 0 means "accept any"
*/
- inode = ext4_iget_normal(sb, ino);
+ inode = ext4_iget(sb, ino, EXT4_IGET_HANDLE);
if (IS_ERR(inode))
return ERR_CAST(inode);
if (generation && inode->i_generation != generation) {
@@ -1117,136 +1564,16 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
ext4_nfs_get_inode);
}
-/*
- * Try to release metadata pages (indirect blocks, directories) which are
- * mapped via the block device. Since these pages could have journal heads
- * which would prevent try_to_free_buffers() from freeing them, we must use
- * jbd2 layer's try_to_free_buffers() function to release them.
- */
-static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
- gfp_t wait)
-{
- journal_t *journal = EXT4_SB(sb)->s_journal;
-
- WARN_ON(PageChecked(page));
- if (!page_has_buffers(page))
- return 0;
- if (journal)
- return jbd2_journal_try_to_free_buffers(journal, page,
- wait & ~__GFP_DIRECT_RECLAIM);
- return try_to_free_buffers(page);
-}
-
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
-static int ext4_get_context(struct inode *inode, void *ctx, size_t len)
-{
- return ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
- EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len);
-}
-
-static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
- void *fs_data)
+static int ext4_nfs_commit_metadata(struct inode *inode)
{
- handle_t *handle = fs_data;
- int res, res2, credits, retries = 0;
-
- /*
- * Encrypting the root directory is not allowed because e2fsck expects
- * lost+found to exist and be unencrypted, and encrypting the root
- * directory would imply encrypting the lost+found directory as well as
- * the filename "lost+found" itself.
- */
- if (inode->i_ino == EXT4_ROOT_INO)
- return -EPERM;
-
- res = ext4_convert_inline_data(inode);
- if (res)
- return res;
-
- /*
- * If a journal handle was specified, then the encryption context is
- * being set on a new inode via inheritance and is part of a larger
- * transaction to create the inode. Otherwise the encryption context is
- * being set on an existing inode in its own transaction. Only in the
- * latter case should the "retry on ENOSPC" logic be used.
- */
-
- if (handle) {
- res = ext4_xattr_set_handle(handle, inode,
- EXT4_XATTR_INDEX_ENCRYPTION,
- EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
- ctx, len, 0);
- if (!res) {
- ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
- ext4_clear_inode_state(inode,
- EXT4_STATE_MAY_INLINE_DATA);
- /*
- * Update inode->i_flags - e.g. S_DAX may get disabled
- */
- ext4_set_inode_flags(inode);
- }
- return res;
- }
-
- res = dquot_initialize(inode);
- if (res)
- return res;
-retry:
- res = ext4_xattr_set_credits(inode, len, false /* is_create */,
- &credits);
- if (res)
- return res;
-
- handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
- if (IS_ERR(handle))
- return PTR_ERR(handle);
-
- res = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_ENCRYPTION,
- EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
- ctx, len, 0);
- if (!res) {
- ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
- /* Update inode->i_flags - e.g. S_DAX may get disabled */
- ext4_set_inode_flags(inode);
- res = ext4_mark_inode_dirty(handle, inode);
- if (res)
- EXT4_ERROR_INODE(inode, "Failed to mark inode dirty");
- }
- res2 = ext4_journal_stop(handle);
-
- if (res == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
- goto retry;
- if (!res)
- res = res2;
- return res;
-}
-
-static bool ext4_dummy_context(struct inode *inode)
-{
- return DUMMY_ENCRYPTION_ENABLED(EXT4_SB(inode->i_sb));
-}
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL
+ };
-static unsigned ext4_max_namelen(struct inode *inode)
-{
- return S_ISLNK(inode->i_mode) ? inode->i_sb->s_blocksize :
- EXT4_NAME_LEN;
+ trace_ext4_nfs_commit_metadata(inode);
+ return ext4_write_inode(inode, &wbc);
}
-static const struct fscrypt_operations ext4_cryptops = {
- .key_prefix = "ext4:",
- .get_context = ext4_get_context,
- .set_context = ext4_set_context,
- .dummy_context = ext4_dummy_context,
- .is_encrypted = ext4_encrypted_inode,
- .empty_dir = ext4_empty_dir,
- .max_namelen = ext4_max_namelen,
-};
-#else
-static const struct fscrypt_operations ext4_cryptops = {
- .is_encrypted = ext4_encrypted_inode,
-};
-#endif
-
#ifdef CONFIG_QUOTA
static const char * const quotatypes[] = INITQFNAMES;
#define QTYPE2NAME(t) (quotatypes[t])
@@ -1258,17 +1585,14 @@ static int ext4_mark_dquot_dirty(struct dquot *dquot);
static int ext4_write_info(struct super_block *sb, int type);
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
const struct path *path);
-static int ext4_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
size_t len, loff_t off);
static ssize_t ext4_quota_write(struct super_block *sb, int type,
const char *data, size_t len, loff_t off);
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
unsigned int flags);
-static int ext4_enable_quotas(struct super_block *sb);
-static int ext4_get_next_id(struct super_block *sb, struct kqid *qid);
-static struct dquot **ext4_get_dquots(struct inode *inode)
+static struct dquot __rcu **ext4_get_dquots(struct inode *inode)
{
return EXT4_I(inode)->i_dquot;
}
@@ -1284,7 +1608,7 @@ static const struct dquot_operations ext4_quota_operations = {
.destroy_dquot = dquot_destroy,
.get_projid = ext4_get_projid,
.get_inode_usage = ext4_get_inode_usage,
- .get_next_id = ext4_get_next_id,
+ .get_next_id = dquot_get_next_id,
};
static const struct quotactl_ops ext4_qctl_operations = {
@@ -1301,6 +1625,7 @@ static const struct quotactl_ops ext4_qctl_operations = {
static const struct super_operations ext4_sops = {
.alloc_inode = ext4_alloc_inode,
+ .free_inode = ext4_free_in_core_inode,
.destroy_inode = ext4_destroy_inode,
.write_inode = ext4_write_inode,
.dirty_inode = ext4_dirty_inode,
@@ -1311,245 +1636,209 @@ static const struct super_operations ext4_sops = {
.freeze_fs = ext4_freeze,
.unfreeze_fs = ext4_unfreeze,
.statfs = ext4_statfs,
- .remount_fs = ext4_remount,
.show_options = ext4_show_options,
+ .shutdown = ext4_shutdown,
#ifdef CONFIG_QUOTA
.quota_read = ext4_quota_read,
.quota_write = ext4_quota_write,
.get_dquots = ext4_get_dquots,
#endif
- .bdev_try_to_free_page = bdev_try_to_free_page,
};
static const struct export_operations ext4_export_ops = {
+ .encode_fh = generic_encode_ino32_fh,
.fh_to_dentry = ext4_fh_to_dentry,
.fh_to_parent = ext4_fh_to_parent,
.get_parent = ext4_get_parent,
+ .commit_metadata = ext4_nfs_commit_metadata,
};
enum {
Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
- Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
+ Opt_resgid, Opt_resuid, Opt_sb,
Opt_nouid32, Opt_debug, Opt_removed,
- Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
+ Opt_user_xattr, Opt_acl,
Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption,
- Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
- Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
+ Opt_inlinecrypt,
+ Opt_usrjquota, Opt_grpjquota, Opt_quota,
Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
- Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax,
- Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
- Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize,
+ Opt_usrquota, Opt_grpquota, Opt_prjquota,
+ Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never,
+ Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
+ Opt_nowarn_on_error, Opt_mblk_io_submit, Opt_debug_want_extra_isize,
Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
Opt_inode_readahead_blks, Opt_journal_ioprio,
Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
+ Opt_no_prefetch_block_bitmaps, Opt_mb_optimize_scan,
+ Opt_errors, Opt_data, Opt_data_err, Opt_jqfmt, Opt_dax_type,
+#ifdef CONFIG_EXT4_DEBUG
+ Opt_fc_debug_max_replay, Opt_fc_debug_force
+#endif
};
-static const match_table_t tokens = {
- {Opt_bsd_df, "bsddf"},
- {Opt_minix_df, "minixdf"},
- {Opt_grpid, "grpid"},
- {Opt_grpid, "bsdgroups"},
- {Opt_nogrpid, "nogrpid"},
- {Opt_nogrpid, "sysvgroups"},
- {Opt_resgid, "resgid=%u"},
- {Opt_resuid, "resuid=%u"},
- {Opt_sb, "sb=%u"},
- {Opt_err_cont, "errors=continue"},
- {Opt_err_panic, "errors=panic"},
- {Opt_err_ro, "errors=remount-ro"},
- {Opt_nouid32, "nouid32"},
- {Opt_debug, "debug"},
- {Opt_removed, "oldalloc"},
- {Opt_removed, "orlov"},
- {Opt_user_xattr, "user_xattr"},
- {Opt_nouser_xattr, "nouser_xattr"},
- {Opt_acl, "acl"},
- {Opt_noacl, "noacl"},
- {Opt_noload, "norecovery"},
- {Opt_noload, "noload"},
- {Opt_removed, "nobh"},
- {Opt_removed, "bh"},
- {Opt_commit, "commit=%u"},
- {Opt_min_batch_time, "min_batch_time=%u"},
- {Opt_max_batch_time, "max_batch_time=%u"},
- {Opt_journal_dev, "journal_dev=%u"},
- {Opt_journal_path, "journal_path=%s"},
- {Opt_journal_checksum, "journal_checksum"},
- {Opt_nojournal_checksum, "nojournal_checksum"},
- {Opt_journal_async_commit, "journal_async_commit"},
- {Opt_abort, "abort"},
- {Opt_data_journal, "data=journal"},
- {Opt_data_ordered, "data=ordered"},
- {Opt_data_writeback, "data=writeback"},
- {Opt_data_err_abort, "data_err=abort"},
- {Opt_data_err_ignore, "data_err=ignore"},
- {Opt_offusrjquota, "usrjquota="},
- {Opt_usrjquota, "usrjquota=%s"},
- {Opt_offgrpjquota, "grpjquota="},
- {Opt_grpjquota, "grpjquota=%s"},
- {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
- {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
- {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
- {Opt_grpquota, "grpquota"},
- {Opt_noquota, "noquota"},
- {Opt_quota, "quota"},
- {Opt_usrquota, "usrquota"},
- {Opt_prjquota, "prjquota"},
- {Opt_barrier, "barrier=%u"},
- {Opt_barrier, "barrier"},
- {Opt_nobarrier, "nobarrier"},
- {Opt_i_version, "i_version"},
- {Opt_dax, "dax"},
- {Opt_stripe, "stripe=%u"},
- {Opt_delalloc, "delalloc"},
- {Opt_lazytime, "lazytime"},
- {Opt_nolazytime, "nolazytime"},
- {Opt_debug_want_extra_isize, "debug_want_extra_isize=%u"},
- {Opt_nodelalloc, "nodelalloc"},
- {Opt_removed, "mblk_io_submit"},
- {Opt_removed, "nomblk_io_submit"},
- {Opt_block_validity, "block_validity"},
- {Opt_noblock_validity, "noblock_validity"},
- {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
- {Opt_journal_ioprio, "journal_ioprio=%u"},
- {Opt_auto_da_alloc, "auto_da_alloc=%u"},
- {Opt_auto_da_alloc, "auto_da_alloc"},
- {Opt_noauto_da_alloc, "noauto_da_alloc"},
- {Opt_dioread_nolock, "dioread_nolock"},
- {Opt_dioread_lock, "dioread_lock"},
- {Opt_discard, "discard"},
- {Opt_nodiscard, "nodiscard"},
- {Opt_init_itable, "init_itable=%u"},
- {Opt_init_itable, "init_itable"},
- {Opt_noinit_itable, "noinit_itable"},
- {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
- {Opt_test_dummy_encryption, "test_dummy_encryption"},
- {Opt_nombcache, "nombcache"},
- {Opt_nombcache, "no_mbcache"}, /* for backward compatibility */
- {Opt_removed, "check=none"}, /* mount option from ext2/3 */
- {Opt_removed, "nocheck"}, /* mount option from ext2/3 */
- {Opt_removed, "reservation"}, /* mount option from ext2/3 */
- {Opt_removed, "noreservation"}, /* mount option from ext2/3 */
- {Opt_removed, "journal=%u"}, /* mount option from ext2/3 */
- {Opt_err, NULL},
+static const struct constant_table ext4_param_errors[] = {
+ {"continue", EXT4_MOUNT_ERRORS_CONT},
+ {"panic", EXT4_MOUNT_ERRORS_PANIC},
+ {"remount-ro", EXT4_MOUNT_ERRORS_RO},
+ {}
};
-static ext4_fsblk_t get_sb_block(void **data)
-{
- ext4_fsblk_t sb_block;
- char *options = (char *) *data;
-
- if (!options || strncmp(options, "sb=", 3) != 0)
- return 1; /* Default location */
-
- options += 3;
- /* TODO: use simple_strtoll with >32bit ext4 */
- sb_block = simple_strtoul(options, &options, 0);
- if (*options && *options != ',') {
- printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
- (char *) *data);
- return 1;
- }
- if (*options == ',')
- options++;
- *data = (void *) options;
-
- return sb_block;
-}
-
-#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
-static const char deprecated_msg[] =
- "Mount option \"%s\" will be removed by %s\n"
- "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
-
-#ifdef CONFIG_QUOTA
-static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- char *qname;
- int ret = -1;
+static const struct constant_table ext4_param_data[] = {
+ {"journal", EXT4_MOUNT_JOURNAL_DATA},
+ {"ordered", EXT4_MOUNT_ORDERED_DATA},
+ {"writeback", EXT4_MOUNT_WRITEBACK_DATA},
+ {}
+};
- if (sb_any_quota_loaded(sb) &&
- !sbi->s_qf_names[qtype]) {
- ext4_msg(sb, KERN_ERR,
- "Cannot change journaled "
- "quota options when quota turned on");
- return -1;
- }
- if (ext4_has_feature_quota(sb)) {
- ext4_msg(sb, KERN_INFO, "Journaled quota options "
- "ignored when QUOTA feature is enabled");
- return 1;
- }
- qname = match_strdup(args);
- if (!qname) {
- ext4_msg(sb, KERN_ERR,
- "Not enough memory for storing quotafile name");
- return -1;
- }
- if (sbi->s_qf_names[qtype]) {
- if (strcmp(sbi->s_qf_names[qtype], qname) == 0)
- ret = 1;
- else
- ext4_msg(sb, KERN_ERR,
- "%s quota file already specified",
- QTYPE2NAME(qtype));
- goto errout;
- }
- if (strchr(qname, '/')) {
- ext4_msg(sb, KERN_ERR,
- "quotafile must be on filesystem root");
- goto errout;
- }
- sbi->s_qf_names[qtype] = qname;
- set_opt(sb, QUOTA);
- return 1;
-errout:
- kfree(qname);
- return ret;
-}
+static const struct constant_table ext4_param_data_err[] = {
+ {"abort", Opt_data_err_abort},
+ {"ignore", Opt_data_err_ignore},
+ {}
+};
-static int clear_qf_name(struct super_block *sb, int qtype)
-{
+static const struct constant_table ext4_param_jqfmt[] = {
+ {"vfsold", QFMT_VFS_OLD},
+ {"vfsv0", QFMT_VFS_V0},
+ {"vfsv1", QFMT_VFS_V1},
+ {}
+};
- struct ext4_sb_info *sbi = EXT4_SB(sb);
+static const struct constant_table ext4_param_dax[] = {
+ {"always", Opt_dax_always},
+ {"inode", Opt_dax_inode},
+ {"never", Opt_dax_never},
+ {}
+};
- if (sb_any_quota_loaded(sb) &&
- sbi->s_qf_names[qtype]) {
- ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"
- " when quota turned on");
- return -1;
- }
- kfree(sbi->s_qf_names[qtype]);
- sbi->s_qf_names[qtype] = NULL;
- return 1;
-}
+/*
+ * Mount option specification
+ * We don't use fsparam_flag_no because of the way we set the
+ * options and the way we show them in _ext4_show_options(). To
+ * keep the changes to a minimum, let's keep the negative options
+ * separate for now.
+ */
+static const struct fs_parameter_spec ext4_param_specs[] = {
+ fsparam_flag ("bsddf", Opt_bsd_df),
+ fsparam_flag ("minixdf", Opt_minix_df),
+ fsparam_flag ("grpid", Opt_grpid),
+ fsparam_flag ("bsdgroups", Opt_grpid),
+ fsparam_flag ("nogrpid", Opt_nogrpid),
+ fsparam_flag ("sysvgroups", Opt_nogrpid),
+ fsparam_gid ("resgid", Opt_resgid),
+ fsparam_uid ("resuid", Opt_resuid),
+ fsparam_u32 ("sb", Opt_sb),
+ fsparam_enum ("errors", Opt_errors, ext4_param_errors),
+ fsparam_flag ("nouid32", Opt_nouid32),
+ fsparam_flag ("debug", Opt_debug),
+ fsparam_flag ("oldalloc", Opt_removed),
+ fsparam_flag ("orlov", Opt_removed),
+ fsparam_flag ("user_xattr", Opt_user_xattr),
+ fsparam_flag ("acl", Opt_acl),
+ fsparam_flag ("norecovery", Opt_noload),
+ fsparam_flag ("noload", Opt_noload),
+ fsparam_flag ("bh", Opt_removed),
+ fsparam_flag ("nobh", Opt_removed),
+ fsparam_u32 ("commit", Opt_commit),
+ fsparam_u32 ("min_batch_time", Opt_min_batch_time),
+ fsparam_u32 ("max_batch_time", Opt_max_batch_time),
+ fsparam_u32 ("journal_dev", Opt_journal_dev),
+ fsparam_bdev ("journal_path", Opt_journal_path),
+ fsparam_flag ("journal_checksum", Opt_journal_checksum),
+ fsparam_flag ("nojournal_checksum", Opt_nojournal_checksum),
+ fsparam_flag ("journal_async_commit",Opt_journal_async_commit),
+ fsparam_flag ("abort", Opt_abort),
+ fsparam_enum ("data", Opt_data, ext4_param_data),
+ fsparam_enum ("data_err", Opt_data_err,
+ ext4_param_data_err),
+ fsparam_string_empty
+ ("usrjquota", Opt_usrjquota),
+ fsparam_string_empty
+ ("grpjquota", Opt_grpjquota),
+ fsparam_enum ("jqfmt", Opt_jqfmt, ext4_param_jqfmt),
+ fsparam_flag ("grpquota", Opt_grpquota),
+ fsparam_flag ("quota", Opt_quota),
+ fsparam_flag ("noquota", Opt_noquota),
+ fsparam_flag ("usrquota", Opt_usrquota),
+ fsparam_flag ("prjquota", Opt_prjquota),
+ fsparam_flag ("barrier", Opt_barrier),
+ fsparam_u32 ("barrier", Opt_barrier),
+ fsparam_flag ("nobarrier", Opt_nobarrier),
+ fsparam_flag ("i_version", Opt_removed),
+ fsparam_flag ("dax", Opt_dax),
+ fsparam_enum ("dax", Opt_dax_type, ext4_param_dax),
+ fsparam_u32 ("stripe", Opt_stripe),
+ fsparam_flag ("delalloc", Opt_delalloc),
+ fsparam_flag ("nodelalloc", Opt_nodelalloc),
+ fsparam_flag ("warn_on_error", Opt_warn_on_error),
+ fsparam_flag ("nowarn_on_error", Opt_nowarn_on_error),
+ fsparam_u32 ("debug_want_extra_isize",
+ Opt_debug_want_extra_isize),
+ fsparam_flag ("mblk_io_submit", Opt_removed),
+ fsparam_flag ("nomblk_io_submit", Opt_removed),
+ fsparam_flag ("block_validity", Opt_block_validity),
+ fsparam_flag ("noblock_validity", Opt_noblock_validity),
+ fsparam_u32 ("inode_readahead_blks",
+ Opt_inode_readahead_blks),
+ fsparam_u32 ("journal_ioprio", Opt_journal_ioprio),
+ fsparam_u32 ("auto_da_alloc", Opt_auto_da_alloc),
+ fsparam_flag ("auto_da_alloc", Opt_auto_da_alloc),
+ fsparam_flag ("noauto_da_alloc", Opt_noauto_da_alloc),
+ fsparam_flag ("dioread_nolock", Opt_dioread_nolock),
+ fsparam_flag ("nodioread_nolock", Opt_dioread_lock),
+ fsparam_flag ("dioread_lock", Opt_dioread_lock),
+ fsparam_flag ("discard", Opt_discard),
+ fsparam_flag ("nodiscard", Opt_nodiscard),
+ fsparam_u32 ("init_itable", Opt_init_itable),
+ fsparam_flag ("init_itable", Opt_init_itable),
+ fsparam_flag ("noinit_itable", Opt_noinit_itable),
+#ifdef CONFIG_EXT4_DEBUG
+ fsparam_flag ("fc_debug_force", Opt_fc_debug_force),
+ fsparam_u32 ("fc_debug_max_replay", Opt_fc_debug_max_replay),
#endif
+ fsparam_u32 ("max_dir_size_kb", Opt_max_dir_size_kb),
+ fsparam_flag ("test_dummy_encryption",
+ Opt_test_dummy_encryption),
+ fsparam_string ("test_dummy_encryption",
+ Opt_test_dummy_encryption),
+ fsparam_flag ("inlinecrypt", Opt_inlinecrypt),
+ fsparam_flag ("nombcache", Opt_nombcache),
+ fsparam_flag ("no_mbcache", Opt_nombcache), /* for backward compatibility */
+ fsparam_flag ("prefetch_block_bitmaps",
+ Opt_removed),
+ fsparam_flag ("no_prefetch_block_bitmaps",
+ Opt_no_prefetch_block_bitmaps),
+ fsparam_s32 ("mb_optimize_scan", Opt_mb_optimize_scan),
+ fsparam_string ("check", Opt_removed), /* mount option from ext2/3 */
+ fsparam_flag ("nocheck", Opt_removed), /* mount option from ext2/3 */
+ fsparam_flag ("reservation", Opt_removed), /* mount option from ext2/3 */
+ fsparam_flag ("noreservation", Opt_removed), /* mount option from ext2/3 */
+ fsparam_u32 ("journal", Opt_removed), /* mount option from ext2/3 */
+ {}
+};
+
#define MOPT_SET 0x0001
#define MOPT_CLEAR 0x0002
#define MOPT_NOSUPPORT 0x0004
#define MOPT_EXPLICIT 0x0008
-#define MOPT_CLEAR_ERR 0x0010
-#define MOPT_GTE0 0x0020
#ifdef CONFIG_QUOTA
#define MOPT_Q 0
-#define MOPT_QFMT 0x0040
+#define MOPT_QFMT 0x0010
#else
#define MOPT_Q MOPT_NOSUPPORT
#define MOPT_QFMT MOPT_NOSUPPORT
#endif
-#define MOPT_DATAJ 0x0080
-#define MOPT_NO_EXT2 0x0100
-#define MOPT_NO_EXT3 0x0200
+#define MOPT_NO_EXT2 0x0020
+#define MOPT_NO_EXT3 0x0040
#define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3)
-#define MOPT_STRING 0x0400
+#define MOPT_SKIP 0x0080
+#define MOPT_2 0x0100
static const struct mount_opts {
int token;
@@ -1572,6 +1861,9 @@ static const struct mount_opts {
MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
{Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
MOPT_EXT4_ONLY | MOPT_CLEAR},
+ {Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET},
+ {Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR},
+ {Opt_commit, 0, MOPT_NO_EXT2},
{Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
MOPT_EXT4_ONLY | MOPT_CLEAR},
{Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
@@ -1580,46 +1872,25 @@ static const struct mount_opts {
EXT4_MOUNT_JOURNAL_CHECKSUM),
MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
{Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
- {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR},
- {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
- {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
- {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT,
- MOPT_NO_EXT2},
- {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT,
- MOPT_NO_EXT2},
+ {Opt_data_err, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_NO_EXT2},
{Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
{Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
{Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
{Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
{Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
- {Opt_commit, 0, MOPT_GTE0},
- {Opt_max_batch_time, 0, MOPT_GTE0},
- {Opt_min_batch_time, 0, MOPT_GTE0},
- {Opt_inode_readahead_blks, 0, MOPT_GTE0},
- {Opt_init_itable, 0, MOPT_GTE0},
- {Opt_dax, EXT4_MOUNT_DAX, MOPT_SET},
- {Opt_stripe, 0, MOPT_GTE0},
- {Opt_resuid, 0, MOPT_GTE0},
- {Opt_resgid, 0, MOPT_GTE0},
- {Opt_journal_dev, 0, MOPT_NO_EXT2 | MOPT_GTE0},
- {Opt_journal_path, 0, MOPT_NO_EXT2 | MOPT_STRING},
- {Opt_journal_ioprio, 0, MOPT_NO_EXT2 | MOPT_GTE0},
- {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
- {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
- {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA,
- MOPT_NO_EXT2 | MOPT_DATAJ},
+ {Opt_dax_type, 0, MOPT_EXT4_ONLY},
+ {Opt_journal_dev, 0, MOPT_NO_EXT2},
+ {Opt_journal_path, 0, MOPT_NO_EXT2},
+ {Opt_journal_ioprio, 0, MOPT_NO_EXT2},
+ {Opt_data, 0, MOPT_NO_EXT2},
{Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
- {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
#ifdef CONFIG_EXT4_FS_POSIX_ACL
{Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
- {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
#else
{Opt_acl, 0, MOPT_NOSUPPORT},
- {Opt_noacl, 0, MOPT_NOSUPPORT},
#endif
{Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
{Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
- {Opt_debug_want_extra_isize, 0, MOPT_GTE0},
{Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
{Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
MOPT_SET | MOPT_Q},
@@ -1632,335 +1903,976 @@ static const struct mount_opts {
MOPT_CLEAR | MOPT_Q},
{Opt_usrjquota, 0, MOPT_Q},
{Opt_grpjquota, 0, MOPT_Q},
- {Opt_offusrjquota, 0, MOPT_Q},
- {Opt_offgrpjquota, 0, MOPT_Q},
- {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
- {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
- {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
- {Opt_max_dir_size_kb, 0, MOPT_GTE0},
- {Opt_test_dummy_encryption, 0, MOPT_GTE0},
+ {Opt_jqfmt, 0, MOPT_QFMT},
{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
+ {Opt_no_prefetch_block_bitmaps, EXT4_MOUNT_NO_PREFETCH_BLOCK_BITMAPS,
+ MOPT_SET},
+#ifdef CONFIG_EXT4_DEBUG
+ {Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
+ MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
+#endif
+ {Opt_abort, EXT4_MOUNT2_ABORT, MOPT_SET | MOPT_2},
{Opt_err, 0, 0}
};
-static int handle_mount_opt(struct super_block *sb, char *opt, int token,
- substring_t *args, unsigned long *journal_devnum,
- unsigned int *journal_ioprio, int is_remount)
+#if IS_ENABLED(CONFIG_UNICODE)
+static const struct ext4_sb_encodings {
+ __u16 magic;
+ char *name;
+ unsigned int version;
+} ext4_sb_encoding_map[] = {
+ {EXT4_ENC_UTF8_12_1, "utf8", UNICODE_AGE(12, 1, 0)},
+};
+
+static const struct ext4_sb_encodings *
+ext4_sb_read_encoding(const struct ext4_super_block *es)
{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- const struct mount_opts *m;
- kuid_t uid;
- kgid_t gid;
- int arg = 0;
+ __u16 magic = le16_to_cpu(es->s_encoding);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ext4_sb_encoding_map); i++)
+ if (magic == ext4_sb_encoding_map[i].magic)
+ return &ext4_sb_encoding_map[i];
+
+ return NULL;
+}
+#endif
+
+#define EXT4_SPEC_JQUOTA (1 << 0)
+#define EXT4_SPEC_JQFMT (1 << 1)
+#define EXT4_SPEC_DATAJ (1 << 2)
+#define EXT4_SPEC_SB_BLOCK (1 << 3)
+#define EXT4_SPEC_JOURNAL_DEV (1 << 4)
+#define EXT4_SPEC_JOURNAL_IOPRIO (1 << 5)
+#define EXT4_SPEC_s_want_extra_isize (1 << 7)
+#define EXT4_SPEC_s_max_batch_time (1 << 8)
+#define EXT4_SPEC_s_min_batch_time (1 << 9)
+#define EXT4_SPEC_s_inode_readahead_blks (1 << 10)
+#define EXT4_SPEC_s_li_wait_mult (1 << 11)
+#define EXT4_SPEC_s_max_dir_size_kb (1 << 12)
+#define EXT4_SPEC_s_stripe (1 << 13)
+#define EXT4_SPEC_s_resuid (1 << 14)
+#define EXT4_SPEC_s_resgid (1 << 15)
+#define EXT4_SPEC_s_commit_interval (1 << 16)
+#define EXT4_SPEC_s_fc_debug_max_replay (1 << 17)
+#define EXT4_SPEC_s_sb_block (1 << 18)
+#define EXT4_SPEC_mb_optimize_scan (1 << 19)
+
+struct ext4_fs_context {
+ char *s_qf_names[EXT4_MAXQUOTAS];
+ struct fscrypt_dummy_policy dummy_enc_policy;
+ int s_jquota_fmt; /* Format of quota to use */
+#ifdef CONFIG_EXT4_DEBUG
+ int s_fc_debug_max_replay;
+#endif
+ unsigned short qname_spec;
+ unsigned long vals_s_flags; /* Bits to set in s_flags */
+ unsigned long mask_s_flags; /* Bits changed in s_flags */
+ unsigned long journal_devnum;
+ unsigned long s_commit_interval;
+ unsigned long s_stripe;
+ unsigned int s_inode_readahead_blks;
+ unsigned int s_want_extra_isize;
+ unsigned int s_li_wait_mult;
+ unsigned int s_max_dir_size_kb;
+ unsigned int journal_ioprio;
+ unsigned int vals_s_mount_opt;
+ unsigned int mask_s_mount_opt;
+ unsigned int vals_s_mount_opt2;
+ unsigned int mask_s_mount_opt2;
+ unsigned int opt_flags; /* MOPT flags */
+ unsigned int spec;
+ u32 s_max_batch_time;
+ u32 s_min_batch_time;
+ kuid_t s_resuid;
+ kgid_t s_resgid;
+ ext4_fsblk_t s_sb_block;
+};
+
+static void ext4_fc_free(struct fs_context *fc)
+{
+ struct ext4_fs_context *ctx = fc->fs_private;
+ int i;
+
+ if (!ctx)
+ return;
+
+ for (i = 0; i < EXT4_MAXQUOTAS; i++)
+ kfree(ctx->s_qf_names[i]);
+
+ fscrypt_free_dummy_policy(&ctx->dummy_enc_policy);
+ kfree(ctx);
+}
+
+int ext4_init_fs_context(struct fs_context *fc)
+{
+ struct ext4_fs_context *ctx;
+
+ ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ fc->fs_private = ctx;
+ fc->ops = &ext4_context_ops;
+
+ /* i_version is always enabled now */
+ fc->sb_flags |= SB_I_VERSION;
+
+ return 0;
+}
#ifdef CONFIG_QUOTA
- if (token == Opt_usrjquota)
- return set_qf_name(sb, USRQUOTA, &args[0]);
- else if (token == Opt_grpjquota)
- return set_qf_name(sb, GRPQUOTA, &args[0]);
- else if (token == Opt_offusrjquota)
- return clear_qf_name(sb, USRQUOTA);
- else if (token == Opt_offgrpjquota)
- return clear_qf_name(sb, GRPQUOTA);
+/*
+ * Note the name of the specified quota file.
+ */
+static int note_qf_name(struct fs_context *fc, int qtype,
+ struct fs_parameter *param)
+{
+ struct ext4_fs_context *ctx = fc->fs_private;
+ char *qname;
+
+ if (param->size < 1) {
+ ext4_msg(NULL, KERN_ERR, "Missing quota name");
+ return -EINVAL;
+ }
+ if (strchr(param->string, '/')) {
+ ext4_msg(NULL, KERN_ERR,
+ "quotafile must be on filesystem root");
+ return -EINVAL;
+ }
+ if (ctx->s_qf_names[qtype]) {
+ if (strcmp(ctx->s_qf_names[qtype], param->string) != 0) {
+ ext4_msg(NULL, KERN_ERR,
+ "%s quota file already specified",
+ QTYPE2NAME(qtype));
+ return -EINVAL;
+ }
+ return 0;
+ }
+
+ qname = kmemdup_nul(param->string, param->size, GFP_KERNEL);
+ if (!qname) {
+ ext4_msg(NULL, KERN_ERR,
+ "Not enough memory for storing quotafile name");
+ return -ENOMEM;
+ }
+ ctx->s_qf_names[qtype] = qname;
+ ctx->qname_spec |= 1 << qtype;
+ ctx->spec |= EXT4_SPEC_JQUOTA;
+ return 0;
+}
+
+/*
+ * Clear the name of the specified quota file.
+ */
+static int unnote_qf_name(struct fs_context *fc, int qtype)
+{
+ struct ext4_fs_context *ctx = fc->fs_private;
+
+ kfree(ctx->s_qf_names[qtype]);
+
+ ctx->s_qf_names[qtype] = NULL;
+ ctx->qname_spec |= 1 << qtype;
+ ctx->spec |= EXT4_SPEC_JQUOTA;
+ return 0;
+}
#endif
- switch (token) {
- case Opt_noacl:
- case Opt_nouser_xattr:
- ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5");
- break;
- case Opt_sb:
- return 1; /* handled by get_sb_block() */
- case Opt_removed:
- ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
- return 1;
- case Opt_abort:
- sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
- return 1;
- case Opt_i_version:
- sb->s_flags |= MS_I_VERSION;
- return 1;
- case Opt_lazytime:
- sb->s_flags |= MS_LAZYTIME;
- return 1;
- case Opt_nolazytime:
- sb->s_flags &= ~MS_LAZYTIME;
- return 1;
+
+static int ext4_parse_test_dummy_encryption(const struct fs_parameter *param,
+ struct ext4_fs_context *ctx)
+{
+ int err;
+
+ if (!IS_ENABLED(CONFIG_FS_ENCRYPTION)) {
+ ext4_msg(NULL, KERN_WARNING,
+ "test_dummy_encryption option not supported");
+ return -EINVAL;
+ }
+ err = fscrypt_parse_test_dummy_encryption(param,
+ &ctx->dummy_enc_policy);
+ if (err == -EINVAL) {
+ ext4_msg(NULL, KERN_WARNING,
+ "Value of option \"%s\" is unrecognized", param->key);
+ } else if (err == -EEXIST) {
+ ext4_msg(NULL, KERN_WARNING,
+ "Conflicting test_dummy_encryption options");
+ return -EINVAL;
}
+ return err;
+}
+
+#define EXT4_SET_CTX(name) \
+static inline __maybe_unused \
+void ctx_set_##name(struct ext4_fs_context *ctx, unsigned long flag) \
+{ \
+ ctx->mask_s_##name |= flag; \
+ ctx->vals_s_##name |= flag; \
+}
+
+#define EXT4_CLEAR_CTX(name) \
+static inline __maybe_unused \
+void ctx_clear_##name(struct ext4_fs_context *ctx, unsigned long flag) \
+{ \
+ ctx->mask_s_##name |= flag; \
+ ctx->vals_s_##name &= ~flag; \
+}
+
+#define EXT4_TEST_CTX(name) \
+static inline unsigned long \
+ctx_test_##name(struct ext4_fs_context *ctx, unsigned long flag) \
+{ \
+ return (ctx->vals_s_##name & flag); \
+}
+
+EXT4_SET_CTX(flags); /* set only */
+EXT4_SET_CTX(mount_opt);
+EXT4_CLEAR_CTX(mount_opt);
+EXT4_TEST_CTX(mount_opt);
+EXT4_SET_CTX(mount_opt2);
+EXT4_CLEAR_CTX(mount_opt2);
+EXT4_TEST_CTX(mount_opt2);
+
+static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ struct ext4_fs_context *ctx = fc->fs_private;
+ struct fs_parse_result result;
+ const struct mount_opts *m;
+ int is_remount;
+ int token;
+
+ token = fs_parse(fc, ext4_param_specs, param, &result);
+ if (token < 0)
+ return token;
+ is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE;
for (m = ext4_mount_opts; m->token != Opt_err; m++)
if (token == m->token)
break;
- if (m->token == Opt_err) {
- ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" "
- "or missing value", opt);
- return -1;
- }
-
- if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
- ext4_msg(sb, KERN_ERR,
- "Mount option \"%s\" incompatible with ext2", opt);
- return -1;
- }
- if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
- ext4_msg(sb, KERN_ERR,
- "Mount option \"%s\" incompatible with ext3", opt);
- return -1;
- }
+ ctx->opt_flags |= m->flags;
- if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg))
- return -1;
- if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
- return -1;
if (m->flags & MOPT_EXPLICIT) {
if (m->mount_opt & EXT4_MOUNT_DELALLOC) {
- set_opt2(sb, EXPLICIT_DELALLOC);
+ ctx_set_mount_opt2(ctx, EXT4_MOUNT2_EXPLICIT_DELALLOC);
} else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) {
- set_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM);
+ ctx_set_mount_opt2(ctx,
+ EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM);
} else
- return -1;
- }
- if (m->flags & MOPT_CLEAR_ERR)
- clear_opt(sb, ERRORS_MASK);
- if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
- ext4_msg(sb, KERN_ERR, "Cannot change quota "
- "options when quota turned on");
- return -1;
+ return -EINVAL;
}
if (m->flags & MOPT_NOSUPPORT) {
- ext4_msg(sb, KERN_ERR, "%s option not supported", opt);
- } else if (token == Opt_commit) {
- if (arg == 0)
- arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
- sbi->s_commit_interval = HZ * arg;
- } else if (token == Opt_debug_want_extra_isize) {
- sbi->s_want_extra_isize = arg;
- } else if (token == Opt_max_batch_time) {
- sbi->s_max_batch_time = arg;
- } else if (token == Opt_min_batch_time) {
- sbi->s_min_batch_time = arg;
- } else if (token == Opt_inode_readahead_blks) {
- if (arg && (arg > (1 << 30) || !is_power_of_2(arg))) {
- ext4_msg(sb, KERN_ERR,
- "EXT4-fs: inode_readahead_blks must be "
- "0 or a power of 2 smaller than 2^31");
- return -1;
+ ext4_msg(NULL, KERN_ERR, "%s option not supported",
+ param->key);
+ return 0;
+ }
+
+ switch (token) {
+#ifdef CONFIG_QUOTA
+ case Opt_usrjquota:
+ if (!*param->string)
+ return unnote_qf_name(fc, USRQUOTA);
+ else
+ return note_qf_name(fc, USRQUOTA, param);
+ case Opt_grpjquota:
+ if (!*param->string)
+ return unnote_qf_name(fc, GRPQUOTA);
+ else
+ return note_qf_name(fc, GRPQUOTA, param);
+#endif
+ case Opt_sb:
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+ ext4_msg(NULL, KERN_WARNING,
+ "Ignoring %s option on remount", param->key);
+ } else {
+ ctx->s_sb_block = result.uint_32;
+ ctx->spec |= EXT4_SPEC_s_sb_block;
}
- sbi->s_inode_readahead_blks = arg;
- } else if (token == Opt_init_itable) {
- set_opt(sb, INIT_INODE_TABLE);
- if (!args->from)
- arg = EXT4_DEF_LI_WAIT_MULT;
- sbi->s_li_wait_mult = arg;
- } else if (token == Opt_max_dir_size_kb) {
- sbi->s_max_dir_size_kb = arg;
- } else if (token == Opt_stripe) {
- sbi->s_stripe = arg;
- } else if (token == Opt_resuid) {
- uid = make_kuid(current_user_ns(), arg);
- if (!uid_valid(uid)) {
- ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg);
- return -1;
+ return 0;
+ case Opt_removed:
+ ext4_msg(NULL, KERN_WARNING, "Ignoring removed %s option",
+ param->key);
+ return 0;
+ case Opt_inlinecrypt:
+#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
+ ctx_set_flags(ctx, SB_INLINECRYPT);
+#else
+ ext4_msg(NULL, KERN_ERR, "inline encryption not supported");
+#endif
+ return 0;
+ case Opt_errors:
+ ctx_clear_mount_opt(ctx, EXT4_MOUNT_ERRORS_MASK);
+ ctx_set_mount_opt(ctx, result.uint_32);
+ return 0;
+#ifdef CONFIG_QUOTA
+ case Opt_jqfmt:
+ ctx->s_jquota_fmt = result.uint_32;
+ ctx->spec |= EXT4_SPEC_JQFMT;
+ return 0;
+#endif
+ case Opt_data:
+ ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS);
+ ctx_set_mount_opt(ctx, result.uint_32);
+ ctx->spec |= EXT4_SPEC_DATAJ;
+ return 0;
+ case Opt_commit:
+ if (result.uint_32 == 0)
+ result.uint_32 = JBD2_DEFAULT_MAX_COMMIT_AGE;
+ else if (result.uint_32 > INT_MAX / HZ) {
+ ext4_msg(NULL, KERN_ERR,
+ "Invalid commit interval %d, "
+ "must be smaller than %d",
+ result.uint_32, INT_MAX / HZ);
+ return -EINVAL;
}
- sbi->s_resuid = uid;
- } else if (token == Opt_resgid) {
- gid = make_kgid(current_user_ns(), arg);
- if (!gid_valid(gid)) {
- ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg);
- return -1;
+ ctx->s_commit_interval = HZ * result.uint_32;
+ ctx->spec |= EXT4_SPEC_s_commit_interval;
+ return 0;
+ case Opt_debug_want_extra_isize:
+ if ((result.uint_32 & 1) || (result.uint_32 < 4)) {
+ ext4_msg(NULL, KERN_ERR,
+ "Invalid want_extra_isize %d", result.uint_32);
+ return -EINVAL;
}
- sbi->s_resgid = gid;
- } else if (token == Opt_journal_dev) {
+ ctx->s_want_extra_isize = result.uint_32;
+ ctx->spec |= EXT4_SPEC_s_want_extra_isize;
+ return 0;
+ case Opt_max_batch_time:
+ ctx->s_max_batch_time = result.uint_32;
+ ctx->spec |= EXT4_SPEC_s_max_batch_time;
+ return 0;
+ case Opt_min_batch_time:
+ ctx->s_min_batch_time = result.uint_32;
+ ctx->spec |= EXT4_SPEC_s_min_batch_time;
+ return 0;
+ case Opt_inode_readahead_blks:
+ if (result.uint_32 &&
+ (result.uint_32 > (1 << 30) ||
+ !is_power_of_2(result.uint_32))) {
+ ext4_msg(NULL, KERN_ERR,
+ "EXT4-fs: inode_readahead_blks must be "
+ "0 or a power of 2 smaller than 2^31");
+ return -EINVAL;
+ }
+ ctx->s_inode_readahead_blks = result.uint_32;
+ ctx->spec |= EXT4_SPEC_s_inode_readahead_blks;
+ return 0;
+ case Opt_init_itable:
+ ctx_set_mount_opt(ctx, EXT4_MOUNT_INIT_INODE_TABLE);
+ ctx->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
+ if (param->type == fs_value_is_string)
+ ctx->s_li_wait_mult = result.uint_32;
+ ctx->spec |= EXT4_SPEC_s_li_wait_mult;
+ return 0;
+ case Opt_max_dir_size_kb:
+ ctx->s_max_dir_size_kb = result.uint_32;
+ ctx->spec |= EXT4_SPEC_s_max_dir_size_kb;
+ return 0;
+#ifdef CONFIG_EXT4_DEBUG
+ case Opt_fc_debug_max_replay:
+ ctx->s_fc_debug_max_replay = result.uint_32;
+ ctx->spec |= EXT4_SPEC_s_fc_debug_max_replay;
+ return 0;
+#endif
+ case Opt_stripe:
+ ctx->s_stripe = result.uint_32;
+ ctx->spec |= EXT4_SPEC_s_stripe;
+ return 0;
+ case Opt_resuid:
+ ctx->s_resuid = result.uid;
+ ctx->spec |= EXT4_SPEC_s_resuid;
+ return 0;
+ case Opt_resgid:
+ ctx->s_resgid = result.gid;
+ ctx->spec |= EXT4_SPEC_s_resgid;
+ return 0;
+ case Opt_journal_dev:
if (is_remount) {
- ext4_msg(sb, KERN_ERR,
+ ext4_msg(NULL, KERN_ERR,
"Cannot specify journal on remount");
- return -1;
+ return -EINVAL;
}
- *journal_devnum = arg;
- } else if (token == Opt_journal_path) {
- char *journal_path;
+ ctx->journal_devnum = result.uint_32;
+ ctx->spec |= EXT4_SPEC_JOURNAL_DEV;
+ return 0;
+ case Opt_journal_path:
+ {
struct inode *journal_inode;
struct path path;
int error;
if (is_remount) {
- ext4_msg(sb, KERN_ERR,
+ ext4_msg(NULL, KERN_ERR,
"Cannot specify journal on remount");
- return -1;
- }
- journal_path = match_strdup(&args[0]);
- if (!journal_path) {
- ext4_msg(sb, KERN_ERR, "error: could not dup "
- "journal device string");
- return -1;
+ return -EINVAL;
}
- error = kern_path(journal_path, LOOKUP_FOLLOW, &path);
+ error = fs_lookup_param(fc, param, 1, LOOKUP_FOLLOW, &path);
if (error) {
- ext4_msg(sb, KERN_ERR, "error: could not find "
- "journal device path: error %d", error);
- kfree(journal_path);
- return -1;
+ ext4_msg(NULL, KERN_ERR, "error: could not find "
+ "journal device path");
+ return -EINVAL;
}
journal_inode = d_inode(path.dentry);
- if (!S_ISBLK(journal_inode->i_mode)) {
- ext4_msg(sb, KERN_ERR, "error: journal path %s "
- "is not a block device", journal_path);
- path_put(&path);
- kfree(journal_path);
- return -1;
- }
-
- *journal_devnum = new_encode_dev(journal_inode->i_rdev);
+ ctx->journal_devnum = new_encode_dev(journal_inode->i_rdev);
+ ctx->spec |= EXT4_SPEC_JOURNAL_DEV;
path_put(&path);
- kfree(journal_path);
- } else if (token == Opt_journal_ioprio) {
- if (arg > 7) {
- ext4_msg(sb, KERN_ERR, "Invalid journal IO priority"
+ return 0;
+ }
+ case Opt_journal_ioprio:
+ if (result.uint_32 > 7) {
+ ext4_msg(NULL, KERN_ERR, "Invalid journal IO priority"
" (must be 0-7)");
- return -1;
+ return -EINVAL;
}
- *journal_ioprio =
- IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
- } else if (token == Opt_test_dummy_encryption) {
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
- sbi->s_mount_flags |= EXT4_MF_TEST_DUMMY_ENCRYPTION;
- ext4_msg(sb, KERN_WARNING,
- "Test dummy encryption mode enabled");
+ ctx->journal_ioprio =
+ IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, result.uint_32);
+ ctx->spec |= EXT4_SPEC_JOURNAL_IOPRIO;
+ return 0;
+ case Opt_test_dummy_encryption:
+ return ext4_parse_test_dummy_encryption(param, ctx);
+ case Opt_dax:
+ case Opt_dax_type:
+#ifdef CONFIG_FS_DAX
+ {
+ int type = (token == Opt_dax) ?
+ Opt_dax : result.uint_32;
+
+ switch (type) {
+ case Opt_dax:
+ case Opt_dax_always:
+ ctx_set_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
+ ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
+ break;
+ case Opt_dax_never:
+ ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
+ ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
+ break;
+ case Opt_dax_inode:
+ ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
+ ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
+ /* Strictly for printing options */
+ ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE);
+ break;
+ }
+ return 0;
+ }
#else
- ext4_msg(sb, KERN_WARNING,
- "Test dummy encryption mount option ignored");
+ ext4_msg(NULL, KERN_INFO, "dax option not supported");
+ return -EINVAL;
#endif
- } else if (m->flags & MOPT_DATAJ) {
- if (is_remount) {
- if (!sbi->s_journal)
- ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
- else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) {
- ext4_msg(sb, KERN_ERR,
- "Cannot change data mode on remount");
- return -1;
- }
+ case Opt_data_err:
+ if (result.uint_32 == Opt_data_err_abort)
+ ctx_set_mount_opt(ctx, m->mount_opt);
+ else if (result.uint_32 == Opt_data_err_ignore)
+ ctx_clear_mount_opt(ctx, m->mount_opt);
+ return 0;
+ case Opt_mb_optimize_scan:
+ if (result.int_32 == 1) {
+ ctx_set_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
+ ctx->spec |= EXT4_SPEC_mb_optimize_scan;
+ } else if (result.int_32 == 0) {
+ ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
+ ctx->spec |= EXT4_SPEC_mb_optimize_scan;
} else {
- clear_opt(sb, DATA_FLAGS);
- sbi->s_mount_opt |= m->mount_opt;
- }
-#ifdef CONFIG_QUOTA
- } else if (m->flags & MOPT_QFMT) {
- if (sb_any_quota_loaded(sb) &&
- sbi->s_jquota_fmt != m->mount_opt) {
- ext4_msg(sb, KERN_ERR, "Cannot change journaled "
- "quota options when quota turned on");
- return -1;
- }
- if (ext4_has_feature_quota(sb)) {
- ext4_msg(sb, KERN_INFO,
- "Quota format mount options ignored "
- "when QUOTA feature is enabled");
- return 1;
+ ext4_msg(NULL, KERN_WARNING,
+ "mb_optimize_scan should be set to 0 or 1.");
+ return -EINVAL;
}
- sbi->s_jquota_fmt = m->mount_opt;
-#endif
- } else if (token == Opt_dax) {
-#ifdef CONFIG_FS_DAX
- ext4_msg(sb, KERN_WARNING,
- "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
- sbi->s_mount_opt |= m->mount_opt;
-#else
- ext4_msg(sb, KERN_INFO, "dax option not supported");
- return -1;
-#endif
- } else if (token == Opt_data_err_abort) {
- sbi->s_mount_opt |= m->mount_opt;
- } else if (token == Opt_data_err_ignore) {
- sbi->s_mount_opt &= ~m->mount_opt;
- } else {
- if (!args->from)
- arg = 1;
+ return 0;
+ }
+
+ /*
+ * At this point we should only be getting options requiring MOPT_SET,
+ * or MOPT_CLEAR. Anything else is a bug
+ */
+ if (m->token == Opt_err) {
+ ext4_msg(NULL, KERN_WARNING, "buggy handling of option %s",
+ param->key);
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ else {
+ unsigned int set = 0;
+
+ if ((param->type == fs_value_is_flag) ||
+ result.uint_32 > 0)
+ set = 1;
+
if (m->flags & MOPT_CLEAR)
- arg = !arg;
+ set = !set;
else if (unlikely(!(m->flags & MOPT_SET))) {
- ext4_msg(sb, KERN_WARNING,
- "buggy handling of option %s", opt);
+ ext4_msg(NULL, KERN_WARNING,
+ "buggy handling of option %s",
+ param->key);
WARN_ON(1);
- return -1;
+ return -EINVAL;
+ }
+ if (m->flags & MOPT_2) {
+ if (set != 0)
+ ctx_set_mount_opt2(ctx, m->mount_opt);
+ else
+ ctx_clear_mount_opt2(ctx, m->mount_opt);
+ } else {
+ if (set != 0)
+ ctx_set_mount_opt(ctx, m->mount_opt);
+ else
+ ctx_clear_mount_opt(ctx, m->mount_opt);
}
- if (arg != 0)
- sbi->s_mount_opt |= m->mount_opt;
- else
- sbi->s_mount_opt &= ~m->mount_opt;
}
- return 1;
+
+ return 0;
}
-static int parse_options(char *options, struct super_block *sb,
- unsigned long *journal_devnum,
- unsigned int *journal_ioprio,
- int is_remount)
+static int parse_options(struct fs_context *fc, char *options)
{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- char *p;
- substring_t args[MAX_OPT_ARGS];
- int token;
+ struct fs_parameter param;
+ int ret;
+ char *key;
if (!options)
- return 1;
+ return 0;
- while ((p = strsep(&options, ",")) != NULL) {
- if (!*p)
- continue;
- /*
- * Initialize args struct so we know whether arg was
- * found; some options take optional arguments.
- */
- args[0].to = args[0].from = NULL;
- token = match_token(p, tokens, args);
- if (handle_mount_opt(sb, p, token, args, journal_devnum,
- journal_ioprio, is_remount) < 0)
- return 0;
+ while ((key = strsep(&options, ",")) != NULL) {
+ if (*key) {
+ size_t v_len = 0;
+ char *value = strchr(key, '=');
+
+ param.type = fs_value_is_flag;
+ param.string = NULL;
+
+ if (value) {
+ if (value == key)
+ continue;
+
+ *value++ = 0;
+ v_len = strlen(value);
+ param.string = kmemdup_nul(value, v_len,
+ GFP_KERNEL);
+ if (!param.string)
+ return -ENOMEM;
+ param.type = fs_value_is_string;
+ }
+
+ param.key = key;
+ param.size = v_len;
+
+ ret = ext4_parse_param(fc, &param);
+ kfree(param.string);
+ if (ret < 0)
+ return ret;
+ }
}
+
+ ret = ext4_validate_options(fc);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int parse_apply_sb_mount_options(struct super_block *sb,
+ struct ext4_fs_context *m_ctx)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ char s_mount_opts[64];
+ struct ext4_fs_context *s_ctx = NULL;
+ struct fs_context *fc = NULL;
+ int ret = -ENOMEM;
+
+ if (!sbi->s_es->s_mount_opts[0])
+ return 0;
+
+ if (strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts) < 0)
+ return -E2BIG;
+
+ fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
+ if (!fc)
+ return -ENOMEM;
+
+ s_ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL);
+ if (!s_ctx)
+ goto out_free;
+
+ fc->fs_private = s_ctx;
+ fc->s_fs_info = sbi;
+
+ ret = parse_options(fc, s_mount_opts);
+ if (ret < 0)
+ goto parse_failed;
+
+ ret = ext4_check_opt_consistency(fc, sb);
+ if (ret < 0) {
+parse_failed:
+ ext4_msg(sb, KERN_WARNING,
+ "failed to parse options in superblock: %s",
+ s_mount_opts);
+ ret = 0;
+ goto out_free;
+ }
+
+ if (s_ctx->spec & EXT4_SPEC_JOURNAL_DEV)
+ m_ctx->journal_devnum = s_ctx->journal_devnum;
+ if (s_ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)
+ m_ctx->journal_ioprio = s_ctx->journal_ioprio;
+
+ ext4_apply_options(fc, sb);
+ ret = 0;
+
+out_free:
+ ext4_fc_free(fc);
+ kfree(fc);
+ return ret;
+}
+
+static void ext4_apply_quota_options(struct fs_context *fc,
+ struct super_block *sb)
+{
+#ifdef CONFIG_QUOTA
+ bool quota_feature = ext4_has_feature_quota(sb);
+ struct ext4_fs_context *ctx = fc->fs_private;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ char *qname;
+ int i;
+
+ if (quota_feature)
+ return;
+
+ if (ctx->spec & EXT4_SPEC_JQUOTA) {
+ for (i = 0; i < EXT4_MAXQUOTAS; i++) {
+ if (!(ctx->qname_spec & (1 << i)))
+ continue;
+
+ qname = ctx->s_qf_names[i]; /* May be NULL */
+ if (qname)
+ set_opt(sb, QUOTA);
+ ctx->s_qf_names[i] = NULL;
+ qname = rcu_replace_pointer(sbi->s_qf_names[i], qname,
+ lockdep_is_held(&sb->s_umount));
+ if (qname)
+ kfree_rcu_mightsleep(qname);
+ }
+ }
+
+ if (ctx->spec & EXT4_SPEC_JQFMT)
+ sbi->s_jquota_fmt = ctx->s_jquota_fmt;
+#endif
+}
+
+/*
+ * Check quota settings consistency.
+ */
+static int ext4_check_quota_consistency(struct fs_context *fc,
+ struct super_block *sb)
+{
#ifdef CONFIG_QUOTA
+ struct ext4_fs_context *ctx = fc->fs_private;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ bool quota_feature = ext4_has_feature_quota(sb);
+ bool quota_loaded = sb_any_quota_loaded(sb);
+ bool usr_qf_name, grp_qf_name, usrquota, grpquota;
+ int quota_flags, i;
+
/*
* We do the test below only for project quotas. 'usrquota' and
* 'grpquota' mount options are allowed even without quota feature
* to support legacy quotas in quota files.
*/
- if (test_opt(sb, PRJQUOTA) && !ext4_has_feature_project(sb)) {
- ext4_msg(sb, KERN_ERR, "Project quota feature not enabled. "
+ if (ctx_test_mount_opt(ctx, EXT4_MOUNT_PRJQUOTA) &&
+ !ext4_has_feature_project(sb)) {
+ ext4_msg(NULL, KERN_ERR, "Project quota feature not enabled. "
"Cannot enable project quota enforcement.");
- return 0;
+ return -EINVAL;
}
- if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
- if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
- clear_opt(sb, USRQUOTA);
- if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
- clear_opt(sb, GRPQUOTA);
+ quota_flags = EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
+ EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA;
+ if (quota_loaded &&
+ ctx->mask_s_mount_opt & quota_flags &&
+ !ctx_test_mount_opt(ctx, quota_flags))
+ goto err_quota_change;
+
+ if (ctx->spec & EXT4_SPEC_JQUOTA) {
+
+ for (i = 0; i < EXT4_MAXQUOTAS; i++) {
+ if (!(ctx->qname_spec & (1 << i)))
+ continue;
+
+ if (quota_loaded &&
+ !!sbi->s_qf_names[i] != !!ctx->s_qf_names[i])
+ goto err_jquota_change;
- if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
- ext4_msg(sb, KERN_ERR, "old and new quota "
- "format mixing");
+ if (sbi->s_qf_names[i] && ctx->s_qf_names[i] &&
+ strcmp(get_qf_name(sb, sbi, i),
+ ctx->s_qf_names[i]) != 0)
+ goto err_jquota_specified;
+ }
+
+ if (quota_feature) {
+ ext4_msg(NULL, KERN_INFO,
+ "Journaled quota options ignored when "
+ "QUOTA feature is enabled");
return 0;
}
+ }
- if (!sbi->s_jquota_fmt) {
- ext4_msg(sb, KERN_ERR, "journaled quota format "
- "not specified");
+ if (ctx->spec & EXT4_SPEC_JQFMT) {
+ if (sbi->s_jquota_fmt != ctx->s_jquota_fmt && quota_loaded)
+ goto err_jquota_change;
+ if (quota_feature) {
+ ext4_msg(NULL, KERN_INFO, "Quota format mount options "
+ "ignored when QUOTA feature is enabled");
return 0;
}
}
+
+ /* Make sure we don't mix old and new quota format */
+ usr_qf_name = (get_qf_name(sb, sbi, USRQUOTA) ||
+ ctx->s_qf_names[USRQUOTA]);
+ grp_qf_name = (get_qf_name(sb, sbi, GRPQUOTA) ||
+ ctx->s_qf_names[GRPQUOTA]);
+
+ usrquota = (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) ||
+ test_opt(sb, USRQUOTA));
+
+ grpquota = (ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA) ||
+ test_opt(sb, GRPQUOTA));
+
+ if (usr_qf_name) {
+ ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA);
+ usrquota = false;
+ }
+ if (grp_qf_name) {
+ ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA);
+ grpquota = false;
+ }
+
+ if (usr_qf_name || grp_qf_name) {
+ if (usrquota || grpquota) {
+ ext4_msg(NULL, KERN_ERR, "old and new quota "
+ "format mixing");
+ return -EINVAL;
+ }
+
+ if (!(ctx->spec & EXT4_SPEC_JQFMT || sbi->s_jquota_fmt)) {
+ ext4_msg(NULL, KERN_ERR, "journaled quota format "
+ "not specified");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+
+err_quota_change:
+ ext4_msg(NULL, KERN_ERR,
+ "Cannot change quota options when quota turned on");
+ return -EINVAL;
+err_jquota_change:
+ ext4_msg(NULL, KERN_ERR, "Cannot change journaled quota "
+ "options when quota turned on");
+ return -EINVAL;
+err_jquota_specified:
+ ext4_msg(NULL, KERN_ERR, "%s quota file already specified",
+ QTYPE2NAME(i));
+ return -EINVAL;
+#else
+ return 0;
#endif
- if (test_opt(sb, DIOREAD_NOLOCK)) {
- int blocksize =
- BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
+}
- if (blocksize < PAGE_SIZE) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "dioread_nolock if block size != PAGE_SIZE");
+static int ext4_check_test_dummy_encryption(const struct fs_context *fc,
+ struct super_block *sb)
+{
+ const struct ext4_fs_context *ctx = fc->fs_private;
+ const struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy))
+ return 0;
+
+ if (!ext4_has_feature_encrypt(sb)) {
+ ext4_msg(NULL, KERN_WARNING,
+ "test_dummy_encryption requires encrypt feature");
+ return -EINVAL;
+ }
+ /*
+ * This mount option is just for testing, and it's not worthwhile to
+ * implement the extra complexity (e.g. RCU protection) that would be
+ * needed to allow it to be set or changed during remount. We do allow
+ * it to be specified during remount, but only if there is no change.
+ */
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+ if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
+ &ctx->dummy_enc_policy))
+ return 0;
+ ext4_msg(NULL, KERN_WARNING,
+ "Can't set or change test_dummy_encryption on remount");
+ return -EINVAL;
+ }
+ /* Also make sure s_mount_opts didn't contain a conflicting value. */
+ if (fscrypt_is_dummy_policy_set(&sbi->s_dummy_enc_policy)) {
+ if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
+ &ctx->dummy_enc_policy))
return 0;
+ ext4_msg(NULL, KERN_WARNING,
+ "Conflicting test_dummy_encryption options");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static void ext4_apply_test_dummy_encryption(struct ext4_fs_context *ctx,
+ struct super_block *sb)
+{
+ if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy) ||
+ /* if already set, it was already verified to be the same */
+ fscrypt_is_dummy_policy_set(&EXT4_SB(sb)->s_dummy_enc_policy))
+ return;
+ EXT4_SB(sb)->s_dummy_enc_policy = ctx->dummy_enc_policy;
+ memset(&ctx->dummy_enc_policy, 0, sizeof(ctx->dummy_enc_policy));
+ ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled");
+}
+
+static int ext4_check_opt_consistency(struct fs_context *fc,
+ struct super_block *sb)
+{
+ struct ext4_fs_context *ctx = fc->fs_private;
+ struct ext4_sb_info *sbi = fc->s_fs_info;
+ int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE;
+ int err;
+
+ if ((ctx->opt_flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
+ ext4_msg(NULL, KERN_ERR,
+ "Mount option(s) incompatible with ext2");
+ return -EINVAL;
+ }
+ if ((ctx->opt_flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
+ ext4_msg(NULL, KERN_ERR,
+ "Mount option(s) incompatible with ext3");
+ return -EINVAL;
+ }
+
+ if (ctx->s_want_extra_isize >
+ (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE)) {
+ ext4_msg(NULL, KERN_ERR,
+ "Invalid want_extra_isize %d",
+ ctx->s_want_extra_isize);
+ return -EINVAL;
+ }
+
+ err = ext4_check_test_dummy_encryption(fc, sb);
+ if (err)
+ return err;
+
+ if ((ctx->spec & EXT4_SPEC_DATAJ) && is_remount) {
+ if (!sbi->s_journal) {
+ ext4_msg(NULL, KERN_WARNING,
+ "Remounting file system with no journal "
+ "so ignoring journalled data option");
+ ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS);
+ } else if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS) !=
+ test_opt(sb, DATA_FLAGS)) {
+ ext4_msg(NULL, KERN_ERR, "Cannot change data mode "
+ "on remount");
+ return -EINVAL;
+ }
+ }
+
+ if (is_remount) {
+ if (!sbi->s_journal &&
+ ctx_test_mount_opt(ctx, EXT4_MOUNT_DATA_ERR_ABORT)) {
+ ext4_msg(NULL, KERN_WARNING,
+ "Remounting fs w/o journal so ignoring data_err option");
+ ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_ERR_ABORT);
+ }
+
+ if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) &&
+ (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
+ ext4_msg(NULL, KERN_ERR, "can't mount with "
+ "both data=journal and dax");
+ return -EINVAL;
+ }
+
+ if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) &&
+ (!(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
+ (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER))) {
+fail_dax_change_remount:
+ ext4_msg(NULL, KERN_ERR, "can't change "
+ "dax mount option while remounting");
+ return -EINVAL;
+ } else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER) &&
+ (!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
+ (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS))) {
+ goto fail_dax_change_remount;
+ } else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE) &&
+ ((sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
+ (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
+ !(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE))) {
+ goto fail_dax_change_remount;
}
}
+
+ return ext4_check_quota_consistency(fc, sb);
+}
+
+static void ext4_apply_options(struct fs_context *fc, struct super_block *sb)
+{
+ struct ext4_fs_context *ctx = fc->fs_private;
+ struct ext4_sb_info *sbi = fc->s_fs_info;
+
+ sbi->s_mount_opt &= ~ctx->mask_s_mount_opt;
+ sbi->s_mount_opt |= ctx->vals_s_mount_opt;
+ sbi->s_mount_opt2 &= ~ctx->mask_s_mount_opt2;
+ sbi->s_mount_opt2 |= ctx->vals_s_mount_opt2;
+ sb->s_flags &= ~ctx->mask_s_flags;
+ sb->s_flags |= ctx->vals_s_flags;
+
+#define APPLY(X) ({ if (ctx->spec & EXT4_SPEC_##X) sbi->X = ctx->X; })
+ APPLY(s_commit_interval);
+ APPLY(s_stripe);
+ APPLY(s_max_batch_time);
+ APPLY(s_min_batch_time);
+ APPLY(s_want_extra_isize);
+ APPLY(s_inode_readahead_blks);
+ APPLY(s_max_dir_size_kb);
+ APPLY(s_li_wait_mult);
+ APPLY(s_resgid);
+ APPLY(s_resuid);
+
+#ifdef CONFIG_EXT4_DEBUG
+ APPLY(s_fc_debug_max_replay);
+#endif
+
+ ext4_apply_quota_options(fc, sb);
+ ext4_apply_test_dummy_encryption(ctx, sb);
+}
+
+
+static int ext4_validate_options(struct fs_context *fc)
+{
+#ifdef CONFIG_QUOTA
+ struct ext4_fs_context *ctx = fc->fs_private;
+ char *usr_qf_name, *grp_qf_name;
+
+ usr_qf_name = ctx->s_qf_names[USRQUOTA];
+ grp_qf_name = ctx->s_qf_names[GRPQUOTA];
+
+ if (usr_qf_name || grp_qf_name) {
+ if (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) && usr_qf_name)
+ ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA);
+
+ if (ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA) && grp_qf_name)
+ ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA);
+
+ if (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) ||
+ ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA)) {
+ ext4_msg(NULL, KERN_ERR, "old and new quota "
+ "format mixing");
+ return -EINVAL;
+ }
+ }
+#endif
return 1;
}
@@ -1969,6 +2881,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
{
#if defined(CONFIG_QUOTA)
struct ext4_sb_info *sbi = EXT4_SB(sb);
+ char *usr_qf_name, *grp_qf_name;
if (sbi->s_jquota_fmt) {
char *fmtname = "";
@@ -1987,22 +2900,25 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
seq_printf(seq, ",jqfmt=%s", fmtname);
}
- if (sbi->s_qf_names[USRQUOTA])
- seq_show_option(seq, "usrjquota", sbi->s_qf_names[USRQUOTA]);
-
- if (sbi->s_qf_names[GRPQUOTA])
- seq_show_option(seq, "grpjquota", sbi->s_qf_names[GRPQUOTA]);
+ rcu_read_lock();
+ usr_qf_name = rcu_dereference(sbi->s_qf_names[USRQUOTA]);
+ grp_qf_name = rcu_dereference(sbi->s_qf_names[GRPQUOTA]);
+ if (usr_qf_name)
+ seq_show_option(seq, "usrjquota", usr_qf_name);
+ if (grp_qf_name)
+ seq_show_option(seq, "grpjquota", grp_qf_name);
+ rcu_read_unlock();
#endif
}
static const char *token2str(int token)
{
- const struct match_token *t;
+ const struct fs_parameter_spec *spec;
- for (t = tokens; t->token != Opt_err; t++)
- if (t->token == token && !strchr(t->pattern, '='))
+ for (spec = ext4_param_specs; spec->name != NULL; spec++)
+ if (spec->opt == token && !spec->type)
break;
- return t->pattern;
+ return spec->name;
}
/*
@@ -2015,7 +2931,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
- int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt;
+ int def_errors;
const struct mount_opts *m;
char sep = nodefs ? '\n' : ',';
@@ -2027,24 +2943,37 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
for (m = ext4_mount_opts; m->token != Opt_err; m++) {
int want_set = m->flags & MOPT_SET;
+ int opt_2 = m->flags & MOPT_2;
+ unsigned int mount_opt, def_mount_opt;
+
if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
- (m->flags & MOPT_CLEAR_ERR))
+ m->flags & MOPT_SKIP)
continue;
- if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
- continue; /* skip if same as the default */
+
+ if (opt_2) {
+ mount_opt = sbi->s_mount_opt2;
+ def_mount_opt = sbi->s_def_mount_opt2;
+ } else {
+ mount_opt = sbi->s_mount_opt;
+ def_mount_opt = sbi->s_def_mount_opt;
+ }
+ /* skip if same as the default */
+ if (!nodefs && !(m->mount_opt & (mount_opt ^ def_mount_opt)))
+ continue;
+ /* select Opt_noFoo vs Opt_Foo */
if ((want_set &&
- (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) ||
- (!want_set && (sbi->s_mount_opt & m->mount_opt)))
- continue; /* select Opt_noFoo vs Opt_Foo */
+ (mount_opt & m->mount_opt) != m->mount_opt) ||
+ (!want_set && (mount_opt & m->mount_opt)))
+ continue;
SEQ_OPTS_PRINT("%s", token2str(m->token));
}
if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
- le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
+ ext4_get_resuid(es) != EXT4_DEF_RESUID)
SEQ_OPTS_PRINT("resuid=%u",
from_kuid_munged(&init_user_ns, sbi->s_resuid));
if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
- le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
+ ext4_get_resgid(es) != EXT4_DEF_RESGID)
SEQ_OPTS_PRINT("resgid=%u",
from_kgid_munged(&init_user_ns, sbi->s_resgid));
def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
@@ -2060,11 +2989,12 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
- if (sb->s_flags & MS_I_VERSION)
+ if (nodefs && sb->s_flags & SB_I_VERSION)
SEQ_OPTS_PUTS("i_version");
if (nodefs || sbi->s_stripe)
SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
- if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) {
+ if (nodefs || EXT4_MOUNT_DATA_FLAGS &
+ (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
SEQ_OPTS_PUTS("data=journal");
else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
@@ -2077,7 +3007,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
SEQ_OPTS_PRINT("inode_readahead_blks=%u",
sbi->s_inode_readahead_blks);
- if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
+ if (test_opt(sb, INIT_INODE_TABLE) && (nodefs ||
(sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
if (nodefs || sbi->s_max_dir_size_kb)
@@ -2085,6 +3015,39 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
if (test_opt(sb, DATA_ERR_ABORT))
SEQ_OPTS_PUTS("data_err=abort");
+ fscrypt_show_test_dummy_encryption(seq, sep, sb);
+
+ if (sb->s_flags & SB_INLINECRYPT)
+ SEQ_OPTS_PUTS("inlinecrypt");
+
+ if (test_opt(sb, DAX_ALWAYS)) {
+ if (IS_EXT2_SB(sb))
+ SEQ_OPTS_PUTS("dax");
+ else
+ SEQ_OPTS_PUTS("dax=always");
+ } else if (test_opt2(sb, DAX_NEVER)) {
+ SEQ_OPTS_PUTS("dax=never");
+ } else if (test_opt2(sb, DAX_INODE)) {
+ SEQ_OPTS_PUTS("dax=inode");
+ }
+
+ if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD &&
+ !test_opt2(sb, MB_OPTIMIZE_SCAN)) {
+ SEQ_OPTS_PUTS("mb_optimize_scan=0");
+ } else if (sbi->s_groups_count < MB_DEFAULT_LINEAR_SCAN_THRESHOLD &&
+ test_opt2(sb, MB_OPTIMIZE_SCAN)) {
+ SEQ_OPTS_PUTS("mb_optimize_scan=1");
+ }
+
+ if (nodefs && !test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS))
+ SEQ_OPTS_PUTS("prefetch_block_bitmaps");
+
+ if (ext4_emergency_ro(sb))
+ SEQ_OPTS_PUTS("emergency_ro");
+
+ if (ext4_forced_shutdown(sb))
+ SEQ_OPTS_PUTS("shutdown");
+
ext4_show_quota_options(seq, sb);
return 0;
}
@@ -2099,9 +3062,9 @@ int ext4_seq_options_show(struct seq_file *seq, void *offset)
struct super_block *sb = seq->private;
int rc;
- seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw");
+ seq_puts(seq, sb_rdonly(sb) ? "ro" : "rw");
rc = _ext4_show_options(seq, sb, 1);
- seq_puts(seq, "\n");
+ seq_putc(seq, '\n');
return rc;
}
@@ -2109,12 +3072,13 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
int read_only)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- int res = 0;
+ int err = 0;
if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
ext4_msg(sb, KERN_ERR, "revision level too high, "
"forcing read-only mode");
- res = MS_RDONLY;
+ err = -EROFS;
+ goto done;
}
if (read_only)
goto done;
@@ -2132,8 +3096,8 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
"warning: maximal mount count reached, "
"running e2fsck is recommended");
else if (le32_to_cpu(es->s_checkinterval) &&
- (le32_to_cpu(es->s_lastcheck) +
- le32_to_cpu(es->s_checkinterval) <= get_seconds()))
+ (ext4_get_tstamp(es, s_lastcheck) +
+ le32_to_cpu(es->s_checkinterval) <= ktime_get_real_seconds()))
ext4_msg(sb, KERN_WARNING,
"warning: checktime reached, "
"running e2fsck is recommended");
@@ -2142,12 +3106,14 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
le16_add_cpu(&es->s_mnt_count, 1);
- es->s_mtime = cpu_to_le32(get_seconds());
- ext4_update_dynamic_rev(sb);
- if (sbi->s_journal)
+ ext4_update_tstamp(es, s_mtime);
+ if (sbi->s_journal) {
ext4_set_feature_journal_needs_recovery(sb);
+ if (ext4_has_feature_orphan_file(sb))
+ ext4_set_feature_orphan_present(sb);
+ }
- ext4_commit_super(sb, 1);
+ err = ext4_commit_super(sb);
done:
if (test_opt(sb, DEBUG))
printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
@@ -2157,16 +3123,14 @@ done:
EXT4_BLOCKS_PER_GROUP(sb),
EXT4_INODES_PER_GROUP(sb),
sbi->s_mount_opt, sbi->s_mount_opt2);
-
- cleancache_init_fs(sb);
- return res;
+ return err;
}
int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct flex_groups *new_groups;
- int size;
+ struct flex_groups **old_groups, **new_groups;
+ int size, i, j;
if (!sbi->s_log_groups_per_flex)
return 0;
@@ -2175,22 +3139,37 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
if (size <= sbi->s_flex_groups_allocated)
return 0;
- size = roundup_pow_of_two(size * sizeof(struct flex_groups));
- new_groups = kvzalloc(size, GFP_KERNEL);
+ new_groups = kvzalloc(roundup_pow_of_two(size *
+ sizeof(*sbi->s_flex_groups)), GFP_KERNEL);
if (!new_groups) {
- ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups",
- size / (int) sizeof(struct flex_groups));
+ ext4_msg(sb, KERN_ERR,
+ "not enough memory for %d flex group pointers", size);
return -ENOMEM;
}
-
- if (sbi->s_flex_groups) {
- memcpy(new_groups, sbi->s_flex_groups,
- (sbi->s_flex_groups_allocated *
- sizeof(struct flex_groups)));
- kvfree(sbi->s_flex_groups);
+ for (i = sbi->s_flex_groups_allocated; i < size; i++) {
+ new_groups[i] = kvzalloc(roundup_pow_of_two(
+ sizeof(struct flex_groups)),
+ GFP_KERNEL);
+ if (!new_groups[i]) {
+ for (j = sbi->s_flex_groups_allocated; j < i; j++)
+ kvfree(new_groups[j]);
+ kvfree(new_groups);
+ ext4_msg(sb, KERN_ERR,
+ "not enough memory for %d flex groups", size);
+ return -ENOMEM;
+ }
}
- sbi->s_flex_groups = new_groups;
- sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups);
+ rcu_read_lock();
+ old_groups = rcu_dereference(sbi->s_flex_groups);
+ if (old_groups)
+ memcpy(new_groups, old_groups,
+ (sbi->s_flex_groups_allocated *
+ sizeof(struct flex_groups *)));
+ rcu_read_unlock();
+ rcu_assign_pointer(sbi->s_flex_groups, new_groups);
+ sbi->s_flex_groups_allocated = size;
+ if (old_groups)
+ ext4_kvfree_array_rcu(old_groups);
return 0;
}
@@ -2198,6 +3177,7 @@ static int ext4_fill_flex_info(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_group_desc *gdp = NULL;
+ struct flex_groups *fg;
ext4_group_t flex_group;
int i, err;
@@ -2215,12 +3195,11 @@ static int ext4_fill_flex_info(struct super_block *sb)
gdp = ext4_get_group_desc(sb, i, NULL);
flex_group = ext4_flex_group(sbi, i);
- atomic_add(ext4_free_inodes_count(sb, gdp),
- &sbi->s_flex_groups[flex_group].free_inodes);
+ fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group);
+ atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes);
atomic64_add(ext4_free_group_clusters(sb, gdp),
- &sbi->s_flex_groups[flex_group].free_clusters);
- atomic_add(ext4_used_dirs_count(sb, gdp),
- &sbi->s_flex_groups[flex_group].used_dirs);
+ &fg->free_clusters);
+ atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs);
}
return 1;
@@ -2236,19 +3215,19 @@ static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
__le32 le_group = cpu_to_le32(block_group);
struct ext4_sb_info *sbi = EXT4_SB(sb);
- if (ext4_has_metadata_csum(sbi->s_sb)) {
+ if (ext4_has_feature_metadata_csum(sbi->s_sb)) {
/* Use new metadata_csum algorithm */
__u32 csum32;
__u16 dummy_csum = 0;
- csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
+ csum32 = ext4_chksum(sbi->s_csum_seed, (__u8 *)&le_group,
sizeof(le_group));
- csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, offset);
- csum32 = ext4_chksum(sbi, csum32, (__u8 *)&dummy_csum,
+ csum32 = ext4_chksum(csum32, (__u8 *)gdp, offset);
+ csum32 = ext4_chksum(csum32, (__u8 *)&dummy_csum,
sizeof(dummy_csum));
offset += sizeof(dummy_csum);
if (offset < sbi->s_desc_size)
- csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp + offset,
+ csum32 = ext4_chksum(csum32, (__u8 *)gdp + offset,
sbi->s_desc_size - offset);
crc = csum32 & 0xFFFF;
@@ -2264,11 +3243,9 @@ static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
crc = crc16(crc, (__u8 *)gdp, offset);
offset += sizeof(gdp->bg_checksum); /* skip checksum */
/* for checksum of struct ext4_group_desc do the rest...*/
- if (ext4_has_feature_64bit(sb) &&
- offset < le16_to_cpu(sbi->s_es->s_desc_size))
+ if (ext4_has_feature_64bit(sb) && offset < sbi->s_desc_size)
crc = crc16(crc, (__u8 *)gdp + offset,
- le16_to_cpu(sbi->s_es->s_desc_size) -
- offset);
+ sbi->s_desc_size - offset);
out:
return cpu_to_le16(crc);
@@ -2300,6 +3277,7 @@ static int ext4_check_descriptors(struct super_block *sb,
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
ext4_fsblk_t last_block;
+ ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0);
ext4_fsblk_t block_bitmap;
ext4_fsblk_t inode_bitmap;
ext4_fsblk_t inode_table;
@@ -2329,6 +3307,16 @@ static int ext4_check_descriptors(struct super_block *sb,
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
"Block bitmap for group %u overlaps "
"superblock", i);
+ if (!sb_rdonly(sb))
+ return 0;
+ }
+ if (block_bitmap >= sb_block + 1 &&
+ block_bitmap <= last_bg_block) {
+ ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+ "Block bitmap for group %u overlaps "
+ "block group descriptors", i);
+ if (!sb_rdonly(sb))
+ return 0;
}
if (block_bitmap < first_block || block_bitmap > last_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
@@ -2341,6 +3329,16 @@ static int ext4_check_descriptors(struct super_block *sb,
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
"Inode bitmap for group %u overlaps "
"superblock", i);
+ if (!sb_rdonly(sb))
+ return 0;
+ }
+ if (inode_bitmap >= sb_block + 1 &&
+ inode_bitmap <= last_bg_block) {
+ ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+ "Inode bitmap for group %u overlaps "
+ "block group descriptors", i);
+ if (!sb_rdonly(sb))
+ return 0;
}
if (inode_bitmap < first_block || inode_bitmap > last_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
@@ -2353,6 +3351,16 @@ static int ext4_check_descriptors(struct super_block *sb,
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
"Inode table for group %u overlaps "
"superblock", i);
+ if (!sb_rdonly(sb))
+ return 0;
+ }
+ if (inode_table >= sb_block + 1 &&
+ inode_table <= last_bg_block) {
+ ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+ "Inode table for group %u overlaps "
+ "block group descriptors", i);
+ if (!sb_rdonly(sb))
+ return 0;
}
if (inode_table < first_block ||
inode_table + sbi->s_itb_per_group - 1 > last_block) {
@@ -2367,7 +3375,7 @@ static int ext4_check_descriptors(struct super_block *sb,
"Checksum for group %u failed (%u!=%u)",
i, le16_to_cpu(ext4_group_desc_csum(sb, i,
gdp)), le16_to_cpu(gdp->bg_checksum));
- if (!(sb->s_flags & MS_RDONLY)) {
+ if (!sb_rdonly(sb)) {
ext4_unlock_group(sb, i);
return 0;
}
@@ -2381,144 +3389,6 @@ static int ext4_check_descriptors(struct super_block *sb,
return 1;
}
-/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
- * the superblock) which were deleted from all directories, but held open by
- * a process at the time of a crash. We walk the list and try to delete these
- * inodes at recovery time (only with a read-write filesystem).
- *
- * In order to keep the orphan inode chain consistent during traversal (in
- * case of crash during recovery), we link each inode into the superblock
- * orphan list_head and handle it the same way as an inode deletion during
- * normal operation (which journals the operations for us).
- *
- * We only do an iget() and an iput() on each inode, which is very safe if we
- * accidentally point at an in-use or already deleted inode. The worst that
- * can happen in this case is that we get a "bit already cleared" message from
- * ext4_free_inode(). The only reason we would point at a wrong inode is if
- * e2fsck was run on this filesystem, and it must have already done the orphan
- * inode cleanup for us, so we can safely abort without any further action.
- */
-static void ext4_orphan_cleanup(struct super_block *sb,
- struct ext4_super_block *es)
-{
- unsigned int s_flags = sb->s_flags;
- int ret, nr_orphans = 0, nr_truncates = 0;
-#ifdef CONFIG_QUOTA
- int i;
-#endif
- if (!es->s_last_orphan) {
- jbd_debug(4, "no orphan inodes to clean up\n");
- return;
- }
-
- if (bdev_read_only(sb->s_bdev)) {
- ext4_msg(sb, KERN_ERR, "write access "
- "unavailable, skipping orphan cleanup");
- return;
- }
-
- /* Check if feature set would not allow a r/w mount */
- if (!ext4_feature_set_ok(sb, 0)) {
- ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
- "unknown ROCOMPAT features");
- return;
- }
-
- if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
- /* don't clear list on RO mount w/ errors */
- if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
- ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
- "clearing orphan list.\n");
- es->s_last_orphan = 0;
- }
- jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
- return;
- }
-
- if (s_flags & MS_RDONLY) {
- ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
- sb->s_flags &= ~MS_RDONLY;
- }
-#ifdef CONFIG_QUOTA
- /* Needed for iput() to work correctly and not trash data */
- sb->s_flags |= MS_ACTIVE;
- /* Turn on quotas so that they are updated correctly */
- for (i = 0; i < EXT4_MAXQUOTAS; i++) {
- if (EXT4_SB(sb)->s_qf_names[i]) {
- int ret = ext4_quota_on_mount(sb, i);
- if (ret < 0)
- ext4_msg(sb, KERN_ERR,
- "Cannot turn on journaled "
- "quota: error %d", ret);
- }
- }
-#endif
-
- while (es->s_last_orphan) {
- struct inode *inode;
-
- /*
- * We may have encountered an error during cleanup; if
- * so, skip the rest.
- */
- if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
- jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
- es->s_last_orphan = 0;
- break;
- }
-
- inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
- if (IS_ERR(inode)) {
- es->s_last_orphan = 0;
- break;
- }
-
- list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
- dquot_initialize(inode);
- if (inode->i_nlink) {
- if (test_opt(sb, DEBUG))
- ext4_msg(sb, KERN_DEBUG,
- "%s: truncating inode %lu to %lld bytes",
- __func__, inode->i_ino, inode->i_size);
- jbd_debug(2, "truncating inode %lu to %lld bytes\n",
- inode->i_ino, inode->i_size);
- inode_lock(inode);
- truncate_inode_pages(inode->i_mapping, inode->i_size);
- ret = ext4_truncate(inode);
- if (ret)
- ext4_std_error(inode->i_sb, ret);
- inode_unlock(inode);
- nr_truncates++;
- } else {
- if (test_opt(sb, DEBUG))
- ext4_msg(sb, KERN_DEBUG,
- "%s: deleting unreferenced inode %lu",
- __func__, inode->i_ino);
- jbd_debug(2, "deleting unreferenced inode %lu\n",
- inode->i_ino);
- nr_orphans++;
- }
- iput(inode); /* The delete magic happens here! */
- }
-
-#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
-
- if (nr_orphans)
- ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
- PLURAL(nr_orphans));
- if (nr_truncates)
- ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
- PLURAL(nr_truncates));
-#ifdef CONFIG_QUOTA
- /* Turn quotas off */
- for (i = 0; i < EXT4_MAXQUOTAS; i++) {
- if (sb_dqopt(sb)->files[i])
- dquot_quota_off(sb, i);
- }
-#endif
- sb->s_flags = s_flags; /* Restore MS_RDONLY status */
-}
-
/*
* Maximal extent format file size.
* Resulting logical blkno at s_maxbytes must fit in our on-disk
@@ -2539,13 +3409,9 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files)
loff_t res;
loff_t upper_limit = MAX_LFS_FILESIZE;
- /* small i_blocks in vfs inode? */
- if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
- /*
- * CONFIG_LBDAF is not enabled implies the inode
- * i_block represent total blocks in 512 bytes
- * 32 == size of vfs inode i_blocks * 8
- */
+ BUILD_BUG_ON(sizeof(blkcnt_t) < sizeof(u64));
+
+ if (!has_huge_files) {
upper_limit = (1LL << 32) - 1;
/* total blocks in file system block size */
@@ -2575,22 +3441,23 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files)
*/
static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
{
- loff_t res = EXT4_NDIR_BLOCKS;
+ loff_t upper_limit, res = EXT4_NDIR_BLOCKS;
int meta_blocks;
- loff_t upper_limit;
- /* This is calculated to be the largest file size for a dense, block
+ unsigned int ppb = 1 << (bits - 2);
+
+ /*
+ * This is calculated to be the largest file size for a dense, block
* mapped file such that the file's total number of 512-byte sectors,
* including data and all indirect blocks, does not exceed (2^48 - 1).
*
* __u32 i_blocks_lo and _u16 i_blocks_high represent the total
* number of 512-byte sectors of the file.
*/
-
- if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
+ if (!has_huge_files) {
/*
- * !has_huge_files or CONFIG_LBDAF not enabled implies that
- * the inode i_block field represents total file blocks in
- * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
+ * !has_huge_files or implies that the inode i_block field
+ * represents total file blocks in 2^32 512-byte sectors ==
+ * size of vfs inode i_blocks * 8
*/
upper_limit = (1LL << 32) - 1;
@@ -2608,23 +3475,38 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
}
+ /* Compute how many blocks we can address by block tree */
+ res += ppb;
+ res += ppb * ppb;
+ res += ((loff_t)ppb) * ppb * ppb;
+ /* Compute how many metadata blocks are needed */
+ meta_blocks = 1;
+ meta_blocks += 1 + ppb;
+ meta_blocks += 1 + ppb + ppb * ppb;
+ /* Does block tree limit file size? */
+ if (res + meta_blocks <= upper_limit)
+ goto check_lfs;
+
+ res = upper_limit;
+ /* How many metadata blocks are needed for addressing upper_limit? */
+ upper_limit -= EXT4_NDIR_BLOCKS;
/* indirect blocks */
meta_blocks = 1;
+ upper_limit -= ppb;
/* double indirect blocks */
- meta_blocks += 1 + (1LL << (bits-2));
- /* tripple indirect blocks */
- meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
-
- upper_limit -= meta_blocks;
- upper_limit <<= bits;
-
- res += 1LL << (bits-2);
- res += 1LL << (2*(bits-2));
- res += 1LL << (3*(bits-2));
+ if (upper_limit < ppb * ppb) {
+ meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb);
+ res -= meta_blocks;
+ goto check_lfs;
+ }
+ meta_blocks += 1 + ppb;
+ upper_limit -= ppb * ppb;
+ /* tripple indirect blocks for the rest */
+ meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb) +
+ DIV_ROUND_UP_ULL(upper_limit, ppb*ppb);
+ res -= meta_blocks;
+check_lfs:
res <<= bits;
- if (res > upper_limit)
- res = upper_limit;
-
if (res > MAX_LFS_FILESIZE)
res = MAX_LFS_FILESIZE;
@@ -2653,7 +3535,7 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb,
* compensate.
*/
if (sb->s_blocksize == 1024 && nr == 0 &&
- le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) == 0)
+ le32_to_cpu(sbi->s_es->s_first_data_block) == 0)
has_super++;
return (has_super + ext4_group_first_block_no(sb, bg));
@@ -2702,7 +3584,7 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
* Returns 1 if this filesystem can be mounted as requested,
* 0 if it cannot be.
*/
-static int ext4_feature_set_ok(struct super_block *sb, int readonly)
+int ext4_feature_set_ok(struct super_block *sb, int readonly)
{
if (ext4_has_unknown_ext4_incompat_features(sb)) {
ext4_msg(sb, KERN_ERR,
@@ -2713,12 +3595,19 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
return 0;
}
+ if (!IS_ENABLED(CONFIG_UNICODE) && ext4_has_feature_casefold(sb)) {
+ ext4_msg(sb, KERN_ERR,
+ "Filesystem with casefold feature cannot be "
+ "mounted without CONFIG_UNICODE");
+ return 0;
+ }
+
if (readonly)
return 1;
if (ext4_has_feature_readonly(sb)) {
ext4_msg(sb, KERN_INFO, "filesystem is read-only");
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
return 1;
}
@@ -2730,18 +3619,6 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
~EXT4_FEATURE_RO_COMPAT_SUPP));
return 0;
}
- /*
- * Large file size enabled file system can only be mounted
- * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF
- */
- if (ext4_has_feature_huge_file(sb)) {
- if (sizeof(blkcnt_t) < sizeof(u64)) {
- ext4_msg(sb, KERN_ERR, "Filesystem with huge files "
- "cannot be mounted RDWR without "
- "CONFIG_LBDAF");
- return 0;
- }
- }
if (ext4_has_feature_bigalloc(sb) && !ext4_has_feature_extents(sb)) {
ext4_msg(sb, KERN_ERR,
"Can't support bigalloc feature without "
@@ -2749,17 +3626,11 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
return 0;
}
-#ifndef CONFIG_QUOTA
- if (ext4_has_feature_quota(sb) && !readonly) {
- ext4_msg(sb, KERN_ERR,
- "Filesystem with quota feature cannot be mounted RDWR "
- "without CONFIG_QUOTA");
- return 0;
- }
- if (ext4_has_feature_project(sb) && !readonly) {
+#if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2)
+ if (!readonly && (ext4_has_feature_quota(sb) ||
+ ext4_has_feature_project(sb))) {
ext4_msg(sb, KERN_ERR,
- "Filesystem with project quota feature cannot be mounted RDWR "
- "without CONFIG_QUOTA");
+ "The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2");
return 0;
}
#endif /* CONFIG_QUOTA */
@@ -2770,22 +3641,20 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
* This function is called once a day if we have errors logged
* on the file system
*/
-static void print_daily_error_info(unsigned long arg)
+static void print_daily_error_info(struct timer_list *t)
{
- struct super_block *sb = (struct super_block *) arg;
- struct ext4_sb_info *sbi;
- struct ext4_super_block *es;
-
- sbi = EXT4_SB(sb);
- es = sbi->s_es;
+ struct ext4_sb_info *sbi = timer_container_of(sbi, t, s_err_report);
+ struct super_block *sb = sbi->s_sb;
+ struct ext4_super_block *es = sbi->s_es;
if (es->s_error_count)
/* fsck newer than v1.41.13 is needed to clean this condition. */
ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
le32_to_cpu(es->s_error_count));
if (es->s_first_error_time) {
- printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d",
- sb->s_id, le32_to_cpu(es->s_first_error_time),
+ printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %llu: %.*s:%d",
+ sb->s_id,
+ ext4_get_tstamp(es, s_first_error_time),
(int) sizeof(es->s_first_error_func),
es->s_first_error_func,
le32_to_cpu(es->s_first_error_line));
@@ -2798,8 +3667,9 @@ static void print_daily_error_info(unsigned long arg)
printk(KERN_CONT "\n");
}
if (es->s_last_error_time) {
- printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d",
- sb->s_id, le32_to_cpu(es->s_last_error_time),
+ printk(KERN_NOTICE "EXT4-fs (%s): last error at time %llu: %.*s:%d",
+ sb->s_id,
+ ext4_get_tstamp(es, s_last_error_time),
(int) sizeof(es->s_last_error_func),
es->s_last_error_func,
le32_to_cpu(es->s_last_error_line));
@@ -2818,15 +3688,32 @@ static void print_daily_error_info(unsigned long arg)
static int ext4_run_li_request(struct ext4_li_request *elr)
{
struct ext4_group_desc *gdp = NULL;
- ext4_group_t group, ngroups;
- struct super_block *sb;
- unsigned long timeout = 0;
+ struct super_block *sb = elr->lr_super;
+ ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+ ext4_group_t group = elr->lr_next_group;
+ unsigned int prefetch_ios = 0;
int ret = 0;
+ int nr = EXT4_SB(sb)->s_mb_prefetch;
+ u64 start_time;
+
+ if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) {
+ elr->lr_next_group = ext4_mb_prefetch(sb, group, nr, &prefetch_ios);
+ ext4_mb_prefetch_fini(sb, elr->lr_next_group, nr);
+ trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group, nr);
+ if (group >= elr->lr_next_group) {
+ ret = 1;
+ if (elr->lr_first_not_zeroed != ngroups &&
+ !ext4_emergency_state(sb) && !sb_rdonly(sb) &&
+ test_opt(sb, INIT_INODE_TABLE)) {
+ elr->lr_next_group = elr->lr_first_not_zeroed;
+ elr->lr_mode = EXT4_LI_MODE_ITABLE;
+ ret = 0;
+ }
+ }
+ return ret;
+ }
- sb = elr->lr_super;
- ngroups = EXT4_SB(sb)->s_groups_count;
-
- for (group = elr->lr_next_group; group < ngroups; group++) {
+ for (; group < ngroups; group++) {
gdp = ext4_get_group_desc(sb, group, NULL);
if (!gdp) {
ret = 1;
@@ -2841,13 +3728,13 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
ret = 1;
if (!ret) {
- timeout = jiffies;
+ start_time = ktime_get_ns();
ret = ext4_init_inode_table(sb, group,
elr->lr_timeout ? 0 : 1);
+ trace_ext4_lazy_itable_init(sb, group);
if (elr->lr_timeout == 0) {
- timeout = (jiffies - timeout) *
- elr->lr_sbi->s_li_wait_mult;
- elr->lr_timeout = timeout;
+ elr->lr_timeout = nsecs_to_jiffies((ktime_get_ns() - start_time) *
+ EXT4_SB(elr->lr_super)->s_li_wait_mult);
}
elr->lr_next_sched = jiffies + elr->lr_timeout;
elr->lr_next_group = group + 1;
@@ -2861,15 +3748,11 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
*/
static void ext4_remove_li_request(struct ext4_li_request *elr)
{
- struct ext4_sb_info *sbi;
-
if (!elr)
return;
- sbi = elr->lr_sbi;
-
list_del(&elr->lr_request);
- sbi->s_li_request = NULL;
+ EXT4_SB(elr->lr_super)->s_li_request = NULL;
kfree(elr);
}
@@ -2900,17 +3783,19 @@ static struct task_struct *ext4_lazyinit_task;
*/
static int ext4_lazyinit_thread(void *arg)
{
- struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
+ struct ext4_lazy_init *eli = arg;
struct list_head *pos, *n;
struct ext4_li_request *elr;
unsigned long next_wakeup, cur;
BUG_ON(NULL == eli);
+ set_freezable();
cont_thread:
while (true) {
- next_wakeup = MAX_JIFFY_OFFSET;
+ bool next_wakeup_initialized = false;
+ next_wakeup = 0;
mutex_lock(&eli->li_list_mtx);
if (list_empty(&eli->li_request_list)) {
mutex_unlock(&eli->li_list_mtx);
@@ -2923,8 +3808,11 @@ cont_thread:
lr_request);
if (time_before(jiffies, elr->lr_next_sched)) {
- if (time_before(elr->lr_next_sched, next_wakeup))
+ if (!next_wakeup_initialized ||
+ time_before(elr->lr_next_sched, next_wakeup)) {
next_wakeup = elr->lr_next_sched;
+ next_wakeup_initialized = true;
+ }
continue;
}
if (down_read_trylock(&elr->lr_super->s_umount)) {
@@ -2950,19 +3838,20 @@ cont_thread:
}
if (!progress) {
elr->lr_next_sched = jiffies +
- (prandom_u32()
- % (EXT4_DEF_LI_MAX_START_DELAY * HZ));
+ get_random_u32_below(EXT4_DEF_LI_MAX_START_DELAY * HZ);
}
- if (time_before(elr->lr_next_sched, next_wakeup))
+ if (!next_wakeup_initialized ||
+ time_before(elr->lr_next_sched, next_wakeup)) {
next_wakeup = elr->lr_next_sched;
+ next_wakeup_initialized = true;
+ }
}
mutex_unlock(&eli->li_list_mtx);
try_to_freeze();
cur = jiffies;
- if ((time_after_eq(cur, next_wakeup)) ||
- (MAX_JIFFY_OFFSET == next_wakeup)) {
+ if (!next_wakeup_initialized || time_after_eq(cur, next_wakeup)) {
cond_resched();
continue;
}
@@ -3042,6 +3931,9 @@ static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
struct ext4_group_desc *gdp = NULL;
+ if (!ext4_has_group_desc_csum(sb))
+ return ngroups;
+
for (group = 0; group < ngroups; group++) {
gdp = ext4_get_group_desc(sb, group, NULL);
if (!gdp)
@@ -3075,7 +3967,6 @@ static int ext4_li_info_new(void)
static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
ext4_group_t start)
{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_li_request *elr;
elr = kzalloc(sizeof(*elr), GFP_KERNEL);
@@ -3083,16 +3974,20 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
return NULL;
elr->lr_super = sb;
- elr->lr_sbi = sbi;
- elr->lr_next_group = start;
+ elr->lr_first_not_zeroed = start;
+ if (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS)) {
+ elr->lr_mode = EXT4_LI_MODE_ITABLE;
+ elr->lr_next_group = start;
+ } else {
+ elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
+ }
/*
* Randomize first schedule time of the request to
* spread the inode table initialization requests
* better.
*/
- elr->lr_next_sched = jiffies + (prandom_u32() %
- (EXT4_DEF_LI_MAX_START_DELAY * HZ));
+ elr->lr_next_sched = jiffies + get_random_u32_below(EXT4_DEF_LI_MAX_START_DELAY * HZ);
return elr;
}
@@ -3101,7 +3996,7 @@ int ext4_register_li_request(struct super_block *sb,
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_li_request *elr = NULL;
- ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+ ext4_group_t ngroups = sbi->s_groups_count;
int ret = 0;
mutex_lock(&ext4_li_mtx);
@@ -3114,9 +4009,9 @@ int ext4_register_li_request(struct super_block *sb,
goto out;
}
- if (first_not_zeroed == ngroups ||
- (sb->s_flags & MS_RDONLY) ||
- !test_opt(sb, INIT_INODE_TABLE))
+ if (ext4_emergency_state(sb) || sb_rdonly(sb) ||
+ (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS) &&
+ (first_not_zeroed == ngroups || !test_opt(sb, INIT_INODE_TABLE))))
goto out;
elr = ext4_li_request_new(sb, first_not_zeroed);
@@ -3177,7 +4072,7 @@ static int set_journal_csum_feature_set(struct super_block *sb)
int compat, incompat;
struct ext4_sb_info *sbi = EXT4_SB(sb);
- if (ext4_has_metadata_csum(sb)) {
+ if (ext4_has_feature_metadata_csum(sb)) {
/* journal checksum v3 */
compat = 0;
incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
@@ -3233,9 +4128,11 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp,
ext4_fsblk_t first_block, last_block, b;
ext4_group_t i, ngroups = ext4_get_groups_count(sb);
int s, j, count = 0;
+ int has_super = ext4_bg_has_super(sb, grp);
if (!ext4_has_feature_bigalloc(sb))
- return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
+ return (has_super + ext4_bg_num_gdb(sb, grp) +
+ (has_super ? le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0) +
sbi->s_itb_per_group + 2);
first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
@@ -3294,7 +4191,7 @@ int ext4_calculate_overhead(struct super_block *sb)
unsigned int j_blocks, j_inum = le32_to_cpu(es->s_journal_inum);
ext4_group_t i, ngroups = ext4_get_groups_count(sb);
ext4_fsblk_t overhead = 0;
- char *buf = (char *) get_zeroed_page(GFP_NOFS);
+ char *buf = kvmalloc(sb->s_blocksize, GFP_NOFS | __GFP_ZERO);
if (!buf)
return -ENOMEM;
@@ -3319,7 +4216,7 @@ int ext4_calculate_overhead(struct super_block *sb)
blks = count_overhead(sb, i, buf);
overhead += blks;
if (blks)
- memset(buf, 0, PAGE_SIZE);
+ memset(buf, 0, sb->s_blocksize);
cond_resched();
}
@@ -3327,11 +4224,12 @@ int ext4_calculate_overhead(struct super_block *sb)
* Add the internal journal blocks whether the journal has been
* loaded or not
*/
- if (sbi->s_journal && !sbi->journal_bdev)
- overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
- else if (ext4_has_feature_journal(sb) && !sbi->s_journal) {
+ if (sbi->s_journal && !sbi->s_journal_bdev_file)
+ overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_total_len);
+ else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
+ /* j_inum for internal journal is non-zero */
j_inode = ext4_get_journal_inode(sb, j_inum);
- if (j_inode) {
+ if (!IS_ERR(j_inode)) {
j_blocks = j_inode->i_size >> sb->s_blocksize_bits;
overhead += EXT4_NUM_B2C(sbi, j_blocks);
iput(j_inode);
@@ -3341,7 +4239,7 @@ int ext4_calculate_overhead(struct super_block *sb)
}
sbi->s_overhead = overhead;
smp_wmb();
- free_page((unsigned long) buf);
+ kvfree(buf);
return 0;
}
@@ -3375,124 +4273,75 @@ static void ext4_set_resv_clusters(struct super_block *sb)
atomic64_set(&sbi->s_resv_clusters, resv_clusters);
}
-static int ext4_fill_super(struct super_block *sb, void *data, int silent)
+static const char *ext4_quota_mode(struct super_block *sb)
{
- char *orig_data = kstrdup(data, GFP_KERNEL);
- struct buffer_head *bh;
- struct ext4_super_block *es = NULL;
- struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
- ext4_fsblk_t block;
- ext4_fsblk_t sb_block = get_sb_block(&data);
- ext4_fsblk_t logical_sb_block;
- unsigned long offset = 0;
- unsigned long journal_devnum = 0;
- unsigned long def_mount_opts;
- struct inode *root;
- const char *descr;
- int ret = -ENOMEM;
- int blocksize, clustersize;
- unsigned int db_count;
- unsigned int i;
- int needs_recovery, has_huge_files, has_bigalloc;
- __u64 blocks_count;
- int err = 0;
- unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
- ext4_group_t first_not_zeroed;
+#ifdef CONFIG_QUOTA
+ if (!ext4_quota_capable(sb))
+ return "none";
- if ((data && !orig_data) || !sbi)
- goto out_free_base;
+ if (EXT4_SB(sb)->s_journal && ext4_is_quota_journalled(sb))
+ return "journalled";
+ else
+ return "writeback";
+#else
+ return "disabled";
+#endif
+}
- sbi->s_blockgroup_lock =
- kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
- if (!sbi->s_blockgroup_lock)
- goto out_free_base;
+static void ext4_setup_csum_trigger(struct super_block *sb,
+ enum ext4_journal_trigger_type type,
+ void (*trigger)(
+ struct jbd2_buffer_trigger_type *type,
+ struct buffer_head *bh,
+ void *mapped_data,
+ size_t size))
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
- sb->s_fs_info = sbi;
- sbi->s_sb = sb;
- sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
- sbi->s_sb_block = sb_block;
- if (sb->s_bdev->bd_part)
- sbi->s_sectors_written_start =
- part_stat_read(sb->s_bdev->bd_part, sectors[1]);
+ sbi->s_journal_triggers[type].sb = sb;
+ sbi->s_journal_triggers[type].tr_triggers.t_frozen = trigger;
+}
- /* Cleanup superblock name */
- strreplace(sb->s_id, '/', '!');
+static void ext4_free_sbi(struct ext4_sb_info *sbi)
+{
+ if (!sbi)
+ return;
- /* -EINVAL is default */
- ret = -EINVAL;
- blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
- if (!blocksize) {
- ext4_msg(sb, KERN_ERR, "unable to set blocksize");
- goto out_fail;
- }
+ kfree(sbi->s_blockgroup_lock);
+ fs_put_dax(sbi->s_daxdev, NULL);
+ kfree(sbi);
+}
- /*
- * The ext4 superblock will not be buffer aligned for other than 1kB
- * block sizes. We need to calculate the offset from buffer start.
- */
- if (blocksize != EXT4_MIN_BLOCK_SIZE) {
- logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
- offset = do_div(logical_sb_block, blocksize);
- } else {
- logical_sb_block = sb_block;
- }
+static struct ext4_sb_info *ext4_alloc_sbi(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi;
- if (!(bh = sb_bread_unmovable(sb, logical_sb_block))) {
- ext4_msg(sb, KERN_ERR, "unable to read superblock");
- goto out_fail;
- }
- /*
- * Note: s_es must be initialized as soon as possible because
- * some ext4 macro-instructions depend on its value
- */
- es = (struct ext4_super_block *) (bh->b_data + offset);
- sbi->s_es = es;
- sb->s_magic = le16_to_cpu(es->s_magic);
- if (sb->s_magic != EXT4_SUPER_MAGIC)
- goto cantfind_ext4;
- sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
+ sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
+ if (!sbi)
+ return NULL;
- /* Warn if metadata_csum and gdt_csum are both set. */
- if (ext4_has_feature_metadata_csum(sb) &&
- ext4_has_feature_gdt_csum(sb))
- ext4_warning(sb, "metadata_csum and uninit_bg are "
- "redundant flags; please run fsck.");
+ sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off,
+ NULL, NULL);
- /* Check for a known checksum algorithm */
- if (!ext4_verify_csum_type(sb, es)) {
- ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
- "unknown checksum algorithm.");
- silent = 1;
- goto cantfind_ext4;
- }
-
- /* Load the checksum driver */
- if (ext4_has_feature_metadata_csum(sb) ||
- ext4_has_feature_ea_inode(sb)) {
- sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
- if (IS_ERR(sbi->s_chksum_driver)) {
- ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
- ret = PTR_ERR(sbi->s_chksum_driver);
- sbi->s_chksum_driver = NULL;
- goto failed_mount;
- }
- }
+ sbi->s_blockgroup_lock =
+ kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
- /* Check superblock checksum */
- if (!ext4_superblock_csum_verify(sb, es)) {
- ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
- "invalid superblock checksum. Run e2fsck?");
- silent = 1;
- ret = -EFSBADCRC;
- goto cantfind_ext4;
- }
+ if (!sbi->s_blockgroup_lock)
+ goto err_out;
- /* Precompute checksum seed for all metadata */
- if (ext4_has_feature_csum_seed(sb))
- sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
- else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb))
- sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
- sizeof(es->s_uuid));
+ sb->s_fs_info = sbi;
+ sbi->s_sb = sb;
+ return sbi;
+err_out:
+ fs_put_dax(sbi->s_daxdev, NULL);
+ kfree(sbi);
+ return NULL;
+}
+
+static void ext4_set_def_opts(struct super_block *sb,
+ struct ext4_super_block *es)
+{
+ unsigned long def_mount_opts;
/* Set defaults before we parse the mount options */
def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
@@ -3508,8 +4357,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
#ifdef CONFIG_EXT4_FS_POSIX_ACL
set_opt(sb, POSIX_ACL);
#endif
+ if (ext4_has_feature_fast_commit(sb))
+ set_opt2(sb, JOURNAL_FAST_COMMIT);
/* don't forget to enable journal_csum when metadata_csum is enabled. */
- if (ext4_has_metadata_csum(sb))
+ if (ext4_has_feature_metadata_csum(sb))
set_opt(sb, JOURNAL_CHECKSUM);
if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
@@ -3519,9 +4370,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
set_opt(sb, WRITEBACK_DATA);
- if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
+ if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_PANIC)
set_opt(sb, ERRORS_PANIC);
- else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
+ else if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_CONTINUE)
set_opt(sb, ERRORS_CONT);
else
set_opt(sb, ERRORS_RO);
@@ -3530,12 +4381,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (def_mount_opts & EXT4_DEFM_DISCARD)
set_opt(sb, DISCARD);
- sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
- sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
- sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
- sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
- sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
-
if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
set_opt(sb, BARRIER);
@@ -3547,63 +4392,281 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
set_opt(sb, DELALLOC);
- /*
- * set default s_li_wait_mult for lazyinit, for the case there is
- * no mount option specified.
- */
- sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
+ set_opt(sb, DIOREAD_NOLOCK);
+}
- if (sbi->s_es->s_mount_opts[0]) {
- char *s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
- sizeof(sbi->s_es->s_mount_opts),
- GFP_KERNEL);
- if (!s_mount_opts)
- goto failed_mount;
- if (!parse_options(s_mount_opts, sb, &journal_devnum,
- &journal_ioprio, 0)) {
- ext4_msg(sb, KERN_WARNING,
- "failed to parse options in superblock: %s",
- s_mount_opts);
+static int ext4_handle_clustersize(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+ int clustersize;
+
+ /* Handle clustersize */
+ clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
+ if (ext4_has_feature_bigalloc(sb)) {
+ if (clustersize < sb->s_blocksize) {
+ ext4_msg(sb, KERN_ERR,
+ "cluster size (%d) smaller than "
+ "block size (%lu)", clustersize, sb->s_blocksize);
+ return -EINVAL;
}
- kfree(s_mount_opts);
+ sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
+ le32_to_cpu(es->s_log_block_size);
+ } else {
+ if (clustersize != sb->s_blocksize) {
+ ext4_msg(sb, KERN_ERR,
+ "fragment/cluster size (%d) != "
+ "block size (%lu)", clustersize, sb->s_blocksize);
+ return -EINVAL;
+ }
+ if (sbi->s_blocks_per_group > sb->s_blocksize * 8) {
+ ext4_msg(sb, KERN_ERR,
+ "#blocks per group too big: %lu",
+ sbi->s_blocks_per_group);
+ return -EINVAL;
+ }
+ sbi->s_cluster_bits = 0;
}
- sbi->s_def_mount_opt = sbi->s_mount_opt;
- if (!parse_options((char *) data, sb, &journal_devnum,
- &journal_ioprio, 0))
- goto failed_mount;
+ sbi->s_clusters_per_group = le32_to_cpu(es->s_clusters_per_group);
+ if (sbi->s_clusters_per_group > sb->s_blocksize * 8) {
+ ext4_msg(sb, KERN_ERR, "#clusters per group too big: %lu",
+ sbi->s_clusters_per_group);
+ return -EINVAL;
+ }
+ if (sbi->s_blocks_per_group !=
+ (sbi->s_clusters_per_group * (clustersize / sb->s_blocksize))) {
+ ext4_msg(sb, KERN_ERR,
+ "blocks per group (%lu) and clusters per group (%lu) inconsistent",
+ sbi->s_blocks_per_group, sbi->s_clusters_per_group);
+ return -EINVAL;
+ }
+ sbi->s_cluster_ratio = clustersize / sb->s_blocksize;
- if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
- printk_once(KERN_WARNING "EXT4-fs: Warning: mounting "
- "with data=journal disables delayed "
- "allocation and O_DIRECT support!\n");
- if (test_opt2(sb, EXPLICIT_DELALLOC)) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "both data=journal and delalloc");
- goto failed_mount;
+ /* Do we have standard group size of clustersize * 8 blocks ? */
+ if (sbi->s_blocks_per_group == clustersize << 3)
+ set_opt2(sb, STD_GROUP_SIZE);
+
+ return 0;
+}
+
+/*
+ * ext4_atomic_write_init: Initializes filesystem min & max atomic write units.
+ * With non-bigalloc filesystem awu will be based upon filesystem blocksize
+ * & bdev awu units.
+ * With bigalloc it will be based upon bigalloc cluster size & bdev awu units.
+ * @sb: super block
+ */
+static void ext4_atomic_write_init(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct block_device *bdev = sb->s_bdev;
+ unsigned int clustersize = EXT4_CLUSTER_SIZE(sb);
+
+ if (!bdev_can_atomic_write(bdev))
+ return;
+
+ if (!ext4_has_feature_extents(sb))
+ return;
+
+ sbi->s_awu_min = max(sb->s_blocksize,
+ bdev_atomic_write_unit_min_bytes(bdev));
+ sbi->s_awu_max = min(clustersize,
+ bdev_atomic_write_unit_max_bytes(bdev));
+ if (sbi->s_awu_min && sbi->s_awu_max &&
+ sbi->s_awu_min <= sbi->s_awu_max) {
+ ext4_msg(sb, KERN_NOTICE, "Supports (experimental) DIO atomic writes awu_min: %u, awu_max: %u",
+ sbi->s_awu_min, sbi->s_awu_max);
+ } else {
+ sbi->s_awu_min = 0;
+ sbi->s_awu_max = 0;
+ }
+}
+
+static void ext4_fast_commit_init(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ /* Initialize fast commit stuff */
+ atomic_set(&sbi->s_fc_subtid, 0);
+ INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]);
+ INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]);
+ INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]);
+ INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
+ sbi->s_fc_bytes = 0;
+ ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+ sbi->s_fc_ineligible_tid = 0;
+ mutex_init(&sbi->s_fc_lock);
+ memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
+ sbi->s_fc_replay_state.fc_regions = NULL;
+ sbi->s_fc_replay_state.fc_regions_size = 0;
+ sbi->s_fc_replay_state.fc_regions_used = 0;
+ sbi->s_fc_replay_state.fc_regions_valid = 0;
+ sbi->s_fc_replay_state.fc_modified_inodes = NULL;
+ sbi->s_fc_replay_state.fc_modified_inodes_size = 0;
+ sbi->s_fc_replay_state.fc_modified_inodes_used = 0;
+}
+
+static int ext4_inode_info_init(struct super_block *sb,
+ struct ext4_super_block *es)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
+ sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
+ sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
+ } else {
+ sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
+ sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
+ if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
+ ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
+ sbi->s_first_ino);
+ return -EINVAL;
}
- if (test_opt(sb, DIOREAD_NOLOCK)) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "both data=journal and dioread_nolock");
- goto failed_mount;
+ if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
+ (!is_power_of_2(sbi->s_inode_size)) ||
+ (sbi->s_inode_size > sb->s_blocksize)) {
+ ext4_msg(sb, KERN_ERR,
+ "unsupported inode size: %d",
+ sbi->s_inode_size);
+ ext4_msg(sb, KERN_ERR, "blocksize: %lu", sb->s_blocksize);
+ return -EINVAL;
}
- if (test_opt(sb, DAX)) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "both data=journal and dax");
- goto failed_mount;
+ /*
+ * i_atime_extra is the last extra field available for
+ * [acm]times in struct ext4_inode. Checking for that
+ * field should suffice to ensure we have extra space
+ * for all three.
+ */
+ if (sbi->s_inode_size >= offsetof(struct ext4_inode, i_atime_extra) +
+ sizeof(((struct ext4_inode *)0)->i_atime_extra)) {
+ sb->s_time_gran = 1;
+ sb->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
+ } else {
+ sb->s_time_gran = NSEC_PER_SEC;
+ sb->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
}
- if (ext4_has_feature_encrypt(sb)) {
- ext4_msg(sb, KERN_WARNING,
- "encrypted files will use data=ordered "
- "instead of data journaling mode");
+ sb->s_time_min = EXT4_TIMESTAMP_MIN;
+ }
+
+ if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
+ sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
+ EXT4_GOOD_OLD_INODE_SIZE;
+ if (ext4_has_feature_extra_isize(sb)) {
+ unsigned v, max = (sbi->s_inode_size -
+ EXT4_GOOD_OLD_INODE_SIZE);
+
+ v = le16_to_cpu(es->s_want_extra_isize);
+ if (v > max) {
+ ext4_msg(sb, KERN_ERR,
+ "bad s_want_extra_isize: %d", v);
+ return -EINVAL;
+ }
+ if (sbi->s_want_extra_isize < v)
+ sbi->s_want_extra_isize = v;
+
+ v = le16_to_cpu(es->s_min_extra_isize);
+ if (v > max) {
+ ext4_msg(sb, KERN_ERR,
+ "bad s_min_extra_isize: %d", v);
+ return -EINVAL;
+ }
+ if (sbi->s_want_extra_isize < v)
+ sbi->s_want_extra_isize = v;
}
- if (test_opt(sb, DELALLOC))
- clear_opt(sb, DELALLOC);
- } else {
- sb->s_iflags |= SB_I_CGROUPWB;
}
- sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
- (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
+ return 0;
+}
+
+#if IS_ENABLED(CONFIG_UNICODE)
+static int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es)
+{
+ const struct ext4_sb_encodings *encoding_info;
+ struct unicode_map *encoding;
+ __u16 encoding_flags = le16_to_cpu(es->s_encoding_flags);
+
+ if (!ext4_has_feature_casefold(sb) || sb->s_encoding)
+ return 0;
+
+ encoding_info = ext4_sb_read_encoding(es);
+ if (!encoding_info) {
+ ext4_msg(sb, KERN_ERR,
+ "Encoding requested by superblock is unknown");
+ return -EINVAL;
+ }
+
+ encoding = utf8_load(encoding_info->version);
+ if (IS_ERR(encoding)) {
+ ext4_msg(sb, KERN_ERR,
+ "can't mount with superblock charset: %s-%u.%u.%u "
+ "not supported by the kernel. flags: 0x%x.",
+ encoding_info->name,
+ unicode_major(encoding_info->version),
+ unicode_minor(encoding_info->version),
+ unicode_rev(encoding_info->version),
+ encoding_flags);
+ return -EINVAL;
+ }
+ ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: "
+ "%s-%u.%u.%u with flags 0x%hx", encoding_info->name,
+ unicode_major(encoding_info->version),
+ unicode_minor(encoding_info->version),
+ unicode_rev(encoding_info->version),
+ encoding_flags);
+
+ sb->s_encoding = encoding;
+ sb->s_encoding_flags = encoding_flags;
+
+ return 0;
+}
+#else
+static inline int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es)
+{
+ return 0;
+}
+#endif
+
+static int ext4_init_metadata_csum(struct super_block *sb, struct ext4_super_block *es)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ /* Warn if metadata_csum and gdt_csum are both set. */
+ if (ext4_has_feature_metadata_csum(sb) &&
+ ext4_has_feature_gdt_csum(sb))
+ ext4_warning(sb, "metadata_csum and uninit_bg are "
+ "redundant flags; please run fsck.");
+
+ /* Check for a known checksum algorithm */
+ if (!ext4_verify_csum_type(sb, es)) {
+ ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
+ "unknown checksum algorithm.");
+ return -EINVAL;
+ }
+ ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE,
+ ext4_orphan_file_block_trigger);
+
+ /* Check superblock checksum */
+ if (!ext4_superblock_csum_verify(sb, es)) {
+ ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
+ "invalid superblock checksum. Run e2fsck?");
+ return -EFSBADCRC;
+ }
+
+ /* Precompute checksum seed for all metadata */
+ if (ext4_has_feature_csum_seed(sb))
+ sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
+ else if (ext4_has_feature_metadata_csum(sb) ||
+ ext4_has_feature_ea_inode(sb))
+ sbi->s_csum_seed = ext4_chksum(~0, es->s_uuid,
+ sizeof(es->s_uuid));
+ return 0;
+}
+
+static int ext4_check_feature_compatibility(struct super_block *sb,
+ struct ext4_super_block *es,
+ int silent)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
(ext4_has_compat_features(sb) ||
@@ -3618,7 +4681,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (ext4_has_feature_64bit(sb)) {
ext4_msg(sb, KERN_ERR,
"The Hurd can't support 64-bit file systems");
- goto failed_mount;
+ return -EINVAL;
}
/*
@@ -3628,7 +4691,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (ext4_has_feature_ea_inode(sb)) {
ext4_msg(sb, KERN_ERR,
"ea_inode feature is not supported for Hurd");
- goto failed_mount;
+ return -EINVAL;
}
}
@@ -3637,9 +4700,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
"using the ext4 subsystem");
else {
+ /*
+ * If we're probing be silent, if this looks like
+ * it's actually an ext[34] filesystem.
+ */
+ if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
+ return -EINVAL;
ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
"to feature incompatibilities");
- goto failed_mount;
+ return -EINVAL;
}
}
@@ -3648,9 +4717,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
"using the ext4 subsystem");
else {
+ /*
+ * If we're probing be silent, if this looks like
+ * it's actually an ext4 filesystem.
+ */
+ if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
+ return -EINVAL;
ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
"to feature incompatibilities");
- goto failed_mount;
+ return -EINVAL;
}
}
@@ -3659,204 +4734,51 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
* previously didn't change the revision level when setting the flags,
* so there is a chance incompat flags are set on a rev 0 filesystem.
*/
- if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
- goto failed_mount;
-
- blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
- if (blocksize < EXT4_MIN_BLOCK_SIZE ||
- blocksize > EXT4_MAX_BLOCK_SIZE) {
- ext4_msg(sb, KERN_ERR,
- "Unsupported filesystem blocksize %d (%d log_block_size)",
- blocksize, le32_to_cpu(es->s_log_block_size));
- goto failed_mount;
- }
- if (le32_to_cpu(es->s_log_block_size) >
- (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
- ext4_msg(sb, KERN_ERR,
- "Invalid log block size: %u",
- le32_to_cpu(es->s_log_block_size));
- goto failed_mount;
- }
-
- if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
- ext4_msg(sb, KERN_ERR,
- "Number of reserved GDT blocks insanely large: %d",
- le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
- goto failed_mount;
- }
-
- if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
- err = bdev_dax_supported(sb, blocksize);
- if (err)
- goto failed_mount;
- }
+ if (!ext4_feature_set_ok(sb, (sb_rdonly(sb))))
+ return -EINVAL;
- if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
- ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d",
- es->s_encryption_level);
- goto failed_mount;
+ if (sbi->s_daxdev) {
+ if (sb->s_blocksize == PAGE_SIZE)
+ set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
+ else
+ ext4_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n");
}
- if (sb->s_blocksize != blocksize) {
- /* Validate the filesystem blocksize */
- if (!sb_set_blocksize(sb, blocksize)) {
- ext4_msg(sb, KERN_ERR, "bad block size %d",
- blocksize);
- goto failed_mount;
- }
-
- brelse(bh);
- logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
- offset = do_div(logical_sb_block, blocksize);
- bh = sb_bread_unmovable(sb, logical_sb_block);
- if (!bh) {
- ext4_msg(sb, KERN_ERR,
- "Can't read superblock on 2nd try");
- goto failed_mount;
+ if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
+ if (ext4_has_feature_inline_data(sb)) {
+ ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
+ " that may contain inline data");
+ return -EINVAL;
}
- es = (struct ext4_super_block *)(bh->b_data + offset);
- sbi->s_es = es;
- if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
+ if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) {
ext4_msg(sb, KERN_ERR,
- "Magic mismatch, very weird!");
- goto failed_mount;
+ "DAX unsupported by block device.");
+ return -EINVAL;
}
}
- has_huge_files = ext4_has_feature_huge_file(sb);
- sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
- has_huge_files);
- sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
-
- if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
- sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
- sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
- } else {
- sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
- sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
- if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
- (!is_power_of_2(sbi->s_inode_size)) ||
- (sbi->s_inode_size > blocksize)) {
- ext4_msg(sb, KERN_ERR,
- "unsupported inode size: %d",
- sbi->s_inode_size);
- goto failed_mount;
- }
- if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
- sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
+ if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
+ ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d",
+ es->s_encryption_level);
+ return -EINVAL;
}
- sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
- if (ext4_has_feature_64bit(sb)) {
- if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
- sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
- !is_power_of_2(sbi->s_desc_size)) {
- ext4_msg(sb, KERN_ERR,
- "unsupported descriptor size %lu",
- sbi->s_desc_size);
- goto failed_mount;
- }
- } else
- sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
-
- sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
- sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
-
- sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
- if (sbi->s_inodes_per_block == 0)
- goto cantfind_ext4;
- if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
- sbi->s_inodes_per_group > blocksize * 8) {
- ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
- sbi->s_blocks_per_group);
- goto failed_mount;
- }
- sbi->s_itb_per_group = sbi->s_inodes_per_group /
- sbi->s_inodes_per_block;
- sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
- sbi->s_sbh = bh;
- sbi->s_mount_state = le16_to_cpu(es->s_state);
- sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
- sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
+ return 0;
+}
- for (i = 0; i < 4; i++)
- sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
- sbi->s_def_hash_version = es->s_def_hash_version;
- if (ext4_has_feature_dir_index(sb)) {
- i = le32_to_cpu(es->s_flags);
- if (i & EXT2_FLAGS_UNSIGNED_HASH)
- sbi->s_hash_unsigned = 3;
- else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
-#ifdef __CHAR_UNSIGNED__
- if (!(sb->s_flags & MS_RDONLY))
- es->s_flags |=
- cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
- sbi->s_hash_unsigned = 3;
-#else
- if (!(sb->s_flags & MS_RDONLY))
- es->s_flags |=
- cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
-#endif
- }
- }
+static int ext4_check_geometry(struct super_block *sb,
+ struct ext4_super_block *es)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ __u64 blocks_count;
+ int err;
- /* Handle clustersize */
- clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
- has_bigalloc = ext4_has_feature_bigalloc(sb);
- if (has_bigalloc) {
- if (clustersize < blocksize) {
- ext4_msg(sb, KERN_ERR,
- "cluster size (%d) smaller than "
- "block size (%d)", clustersize, blocksize);
- goto failed_mount;
- }
- if (le32_to_cpu(es->s_log_cluster_size) >
- (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
- ext4_msg(sb, KERN_ERR,
- "Invalid log cluster size: %u",
- le32_to_cpu(es->s_log_cluster_size));
- goto failed_mount;
- }
- sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
- le32_to_cpu(es->s_log_block_size);
- sbi->s_clusters_per_group =
- le32_to_cpu(es->s_clusters_per_group);
- if (sbi->s_clusters_per_group > blocksize * 8) {
- ext4_msg(sb, KERN_ERR,
- "#clusters per group too big: %lu",
- sbi->s_clusters_per_group);
- goto failed_mount;
- }
- if (sbi->s_blocks_per_group !=
- (sbi->s_clusters_per_group * (clustersize / blocksize))) {
- ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
- "clusters per group (%lu) inconsistent",
- sbi->s_blocks_per_group,
- sbi->s_clusters_per_group);
- goto failed_mount;
- }
- } else {
- if (clustersize != blocksize) {
- ext4_warning(sb, "fragment/cluster size (%d) != "
- "block size (%d)", clustersize,
- blocksize);
- clustersize = blocksize;
- }
- if (sbi->s_blocks_per_group > blocksize * 8) {
- ext4_msg(sb, KERN_ERR,
- "#blocks per group too big: %lu",
- sbi->s_blocks_per_group);
- goto failed_mount;
- }
- sbi->s_clusters_per_group = sbi->s_blocks_per_group;
- sbi->s_cluster_bits = 0;
+ if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (sb->s_blocksize / 4)) {
+ ext4_msg(sb, KERN_ERR,
+ "Number of reserved GDT blocks insanely large: %d",
+ le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
+ return -EINVAL;
}
- sbi->s_cluster_ratio = clustersize / blocksize;
-
- /* Do we have standard group size of clustersize * 8 blocks ? */
- if (sbi->s_blocks_per_group == clustersize << 3)
- set_opt2(sb, STD_GROUP_SIZE);
-
/*
* Test whether we have more sectors than will fit in sector_t,
* and whether the max offset is addressable by the page cache.
@@ -3866,21 +4788,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (err) {
ext4_msg(sb, KERN_ERR, "filesystem"
" too large to mount safely on this system");
- if (sizeof(sector_t) < 8)
- ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
- goto failed_mount;
+ return err;
}
- if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
- goto cantfind_ext4;
-
/* check blocks count against device size */
- blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
+ blocks_count = sb_bdev_nr_blocks(sb);
if (blocks_count && ext4_blocks_count(es) > blocks_count) {
ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
"exceeds size of device (%llu blocks)",
ext4_blocks_count(es), blocks_count);
- goto failed_mount;
+ return -EINVAL;
}
/*
@@ -3892,24 +4809,52 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"block %u is beyond end of filesystem (%llu)",
le32_to_cpu(es->s_first_data_block),
ext4_blocks_count(es));
- goto failed_mount;
+ return -EINVAL;
+ }
+ if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) &&
+ (sbi->s_cluster_ratio == 1)) {
+ ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
+ "block is 0 with a 1k block and cluster size");
+ return -EINVAL;
}
+
blocks_count = (ext4_blocks_count(es) -
le32_to_cpu(es->s_first_data_block) +
EXT4_BLOCKS_PER_GROUP(sb) - 1);
do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
- ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
+ ext4_msg(sb, KERN_WARNING, "groups count too large: %llu "
"(block count %llu, first data block %u, "
- "blocks per group %lu)", sbi->s_groups_count,
+ "blocks per group %lu)", blocks_count,
ext4_blocks_count(es),
le32_to_cpu(es->s_first_data_block),
EXT4_BLOCKS_PER_GROUP(sb));
- goto failed_mount;
+ return -EINVAL;
}
sbi->s_groups_count = blocks_count;
sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
+ if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
+ le32_to_cpu(es->s_inodes_count)) {
+ ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
+ le32_to_cpu(es->s_inodes_count),
+ ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ext4_group_desc_init(struct super_block *sb,
+ struct ext4_super_block *es,
+ ext4_fsblk_t logical_sb_block,
+ ext4_group_t *first_not_zeroed)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ unsigned int db_count;
+ ext4_fsblk_t block;
+ int i;
+
db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
EXT4_DESC_PER_BLOCK(sb);
if (ext4_has_feature_meta_bg(sb)) {
@@ -3918,16 +4863,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"first meta block group too large: %u "
"(group descriptor block count %u)",
le32_to_cpu(es->s_first_meta_bg), db_count);
- goto failed_mount;
+ return -EINVAL;
}
}
- sbi->s_group_desc = kvmalloc(db_count *
+ rcu_assign_pointer(sbi->s_group_desc,
+ kvmalloc_array(db_count,
sizeof(struct buffer_head *),
- GFP_KERNEL);
+ GFP_KERNEL));
if (sbi->s_group_desc == NULL) {
ext4_msg(sb, KERN_ERR, "not enough memory");
- ret = -ENOMEM;
- goto failed_mount;
+ return -ENOMEM;
}
bgl_lock_init(sbi->s_blockgroup_lock);
@@ -3935,37 +4880,537 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
/* Pre-read the descriptors into the buffer cache */
for (i = 0; i < db_count; i++) {
block = descriptor_loc(sb, logical_sb_block, i);
- sb_breadahead(sb, block);
+ ext4_sb_breadahead_unmovable(sb, block);
}
for (i = 0; i < db_count; i++) {
+ struct buffer_head *bh;
+
block = descriptor_loc(sb, logical_sb_block, i);
- sbi->s_group_desc[i] = sb_bread_unmovable(sb, block);
- if (!sbi->s_group_desc[i]) {
+ bh = ext4_sb_bread_unmovable(sb, block);
+ if (IS_ERR(bh)) {
ext4_msg(sb, KERN_ERR,
"can't read group descriptor %d", i);
- db_count = i;
- goto failed_mount2;
+ sbi->s_gdb_count = i;
+ return PTR_ERR(bh);
}
+ rcu_read_lock();
+ rcu_dereference(sbi->s_group_desc)[i] = bh;
+ rcu_read_unlock();
}
- if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
+ sbi->s_gdb_count = db_count;
+ if (!ext4_check_descriptors(sb, logical_sb_block, first_not_zeroed)) {
ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
- ret = -EFSCORRUPTED;
- goto failed_mount2;
+ return -EFSCORRUPTED;
}
- sbi->s_gdb_count = db_count;
- get_random_bytes(&sbi->s_next_generation, sizeof(u32));
- spin_lock_init(&sbi->s_next_gen_lock);
+ return 0;
+}
+
+static int ext4_load_and_init_journal(struct super_block *sb,
+ struct ext4_super_block *es,
+ struct ext4_fs_context *ctx)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ int err;
+
+ err = ext4_load_journal(sb, es, ctx->journal_devnum);
+ if (err)
+ return err;
+
+ if (ext4_has_feature_64bit(sb) &&
+ !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
+ JBD2_FEATURE_INCOMPAT_64BIT)) {
+ ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
+ goto out;
+ }
+
+ if (!set_journal_csum_feature_set(sb)) {
+ ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
+ "feature set");
+ goto out;
+ }
+
+ if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
+ !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
+ JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) {
+ ext4_msg(sb, KERN_ERR,
+ "Failed to set fast commit journal feature");
+ goto out;
+ }
+
+ /* We have now updated the journal if required, so we can
+ * validate the data journaling mode. */
+ switch (test_opt(sb, DATA_FLAGS)) {
+ case 0:
+ /* No mode set, assume a default based on the journal
+ * capabilities: ORDERED_DATA if the journal can
+ * cope, else JOURNAL_DATA
+ */
+ if (jbd2_journal_check_available_features
+ (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
+ set_opt(sb, ORDERED_DATA);
+ sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
+ } else {
+ set_opt(sb, JOURNAL_DATA);
+ sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
+ }
+ break;
+
+ case EXT4_MOUNT_ORDERED_DATA:
+ case EXT4_MOUNT_WRITEBACK_DATA:
+ if (!jbd2_journal_check_available_features
+ (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
+ ext4_msg(sb, KERN_ERR, "Journal does not support "
+ "requested data journaling mode");
+ goto out;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
+ test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "journal_async_commit in data=ordered mode");
+ goto out;
+ }
+
+ set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio);
+
+ sbi->s_journal->j_submit_inode_data_buffers =
+ ext4_journal_submit_inode_data_buffers;
+ sbi->s_journal->j_finish_inode_data_buffers =
+ ext4_journal_finish_inode_data_buffers;
+
+ return 0;
+
+out:
+ ext4_journal_destroy(sbi, sbi->s_journal);
+ return -EINVAL;
+}
+
+static int ext4_check_journal_data_mode(struct super_block *sb)
+{
+ if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
+ printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with "
+ "data=journal disables delayed allocation, "
+ "dioread_nolock, O_DIRECT and fast_commit support!\n");
+ /* can't mount with both data=journal and dioread_nolock. */
+ clear_opt(sb, DIOREAD_NOLOCK);
+ clear_opt2(sb, JOURNAL_FAST_COMMIT);
+ if (test_opt2(sb, EXPLICIT_DELALLOC)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "both data=journal and delalloc");
+ return -EINVAL;
+ }
+ if (test_opt(sb, DAX_ALWAYS)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "both data=journal and dax");
+ return -EINVAL;
+ }
+ if (ext4_has_feature_encrypt(sb)) {
+ ext4_msg(sb, KERN_WARNING,
+ "encrypted files will use data=ordered "
+ "instead of data journaling mode");
+ }
+ if (test_opt(sb, DELALLOC))
+ clear_opt(sb, DELALLOC);
+ } else {
+ sb->s_iflags |= SB_I_CGROUPWB;
+ }
+
+ return 0;
+}
+
+static const char *ext4_has_journal_option(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
+ return "journal_async_commit";
+ if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM))
+ return "journal_checksum";
+ if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
+ return "commit=";
+ if (EXT4_MOUNT_DATA_FLAGS &
+ (sbi->s_mount_opt ^ sbi->s_def_mount_opt))
+ return "data=";
+ if (test_opt(sb, DATA_ERR_ABORT))
+ return "data_err=abort";
+ return NULL;
+}
+
+/*
+ * Limit the maximum folio order to 2048 blocks to prevent overestimation
+ * of reserve handle credits during the folio writeback in environments
+ * where the PAGE_SIZE exceeds 4KB.
+ */
+#define EXT4_MAX_PAGECACHE_ORDER(sb) \
+ umin(MAX_PAGECACHE_ORDER, (11 + (sb)->s_blocksize_bits - PAGE_SHIFT))
+static void ext4_set_max_mapping_order(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
+ sbi->s_max_folio_order = sbi->s_min_folio_order;
+ else
+ sbi->s_max_folio_order = EXT4_MAX_PAGECACHE_ORDER(sb);
+}
+
+static int ext4_check_large_folio(struct super_block *sb)
+{
+ const char *err_str = NULL;
+
+ if (ext4_has_feature_encrypt(sb))
+ err_str = "encrypt";
+
+ if (!err_str) {
+ ext4_set_max_mapping_order(sb);
+ } else if (sb->s_blocksize > PAGE_SIZE) {
+ ext4_msg(sb, KERN_ERR, "bs(%lu) > ps(%lu) unsupported for %s",
+ sb->s_blocksize, PAGE_SIZE, err_str);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ext4_load_super(struct super_block *sb, ext4_fsblk_t *lsb,
+ int silent)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es;
+ ext4_fsblk_t logical_sb_block;
+ unsigned long offset = 0;
+ struct buffer_head *bh;
+ int ret = -EINVAL;
+ int blocksize;
+
+ blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
+ if (!blocksize) {
+ ext4_msg(sb, KERN_ERR, "unable to set blocksize");
+ return -EINVAL;
+ }
+
+ /*
+ * The ext4 superblock will not be buffer aligned for other than 1kB
+ * block sizes. We need to calculate the offset from buffer start.
+ */
+ if (blocksize != EXT4_MIN_BLOCK_SIZE) {
+ logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE;
+ offset = do_div(logical_sb_block, blocksize);
+ } else {
+ logical_sb_block = sbi->s_sb_block;
+ }
+
+ bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
+ if (IS_ERR(bh)) {
+ ext4_msg(sb, KERN_ERR, "unable to read superblock");
+ return PTR_ERR(bh);
+ }
+ /*
+ * Note: s_es must be initialized as soon as possible because
+ * some ext4 macro-instructions depend on its value
+ */
+ es = (struct ext4_super_block *) (bh->b_data + offset);
+ sbi->s_es = es;
+ sb->s_magic = le16_to_cpu(es->s_magic);
+ if (sb->s_magic != EXT4_SUPER_MAGIC) {
+ if (!silent)
+ ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
+ goto out;
+ }
+
+ if (le32_to_cpu(es->s_log_block_size) >
+ (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
+ ext4_msg(sb, KERN_ERR,
+ "Invalid log block size: %u",
+ le32_to_cpu(es->s_log_block_size));
+ goto out;
+ }
+ if (le32_to_cpu(es->s_log_cluster_size) >
+ (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
+ ext4_msg(sb, KERN_ERR,
+ "Invalid log cluster size: %u",
+ le32_to_cpu(es->s_log_cluster_size));
+ goto out;
+ }
+
+ blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
+
+ /*
+ * If the default block size is not the same as the real block size,
+ * we need to reload it.
+ */
+ if (sb->s_blocksize == blocksize)
+ goto success;
+
+ /*
+ * bh must be released before kill_bdev(), otherwise
+ * it won't be freed and its page also. kill_bdev()
+ * is called by sb_set_blocksize().
+ */
+ brelse(bh);
+ /* Validate the filesystem blocksize */
+ if (!sb_set_blocksize(sb, blocksize)) {
+ ext4_msg(sb, KERN_ERR, "bad block size %d",
+ blocksize);
+ bh = NULL;
+ goto out;
+ }
+
+ logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE;
+ offset = do_div(logical_sb_block, blocksize);
+ bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
+ if (IS_ERR(bh)) {
+ ext4_msg(sb, KERN_ERR, "Can't read superblock on 2nd try");
+ ret = PTR_ERR(bh);
+ bh = NULL;
+ goto out;
+ }
+ es = (struct ext4_super_block *)(bh->b_data + offset);
+ sbi->s_es = es;
+ if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
+ ext4_msg(sb, KERN_ERR, "Magic mismatch, very weird!");
+ goto out;
+ }
+
+success:
+ sbi->s_min_folio_order = get_order(blocksize);
+ *lsb = logical_sb_block;
+ sbi->s_sbh = bh;
+ return 0;
+out:
+ brelse(bh);
+ return ret;
+}
+
+static int ext4_hash_info_init(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+ unsigned int i;
+
+ sbi->s_def_hash_version = es->s_def_hash_version;
+
+ if (sbi->s_def_hash_version > DX_HASH_LAST) {
+ ext4_msg(sb, KERN_ERR,
+ "Invalid default hash set in the superblock");
+ return -EINVAL;
+ } else if (sbi->s_def_hash_version == DX_HASH_SIPHASH) {
+ ext4_msg(sb, KERN_ERR,
+ "SIPHASH is not a valid default hash value");
+ return -EINVAL;
+ }
+
+ for (i = 0; i < 4; i++)
+ sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
+
+ if (ext4_has_feature_dir_index(sb)) {
+ i = le32_to_cpu(es->s_flags);
+ if (i & EXT2_FLAGS_UNSIGNED_HASH)
+ sbi->s_hash_unsigned = 3;
+ else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
+#ifdef __CHAR_UNSIGNED__
+ if (!sb_rdonly(sb))
+ es->s_flags |=
+ cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
+ sbi->s_hash_unsigned = 3;
+#else
+ if (!sb_rdonly(sb))
+ es->s_flags |=
+ cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
+#endif
+ }
+ }
+ return 0;
+}
+
+static int ext4_block_group_meta_init(struct super_block *sb, int silent)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+ int has_huge_files;
+
+ has_huge_files = ext4_has_feature_huge_file(sb);
+ sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
+ has_huge_files);
+ sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
+
+ sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
+ if (ext4_has_feature_64bit(sb)) {
+ if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
+ sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
+ !is_power_of_2(sbi->s_desc_size)) {
+ ext4_msg(sb, KERN_ERR,
+ "unsupported descriptor size %lu",
+ sbi->s_desc_size);
+ return -EINVAL;
+ }
+ } else
+ sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
+
+ sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
+ sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
+
+ sbi->s_inodes_per_block = sb->s_blocksize / EXT4_INODE_SIZE(sb);
+ if (sbi->s_inodes_per_block == 0 || sbi->s_blocks_per_group == 0) {
+ if (!silent)
+ ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
+ return -EINVAL;
+ }
+ if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
+ sbi->s_inodes_per_group > sb->s_blocksize * 8) {
+ ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
+ sbi->s_inodes_per_group);
+ return -EINVAL;
+ }
+ sbi->s_itb_per_group = sbi->s_inodes_per_group /
+ sbi->s_inodes_per_block;
+ sbi->s_desc_per_block = sb->s_blocksize / EXT4_DESC_SIZE(sb);
+ sbi->s_mount_state = le16_to_cpu(es->s_state) & ~EXT4_FC_REPLAY;
+ sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
+ sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
+
+ return 0;
+}
+
+/*
+ * It's hard to get stripe aligned blocks if stripe is not aligned with
+ * cluster, just disable stripe and alert user to simplify code and avoid
+ * stripe aligned allocation which will rarely succeed.
+ */
+static bool ext4_is_stripe_incompatible(struct super_block *sb, unsigned long stripe)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ return (stripe > 0 && sbi->s_cluster_ratio > 1 &&
+ stripe % sbi->s_cluster_ratio != 0);
+}
+
+static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
+{
+ struct ext4_super_block *es = NULL;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ ext4_fsblk_t logical_sb_block;
+ struct inode *root;
+ int needs_recovery;
+ int err;
+ ext4_group_t first_not_zeroed;
+ struct ext4_fs_context *ctx = fc->fs_private;
+ int silent = fc->sb_flags & SB_SILENT;
+
+ /* Set defaults for the variables that will be set during parsing */
+ if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO))
+ ctx->journal_ioprio = EXT4_DEF_JOURNAL_IOPRIO;
+
+ sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
+ sbi->s_sectors_written_start =
+ part_stat_read(sb->s_bdev, sectors[STAT_WRITE]);
+
+ err = ext4_load_super(sb, &logical_sb_block, silent);
+ if (err)
+ goto out_fail;
+
+ es = sbi->s_es;
+ sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
+
+ err = ext4_init_metadata_csum(sb, es);
+ if (err)
+ goto failed_mount;
+
+ ext4_set_def_opts(sb, es);
+
+ sbi->s_resuid = make_kuid(&init_user_ns, ext4_get_resuid(es));
+ sbi->s_resgid = make_kgid(&init_user_ns, ext4_get_resuid(es));
+ sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
+ sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
+ sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
+ sbi->s_sb_update_kb = EXT4_DEF_SB_UPDATE_INTERVAL_KB;
+ sbi->s_sb_update_sec = EXT4_DEF_SB_UPDATE_INTERVAL_SEC;
+
+ /*
+ * set default s_li_wait_mult for lazyinit, for the case there is
+ * no mount option specified.
+ */
+ sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
+
+ err = ext4_inode_info_init(sb, es);
+ if (err)
+ goto failed_mount;
+
+ err = parse_apply_sb_mount_options(sb, ctx);
+ if (err < 0)
+ goto failed_mount;
+
+ sbi->s_def_mount_opt = sbi->s_mount_opt;
+ sbi->s_def_mount_opt2 = sbi->s_mount_opt2;
+
+ err = ext4_check_opt_consistency(fc, sb);
+ if (err < 0)
+ goto failed_mount;
- setup_timer(&sbi->s_err_report, print_daily_error_info,
- (unsigned long) sb);
+ ext4_apply_options(fc, sb);
- /* Register extent status tree shrinker */
- if (ext4_es_register_shrinker(sbi))
+ err = ext4_check_large_folio(sb);
+ if (err < 0)
+ goto failed_mount;
+
+ err = ext4_encoding_init(sb, es);
+ if (err)
+ goto failed_mount;
+
+ err = ext4_check_journal_data_mode(sb);
+ if (err)
+ goto failed_mount;
+
+ sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
+ (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
+
+ /* HSM events are allowed by default. */
+ sb->s_iflags |= SB_I_ALLOW_HSM;
+
+ err = ext4_check_feature_compatibility(sb, es, silent);
+ if (err)
+ goto failed_mount;
+
+ err = ext4_block_group_meta_init(sb, silent);
+ if (err)
+ goto failed_mount;
+
+ err = ext4_hash_info_init(sb);
+ if (err)
+ goto failed_mount;
+
+ err = ext4_handle_clustersize(sb);
+ if (err)
+ goto failed_mount;
+
+ err = ext4_check_geometry(sb, es);
+ if (err)
+ goto failed_mount;
+
+ timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
+ spin_lock_init(&sbi->s_error_lock);
+ INIT_WORK(&sbi->s_sb_upd_work, update_super_work);
+
+ err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed);
+ if (err)
+ goto failed_mount3;
+
+ err = ext4_es_register_shrinker(sbi);
+ if (err)
goto failed_mount3;
sbi->s_stripe = ext4_get_stripe_size(sbi);
+ if (ext4_is_stripe_incompatible(sb, sbi->s_stripe)) {
+ ext4_msg(sb, KERN_WARNING,
+ "stripe (%lu) is not aligned with cluster size (%u), "
+ "stripe is disabled",
+ sbi->s_stripe, sbi->s_cluster_ratio);
+ sbi->s_stripe = 0;
+ }
sbi->s_extent_max_zeroout_kb = 32;
/*
@@ -3974,7 +5419,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sb->s_op = &ext4_sops;
sb->s_export_op = &ext4_export_ops;
sb->s_xattr = ext4_xattr_handlers;
+#ifdef CONFIG_FS_ENCRYPTION
sb->s_cop = &ext4_cryptops;
+#endif
+#ifdef CONFIG_FS_VERITY
+ sb->s_vop = &ext4_verityops;
+#endif
#ifdef CONFIG_QUOTA
sb->dq_op = &ext4_quota_operations;
if (ext4_has_feature_quota(sb))
@@ -3983,122 +5433,71 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sb->s_qcop = &ext4_qctl_operations;
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
- memcpy(&sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
+ super_set_uuid(sb, es->s_uuid, sizeof(es->s_uuid));
+ super_set_sysfs_name_bdev(sb);
INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
mutex_init(&sbi->s_orphan_lock);
+ spin_lock_init(&sbi->s_bdev_wb_lock);
+
+ ext4_atomic_write_init(sb);
+ ext4_fast_commit_init(sb);
+
sb->s_root = NULL;
needs_recovery = (es->s_last_orphan != 0 ||
+ ext4_has_feature_orphan_present(sb) ||
ext4_has_feature_journal_needs_recovery(sb));
- if (ext4_has_feature_mmp(sb) && !(sb->s_flags & MS_RDONLY))
- if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
+ if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb)) {
+ err = ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block));
+ if (err)
goto failed_mount3a;
+ }
+ err = -EINVAL;
/*
* The first inode we look at is the journal inode. Don't try
* root first: it may be modified in the journal!
*/
if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
- err = ext4_load_journal(sb, es, journal_devnum);
+ err = ext4_load_and_init_journal(sb, es, ctx);
if (err)
goto failed_mount3a;
- } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
+ if (bdev_read_only(sb->s_bdev))
+ needs_recovery = 0;
+ } else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) &&
ext4_has_feature_journal_needs_recovery(sb)) {
ext4_msg(sb, KERN_ERR, "required journal recovery "
"suppressed and not mounted read-only");
- goto failed_mount_wq;
+ goto failed_mount3a;
} else {
+ const char *journal_option;
+
/* Nojournal mode, all journal mount options are illegal */
- if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "journal_checksum, fs mounted w/o journal");
- goto failed_mount_wq;
- }
- if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "journal_async_commit, fs mounted w/o journal");
- goto failed_mount_wq;
- }
- if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "commit=%lu, fs mounted w/o journal",
- sbi->s_commit_interval / HZ);
- goto failed_mount_wq;
- }
- if (EXT4_MOUNT_DATA_FLAGS &
- (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "data=, fs mounted w/o journal");
- goto failed_mount_wq;
+ journal_option = ext4_has_journal_option(sb);
+ if (journal_option != NULL) {
+ ext4_msg(sb, KERN_ERR,
+ "can't mount with %s, fs mounted w/o journal",
+ journal_option);
+ goto failed_mount3a;
}
- sbi->s_def_mount_opt &= EXT4_MOUNT_JOURNAL_CHECKSUM;
+
+ sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
clear_opt(sb, JOURNAL_CHECKSUM);
clear_opt(sb, DATA_FLAGS);
+ clear_opt2(sb, JOURNAL_FAST_COMMIT);
sbi->s_journal = NULL;
needs_recovery = 0;
- goto no_journal;
- }
-
- if (ext4_has_feature_64bit(sb) &&
- !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
- JBD2_FEATURE_INCOMPAT_64BIT)) {
- ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
- goto failed_mount_wq;
}
- if (!set_journal_csum_feature_set(sb)) {
- ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
- "feature set");
- goto failed_mount_wq;
- }
-
- /* We have now updated the journal if required, so we can
- * validate the data journaling mode. */
- switch (test_opt(sb, DATA_FLAGS)) {
- case 0:
- /* No mode set, assume a default based on the journal
- * capabilities: ORDERED_DATA if the journal can
- * cope, else JOURNAL_DATA
- */
- if (jbd2_journal_check_available_features
- (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
- set_opt(sb, ORDERED_DATA);
- else
- set_opt(sb, JOURNAL_DATA);
- break;
-
- case EXT4_MOUNT_ORDERED_DATA:
- case EXT4_MOUNT_WRITEBACK_DATA:
- if (!jbd2_journal_check_available_features
- (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
- ext4_msg(sb, KERN_ERR, "Journal does not support "
- "requested data journaling mode");
- goto failed_mount_wq;
- }
- default:
- break;
- }
-
- if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
- test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "journal_async_commit in data=ordered mode");
- goto failed_mount_wq;
- }
-
- set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
-
- sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
-
-no_journal:
if (!test_opt(sb, NO_MBCACHE)) {
sbi->s_ea_block_cache = ext4_xattr_create_cache();
if (!sbi->s_ea_block_cache) {
ext4_msg(sb, KERN_ERR,
"Failed to create ea_block_cache");
+ err = -EINVAL;
goto failed_mount_wq;
}
@@ -4107,31 +5506,28 @@ no_journal:
if (!sbi->s_ea_inode_cache) {
ext4_msg(sb, KERN_ERR,
"Failed to create ea_inode_cache");
+ err = -EINVAL;
goto failed_mount_wq;
}
}
}
- if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) &&
- (blocksize != PAGE_SIZE)) {
- ext4_msg(sb, KERN_ERR,
- "Unsupported blocksize for fs encryption");
- goto failed_mount_wq;
- }
-
- if (DUMMY_ENCRYPTION_ENABLED(sbi) && !(sb->s_flags & MS_RDONLY) &&
- !ext4_has_feature_encrypt(sb)) {
- ext4_set_feature_encrypt(sb);
- ext4_commit_super(sb, 1);
- }
-
/*
* Get the # of file system overhead blocks from the
* superblock if present.
*/
- if (es->s_overhead_clusters)
- sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
- else {
+ sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
+ /* ignore the precalculated value if it is ridiculous */
+ if (sbi->s_overhead > ext4_blocks_count(es))
+ sbi->s_overhead = 0;
+ /*
+ * If the bigalloc feature is not enabled recalculating the
+ * overhead doesn't take long, so we might as well just redo
+ * it to make sure we are using the correct value.
+ */
+ if (!ext4_has_feature_bigalloc(sb))
+ sbi->s_overhead = 0;
+ if (sbi->s_overhead == 0) {
err = ext4_calculate_overhead(sb);
if (err)
goto failed_mount_wq;
@@ -4145,7 +5541,7 @@ no_journal:
alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
if (!EXT4_SB(sb)->rsv_conversion_wq) {
printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
- ret = -ENOMEM;
+ err = -ENOMEM;
goto failed_mount4;
}
@@ -4154,63 +5550,60 @@ no_journal:
* so we can safely mount the rest of the filesystem now.
*/
- root = ext4_iget(sb, EXT4_ROOT_INO);
+ root = ext4_iget(sb, EXT4_ROOT_INO, EXT4_IGET_SPECIAL);
if (IS_ERR(root)) {
ext4_msg(sb, KERN_ERR, "get root inode failed");
- ret = PTR_ERR(root);
+ err = PTR_ERR(root);
root = NULL;
goto failed_mount4;
}
if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
iput(root);
+ err = -EFSCORRUPTED;
goto failed_mount4;
}
+
+ generic_set_sb_d_ops(sb);
sb->s_root = d_make_root(root);
if (!sb->s_root) {
ext4_msg(sb, KERN_ERR, "get root dentry failed");
- ret = -ENOMEM;
+ err = -ENOMEM;
goto failed_mount4;
}
- if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY))
- sb->s_flags |= MS_RDONLY;
+ err = ext4_setup_super(sb, es, sb_rdonly(sb));
+ if (err == -EROFS) {
+ sb->s_flags |= SB_RDONLY;
+ } else if (err)
+ goto failed_mount4a;
- /* determine the minimum size of new large inodes, if present */
- if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE &&
- sbi->s_want_extra_isize == 0) {
- sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
- EXT4_GOOD_OLD_INODE_SIZE;
- if (ext4_has_feature_extra_isize(sb)) {
- if (sbi->s_want_extra_isize <
- le16_to_cpu(es->s_want_extra_isize))
- sbi->s_want_extra_isize =
- le16_to_cpu(es->s_want_extra_isize);
- if (sbi->s_want_extra_isize <
- le16_to_cpu(es->s_min_extra_isize))
- sbi->s_want_extra_isize =
- le16_to_cpu(es->s_min_extra_isize);
+ ext4_set_resv_clusters(sb);
+
+ if (test_opt(sb, BLOCK_VALIDITY)) {
+ err = ext4_setup_system_zone(sb);
+ if (err) {
+ ext4_msg(sb, KERN_ERR, "failed to initialize system "
+ "zone (%d)", err);
+ goto failed_mount4a;
}
}
- /* Check if enough inode space is available */
- if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
- sbi->s_inode_size) {
- sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
- EXT4_GOOD_OLD_INODE_SIZE;
- ext4_msg(sb, KERN_INFO, "required extra inode space not"
- "available");
- }
+ ext4_fc_replay_cleanup(sb);
- ext4_set_resv_clusters(sb);
+ ext4_ext_init(sb);
- err = ext4_setup_system_zone(sb);
- if (err) {
- ext4_msg(sb, KERN_ERR, "failed to initialize system "
- "zone (%d)", err);
- goto failed_mount4a;
+ /*
+ * Enable optimize_scan if number of groups is > threshold. This can be
+ * turned off by passing "mb_optimize_scan=0". This can also be
+ * turned on forcefully by passing "mb_optimize_scan=1".
+ */
+ if (!(ctx->spec & EXT4_SPEC_mb_optimize_scan)) {
+ if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD)
+ set_opt2(sb, MB_OPTIMIZE_SCAN);
+ else
+ clear_opt2(sb, MB_OPTIMIZE_SCAN);
}
- ext4_ext_init(sb);
err = ext4_mb_init(sb);
if (err) {
ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
@@ -4218,36 +5611,24 @@ no_journal:
goto failed_mount5;
}
- block = ext4_count_free_clusters(sb);
- ext4_free_blocks_count_set(sbi->s_es,
- EXT4_C2B(sbi, block));
- err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
- GFP_KERNEL);
- if (!err) {
- unsigned long freei = ext4_count_free_inodes(sb);
- sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
- err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
- GFP_KERNEL);
- }
- if (!err)
- err = percpu_counter_init(&sbi->s_dirs_counter,
- ext4_count_dirs(sb), GFP_KERNEL);
- if (!err)
- err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
- GFP_KERNEL);
- if (!err)
- err = percpu_init_rwsem(&sbi->s_journal_flag_rwsem);
+ /*
+ * We can only set up the journal commit callback once
+ * mballoc is initialized
+ */
+ if (sbi->s_journal)
+ sbi->s_journal->j_commit_callback =
+ ext4_journal_commit_callback;
- if (err) {
- ext4_msg(sb, KERN_ERR, "insufficient memory");
+ err = ext4_percpu_param_init(sbi);
+ if (err)
goto failed_mount6;
- }
if (ext4_has_feature_flex_bg(sb))
if (!ext4_fill_flex_info(sb)) {
ext4_msg(sb, KERN_ERR,
"unable to initialize "
"flex_bg meta info!");
+ err = -ENOMEM;
goto failed_mount6;
}
@@ -4255,51 +5636,48 @@ no_journal:
if (err)
goto failed_mount6;
- err = ext4_register_sysfs(sb);
+ err = ext4_init_orphan_info(sb);
if (err)
goto failed_mount7;
-
#ifdef CONFIG_QUOTA
/* Enable quota usage during mount. */
- if (ext4_has_feature_quota(sb) && !(sb->s_flags & MS_RDONLY)) {
+ if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
err = ext4_enable_quotas(sb);
if (err)
goto failed_mount8;
}
#endif /* CONFIG_QUOTA */
+ /*
+ * Save the original bdev mapping's wb_err value which could be
+ * used to detect the metadata async write error.
+ */
+ errseq_check_and_advance(&sb->s_bdev->bd_mapping->wb_err,
+ &sbi->s_bdev_wb_err);
EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
ext4_orphan_cleanup(sb, es);
EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
+ /*
+ * Update the checksum after updating free space/inode counters and
+ * ext4_orphan_cleanup. Otherwise the superblock can have an incorrect
+ * checksum in the buffer cache until it is written out and
+ * e2fsprogs programs trying to open a file system immediately
+ * after it is mounted can fail.
+ */
+ ext4_superblock_csum_set(sb);
if (needs_recovery) {
ext4_msg(sb, KERN_INFO, "recovery complete");
- ext4_mark_recovery_complete(sb, es);
+ err = ext4_mark_recovery_complete(sb, es);
+ if (err)
+ goto failed_mount9;
}
- if (EXT4_SB(sb)->s_journal) {
- if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
- descr = " journalled data mode";
- else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
- descr = " ordered data mode";
- else
- descr = " writeback data mode";
- } else
- descr = "out journal";
- if (test_opt(sb, DISCARD)) {
- struct request_queue *q = bdev_get_queue(sb->s_bdev);
- if (!blk_queue_discard(q))
- ext4_msg(sb, KERN_WARNING,
- "mounting with \"discard\" option, but "
- "the device does not support discard");
+ if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev)) {
+ ext4_msg(sb, KERN_WARNING,
+ "mounting with \"discard\" option, but the device does not support discard");
+ clear_opt(sb, DISCARD);
}
- if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
- ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
- "Opts: %.*s%s%s", descr,
- (int) sizeof(sbi->s_es->s_mount_opts),
- sbi->s_es->s_mount_opts,
- *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
-
if (es->s_error_count)
mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
@@ -4307,29 +5685,26 @@ no_journal:
ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
+ atomic_set(&sbi->s_warning_count, 0);
+ atomic_set(&sbi->s_msg_count, 0);
- kfree(orig_data);
- return 0;
+ /* Register sysfs after all initializations are complete. */
+ err = ext4_register_sysfs(sb);
+ if (err)
+ goto failed_mount9;
-cantfind_ext4:
- if (!silent)
- ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
- goto failed_mount;
+ return 0;
-#ifdef CONFIG_QUOTA
-failed_mount8:
- ext4_unregister_sysfs(sb);
-#endif
+failed_mount9:
+ ext4_quotas_off(sb, EXT4_MAXQUOTAS);
+failed_mount8: __maybe_unused
+ ext4_release_orphan_info(sb);
failed_mount7:
ext4_unregister_li_request(sb);
failed_mount6:
ext4_mb_release(sb);
- if (sbi->s_flex_groups)
- kvfree(sbi->s_flex_groups);
- percpu_counter_destroy(&sbi->s_freeclusters_counter);
- percpu_counter_destroy(&sbi->s_freeinodes_counter);
- percpu_counter_destroy(&sbi->s_dirs_counter);
- percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
+ ext4_flex_groups_free(sbi);
+ ext4_percpu_param_destroy(sbi);
failed_mount5:
ext4_ext_release(sb);
ext4_release_system_zone(sb);
@@ -4341,44 +5716,97 @@ failed_mount4:
if (EXT4_SB(sb)->rsv_conversion_wq)
destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
failed_mount_wq:
- if (sbi->s_ea_inode_cache) {
- ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
- sbi->s_ea_inode_cache = NULL;
- }
- if (sbi->s_ea_block_cache) {
- ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
- sbi->s_ea_block_cache = NULL;
- }
+ ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
+ sbi->s_ea_inode_cache = NULL;
+
+ ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
+ sbi->s_ea_block_cache = NULL;
+
if (sbi->s_journal) {
- jbd2_journal_destroy(sbi->s_journal);
- sbi->s_journal = NULL;
+ ext4_journal_destroy(sbi, sbi->s_journal);
}
failed_mount3a:
ext4_es_unregister_shrinker(sbi);
failed_mount3:
- del_timer_sync(&sbi->s_err_report);
- if (sbi->s_mmp_tsk)
- kthread_stop(sbi->s_mmp_tsk);
-failed_mount2:
- for (i = 0; i < db_count; i++)
- brelse(sbi->s_group_desc[i]);
- kvfree(sbi->s_group_desc);
+ /* flush s_sb_upd_work before sbi destroy */
+ flush_work(&sbi->s_sb_upd_work);
+ ext4_stop_mmpd(sbi);
+ timer_delete_sync(&sbi->s_err_report);
+ ext4_group_desc_free(sbi);
failed_mount:
- if (sbi->s_chksum_driver)
- crypto_free_shash(sbi->s_chksum_driver);
+#if IS_ENABLED(CONFIG_UNICODE)
+ utf8_unload(sb->s_encoding);
+#endif
+
#ifdef CONFIG_QUOTA
- for (i = 0; i < EXT4_MAXQUOTAS; i++)
- kfree(sbi->s_qf_names[i]);
+ for (unsigned int i = 0; i < EXT4_MAXQUOTAS; i++)
+ kfree(get_qf_name(sb, sbi, i));
#endif
- ext4_blkdev_remove(sbi);
- brelse(bh);
+ fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
+ brelse(sbi->s_sbh);
+ if (sbi->s_journal_bdev_file) {
+ invalidate_bdev(file_bdev(sbi->s_journal_bdev_file));
+ bdev_fput(sbi->s_journal_bdev_file);
+ }
out_fail:
+ invalidate_bdev(sb->s_bdev);
sb->s_fs_info = NULL;
- kfree(sbi->s_blockgroup_lock);
-out_free_base:
- kfree(sbi);
- kfree(orig_data);
- return err ? err : ret;
+ return err;
+}
+
+static int ext4_fill_super(struct super_block *sb, struct fs_context *fc)
+{
+ struct ext4_fs_context *ctx = fc->fs_private;
+ struct ext4_sb_info *sbi;
+ const char *descr;
+ int ret;
+
+ sbi = ext4_alloc_sbi(sb);
+ if (!sbi)
+ return -ENOMEM;
+
+ fc->s_fs_info = sbi;
+
+ /* Cleanup superblock name */
+ strreplace(sb->s_id, '/', '!');
+
+ sbi->s_sb_block = 1; /* Default super block location */
+ if (ctx->spec & EXT4_SPEC_s_sb_block)
+ sbi->s_sb_block = ctx->s_sb_block;
+
+ ret = __ext4_fill_super(fc, sb);
+ if (ret < 0)
+ goto free_sbi;
+
+ if (sbi->s_journal) {
+ if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
+ descr = " journalled data mode";
+ else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
+ descr = " ordered data mode";
+ else
+ descr = " writeback data mode";
+ } else
+ descr = "out journal";
+
+ if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
+ ext4_msg(sb, KERN_INFO, "mounted filesystem %pU %s with%s. "
+ "Quota mode: %s.", &sb->s_uuid,
+ sb_rdonly(sb) ? "ro" : "r/w", descr,
+ ext4_quota_mode(sb));
+
+ /* Update the s_overhead_clusters if necessary */
+ ext4_update_overhead(sb, false);
+ return 0;
+
+free_sbi:
+ ext4_free_sbi(sbi);
+ fc->s_fs_info = NULL;
+ return ret;
+}
+
+static int ext4_get_tree(struct fs_context *fc)
+{
+ return get_tree_bdev(fc, ext4_fill_super);
}
/*
@@ -4393,16 +5821,18 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
journal->j_commit_interval = sbi->s_commit_interval;
journal->j_min_batch_time = sbi->s_min_batch_time;
journal->j_max_batch_time = sbi->s_max_batch_time;
+ ext4_fc_init(sb, journal);
write_lock(&journal->j_state_lock);
if (test_opt(sb, BARRIER))
journal->j_flags |= JBD2_BARRIER;
else
journal->j_flags &= ~JBD2_BARRIER;
- if (test_opt(sb, DATA_ERR_ABORT))
- journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
- else
- journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
+ /*
+ * Always enable journal cycle record option, letting the journal
+ * records log transactions continuously between each mount.
+ */
+ journal->j_flags |= JBD2_CYCLE_RECORD;
write_unlock(&journal->j_state_lock);
}
@@ -4416,84 +5846,113 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
* happen if we iget() an unused inode, as the subsequent iput()
* will try to delete it.
*/
- journal_inode = ext4_iget(sb, journal_inum);
+ journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL);
if (IS_ERR(journal_inode)) {
ext4_msg(sb, KERN_ERR, "no journal found");
- return NULL;
+ return ERR_CAST(journal_inode);
}
if (!journal_inode->i_nlink) {
make_bad_inode(journal_inode);
iput(journal_inode);
ext4_msg(sb, KERN_ERR, "journal inode is deleted");
- return NULL;
+ return ERR_PTR(-EFSCORRUPTED);
}
-
- jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
- journal_inode, journal_inode->i_size);
- if (!S_ISREG(journal_inode->i_mode)) {
+ if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) {
ext4_msg(sb, KERN_ERR, "invalid journal inode");
iput(journal_inode);
- return NULL;
+ return ERR_PTR(-EFSCORRUPTED);
}
+
+ ext4_debug("Journal inode found at %p: %lld bytes\n",
+ journal_inode, journal_inode->i_size);
return journal_inode;
}
-static journal_t *ext4_get_journal(struct super_block *sb,
- unsigned int journal_inum)
+static int ext4_journal_bmap(journal_t *journal, sector_t *block)
+{
+ struct ext4_map_blocks map;
+ int ret;
+
+ if (journal->j_inode == NULL)
+ return 0;
+
+ map.m_lblk = *block;
+ map.m_len = 1;
+ ret = ext4_map_blocks(NULL, journal->j_inode, &map, 0);
+ if (ret <= 0) {
+ ext4_msg(journal->j_inode->i_sb, KERN_CRIT,
+ "journal bmap failed: block %llu ret %d\n",
+ *block, ret);
+ jbd2_journal_abort(journal, ret ? ret : -EFSCORRUPTED);
+ return ret;
+ }
+ *block = map.m_pblk;
+ return 0;
+}
+
+static journal_t *ext4_open_inode_journal(struct super_block *sb,
+ unsigned int journal_inum)
{
struct inode *journal_inode;
journal_t *journal;
- BUG_ON(!ext4_has_feature_journal(sb));
-
journal_inode = ext4_get_journal_inode(sb, journal_inum);
- if (!journal_inode)
- return NULL;
+ if (IS_ERR(journal_inode))
+ return ERR_CAST(journal_inode);
journal = jbd2_journal_init_inode(journal_inode);
- if (!journal) {
+ if (IS_ERR(journal)) {
ext4_msg(sb, KERN_ERR, "Could not load journal inode");
iput(journal_inode);
- return NULL;
+ return ERR_CAST(journal);
}
journal->j_private = sb;
+ journal->j_bmap = ext4_journal_bmap;
ext4_init_journal_params(sb, journal);
return journal;
}
-static journal_t *ext4_get_dev_journal(struct super_block *sb,
- dev_t j_dev)
+static struct file *ext4_get_journal_blkdev(struct super_block *sb,
+ dev_t j_dev, ext4_fsblk_t *j_start,
+ ext4_fsblk_t *j_len)
{
struct buffer_head *bh;
- journal_t *journal;
- ext4_fsblk_t start;
- ext4_fsblk_t len;
+ struct block_device *bdev;
+ struct file *bdev_file;
int hblock, blocksize;
ext4_fsblk_t sb_block;
unsigned long offset;
struct ext4_super_block *es;
- struct block_device *bdev;
+ int errno;
- BUG_ON(!ext4_has_feature_journal(sb));
-
- bdev = ext4_blkdev_get(j_dev, sb);
- if (bdev == NULL)
- return NULL;
+ bdev_file = bdev_file_open_by_dev(j_dev,
+ BLK_OPEN_READ | BLK_OPEN_WRITE | BLK_OPEN_RESTRICT_WRITES,
+ sb, &fs_holder_ops);
+ if (IS_ERR(bdev_file)) {
+ ext4_msg(sb, KERN_ERR,
+ "failed to open journal device unknown-block(%u,%u) %ld",
+ MAJOR(j_dev), MINOR(j_dev), PTR_ERR(bdev_file));
+ return bdev_file;
+ }
+ bdev = file_bdev(bdev_file);
blocksize = sb->s_blocksize;
hblock = bdev_logical_block_size(bdev);
if (blocksize < hblock) {
ext4_msg(sb, KERN_ERR,
"blocksize too small for journal device");
+ errno = -EINVAL;
goto out_bdev;
}
sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
offset = EXT4_MIN_BLOCK_SIZE % blocksize;
- set_blocksize(bdev, blocksize);
- if (!(bh = __bread(bdev, sb_block, blocksize))) {
+ set_blocksize(bdev_file, blocksize);
+ bh = __bread(bdev, sb_block, blocksize);
+ if (!bh) {
ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
"external journal");
+ errno = -EINVAL;
goto out_bdev;
}
@@ -4501,59 +5960,74 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
!(le32_to_cpu(es->s_feature_incompat) &
EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
- ext4_msg(sb, KERN_ERR, "external journal has "
- "bad superblock");
- brelse(bh);
- goto out_bdev;
+ ext4_msg(sb, KERN_ERR, "external journal has bad superblock");
+ errno = -EFSCORRUPTED;
+ goto out_bh;
}
if ((le32_to_cpu(es->s_feature_ro_compat) &
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
- es->s_checksum != ext4_superblock_csum(sb, es)) {
- ext4_msg(sb, KERN_ERR, "external journal has "
- "corrupt superblock");
- brelse(bh);
- goto out_bdev;
+ es->s_checksum != ext4_superblock_csum(es)) {
+ ext4_msg(sb, KERN_ERR, "external journal has corrupt superblock");
+ errno = -EFSCORRUPTED;
+ goto out_bh;
}
if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
ext4_msg(sb, KERN_ERR, "journal UUID does not match");
- brelse(bh);
- goto out_bdev;
+ errno = -EFSCORRUPTED;
+ goto out_bh;
}
- len = ext4_blocks_count(es);
- start = sb_block + 1;
- brelse(bh); /* we're done with the superblock */
+ *j_start = sb_block + 1;
+ *j_len = ext4_blocks_count(es);
+ brelse(bh);
+ return bdev_file;
+
+out_bh:
+ brelse(bh);
+out_bdev:
+ bdev_fput(bdev_file);
+ return ERR_PTR(errno);
+}
- journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
- start, len, blocksize);
- if (!journal) {
+static journal_t *ext4_open_dev_journal(struct super_block *sb,
+ dev_t j_dev)
+{
+ journal_t *journal;
+ ext4_fsblk_t j_start;
+ ext4_fsblk_t j_len;
+ struct file *bdev_file;
+ int errno = 0;
+
+ bdev_file = ext4_get_journal_blkdev(sb, j_dev, &j_start, &j_len);
+ if (IS_ERR(bdev_file))
+ return ERR_CAST(bdev_file);
+
+ journal = jbd2_journal_init_dev(file_bdev(bdev_file), sb->s_bdev, j_start,
+ j_len, sb->s_blocksize);
+ if (IS_ERR(journal)) {
ext4_msg(sb, KERN_ERR, "failed to create device journal");
+ errno = PTR_ERR(journal);
goto out_bdev;
}
- journal->j_private = sb;
- ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer);
- wait_on_buffer(journal->j_sb_buffer);
- if (!buffer_uptodate(journal->j_sb_buffer)) {
- ext4_msg(sb, KERN_ERR, "I/O error on journal device");
- goto out_journal;
- }
if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
ext4_msg(sb, KERN_ERR, "External journal has more than one "
"user (unsupported) - %d",
be32_to_cpu(journal->j_superblock->s_nr_users));
+ errno = -EINVAL;
goto out_journal;
}
- EXT4_SB(sb)->journal_bdev = bdev;
+ journal->j_private = sb;
+ EXT4_SB(sb)->s_journal_bdev_file = bdev_file;
ext4_init_journal_params(sb, journal);
return journal;
out_journal:
- jbd2_journal_destroy(journal);
+ ext4_journal_destroy(EXT4_SB(sb), journal);
out_bdev:
- ext4_blkdev_put(bdev);
- return NULL;
+ bdev_fput(bdev_file);
+ return ERR_PTR(errno);
}
static int ext4_load_journal(struct super_block *sb,
@@ -4565,8 +6039,10 @@ static int ext4_load_journal(struct super_block *sb,
dev_t journal_dev;
int err = 0;
int really_read_only;
+ int journal_dev_ro;
- BUG_ON(!ext4_has_feature_journal(sb));
+ if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
+ return -EFSCORRUPTED;
if (journal_devnum &&
journal_devnum != le32_to_cpu(es->s_journal_dev)) {
@@ -4576,7 +6052,31 @@ static int ext4_load_journal(struct super_block *sb,
} else
journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
- really_read_only = bdev_read_only(sb->s_bdev);
+ if (journal_inum && journal_dev) {
+ ext4_msg(sb, KERN_ERR,
+ "filesystem has both journal inode and journal device!");
+ return -EINVAL;
+ }
+
+ if (journal_inum) {
+ journal = ext4_open_inode_journal(sb, journal_inum);
+ if (IS_ERR(journal))
+ return PTR_ERR(journal);
+ } else {
+ journal = ext4_open_dev_journal(sb, journal_dev);
+ if (IS_ERR(journal))
+ return PTR_ERR(journal);
+ }
+
+ journal_dev_ro = bdev_read_only(journal->j_dev);
+ really_read_only = bdev_read_only(sb->s_bdev) | journal_dev_ro;
+
+ if (journal_dev_ro && !sb_rdonly(sb)) {
+ ext4_msg(sb, KERN_ERR,
+ "journal device read-only, try mounting with '-o ro'");
+ err = -EROFS;
+ goto err_out;
+ }
/*
* Are we loading a blank journal or performing recovery after a
@@ -4584,33 +6084,21 @@ static int ext4_load_journal(struct super_block *sb,
* can get read-write access to the device.
*/
if (ext4_has_feature_journal_needs_recovery(sb)) {
- if (sb->s_flags & MS_RDONLY) {
+ if (sb_rdonly(sb)) {
ext4_msg(sb, KERN_INFO, "INFO: recovery "
"required on readonly filesystem");
if (really_read_only) {
ext4_msg(sb, KERN_ERR, "write access "
- "unavailable, cannot proceed");
- return -EROFS;
+ "unavailable, cannot proceed "
+ "(try mounting with noload)");
+ err = -EROFS;
+ goto err_out;
}
ext4_msg(sb, KERN_INFO, "write access will "
"be enabled during recovery");
}
}
- if (journal_inum && journal_dev) {
- ext4_msg(sb, KERN_ERR, "filesystem has both journal "
- "and inode journals!");
- return -EINVAL;
- }
-
- if (journal_inum) {
- if (!(journal = ext4_get_journal(sb, journal_inum)))
- return -EINVAL;
- } else {
- if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
- return -EINVAL;
- }
-
if (!(journal->j_flags & JBD2_BARRIER))
ext4_msg(sb, KERN_INFO, "barriers disabled");
@@ -4618,44 +6106,71 @@ static int ext4_load_journal(struct super_block *sb,
err = jbd2_journal_wipe(journal, !really_read_only);
if (!err) {
char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
+ __le16 orig_state;
+ bool changed = false;
+
if (save)
memcpy(save, ((char *) es) +
EXT4_S_ERR_START, EXT4_S_ERR_LEN);
err = jbd2_journal_load(journal);
- if (save)
+ if (save && memcmp(((char *) es) + EXT4_S_ERR_START,
+ save, EXT4_S_ERR_LEN)) {
memcpy(((char *) es) + EXT4_S_ERR_START,
save, EXT4_S_ERR_LEN);
+ changed = true;
+ }
kfree(save);
+ orig_state = es->s_state;
+ es->s_state |= cpu_to_le16(EXT4_SB(sb)->s_mount_state &
+ EXT4_ERROR_FS);
+ if (orig_state != es->s_state)
+ changed = true;
+ /* Write out restored error information to the superblock */
+ if (changed && !really_read_only) {
+ int err2;
+ err2 = ext4_commit_super(sb);
+ err = err ? : err2;
+ }
}
if (err) {
ext4_msg(sb, KERN_ERR, "error loading journal");
- jbd2_journal_destroy(journal);
- return err;
+ goto err_out;
}
EXT4_SB(sb)->s_journal = journal;
- ext4_clear_journal_err(sb, es);
+ err = ext4_clear_journal_err(sb, es);
+ if (err) {
+ ext4_journal_destroy(EXT4_SB(sb), journal);
+ return err;
+ }
if (!really_read_only && journal_devnum &&
journal_devnum != le32_to_cpu(es->s_journal_dev)) {
es->s_journal_dev = cpu_to_le32(journal_devnum);
-
- /* Make sure we flush the recovery flag to disk. */
- ext4_commit_super(sb, 1);
+ ext4_commit_super(sb);
+ }
+ if (!really_read_only && journal_inum &&
+ journal_inum != le32_to_cpu(es->s_journal_inum)) {
+ es->s_journal_inum = cpu_to_le32(journal_inum);
+ ext4_commit_super(sb);
}
return 0;
+
+err_out:
+ ext4_journal_destroy(EXT4_SB(sb), journal);
+ return err;
}
-static int ext4_commit_super(struct super_block *sb, int sync)
+/* Copy state of EXT4_SB(sb) into buffer for on-disk superblock */
+static void ext4_update_super(struct super_block *sb)
{
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
- struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
- int error = 0;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+ struct buffer_head *sbh = sbi->s_sbh;
- if (!sbh || block_device_ejected(sb))
- return error;
+ lock_buffer(sbh);
/*
* If the file system is mounted read-only, don't update the
* superblock write time. This avoids updating the superblock
@@ -4666,29 +6181,80 @@ static int ext4_commit_super(struct super_block *sb, int sync)
* the clock is set in the future, and this will cause e2fsck
* to complain and force a full file system check.
*/
- if (!(sb->s_flags & MS_RDONLY))
- es->s_wtime = cpu_to_le32(get_seconds());
- if (sb->s_bdev->bd_part)
- es->s_kbytes_written =
- cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
- ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
- EXT4_SB(sb)->s_sectors_written_start) >> 1));
- else
- es->s_kbytes_written =
- cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
- if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter))
+ if (!sb_rdonly(sb))
+ ext4_update_tstamp(es, s_wtime);
+ es->s_kbytes_written =
+ cpu_to_le64(sbi->s_kbytes_written +
+ ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
+ sbi->s_sectors_written_start) >> 1));
+ if (percpu_counter_initialized(&sbi->s_freeclusters_counter))
ext4_free_blocks_count_set(es,
- EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
- &EXT4_SB(sb)->s_freeclusters_counter)));
- if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
+ EXT4_C2B(sbi, percpu_counter_sum_positive(
+ &sbi->s_freeclusters_counter)));
+ if (percpu_counter_initialized(&sbi->s_freeinodes_counter))
es->s_free_inodes_count =
cpu_to_le32(percpu_counter_sum_positive(
- &EXT4_SB(sb)->s_freeinodes_counter));
- BUFFER_TRACE(sbh, "marking dirty");
+ &sbi->s_freeinodes_counter));
+ /* Copy error information to the on-disk superblock */
+ spin_lock(&sbi->s_error_lock);
+ if (sbi->s_add_error_count > 0) {
+ es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
+ if (!es->s_first_error_time && !es->s_first_error_time_hi) {
+ __ext4_update_tstamp(&es->s_first_error_time,
+ &es->s_first_error_time_hi,
+ sbi->s_first_error_time);
+ strtomem_pad(es->s_first_error_func,
+ sbi->s_first_error_func, 0);
+ es->s_first_error_line =
+ cpu_to_le32(sbi->s_first_error_line);
+ es->s_first_error_ino =
+ cpu_to_le32(sbi->s_first_error_ino);
+ es->s_first_error_block =
+ cpu_to_le64(sbi->s_first_error_block);
+ es->s_first_error_errcode =
+ ext4_errno_to_code(sbi->s_first_error_code);
+ }
+ __ext4_update_tstamp(&es->s_last_error_time,
+ &es->s_last_error_time_hi,
+ sbi->s_last_error_time);
+ strtomem_pad(es->s_last_error_func, sbi->s_last_error_func, 0);
+ es->s_last_error_line = cpu_to_le32(sbi->s_last_error_line);
+ es->s_last_error_ino = cpu_to_le32(sbi->s_last_error_ino);
+ es->s_last_error_block = cpu_to_le64(sbi->s_last_error_block);
+ es->s_last_error_errcode =
+ ext4_errno_to_code(sbi->s_last_error_code);
+ /*
+ * Start the daily error reporting function if it hasn't been
+ * started already
+ */
+ if (!es->s_error_count)
+ mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);
+ le32_add_cpu(&es->s_error_count, sbi->s_add_error_count);
+ sbi->s_add_error_count = 0;
+ }
+ spin_unlock(&sbi->s_error_lock);
+
ext4_superblock_csum_set(sb);
- if (sync)
- lock_buffer(sbh);
- if (buffer_write_io_error(sbh)) {
+ unlock_buffer(sbh);
+}
+
+static int ext4_commit_super(struct super_block *sb)
+{
+ struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
+
+ if (!sbh)
+ return -EINVAL;
+
+ ext4_update_super(sb);
+
+ lock_buffer(sbh);
+ /* Buffer got discarded which means block device got invalidated */
+ if (!buffer_mapped(sbh)) {
+ unlock_buffer(sbh);
+ return -EIO;
+ }
+
+ if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
/*
* Oh, dear. A previous attempt to write the
* superblock failed. This could happen because the
@@ -4702,23 +6268,21 @@ static int ext4_commit_super(struct super_block *sb, int sync)
clear_buffer_write_io_error(sbh);
set_buffer_uptodate(sbh);
}
- mark_buffer_dirty(sbh);
- if (sync) {
- unlock_buffer(sbh);
- error = __sync_dirty_buffer(sbh,
- REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0));
- if (error)
- return error;
-
- error = buffer_write_io_error(sbh);
- if (error) {
- ext4_msg(sb, KERN_ERR, "I/O error while writing "
- "superblock");
- clear_buffer_write_io_error(sbh);
- set_buffer_uptodate(sbh);
- }
+ get_bh(sbh);
+ /* Clear potential dirty bit if it was journalled update */
+ clear_buffer_dirty(sbh);
+ sbh->b_end_io = end_buffer_write_sync;
+ submit_bh(REQ_OP_WRITE | REQ_SYNC |
+ (test_opt(sb, BARRIER) ? REQ_FUA : 0), sbh);
+ wait_on_buffer(sbh);
+ if (buffer_write_io_error(sbh)) {
+ ext4_msg(sb, KERN_ERR, "I/O error while writing "
+ "superblock");
+ clear_buffer_write_io_error(sbh);
+ set_buffer_uptodate(sbh);
+ return -EIO;
}
- return error;
+ return 0;
}
/*
@@ -4726,27 +6290,39 @@ static int ext4_commit_super(struct super_block *sb, int sync)
* remounting) the filesystem readonly, then we will end up with a
* consistent fs on disk. Record that fact.
*/
-static void ext4_mark_recovery_complete(struct super_block *sb,
- struct ext4_super_block *es)
+static int ext4_mark_recovery_complete(struct super_block *sb,
+ struct ext4_super_block *es)
{
+ int err;
journal_t *journal = EXT4_SB(sb)->s_journal;
if (!ext4_has_feature_journal(sb)) {
- BUG_ON(journal != NULL);
- return;
+ if (journal != NULL) {
+ ext4_error(sb, "Journal got removed while the fs was "
+ "mounted!");
+ return -EFSCORRUPTED;
+ }
+ return 0;
}
jbd2_journal_lock_updates(journal);
- if (jbd2_journal_flush(journal) < 0)
+ err = jbd2_journal_flush(journal, 0);
+ if (err < 0)
goto out;
- if (ext4_has_feature_journal_needs_recovery(sb) &&
- sb->s_flags & MS_RDONLY) {
+ if (sb_rdonly(sb) && (ext4_has_feature_journal_needs_recovery(sb) ||
+ ext4_has_feature_orphan_present(sb))) {
+ if (!ext4_orphan_file_empty(sb)) {
+ ext4_error(sb, "Orphan file not empty on read-only fs.");
+ err = -EFSCORRUPTED;
+ goto out;
+ }
ext4_clear_feature_journal_needs_recovery(sb);
- ext4_commit_super(sb, 1);
+ ext4_clear_feature_orphan_present(sb);
+ ext4_commit_super(sb);
}
-
out:
jbd2_journal_unlock_updates(journal);
+ return err;
}
/*
@@ -4754,14 +6330,17 @@ out:
* has recorded an error from a previous lifetime, move that error to the
* main filesystem now.
*/
-static void ext4_clear_journal_err(struct super_block *sb,
+static int ext4_clear_journal_err(struct super_block *sb,
struct ext4_super_block *es)
{
journal_t *journal;
int j_errno;
const char *errstr;
- BUG_ON(!ext4_has_feature_journal(sb));
+ if (!ext4_has_feature_journal(sb)) {
+ ext4_error(sb, "Journal got removed while the fs was mounted!");
+ return -EFSCORRUPTED;
+ }
journal = EXT4_SB(sb)->s_journal;
@@ -4777,15 +6356,18 @@ static void ext4_clear_journal_err(struct super_block *sb,
errstr = ext4_decode_error(sb, j_errno, nbuf);
ext4_warning(sb, "Filesystem error recorded "
"from previous mount: %s", errstr);
- ext4_warning(sb, "Marking fs in need of filesystem check.");
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
- ext4_commit_super(sb, 1);
+ j_errno = ext4_commit_super(sb);
+ if (j_errno)
+ return j_errno;
+ ext4_warning(sb, "Marked fs in need of filesystem check.");
jbd2_journal_clear_err(journal);
jbd2_journal_update_sb_errno(journal);
}
+ return 0;
}
/*
@@ -4794,13 +6376,7 @@ static void ext4_clear_journal_err(struct super_block *sb,
*/
int ext4_force_commit(struct super_block *sb)
{
- journal_t *journal;
-
- if (sb->s_flags & MS_RDONLY)
- return 0;
-
- journal = EXT4_SB(sb)->s_journal;
- return ext4_journal_force_commit(journal);
+ return ext4_journal_force_commit(EXT4_SB(sb)->s_journal);
}
static int ext4_sync_fs(struct super_block *sb, int wait)
@@ -4810,8 +6386,9 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
bool needs_barrier = false;
struct ext4_sb_info *sbi = EXT4_SB(sb);
- if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
- return 0;
+ ret = ext4_emergency_state(sb);
+ if (unlikely(ret))
+ return ret;
trace_ext4_sync_fs(sb, wait);
flush_workqueue(sbi->rsv_conversion_wq);
@@ -4840,7 +6417,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
needs_barrier = true;
if (needs_barrier) {
int err;
- err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
+ err = blkdev_issue_flush(sb->s_bdev);
if (!ret)
ret = err;
}
@@ -4859,12 +6436,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
static int ext4_freeze(struct super_block *sb)
{
int error = 0;
- journal_t *journal;
-
- if (sb->s_flags & MS_RDONLY)
- return 0;
-
- journal = EXT4_SB(sb)->s_journal;
+ journal_t *journal = EXT4_SB(sb)->s_journal;
if (journal) {
/* Now we set up the journal barrier. */
@@ -4874,15 +6446,17 @@ static int ext4_freeze(struct super_block *sb)
* Don't clear the needs_recovery flag if we failed to
* flush the journal.
*/
- error = jbd2_journal_flush(journal);
+ error = jbd2_journal_flush(journal, 0);
if (error < 0)
goto out;
/* Journal blocked and flushed, clear needs_recovery flag. */
ext4_clear_feature_journal_needs_recovery(sb);
+ if (ext4_orphan_file_empty(sb))
+ ext4_clear_feature_orphan_present(sb);
}
- error = ext4_commit_super(sb, 1);
+ error = ext4_commit_super(sb);
out:
if (journal)
/* we rely on upper layer to stop further updates */
@@ -4896,15 +6470,17 @@ out:
*/
static int ext4_unfreeze(struct super_block *sb)
{
- if ((sb->s_flags & MS_RDONLY) || ext4_forced_shutdown(EXT4_SB(sb)))
+ if (ext4_emergency_state(sb))
return 0;
if (EXT4_SB(sb)->s_journal) {
/* Reset the needs_recovery flag before the fs is unlocked. */
ext4_set_feature_journal_needs_recovery(sb);
+ if (ext4_has_feature_orphan_file(sb))
+ ext4_set_feature_orphan_present(sb);
}
- ext4_commit_super(sb, 1);
+ ext4_commit_super(sb);
return 0;
}
@@ -4924,20 +6500,22 @@ struct ext4_mount_options {
#endif
};
-static int ext4_remount(struct super_block *sb, int *flags, char *data)
+static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
{
+ struct ext4_fs_context *ctx = fc->fs_private;
struct ext4_super_block *es;
struct ext4_sb_info *sbi = EXT4_SB(sb);
unsigned long old_sb_flags;
struct ext4_mount_options old_opts;
- int enable_quota = 0;
ext4_group_t g;
- unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
int err = 0;
+ int alloc_ctx;
#ifdef CONFIG_QUOTA
+ int enable_quota = 0;
int i, j;
+ char *to_free[EXT4_MAXQUOTAS];
#endif
- char *orig_data = kstrdup(data, GFP_KERNEL);
+
/* Store the original options */
old_sb_flags = sb->s_flags;
@@ -4952,25 +6530,46 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
for (i = 0; i < EXT4_MAXQUOTAS; i++)
if (sbi->s_qf_names[i]) {
- old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i],
- GFP_KERNEL);
+ char *qf_name = get_qf_name(sb, sbi, i);
+
+ old_opts.s_qf_names[i] = kstrdup(qf_name, GFP_KERNEL);
if (!old_opts.s_qf_names[i]) {
for (j = 0; j < i; j++)
kfree(old_opts.s_qf_names[j]);
- kfree(orig_data);
return -ENOMEM;
}
} else
old_opts.s_qf_names[i] = NULL;
#endif
- if (sbi->s_journal && sbi->s_journal->j_task->io_context)
- journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
+ if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)) {
+ if (sbi->s_journal && sbi->s_journal->j_task->io_context)
+ ctx->journal_ioprio =
+ sbi->s_journal->j_task->io_context->ioprio;
+ else
+ ctx->journal_ioprio = EXT4_DEF_JOURNAL_IOPRIO;
- if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
- err = -EINVAL;
- goto restore_opts;
}
+ if ((ctx->spec & EXT4_SPEC_s_stripe) &&
+ ext4_is_stripe_incompatible(sb, ctx->s_stripe)) {
+ ext4_msg(sb, KERN_WARNING,
+ "stripe (%lu) is not aligned with cluster size (%u), "
+ "stripe is disabled",
+ ctx->s_stripe, sbi->s_cluster_ratio);
+ ctx->s_stripe = 0;
+ }
+
+ /*
+ * Changing the DIOREAD_NOLOCK or DELALLOC mount options may cause
+ * two calls to ext4_should_dioread_nolock() to return inconsistent
+ * values, triggering WARN_ON in ext4_add_complete_io(). we grab
+ * here s_writepages_rwsem to avoid race between writepages ops and
+ * remount.
+ */
+ alloc_ctx = ext4_writepages_down_write(sb);
+ ext4_apply_options(fc, sb);
+ ext4_writepages_up_write(sb, alloc_ctx);
+
if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
test_opt(sb, JOURNAL_CHECKSUM)) {
ext4_msg(sb, KERN_ERR, "changing journal_checksum "
@@ -4991,12 +6590,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
err = -EINVAL;
goto restore_opts;
}
- if (test_opt(sb, DAX)) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "both data=journal and dax");
- err = -EINVAL;
- goto restore_opts;
- }
} else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) {
if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
@@ -5012,35 +6605,33 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
- if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
- ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
- "dax flag with busy inodes while remounting");
- sbi->s_mount_opt ^= EXT4_MOUNT_DAX;
+ if ((old_opts.s_mount_opt & EXT4_MOUNT_DELALLOC) &&
+ !test_opt(sb, DELALLOC)) {
+ ext4_msg(sb, KERN_ERR, "can't disable delalloc during remount");
+ err = -EINVAL;
+ goto restore_opts;
}
- if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
- ext4_abort(sb, "Abort forced by user");
-
- sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
- (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
+ sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
+ (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
es = sbi->s_es;
if (sbi->s_journal) {
ext4_init_journal_params(sb, sbi->s_journal);
- set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
+ set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio);
}
- if (*flags & MS_LAZYTIME)
- sb->s_flags |= MS_LAZYTIME;
+ /* Flush outstanding errors before changing fs state */
+ flush_work(&sbi->s_sb_upd_work);
- if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
- if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
+ if ((bool)(fc->sb_flags & SB_RDONLY) != sb_rdonly(sb)) {
+ if (ext4_emergency_state(sb)) {
err = -EROFS;
goto restore_opts;
}
- if (*flags & MS_RDONLY) {
+ if (fc->sb_flags & SB_RDONLY) {
err = sync_filesystem(sb);
if (err < 0)
goto restore_opts;
@@ -5052,7 +6643,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
* First of all, the unconditional stuff we have to do
* to disable replay of the journal when we next remount
*/
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
/*
* OK, test if we are remounting a valid rw partition
@@ -5063,8 +6654,13 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
(sbi->s_mount_state & EXT4_VALID_FS))
es->s_state = cpu_to_le16(sbi->s_mount_state);
- if (sbi->s_journal)
+ if (sbi->s_journal) {
+ /*
+ * We let remount-ro finish even if marking fs
+ * as clean failed...
+ */
ext4_mark_recovery_complete(sb, es);
+ }
} else {
/* Make sure we can mount this feature set readwrite */
if (ext4_has_feature_readonly(sb) ||
@@ -5095,7 +6691,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
* around from a previously readonly bdev mount,
* require a full umount/remount for now.
*/
- if (es->s_last_orphan) {
+ if (es->s_last_orphan || !ext4_orphan_file_empty(sb)) {
ext4_msg(sb, KERN_WARNING, "Couldn't "
"remount RDWR because of unprocessed "
"orphan inode list. Please "
@@ -5110,41 +6706,49 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
* been changed by e2fsck since we originally mounted
* the partition.)
*/
- if (sbi->s_journal)
- ext4_clear_journal_err(sb, es);
- sbi->s_mount_state = le16_to_cpu(es->s_state);
- if (!ext4_setup_super(sb, es, 0))
- sb->s_flags &= ~MS_RDONLY;
- if (ext4_has_feature_mmp(sb))
- if (ext4_multi_mount_protect(sb,
- le64_to_cpu(es->s_mmp_block))) {
- err = -EROFS;
+ if (sbi->s_journal) {
+ err = ext4_clear_journal_err(sb, es);
+ if (err)
goto restore_opts;
- }
+ }
+ sbi->s_mount_state = (le16_to_cpu(es->s_state) &
+ ~EXT4_FC_REPLAY);
+
+ err = ext4_setup_super(sb, es, 0);
+ if (err)
+ goto restore_opts;
+
+ sb->s_flags &= ~SB_RDONLY;
+ if (ext4_has_feature_mmp(sb)) {
+ err = ext4_multi_mount_protect(sb,
+ le64_to_cpu(es->s_mmp_block));
+ if (err)
+ goto restore_opts;
+ }
+#ifdef CONFIG_QUOTA
enable_quota = 1;
+#endif
}
}
/*
- * Reinitialize lazy itable initialization thread based on
- * current settings
+ * Handle creation of system zone data early because it can fail.
+ * Releasing of existing data is done when we are sure remount will
+ * succeed.
*/
- if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE))
- ext4_unregister_li_request(sb);
- else {
- ext4_group_t first_not_zeroed;
- first_not_zeroed = ext4_has_uninit_itable(sb);
- ext4_register_li_request(sb, first_not_zeroed);
+ if (test_opt(sb, BLOCK_VALIDITY) && !sbi->s_system_blks) {
+ err = ext4_setup_system_zone(sb);
+ if (err)
+ goto restore_opts;
}
- ext4_setup_system_zone(sb);
- if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
- ext4_commit_super(sb, 1);
+ if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) {
+ err = ext4_commit_super(sb);
+ if (err)
+ goto restore_opts;
+ }
#ifdef CONFIG_QUOTA
- /* Release old quota file names */
- for (i = 0; i < EXT4_MAXQUOTAS; i++)
- kfree(old_opts.s_qf_names[i]);
if (enable_quota) {
if (sb_any_quota_suspended(sb))
dquot_resume(sb, -1);
@@ -5154,14 +6758,48 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
}
+ /* Release old quota file names */
+ for (i = 0; i < EXT4_MAXQUOTAS; i++)
+ kfree(old_opts.s_qf_names[i]);
#endif
+ if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
+ ext4_release_system_zone(sb);
+
+ /*
+ * Reinitialize lazy itable initialization thread based on
+ * current settings
+ */
+ if (sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE))
+ ext4_unregister_li_request(sb);
+ else {
+ ext4_group_t first_not_zeroed;
+ first_not_zeroed = ext4_has_uninit_itable(sb);
+ ext4_register_li_request(sb, first_not_zeroed);
+ }
+
+ if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
+ ext4_stop_mmpd(sbi);
+
+ /*
+ * Handle aborting the filesystem as the last thing during remount to
+ * avoid obsure errors during remount when some option changes fail to
+ * apply due to shutdown filesystem.
+ */
+ if (test_opt2(sb, ABORT))
+ ext4_abort(sb, ESHUTDOWN, "Abort forced by user");
- *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME);
- ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
- kfree(orig_data);
return 0;
restore_opts:
+ /*
+ * If there was a failing r/w to ro transition, we may need to
+ * re-enable quota
+ */
+ if (sb_rdonly(sb) && !(old_sb_flags & SB_RDONLY) &&
+ sb_any_quota_suspended(sb))
+ dquot_resume(sb, -1);
+
+ alloc_ctx = ext4_writepages_down_write(sb);
sb->s_flags = old_sb_flags;
sbi->s_mount_opt = old_opts.s_mount_opt;
sbi->s_mount_opt2 = old_opts.s_mount_opt2;
@@ -5170,17 +6808,48 @@ restore_opts:
sbi->s_commit_interval = old_opts.s_commit_interval;
sbi->s_min_batch_time = old_opts.s_min_batch_time;
sbi->s_max_batch_time = old_opts.s_max_batch_time;
+ ext4_writepages_up_write(sb, alloc_ctx);
+
+ if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
+ ext4_release_system_zone(sb);
#ifdef CONFIG_QUOTA
sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
for (i = 0; i < EXT4_MAXQUOTAS; i++) {
- kfree(sbi->s_qf_names[i]);
- sbi->s_qf_names[i] = old_opts.s_qf_names[i];
+ to_free[i] = get_qf_name(sb, sbi, i);
+ rcu_assign_pointer(sbi->s_qf_names[i], old_opts.s_qf_names[i]);
}
+ synchronize_rcu();
+ for (i = 0; i < EXT4_MAXQUOTAS; i++)
+ kfree(to_free[i]);
#endif
- kfree(orig_data);
+ if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
+ ext4_stop_mmpd(sbi);
return err;
}
+static int ext4_reconfigure(struct fs_context *fc)
+{
+ struct super_block *sb = fc->root->d_sb;
+ int ret;
+ bool old_ro = sb_rdonly(sb);
+
+ fc->s_fs_info = EXT4_SB(sb);
+
+ ret = ext4_check_opt_consistency(fc, sb);
+ if (ret < 0)
+ return ret;
+
+ ret = __ext4_remount(fc, sb);
+ if (ret < 0)
+ return ret;
+
+ ext4_msg(sb, KERN_INFO, "re-mounted %pU%s.",
+ &sb->s_uuid,
+ (old_ro != sb_rdonly(sb)) ? (sb_rdonly(sb) ? " ro" : " r/w") : "");
+
+ return 0;
+}
+
#ifdef CONFIG_QUOTA
static int ext4_statfs_project(struct super_block *sb,
kprojid_t projid, struct kstatfs *buf)
@@ -5194,30 +6863,38 @@ static int ext4_statfs_project(struct super_block *sb,
dquot = dqget(sb, qid);
if (IS_ERR(dquot))
return PTR_ERR(dquot);
- spin_lock(&dq_data_lock);
-
- limit = (dquot->dq_dqb.dqb_bsoftlimit ?
- dquot->dq_dqb.dqb_bsoftlimit :
- dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits;
- if (limit && buf->f_blocks > limit) {
- curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits;
- buf->f_blocks = limit;
- buf->f_bfree = buf->f_bavail =
- (buf->f_blocks > curblock) ?
- (buf->f_blocks - curblock) : 0;
- }
-
- limit = dquot->dq_dqb.dqb_isoftlimit ?
- dquot->dq_dqb.dqb_isoftlimit :
- dquot->dq_dqb.dqb_ihardlimit;
- if (limit && buf->f_files > limit) {
- buf->f_files = limit;
- buf->f_ffree =
- (buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
- (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
- }
-
- spin_unlock(&dq_data_lock);
+ spin_lock(&dquot->dq_dqb_lock);
+
+ limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit,
+ dquot->dq_dqb.dqb_bhardlimit);
+ limit >>= sb->s_blocksize_bits;
+
+ if (limit) {
+ uint64_t remaining = 0;
+
+ curblock = (dquot->dq_dqb.dqb_curspace +
+ dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
+ if (limit > curblock)
+ remaining = limit - curblock;
+
+ buf->f_blocks = min(buf->f_blocks, limit);
+ buf->f_bfree = min(buf->f_bfree, remaining);
+ buf->f_bavail = min(buf->f_bavail, remaining);
+ }
+
+ limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit,
+ dquot->dq_dqb.dqb_ihardlimit);
+ if (limit) {
+ uint64_t remaining = 0;
+
+ if (limit > dquot->dq_dqb.dqb_curinodes)
+ remaining = limit - dquot->dq_dqb.dqb_curinodes;
+
+ buf->f_files = min(buf->f_files, limit);
+ buf->f_ffree = min(buf->f_ffree, remaining);
+ }
+
+ spin_unlock(&dquot->dq_dqb_lock);
dqput(dquot);
return 0;
}
@@ -5229,7 +6906,6 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
ext4_fsblk_t overhead = 0, resv_blocks;
- u64 fsid;
s64 bfree;
resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
@@ -5250,10 +6926,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = le32_to_cpu(es->s_inodes_count);
buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
buf->f_namelen = EXT4_NAME_LEN;
- fsid = le64_to_cpup((void *)es->s_uuid) ^
- le64_to_cpup((void *)es->s_uuid + sizeof(u64));
- buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
- buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
+ buf->f_fsid = uuid_to_fsid(es->s_uuid);
#ifdef CONFIG_QUOTA
if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
@@ -5263,18 +6936,13 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
-/* Helper function for writing quotas on sync - we need to start transaction
- * before quota file is locked for write. Otherwise the are possible deadlocks:
- * Process 1 Process 2
- * ext4_create() quota_sync()
- * jbd2_journal_start() write_dquot()
- * dquot_initialize() down(dqio_mutex)
- * down(dqio_mutex) jbd2_journal_start()
- *
- */
#ifdef CONFIG_QUOTA
+/*
+ * Helper functions so that transaction is started before we acquire dqio_sem
+ * to keep correct lock ordering of transaction > dqio_sem
+ */
static inline struct inode *dquot_to_inode(struct dquot *dquot)
{
return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
@@ -5292,6 +6960,10 @@ static int ext4_write_dquot(struct dquot *dquot)
if (IS_ERR(handle))
return PTR_ERR(handle);
ret = dquot_commit(dquot);
+ if (ret < 0)
+ ext4_error_err(dquot->dq_sb, -ret,
+ "Failed to commit dquot type %d",
+ dquot->dq_id.type);
err = ext4_journal_stop(handle);
if (!ret)
ret = err;
@@ -5308,6 +6980,10 @@ static int ext4_acquire_dquot(struct dquot *dquot)
if (IS_ERR(handle))
return PTR_ERR(handle);
ret = dquot_acquire(dquot);
+ if (ret < 0)
+ ext4_error_err(dquot->dq_sb, -ret,
+ "Failed to acquire dquot type %d",
+ dquot->dq_id.type);
err = ext4_journal_stop(handle);
if (!ret)
ret = err;
@@ -5318,29 +6994,47 @@ static int ext4_release_dquot(struct dquot *dquot)
{
int ret, err;
handle_t *handle;
+ bool freeze_protected = false;
+
+ /*
+ * Trying to sb_start_intwrite() in a running transaction
+ * can result in a deadlock. Further, running transactions
+ * are already protected from freezing.
+ */
+ if (!ext4_journal_current_handle()) {
+ sb_start_intwrite(dquot->dq_sb);
+ freeze_protected = true;
+ }
handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
if (IS_ERR(handle)) {
/* Release dquot anyway to avoid endless cycle in dqput() */
dquot_release(dquot);
+ if (freeze_protected)
+ sb_end_intwrite(dquot->dq_sb);
return PTR_ERR(handle);
}
ret = dquot_release(dquot);
+ if (ret < 0)
+ ext4_error_err(dquot->dq_sb, -ret,
+ "Failed to release dquot type %d",
+ dquot->dq_id.type);
err = ext4_journal_stop(handle);
if (!ret)
ret = err;
+
+ if (freeze_protected)
+ sb_end_intwrite(dquot->dq_sb);
+
return ret;
}
static int ext4_mark_dquot_dirty(struct dquot *dquot)
{
struct super_block *sb = dquot->dq_sb;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- /* Are we journaling quotas? */
- if (ext4_has_feature_quota(sb) ||
- sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
+ if (ext4_is_quota_journalled(sb)) {
dquot_mark_dquot_dirty(dquot);
return ext4_write_dquot(dquot);
} else {
@@ -5354,7 +7048,7 @@ static int ext4_write_info(struct super_block *sb, int type)
handle_t *handle;
/* Data block + inode block */
- handle = ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2);
+ handle = ext4_journal_start_sb(sb, EXT4_HT_QUOTA, 2);
if (IS_ERR(handle))
return PTR_ERR(handle);
ret = dquot_commit_info(sb, type);
@@ -5364,16 +7058,6 @@ static int ext4_write_info(struct super_block *sb, int type)
return ret;
}
-/*
- * Turn on quotas during mount time - we need to find
- * the quota file and such...
- */
-static int ext4_quota_on_mount(struct super_block *sb, int type)
-{
- return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
- EXT4_SB(sb)->s_jquota_fmt, type);
-}
-
static void lockdep_set_quota_inode(struct inode *inode, int subclass)
{
struct ext4_inode_info *ei = EXT4_I(inode);
@@ -5402,6 +7086,11 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
/* Quotafile not on the same filesystem? */
if (path->dentry->d_sb != sb)
return -EXDEV;
+
+ /* Quota already enabled for this file? */
+ if (IS_NOQUOTA(d_inode(path->dentry)))
+ return -EBUSY;
+
/* Journaling quota? */
if (EXT4_SB(sb)->s_qf_names[type]) {
/* Quotafile not in fs root? */
@@ -5409,31 +7098,18 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
ext4_msg(sb, KERN_WARNING,
"Quota file not on filesystem root. "
"Journaled quota will not work");
- }
-
- /*
- * When we journal data on quota file, we have to flush journal to see
- * all updates to the file when we bypass pagecache...
- */
- if (EXT4_SB(sb)->s_journal &&
- ext4_should_journal_data(d_inode(path->dentry))) {
+ sb_dqopt(sb)->flags |= DQUOT_NOLIST_DIRTY;
+ } else {
/*
- * We don't need to lock updates but journal_flush() could
- * otherwise be livelocked...
+ * Clear the flag just in case mount options changed since
+ * last time.
*/
- jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
- err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
- jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
- if (err)
- return err;
+ sb_dqopt(sb)->flags &= ~DQUOT_NOLIST_DIRTY;
}
lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
err = dquot_quota_on(sb, type, format_id, path);
- if (err) {
- lockdep_set_quota_inode(path->dentry->d_inode,
- I_DATA_SEM_NORMAL);
- } else {
+ if (!err) {
struct inode *inode = d_inode(path->dentry);
handle_t *handle;
@@ -5449,14 +7125,33 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
EXT4_I(inode)->i_flags |= EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL;
inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
S_NOATIME | S_IMMUTABLE);
- ext4_mark_inode_dirty(handle, inode);
+ err = ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
unlock_inode:
inode_unlock(inode);
+ if (err)
+ dquot_quota_off(sb, type);
}
+ if (err)
+ lockdep_set_quota_inode(path->dentry->d_inode,
+ I_DATA_SEM_NORMAL);
return err;
}
+static inline bool ext4_check_quota_inum(int type, unsigned long qf_inum)
+{
+ switch (type) {
+ case USRQUOTA:
+ return qf_inum == EXT4_USR_QUOTA_INO;
+ case GRPQUOTA:
+ return qf_inum == EXT4_GRP_QUOTA_INO;
+ case PRJQUOTA:
+ return qf_inum >= EXT4_GOOD_OLD_FIRST_INO;
+ default:
+ BUG();
+ }
+}
+
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
unsigned int flags)
{
@@ -5473,25 +7168,32 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
if (!qf_inums[type])
return -EPERM;
- qf_inode = ext4_iget(sb, qf_inums[type]);
+ if (!ext4_check_quota_inum(type, qf_inums[type])) {
+ ext4_error(sb, "Bad quota inum: %lu, type: %d",
+ qf_inums[type], type);
+ return -EUCLEAN;
+ }
+
+ qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
if (IS_ERR(qf_inode)) {
- ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
+ ext4_error(sb, "Bad quota inode: %lu, type: %d",
+ qf_inums[type], type);
return PTR_ERR(qf_inode);
}
/* Don't account quota for quota files to avoid recursion */
qf_inode->i_flags |= S_NOQUOTA;
lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
- err = dquot_enable(qf_inode, type, format_id, flags);
- iput(qf_inode);
+ err = dquot_load_quota_inode(qf_inode, type, format_id, flags);
if (err)
lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
+ iput(qf_inode);
return err;
}
/* Enable usage tracking for all quota types. */
-static int ext4_enable_quotas(struct super_block *sb)
+int ext4_enable_quotas(struct super_block *sb)
{
int type, err = 0;
unsigned long qf_inums[EXT4_MAXQUOTAS] = {
@@ -5505,7 +7207,7 @@ static int ext4_enable_quotas(struct super_block *sb)
test_opt(sb, PRJQUOTA),
};
- sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
+ sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
for (type = 0; type < EXT4_MAXQUOTAS; type++) {
if (qf_inums[type]) {
err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
@@ -5514,8 +7216,11 @@ static int ext4_enable_quotas(struct super_block *sb)
if (err) {
ext4_warning(sb,
"Failed to enable quota tracking "
- "(type=%d, err=%d). Please run "
- "e2fsck to fix.", type, err);
+ "(type=%d, err=%d, ino=%lu). "
+ "Please run e2fsck to fix.", type,
+ err, qf_inums[type]);
+
+ ext4_quotas_off(sb, type);
return err;
}
}
@@ -5540,6 +7245,13 @@ static int ext4_quota_off(struct super_block *sb, int type)
err = dquot_quota_off(sb, type);
if (err || ext4_has_feature_quota(sb))
goto out_put;
+ /*
+ * When the filesystem was remounted read-only first, we cannot cleanup
+ * inode flags here. Bad luck but people should be using QUOTA feature
+ * these days anyway.
+ */
+ if (sb_rdonly(sb))
+ goto out_put;
inode_lock(inode);
/*
@@ -5548,12 +7260,14 @@ static int ext4_quota_off(struct super_block *sb, int type)
* this is not a hard failure and quotas are already disabled.
*/
handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
- if (IS_ERR(handle))
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
goto out_unlock;
+ }
EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
- inode->i_mtime = inode->i_ctime = current_time(inode);
- ext4_mark_inode_dirty(handle, inode);
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
+ err = ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
out_unlock:
inode_unlock(inode);
@@ -5586,8 +7300,7 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
len = i_size-off;
toread = len;
while (toread > 0) {
- tocopy = sb->s_blocksize - offset < toread ?
- sb->s_blocksize - offset : toread;
+ tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread);
bh = ext4_bread(NULL, inode, blk, 0);
if (IS_ERR(bh))
return PTR_ERR(bh);
@@ -5611,12 +7324,12 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
{
struct inode *inode = sb_dqopt(sb)->files[type];
ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
- int err, offset = off & (sb->s_blocksize - 1);
+ int err = 0, err2 = 0, offset = off & (sb->s_blocksize - 1);
int retries = 0;
struct buffer_head *bh;
handle_t *handle = journal_current_handle();
- if (EXT4_SB(sb)->s_journal && !handle) {
+ if (!handle) {
ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
" cancelled because transaction is not started",
(unsigned long long)off, (unsigned long long)len);
@@ -5637,21 +7350,21 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
bh = ext4_bread(handle, inode, blk,
EXT4_GET_BLOCKS_CREATE |
EXT4_GET_BLOCKS_METADATA_NOFAIL);
- } while (IS_ERR(bh) && (PTR_ERR(bh) == -ENOSPC) &&
+ } while (PTR_ERR(bh) == -ENOSPC &&
ext4_should_retry_alloc(inode->i_sb, &retries));
if (IS_ERR(bh))
return PTR_ERR(bh);
if (!bh)
goto out;
BUFFER_TRACE(bh, "get write access");
- err = ext4_journal_get_write_access(handle, bh);
+ err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE);
if (err) {
brelse(bh);
return err;
}
lock_buffer(bh);
memcpy(bh->b_data+offset, data, len);
- flush_dcache_page(bh->b_page);
+ flush_dcache_folio(bh->b_folio);
unlock_buffer(bh);
err = ext4_handle_dirty_metadata(handle, NULL, bh);
brelse(bh);
@@ -5659,30 +7372,14 @@ out:
if (inode->i_size < off + len) {
i_size_write(inode, off + len);
EXT4_I(inode)->i_disksize = inode->i_size;
- ext4_mark_inode_dirty(handle, inode);
+ err2 = ext4_mark_inode_dirty(handle, inode);
+ if (unlikely(err2 && !err))
+ err = err2;
}
- return len;
-}
-
-static int ext4_get_next_id(struct super_block *sb, struct kqid *qid)
-{
- const struct quota_format_ops *ops;
-
- if (!sb_has_quota_loaded(sb, qid->type))
- return -ESRCH;
- ops = sb_dqopt(sb)->ops[qid->type];
- if (!ops || !ops->get_next_id)
- return -ENOSYS;
- return dquot_get_next_id(sb, qid);
+ return err ? err : len;
}
#endif
-static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data)
-{
- return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
-}
-
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
static inline void register_as_ext2(void)
{
@@ -5701,7 +7398,7 @@ static inline int ext2_feature_set_ok(struct super_block *sb)
{
if (ext4_has_unknown_ext2_incompat_features(sb))
return 0;
- if (sb->s_flags & MS_RDONLY)
+ if (sb_rdonly(sb))
return 1;
if (ext4_has_unknown_ext2_ro_compat_features(sb))
return 0;
@@ -5732,43 +7429,57 @@ static inline int ext3_feature_set_ok(struct super_block *sb)
return 0;
if (!ext4_has_feature_journal(sb))
return 0;
- if (sb->s_flags & MS_RDONLY)
+ if (sb_rdonly(sb))
return 1;
if (ext4_has_unknown_ext3_ro_compat_features(sb))
return 0;
return 1;
}
+static void ext4_kill_sb(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct file *bdev_file = sbi ? sbi->s_journal_bdev_file : NULL;
+
+ kill_block_super(sb);
+
+ if (bdev_file)
+ bdev_fput(bdev_file);
+}
+
static struct file_system_type ext4_fs_type = {
- .owner = THIS_MODULE,
- .name = "ext4",
- .mount = ext4_mount,
- .kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV,
+ .owner = THIS_MODULE,
+ .name = "ext4",
+ .init_fs_context = ext4_init_fs_context,
+ .parameters = ext4_param_specs,
+ .kill_sb = ext4_kill_sb,
+ .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME |
+ FS_LBS,
};
MODULE_ALIAS_FS("ext4");
-/* Shared across all ext4 file systems */
-wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
-
static int __init ext4_init_fs(void)
{
- int i, err;
+ int err;
ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
ext4_li_info = NULL;
- mutex_init(&ext4_li_mtx);
/* Build-time check for flags consistency */
ext4_check_flag_values();
- for (i = 0; i < EXT4_WQ_HASH_SZ; i++)
- init_waitqueue_head(&ext4__ioend_wq[i]);
-
err = ext4_init_es();
if (err)
return err;
+ err = ext4_init_pending();
+ if (err)
+ goto out7;
+
+ err = ext4_init_post_read_processing();
+ if (err)
+ goto out6;
+
err = ext4_init_pageio();
if (err)
goto out5;
@@ -5787,6 +7498,11 @@ static int __init ext4_init_fs(void)
err = init_inodecache();
if (err)
goto out1;
+
+ err = ext4_fc_init_dentry_cache();
+ if (err)
+ goto out05;
+
register_as_ext3();
register_as_ext2();
err = register_filesystem(&ext4_fs_type);
@@ -5797,6 +7513,8 @@ static int __init ext4_init_fs(void)
out:
unregister_as_ext2();
unregister_as_ext3();
+ ext4_fc_destroy_dentry_cache();
+out05:
destroy_inodecache();
out1:
ext4_exit_mballoc();
@@ -5807,6 +7525,10 @@ out3:
out4:
ext4_exit_pageio();
out5:
+ ext4_exit_post_read_processing();
+out6:
+ ext4_exit_pending();
+out7:
ext4_exit_es();
return err;
@@ -5818,12 +7540,15 @@ static void __exit ext4_exit_fs(void)
unregister_as_ext2();
unregister_as_ext3();
unregister_filesystem(&ext4_fs_type);
+ ext4_fc_destroy_dentry_cache();
destroy_inodecache();
ext4_exit_mballoc();
ext4_exit_sysfs();
ext4_exit_system_zone();
ext4_exit_pageio();
+ ext4_exit_post_read_processing();
ext4_exit_es();
+ ext4_exit_pending();
}
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");