summaryrefslogtreecommitdiff
path: root/fs/ext4/super.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/super.c')
-rw-r--r--fs/ext4/super.c7108
1 files changed, 4585 insertions, 2523 deletions
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 85b3dd60169b..87205660c5d0 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/super.c
*
@@ -21,10 +22,10 @@
#include <linux/fs.h>
#include <linux/time.h>
#include <linux/vmalloc.h>
-#include <linux/jbd2.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/blkdev.h>
+#include <linux/backing-dev.h>
#include <linux/parser.h>
#include <linux/buffer_head.h>
#include <linux/exportfs.h>
@@ -34,15 +35,19 @@
#include <linux/namei.h>
#include <linux/quotaops.h>
#include <linux/seq_file.h>
-#include <linux/proc_fs.h>
#include <linux/ctype.h>
#include <linux/log2.h>
#include <linux/crc16.h>
-#include <linux/cleancache.h>
-#include <asm/uaccess.h>
-
+#include <linux/dax.h>
+#include <linux/uaccess.h>
+#include <linux/iversion.h>
+#include <linux/unicode.h>
+#include <linux/part_stat.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
+#include <linux/fsnotify.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include "ext4.h"
#include "ext4_extents.h" /* Needed for trace points definition */
@@ -50,141 +55,273 @@
#include "xattr.h"
#include "acl.h"
#include "mballoc.h"
+#include "fsmap.h"
#define CREATE_TRACE_POINTS
#include <trace/events/ext4.h>
-static struct proc_dir_entry *ext4_proc_root;
-static struct kset *ext4_kset;
static struct ext4_lazy_init *ext4_li_info;
-static struct mutex ext4_li_mtx;
-static struct ext4_features *ext4_feat;
+static DEFINE_MUTEX(ext4_li_mtx);
+static struct ratelimit_state ext4_mount_msg_ratelimit;
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
unsigned long journal_devnum);
static int ext4_show_options(struct seq_file *seq, struct dentry *root);
-static int ext4_commit_super(struct super_block *sb, int sync);
-static void ext4_mark_recovery_complete(struct super_block *sb,
+static void ext4_update_super(struct super_block *sb);
+static int ext4_commit_super(struct super_block *sb);
+static int ext4_mark_recovery_complete(struct super_block *sb,
struct ext4_super_block *es);
-static void ext4_clear_journal_err(struct super_block *sb,
- struct ext4_super_block *es);
+static int ext4_clear_journal_err(struct super_block *sb,
+ struct ext4_super_block *es);
static int ext4_sync_fs(struct super_block *sb, int wait);
-static int ext4_sync_fs_nojournal(struct super_block *sb, int wait);
-static int ext4_remount(struct super_block *sb, int *flags, char *data);
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static int ext4_unfreeze(struct super_block *sb);
static int ext4_freeze(struct super_block *sb);
-static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data);
static inline int ext2_feature_set_ok(struct super_block *sb);
static inline int ext3_feature_set_ok(struct super_block *sb);
-static int ext4_feature_set_ok(struct super_block *sb, int readonly);
-static void ext4_destroy_lazyinit_thread(void);
static void ext4_unregister_li_request(struct super_block *sb);
static void ext4_clear_request_list(void);
-static int ext4_reserve_clusters(struct ext4_sb_info *, ext4_fsblk_t);
+static struct inode *ext4_get_journal_inode(struct super_block *sb,
+ unsigned int journal_inum);
+static int ext4_validate_options(struct fs_context *fc);
+static int ext4_check_opt_consistency(struct fs_context *fc,
+ struct super_block *sb);
+static void ext4_apply_options(struct fs_context *fc, struct super_block *sb);
+static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param);
+static int ext4_get_tree(struct fs_context *fc);
+static int ext4_reconfigure(struct fs_context *fc);
+static void ext4_fc_free(struct fs_context *fc);
+static int ext4_init_fs_context(struct fs_context *fc);
+static void ext4_kill_sb(struct super_block *sb);
+static const struct fs_parameter_spec ext4_param_specs[];
+
+/*
+ * Lock ordering
+ *
+ * page fault path:
+ * mmap_lock -> sb_start_pagefault -> invalidate_lock (r) -> transaction start
+ * -> page lock -> i_data_sem (rw)
+ *
+ * buffered write path:
+ * sb_start_write -> i_mutex -> mmap_lock
+ * sb_start_write -> i_mutex -> transaction start -> page lock ->
+ * i_data_sem (rw)
+ *
+ * truncate:
+ * sb_start_write -> i_mutex -> invalidate_lock (w) -> i_mmap_rwsem (w) ->
+ * page lock
+ * sb_start_write -> i_mutex -> invalidate_lock (w) -> transaction start ->
+ * i_data_sem (rw)
+ *
+ * direct IO:
+ * sb_start_write -> i_mutex -> mmap_lock
+ * sb_start_write -> i_mutex -> transaction start -> i_data_sem (rw)
+ *
+ * writepages:
+ * transaction start -> page lock(s) -> i_data_sem (rw)
+ */
-#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
+static const struct fs_context_operations ext4_context_ops = {
+ .parse_param = ext4_parse_param,
+ .get_tree = ext4_get_tree,
+ .reconfigure = ext4_reconfigure,
+ .free = ext4_fc_free,
+};
+
+
+#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
static struct file_system_type ext2_fs_type = {
- .owner = THIS_MODULE,
- .name = "ext2",
- .mount = ext4_mount,
- .kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV,
+ .owner = THIS_MODULE,
+ .name = "ext2",
+ .init_fs_context = ext4_init_fs_context,
+ .parameters = ext4_param_specs,
+ .kill_sb = ext4_kill_sb,
+ .fs_flags = FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("ext2");
MODULE_ALIAS("ext2");
-#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
+#define IS_EXT2_SB(sb) ((sb)->s_type == &ext2_fs_type)
#else
#define IS_EXT2_SB(sb) (0)
#endif
-#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static struct file_system_type ext3_fs_type = {
- .owner = THIS_MODULE,
- .name = "ext3",
- .mount = ext4_mount,
- .kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV,
+ .owner = THIS_MODULE,
+ .name = "ext3",
+ .init_fs_context = ext4_init_fs_context,
+ .parameters = ext4_param_specs,
+ .kill_sb = ext4_kill_sb,
+ .fs_flags = FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("ext3");
MODULE_ALIAS("ext3");
-#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
-#else
-#define IS_EXT3_SB(sb) (0)
-#endif
+#define IS_EXT3_SB(sb) ((sb)->s_type == &ext3_fs_type)
-static int ext4_verify_csum_type(struct super_block *sb,
- struct ext4_super_block *es)
+
+static inline void __ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
+ bh_end_io_t *end_io, bool simu_fail)
{
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
- return 1;
+ if (simu_fail) {
+ clear_buffer_uptodate(bh);
+ unlock_buffer(bh);
+ return;
+ }
- return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
+ /*
+ * buffer's verified bit is no longer valid after reading from
+ * disk again due to write out error, clear it to make sure we
+ * recheck the buffer contents.
+ */
+ clear_buffer_verified(bh);
+
+ bh->b_end_io = end_io ? end_io : end_buffer_read_sync;
+ get_bh(bh);
+ submit_bh(REQ_OP_READ | op_flags, bh);
}
-static __le32 ext4_superblock_csum(struct super_block *sb,
- struct ext4_super_block *es)
+void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags,
+ bh_end_io_t *end_io, bool simu_fail)
{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- int offset = offsetof(struct ext4_super_block, s_checksum);
- __u32 csum;
+ BUG_ON(!buffer_locked(bh));
- csum = ext4_chksum(sbi, ~0, (char *)es, offset);
+ if (ext4_buffer_uptodate(bh)) {
+ unlock_buffer(bh);
+ return;
+ }
+ __ext4_read_bh(bh, op_flags, end_io, simu_fail);
+}
- return cpu_to_le32(csum);
+int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
+ bh_end_io_t *end_io, bool simu_fail)
+{
+ BUG_ON(!buffer_locked(bh));
+
+ if (ext4_buffer_uptodate(bh)) {
+ unlock_buffer(bh);
+ return 0;
+ }
+
+ __ext4_read_bh(bh, op_flags, end_io, simu_fail);
+
+ wait_on_buffer(bh);
+ if (buffer_uptodate(bh))
+ return 0;
+ return -EIO;
}
-int ext4_superblock_csum_verify(struct super_block *sb,
- struct ext4_super_block *es)
+int ext4_read_bh_lock(struct buffer_head *bh, blk_opf_t op_flags, bool wait)
{
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
- return 1;
+ lock_buffer(bh);
+ if (!wait) {
+ ext4_read_bh_nowait(bh, op_flags, NULL, false);
+ return 0;
+ }
+ return ext4_read_bh(bh, op_flags, NULL, false);
+}
- return es->s_checksum == ext4_superblock_csum(sb, es);
+/*
+ * This works like __bread_gfp() except it uses ERR_PTR for error
+ * returns. Currently with sb_bread it's impossible to distinguish
+ * between ENOMEM and EIO situations (since both result in a NULL
+ * return.
+ */
+static struct buffer_head *__ext4_sb_bread_gfp(struct super_block *sb,
+ sector_t block,
+ blk_opf_t op_flags, gfp_t gfp)
+{
+ struct buffer_head *bh;
+ int ret;
+
+ bh = sb_getblk_gfp(sb, block, gfp);
+ if (bh == NULL)
+ return ERR_PTR(-ENOMEM);
+ if (ext4_buffer_uptodate(bh))
+ return bh;
+
+ ret = ext4_read_bh_lock(bh, REQ_META | op_flags, true);
+ if (ret) {
+ put_bh(bh);
+ return ERR_PTR(ret);
+ }
+ return bh;
}
-void ext4_superblock_csum_set(struct super_block *sb)
+struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block,
+ blk_opf_t op_flags)
{
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+ gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_mapping,
+ ~__GFP_FS) | __GFP_MOVABLE;
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
- return;
+ return __ext4_sb_bread_gfp(sb, block, op_flags, gfp);
+}
+
+struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
+ sector_t block)
+{
+ gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_mapping,
+ ~__GFP_FS);
- es->s_checksum = ext4_superblock_csum(sb, es);
+ return __ext4_sb_bread_gfp(sb, block, 0, gfp);
}
-void *ext4_kvmalloc(size_t size, gfp_t flags)
+struct buffer_head *ext4_sb_bread_nofail(struct super_block *sb,
+ sector_t block)
{
- void *ret;
+ gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_mapping,
+ ~__GFP_FS) | __GFP_MOVABLE | __GFP_NOFAIL;
- ret = kmalloc(size, flags);
- if (!ret)
- ret = __vmalloc(size, flags, PAGE_KERNEL);
- return ret;
+ return __ext4_sb_bread_gfp(sb, block, 0, gfp);
}
-void *ext4_kvzalloc(size_t size, gfp_t flags)
+void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block)
{
- void *ret;
+ struct buffer_head *bh = bdev_getblk(sb->s_bdev, block,
+ sb->s_blocksize, GFP_NOWAIT);
- ret = kzalloc(size, flags);
- if (!ret)
- ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
- return ret;
+ if (likely(bh)) {
+ if (trylock_buffer(bh))
+ ext4_read_bh_nowait(bh, REQ_RAHEAD, NULL, false);
+ brelse(bh);
+ }
}
-void ext4_kvfree(void *ptr)
+static int ext4_verify_csum_type(struct super_block *sb,
+ struct ext4_super_block *es)
{
- if (is_vmalloc_addr(ptr))
- vfree(ptr);
- else
- kfree(ptr);
+ if (!ext4_has_feature_metadata_csum(sb))
+ return 1;
+
+ return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
+}
+
+__le32 ext4_superblock_csum(struct ext4_super_block *es)
+{
+ int offset = offsetof(struct ext4_super_block, s_checksum);
+ __u32 csum;
+
+ csum = ext4_chksum(~0, (char *)es, offset);
+
+ return cpu_to_le32(csum);
+}
+static int ext4_superblock_csum_verify(struct super_block *sb,
+ struct ext4_super_block *es)
+{
+ if (!ext4_has_feature_metadata_csum(sb))
+ return 1;
+
+ return es->s_checksum == ext4_superblock_csum(es);
+}
+
+void ext4_superblock_csum_set(struct super_block *sb)
+{
+ struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+
+ if (!ext4_has_feature_metadata_csum(sb))
+ return;
+
+ es->s_checksum = ext4_superblock_csum(es);
}
ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
@@ -222,9 +359,9 @@ __u32 ext4_free_group_clusters(struct super_block *sb,
__u32 ext4_free_inodes_count(struct super_block *sb,
struct ext4_group_desc *bg)
{
- return le16_to_cpu(bg->bg_free_inodes_count_lo) |
+ return le16_to_cpu(READ_ONCE(bg->bg_free_inodes_count_lo)) |
(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
- (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
+ (__u32)le16_to_cpu(READ_ONCE(bg->bg_free_inodes_count_hi)) << 16 : 0);
}
__u32 ext4_used_dirs_count(struct super_block *sb,
@@ -278,9 +415,9 @@ void ext4_free_group_clusters_set(struct super_block *sb,
void ext4_free_inodes_set(struct super_block *sb,
struct ext4_group_desc *bg, __u32 count)
{
- bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
+ WRITE_ONCE(bg->bg_free_inodes_count_lo, cpu_to_le16((__u16)count));
if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
- bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
+ WRITE_ONCE(bg->bg_free_inodes_count_hi, cpu_to_le16(count >> 16));
}
void ext4_used_dirs_set(struct super_block *sb,
@@ -299,75 +436,234 @@ void ext4_itable_unused_set(struct super_block *sb,
bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
}
-
-static void __save_error_info(struct super_block *sb, const char *func,
- unsigned int line)
+static void __ext4_update_tstamp(__le32 *lo, __u8 *hi, time64_t now)
{
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+ now = clamp_val(now, 0, (1ull << 40) - 1);
- EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
- es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
- es->s_last_error_time = cpu_to_le32(get_seconds());
- strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
- es->s_last_error_line = cpu_to_le32(line);
- if (!es->s_first_error_time) {
- es->s_first_error_time = es->s_last_error_time;
- strncpy(es->s_first_error_func, func,
- sizeof(es->s_first_error_func));
- es->s_first_error_line = cpu_to_le32(line);
- es->s_first_error_ino = es->s_last_error_ino;
- es->s_first_error_block = es->s_last_error_block;
- }
- /*
- * Start the daily error reporting function if it hasn't been
- * started already
- */
- if (!es->s_error_count)
- mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
- le32_add_cpu(&es->s_error_count, 1);
+ *lo = cpu_to_le32(lower_32_bits(now));
+ *hi = upper_32_bits(now);
}
-static void save_error_info(struct super_block *sb, const char *func,
- unsigned int line)
+static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
{
- __save_error_info(sb, func, line);
- ext4_commit_super(sb, 1);
+ return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo);
}
+#define ext4_update_tstamp(es, tstamp) \
+ __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi, \
+ ktime_get_real_seconds())
+#define ext4_get_tstamp(es, tstamp) \
+ __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
/*
- * The del_gendisk() function uninitializes the disk-specific data
- * structures, including the bdi structure, without telling anyone
- * else. Once this happens, any attempt to call mark_buffer_dirty()
- * (for example, by ext4_commit_super), will cause a kernel OOPS.
- * This is a kludge to prevent these oops until we can put in a proper
- * hook in del_gendisk() to inform the VFS and file system layers.
+ * The ext4_maybe_update_superblock() function checks and updates the
+ * superblock if needed.
+ *
+ * This function is designed to update the on-disk superblock only under
+ * certain conditions to prevent excessive disk writes and unnecessary
+ * waking of the disk from sleep. The superblock will be updated if:
+ * 1. More than sbi->s_sb_update_sec (def: 1 hour) has passed since the last
+ * superblock update
+ * 2. More than sbi->s_sb_update_kb (def: 16MB) kbs have been written since the
+ * last superblock update.
+ *
+ * @sb: The superblock
*/
-static int block_device_ejected(struct super_block *sb)
+static void ext4_maybe_update_superblock(struct super_block *sb)
{
- struct inode *bd_inode = sb->s_bdev->bd_inode;
- struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+ journal_t *journal = sbi->s_journal;
+ time64_t now;
+ __u64 last_update;
+ __u64 lifetime_write_kbytes;
+ __u64 diff_size;
+
+ if (ext4_emergency_state(sb) || sb_rdonly(sb) ||
+ !(sb->s_flags & SB_ACTIVE) || !journal ||
+ journal->j_flags & JBD2_UNMOUNT)
+ return;
+
+ now = ktime_get_real_seconds();
+ last_update = ext4_get_tstamp(es, s_wtime);
- return bdi->dev == NULL;
+ if (likely(now - last_update < sbi->s_sb_update_sec))
+ return;
+
+ lifetime_write_kbytes = sbi->s_kbytes_written +
+ ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
+ sbi->s_sectors_written_start) >> 1);
+
+ /* Get the number of kilobytes not written to disk to account
+ * for statistics and compare with a multiple of 16 MB. This
+ * is used to determine when the next superblock commit should
+ * occur (i.e. not more often than once per 16MB if there was
+ * less written in an hour).
+ */
+ diff_size = lifetime_write_kbytes - le64_to_cpu(es->s_kbytes_written);
+
+ if (diff_size > sbi->s_sb_update_kb)
+ schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
}
static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
{
struct super_block *sb = journal->j_private;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- int error = is_journal_aborted(journal);
- struct ext4_journal_cb_entry *jce;
BUG_ON(txn->t_state == T_FINISHED);
- spin_lock(&sbi->s_md_lock);
- while (!list_empty(&txn->t_private_list)) {
- jce = list_entry(txn->t_private_list.next,
- struct ext4_journal_cb_entry, jce_list);
- list_del_init(&jce->jce_list);
- spin_unlock(&sbi->s_md_lock);
- jce->jce_func(sb, jce, error);
- spin_lock(&sbi->s_md_lock);
+
+ ext4_process_freed_data(sb, txn->t_tid);
+ ext4_maybe_update_superblock(sb);
+}
+
+static bool ext4_journalled_writepage_needs_redirty(struct jbd2_inode *jinode,
+ struct folio *folio)
+{
+ struct buffer_head *bh, *head;
+ struct journal_head *jh;
+
+ bh = head = folio_buffers(folio);
+ do {
+ /*
+ * We have to redirty a page in these cases:
+ * 1) If buffer is dirty, it means the page was dirty because it
+ * contains a buffer that needs checkpointing. So the dirty bit
+ * needs to be preserved so that checkpointing writes the buffer
+ * properly.
+ * 2) If buffer is not part of the committing transaction
+ * (we may have just accidentally come across this buffer because
+ * inode range tracking is not exact) or if the currently running
+ * transaction already contains this buffer as well, dirty bit
+ * needs to be preserved so that the buffer gets writeprotected
+ * properly on running transaction's commit.
+ */
+ jh = bh2jh(bh);
+ if (buffer_dirty(bh) ||
+ (jh && (jh->b_transaction != jinode->i_transaction ||
+ jh->b_next_transaction)))
+ return true;
+ } while ((bh = bh->b_this_page) != head);
+
+ return false;
+}
+
+static int ext4_journalled_submit_inode_data_buffers(struct jbd2_inode *jinode)
+{
+ struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = LONG_MAX,
+ .range_start = jinode->i_dirty_start,
+ .range_end = jinode->i_dirty_end,
+ };
+ struct folio *folio = NULL;
+ int error;
+
+ /*
+ * writeback_iter() already checks for dirty pages and calls
+ * folio_clear_dirty_for_io(), which we want to write protect the
+ * folios.
+ *
+ * However, we may have to redirty a folio sometimes.
+ */
+ while ((folio = writeback_iter(mapping, &wbc, folio, &error))) {
+ if (ext4_journalled_writepage_needs_redirty(jinode, folio))
+ folio_redirty_for_writepage(&wbc, folio);
+ folio_unlock(folio);
}
- spin_unlock(&sbi->s_md_lock);
+
+ return error;
+}
+
+static int ext4_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
+{
+ int ret;
+
+ if (ext4_should_journal_data(jinode->i_vfs_inode))
+ ret = ext4_journalled_submit_inode_data_buffers(jinode);
+ else
+ ret = ext4_normal_submit_inode_data_buffers(jinode);
+ return ret;
+}
+
+static int ext4_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
+{
+ int ret = 0;
+
+ if (!ext4_should_journal_data(jinode->i_vfs_inode))
+ ret = jbd2_journal_finish_inode_data_buffers(jinode);
+
+ return ret;
+}
+
+static bool system_going_down(void)
+{
+ return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
+ || system_state == SYSTEM_RESTART;
+}
+
+struct ext4_err_translation {
+ int code;
+ int errno;
+};
+
+#define EXT4_ERR_TRANSLATE(err) { .code = EXT4_ERR_##err, .errno = err }
+
+static struct ext4_err_translation err_translation[] = {
+ EXT4_ERR_TRANSLATE(EIO),
+ EXT4_ERR_TRANSLATE(ENOMEM),
+ EXT4_ERR_TRANSLATE(EFSBADCRC),
+ EXT4_ERR_TRANSLATE(EFSCORRUPTED),
+ EXT4_ERR_TRANSLATE(ENOSPC),
+ EXT4_ERR_TRANSLATE(ENOKEY),
+ EXT4_ERR_TRANSLATE(EROFS),
+ EXT4_ERR_TRANSLATE(EFBIG),
+ EXT4_ERR_TRANSLATE(EEXIST),
+ EXT4_ERR_TRANSLATE(ERANGE),
+ EXT4_ERR_TRANSLATE(EOVERFLOW),
+ EXT4_ERR_TRANSLATE(EBUSY),
+ EXT4_ERR_TRANSLATE(ENOTDIR),
+ EXT4_ERR_TRANSLATE(ENOTEMPTY),
+ EXT4_ERR_TRANSLATE(ESHUTDOWN),
+ EXT4_ERR_TRANSLATE(EFAULT),
+};
+
+static int ext4_errno_to_code(int errno)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(err_translation); i++)
+ if (err_translation[i].errno == errno)
+ return err_translation[i].code;
+ return EXT4_ERR_UNKNOWN;
+}
+
+static void save_error_info(struct super_block *sb, int error,
+ __u32 ino, __u64 block,
+ const char *func, unsigned int line)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ /* We default to EFSCORRUPTED error... */
+ if (error == 0)
+ error = EFSCORRUPTED;
+
+ spin_lock(&sbi->s_error_lock);
+ sbi->s_add_error_count++;
+ sbi->s_last_error_code = error;
+ sbi->s_last_error_line = line;
+ sbi->s_last_error_ino = ino;
+ sbi->s_last_error_block = block;
+ sbi->s_last_error_func = func;
+ sbi->s_last_error_time = ktime_get_real_seconds();
+ if (!sbi->s_first_error_time) {
+ sbi->s_first_error_code = error;
+ sbi->s_first_error_line = line;
+ sbi->s_first_error_ino = ino;
+ sbi->s_first_error_block = block;
+ sbi->s_first_error_func = func;
+ sbi->s_first_error_time = sbi->s_last_error_time;
+ }
+ spin_unlock(&sbi->s_error_lock);
}
/* Deal with the reporting of failure conditions on a filesystem such as
@@ -383,78 +679,187 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
* We'll just use the jbd2_journal_abort() error code to record an error in
* the journal instead. On recovery, the journal will complain about
* that error until we've noted it down and cleared it.
+ *
+ * If force_ro is set, we unconditionally force the filesystem into an
+ * ABORT|READONLY state, unless the error response on the fs has been set to
+ * panic in which case we take the easy way out and panic immediately. This is
+ * used to deal with unrecoverable failures such as journal IO errors or ENOMEM
+ * at a critical moment in log management.
*/
-
-static void ext4_handle_error(struct super_block *sb)
+static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
+ __u32 ino, __u64 block,
+ const char *func, unsigned int line)
{
- if (sb->s_flags & MS_RDONLY)
- return;
+ journal_t *journal = EXT4_SB(sb)->s_journal;
+ bool continue_fs = !force_ro && test_opt(sb, ERRORS_CONT);
- if (!test_opt(sb, ERRORS_CONT)) {
- journal_t *journal = EXT4_SB(sb)->s_journal;
+ EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+ if (test_opt(sb, WARN_ON_ERROR))
+ WARN_ON_ONCE(1);
- EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
- if (journal)
- jbd2_journal_abort(journal, -EIO);
- }
- if (test_opt(sb, ERRORS_RO)) {
- ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
+ if (!continue_fs && !ext4_emergency_ro(sb) && journal)
+ jbd2_journal_abort(journal, -error);
+
+ if (!bdev_read_only(sb->s_bdev)) {
+ save_error_info(sb, error, ino, block, func, line);
/*
- * Make sure updated value of ->s_mount_flags will be visible
- * before ->s_flags update
+ * In case the fs should keep running, we need to writeout
+ * superblock through the journal. Due to lock ordering
+ * constraints, it may not be safe to do it right here so we
+ * defer superblock flushing to a workqueue. We just need to be
+ * careful when the journal is already shutting down. If we get
+ * here in that case, just update the sb directly as the last
+ * transaction won't commit anyway.
*/
- smp_wmb();
- sb->s_flags |= MS_RDONLY;
+ if (continue_fs && journal &&
+ !ext4_test_mount_flag(sb, EXT4_MF_JOURNAL_DESTROY))
+ schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
+ else
+ ext4_commit_super(sb);
}
- if (test_opt(sb, ERRORS_PANIC))
+
+ /*
+ * We force ERRORS_RO behavior when system is rebooting. Otherwise we
+ * could panic during 'reboot -f' as the underlying device got already
+ * disabled.
+ */
+ if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
panic("EXT4-fs (device %s): panic forced after error\n",
sb->s_id);
+ }
+
+ if (ext4_emergency_ro(sb) || continue_fs)
+ return;
+
+ ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
+ /*
+ * We don't set SB_RDONLY because that requires sb->s_umount
+ * semaphore and setting it without proper remount procedure is
+ * confusing code such as freeze_super() leading to deadlocks
+ * and other problems.
+ */
+ set_bit(EXT4_FLAGS_EMERGENCY_RO, &EXT4_SB(sb)->s_ext4_flags);
}
+static void update_super_work(struct work_struct *work)
+{
+ struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
+ s_sb_upd_work);
+ journal_t *journal = sbi->s_journal;
+ handle_t *handle;
+
+ /*
+ * If the journal is still running, we have to write out superblock
+ * through the journal to avoid collisions of other journalled sb
+ * updates.
+ *
+ * We use directly jbd2 functions here to avoid recursing back into
+ * ext4 error handling code during handling of previous errors.
+ */
+ if (!ext4_emergency_state(sbi->s_sb) &&
+ !sb_rdonly(sbi->s_sb) && journal) {
+ struct buffer_head *sbh = sbi->s_sbh;
+ bool call_notify_err = false;
+
+ handle = jbd2_journal_start(journal, 1);
+ if (IS_ERR(handle))
+ goto write_directly;
+ if (jbd2_journal_get_write_access(handle, sbh)) {
+ jbd2_journal_stop(handle);
+ goto write_directly;
+ }
+
+ if (sbi->s_add_error_count > 0)
+ call_notify_err = true;
+
+ ext4_update_super(sbi->s_sb);
+ if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
+ ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to "
+ "superblock detected");
+ clear_buffer_write_io_error(sbh);
+ set_buffer_uptodate(sbh);
+ }
+
+ if (jbd2_journal_dirty_metadata(handle, sbh)) {
+ jbd2_journal_stop(handle);
+ goto write_directly;
+ }
+ jbd2_journal_stop(handle);
+
+ if (call_notify_err)
+ ext4_notify_error_sysfs(sbi);
+
+ return;
+ }
+write_directly:
+ /*
+ * Write through journal failed. Write sb directly to get error info
+ * out and hope for the best.
+ */
+ ext4_commit_super(sbi->s_sb);
+ ext4_notify_error_sysfs(sbi);
+}
+
+#define ext4_error_ratelimit(sb) \
+ ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state), \
+ "EXT4-fs error")
+
void __ext4_error(struct super_block *sb, const char *function,
- unsigned int line, const char *fmt, ...)
+ unsigned int line, bool force_ro, int error, __u64 block,
+ const char *fmt, ...)
{
struct va_format vaf;
va_list args;
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
- sb->s_id, function, line, current->comm, &vaf);
- va_end(args);
- save_error_info(sb, function, line);
+ if (unlikely(ext4_emergency_state(sb)))
+ return;
- ext4_handle_error(sb);
+ trace_ext4_error(sb, function, line);
+ if (ext4_error_ratelimit(sb)) {
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk(KERN_CRIT
+ "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
+ sb->s_id, function, line, current->comm, &vaf);
+ va_end(args);
+ }
+ fsnotify_sb_error(sb, NULL, error ? error : EFSCORRUPTED);
+
+ ext4_handle_error(sb, force_ro, error, 0, block, function, line);
}
void __ext4_error_inode(struct inode *inode, const char *function,
- unsigned int line, ext4_fsblk_t block,
+ unsigned int line, ext4_fsblk_t block, int error,
const char *fmt, ...)
{
va_list args;
struct va_format vaf;
- struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
- es->s_last_error_ino = cpu_to_le32(inode->i_ino);
- es->s_last_error_block = cpu_to_le64(block);
- save_error_info(inode->i_sb, function, line);
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- if (block)
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
- "inode #%lu: block %llu: comm %s: %pV\n",
- inode->i_sb->s_id, function, line, inode->i_ino,
- block, current->comm, &vaf);
- else
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
- "inode #%lu: comm %s: %pV\n",
- inode->i_sb->s_id, function, line, inode->i_ino,
- current->comm, &vaf);
- va_end(args);
+ if (unlikely(ext4_emergency_state(inode->i_sb)))
+ return;
- ext4_handle_error(inode->i_sb);
+ trace_ext4_error(inode->i_sb, function, line);
+ if (ext4_error_ratelimit(inode->i_sb)) {
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ if (block)
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
+ "inode #%lu: block %llu: comm %s: %pV\n",
+ inode->i_sb->s_id, function, line, inode->i_ino,
+ block, current->comm, &vaf);
+ else
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
+ "inode #%lu: comm %s: %pV\n",
+ inode->i_sb->s_id, function, line, inode->i_ino,
+ current->comm, &vaf);
+ va_end(args);
+ }
+ fsnotify_sb_error(inode->i_sb, inode, error ? error : EFSCORRUPTED);
+
+ ext4_handle_error(inode->i_sb, false, error, inode->i_ino, block,
+ function, line);
}
void __ext4_error_file(struct file *file, const char *function,
@@ -463,34 +868,38 @@ void __ext4_error_file(struct file *file, const char *function,
{
va_list args;
struct va_format vaf;
- struct ext4_super_block *es;
struct inode *inode = file_inode(file);
char pathname[80], *path;
- es = EXT4_SB(inode->i_sb)->s_es;
- es->s_last_error_ino = cpu_to_le32(inode->i_ino);
- save_error_info(inode->i_sb, function, line);
- path = d_path(&(file->f_path), pathname, sizeof(pathname));
- if (IS_ERR(path))
- path = "(unknown)";
- va_start(args, fmt);
- vaf.fmt = fmt;
- vaf.va = &args;
- if (block)
- printk(KERN_CRIT
- "EXT4-fs error (device %s): %s:%d: inode #%lu: "
- "block %llu: comm %s: path %s: %pV\n",
- inode->i_sb->s_id, function, line, inode->i_ino,
- block, current->comm, path, &vaf);
- else
- printk(KERN_CRIT
- "EXT4-fs error (device %s): %s:%d: inode #%lu: "
- "comm %s: path %s: %pV\n",
- inode->i_sb->s_id, function, line, inode->i_ino,
- current->comm, path, &vaf);
- va_end(args);
+ if (unlikely(ext4_emergency_state(inode->i_sb)))
+ return;
+
+ trace_ext4_error(inode->i_sb, function, line);
+ if (ext4_error_ratelimit(inode->i_sb)) {
+ path = file_path(file, pathname, sizeof(pathname));
+ if (IS_ERR(path))
+ path = "(unknown)";
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ if (block)
+ printk(KERN_CRIT
+ "EXT4-fs error (device %s): %s:%d: inode #%lu: "
+ "block %llu: comm %s: path %s: %pV\n",
+ inode->i_sb->s_id, function, line, inode->i_ino,
+ block, current->comm, path, &vaf);
+ else
+ printk(KERN_CRIT
+ "EXT4-fs error (device %s): %s:%d: inode #%lu: "
+ "comm %s: path %s: %pV\n",
+ inode->i_sb->s_id, function, line, inode->i_ino,
+ current->comm, path, &vaf);
+ va_end(args);
+ }
+ fsnotify_sb_error(inode->i_sb, inode, EFSCORRUPTED);
- ext4_handle_error(inode->i_sb);
+ ext4_handle_error(inode->i_sb, false, EFSCORRUPTED, inode->i_ino, block,
+ function, line);
}
const char *ext4_decode_error(struct super_block *sb, int errno,
@@ -499,6 +908,12 @@ const char *ext4_decode_error(struct super_block *sb, int errno,
char *errstr = NULL;
switch (errno) {
+ case -EFSCORRUPTED:
+ errstr = "Corrupt filesystem";
+ break;
+ case -EFSBADCRC:
+ errstr = "Filesystem failed CRC";
+ break;
case -EIO:
errstr = "IO failure";
break;
@@ -536,85 +951,87 @@ void __ext4_std_error(struct super_block *sb, const char *function,
char nbuf[16];
const char *errstr;
+ if (unlikely(ext4_emergency_state(sb)))
+ return;
+
/* Special case: if the error is EROFS, and we're not already
* inside a transaction, then there's really no point in logging
* an error. */
- if (errno == -EROFS && journal_current_handle() == NULL &&
- (sb->s_flags & MS_RDONLY))
+ if (errno == -EROFS && journal_current_handle() == NULL && sb_rdonly(sb))
return;
- errstr = ext4_decode_error(sb, errno, nbuf);
- printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
- sb->s_id, function, line, errstr);
- save_error_info(sb, function, line);
+ if (ext4_error_ratelimit(sb)) {
+ errstr = ext4_decode_error(sb, errno, nbuf);
+ printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
+ sb->s_id, function, line, errstr);
+ }
+ fsnotify_sb_error(sb, NULL, errno ? errno : EFSCORRUPTED);
- ext4_handle_error(sb);
+ ext4_handle_error(sb, false, -errno, 0, 0, function, line);
}
-/*
- * ext4_abort is a much stronger failure handler than ext4_error. The
- * abort function may be used to deal with unrecoverable failures such
- * as journal IO errors or ENOMEM at a critical moment in log management.
- *
- * We unconditionally force the filesystem into an ABORT|READONLY state,
- * unless the error response on the fs has been set to panic in which
- * case we take the easy way out and panic immediately.
- */
-
-void __ext4_abort(struct super_block *sb, const char *function,
- unsigned int line, const char *fmt, ...)
+void __ext4_msg(struct super_block *sb,
+ const char *prefix, const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
- save_error_info(sb, function, line);
+ if (sb) {
+ atomic_inc(&EXT4_SB(sb)->s_msg_count);
+ if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state),
+ "EXT4-fs"))
+ return;
+ }
+
va_start(args, fmt);
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id,
- function, line);
- vprintk(fmt, args);
- printk("\n");
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ if (sb)
+ printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
+ else
+ printk("%sEXT4-fs: %pV\n", prefix, &vaf);
va_end(args);
+}
- if ((sb->s_flags & MS_RDONLY) == 0) {
- ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
- EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
- /*
- * Make sure updated value of ->s_mount_flags will be visible
- * before ->s_flags update
- */
- smp_wmb();
- sb->s_flags |= MS_RDONLY;
- if (EXT4_SB(sb)->s_journal)
- jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
- save_error_info(sb, function, line);
- }
- if (test_opt(sb, ERRORS_PANIC))
- panic("EXT4-fs panic from previous error\n");
+static int ext4_warning_ratelimit(struct super_block *sb)
+{
+ atomic_inc(&EXT4_SB(sb)->s_warning_count);
+ return ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
+ "EXT4-fs warning");
}
-void __ext4_msg(struct super_block *sb,
- const char *prefix, const char *fmt, ...)
+void __ext4_warning(struct super_block *sb, const char *function,
+ unsigned int line, const char *fmt, ...)
{
struct va_format vaf;
va_list args;
+ if (!ext4_warning_ratelimit(sb))
+ return;
+
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
- printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
+ printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
+ sb->s_id, function, line, &vaf);
va_end(args);
}
-void __ext4_warning(struct super_block *sb, const char *function,
- unsigned int line, const char *fmt, ...)
+void __ext4_warning_inode(const struct inode *inode, const char *function,
+ unsigned int line, const char *fmt, ...)
{
struct va_format vaf;
va_list args;
+ if (!ext4_warning_ratelimit(inode->i_sb))
+ return;
+
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
- printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
- sb->s_id, function, line, &vaf);
+ printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: "
+ "inode #%lu: comm %s: %pV\n", inode->i_sb->s_id,
+ function, line, inode->i_ino, current->comm, &vaf);
va_end(args);
}
@@ -627,32 +1044,39 @@ __acquires(bitlock)
{
struct va_format vaf;
va_list args;
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
- es->s_last_error_ino = cpu_to_le32(ino);
- es->s_last_error_block = cpu_to_le64(block);
- __save_error_info(sb, function, line);
-
- va_start(args, fmt);
+ if (unlikely(ext4_emergency_state(sb)))
+ return;
- vaf.fmt = fmt;
- vaf.va = &args;
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
- sb->s_id, function, line, grp);
- if (ino)
- printk(KERN_CONT "inode %lu: ", ino);
- if (block)
- printk(KERN_CONT "block %llu:", (unsigned long long) block);
- printk(KERN_CONT "%pV\n", &vaf);
- va_end(args);
+ trace_ext4_error(sb, function, line);
+ if (ext4_error_ratelimit(sb)) {
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
+ sb->s_id, function, line, grp);
+ if (ino)
+ printk(KERN_CONT "inode %lu: ", ino);
+ if (block)
+ printk(KERN_CONT "block %llu:",
+ (unsigned long long) block);
+ printk(KERN_CONT "%pV\n", &vaf);
+ va_end(args);
+ }
if (test_opt(sb, ERRORS_CONT)) {
- ext4_commit_super(sb, 0);
+ if (test_opt(sb, WARN_ON_ERROR))
+ WARN_ON_ONCE(1);
+ EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+ if (!bdev_read_only(sb->s_bdev)) {
+ save_error_info(sb, EFSCORRUPTED, ino, block, function,
+ line);
+ schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
+ }
return;
}
-
ext4_unlock_group(sb, grp);
- ext4_handle_error(sb);
+ ext4_handle_error(sb, false, EFSCORRUPTED, ino, block, function, line);
/*
* We only get here in the ERRORS_RO case; relocking the group
* may be dangerous, but nothing bad will happen since the
@@ -668,6 +1092,38 @@ __acquires(bitlock)
return;
}
+void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
+ ext4_group_t group,
+ unsigned int flags)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+ struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
+ int ret;
+
+ if (!grp || !gdp)
+ return;
+ if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) {
+ ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
+ &grp->bb_state);
+ if (!ret)
+ percpu_counter_sub(&sbi->s_freeclusters_counter,
+ grp->bb_free);
+ }
+
+ if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) {
+ ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
+ &grp->bb_state);
+ if (!ret && gdp) {
+ int count;
+
+ count = ext4_free_inodes_count(sb, gdp);
+ percpu_counter_sub(&sbi->s_freeinodes_counter,
+ count);
+ }
+ }
+}
+
void ext4_update_dynamic_rev(struct super_block *sb)
{
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
@@ -693,43 +1149,6 @@ void ext4_update_dynamic_rev(struct super_block *sb)
*/
}
-/*
- * Open the external journal device
- */
-static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
-{
- struct block_device *bdev;
- char b[BDEVNAME_SIZE];
-
- bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
- if (IS_ERR(bdev))
- goto fail;
- return bdev;
-
-fail:
- ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
- __bdevname(dev, b), PTR_ERR(bdev));
- return NULL;
-}
-
-/*
- * Release the journal device
- */
-static void ext4_blkdev_put(struct block_device *bdev)
-{
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
-}
-
-static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
-{
- struct block_device *bdev;
- bdev = sbi->journal_bdev;
- if (bdev) {
- ext4_blkdev_put(bdev);
- sbi->journal_bdev = NULL;
- }
-}
-
static inline struct inode *orphan_list_entry(struct list_head *l)
{
return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
@@ -753,60 +1172,168 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
}
}
+#ifdef CONFIG_QUOTA
+static int ext4_quota_off(struct super_block *sb, int type);
+
+static inline void ext4_quotas_off(struct super_block *sb, int type)
+{
+ BUG_ON(type > EXT4_MAXQUOTAS);
+
+ /* Use our quota_off function to clear inode flags etc. */
+ for (type--; type >= 0; type--)
+ ext4_quota_off(sb, type);
+}
+
+/*
+ * This is a helper function which is used in the mount/remount
+ * codepaths (which holds s_umount) to fetch the quota file name.
+ */
+static inline char *get_qf_name(struct super_block *sb,
+ struct ext4_sb_info *sbi,
+ int type)
+{
+ return rcu_dereference_protected(sbi->s_qf_names[type],
+ lockdep_is_held(&sb->s_umount));
+}
+#else
+static inline void ext4_quotas_off(struct super_block *sb, int type)
+{
+}
+#endif
+
+static int ext4_percpu_param_init(struct ext4_sb_info *sbi)
+{
+ ext4_fsblk_t block;
+ int err;
+
+ block = ext4_count_free_clusters(sbi->s_sb);
+ ext4_free_blocks_count_set(sbi->s_es, EXT4_C2B(sbi, block));
+ err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
+ GFP_KERNEL);
+ if (!err) {
+ unsigned long freei = ext4_count_free_inodes(sbi->s_sb);
+ sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
+ err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
+ GFP_KERNEL);
+ }
+ if (!err)
+ err = percpu_counter_init(&sbi->s_dirs_counter,
+ ext4_count_dirs(sbi->s_sb), GFP_KERNEL);
+ if (!err)
+ err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
+ GFP_KERNEL);
+ if (!err)
+ err = percpu_counter_init(&sbi->s_sra_exceeded_retry_limit, 0,
+ GFP_KERNEL);
+ if (!err)
+ err = percpu_init_rwsem(&sbi->s_writepages_rwsem);
+
+ if (err)
+ ext4_msg(sbi->s_sb, KERN_ERR, "insufficient memory");
+
+ return err;
+}
+
+static void ext4_percpu_param_destroy(struct ext4_sb_info *sbi)
+{
+ percpu_counter_destroy(&sbi->s_freeclusters_counter);
+ percpu_counter_destroy(&sbi->s_freeinodes_counter);
+ percpu_counter_destroy(&sbi->s_dirs_counter);
+ percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
+ percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
+ percpu_free_rwsem(&sbi->s_writepages_rwsem);
+}
+
+static void ext4_group_desc_free(struct ext4_sb_info *sbi)
+{
+ struct buffer_head **group_desc;
+ int i;
+
+ rcu_read_lock();
+ group_desc = rcu_dereference(sbi->s_group_desc);
+ for (i = 0; i < sbi->s_gdb_count; i++)
+ brelse(group_desc[i]);
+ kvfree(group_desc);
+ rcu_read_unlock();
+}
+
+static void ext4_flex_groups_free(struct ext4_sb_info *sbi)
+{
+ struct flex_groups **flex_groups;
+ int i;
+
+ rcu_read_lock();
+ flex_groups = rcu_dereference(sbi->s_flex_groups);
+ if (flex_groups) {
+ for (i = 0; i < sbi->s_flex_groups_allocated; i++)
+ kvfree(flex_groups[i]);
+ kvfree(flex_groups);
+ }
+ rcu_read_unlock();
+}
+
static void ext4_put_super(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
- int i, err;
+ int aborted = 0;
+ int err;
+
+ /*
+ * Unregister sysfs before destroying jbd2 journal.
+ * Since we could still access attr_journal_task attribute via sysfs
+ * path which could have sbi->s_journal->j_task as NULL
+ * Unregister sysfs before flush sbi->s_sb_upd_work.
+ * Since user may read /proc/fs/ext4/xx/mb_groups during umount, If
+ * read metadata verify failed then will queue error work.
+ * update_super_work will call start_this_handle may trigger
+ * BUG_ON.
+ */
+ ext4_unregister_sysfs(sb);
+
+ if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs unmount"))
+ ext4_msg(sb, KERN_INFO, "unmounting filesystem %pU.",
+ &sb->s_uuid);
ext4_unregister_li_request(sb);
- dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
+ ext4_quotas_off(sb, EXT4_MAXQUOTAS);
- flush_workqueue(sbi->unrsv_conversion_wq);
- flush_workqueue(sbi->rsv_conversion_wq);
- destroy_workqueue(sbi->unrsv_conversion_wq);
destroy_workqueue(sbi->rsv_conversion_wq);
+ ext4_release_orphan_info(sb);
if (sbi->s_journal) {
- err = jbd2_journal_destroy(sbi->s_journal);
- sbi->s_journal = NULL;
- if (err < 0)
- ext4_abort(sb, "Couldn't clean up the journal");
- }
+ aborted = is_journal_aborted(sbi->s_journal);
+ err = ext4_journal_destroy(sbi, sbi->s_journal);
+ if ((err < 0) && !aborted) {
+ ext4_abort(sb, -err, "Couldn't clean up the journal");
+ }
+ } else
+ flush_work(&sbi->s_sb_upd_work);
ext4_es_unregister_shrinker(sbi);
- del_timer(&sbi->s_err_report);
+ timer_shutdown_sync(&sbi->s_err_report);
ext4_release_system_zone(sb);
ext4_mb_release(sb);
ext4_ext_release(sb);
- ext4_xattr_put_super(sb);
- if (!(sb->s_flags & MS_RDONLY)) {
- EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
- es->s_state = cpu_to_le16(sbi->s_mount_state);
+ if (!ext4_emergency_state(sb) && !sb_rdonly(sb)) {
+ if (!aborted) {
+ ext4_clear_feature_journal_needs_recovery(sb);
+ ext4_clear_feature_orphan_present(sb);
+ es->s_state = cpu_to_le16(sbi->s_mount_state);
+ }
+ ext4_commit_super(sb);
}
- if (!(sb->s_flags & MS_RDONLY))
- ext4_commit_super(sb, 1);
- if (sbi->s_proc) {
- remove_proc_entry("options", sbi->s_proc);
- remove_proc_entry(sb->s_id, ext4_proc_root);
- }
- kobject_del(&sbi->s_kobj);
+ ext4_group_desc_free(sbi);
+ ext4_flex_groups_free(sbi);
- for (i = 0; i < sbi->s_gdb_count; i++)
- brelse(sbi->s_group_desc[i]);
- ext4_kvfree(sbi->s_group_desc);
- ext4_kvfree(sbi->s_flex_groups);
- percpu_counter_destroy(&sbi->s_freeclusters_counter);
- percpu_counter_destroy(&sbi->s_freeinodes_counter);
- percpu_counter_destroy(&sbi->s_dirs_counter);
- percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
- percpu_counter_destroy(&sbi->s_extent_cache_cnt);
- brelse(sbi->s_sbh);
+ WARN_ON_ONCE(!(sbi->s_mount_state & EXT4_ERROR_FS) &&
+ percpu_counter_sum(&sbi->s_dirtyclusters_counter));
+ ext4_percpu_param_destroy(sbi);
#ifdef CONFIG_QUOTA
- for (i = 0; i < MAXQUOTAS; i++)
- kfree(sbi->s_qf_names[i]);
+ for (int i = 0; i < EXT4_MAXQUOTAS; i++)
+ kfree(get_qf_name(sb, sbi, i));
#endif
/* Debugging code just in case the in-memory inode orphan list
@@ -815,21 +1342,29 @@ static void ext4_put_super(struct super_block *sb)
* in-memory list had better be clean by this point. */
if (!list_empty(&sbi->s_orphan))
dump_orphan_list(sb, sbi);
- J_ASSERT(list_empty(&sbi->s_orphan));
+ ASSERT(list_empty(&sbi->s_orphan));
+ sync_blockdev(sb->s_bdev);
invalidate_bdev(sb->s_bdev);
- if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
+ if (sbi->s_journal_bdev_file) {
/*
* Invalidate the journal device's buffers. We don't want them
* floating about in memory - the physical journal device may
* hotswapped, and it breaks the `ro-after' testing code.
*/
- sync_blockdev(sbi->journal_bdev);
- invalidate_bdev(sbi->journal_bdev);
- ext4_blkdev_remove(sbi);
+ sync_blockdev(file_bdev(sbi->s_journal_bdev_file));
+ invalidate_bdev(file_bdev(sbi->s_journal_bdev_file));
}
- if (sbi->s_mmp_tsk)
- kthread_stop(sbi->s_mmp_tsk);
+
+ ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
+ sbi->s_ea_inode_cache = NULL;
+
+ ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
+ sbi->s_ea_block_cache = NULL;
+
+ ext4_stop_mmpd(sbi);
+
+ brelse(sbi->s_sbh);
sb->s_fs_info = NULL;
/*
* Now that we are completely done shutting down the
@@ -837,9 +1372,12 @@ static void ext4_put_super(struct super_block *sb)
*/
kobject_put(&sbi->s_kobj);
wait_for_completion(&sbi->s_kobj_unregister);
- if (sbi->s_chksum_driver)
- crypto_free_shash(sbi->s_chksum_driver);
kfree(sbi->s_blockgroup_lock);
+ fs_put_dax(sbi->s_daxdev, NULL);
+ fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
+#if IS_ENABLED(CONFIG_UNICODE)
+ utf8_unload(sb->s_encoding);
+#endif
kfree(sbi);
}
@@ -852,86 +1390,113 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
{
struct ext4_inode_info *ei;
- ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
+ ei = alloc_inode_sb(sb, ext4_inode_cachep, GFP_NOFS);
if (!ei)
return NULL;
- ei->vfs_inode.i_version = 1;
- INIT_LIST_HEAD(&ei->i_prealloc_list);
- spin_lock_init(&ei->i_prealloc_lock);
+ inode_set_iversion(&ei->vfs_inode, 1);
+ ei->i_flags = 0;
+ ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
+ spin_lock_init(&ei->i_raw_lock);
+ ei->i_prealloc_node = RB_ROOT;
+ atomic_set(&ei->i_prealloc_active, 0);
+ rwlock_init(&ei->i_prealloc_lock);
ext4_es_init_tree(&ei->i_es_tree);
rwlock_init(&ei->i_es_lock);
- INIT_LIST_HEAD(&ei->i_es_lru);
- ei->i_es_lru_nr = 0;
- ei->i_touch_when = 0;
+ INIT_LIST_HEAD(&ei->i_es_list);
+ ei->i_es_all_nr = 0;
+ ei->i_es_shk_nr = 0;
+ ei->i_es_shrink_lblk = 0;
+ ei->i_es_seq = 0;
ei->i_reserved_data_blocks = 0;
- ei->i_reserved_meta_blocks = 0;
- ei->i_allocated_meta_blocks = 0;
- ei->i_da_metadata_calc_len = 0;
- ei->i_da_metadata_calc_last_lblock = 0;
spin_lock_init(&(ei->i_block_reservation_lock));
+ ext4_init_pending_tree(&ei->i_pending_tree);
#ifdef CONFIG_QUOTA
ei->i_reserved_quota = 0;
+ memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
#endif
ei->jinode = NULL;
INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
- INIT_LIST_HEAD(&ei->i_unrsv_conversion_list);
spin_lock_init(&ei->i_completed_io_lock);
ei->i_sync_tid = 0;
ei->i_datasync_tid = 0;
- atomic_set(&ei->i_ioend_count, 0);
- atomic_set(&ei->i_unwritten, 0);
INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
- INIT_WORK(&ei->i_unrsv_conversion_work, ext4_end_io_unrsv_work);
-
+ ext4_fc_init_inode(&ei->vfs_inode);
+ spin_lock_init(&ei->i_fc_lock);
return &ei->vfs_inode;
}
static int ext4_drop_inode(struct inode *inode)
{
- int drop = generic_drop_inode(inode);
+ int drop = inode_generic_drop(inode);
+
+ if (!drop)
+ drop = fscrypt_drop_inode(inode);
trace_ext4_drop_inode(inode, drop);
return drop;
}
-static void ext4_i_callback(struct rcu_head *head)
+static void ext4_free_in_core_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
+ fscrypt_free_inode(inode);
+ if (!list_empty(&(EXT4_I(inode)->i_fc_list))) {
+ pr_warn("%s: inode %ld still in fc list",
+ __func__, inode->i_ino);
+ }
kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
}
static void ext4_destroy_inode(struct inode *inode)
{
- if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
+ if (ext4_inode_orphan_tracked(inode)) {
ext4_msg(inode->i_sb, KERN_ERR,
- "Inode %lu (%p): orphan list check failed!",
+ "Inode %lu (%p): inode tracked as orphan!",
inode->i_ino, EXT4_I(inode));
print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
EXT4_I(inode), sizeof(struct ext4_inode_info),
true);
dump_stack();
}
- call_rcu(&inode->i_rcu, ext4_i_callback);
+
+ if (!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ERROR_FS) &&
+ WARN_ON_ONCE(EXT4_I(inode)->i_reserved_data_blocks))
+ ext4_msg(inode->i_sb, KERN_ERR,
+ "Inode %lu (%p): i_reserved_data_blocks (%u) not cleared!",
+ inode->i_ino, EXT4_I(inode),
+ EXT4_I(inode)->i_reserved_data_blocks);
+}
+
+static void ext4_shutdown(struct super_block *sb)
+{
+ ext4_force_shutdown(sb, EXT4_GOING_FLAGS_NOLOGFLUSH);
}
static void init_once(void *foo)
{
- struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
+ struct ext4_inode_info *ei = foo;
INIT_LIST_HEAD(&ei->i_orphan);
init_rwsem(&ei->xattr_sem);
init_rwsem(&ei->i_data_sem);
inode_init_once(&ei->vfs_inode);
+ ext4_fc_init_inode(&ei->vfs_inode);
+#ifdef CONFIG_FS_ENCRYPTION
+ ei->i_crypt_info = NULL;
+#endif
+#ifdef CONFIG_FS_VERITY
+ ei->i_verity_info = NULL;
+#endif
}
-static int init_inodecache(void)
+static int __init init_inodecache(void)
{
- ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
- sizeof(struct ext4_inode_info),
- 0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
- init_once);
+ ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache",
+ sizeof(struct ext4_inode_info), 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
+ offsetof(struct ext4_inode_info, i_data),
+ sizeof_field(struct ext4_inode_info, i_data),
+ init_once);
if (ext4_inode_cachep == NULL)
return -ENOMEM;
return 0;
@@ -949,18 +1514,20 @@ static void destroy_inodecache(void)
void ext4_clear_inode(struct inode *inode)
{
+ ext4_fc_del(inode);
invalidate_inode_buffers(inode);
clear_inode(inode);
- dquot_drop(inode);
ext4_discard_preallocations(inode);
ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
- ext4_es_lru_del(inode);
+ dquot_drop(inode);
if (EXT4_I(inode)->jinode) {
jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
EXT4_I(inode)->jinode);
jbd2_free_inode(EXT4_I(inode)->jinode);
EXT4_I(inode)->jinode = NULL;
}
+ fscrypt_put_encryption_info(inode);
+ fsverity_cleanup_inode(inode);
}
static struct inode *ext4_nfs_get_inode(struct super_block *sb,
@@ -968,20 +1535,11 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb,
{
struct inode *inode;
- if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
- return ERR_PTR(-ESTALE);
- if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
- return ERR_PTR(-ESTALE);
-
- /* iget isn't really right if the inode is currently unallocated!!
- *
- * ext4_read_inode will return a bad_inode if the inode had been
- * deleted, so we should be safe.
- *
+ /*
* Currently we don't know the generation for parent directory, so
* a generation of 0 means "accept any"
*/
- inode = ext4_iget(sb, ino);
+ inode = ext4_iget(sb, ino, EXT4_IGET_HANDLE);
if (IS_ERR(inode))
return ERR_CAST(inode);
if (generation && inode->i_generation != generation) {
@@ -1006,29 +1564,19 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
ext4_nfs_get_inode);
}
-/*
- * Try to release metadata pages (indirect blocks, directories) which are
- * mapped via the block device. Since these pages could have journal heads
- * which would prevent try_to_free_buffers() from freeing them, we must use
- * jbd2 layer's try_to_free_buffers() function to release them.
- */
-static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
- gfp_t wait)
+static int ext4_nfs_commit_metadata(struct inode *inode)
{
- journal_t *journal = EXT4_SB(sb)->s_journal;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL
+ };
- WARN_ON(PageChecked(page));
- if (!page_has_buffers(page))
- return 0;
- if (journal)
- return jbd2_journal_try_to_free_buffers(journal, page,
- wait & ~__GFP_WAIT);
- return try_to_free_buffers(page);
+ trace_ext4_nfs_commit_metadata(inode);
+ return ext4_write_inode(inode, &wbc);
}
#ifdef CONFIG_QUOTA
-#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
-#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
+static const char * const quotatypes[] = INITQFNAMES;
+#define QTYPE2NAME(t) (quotatypes[t])
static int ext4_write_dquot(struct dquot *dquot);
static int ext4_acquire_dquot(struct dquot *dquot);
@@ -1036,54 +1584,48 @@ static int ext4_release_dquot(struct dquot *dquot);
static int ext4_mark_dquot_dirty(struct dquot *dquot);
static int ext4_write_info(struct super_block *sb, int type);
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
- struct path *path);
-static int ext4_quota_on_sysfile(struct super_block *sb, int type,
- int format_id);
-static int ext4_quota_off(struct super_block *sb, int type);
-static int ext4_quota_off_sysfile(struct super_block *sb, int type);
-static int ext4_quota_on_mount(struct super_block *sb, int type);
+ const struct path *path);
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
size_t len, loff_t off);
static ssize_t ext4_quota_write(struct super_block *sb, int type,
const char *data, size_t len, loff_t off);
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
unsigned int flags);
-static int ext4_enable_quotas(struct super_block *sb);
+
+static struct dquot __rcu **ext4_get_dquots(struct inode *inode)
+{
+ return EXT4_I(inode)->i_dquot;
+}
static const struct dquot_operations ext4_quota_operations = {
- .get_reserved_space = ext4_get_reserved_space,
- .write_dquot = ext4_write_dquot,
- .acquire_dquot = ext4_acquire_dquot,
- .release_dquot = ext4_release_dquot,
- .mark_dirty = ext4_mark_dquot_dirty,
- .write_info = ext4_write_info,
- .alloc_dquot = dquot_alloc,
- .destroy_dquot = dquot_destroy,
+ .get_reserved_space = ext4_get_reserved_space,
+ .write_dquot = ext4_write_dquot,
+ .acquire_dquot = ext4_acquire_dquot,
+ .release_dquot = ext4_release_dquot,
+ .mark_dirty = ext4_mark_dquot_dirty,
+ .write_info = ext4_write_info,
+ .alloc_dquot = dquot_alloc,
+ .destroy_dquot = dquot_destroy,
+ .get_projid = ext4_get_projid,
+ .get_inode_usage = ext4_get_inode_usage,
+ .get_next_id = dquot_get_next_id,
};
static const struct quotactl_ops ext4_qctl_operations = {
.quota_on = ext4_quota_on,
.quota_off = ext4_quota_off,
.quota_sync = dquot_quota_sync,
- .get_info = dquot_get_dqinfo,
+ .get_state = dquot_get_state,
.set_info = dquot_set_dqinfo,
.get_dqblk = dquot_get_dqblk,
- .set_dqblk = dquot_set_dqblk
-};
-
-static const struct quotactl_ops ext4_qctl_sysfile_operations = {
- .quota_on_meta = ext4_quota_on_sysfile,
- .quota_off = ext4_quota_off_sysfile,
- .quota_sync = dquot_quota_sync,
- .get_info = dquot_get_dqinfo,
- .set_info = dquot_set_dqinfo,
- .get_dqblk = dquot_get_dqblk,
- .set_dqblk = dquot_set_dqblk
+ .set_dqblk = dquot_set_dqblk,
+ .get_nextdqblk = dquot_get_next_dqblk,
};
#endif
static const struct super_operations ext4_sops = {
.alloc_inode = ext4_alloc_inode,
+ .free_inode = ext4_free_in_core_inode,
.destroy_inode = ext4_destroy_inode,
.write_inode = ext4_write_inode,
.dirty_inode = ext4_dirty_inode,
@@ -1094,250 +1636,209 @@ static const struct super_operations ext4_sops = {
.freeze_fs = ext4_freeze,
.unfreeze_fs = ext4_unfreeze,
.statfs = ext4_statfs,
- .remount_fs = ext4_remount,
.show_options = ext4_show_options,
+ .shutdown = ext4_shutdown,
#ifdef CONFIG_QUOTA
.quota_read = ext4_quota_read,
.quota_write = ext4_quota_write,
+ .get_dquots = ext4_get_dquots,
#endif
- .bdev_try_to_free_page = bdev_try_to_free_page,
-};
-
-static const struct super_operations ext4_nojournal_sops = {
- .alloc_inode = ext4_alloc_inode,
- .destroy_inode = ext4_destroy_inode,
- .write_inode = ext4_write_inode,
- .dirty_inode = ext4_dirty_inode,
- .drop_inode = ext4_drop_inode,
- .evict_inode = ext4_evict_inode,
- .sync_fs = ext4_sync_fs_nojournal,
- .put_super = ext4_put_super,
- .statfs = ext4_statfs,
- .remount_fs = ext4_remount,
- .show_options = ext4_show_options,
-#ifdef CONFIG_QUOTA
- .quota_read = ext4_quota_read,
- .quota_write = ext4_quota_write,
-#endif
- .bdev_try_to_free_page = bdev_try_to_free_page,
};
static const struct export_operations ext4_export_ops = {
+ .encode_fh = generic_encode_ino32_fh,
.fh_to_dentry = ext4_fh_to_dentry,
.fh_to_parent = ext4_fh_to_parent,
.get_parent = ext4_get_parent,
+ .commit_metadata = ext4_nfs_commit_metadata,
};
enum {
Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
- Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
+ Opt_resgid, Opt_resuid, Opt_sb,
Opt_nouid32, Opt_debug, Opt_removed,
- Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
+ Opt_user_xattr, Opt_acl,
Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
- Opt_commit, Opt_min_batch_time, Opt_max_batch_time,
- Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit,
+ Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
+ Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
- Opt_data_err_abort, Opt_data_err_ignore,
- Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
- Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
+ Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption,
+ Opt_inlinecrypt,
+ Opt_usrjquota, Opt_grpjquota, Opt_quota,
Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
- Opt_usrquota, Opt_grpquota, Opt_i_version,
- Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
+ Opt_usrquota, Opt_grpquota, Opt_prjquota,
+ Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never,
+ Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
+ Opt_nowarn_on_error, Opt_mblk_io_submit, Opt_debug_want_extra_isize,
Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
Opt_inode_readahead_blks, Opt_journal_ioprio,
Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
- Opt_max_dir_size_kb,
+ Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
+ Opt_no_prefetch_block_bitmaps, Opt_mb_optimize_scan,
+ Opt_errors, Opt_data, Opt_data_err, Opt_jqfmt, Opt_dax_type,
+#ifdef CONFIG_EXT4_DEBUG
+ Opt_fc_debug_max_replay, Opt_fc_debug_force
+#endif
};
-static const match_table_t tokens = {
- {Opt_bsd_df, "bsddf"},
- {Opt_minix_df, "minixdf"},
- {Opt_grpid, "grpid"},
- {Opt_grpid, "bsdgroups"},
- {Opt_nogrpid, "nogrpid"},
- {Opt_nogrpid, "sysvgroups"},
- {Opt_resgid, "resgid=%u"},
- {Opt_resuid, "resuid=%u"},
- {Opt_sb, "sb=%u"},
- {Opt_err_cont, "errors=continue"},
- {Opt_err_panic, "errors=panic"},
- {Opt_err_ro, "errors=remount-ro"},
- {Opt_nouid32, "nouid32"},
- {Opt_debug, "debug"},
- {Opt_removed, "oldalloc"},
- {Opt_removed, "orlov"},
- {Opt_user_xattr, "user_xattr"},
- {Opt_nouser_xattr, "nouser_xattr"},
- {Opt_acl, "acl"},
- {Opt_noacl, "noacl"},
- {Opt_noload, "norecovery"},
- {Opt_noload, "noload"},
- {Opt_removed, "nobh"},
- {Opt_removed, "bh"},
- {Opt_commit, "commit=%u"},
- {Opt_min_batch_time, "min_batch_time=%u"},
- {Opt_max_batch_time, "max_batch_time=%u"},
- {Opt_journal_dev, "journal_dev=%u"},
- {Opt_journal_checksum, "journal_checksum"},
- {Opt_journal_async_commit, "journal_async_commit"},
- {Opt_abort, "abort"},
- {Opt_data_journal, "data=journal"},
- {Opt_data_ordered, "data=ordered"},
- {Opt_data_writeback, "data=writeback"},
- {Opt_data_err_abort, "data_err=abort"},
- {Opt_data_err_ignore, "data_err=ignore"},
- {Opt_offusrjquota, "usrjquota="},
- {Opt_usrjquota, "usrjquota=%s"},
- {Opt_offgrpjquota, "grpjquota="},
- {Opt_grpjquota, "grpjquota=%s"},
- {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
- {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
- {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
- {Opt_grpquota, "grpquota"},
- {Opt_noquota, "noquota"},
- {Opt_quota, "quota"},
- {Opt_usrquota, "usrquota"},
- {Opt_barrier, "barrier=%u"},
- {Opt_barrier, "barrier"},
- {Opt_nobarrier, "nobarrier"},
- {Opt_i_version, "i_version"},
- {Opt_stripe, "stripe=%u"},
- {Opt_delalloc, "delalloc"},
- {Opt_nodelalloc, "nodelalloc"},
- {Opt_removed, "mblk_io_submit"},
- {Opt_removed, "nomblk_io_submit"},
- {Opt_block_validity, "block_validity"},
- {Opt_noblock_validity, "noblock_validity"},
- {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
- {Opt_journal_ioprio, "journal_ioprio=%u"},
- {Opt_auto_da_alloc, "auto_da_alloc=%u"},
- {Opt_auto_da_alloc, "auto_da_alloc"},
- {Opt_noauto_da_alloc, "noauto_da_alloc"},
- {Opt_dioread_nolock, "dioread_nolock"},
- {Opt_dioread_lock, "dioread_lock"},
- {Opt_discard, "discard"},
- {Opt_nodiscard, "nodiscard"},
- {Opt_init_itable, "init_itable=%u"},
- {Opt_init_itable, "init_itable"},
- {Opt_noinit_itable, "noinit_itable"},
- {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
- {Opt_removed, "check=none"}, /* mount option from ext2/3 */
- {Opt_removed, "nocheck"}, /* mount option from ext2/3 */
- {Opt_removed, "reservation"}, /* mount option from ext2/3 */
- {Opt_removed, "noreservation"}, /* mount option from ext2/3 */
- {Opt_removed, "journal=%u"}, /* mount option from ext2/3 */
- {Opt_err, NULL},
+static const struct constant_table ext4_param_errors[] = {
+ {"continue", EXT4_MOUNT_ERRORS_CONT},
+ {"panic", EXT4_MOUNT_ERRORS_PANIC},
+ {"remount-ro", EXT4_MOUNT_ERRORS_RO},
+ {}
};
-static ext4_fsblk_t get_sb_block(void **data)
-{
- ext4_fsblk_t sb_block;
- char *options = (char *) *data;
-
- if (!options || strncmp(options, "sb=", 3) != 0)
- return 1; /* Default location */
-
- options += 3;
- /* TODO: use simple_strtoll with >32bit ext4 */
- sb_block = simple_strtoul(options, &options, 0);
- if (*options && *options != ',') {
- printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
- (char *) *data);
- return 1;
- }
- if (*options == ',')
- options++;
- *data = (void *) options;
-
- return sb_block;
-}
-
-#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
-static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n"
- "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
-
-#ifdef CONFIG_QUOTA
-static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- char *qname;
- int ret = -1;
+static const struct constant_table ext4_param_data[] = {
+ {"journal", EXT4_MOUNT_JOURNAL_DATA},
+ {"ordered", EXT4_MOUNT_ORDERED_DATA},
+ {"writeback", EXT4_MOUNT_WRITEBACK_DATA},
+ {}
+};
- if (sb_any_quota_loaded(sb) &&
- !sbi->s_qf_names[qtype]) {
- ext4_msg(sb, KERN_ERR,
- "Cannot change journaled "
- "quota options when quota turned on");
- return -1;
- }
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) {
- ext4_msg(sb, KERN_ERR, "Cannot set journaled quota options "
- "when QUOTA feature is enabled");
- return -1;
- }
- qname = match_strdup(args);
- if (!qname) {
- ext4_msg(sb, KERN_ERR,
- "Not enough memory for storing quotafile name");
- return -1;
- }
- if (sbi->s_qf_names[qtype]) {
- if (strcmp(sbi->s_qf_names[qtype], qname) == 0)
- ret = 1;
- else
- ext4_msg(sb, KERN_ERR,
- "%s quota file already specified",
- QTYPE2NAME(qtype));
- goto errout;
- }
- if (strchr(qname, '/')) {
- ext4_msg(sb, KERN_ERR,
- "quotafile must be on filesystem root");
- goto errout;
- }
- sbi->s_qf_names[qtype] = qname;
- set_opt(sb, QUOTA);
- return 1;
-errout:
- kfree(qname);
- return ret;
-}
+static const struct constant_table ext4_param_data_err[] = {
+ {"abort", Opt_data_err_abort},
+ {"ignore", Opt_data_err_ignore},
+ {}
+};
-static int clear_qf_name(struct super_block *sb, int qtype)
-{
+static const struct constant_table ext4_param_jqfmt[] = {
+ {"vfsold", QFMT_VFS_OLD},
+ {"vfsv0", QFMT_VFS_V0},
+ {"vfsv1", QFMT_VFS_V1},
+ {}
+};
- struct ext4_sb_info *sbi = EXT4_SB(sb);
+static const struct constant_table ext4_param_dax[] = {
+ {"always", Opt_dax_always},
+ {"inode", Opt_dax_inode},
+ {"never", Opt_dax_never},
+ {}
+};
- if (sb_any_quota_loaded(sb) &&
- sbi->s_qf_names[qtype]) {
- ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"
- " when quota turned on");
- return -1;
- }
- kfree(sbi->s_qf_names[qtype]);
- sbi->s_qf_names[qtype] = NULL;
- return 1;
-}
+/*
+ * Mount option specification
+ * We don't use fsparam_flag_no because of the way we set the
+ * options and the way we show them in _ext4_show_options(). To
+ * keep the changes to a minimum, let's keep the negative options
+ * separate for now.
+ */
+static const struct fs_parameter_spec ext4_param_specs[] = {
+ fsparam_flag ("bsddf", Opt_bsd_df),
+ fsparam_flag ("minixdf", Opt_minix_df),
+ fsparam_flag ("grpid", Opt_grpid),
+ fsparam_flag ("bsdgroups", Opt_grpid),
+ fsparam_flag ("nogrpid", Opt_nogrpid),
+ fsparam_flag ("sysvgroups", Opt_nogrpid),
+ fsparam_gid ("resgid", Opt_resgid),
+ fsparam_uid ("resuid", Opt_resuid),
+ fsparam_u32 ("sb", Opt_sb),
+ fsparam_enum ("errors", Opt_errors, ext4_param_errors),
+ fsparam_flag ("nouid32", Opt_nouid32),
+ fsparam_flag ("debug", Opt_debug),
+ fsparam_flag ("oldalloc", Opt_removed),
+ fsparam_flag ("orlov", Opt_removed),
+ fsparam_flag ("user_xattr", Opt_user_xattr),
+ fsparam_flag ("acl", Opt_acl),
+ fsparam_flag ("norecovery", Opt_noload),
+ fsparam_flag ("noload", Opt_noload),
+ fsparam_flag ("bh", Opt_removed),
+ fsparam_flag ("nobh", Opt_removed),
+ fsparam_u32 ("commit", Opt_commit),
+ fsparam_u32 ("min_batch_time", Opt_min_batch_time),
+ fsparam_u32 ("max_batch_time", Opt_max_batch_time),
+ fsparam_u32 ("journal_dev", Opt_journal_dev),
+ fsparam_bdev ("journal_path", Opt_journal_path),
+ fsparam_flag ("journal_checksum", Opt_journal_checksum),
+ fsparam_flag ("nojournal_checksum", Opt_nojournal_checksum),
+ fsparam_flag ("journal_async_commit",Opt_journal_async_commit),
+ fsparam_flag ("abort", Opt_abort),
+ fsparam_enum ("data", Opt_data, ext4_param_data),
+ fsparam_enum ("data_err", Opt_data_err,
+ ext4_param_data_err),
+ fsparam_string_empty
+ ("usrjquota", Opt_usrjquota),
+ fsparam_string_empty
+ ("grpjquota", Opt_grpjquota),
+ fsparam_enum ("jqfmt", Opt_jqfmt, ext4_param_jqfmt),
+ fsparam_flag ("grpquota", Opt_grpquota),
+ fsparam_flag ("quota", Opt_quota),
+ fsparam_flag ("noquota", Opt_noquota),
+ fsparam_flag ("usrquota", Opt_usrquota),
+ fsparam_flag ("prjquota", Opt_prjquota),
+ fsparam_flag ("barrier", Opt_barrier),
+ fsparam_u32 ("barrier", Opt_barrier),
+ fsparam_flag ("nobarrier", Opt_nobarrier),
+ fsparam_flag ("i_version", Opt_removed),
+ fsparam_flag ("dax", Opt_dax),
+ fsparam_enum ("dax", Opt_dax_type, ext4_param_dax),
+ fsparam_u32 ("stripe", Opt_stripe),
+ fsparam_flag ("delalloc", Opt_delalloc),
+ fsparam_flag ("nodelalloc", Opt_nodelalloc),
+ fsparam_flag ("warn_on_error", Opt_warn_on_error),
+ fsparam_flag ("nowarn_on_error", Opt_nowarn_on_error),
+ fsparam_u32 ("debug_want_extra_isize",
+ Opt_debug_want_extra_isize),
+ fsparam_flag ("mblk_io_submit", Opt_removed),
+ fsparam_flag ("nomblk_io_submit", Opt_removed),
+ fsparam_flag ("block_validity", Opt_block_validity),
+ fsparam_flag ("noblock_validity", Opt_noblock_validity),
+ fsparam_u32 ("inode_readahead_blks",
+ Opt_inode_readahead_blks),
+ fsparam_u32 ("journal_ioprio", Opt_journal_ioprio),
+ fsparam_u32 ("auto_da_alloc", Opt_auto_da_alloc),
+ fsparam_flag ("auto_da_alloc", Opt_auto_da_alloc),
+ fsparam_flag ("noauto_da_alloc", Opt_noauto_da_alloc),
+ fsparam_flag ("dioread_nolock", Opt_dioread_nolock),
+ fsparam_flag ("nodioread_nolock", Opt_dioread_lock),
+ fsparam_flag ("dioread_lock", Opt_dioread_lock),
+ fsparam_flag ("discard", Opt_discard),
+ fsparam_flag ("nodiscard", Opt_nodiscard),
+ fsparam_u32 ("init_itable", Opt_init_itable),
+ fsparam_flag ("init_itable", Opt_init_itable),
+ fsparam_flag ("noinit_itable", Opt_noinit_itable),
+#ifdef CONFIG_EXT4_DEBUG
+ fsparam_flag ("fc_debug_force", Opt_fc_debug_force),
+ fsparam_u32 ("fc_debug_max_replay", Opt_fc_debug_max_replay),
#endif
+ fsparam_u32 ("max_dir_size_kb", Opt_max_dir_size_kb),
+ fsparam_flag ("test_dummy_encryption",
+ Opt_test_dummy_encryption),
+ fsparam_string ("test_dummy_encryption",
+ Opt_test_dummy_encryption),
+ fsparam_flag ("inlinecrypt", Opt_inlinecrypt),
+ fsparam_flag ("nombcache", Opt_nombcache),
+ fsparam_flag ("no_mbcache", Opt_nombcache), /* for backward compatibility */
+ fsparam_flag ("prefetch_block_bitmaps",
+ Opt_removed),
+ fsparam_flag ("no_prefetch_block_bitmaps",
+ Opt_no_prefetch_block_bitmaps),
+ fsparam_s32 ("mb_optimize_scan", Opt_mb_optimize_scan),
+ fsparam_string ("check", Opt_removed), /* mount option from ext2/3 */
+ fsparam_flag ("nocheck", Opt_removed), /* mount option from ext2/3 */
+ fsparam_flag ("reservation", Opt_removed), /* mount option from ext2/3 */
+ fsparam_flag ("noreservation", Opt_removed), /* mount option from ext2/3 */
+ fsparam_u32 ("journal", Opt_removed), /* mount option from ext2/3 */
+ {}
+};
+
#define MOPT_SET 0x0001
#define MOPT_CLEAR 0x0002
#define MOPT_NOSUPPORT 0x0004
#define MOPT_EXPLICIT 0x0008
-#define MOPT_CLEAR_ERR 0x0010
-#define MOPT_GTE0 0x0020
#ifdef CONFIG_QUOTA
#define MOPT_Q 0
-#define MOPT_QFMT 0x0040
+#define MOPT_QFMT 0x0010
#else
#define MOPT_Q MOPT_NOSUPPORT
#define MOPT_QFMT MOPT_NOSUPPORT
#endif
-#define MOPT_DATAJ 0x0080
-#define MOPT_NO_EXT2 0x0100
-#define MOPT_NO_EXT3 0x0200
+#define MOPT_NO_EXT2 0x0020
+#define MOPT_NO_EXT3 0x0040
#define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3)
+#define MOPT_SKIP 0x0080
+#define MOPT_2 0x0100
static const struct mount_opts {
int token;
@@ -1359,47 +1860,34 @@ static const struct mount_opts {
{Opt_delalloc, EXT4_MOUNT_DELALLOC,
MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
{Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
- MOPT_EXT4_ONLY | MOPT_CLEAR | MOPT_EXPLICIT},
+ MOPT_EXT4_ONLY | MOPT_CLEAR},
+ {Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET},
+ {Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR},
+ {Opt_commit, 0, MOPT_NO_EXT2},
+ {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
+ MOPT_EXT4_ONLY | MOPT_CLEAR},
{Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
- MOPT_EXT4_ONLY | MOPT_SET},
+ MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
{Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
EXT4_MOUNT_JOURNAL_CHECKSUM),
- MOPT_EXT4_ONLY | MOPT_SET},
+ MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
{Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
- {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR},
- {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
- {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
- {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT,
- MOPT_NO_EXT2 | MOPT_SET},
- {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT,
- MOPT_NO_EXT2 | MOPT_CLEAR},
+ {Opt_data_err, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_NO_EXT2},
{Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
{Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
{Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
{Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
{Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
- {Opt_commit, 0, MOPT_GTE0},
- {Opt_max_batch_time, 0, MOPT_GTE0},
- {Opt_min_batch_time, 0, MOPT_GTE0},
- {Opt_inode_readahead_blks, 0, MOPT_GTE0},
- {Opt_init_itable, 0, MOPT_GTE0},
- {Opt_stripe, 0, MOPT_GTE0},
- {Opt_resuid, 0, MOPT_GTE0},
- {Opt_resgid, 0, MOPT_GTE0},
- {Opt_journal_dev, 0, MOPT_GTE0},
- {Opt_journal_ioprio, 0, MOPT_GTE0},
- {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
- {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
- {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA,
- MOPT_NO_EXT2 | MOPT_DATAJ},
+ {Opt_dax_type, 0, MOPT_EXT4_ONLY},
+ {Opt_journal_dev, 0, MOPT_NO_EXT2},
+ {Opt_journal_path, 0, MOPT_NO_EXT2},
+ {Opt_journal_ioprio, 0, MOPT_NO_EXT2},
+ {Opt_data, 0, MOPT_NO_EXT2},
{Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
- {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
#ifdef CONFIG_EXT4_FS_POSIX_ACL
{Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
- {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
#else
{Opt_acl, 0, MOPT_NOSUPPORT},
- {Opt_noacl, 0, MOPT_NOSUPPORT},
#endif
{Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
{Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
@@ -1408,269 +1896,983 @@ static const struct mount_opts {
MOPT_SET | MOPT_Q},
{Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
MOPT_SET | MOPT_Q},
+ {Opt_prjquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_PRJQUOTA,
+ MOPT_SET | MOPT_Q},
{Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
- EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q},
+ EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
+ MOPT_CLEAR | MOPT_Q},
{Opt_usrjquota, 0, MOPT_Q},
{Opt_grpjquota, 0, MOPT_Q},
- {Opt_offusrjquota, 0, MOPT_Q},
- {Opt_offgrpjquota, 0, MOPT_Q},
- {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
- {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
- {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
- {Opt_max_dir_size_kb, 0, MOPT_GTE0},
+ {Opt_jqfmt, 0, MOPT_QFMT},
+ {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
+ {Opt_no_prefetch_block_bitmaps, EXT4_MOUNT_NO_PREFETCH_BLOCK_BITMAPS,
+ MOPT_SET},
+#ifdef CONFIG_EXT4_DEBUG
+ {Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
+ MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
+#endif
+ {Opt_abort, EXT4_MOUNT2_ABORT, MOPT_SET | MOPT_2},
{Opt_err, 0, 0}
};
-static int handle_mount_opt(struct super_block *sb, char *opt, int token,
- substring_t *args, unsigned long *journal_devnum,
- unsigned int *journal_ioprio, int is_remount)
+#if IS_ENABLED(CONFIG_UNICODE)
+static const struct ext4_sb_encodings {
+ __u16 magic;
+ char *name;
+ unsigned int version;
+} ext4_sb_encoding_map[] = {
+ {EXT4_ENC_UTF8_12_1, "utf8", UNICODE_AGE(12, 1, 0)},
+};
+
+static const struct ext4_sb_encodings *
+ext4_sb_read_encoding(const struct ext4_super_block *es)
{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- const struct mount_opts *m;
- kuid_t uid;
- kgid_t gid;
- int arg = 0;
+ __u16 magic = le16_to_cpu(es->s_encoding);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ext4_sb_encoding_map); i++)
+ if (magic == ext4_sb_encoding_map[i].magic)
+ return &ext4_sb_encoding_map[i];
+
+ return NULL;
+}
+#endif
+
+#define EXT4_SPEC_JQUOTA (1 << 0)
+#define EXT4_SPEC_JQFMT (1 << 1)
+#define EXT4_SPEC_DATAJ (1 << 2)
+#define EXT4_SPEC_SB_BLOCK (1 << 3)
+#define EXT4_SPEC_JOURNAL_DEV (1 << 4)
+#define EXT4_SPEC_JOURNAL_IOPRIO (1 << 5)
+#define EXT4_SPEC_s_want_extra_isize (1 << 7)
+#define EXT4_SPEC_s_max_batch_time (1 << 8)
+#define EXT4_SPEC_s_min_batch_time (1 << 9)
+#define EXT4_SPEC_s_inode_readahead_blks (1 << 10)
+#define EXT4_SPEC_s_li_wait_mult (1 << 11)
+#define EXT4_SPEC_s_max_dir_size_kb (1 << 12)
+#define EXT4_SPEC_s_stripe (1 << 13)
+#define EXT4_SPEC_s_resuid (1 << 14)
+#define EXT4_SPEC_s_resgid (1 << 15)
+#define EXT4_SPEC_s_commit_interval (1 << 16)
+#define EXT4_SPEC_s_fc_debug_max_replay (1 << 17)
+#define EXT4_SPEC_s_sb_block (1 << 18)
+#define EXT4_SPEC_mb_optimize_scan (1 << 19)
+
+struct ext4_fs_context {
+ char *s_qf_names[EXT4_MAXQUOTAS];
+ struct fscrypt_dummy_policy dummy_enc_policy;
+ int s_jquota_fmt; /* Format of quota to use */
+#ifdef CONFIG_EXT4_DEBUG
+ int s_fc_debug_max_replay;
+#endif
+ unsigned short qname_spec;
+ unsigned long vals_s_flags; /* Bits to set in s_flags */
+ unsigned long mask_s_flags; /* Bits changed in s_flags */
+ unsigned long journal_devnum;
+ unsigned long s_commit_interval;
+ unsigned long s_stripe;
+ unsigned int s_inode_readahead_blks;
+ unsigned int s_want_extra_isize;
+ unsigned int s_li_wait_mult;
+ unsigned int s_max_dir_size_kb;
+ unsigned int journal_ioprio;
+ unsigned int vals_s_mount_opt;
+ unsigned int mask_s_mount_opt;
+ unsigned int vals_s_mount_opt2;
+ unsigned int mask_s_mount_opt2;
+ unsigned int opt_flags; /* MOPT flags */
+ unsigned int spec;
+ u32 s_max_batch_time;
+ u32 s_min_batch_time;
+ kuid_t s_resuid;
+ kgid_t s_resgid;
+ ext4_fsblk_t s_sb_block;
+};
+
+static void ext4_fc_free(struct fs_context *fc)
+{
+ struct ext4_fs_context *ctx = fc->fs_private;
+ int i;
+
+ if (!ctx)
+ return;
+
+ for (i = 0; i < EXT4_MAXQUOTAS; i++)
+ kfree(ctx->s_qf_names[i]);
+
+ fscrypt_free_dummy_policy(&ctx->dummy_enc_policy);
+ kfree(ctx);
+}
+
+int ext4_init_fs_context(struct fs_context *fc)
+{
+ struct ext4_fs_context *ctx;
+
+ ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ fc->fs_private = ctx;
+ fc->ops = &ext4_context_ops;
+
+ /* i_version is always enabled now */
+ fc->sb_flags |= SB_I_VERSION;
+
+ return 0;
+}
#ifdef CONFIG_QUOTA
- if (token == Opt_usrjquota)
- return set_qf_name(sb, USRQUOTA, &args[0]);
- else if (token == Opt_grpjquota)
- return set_qf_name(sb, GRPQUOTA, &args[0]);
- else if (token == Opt_offusrjquota)
- return clear_qf_name(sb, USRQUOTA);
- else if (token == Opt_offgrpjquota)
- return clear_qf_name(sb, GRPQUOTA);
+/*
+ * Note the name of the specified quota file.
+ */
+static int note_qf_name(struct fs_context *fc, int qtype,
+ struct fs_parameter *param)
+{
+ struct ext4_fs_context *ctx = fc->fs_private;
+ char *qname;
+
+ if (param->size < 1) {
+ ext4_msg(NULL, KERN_ERR, "Missing quota name");
+ return -EINVAL;
+ }
+ if (strchr(param->string, '/')) {
+ ext4_msg(NULL, KERN_ERR,
+ "quotafile must be on filesystem root");
+ return -EINVAL;
+ }
+ if (ctx->s_qf_names[qtype]) {
+ if (strcmp(ctx->s_qf_names[qtype], param->string) != 0) {
+ ext4_msg(NULL, KERN_ERR,
+ "%s quota file already specified",
+ QTYPE2NAME(qtype));
+ return -EINVAL;
+ }
+ return 0;
+ }
+
+ qname = kmemdup_nul(param->string, param->size, GFP_KERNEL);
+ if (!qname) {
+ ext4_msg(NULL, KERN_ERR,
+ "Not enough memory for storing quotafile name");
+ return -ENOMEM;
+ }
+ ctx->s_qf_names[qtype] = qname;
+ ctx->qname_spec |= 1 << qtype;
+ ctx->spec |= EXT4_SPEC_JQUOTA;
+ return 0;
+}
+
+/*
+ * Clear the name of the specified quota file.
+ */
+static int unnote_qf_name(struct fs_context *fc, int qtype)
+{
+ struct ext4_fs_context *ctx = fc->fs_private;
+
+ kfree(ctx->s_qf_names[qtype]);
+
+ ctx->s_qf_names[qtype] = NULL;
+ ctx->qname_spec |= 1 << qtype;
+ ctx->spec |= EXT4_SPEC_JQUOTA;
+ return 0;
+}
#endif
- switch (token) {
- case Opt_noacl:
- case Opt_nouser_xattr:
- ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5");
- break;
- case Opt_sb:
- return 1; /* handled by get_sb_block() */
- case Opt_removed:
- ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt);
- return 1;
- case Opt_abort:
- sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
- return 1;
- case Opt_i_version:
- sb->s_flags |= MS_I_VERSION;
- return 1;
+
+static int ext4_parse_test_dummy_encryption(const struct fs_parameter *param,
+ struct ext4_fs_context *ctx)
+{
+ int err;
+
+ if (!IS_ENABLED(CONFIG_FS_ENCRYPTION)) {
+ ext4_msg(NULL, KERN_WARNING,
+ "test_dummy_encryption option not supported");
+ return -EINVAL;
+ }
+ err = fscrypt_parse_test_dummy_encryption(param,
+ &ctx->dummy_enc_policy);
+ if (err == -EINVAL) {
+ ext4_msg(NULL, KERN_WARNING,
+ "Value of option \"%s\" is unrecognized", param->key);
+ } else if (err == -EEXIST) {
+ ext4_msg(NULL, KERN_WARNING,
+ "Conflicting test_dummy_encryption options");
+ return -EINVAL;
}
+ return err;
+}
+
+#define EXT4_SET_CTX(name) \
+static inline __maybe_unused \
+void ctx_set_##name(struct ext4_fs_context *ctx, unsigned long flag) \
+{ \
+ ctx->mask_s_##name |= flag; \
+ ctx->vals_s_##name |= flag; \
+}
+
+#define EXT4_CLEAR_CTX(name) \
+static inline __maybe_unused \
+void ctx_clear_##name(struct ext4_fs_context *ctx, unsigned long flag) \
+{ \
+ ctx->mask_s_##name |= flag; \
+ ctx->vals_s_##name &= ~flag; \
+}
+
+#define EXT4_TEST_CTX(name) \
+static inline unsigned long \
+ctx_test_##name(struct ext4_fs_context *ctx, unsigned long flag) \
+{ \
+ return (ctx->vals_s_##name & flag); \
+}
+
+EXT4_SET_CTX(flags); /* set only */
+EXT4_SET_CTX(mount_opt);
+EXT4_CLEAR_CTX(mount_opt);
+EXT4_TEST_CTX(mount_opt);
+EXT4_SET_CTX(mount_opt2);
+EXT4_CLEAR_CTX(mount_opt2);
+EXT4_TEST_CTX(mount_opt2);
+
+static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ struct ext4_fs_context *ctx = fc->fs_private;
+ struct fs_parse_result result;
+ const struct mount_opts *m;
+ int is_remount;
+ int token;
+
+ token = fs_parse(fc, ext4_param_specs, param, &result);
+ if (token < 0)
+ return token;
+ is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE;
for (m = ext4_mount_opts; m->token != Opt_err; m++)
if (token == m->token)
break;
- if (m->token == Opt_err) {
- ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" "
- "or missing value", opt);
- return -1;
- }
+ ctx->opt_flags |= m->flags;
- if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
- ext4_msg(sb, KERN_ERR,
- "Mount option \"%s\" incompatible with ext2", opt);
- return -1;
- }
- if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
- ext4_msg(sb, KERN_ERR,
- "Mount option \"%s\" incompatible with ext3", opt);
- return -1;
+ if (m->flags & MOPT_EXPLICIT) {
+ if (m->mount_opt & EXT4_MOUNT_DELALLOC) {
+ ctx_set_mount_opt2(ctx, EXT4_MOUNT2_EXPLICIT_DELALLOC);
+ } else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) {
+ ctx_set_mount_opt2(ctx,
+ EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM);
+ } else
+ return -EINVAL;
}
- if (args->from && match_int(args, &arg))
- return -1;
- if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
- return -1;
- if (m->flags & MOPT_EXPLICIT)
- set_opt2(sb, EXPLICIT_DELALLOC);
- if (m->flags & MOPT_CLEAR_ERR)
- clear_opt(sb, ERRORS_MASK);
- if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
- ext4_msg(sb, KERN_ERR, "Cannot change quota "
- "options when quota turned on");
- return -1;
+ if (m->flags & MOPT_NOSUPPORT) {
+ ext4_msg(NULL, KERN_ERR, "%s option not supported",
+ param->key);
+ return 0;
}
- if (m->flags & MOPT_NOSUPPORT) {
- ext4_msg(sb, KERN_ERR, "%s option not supported", opt);
- } else if (token == Opt_commit) {
- if (arg == 0)
- arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
- sbi->s_commit_interval = HZ * arg;
- } else if (token == Opt_max_batch_time) {
- if (arg == 0)
- arg = EXT4_DEF_MAX_BATCH_TIME;
- sbi->s_max_batch_time = arg;
- } else if (token == Opt_min_batch_time) {
- sbi->s_min_batch_time = arg;
- } else if (token == Opt_inode_readahead_blks) {
- if (arg && (arg > (1 << 30) || !is_power_of_2(arg))) {
- ext4_msg(sb, KERN_ERR,
- "EXT4-fs: inode_readahead_blks must be "
- "0 or a power of 2 smaller than 2^31");
- return -1;
+ switch (token) {
+#ifdef CONFIG_QUOTA
+ case Opt_usrjquota:
+ if (!*param->string)
+ return unnote_qf_name(fc, USRQUOTA);
+ else
+ return note_qf_name(fc, USRQUOTA, param);
+ case Opt_grpjquota:
+ if (!*param->string)
+ return unnote_qf_name(fc, GRPQUOTA);
+ else
+ return note_qf_name(fc, GRPQUOTA, param);
+#endif
+ case Opt_sb:
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+ ext4_msg(NULL, KERN_WARNING,
+ "Ignoring %s option on remount", param->key);
+ } else {
+ ctx->s_sb_block = result.uint_32;
+ ctx->spec |= EXT4_SPEC_s_sb_block;
}
- sbi->s_inode_readahead_blks = arg;
- } else if (token == Opt_init_itable) {
- set_opt(sb, INIT_INODE_TABLE);
- if (!args->from)
- arg = EXT4_DEF_LI_WAIT_MULT;
- sbi->s_li_wait_mult = arg;
- } else if (token == Opt_max_dir_size_kb) {
- sbi->s_max_dir_size_kb = arg;
- } else if (token == Opt_stripe) {
- sbi->s_stripe = arg;
- } else if (token == Opt_resuid) {
- uid = make_kuid(current_user_ns(), arg);
- if (!uid_valid(uid)) {
- ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg);
- return -1;
+ return 0;
+ case Opt_removed:
+ ext4_msg(NULL, KERN_WARNING, "Ignoring removed %s option",
+ param->key);
+ return 0;
+ case Opt_inlinecrypt:
+#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
+ ctx_set_flags(ctx, SB_INLINECRYPT);
+#else
+ ext4_msg(NULL, KERN_ERR, "inline encryption not supported");
+#endif
+ return 0;
+ case Opt_errors:
+ ctx_clear_mount_opt(ctx, EXT4_MOUNT_ERRORS_MASK);
+ ctx_set_mount_opt(ctx, result.uint_32);
+ return 0;
+#ifdef CONFIG_QUOTA
+ case Opt_jqfmt:
+ ctx->s_jquota_fmt = result.uint_32;
+ ctx->spec |= EXT4_SPEC_JQFMT;
+ return 0;
+#endif
+ case Opt_data:
+ ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS);
+ ctx_set_mount_opt(ctx, result.uint_32);
+ ctx->spec |= EXT4_SPEC_DATAJ;
+ return 0;
+ case Opt_commit:
+ if (result.uint_32 == 0)
+ result.uint_32 = JBD2_DEFAULT_MAX_COMMIT_AGE;
+ else if (result.uint_32 > INT_MAX / HZ) {
+ ext4_msg(NULL, KERN_ERR,
+ "Invalid commit interval %d, "
+ "must be smaller than %d",
+ result.uint_32, INT_MAX / HZ);
+ return -EINVAL;
}
- sbi->s_resuid = uid;
- } else if (token == Opt_resgid) {
- gid = make_kgid(current_user_ns(), arg);
- if (!gid_valid(gid)) {
- ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg);
- return -1;
+ ctx->s_commit_interval = HZ * result.uint_32;
+ ctx->spec |= EXT4_SPEC_s_commit_interval;
+ return 0;
+ case Opt_debug_want_extra_isize:
+ if ((result.uint_32 & 1) || (result.uint_32 < 4)) {
+ ext4_msg(NULL, KERN_ERR,
+ "Invalid want_extra_isize %d", result.uint_32);
+ return -EINVAL;
+ }
+ ctx->s_want_extra_isize = result.uint_32;
+ ctx->spec |= EXT4_SPEC_s_want_extra_isize;
+ return 0;
+ case Opt_max_batch_time:
+ ctx->s_max_batch_time = result.uint_32;
+ ctx->spec |= EXT4_SPEC_s_max_batch_time;
+ return 0;
+ case Opt_min_batch_time:
+ ctx->s_min_batch_time = result.uint_32;
+ ctx->spec |= EXT4_SPEC_s_min_batch_time;
+ return 0;
+ case Opt_inode_readahead_blks:
+ if (result.uint_32 &&
+ (result.uint_32 > (1 << 30) ||
+ !is_power_of_2(result.uint_32))) {
+ ext4_msg(NULL, KERN_ERR,
+ "EXT4-fs: inode_readahead_blks must be "
+ "0 or a power of 2 smaller than 2^31");
+ return -EINVAL;
}
- sbi->s_resgid = gid;
- } else if (token == Opt_journal_dev) {
+ ctx->s_inode_readahead_blks = result.uint_32;
+ ctx->spec |= EXT4_SPEC_s_inode_readahead_blks;
+ return 0;
+ case Opt_init_itable:
+ ctx_set_mount_opt(ctx, EXT4_MOUNT_INIT_INODE_TABLE);
+ ctx->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
+ if (param->type == fs_value_is_string)
+ ctx->s_li_wait_mult = result.uint_32;
+ ctx->spec |= EXT4_SPEC_s_li_wait_mult;
+ return 0;
+ case Opt_max_dir_size_kb:
+ ctx->s_max_dir_size_kb = result.uint_32;
+ ctx->spec |= EXT4_SPEC_s_max_dir_size_kb;
+ return 0;
+#ifdef CONFIG_EXT4_DEBUG
+ case Opt_fc_debug_max_replay:
+ ctx->s_fc_debug_max_replay = result.uint_32;
+ ctx->spec |= EXT4_SPEC_s_fc_debug_max_replay;
+ return 0;
+#endif
+ case Opt_stripe:
+ ctx->s_stripe = result.uint_32;
+ ctx->spec |= EXT4_SPEC_s_stripe;
+ return 0;
+ case Opt_resuid:
+ ctx->s_resuid = result.uid;
+ ctx->spec |= EXT4_SPEC_s_resuid;
+ return 0;
+ case Opt_resgid:
+ ctx->s_resgid = result.gid;
+ ctx->spec |= EXT4_SPEC_s_resgid;
+ return 0;
+ case Opt_journal_dev:
if (is_remount) {
- ext4_msg(sb, KERN_ERR,
+ ext4_msg(NULL, KERN_ERR,
"Cannot specify journal on remount");
- return -1;
- }
- *journal_devnum = arg;
- } else if (token == Opt_journal_ioprio) {
- if (arg > 7) {
- ext4_msg(sb, KERN_ERR, "Invalid journal IO priority"
- " (must be 0-7)");
- return -1;
+ return -EINVAL;
}
- *journal_ioprio =
- IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
- } else if (m->flags & MOPT_DATAJ) {
+ ctx->journal_devnum = result.uint_32;
+ ctx->spec |= EXT4_SPEC_JOURNAL_DEV;
+ return 0;
+ case Opt_journal_path:
+ {
+ struct inode *journal_inode;
+ struct path path;
+ int error;
+
if (is_remount) {
- if (!sbi->s_journal)
- ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
- else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) {
- ext4_msg(sb, KERN_ERR,
- "Cannot change data mode on remount");
- return -1;
- }
- } else {
- clear_opt(sb, DATA_FLAGS);
- sbi->s_mount_opt |= m->mount_opt;
+ ext4_msg(NULL, KERN_ERR,
+ "Cannot specify journal on remount");
+ return -EINVAL;
}
-#ifdef CONFIG_QUOTA
- } else if (m->flags & MOPT_QFMT) {
- if (sb_any_quota_loaded(sb) &&
- sbi->s_jquota_fmt != m->mount_opt) {
- ext4_msg(sb, KERN_ERR, "Cannot change journaled "
- "quota options when quota turned on");
- return -1;
+
+ error = fs_lookup_param(fc, param, 1, LOOKUP_FOLLOW, &path);
+ if (error) {
+ ext4_msg(NULL, KERN_ERR, "error: could not find "
+ "journal device path");
+ return -EINVAL;
}
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_QUOTA)) {
- ext4_msg(sb, KERN_ERR,
- "Cannot set journaled quota options "
- "when QUOTA feature is enabled");
- return -1;
+
+ journal_inode = d_inode(path.dentry);
+ ctx->journal_devnum = new_encode_dev(journal_inode->i_rdev);
+ ctx->spec |= EXT4_SPEC_JOURNAL_DEV;
+ path_put(&path);
+ return 0;
+ }
+ case Opt_journal_ioprio:
+ if (result.uint_32 > 7) {
+ ext4_msg(NULL, KERN_ERR, "Invalid journal IO priority"
+ " (must be 0-7)");
+ return -EINVAL;
+ }
+ ctx->journal_ioprio =
+ IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, result.uint_32);
+ ctx->spec |= EXT4_SPEC_JOURNAL_IOPRIO;
+ return 0;
+ case Opt_test_dummy_encryption:
+ return ext4_parse_test_dummy_encryption(param, ctx);
+ case Opt_dax:
+ case Opt_dax_type:
+#ifdef CONFIG_FS_DAX
+ {
+ int type = (token == Opt_dax) ?
+ Opt_dax : result.uint_32;
+
+ switch (type) {
+ case Opt_dax:
+ case Opt_dax_always:
+ ctx_set_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
+ ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
+ break;
+ case Opt_dax_never:
+ ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
+ ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
+ break;
+ case Opt_dax_inode:
+ ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
+ ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
+ /* Strictly for printing options */
+ ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE);
+ break;
}
- sbi->s_jquota_fmt = m->mount_opt;
+ return 0;
+ }
+#else
+ ext4_msg(NULL, KERN_INFO, "dax option not supported");
+ return -EINVAL;
#endif
- } else {
- if (!args->from)
- arg = 1;
+ case Opt_data_err:
+ if (result.uint_32 == Opt_data_err_abort)
+ ctx_set_mount_opt(ctx, m->mount_opt);
+ else if (result.uint_32 == Opt_data_err_ignore)
+ ctx_clear_mount_opt(ctx, m->mount_opt);
+ return 0;
+ case Opt_mb_optimize_scan:
+ if (result.int_32 == 1) {
+ ctx_set_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
+ ctx->spec |= EXT4_SPEC_mb_optimize_scan;
+ } else if (result.int_32 == 0) {
+ ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
+ ctx->spec |= EXT4_SPEC_mb_optimize_scan;
+ } else {
+ ext4_msg(NULL, KERN_WARNING,
+ "mb_optimize_scan should be set to 0 or 1.");
+ return -EINVAL;
+ }
+ return 0;
+ }
+
+ /*
+ * At this point we should only be getting options requiring MOPT_SET,
+ * or MOPT_CLEAR. Anything else is a bug
+ */
+ if (m->token == Opt_err) {
+ ext4_msg(NULL, KERN_WARNING, "buggy handling of option %s",
+ param->key);
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ else {
+ unsigned int set = 0;
+
+ if ((param->type == fs_value_is_flag) ||
+ result.uint_32 > 0)
+ set = 1;
+
if (m->flags & MOPT_CLEAR)
- arg = !arg;
+ set = !set;
else if (unlikely(!(m->flags & MOPT_SET))) {
- ext4_msg(sb, KERN_WARNING,
- "buggy handling of option %s", opt);
+ ext4_msg(NULL, KERN_WARNING,
+ "buggy handling of option %s",
+ param->key);
WARN_ON(1);
- return -1;
+ return -EINVAL;
+ }
+ if (m->flags & MOPT_2) {
+ if (set != 0)
+ ctx_set_mount_opt2(ctx, m->mount_opt);
+ else
+ ctx_clear_mount_opt2(ctx, m->mount_opt);
+ } else {
+ if (set != 0)
+ ctx_set_mount_opt(ctx, m->mount_opt);
+ else
+ ctx_clear_mount_opt(ctx, m->mount_opt);
}
- if (arg != 0)
- sbi->s_mount_opt |= m->mount_opt;
- else
- sbi->s_mount_opt &= ~m->mount_opt;
}
- return 1;
+
+ return 0;
}
-static int parse_options(char *options, struct super_block *sb,
- unsigned long *journal_devnum,
- unsigned int *journal_ioprio,
- int is_remount)
+static int parse_options(struct fs_context *fc, char *options)
{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- char *p;
- substring_t args[MAX_OPT_ARGS];
- int token;
+ struct fs_parameter param;
+ int ret;
+ char *key;
if (!options)
- return 1;
+ return 0;
- while ((p = strsep(&options, ",")) != NULL) {
- if (!*p)
- continue;
- /*
- * Initialize args struct so we know whether arg was
- * found; some options take optional arguments.
- */
- args[0].to = args[0].from = NULL;
- token = match_token(p, tokens, args);
- if (handle_mount_opt(sb, p, token, args, journal_devnum,
- journal_ioprio, is_remount) < 0)
- return 0;
+ while ((key = strsep(&options, ",")) != NULL) {
+ if (*key) {
+ size_t v_len = 0;
+ char *value = strchr(key, '=');
+
+ param.type = fs_value_is_flag;
+ param.string = NULL;
+
+ if (value) {
+ if (value == key)
+ continue;
+
+ *value++ = 0;
+ v_len = strlen(value);
+ param.string = kmemdup_nul(value, v_len,
+ GFP_KERNEL);
+ if (!param.string)
+ return -ENOMEM;
+ param.type = fs_value_is_string;
+ }
+
+ param.key = key;
+ param.size = v_len;
+
+ ret = ext4_parse_param(fc, &param);
+ kfree(param.string);
+ if (ret < 0)
+ return ret;
+ }
}
-#ifdef CONFIG_QUOTA
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
- (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) {
- ext4_msg(sb, KERN_ERR, "Cannot set quota options when QUOTA "
- "feature is enabled");
+
+ ret = ext4_validate_options(fc);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int parse_apply_sb_mount_options(struct super_block *sb,
+ struct ext4_fs_context *m_ctx)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ char s_mount_opts[64];
+ struct ext4_fs_context *s_ctx = NULL;
+ struct fs_context *fc = NULL;
+ int ret = -ENOMEM;
+
+ if (!sbi->s_es->s_mount_opts[0])
return 0;
+
+ if (strscpy_pad(s_mount_opts, sbi->s_es->s_mount_opts) < 0)
+ return -E2BIG;
+
+ fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
+ if (!fc)
+ return -ENOMEM;
+
+ s_ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL);
+ if (!s_ctx)
+ goto out_free;
+
+ fc->fs_private = s_ctx;
+ fc->s_fs_info = sbi;
+
+ ret = parse_options(fc, s_mount_opts);
+ if (ret < 0)
+ goto parse_failed;
+
+ ret = ext4_check_opt_consistency(fc, sb);
+ if (ret < 0) {
+parse_failed:
+ ext4_msg(sb, KERN_WARNING,
+ "failed to parse options in superblock: %s",
+ s_mount_opts);
+ ret = 0;
+ goto out_free;
}
- if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
- if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
- clear_opt(sb, USRQUOTA);
- if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
- clear_opt(sb, GRPQUOTA);
+ if (s_ctx->spec & EXT4_SPEC_JOURNAL_DEV)
+ m_ctx->journal_devnum = s_ctx->journal_devnum;
+ if (s_ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)
+ m_ctx->journal_ioprio = s_ctx->journal_ioprio;
- if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
- ext4_msg(sb, KERN_ERR, "old and new quota "
- "format mixing");
- return 0;
+ ext4_apply_options(fc, sb);
+ ret = 0;
+
+out_free:
+ ext4_fc_free(fc);
+ kfree(fc);
+ return ret;
+}
+
+static void ext4_apply_quota_options(struct fs_context *fc,
+ struct super_block *sb)
+{
+#ifdef CONFIG_QUOTA
+ bool quota_feature = ext4_has_feature_quota(sb);
+ struct ext4_fs_context *ctx = fc->fs_private;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ char *qname;
+ int i;
+
+ if (quota_feature)
+ return;
+
+ if (ctx->spec & EXT4_SPEC_JQUOTA) {
+ for (i = 0; i < EXT4_MAXQUOTAS; i++) {
+ if (!(ctx->qname_spec & (1 << i)))
+ continue;
+
+ qname = ctx->s_qf_names[i]; /* May be NULL */
+ if (qname)
+ set_opt(sb, QUOTA);
+ ctx->s_qf_names[i] = NULL;
+ qname = rcu_replace_pointer(sbi->s_qf_names[i], qname,
+ lockdep_is_held(&sb->s_umount));
+ if (qname)
+ kfree_rcu_mightsleep(qname);
}
+ }
- if (!sbi->s_jquota_fmt) {
- ext4_msg(sb, KERN_ERR, "journaled quota format "
- "not specified");
+ if (ctx->spec & EXT4_SPEC_JQFMT)
+ sbi->s_jquota_fmt = ctx->s_jquota_fmt;
+#endif
+}
+
+/*
+ * Check quota settings consistency.
+ */
+static int ext4_check_quota_consistency(struct fs_context *fc,
+ struct super_block *sb)
+{
+#ifdef CONFIG_QUOTA
+ struct ext4_fs_context *ctx = fc->fs_private;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ bool quota_feature = ext4_has_feature_quota(sb);
+ bool quota_loaded = sb_any_quota_loaded(sb);
+ bool usr_qf_name, grp_qf_name, usrquota, grpquota;
+ int quota_flags, i;
+
+ /*
+ * We do the test below only for project quotas. 'usrquota' and
+ * 'grpquota' mount options are allowed even without quota feature
+ * to support legacy quotas in quota files.
+ */
+ if (ctx_test_mount_opt(ctx, EXT4_MOUNT_PRJQUOTA) &&
+ !ext4_has_feature_project(sb)) {
+ ext4_msg(NULL, KERN_ERR, "Project quota feature not enabled. "
+ "Cannot enable project quota enforcement.");
+ return -EINVAL;
+ }
+
+ quota_flags = EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
+ EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA;
+ if (quota_loaded &&
+ ctx->mask_s_mount_opt & quota_flags &&
+ !ctx_test_mount_opt(ctx, quota_flags))
+ goto err_quota_change;
+
+ if (ctx->spec & EXT4_SPEC_JQUOTA) {
+
+ for (i = 0; i < EXT4_MAXQUOTAS; i++) {
+ if (!(ctx->qname_spec & (1 << i)))
+ continue;
+
+ if (quota_loaded &&
+ !!sbi->s_qf_names[i] != !!ctx->s_qf_names[i])
+ goto err_jquota_change;
+
+ if (sbi->s_qf_names[i] && ctx->s_qf_names[i] &&
+ strcmp(get_qf_name(sb, sbi, i),
+ ctx->s_qf_names[i]) != 0)
+ goto err_jquota_specified;
+ }
+
+ if (quota_feature) {
+ ext4_msg(NULL, KERN_INFO,
+ "Journaled quota options ignored when "
+ "QUOTA feature is enabled");
return 0;
}
- } else {
- if (sbi->s_jquota_fmt) {
- ext4_msg(sb, KERN_ERR, "journaled quota format "
- "specified with no journaling "
- "enabled");
+ }
+
+ if (ctx->spec & EXT4_SPEC_JQFMT) {
+ if (sbi->s_jquota_fmt != ctx->s_jquota_fmt && quota_loaded)
+ goto err_jquota_change;
+ if (quota_feature) {
+ ext4_msg(NULL, KERN_INFO, "Quota format mount options "
+ "ignored when QUOTA feature is enabled");
return 0;
}
}
+
+ /* Make sure we don't mix old and new quota format */
+ usr_qf_name = (get_qf_name(sb, sbi, USRQUOTA) ||
+ ctx->s_qf_names[USRQUOTA]);
+ grp_qf_name = (get_qf_name(sb, sbi, GRPQUOTA) ||
+ ctx->s_qf_names[GRPQUOTA]);
+
+ usrquota = (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) ||
+ test_opt(sb, USRQUOTA));
+
+ grpquota = (ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA) ||
+ test_opt(sb, GRPQUOTA));
+
+ if (usr_qf_name) {
+ ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA);
+ usrquota = false;
+ }
+ if (grp_qf_name) {
+ ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA);
+ grpquota = false;
+ }
+
+ if (usr_qf_name || grp_qf_name) {
+ if (usrquota || grpquota) {
+ ext4_msg(NULL, KERN_ERR, "old and new quota "
+ "format mixing");
+ return -EINVAL;
+ }
+
+ if (!(ctx->spec & EXT4_SPEC_JQFMT || sbi->s_jquota_fmt)) {
+ ext4_msg(NULL, KERN_ERR, "journaled quota format "
+ "not specified");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+
+err_quota_change:
+ ext4_msg(NULL, KERN_ERR,
+ "Cannot change quota options when quota turned on");
+ return -EINVAL;
+err_jquota_change:
+ ext4_msg(NULL, KERN_ERR, "Cannot change journaled quota "
+ "options when quota turned on");
+ return -EINVAL;
+err_jquota_specified:
+ ext4_msg(NULL, KERN_ERR, "%s quota file already specified",
+ QTYPE2NAME(i));
+ return -EINVAL;
+#else
+ return 0;
#endif
- if (test_opt(sb, DIOREAD_NOLOCK)) {
- int blocksize =
- BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
+}
- if (blocksize < PAGE_CACHE_SIZE) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "dioread_nolock if block size != PAGE_SIZE");
+static int ext4_check_test_dummy_encryption(const struct fs_context *fc,
+ struct super_block *sb)
+{
+ const struct ext4_fs_context *ctx = fc->fs_private;
+ const struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy))
+ return 0;
+
+ if (!ext4_has_feature_encrypt(sb)) {
+ ext4_msg(NULL, KERN_WARNING,
+ "test_dummy_encryption requires encrypt feature");
+ return -EINVAL;
+ }
+ /*
+ * This mount option is just for testing, and it's not worthwhile to
+ * implement the extra complexity (e.g. RCU protection) that would be
+ * needed to allow it to be set or changed during remount. We do allow
+ * it to be specified during remount, but only if there is no change.
+ */
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+ if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
+ &ctx->dummy_enc_policy))
return 0;
+ ext4_msg(NULL, KERN_WARNING,
+ "Can't set or change test_dummy_encryption on remount");
+ return -EINVAL;
+ }
+ /* Also make sure s_mount_opts didn't contain a conflicting value. */
+ if (fscrypt_is_dummy_policy_set(&sbi->s_dummy_enc_policy)) {
+ if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
+ &ctx->dummy_enc_policy))
+ return 0;
+ ext4_msg(NULL, KERN_WARNING,
+ "Conflicting test_dummy_encryption options");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static void ext4_apply_test_dummy_encryption(struct ext4_fs_context *ctx,
+ struct super_block *sb)
+{
+ if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy) ||
+ /* if already set, it was already verified to be the same */
+ fscrypt_is_dummy_policy_set(&EXT4_SB(sb)->s_dummy_enc_policy))
+ return;
+ EXT4_SB(sb)->s_dummy_enc_policy = ctx->dummy_enc_policy;
+ memset(&ctx->dummy_enc_policy, 0, sizeof(ctx->dummy_enc_policy));
+ ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled");
+}
+
+static int ext4_check_opt_consistency(struct fs_context *fc,
+ struct super_block *sb)
+{
+ struct ext4_fs_context *ctx = fc->fs_private;
+ struct ext4_sb_info *sbi = fc->s_fs_info;
+ int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE;
+ int err;
+
+ if ((ctx->opt_flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
+ ext4_msg(NULL, KERN_ERR,
+ "Mount option(s) incompatible with ext2");
+ return -EINVAL;
+ }
+ if ((ctx->opt_flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
+ ext4_msg(NULL, KERN_ERR,
+ "Mount option(s) incompatible with ext3");
+ return -EINVAL;
+ }
+
+ if (ctx->s_want_extra_isize >
+ (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE)) {
+ ext4_msg(NULL, KERN_ERR,
+ "Invalid want_extra_isize %d",
+ ctx->s_want_extra_isize);
+ return -EINVAL;
+ }
+
+ err = ext4_check_test_dummy_encryption(fc, sb);
+ if (err)
+ return err;
+
+ if ((ctx->spec & EXT4_SPEC_DATAJ) && is_remount) {
+ if (!sbi->s_journal) {
+ ext4_msg(NULL, KERN_WARNING,
+ "Remounting file system with no journal "
+ "so ignoring journalled data option");
+ ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS);
+ } else if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS) !=
+ test_opt(sb, DATA_FLAGS)) {
+ ext4_msg(NULL, KERN_ERR, "Cannot change data mode "
+ "on remount");
+ return -EINVAL;
}
}
+
+ if (is_remount) {
+ if (!sbi->s_journal &&
+ ctx_test_mount_opt(ctx, EXT4_MOUNT_DATA_ERR_ABORT)) {
+ ext4_msg(NULL, KERN_WARNING,
+ "Remounting fs w/o journal so ignoring data_err option");
+ ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_ERR_ABORT);
+ }
+
+ if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) &&
+ (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
+ ext4_msg(NULL, KERN_ERR, "can't mount with "
+ "both data=journal and dax");
+ return -EINVAL;
+ }
+
+ if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) &&
+ (!(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
+ (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER))) {
+fail_dax_change_remount:
+ ext4_msg(NULL, KERN_ERR, "can't change "
+ "dax mount option while remounting");
+ return -EINVAL;
+ } else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER) &&
+ (!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
+ (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS))) {
+ goto fail_dax_change_remount;
+ } else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE) &&
+ ((sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
+ (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
+ !(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE))) {
+ goto fail_dax_change_remount;
+ }
+ }
+
+ return ext4_check_quota_consistency(fc, sb);
+}
+
+static void ext4_apply_options(struct fs_context *fc, struct super_block *sb)
+{
+ struct ext4_fs_context *ctx = fc->fs_private;
+ struct ext4_sb_info *sbi = fc->s_fs_info;
+
+ sbi->s_mount_opt &= ~ctx->mask_s_mount_opt;
+ sbi->s_mount_opt |= ctx->vals_s_mount_opt;
+ sbi->s_mount_opt2 &= ~ctx->mask_s_mount_opt2;
+ sbi->s_mount_opt2 |= ctx->vals_s_mount_opt2;
+ sb->s_flags &= ~ctx->mask_s_flags;
+ sb->s_flags |= ctx->vals_s_flags;
+
+#define APPLY(X) ({ if (ctx->spec & EXT4_SPEC_##X) sbi->X = ctx->X; })
+ APPLY(s_commit_interval);
+ APPLY(s_stripe);
+ APPLY(s_max_batch_time);
+ APPLY(s_min_batch_time);
+ APPLY(s_want_extra_isize);
+ APPLY(s_inode_readahead_blks);
+ APPLY(s_max_dir_size_kb);
+ APPLY(s_li_wait_mult);
+ APPLY(s_resgid);
+ APPLY(s_resuid);
+
+#ifdef CONFIG_EXT4_DEBUG
+ APPLY(s_fc_debug_max_replay);
+#endif
+
+ ext4_apply_quota_options(fc, sb);
+ ext4_apply_test_dummy_encryption(ctx, sb);
+}
+
+
+static int ext4_validate_options(struct fs_context *fc)
+{
+#ifdef CONFIG_QUOTA
+ struct ext4_fs_context *ctx = fc->fs_private;
+ char *usr_qf_name, *grp_qf_name;
+
+ usr_qf_name = ctx->s_qf_names[USRQUOTA];
+ grp_qf_name = ctx->s_qf_names[GRPQUOTA];
+
+ if (usr_qf_name || grp_qf_name) {
+ if (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) && usr_qf_name)
+ ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA);
+
+ if (ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA) && grp_qf_name)
+ ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA);
+
+ if (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) ||
+ ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA)) {
+ ext4_msg(NULL, KERN_ERR, "old and new quota "
+ "format mixing");
+ return -EINVAL;
+ }
+ }
+#endif
return 1;
}
@@ -1679,6 +2881,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
{
#if defined(CONFIG_QUOTA)
struct ext4_sb_info *sbi = EXT4_SB(sb);
+ char *usr_qf_name, *grp_qf_name;
if (sbi->s_jquota_fmt) {
char *fmtname = "";
@@ -1697,28 +2900,25 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
seq_printf(seq, ",jqfmt=%s", fmtname);
}
- if (sbi->s_qf_names[USRQUOTA])
- seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
-
- if (sbi->s_qf_names[GRPQUOTA])
- seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
-
- if (test_opt(sb, USRQUOTA))
- seq_puts(seq, ",usrquota");
-
- if (test_opt(sb, GRPQUOTA))
- seq_puts(seq, ",grpquota");
+ rcu_read_lock();
+ usr_qf_name = rcu_dereference(sbi->s_qf_names[USRQUOTA]);
+ grp_qf_name = rcu_dereference(sbi->s_qf_names[GRPQUOTA]);
+ if (usr_qf_name)
+ seq_show_option(seq, "usrjquota", usr_qf_name);
+ if (grp_qf_name)
+ seq_show_option(seq, "grpjquota", grp_qf_name);
+ rcu_read_unlock();
#endif
}
static const char *token2str(int token)
{
- const struct match_token *t;
+ const struct fs_parameter_spec *spec;
- for (t = tokens; t->token != Opt_err; t++)
- if (t->token == token && !strchr(t->pattern, '='))
+ for (spec = ext4_param_specs; spec->name != NULL; spec++)
+ if (spec->opt == token && !spec->type)
break;
- return t->pattern;
+ return spec->name;
}
/*
@@ -1731,7 +2931,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
- int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt;
+ int def_errors;
const struct mount_opts *m;
char sep = nodefs ? '\n' : ',';
@@ -1743,24 +2943,37 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
for (m = ext4_mount_opts; m->token != Opt_err; m++) {
int want_set = m->flags & MOPT_SET;
+ int opt_2 = m->flags & MOPT_2;
+ unsigned int mount_opt, def_mount_opt;
+
if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
- (m->flags & MOPT_CLEAR_ERR))
+ m->flags & MOPT_SKIP)
+ continue;
+
+ if (opt_2) {
+ mount_opt = sbi->s_mount_opt2;
+ def_mount_opt = sbi->s_def_mount_opt2;
+ } else {
+ mount_opt = sbi->s_mount_opt;
+ def_mount_opt = sbi->s_def_mount_opt;
+ }
+ /* skip if same as the default */
+ if (!nodefs && !(m->mount_opt & (mount_opt ^ def_mount_opt)))
continue;
- if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
- continue; /* skip if same as the default */
+ /* select Opt_noFoo vs Opt_Foo */
if ((want_set &&
- (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) ||
- (!want_set && (sbi->s_mount_opt & m->mount_opt)))
- continue; /* select Opt_noFoo vs Opt_Foo */
+ (mount_opt & m->mount_opt) != m->mount_opt) ||
+ (!want_set && (mount_opt & m->mount_opt)))
+ continue;
SEQ_OPTS_PRINT("%s", token2str(m->token));
}
if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
- le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
+ ext4_get_resuid(es) != EXT4_DEF_RESUID)
SEQ_OPTS_PRINT("resuid=%u",
from_kuid_munged(&init_user_ns, sbi->s_resuid));
if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
- le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
+ ext4_get_resgid(es) != EXT4_DEF_RESGID)
SEQ_OPTS_PRINT("resgid=%u",
from_kgid_munged(&init_user_ns, sbi->s_resgid));
def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
@@ -1776,11 +2989,12 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
- if (sb->s_flags & MS_I_VERSION)
+ if (nodefs && sb->s_flags & SB_I_VERSION)
SEQ_OPTS_PUTS("i_version");
if (nodefs || sbi->s_stripe)
SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
- if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) {
+ if (nodefs || EXT4_MOUNT_DATA_FLAGS &
+ (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
SEQ_OPTS_PUTS("data=journal");
else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
@@ -1793,11 +3007,46 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
SEQ_OPTS_PRINT("inode_readahead_blks=%u",
sbi->s_inode_readahead_blks);
- if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
+ if (test_opt(sb, INIT_INODE_TABLE) && (nodefs ||
(sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
if (nodefs || sbi->s_max_dir_size_kb)
SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
+ if (test_opt(sb, DATA_ERR_ABORT))
+ SEQ_OPTS_PUTS("data_err=abort");
+
+ fscrypt_show_test_dummy_encryption(seq, sep, sb);
+
+ if (sb->s_flags & SB_INLINECRYPT)
+ SEQ_OPTS_PUTS("inlinecrypt");
+
+ if (test_opt(sb, DAX_ALWAYS)) {
+ if (IS_EXT2_SB(sb))
+ SEQ_OPTS_PUTS("dax");
+ else
+ SEQ_OPTS_PUTS("dax=always");
+ } else if (test_opt2(sb, DAX_NEVER)) {
+ SEQ_OPTS_PUTS("dax=never");
+ } else if (test_opt2(sb, DAX_INODE)) {
+ SEQ_OPTS_PUTS("dax=inode");
+ }
+
+ if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD &&
+ !test_opt2(sb, MB_OPTIMIZE_SCAN)) {
+ SEQ_OPTS_PUTS("mb_optimize_scan=0");
+ } else if (sbi->s_groups_count < MB_DEFAULT_LINEAR_SCAN_THRESHOLD &&
+ test_opt2(sb, MB_OPTIMIZE_SCAN)) {
+ SEQ_OPTS_PUTS("mb_optimize_scan=1");
+ }
+
+ if (nodefs && !test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS))
+ SEQ_OPTS_PUTS("prefetch_block_bitmaps");
+
+ if (ext4_emergency_ro(sb))
+ SEQ_OPTS_PUTS("emergency_ro");
+
+ if (ext4_forced_shutdown(sb))
+ SEQ_OPTS_PUTS("shutdown");
ext4_show_quota_options(seq, sb);
return 0;
@@ -1808,47 +3057,35 @@ static int ext4_show_options(struct seq_file *seq, struct dentry *root)
return _ext4_show_options(seq, root->d_sb, 0);
}
-static int options_seq_show(struct seq_file *seq, void *offset)
+int ext4_seq_options_show(struct seq_file *seq, void *offset)
{
struct super_block *sb = seq->private;
int rc;
- seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw");
+ seq_puts(seq, sb_rdonly(sb) ? "ro" : "rw");
rc = _ext4_show_options(seq, sb, 1);
- seq_puts(seq, "\n");
+ seq_putc(seq, '\n');
return rc;
}
-static int options_open_fs(struct inode *inode, struct file *file)
-{
- return single_open(file, options_seq_show, PDE_DATA(inode));
-}
-
-static const struct file_operations ext4_seq_options_fops = {
- .owner = THIS_MODULE,
- .open = options_open_fs,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
int read_only)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- int res = 0;
+ int err = 0;
if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
ext4_msg(sb, KERN_ERR, "revision level too high, "
"forcing read-only mode");
- res = MS_RDONLY;
+ err = -EROFS;
+ goto done;
}
if (read_only)
goto done;
if (!(sbi->s_mount_state & EXT4_VALID_FS))
ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
"running e2fsck is recommended");
- else if ((sbi->s_mount_state & EXT4_ERROR_FS))
+ else if (sbi->s_mount_state & EXT4_ERROR_FS)
ext4_msg(sb, KERN_WARNING,
"warning: mounting fs with errors, "
"running e2fsck is recommended");
@@ -1859,8 +3096,8 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
"warning: maximal mount count reached, "
"running e2fsck is recommended");
else if (le32_to_cpu(es->s_checkinterval) &&
- (le32_to_cpu(es->s_lastcheck) +
- le32_to_cpu(es->s_checkinterval) <= get_seconds()))
+ (ext4_get_tstamp(es, s_lastcheck) +
+ le32_to_cpu(es->s_checkinterval) <= ktime_get_real_seconds()))
ext4_msg(sb, KERN_WARNING,
"warning: checktime reached, "
"running e2fsck is recommended");
@@ -1869,12 +3106,14 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
le16_add_cpu(&es->s_mnt_count, 1);
- es->s_mtime = cpu_to_le32(get_seconds());
- ext4_update_dynamic_rev(sb);
- if (sbi->s_journal)
- EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+ ext4_update_tstamp(es, s_mtime);
+ if (sbi->s_journal) {
+ ext4_set_feature_journal_needs_recovery(sb);
+ if (ext4_has_feature_orphan_file(sb))
+ ext4_set_feature_orphan_present(sb);
+ }
- ext4_commit_super(sb, 1);
+ err = ext4_commit_super(sb);
done:
if (test_opt(sb, DEBUG))
printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
@@ -1884,16 +3123,14 @@ done:
EXT4_BLOCKS_PER_GROUP(sb),
EXT4_INODES_PER_GROUP(sb),
sbi->s_mount_opt, sbi->s_mount_opt2);
-
- cleancache_init_fs(sb);
- return res;
+ return err;
}
int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct flex_groups *new_groups;
- int size;
+ struct flex_groups **old_groups, **new_groups;
+ int size, i, j;
if (!sbi->s_log_groups_per_flex)
return 0;
@@ -1902,22 +3139,37 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
if (size <= sbi->s_flex_groups_allocated)
return 0;
- size = roundup_pow_of_two(size * sizeof(struct flex_groups));
- new_groups = ext4_kvzalloc(size, GFP_KERNEL);
+ new_groups = kvzalloc(roundup_pow_of_two(size *
+ sizeof(*sbi->s_flex_groups)), GFP_KERNEL);
if (!new_groups) {
- ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups",
- size / (int) sizeof(struct flex_groups));
+ ext4_msg(sb, KERN_ERR,
+ "not enough memory for %d flex group pointers", size);
return -ENOMEM;
}
-
- if (sbi->s_flex_groups) {
- memcpy(new_groups, sbi->s_flex_groups,
- (sbi->s_flex_groups_allocated *
- sizeof(struct flex_groups)));
- ext4_kvfree(sbi->s_flex_groups);
+ for (i = sbi->s_flex_groups_allocated; i < size; i++) {
+ new_groups[i] = kvzalloc(roundup_pow_of_two(
+ sizeof(struct flex_groups)),
+ GFP_KERNEL);
+ if (!new_groups[i]) {
+ for (j = sbi->s_flex_groups_allocated; j < i; j++)
+ kvfree(new_groups[j]);
+ kvfree(new_groups);
+ ext4_msg(sb, KERN_ERR,
+ "not enough memory for %d flex groups", size);
+ return -ENOMEM;
+ }
}
- sbi->s_flex_groups = new_groups;
- sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups);
+ rcu_read_lock();
+ old_groups = rcu_dereference(sbi->s_flex_groups);
+ if (old_groups)
+ memcpy(new_groups, old_groups,
+ (sbi->s_flex_groups_allocated *
+ sizeof(struct flex_groups *)));
+ rcu_read_unlock();
+ rcu_assign_pointer(sbi->s_flex_groups, new_groups);
+ sbi->s_flex_groups_allocated = size;
+ if (old_groups)
+ ext4_kvfree_array_rcu(old_groups);
return 0;
}
@@ -1925,6 +3177,7 @@ static int ext4_fill_flex_info(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_group_desc *gdp = NULL;
+ struct flex_groups *fg;
ext4_group_t flex_group;
int i, err;
@@ -1942,12 +3195,11 @@ static int ext4_fill_flex_info(struct super_block *sb)
gdp = ext4_get_group_desc(sb, i, NULL);
flex_group = ext4_flex_group(sbi, i);
- atomic_add(ext4_free_inodes_count(sb, gdp),
- &sbi->s_flex_groups[flex_group].free_inodes);
+ fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group);
+ atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes);
atomic64_add(ext4_free_group_clusters(sb, gdp),
- &sbi->s_flex_groups[flex_group].free_clusters);
- atomic_add(ext4_used_dirs_count(sb, gdp),
- &sbi->s_flex_groups[flex_group].used_dirs);
+ &fg->free_clusters);
+ atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs);
}
return 1;
@@ -1955,45 +3207,45 @@ failed:
return 0;
}
-static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
+static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
struct ext4_group_desc *gdp)
{
- int offset;
+ int offset = offsetof(struct ext4_group_desc, bg_checksum);
__u16 crc = 0;
__le32 le_group = cpu_to_le32(block_group);
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
- if ((sbi->s_es->s_feature_ro_compat &
- cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) {
+ if (ext4_has_feature_metadata_csum(sbi->s_sb)) {
/* Use new metadata_csum algorithm */
- __le16 save_csum;
__u32 csum32;
+ __u16 dummy_csum = 0;
- save_csum = gdp->bg_checksum;
- gdp->bg_checksum = 0;
- csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
+ csum32 = ext4_chksum(sbi->s_csum_seed, (__u8 *)&le_group,
sizeof(le_group));
- csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp,
- sbi->s_desc_size);
- gdp->bg_checksum = save_csum;
+ csum32 = ext4_chksum(csum32, (__u8 *)gdp, offset);
+ csum32 = ext4_chksum(csum32, (__u8 *)&dummy_csum,
+ sizeof(dummy_csum));
+ offset += sizeof(dummy_csum);
+ if (offset < sbi->s_desc_size)
+ csum32 = ext4_chksum(csum32, (__u8 *)gdp + offset,
+ sbi->s_desc_size - offset);
crc = csum32 & 0xFFFF;
goto out;
}
/* old crc16 code */
- offset = offsetof(struct ext4_group_desc, bg_checksum);
+ if (!ext4_has_feature_gdt_csum(sb))
+ return 0;
crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
crc = crc16(crc, (__u8 *)gdp, offset);
offset += sizeof(gdp->bg_checksum); /* skip checksum */
/* for checksum of struct ext4_group_desc do the rest...*/
- if ((sbi->s_es->s_feature_incompat &
- cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
- offset < le16_to_cpu(sbi->s_es->s_desc_size))
+ if (ext4_has_feature_64bit(sb) && offset < sbi->s_desc_size)
crc = crc16(crc, (__u8 *)gdp + offset,
- le16_to_cpu(sbi->s_es->s_desc_size) -
- offset);
+ sbi->s_desc_size - offset);
out:
return cpu_to_le16(crc);
@@ -2003,8 +3255,7 @@ int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
struct ext4_group_desc *gdp)
{
if (ext4_has_group_desc_csum(sb) &&
- (gdp->bg_checksum != ext4_group_desc_csum(EXT4_SB(sb),
- block_group, gdp)))
+ (gdp->bg_checksum != ext4_group_desc_csum(sb, block_group, gdp)))
return 0;
return 1;
@@ -2015,23 +3266,25 @@ void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
{
if (!ext4_has_group_desc_csum(sb))
return;
- gdp->bg_checksum = ext4_group_desc_csum(EXT4_SB(sb), block_group, gdp);
+ gdp->bg_checksum = ext4_group_desc_csum(sb, block_group, gdp);
}
/* Called at mount-time, super-block is locked */
static int ext4_check_descriptors(struct super_block *sb,
+ ext4_fsblk_t sb_block,
ext4_group_t *first_not_zeroed)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
ext4_fsblk_t last_block;
+ ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0);
ext4_fsblk_t block_bitmap;
ext4_fsblk_t inode_bitmap;
ext4_fsblk_t inode_table;
int flexbg_flag = 0;
ext4_group_t i, grp = sbi->s_groups_count;
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
+ if (ext4_has_feature_flex_bg(sb))
flexbg_flag = 1;
ext4_debug("Checking group descriptors");
@@ -2050,6 +3303,21 @@ static int ext4_check_descriptors(struct super_block *sb,
grp = i;
block_bitmap = ext4_block_bitmap(sb, gdp);
+ if (block_bitmap == sb_block) {
+ ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+ "Block bitmap for group %u overlaps "
+ "superblock", i);
+ if (!sb_rdonly(sb))
+ return 0;
+ }
+ if (block_bitmap >= sb_block + 1 &&
+ block_bitmap <= last_bg_block) {
+ ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+ "Block bitmap for group %u overlaps "
+ "block group descriptors", i);
+ if (!sb_rdonly(sb))
+ return 0;
+ }
if (block_bitmap < first_block || block_bitmap > last_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
"Block bitmap for group %u not in group "
@@ -2057,6 +3325,21 @@ static int ext4_check_descriptors(struct super_block *sb,
return 0;
}
inode_bitmap = ext4_inode_bitmap(sb, gdp);
+ if (inode_bitmap == sb_block) {
+ ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+ "Inode bitmap for group %u overlaps "
+ "superblock", i);
+ if (!sb_rdonly(sb))
+ return 0;
+ }
+ if (inode_bitmap >= sb_block + 1 &&
+ inode_bitmap <= last_bg_block) {
+ ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+ "Inode bitmap for group %u overlaps "
+ "block group descriptors", i);
+ if (!sb_rdonly(sb))
+ return 0;
+ }
if (inode_bitmap < first_block || inode_bitmap > last_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
"Inode bitmap for group %u not in group "
@@ -2064,6 +3347,21 @@ static int ext4_check_descriptors(struct super_block *sb,
return 0;
}
inode_table = ext4_inode_table(sb, gdp);
+ if (inode_table == sb_block) {
+ ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+ "Inode table for group %u overlaps "
+ "superblock", i);
+ if (!sb_rdonly(sb))
+ return 0;
+ }
+ if (inode_table >= sb_block + 1 &&
+ inode_table <= last_bg_block) {
+ ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+ "Inode table for group %u overlaps "
+ "block group descriptors", i);
+ if (!sb_rdonly(sb))
+ return 0;
+ }
if (inode_table < first_block ||
inode_table + sbi->s_itb_per_group - 1 > last_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
@@ -2075,9 +3373,9 @@ static int ext4_check_descriptors(struct super_block *sb,
if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
"Checksum for group %u failed (%u!=%u)",
- i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
+ i, le16_to_cpu(ext4_group_desc_csum(sb, i,
gdp)), le16_to_cpu(gdp->bg_checksum));
- if (!(sb->s_flags & MS_RDONLY)) {
+ if (!sb_rdonly(sb)) {
ext4_unlock_group(sb, i);
return 0;
}
@@ -2088,139 +3386,9 @@ static int ext4_check_descriptors(struct super_block *sb,
}
if (NULL != first_not_zeroed)
*first_not_zeroed = grp;
-
- ext4_free_blocks_count_set(sbi->s_es,
- EXT4_C2B(sbi, ext4_count_free_clusters(sb)));
- sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
return 1;
}
-/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
- * the superblock) which were deleted from all directories, but held open by
- * a process at the time of a crash. We walk the list and try to delete these
- * inodes at recovery time (only with a read-write filesystem).
- *
- * In order to keep the orphan inode chain consistent during traversal (in
- * case of crash during recovery), we link each inode into the superblock
- * orphan list_head and handle it the same way as an inode deletion during
- * normal operation (which journals the operations for us).
- *
- * We only do an iget() and an iput() on each inode, which is very safe if we
- * accidentally point at an in-use or already deleted inode. The worst that
- * can happen in this case is that we get a "bit already cleared" message from
- * ext4_free_inode(). The only reason we would point at a wrong inode is if
- * e2fsck was run on this filesystem, and it must have already done the orphan
- * inode cleanup for us, so we can safely abort without any further action.
- */
-static void ext4_orphan_cleanup(struct super_block *sb,
- struct ext4_super_block *es)
-{
- unsigned int s_flags = sb->s_flags;
- int nr_orphans = 0, nr_truncates = 0;
-#ifdef CONFIG_QUOTA
- int i;
-#endif
- if (!es->s_last_orphan) {
- jbd_debug(4, "no orphan inodes to clean up\n");
- return;
- }
-
- if (bdev_read_only(sb->s_bdev)) {
- ext4_msg(sb, KERN_ERR, "write access "
- "unavailable, skipping orphan cleanup");
- return;
- }
-
- /* Check if feature set would not allow a r/w mount */
- if (!ext4_feature_set_ok(sb, 0)) {
- ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
- "unknown ROCOMPAT features");
- return;
- }
-
- if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
- /* don't clear list on RO mount w/ errors */
- if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
- jbd_debug(1, "Errors on filesystem, "
- "clearing orphan list.\n");
- es->s_last_orphan = 0;
- }
- jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
- return;
- }
-
- if (s_flags & MS_RDONLY) {
- ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
- sb->s_flags &= ~MS_RDONLY;
- }
-#ifdef CONFIG_QUOTA
- /* Needed for iput() to work correctly and not trash data */
- sb->s_flags |= MS_ACTIVE;
- /* Turn on quotas so that they are updated correctly */
- for (i = 0; i < MAXQUOTAS; i++) {
- if (EXT4_SB(sb)->s_qf_names[i]) {
- int ret = ext4_quota_on_mount(sb, i);
- if (ret < 0)
- ext4_msg(sb, KERN_ERR,
- "Cannot turn on journaled "
- "quota: error %d", ret);
- }
- }
-#endif
-
- while (es->s_last_orphan) {
- struct inode *inode;
-
- inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
- if (IS_ERR(inode)) {
- es->s_last_orphan = 0;
- break;
- }
-
- list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
- dquot_initialize(inode);
- if (inode->i_nlink) {
- if (test_opt(sb, DEBUG))
- ext4_msg(sb, KERN_DEBUG,
- "%s: truncating inode %lu to %lld bytes",
- __func__, inode->i_ino, inode->i_size);
- jbd_debug(2, "truncating inode %lu to %lld bytes\n",
- inode->i_ino, inode->i_size);
- mutex_lock(&inode->i_mutex);
- truncate_inode_pages(inode->i_mapping, inode->i_size);
- ext4_truncate(inode);
- mutex_unlock(&inode->i_mutex);
- nr_truncates++;
- } else {
- if (test_opt(sb, DEBUG))
- ext4_msg(sb, KERN_DEBUG,
- "%s: deleting unreferenced inode %lu",
- __func__, inode->i_ino);
- jbd_debug(2, "deleting unreferenced inode %lu\n",
- inode->i_ino);
- nr_orphans++;
- }
- iput(inode); /* The delete magic happens here! */
- }
-
-#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
-
- if (nr_orphans)
- ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
- PLURAL(nr_orphans));
- if (nr_truncates)
- ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
- PLURAL(nr_truncates));
-#ifdef CONFIG_QUOTA
- /* Turn quotas off */
- for (i = 0; i < MAXQUOTAS; i++) {
- if (sb_dqopt(sb)->files[i])
- dquot_quota_off(sb, i);
- }
-#endif
- sb->s_flags = s_flags; /* Restore MS_RDONLY status */
-}
-
/*
* Maximal extent format file size.
* Resulting logical blkno at s_maxbytes must fit in our on-disk
@@ -2241,13 +3409,9 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files)
loff_t res;
loff_t upper_limit = MAX_LFS_FILESIZE;
- /* small i_blocks in vfs inode? */
- if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
- /*
- * CONFIG_LBDAF is not enabled implies the inode
- * i_block represent total blocks in 512 bytes
- * 32 == size of vfs inode i_blocks * 8
- */
+ BUILD_BUG_ON(sizeof(blkcnt_t) < sizeof(u64));
+
+ if (!has_huge_files) {
upper_limit = (1LL << 32) - 1;
/* total blocks in file system block size */
@@ -2277,22 +3441,23 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files)
*/
static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
{
- loff_t res = EXT4_NDIR_BLOCKS;
+ loff_t upper_limit, res = EXT4_NDIR_BLOCKS;
int meta_blocks;
- loff_t upper_limit;
- /* This is calculated to be the largest file size for a dense, block
+ unsigned int ppb = 1 << (bits - 2);
+
+ /*
+ * This is calculated to be the largest file size for a dense, block
* mapped file such that the file's total number of 512-byte sectors,
* including data and all indirect blocks, does not exceed (2^48 - 1).
*
* __u32 i_blocks_lo and _u16 i_blocks_high represent the total
* number of 512-byte sectors of the file.
*/
-
- if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
+ if (!has_huge_files) {
/*
- * !has_huge_files or CONFIG_LBDAF not enabled implies that
- * the inode i_block field represents total file blocks in
- * 2^32 512-byte sectors == size of vfs inode i_blocks * 8
+ * !has_huge_files or implies that the inode i_block field
+ * represents total file blocks in 2^32 512-byte sectors ==
+ * size of vfs inode i_blocks * 8
*/
upper_limit = (1LL << 32) - 1;
@@ -2310,23 +3475,38 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
}
+ /* Compute how many blocks we can address by block tree */
+ res += ppb;
+ res += ppb * ppb;
+ res += ((loff_t)ppb) * ppb * ppb;
+ /* Compute how many metadata blocks are needed */
+ meta_blocks = 1;
+ meta_blocks += 1 + ppb;
+ meta_blocks += 1 + ppb + ppb * ppb;
+ /* Does block tree limit file size? */
+ if (res + meta_blocks <= upper_limit)
+ goto check_lfs;
+
+ res = upper_limit;
+ /* How many metadata blocks are needed for addressing upper_limit? */
+ upper_limit -= EXT4_NDIR_BLOCKS;
/* indirect blocks */
meta_blocks = 1;
+ upper_limit -= ppb;
/* double indirect blocks */
- meta_blocks += 1 + (1LL << (bits-2));
- /* tripple indirect blocks */
- meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
-
- upper_limit -= meta_blocks;
- upper_limit <<= bits;
-
- res += 1LL << (bits-2);
- res += 1LL << (2*(bits-2));
- res += 1LL << (3*(bits-2));
+ if (upper_limit < ppb * ppb) {
+ meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb);
+ res -= meta_blocks;
+ goto check_lfs;
+ }
+ meta_blocks += 1 + ppb;
+ upper_limit -= ppb * ppb;
+ /* tripple indirect blocks for the rest */
+ meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb) +
+ DIV_ROUND_UP_ULL(upper_limit, ppb*ppb);
+ res -= meta_blocks;
+check_lfs:
res <<= bits;
- if (res > upper_limit)
- res = upper_limit;
-
if (res > MAX_LFS_FILESIZE)
res = MAX_LFS_FILESIZE;
@@ -2342,13 +3522,22 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb,
first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
- if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
- nr < first_meta_bg)
+ if (!ext4_has_feature_meta_bg(sb) || nr < first_meta_bg)
return logical_sb_block + nr + 1;
bg = sbi->s_desc_per_block * nr;
if (ext4_bg_has_super(sb, bg))
has_super = 1;
+ /*
+ * If we have a meta_bg fs with 1k blocks, group 0's GDT is at
+ * block 2, not 1. If s_first_data_block == 0 (bigalloc is enabled
+ * on modern mke2fs or blksize > 1k on older mke2fs) then we must
+ * compensate.
+ */
+ if (sb->s_blocksize == 1024 && nr == 0 &&
+ le32_to_cpu(sbi->s_es->s_first_data_block) == 0)
+ has_super++;
+
return (has_super + ext4_group_first_block_no(sb, bg));
}
@@ -2372,9 +3561,9 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
ret = sbi->s_stripe;
- else if (stripe_width <= sbi->s_blocks_per_group)
+ else if (stripe_width && stripe_width <= sbi->s_blocks_per_group)
ret = stripe_width;
- else if (stride <= sbi->s_blocks_per_group)
+ else if (stride && stride <= sbi->s_blocks_per_group)
ret = stride;
else
ret = 0;
@@ -2389,283 +3578,15 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
return ret;
}
-/* sysfs supprt */
-
-struct ext4_attr {
- struct attribute attr;
- ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *);
- ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *,
- const char *, size_t);
- union {
- int offset;
- int deprecated_val;
- } u;
-};
-
-static int parse_strtoull(const char *buf,
- unsigned long long max, unsigned long long *value)
-{
- int ret;
-
- ret = kstrtoull(skip_spaces(buf), 0, value);
- if (!ret && *value > max)
- ret = -EINVAL;
- return ret;
-}
-
-static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
- struct ext4_sb_info *sbi,
- char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (s64) EXT4_C2B(sbi,
- percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
-}
-
-static ssize_t session_write_kbytes_show(struct ext4_attr *a,
- struct ext4_sb_info *sbi, char *buf)
-{
- struct super_block *sb = sbi->s_buddy_cache->i_sb;
-
- if (!sb->s_bdev->bd_part)
- return snprintf(buf, PAGE_SIZE, "0\n");
- return snprintf(buf, PAGE_SIZE, "%lu\n",
- (part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
- sbi->s_sectors_written_start) >> 1);
-}
-
-static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
- struct ext4_sb_info *sbi, char *buf)
-{
- struct super_block *sb = sbi->s_buddy_cache->i_sb;
-
- if (!sb->s_bdev->bd_part)
- return snprintf(buf, PAGE_SIZE, "0\n");
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long)(sbi->s_kbytes_written +
- ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
- EXT4_SB(sb)->s_sectors_written_start) >> 1)));
-}
-
-static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
- struct ext4_sb_info *sbi,
- const char *buf, size_t count)
-{
- unsigned long t;
- int ret;
-
- ret = kstrtoul(skip_spaces(buf), 0, &t);
- if (ret)
- return ret;
-
- if (t && (!is_power_of_2(t) || t > 0x40000000))
- return -EINVAL;
-
- sbi->s_inode_readahead_blks = t;
- return count;
-}
-
-static ssize_t sbi_ui_show(struct ext4_attr *a,
- struct ext4_sb_info *sbi, char *buf)
-{
- unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset);
-
- return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
-}
-
-static ssize_t sbi_ui_store(struct ext4_attr *a,
- struct ext4_sb_info *sbi,
- const char *buf, size_t count)
-{
- unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset);
- unsigned long t;
- int ret;
-
- ret = kstrtoul(skip_spaces(buf), 0, &t);
- if (ret)
- return ret;
- *ui = t;
- return count;
-}
-
-static ssize_t reserved_clusters_show(struct ext4_attr *a,
- struct ext4_sb_info *sbi, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%llu\n",
- (unsigned long long) atomic64_read(&sbi->s_resv_clusters));
-}
-
-static ssize_t reserved_clusters_store(struct ext4_attr *a,
- struct ext4_sb_info *sbi,
- const char *buf, size_t count)
-{
- unsigned long long val;
- int ret;
-
- if (parse_strtoull(buf, -1ULL, &val))
- return -EINVAL;
- ret = ext4_reserve_clusters(sbi, val);
-
- return ret ? ret : count;
-}
-
-static ssize_t trigger_test_error(struct ext4_attr *a,
- struct ext4_sb_info *sbi,
- const char *buf, size_t count)
-{
- int len = count;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- if (len && buf[len-1] == '\n')
- len--;
-
- if (len)
- ext4_error(sbi->s_sb, "%.*s", len, buf);
- return count;
-}
-
-static ssize_t sbi_deprecated_show(struct ext4_attr *a,
- struct ext4_sb_info *sbi, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%d\n", a->u.deprecated_val);
-}
-
-#define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \
-static struct ext4_attr ext4_attr_##_name = { \
- .attr = {.name = __stringify(_name), .mode = _mode }, \
- .show = _show, \
- .store = _store, \
- .u = { \
- .offset = offsetof(struct ext4_sb_info, _elname),\
- }, \
-}
-#define EXT4_ATTR(name, mode, show, store) \
-static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
-
-#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL)
-#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
-#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)
-#define EXT4_RW_ATTR_SBI_UI(name, elname) \
- EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname)
-#define ATTR_LIST(name) &ext4_attr_##name.attr
-#define EXT4_DEPRECATED_ATTR(_name, _val) \
-static struct ext4_attr ext4_attr_##_name = { \
- .attr = {.name = __stringify(_name), .mode = 0444 }, \
- .show = sbi_deprecated_show, \
- .u = { \
- .deprecated_val = _val, \
- }, \
-}
-
-EXT4_RO_ATTR(delayed_allocation_blocks);
-EXT4_RO_ATTR(session_write_kbytes);
-EXT4_RO_ATTR(lifetime_write_kbytes);
-EXT4_RW_ATTR(reserved_clusters);
-EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
- inode_readahead_blks_store, s_inode_readahead_blks);
-EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
-EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
-EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
-EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
-EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
-EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
-EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
-EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128);
-EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
-EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error);
-
-static struct attribute *ext4_attrs[] = {
- ATTR_LIST(delayed_allocation_blocks),
- ATTR_LIST(session_write_kbytes),
- ATTR_LIST(lifetime_write_kbytes),
- ATTR_LIST(reserved_clusters),
- ATTR_LIST(inode_readahead_blks),
- ATTR_LIST(inode_goal),
- ATTR_LIST(mb_stats),
- ATTR_LIST(mb_max_to_scan),
- ATTR_LIST(mb_min_to_scan),
- ATTR_LIST(mb_order2_req),
- ATTR_LIST(mb_stream_req),
- ATTR_LIST(mb_group_prealloc),
- ATTR_LIST(max_writeback_mb_bump),
- ATTR_LIST(extent_max_zeroout_kb),
- ATTR_LIST(trigger_fs_error),
- NULL,
-};
-
-/* Features this copy of ext4 supports */
-EXT4_INFO_ATTR(lazy_itable_init);
-EXT4_INFO_ATTR(batched_discard);
-EXT4_INFO_ATTR(meta_bg_resize);
-
-static struct attribute *ext4_feat_attrs[] = {
- ATTR_LIST(lazy_itable_init),
- ATTR_LIST(batched_discard),
- ATTR_LIST(meta_bg_resize),
- NULL,
-};
-
-static ssize_t ext4_attr_show(struct kobject *kobj,
- struct attribute *attr, char *buf)
-{
- struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
- s_kobj);
- struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
-
- return a->show ? a->show(a, sbi, buf) : 0;
-}
-
-static ssize_t ext4_attr_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buf, size_t len)
-{
- struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
- s_kobj);
- struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
-
- return a->store ? a->store(a, sbi, buf, len) : 0;
-}
-
-static void ext4_sb_release(struct kobject *kobj)
-{
- struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
- s_kobj);
- complete(&sbi->s_kobj_unregister);
-}
-
-static const struct sysfs_ops ext4_attr_ops = {
- .show = ext4_attr_show,
- .store = ext4_attr_store,
-};
-
-static struct kobj_type ext4_ktype = {
- .default_attrs = ext4_attrs,
- .sysfs_ops = &ext4_attr_ops,
- .release = ext4_sb_release,
-};
-
-static void ext4_feat_release(struct kobject *kobj)
-{
- complete(&ext4_feat->f_kobj_unregister);
-}
-
-static struct kobj_type ext4_feat_ktype = {
- .default_attrs = ext4_feat_attrs,
- .sysfs_ops = &ext4_attr_ops,
- .release = ext4_feat_release,
-};
-
/*
* Check whether this filesystem can be mounted based on
* the features present and the RDONLY/RDWR mount requested.
* Returns 1 if this filesystem can be mounted as requested,
* 0 if it cannot be.
*/
-static int ext4_feature_set_ok(struct super_block *sb, int readonly)
+int ext4_feature_set_ok(struct super_block *sb, int readonly)
{
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) {
+ if (ext4_has_unknown_ext4_incompat_features(sb)) {
ext4_msg(sb, KERN_ERR,
"Couldn't mount because of "
"unsupported optional features (%x)",
@@ -2674,43 +3595,42 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
return 0;
}
+ if (!IS_ENABLED(CONFIG_UNICODE) && ext4_has_feature_casefold(sb)) {
+ ext4_msg(sb, KERN_ERR,
+ "Filesystem with casefold feature cannot be "
+ "mounted without CONFIG_UNICODE");
+ return 0;
+ }
+
if (readonly)
return 1;
+ if (ext4_has_feature_readonly(sb)) {
+ ext4_msg(sb, KERN_INFO, "filesystem is read-only");
+ sb->s_flags |= SB_RDONLY;
+ return 1;
+ }
+
/* Check that feature set is OK for a read-write mount */
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) {
+ if (ext4_has_unknown_ext4_ro_compat_features(sb)) {
ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
"unsupported optional features (%x)",
(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
~EXT4_FEATURE_RO_COMPAT_SUPP));
return 0;
}
- /*
- * Large file size enabled file system can only be mounted
- * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF
- */
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
- if (sizeof(blkcnt_t) < sizeof(u64)) {
- ext4_msg(sb, KERN_ERR, "Filesystem with huge files "
- "cannot be mounted RDWR without "
- "CONFIG_LBDAF");
- return 0;
- }
- }
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) &&
- !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
+ if (ext4_has_feature_bigalloc(sb) && !ext4_has_feature_extents(sb)) {
ext4_msg(sb, KERN_ERR,
"Can't support bigalloc feature without "
"extents feature\n");
return 0;
}
-#ifndef CONFIG_QUOTA
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
- !readonly) {
+#if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2)
+ if (!readonly && (ext4_has_feature_quota(sb) ||
+ ext4_has_feature_project(sb))) {
ext4_msg(sb, KERN_ERR,
- "Filesystem with quota feature cannot be mounted RDWR "
- "without CONFIG_QUOTA");
+ "The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2");
return 0;
}
#endif /* CONFIG_QUOTA */
@@ -2721,45 +3641,45 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
* This function is called once a day if we have errors logged
* on the file system
*/
-static void print_daily_error_info(unsigned long arg)
+static void print_daily_error_info(struct timer_list *t)
{
- struct super_block *sb = (struct super_block *) arg;
- struct ext4_sb_info *sbi;
- struct ext4_super_block *es;
-
- sbi = EXT4_SB(sb);
- es = sbi->s_es;
+ struct ext4_sb_info *sbi = timer_container_of(sbi, t, s_err_report);
+ struct super_block *sb = sbi->s_sb;
+ struct ext4_super_block *es = sbi->s_es;
if (es->s_error_count)
- ext4_msg(sb, KERN_NOTICE, "error count: %u",
+ /* fsck newer than v1.41.13 is needed to clean this condition. */
+ ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
le32_to_cpu(es->s_error_count));
if (es->s_first_error_time) {
- printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d",
- sb->s_id, le32_to_cpu(es->s_first_error_time),
+ printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %llu: %.*s:%d",
+ sb->s_id,
+ ext4_get_tstamp(es, s_first_error_time),
(int) sizeof(es->s_first_error_func),
es->s_first_error_func,
le32_to_cpu(es->s_first_error_line));
if (es->s_first_error_ino)
- printk(": inode %u",
+ printk(KERN_CONT ": inode %u",
le32_to_cpu(es->s_first_error_ino));
if (es->s_first_error_block)
- printk(": block %llu", (unsigned long long)
+ printk(KERN_CONT ": block %llu", (unsigned long long)
le64_to_cpu(es->s_first_error_block));
- printk("\n");
+ printk(KERN_CONT "\n");
}
if (es->s_last_error_time) {
- printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d",
- sb->s_id, le32_to_cpu(es->s_last_error_time),
+ printk(KERN_NOTICE "EXT4-fs (%s): last error at time %llu: %.*s:%d",
+ sb->s_id,
+ ext4_get_tstamp(es, s_last_error_time),
(int) sizeof(es->s_last_error_func),
es->s_last_error_func,
le32_to_cpu(es->s_last_error_line));
if (es->s_last_error_ino)
- printk(": inode %u",
+ printk(KERN_CONT ": inode %u",
le32_to_cpu(es->s_last_error_ino));
if (es->s_last_error_block)
- printk(": block %llu", (unsigned long long)
+ printk(KERN_CONT ": block %llu", (unsigned long long)
le64_to_cpu(es->s_last_error_block));
- printk("\n");
+ printk(KERN_CONT "\n");
}
mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */
}
@@ -2768,16 +3688,32 @@ static void print_daily_error_info(unsigned long arg)
static int ext4_run_li_request(struct ext4_li_request *elr)
{
struct ext4_group_desc *gdp = NULL;
- ext4_group_t group, ngroups;
- struct super_block *sb;
- unsigned long timeout = 0;
+ struct super_block *sb = elr->lr_super;
+ ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+ ext4_group_t group = elr->lr_next_group;
+ unsigned int prefetch_ios = 0;
int ret = 0;
+ int nr = EXT4_SB(sb)->s_mb_prefetch;
+ u64 start_time;
+
+ if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) {
+ elr->lr_next_group = ext4_mb_prefetch(sb, group, nr, &prefetch_ios);
+ ext4_mb_prefetch_fini(sb, elr->lr_next_group, nr);
+ trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group, nr);
+ if (group >= elr->lr_next_group) {
+ ret = 1;
+ if (elr->lr_first_not_zeroed != ngroups &&
+ !ext4_emergency_state(sb) && !sb_rdonly(sb) &&
+ test_opt(sb, INIT_INODE_TABLE)) {
+ elr->lr_next_group = elr->lr_first_not_zeroed;
+ elr->lr_mode = EXT4_LI_MODE_ITABLE;
+ ret = 0;
+ }
+ }
+ return ret;
+ }
- sb = elr->lr_super;
- ngroups = EXT4_SB(sb)->s_groups_count;
-
- sb_start_write(sb);
- for (group = elr->lr_next_group; group < ngroups; group++) {
+ for (; group < ngroups; group++) {
gdp = ext4_get_group_desc(sb, group, NULL);
if (!gdp) {
ret = 1;
@@ -2792,19 +3728,17 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
ret = 1;
if (!ret) {
- timeout = jiffies;
+ start_time = ktime_get_ns();
ret = ext4_init_inode_table(sb, group,
elr->lr_timeout ? 0 : 1);
+ trace_ext4_lazy_itable_init(sb, group);
if (elr->lr_timeout == 0) {
- timeout = (jiffies - timeout) *
- elr->lr_sbi->s_li_wait_mult;
- elr->lr_timeout = timeout;
+ elr->lr_timeout = nsecs_to_jiffies((ktime_get_ns() - start_time) *
+ EXT4_SB(elr->lr_super)->s_li_wait_mult);
}
elr->lr_next_sched = jiffies + elr->lr_timeout;
elr->lr_next_group = group + 1;
}
- sb_end_write(sb);
-
return ret;
}
@@ -2814,15 +3748,11 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
*/
static void ext4_remove_li_request(struct ext4_li_request *elr)
{
- struct ext4_sb_info *sbi;
-
if (!elr)
return;
- sbi = elr->lr_sbi;
-
list_del(&elr->lr_request);
- sbi->s_li_request = NULL;
+ EXT4_SB(elr->lr_super)->s_li_request = NULL;
kfree(elr);
}
@@ -2853,45 +3783,75 @@ static struct task_struct *ext4_lazyinit_task;
*/
static int ext4_lazyinit_thread(void *arg)
{
- struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
+ struct ext4_lazy_init *eli = arg;
struct list_head *pos, *n;
struct ext4_li_request *elr;
unsigned long next_wakeup, cur;
BUG_ON(NULL == eli);
+ set_freezable();
cont_thread:
while (true) {
- next_wakeup = MAX_JIFFY_OFFSET;
+ bool next_wakeup_initialized = false;
+ next_wakeup = 0;
mutex_lock(&eli->li_list_mtx);
if (list_empty(&eli->li_request_list)) {
mutex_unlock(&eli->li_list_mtx);
goto exit_thread;
}
-
list_for_each_safe(pos, n, &eli->li_request_list) {
+ int err = 0;
+ int progress = 0;
elr = list_entry(pos, struct ext4_li_request,
lr_request);
- if (time_after_eq(jiffies, elr->lr_next_sched)) {
- if (ext4_run_li_request(elr) != 0) {
- /* error, remove the lazy_init job */
- ext4_remove_li_request(elr);
- continue;
+ if (time_before(jiffies, elr->lr_next_sched)) {
+ if (!next_wakeup_initialized ||
+ time_before(elr->lr_next_sched, next_wakeup)) {
+ next_wakeup = elr->lr_next_sched;
+ next_wakeup_initialized = true;
}
+ continue;
}
-
- if (time_before(elr->lr_next_sched, next_wakeup))
+ if (down_read_trylock(&elr->lr_super->s_umount)) {
+ if (sb_start_write_trylock(elr->lr_super)) {
+ progress = 1;
+ /*
+ * We hold sb->s_umount, sb can not
+ * be removed from the list, it is
+ * now safe to drop li_list_mtx
+ */
+ mutex_unlock(&eli->li_list_mtx);
+ err = ext4_run_li_request(elr);
+ sb_end_write(elr->lr_super);
+ mutex_lock(&eli->li_list_mtx);
+ n = pos->next;
+ }
+ up_read((&elr->lr_super->s_umount));
+ }
+ /* error, remove the lazy_init job */
+ if (err) {
+ ext4_remove_li_request(elr);
+ continue;
+ }
+ if (!progress) {
+ elr->lr_next_sched = jiffies +
+ get_random_u32_below(EXT4_DEF_LI_MAX_START_DELAY * HZ);
+ }
+ if (!next_wakeup_initialized ||
+ time_before(elr->lr_next_sched, next_wakeup)) {
next_wakeup = elr->lr_next_sched;
+ next_wakeup_initialized = true;
+ }
}
mutex_unlock(&eli->li_list_mtx);
try_to_freeze();
cur = jiffies;
- if ((time_after_eq(cur, next_wakeup)) ||
- (MAX_JIFFY_OFFSET == next_wakeup)) {
+ if (!next_wakeup_initialized || time_after_eq(cur, next_wakeup)) {
cond_resched();
continue;
}
@@ -2971,6 +3931,9 @@ static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
struct ext4_group_desc *gdp = NULL;
+ if (!ext4_has_group_desc_csum(sb))
+ return ngroups;
+
for (group = 0; group < ngroups; group++) {
gdp = ext4_get_group_desc(sb, group, NULL);
if (!gdp)
@@ -3004,27 +3967,27 @@ static int ext4_li_info_new(void)
static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
ext4_group_t start)
{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_li_request *elr;
- unsigned long rnd;
elr = kzalloc(sizeof(*elr), GFP_KERNEL);
if (!elr)
return NULL;
elr->lr_super = sb;
- elr->lr_sbi = sbi;
- elr->lr_next_group = start;
+ elr->lr_first_not_zeroed = start;
+ if (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS)) {
+ elr->lr_mode = EXT4_LI_MODE_ITABLE;
+ elr->lr_next_group = start;
+ } else {
+ elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
+ }
/*
* Randomize first schedule time of the request to
* spread the inode table initialization requests
* better.
*/
- get_random_bytes(&rnd, sizeof(rnd));
- elr->lr_next_sched = jiffies + (unsigned long)rnd %
- (EXT4_DEF_LI_MAX_START_DELAY * HZ);
-
+ elr->lr_next_sched = jiffies + get_random_u32_below(EXT4_DEF_LI_MAX_START_DELAY * HZ);
return elr;
}
@@ -3033,7 +3996,7 @@ int ext4_register_li_request(struct super_block *sb,
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_li_request *elr = NULL;
- ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+ ext4_group_t ngroups = sbi->s_groups_count;
int ret = 0;
mutex_lock(&ext4_li_mtx);
@@ -3046,9 +4009,9 @@ int ext4_register_li_request(struct super_block *sb,
goto out;
}
- if (first_not_zeroed == ngroups ||
- (sb->s_flags & MS_RDONLY) ||
- !test_opt(sb, INIT_INODE_TABLE))
+ if (ext4_emergency_state(sb) || sb_rdonly(sb) ||
+ (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS) &&
+ (first_not_zeroed == ngroups || !test_opt(sb, INIT_INODE_TABLE))))
goto out;
elr = ext4_li_request_new(sb, first_not_zeroed);
@@ -3109,17 +4072,20 @@ static int set_journal_csum_feature_set(struct super_block *sb)
int compat, incompat;
struct ext4_sb_info *sbi = EXT4_SB(sb);
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
- /* journal checksum v2 */
+ if (ext4_has_feature_metadata_csum(sb)) {
+ /* journal checksum v3 */
compat = 0;
- incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2;
+ incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
} else {
/* journal checksum v1 */
compat = JBD2_FEATURE_COMPAT_CHECKSUM;
incompat = 0;
}
+ jbd2_journal_clear_features(sbi->s_journal,
+ JBD2_FEATURE_COMPAT_CHECKSUM, 0,
+ JBD2_FEATURE_INCOMPAT_CSUM_V3 |
+ JBD2_FEATURE_INCOMPAT_CSUM_V2);
if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
ret = jbd2_journal_set_features(sbi->s_journal,
compat, 0,
@@ -3132,10 +4098,8 @@ static int set_journal_csum_feature_set(struct super_block *sb)
jbd2_journal_clear_features(sbi->s_journal, 0, 0,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
} else {
- jbd2_journal_clear_features(sbi->s_journal,
- JBD2_FEATURE_COMPAT_CHECKSUM, 0,
- JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
- JBD2_FEATURE_INCOMPAT_CSUM_V2);
+ jbd2_journal_clear_features(sbi->s_journal, 0, 0,
+ JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
}
return ret;
@@ -3164,9 +4128,11 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp,
ext4_fsblk_t first_block, last_block, b;
ext4_group_t i, ngroups = ext4_get_groups_count(sb);
int s, j, count = 0;
+ int has_super = ext4_bg_has_super(sb, grp);
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC))
- return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
+ if (!ext4_has_feature_bigalloc(sb))
+ return (has_super + ext4_bg_num_gdb(sb, grp) +
+ (has_super ? le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0) +
sbi->s_itb_per_group + 2);
first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
@@ -3198,10 +4164,15 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp,
ext4_set_bit(s++, buf);
count++;
}
- for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) {
- ext4_set_bit(EXT4_B2C(sbi, s++), buf);
- count++;
+ j = ext4_bg_num_gdb(sb, grp);
+ if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) {
+ ext4_error(sb, "Invalid number of block group "
+ "descriptor blocks: %d", j);
+ j = EXT4_BLOCKS_PER_GROUP(sb) - s;
}
+ count += j;
+ for (; j > 0; j--)
+ ext4_set_bit(EXT4_B2C(sbi, s++), buf);
}
if (!count)
return 0;
@@ -3216,9 +4187,11 @@ int ext4_calculate_overhead(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
+ struct inode *j_inode;
+ unsigned int j_blocks, j_inum = le32_to_cpu(es->s_journal_inum);
ext4_group_t i, ngroups = ext4_get_groups_count(sb);
ext4_fsblk_t overhead = 0;
- char *buf = (char *) get_zeroed_page(GFP_KERNEL);
+ char *buf = kvmalloc(sb->s_blocksize, GFP_NOFS | __GFP_ZERO);
if (!buf)
return -ENOMEM;
@@ -3243,174 +4216,132 @@ int ext4_calculate_overhead(struct super_block *sb)
blks = count_overhead(sb, i, buf);
overhead += blks;
if (blks)
- memset(buf, 0, PAGE_SIZE);
+ memset(buf, 0, sb->s_blocksize);
cond_resched();
}
- /* Add the journal blocks as well */
- if (sbi->s_journal)
- overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
+ /*
+ * Add the internal journal blocks whether the journal has been
+ * loaded or not
+ */
+ if (sbi->s_journal && !sbi->s_journal_bdev_file)
+ overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_total_len);
+ else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
+ /* j_inum for internal journal is non-zero */
+ j_inode = ext4_get_journal_inode(sb, j_inum);
+ if (!IS_ERR(j_inode)) {
+ j_blocks = j_inode->i_size >> sb->s_blocksize_bits;
+ overhead += EXT4_NUM_B2C(sbi, j_blocks);
+ iput(j_inode);
+ } else {
+ ext4_msg(sb, KERN_ERR, "can't get journal size");
+ }
+ }
sbi->s_overhead = overhead;
smp_wmb();
- free_page((unsigned long) buf);
+ kvfree(buf);
return 0;
}
-
-static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi)
+static void ext4_set_resv_clusters(struct super_block *sb)
{
ext4_fsblk_t resv_clusters;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
/*
+ * There's no need to reserve anything when we aren't using extents.
+ * The space estimates are exact, there are no unwritten extents,
+ * hole punching doesn't need new metadata... This is needed especially
+ * to keep ext2/3 backward compatibility.
+ */
+ if (!ext4_has_feature_extents(sb))
+ return;
+ /*
* By default we reserve 2% or 4096 clusters, whichever is smaller.
* This should cover the situations where we can not afford to run
* out of space like for example punch hole, or converting
- * uninitialized extents in delalloc path. In most cases such
+ * unwritten extents in delalloc path. In most cases such
* allocation would require 1, or 2 blocks, higher numbers are
* very rare.
*/
- resv_clusters = ext4_blocks_count(sbi->s_es) >> sbi->s_cluster_bits;
+ resv_clusters = (ext4_blocks_count(sbi->s_es) >>
+ sbi->s_cluster_bits);
do_div(resv_clusters, 50);
resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
- return resv_clusters;
+ atomic64_set(&sbi->s_resv_clusters, resv_clusters);
}
+static const char *ext4_quota_mode(struct super_block *sb)
+{
+#ifdef CONFIG_QUOTA
+ if (!ext4_quota_capable(sb))
+ return "none";
-static int ext4_reserve_clusters(struct ext4_sb_info *sbi, ext4_fsblk_t count)
+ if (EXT4_SB(sb)->s_journal && ext4_is_quota_journalled(sb))
+ return "journalled";
+ else
+ return "writeback";
+#else
+ return "disabled";
+#endif
+}
+
+static void ext4_setup_csum_trigger(struct super_block *sb,
+ enum ext4_journal_trigger_type type,
+ void (*trigger)(
+ struct jbd2_buffer_trigger_type *type,
+ struct buffer_head *bh,
+ void *mapped_data,
+ size_t size))
{
- ext4_fsblk_t clusters = ext4_blocks_count(sbi->s_es) >>
- sbi->s_cluster_bits;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
- if (count >= clusters)
- return -EINVAL;
+ sbi->s_journal_triggers[type].sb = sb;
+ sbi->s_journal_triggers[type].tr_triggers.t_frozen = trigger;
+}
- atomic64_set(&sbi->s_resv_clusters, count);
- return 0;
+static void ext4_free_sbi(struct ext4_sb_info *sbi)
+{
+ if (!sbi)
+ return;
+
+ kfree(sbi->s_blockgroup_lock);
+ fs_put_dax(sbi->s_daxdev, NULL);
+ kfree(sbi);
}
-static int ext4_fill_super(struct super_block *sb, void *data, int silent)
+static struct ext4_sb_info *ext4_alloc_sbi(struct super_block *sb)
{
- char *orig_data = kstrdup(data, GFP_KERNEL);
- struct buffer_head *bh;
- struct ext4_super_block *es = NULL;
struct ext4_sb_info *sbi;
- ext4_fsblk_t block;
- ext4_fsblk_t sb_block = get_sb_block(&data);
- ext4_fsblk_t logical_sb_block;
- unsigned long offset = 0;
- unsigned long journal_devnum = 0;
- unsigned long def_mount_opts;
- struct inode *root;
- char *cp;
- const char *descr;
- int ret = -ENOMEM;
- int blocksize, clustersize;
- unsigned int db_count;
- unsigned int i;
- int needs_recovery, has_huge_files, has_bigalloc;
- __u64 blocks_count;
- int err = 0;
- unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
- ext4_group_t first_not_zeroed;
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
- goto out_free_orig;
+ return NULL;
+
+ sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off,
+ NULL, NULL);
sbi->s_blockgroup_lock =
kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
- if (!sbi->s_blockgroup_lock) {
- kfree(sbi);
- goto out_free_orig;
- }
- sb->s_fs_info = sbi;
- sbi->s_sb = sb;
- sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
- sbi->s_sb_block = sb_block;
- if (sb->s_bdev->bd_part)
- sbi->s_sectors_written_start =
- part_stat_read(sb->s_bdev->bd_part, sectors[1]);
-
- /* Cleanup superblock name */
- for (cp = sb->s_id; (cp = strchr(cp, '/'));)
- *cp = '!';
-
- /* -EINVAL is default */
- ret = -EINVAL;
- blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
- if (!blocksize) {
- ext4_msg(sb, KERN_ERR, "unable to set blocksize");
- goto out_fail;
- }
-
- /*
- * The ext4 superblock will not be buffer aligned for other than 1kB
- * block sizes. We need to calculate the offset from buffer start.
- */
- if (blocksize != EXT4_MIN_BLOCK_SIZE) {
- logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
- offset = do_div(logical_sb_block, blocksize);
- } else {
- logical_sb_block = sb_block;
- }
-
- if (!(bh = sb_bread(sb, logical_sb_block))) {
- ext4_msg(sb, KERN_ERR, "unable to read superblock");
- goto out_fail;
- }
- /*
- * Note: s_es must be initialized as soon as possible because
- * some ext4 macro-instructions depend on its value
- */
- es = (struct ext4_super_block *) (bh->b_data + offset);
- sbi->s_es = es;
- sb->s_magic = le16_to_cpu(es->s_magic);
- if (sb->s_magic != EXT4_SUPER_MAGIC)
- goto cantfind_ext4;
- sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
- /* Warn if metadata_csum and gdt_csum are both set. */
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
- EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
- ext4_warning(sb, KERN_INFO "metadata_csum and uninit_bg are "
- "redundant flags; please run fsck.");
-
- /* Check for a known checksum algorithm */
- if (!ext4_verify_csum_type(sb, es)) {
- ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
- "unknown checksum algorithm.");
- silent = 1;
- goto cantfind_ext4;
- }
-
- /* Load the checksum driver */
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
- sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
- if (IS_ERR(sbi->s_chksum_driver)) {
- ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
- ret = PTR_ERR(sbi->s_chksum_driver);
- sbi->s_chksum_driver = NULL;
- goto failed_mount;
- }
- }
+ if (!sbi->s_blockgroup_lock)
+ goto err_out;
- /* Check superblock checksum */
- if (!ext4_superblock_csum_verify(sb, es)) {
- ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
- "invalid superblock checksum. Run e2fsck?");
- silent = 1;
- goto cantfind_ext4;
- }
+ sb->s_fs_info = sbi;
+ sbi->s_sb = sb;
+ return sbi;
+err_out:
+ fs_put_dax(sbi->s_daxdev, NULL);
+ kfree(sbi);
+ return NULL;
+}
- /* Precompute checksum seed for all metadata */
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
- sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
- sizeof(es->s_uuid));
+static void ext4_set_def_opts(struct super_block *sb,
+ struct ext4_super_block *es)
+{
+ unsigned long def_mount_opts;
/* Set defaults before we parse the mount options */
def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
@@ -3426,6 +4357,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
#ifdef CONFIG_EXT4_FS_POSIX_ACL
set_opt(sb, POSIX_ACL);
#endif
+ if (ext4_has_feature_fast_commit(sb))
+ set_opt2(sb, JOURNAL_FAST_COMMIT);
+ /* don't forget to enable journal_csum when metadata_csum is enabled. */
+ if (ext4_has_feature_metadata_csum(sb))
+ set_opt(sb, JOURNAL_CHECKSUM);
+
if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
set_opt(sb, JOURNAL_DATA);
else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
@@ -3433,23 +4370,17 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
set_opt(sb, WRITEBACK_DATA);
- if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
+ if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_PANIC)
set_opt(sb, ERRORS_PANIC);
- else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
+ else if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_CONTINUE)
set_opt(sb, ERRORS_CONT);
else
set_opt(sb, ERRORS_RO);
- if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)
- set_opt(sb, BLOCK_VALIDITY);
+ /* block_validity enabled by default; disable with noblock_validity */
+ set_opt(sb, BLOCK_VALIDITY);
if (def_mount_opts & EXT4_DEFM_DISCARD)
set_opt(sb, DISCARD);
- sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
- sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
- sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
- sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
- sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
-
if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
set_opt(sb, BARRIER);
@@ -3461,60 +4392,323 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
set_opt(sb, DELALLOC);
- /*
- * set default s_li_wait_mult for lazyinit, for the case there is
- * no mount option specified.
- */
- sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
+ set_opt(sb, DIOREAD_NOLOCK);
+}
- if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
- &journal_devnum, &journal_ioprio, 0)) {
- ext4_msg(sb, KERN_WARNING,
- "failed to parse options in superblock: %s",
- sbi->s_es->s_mount_opts);
+static int ext4_handle_clustersize(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+ int clustersize;
+
+ /* Handle clustersize */
+ clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
+ if (ext4_has_feature_bigalloc(sb)) {
+ if (clustersize < sb->s_blocksize) {
+ ext4_msg(sb, KERN_ERR,
+ "cluster size (%d) smaller than "
+ "block size (%lu)", clustersize, sb->s_blocksize);
+ return -EINVAL;
+ }
+ sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
+ le32_to_cpu(es->s_log_block_size);
+ } else {
+ if (clustersize != sb->s_blocksize) {
+ ext4_msg(sb, KERN_ERR,
+ "fragment/cluster size (%d) != "
+ "block size (%lu)", clustersize, sb->s_blocksize);
+ return -EINVAL;
+ }
+ if (sbi->s_blocks_per_group > sb->s_blocksize * 8) {
+ ext4_msg(sb, KERN_ERR,
+ "#blocks per group too big: %lu",
+ sbi->s_blocks_per_group);
+ return -EINVAL;
+ }
+ sbi->s_cluster_bits = 0;
}
- sbi->s_def_mount_opt = sbi->s_mount_opt;
- if (!parse_options((char *) data, sb, &journal_devnum,
- &journal_ioprio, 0))
- goto failed_mount;
+ sbi->s_clusters_per_group = le32_to_cpu(es->s_clusters_per_group);
+ if (sbi->s_clusters_per_group > sb->s_blocksize * 8) {
+ ext4_msg(sb, KERN_ERR, "#clusters per group too big: %lu",
+ sbi->s_clusters_per_group);
+ return -EINVAL;
+ }
+ if (sbi->s_blocks_per_group !=
+ (sbi->s_clusters_per_group * (clustersize / sb->s_blocksize))) {
+ ext4_msg(sb, KERN_ERR,
+ "blocks per group (%lu) and clusters per group (%lu) inconsistent",
+ sbi->s_blocks_per_group, sbi->s_clusters_per_group);
+ return -EINVAL;
+ }
+ sbi->s_cluster_ratio = clustersize / sb->s_blocksize;
- if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
- printk_once(KERN_WARNING "EXT4-fs: Warning: mounting "
- "with data=journal disables delayed "
- "allocation and O_DIRECT support!\n");
- if (test_opt2(sb, EXPLICIT_DELALLOC)) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "both data=journal and delalloc");
- goto failed_mount;
+ /* Do we have standard group size of clustersize * 8 blocks ? */
+ if (sbi->s_blocks_per_group == clustersize << 3)
+ set_opt2(sb, STD_GROUP_SIZE);
+
+ return 0;
+}
+
+/*
+ * ext4_atomic_write_init: Initializes filesystem min & max atomic write units.
+ * With non-bigalloc filesystem awu will be based upon filesystem blocksize
+ * & bdev awu units.
+ * With bigalloc it will be based upon bigalloc cluster size & bdev awu units.
+ * @sb: super block
+ */
+static void ext4_atomic_write_init(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct block_device *bdev = sb->s_bdev;
+ unsigned int clustersize = EXT4_CLUSTER_SIZE(sb);
+
+ if (!bdev_can_atomic_write(bdev))
+ return;
+
+ if (!ext4_has_feature_extents(sb))
+ return;
+
+ sbi->s_awu_min = max(sb->s_blocksize,
+ bdev_atomic_write_unit_min_bytes(bdev));
+ sbi->s_awu_max = min(clustersize,
+ bdev_atomic_write_unit_max_bytes(bdev));
+ if (sbi->s_awu_min && sbi->s_awu_max &&
+ sbi->s_awu_min <= sbi->s_awu_max) {
+ ext4_msg(sb, KERN_NOTICE, "Supports (experimental) DIO atomic writes awu_min: %u, awu_max: %u",
+ sbi->s_awu_min, sbi->s_awu_max);
+ } else {
+ sbi->s_awu_min = 0;
+ sbi->s_awu_max = 0;
+ }
+}
+
+static void ext4_fast_commit_init(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ /* Initialize fast commit stuff */
+ atomic_set(&sbi->s_fc_subtid, 0);
+ INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]);
+ INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]);
+ INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]);
+ INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
+ sbi->s_fc_bytes = 0;
+ ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+ sbi->s_fc_ineligible_tid = 0;
+ mutex_init(&sbi->s_fc_lock);
+ memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
+ sbi->s_fc_replay_state.fc_regions = NULL;
+ sbi->s_fc_replay_state.fc_regions_size = 0;
+ sbi->s_fc_replay_state.fc_regions_used = 0;
+ sbi->s_fc_replay_state.fc_regions_valid = 0;
+ sbi->s_fc_replay_state.fc_modified_inodes = NULL;
+ sbi->s_fc_replay_state.fc_modified_inodes_size = 0;
+ sbi->s_fc_replay_state.fc_modified_inodes_used = 0;
+}
+
+static int ext4_inode_info_init(struct super_block *sb,
+ struct ext4_super_block *es)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
+ sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
+ sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
+ } else {
+ sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
+ sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
+ if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
+ ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
+ sbi->s_first_ino);
+ return -EINVAL;
}
- if (test_opt(sb, DIOREAD_NOLOCK)) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "both data=journal and delalloc");
- goto failed_mount;
+ if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
+ (!is_power_of_2(sbi->s_inode_size)) ||
+ (sbi->s_inode_size > sb->s_blocksize)) {
+ ext4_msg(sb, KERN_ERR,
+ "unsupported inode size: %d",
+ sbi->s_inode_size);
+ ext4_msg(sb, KERN_ERR, "blocksize: %lu", sb->s_blocksize);
+ return -EINVAL;
}
- if (test_opt(sb, DELALLOC))
- clear_opt(sb, DELALLOC);
+ /*
+ * i_atime_extra is the last extra field available for
+ * [acm]times in struct ext4_inode. Checking for that
+ * field should suffice to ensure we have extra space
+ * for all three.
+ */
+ if (sbi->s_inode_size >= offsetof(struct ext4_inode, i_atime_extra) +
+ sizeof(((struct ext4_inode *)0)->i_atime_extra)) {
+ sb->s_time_gran = 1;
+ sb->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
+ } else {
+ sb->s_time_gran = NSEC_PER_SEC;
+ sb->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
+ }
+ sb->s_time_min = EXT4_TIMESTAMP_MIN;
+ }
+
+ if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
+ sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
+ EXT4_GOOD_OLD_INODE_SIZE;
+ if (ext4_has_feature_extra_isize(sb)) {
+ unsigned v, max = (sbi->s_inode_size -
+ EXT4_GOOD_OLD_INODE_SIZE);
+
+ v = le16_to_cpu(es->s_want_extra_isize);
+ if (v > max) {
+ ext4_msg(sb, KERN_ERR,
+ "bad s_want_extra_isize: %d", v);
+ return -EINVAL;
+ }
+ if (sbi->s_want_extra_isize < v)
+ sbi->s_want_extra_isize = v;
+
+ v = le16_to_cpu(es->s_min_extra_isize);
+ if (v > max) {
+ ext4_msg(sb, KERN_ERR,
+ "bad s_min_extra_isize: %d", v);
+ return -EINVAL;
+ }
+ if (sbi->s_want_extra_isize < v)
+ sbi->s_want_extra_isize = v;
+ }
+ }
+
+ return 0;
+}
+
+#if IS_ENABLED(CONFIG_UNICODE)
+static int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es)
+{
+ const struct ext4_sb_encodings *encoding_info;
+ struct unicode_map *encoding;
+ __u16 encoding_flags = le16_to_cpu(es->s_encoding_flags);
+
+ if (!ext4_has_feature_casefold(sb) || sb->s_encoding)
+ return 0;
+
+ encoding_info = ext4_sb_read_encoding(es);
+ if (!encoding_info) {
+ ext4_msg(sb, KERN_ERR,
+ "Encoding requested by superblock is unknown");
+ return -EINVAL;
}
- sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
- (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
+ encoding = utf8_load(encoding_info->version);
+ if (IS_ERR(encoding)) {
+ ext4_msg(sb, KERN_ERR,
+ "can't mount with superblock charset: %s-%u.%u.%u "
+ "not supported by the kernel. flags: 0x%x.",
+ encoding_info->name,
+ unicode_major(encoding_info->version),
+ unicode_minor(encoding_info->version),
+ unicode_rev(encoding_info->version),
+ encoding_flags);
+ return -EINVAL;
+ }
+ ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: "
+ "%s-%u.%u.%u with flags 0x%hx", encoding_info->name,
+ unicode_major(encoding_info->version),
+ unicode_minor(encoding_info->version),
+ unicode_rev(encoding_info->version),
+ encoding_flags);
+
+ sb->s_encoding = encoding;
+ sb->s_encoding_flags = encoding_flags;
+
+ return 0;
+}
+#else
+static inline int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es)
+{
+ return 0;
+}
+#endif
+
+static int ext4_init_metadata_csum(struct super_block *sb, struct ext4_super_block *es)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ /* Warn if metadata_csum and gdt_csum are both set. */
+ if (ext4_has_feature_metadata_csum(sb) &&
+ ext4_has_feature_gdt_csum(sb))
+ ext4_warning(sb, "metadata_csum and uninit_bg are "
+ "redundant flags; please run fsck.");
+
+ /* Check for a known checksum algorithm */
+ if (!ext4_verify_csum_type(sb, es)) {
+ ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
+ "unknown checksum algorithm.");
+ return -EINVAL;
+ }
+ ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE,
+ ext4_orphan_file_block_trigger);
+
+ /* Check superblock checksum */
+ if (!ext4_superblock_csum_verify(sb, es)) {
+ ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
+ "invalid superblock checksum. Run e2fsck?");
+ return -EFSBADCRC;
+ }
+
+ /* Precompute checksum seed for all metadata */
+ if (ext4_has_feature_csum_seed(sb))
+ sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
+ else if (ext4_has_feature_metadata_csum(sb) ||
+ ext4_has_feature_ea_inode(sb))
+ sbi->s_csum_seed = ext4_chksum(~0, es->s_uuid,
+ sizeof(es->s_uuid));
+ return 0;
+}
+
+static int ext4_check_feature_compatibility(struct super_block *sb,
+ struct ext4_super_block *es,
+ int silent)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
- (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
- EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
- EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
+ (ext4_has_compat_features(sb) ||
+ ext4_has_ro_compat_features(sb) ||
+ ext4_has_incompat_features(sb)))
ext4_msg(sb, KERN_WARNING,
"feature flags set on rev 0 fs, "
"running e2fsck is recommended");
+ if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) {
+ set_opt2(sb, HURD_COMPAT);
+ if (ext4_has_feature_64bit(sb)) {
+ ext4_msg(sb, KERN_ERR,
+ "The Hurd can't support 64-bit file systems");
+ return -EINVAL;
+ }
+
+ /*
+ * ea_inode feature uses l_i_version field which is not
+ * available in HURD_COMPAT mode.
+ */
+ if (ext4_has_feature_ea_inode(sb)) {
+ ext4_msg(sb, KERN_ERR,
+ "ea_inode feature is not supported for Hurd");
+ return -EINVAL;
+ }
+ }
+
if (IS_EXT2_SB(sb)) {
if (ext2_feature_set_ok(sb))
ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
"using the ext4 subsystem");
else {
+ /*
+ * If we're probing be silent, if this looks like
+ * it's actually an ext[34] filesystem.
+ */
+ if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
+ return -EINVAL;
ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
"to feature incompatibilities");
- goto failed_mount;
+ return -EINVAL;
}
}
@@ -3523,9 +4717,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
"using the ext4 subsystem");
else {
+ /*
+ * If we're probing be silent, if this looks like
+ * it's actually an ext4 filesystem.
+ */
+ if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
+ return -EINVAL;
ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
"to feature incompatibilities");
- goto failed_mount;
+ return -EINVAL;
}
}
@@ -3534,169 +4734,51 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
* previously didn't change the revision level when setting the flags,
* so there is a chance incompat flags are set on a rev 0 filesystem.
*/
- if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
- goto failed_mount;
+ if (!ext4_feature_set_ok(sb, (sb_rdonly(sb))))
+ return -EINVAL;
- blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
- if (blocksize < EXT4_MIN_BLOCK_SIZE ||
- blocksize > EXT4_MAX_BLOCK_SIZE) {
- ext4_msg(sb, KERN_ERR,
- "Unsupported filesystem blocksize %d", blocksize);
- goto failed_mount;
+ if (sbi->s_daxdev) {
+ if (sb->s_blocksize == PAGE_SIZE)
+ set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
+ else
+ ext4_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n");
}
- if (sb->s_blocksize != blocksize) {
- /* Validate the filesystem blocksize */
- if (!sb_set_blocksize(sb, blocksize)) {
- ext4_msg(sb, KERN_ERR, "bad block size %d",
- blocksize);
- goto failed_mount;
- }
-
- brelse(bh);
- logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
- offset = do_div(logical_sb_block, blocksize);
- bh = sb_bread(sb, logical_sb_block);
- if (!bh) {
- ext4_msg(sb, KERN_ERR,
- "Can't read superblock on 2nd try");
- goto failed_mount;
+ if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
+ if (ext4_has_feature_inline_data(sb)) {
+ ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
+ " that may contain inline data");
+ return -EINVAL;
}
- es = (struct ext4_super_block *)(bh->b_data + offset);
- sbi->s_es = es;
- if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
+ if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) {
ext4_msg(sb, KERN_ERR,
- "Magic mismatch, very weird!");
- goto failed_mount;
+ "DAX unsupported by block device.");
+ return -EINVAL;
}
}
- has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
- sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
- has_huge_files);
- sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
-
- if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
- sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
- sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
- } else {
- sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
- sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
- if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
- (!is_power_of_2(sbi->s_inode_size)) ||
- (sbi->s_inode_size > blocksize)) {
- ext4_msg(sb, KERN_ERR,
- "unsupported inode size: %d",
- sbi->s_inode_size);
- goto failed_mount;
- }
- if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
- sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
+ if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
+ ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d",
+ es->s_encryption_level);
+ return -EINVAL;
}
- sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
- if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
- sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
- !is_power_of_2(sbi->s_desc_size)) {
- ext4_msg(sb, KERN_ERR,
- "unsupported descriptor size %lu",
- sbi->s_desc_size);
- goto failed_mount;
- }
- } else
- sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
-
- sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
- sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
- if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
- goto cantfind_ext4;
-
- sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
- if (sbi->s_inodes_per_block == 0)
- goto cantfind_ext4;
- sbi->s_itb_per_group = sbi->s_inodes_per_group /
- sbi->s_inodes_per_block;
- sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
- sbi->s_sbh = bh;
- sbi->s_mount_state = le16_to_cpu(es->s_state);
- sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
- sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
-
- /* Do we have standard group size of blocksize * 8 blocks ? */
- if (sbi->s_blocks_per_group == blocksize << 3)
- set_opt2(sb, STD_GROUP_SIZE);
-
- for (i = 0; i < 4; i++)
- sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
- sbi->s_def_hash_version = es->s_def_hash_version;
- i = le32_to_cpu(es->s_flags);
- if (i & EXT2_FLAGS_UNSIGNED_HASH)
- sbi->s_hash_unsigned = 3;
- else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
-#ifdef __CHAR_UNSIGNED__
- es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
- sbi->s_hash_unsigned = 3;
-#else
- es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
-#endif
- }
+ return 0;
+}
- /* Handle clustersize */
- clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
- has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_BIGALLOC);
- if (has_bigalloc) {
- if (clustersize < blocksize) {
- ext4_msg(sb, KERN_ERR,
- "cluster size (%d) smaller than "
- "block size (%d)", clustersize, blocksize);
- goto failed_mount;
- }
- sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
- le32_to_cpu(es->s_log_block_size);
- sbi->s_clusters_per_group =
- le32_to_cpu(es->s_clusters_per_group);
- if (sbi->s_clusters_per_group > blocksize * 8) {
- ext4_msg(sb, KERN_ERR,
- "#clusters per group too big: %lu",
- sbi->s_clusters_per_group);
- goto failed_mount;
- }
- if (sbi->s_blocks_per_group !=
- (sbi->s_clusters_per_group * (clustersize / blocksize))) {
- ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
- "clusters per group (%lu) inconsistent",
- sbi->s_blocks_per_group,
- sbi->s_clusters_per_group);
- goto failed_mount;
- }
- } else {
- if (clustersize != blocksize) {
- ext4_warning(sb, "fragment/cluster size (%d) != "
- "block size (%d)", clustersize,
- blocksize);
- clustersize = blocksize;
- }
- if (sbi->s_blocks_per_group > blocksize * 8) {
- ext4_msg(sb, KERN_ERR,
- "#blocks per group too big: %lu",
- sbi->s_blocks_per_group);
- goto failed_mount;
- }
- sbi->s_clusters_per_group = sbi->s_blocks_per_group;
- sbi->s_cluster_bits = 0;
- }
- sbi->s_cluster_ratio = clustersize / blocksize;
+static int ext4_check_geometry(struct super_block *sb,
+ struct ext4_super_block *es)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ __u64 blocks_count;
+ int err;
- if (sbi->s_inodes_per_group > blocksize * 8) {
+ if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (sb->s_blocksize / 4)) {
ext4_msg(sb, KERN_ERR,
- "#inodes per group too big: %lu",
- sbi->s_inodes_per_group);
- goto failed_mount;
+ "Number of reserved GDT blocks insanely large: %d",
+ le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
+ return -EINVAL;
}
-
/*
* Test whether we have more sectors than will fit in sector_t,
* and whether the max offset is addressable by the page cache.
@@ -3706,21 +4788,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (err) {
ext4_msg(sb, KERN_ERR, "filesystem"
" too large to mount safely on this system");
- if (sizeof(sector_t) < 8)
- ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
- goto failed_mount;
+ return err;
}
- if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
- goto cantfind_ext4;
-
/* check blocks count against device size */
- blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
+ blocks_count = sb_bdev_nr_blocks(sb);
if (blocks_count && ext4_blocks_count(es) > blocks_count) {
ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
"exceeds size of device (%llu blocks)",
ext4_blocks_count(es), blocks_count);
- goto failed_mount;
+ return -EINVAL;
}
/*
@@ -3732,217 +4809,725 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"block %u is beyond end of filesystem (%llu)",
le32_to_cpu(es->s_first_data_block),
ext4_blocks_count(es));
- goto failed_mount;
+ return -EINVAL;
}
+ if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) &&
+ (sbi->s_cluster_ratio == 1)) {
+ ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
+ "block is 0 with a 1k block and cluster size");
+ return -EINVAL;
+ }
+
blocks_count = (ext4_blocks_count(es) -
le32_to_cpu(es->s_first_data_block) +
EXT4_BLOCKS_PER_GROUP(sb) - 1);
do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
- ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
+ ext4_msg(sb, KERN_WARNING, "groups count too large: %llu "
"(block count %llu, first data block %u, "
- "blocks per group %lu)", sbi->s_groups_count,
+ "blocks per group %lu)", blocks_count,
ext4_blocks_count(es),
le32_to_cpu(es->s_first_data_block),
EXT4_BLOCKS_PER_GROUP(sb));
- goto failed_mount;
+ return -EINVAL;
}
sbi->s_groups_count = blocks_count;
sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
+ if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
+ le32_to_cpu(es->s_inodes_count)) {
+ ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
+ le32_to_cpu(es->s_inodes_count),
+ ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ext4_group_desc_init(struct super_block *sb,
+ struct ext4_super_block *es,
+ ext4_fsblk_t logical_sb_block,
+ ext4_group_t *first_not_zeroed)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ unsigned int db_count;
+ ext4_fsblk_t block;
+ int i;
+
db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
EXT4_DESC_PER_BLOCK(sb);
- sbi->s_group_desc = ext4_kvmalloc(db_count *
+ if (ext4_has_feature_meta_bg(sb)) {
+ if (le32_to_cpu(es->s_first_meta_bg) > db_count) {
+ ext4_msg(sb, KERN_WARNING,
+ "first meta block group too large: %u "
+ "(group descriptor block count %u)",
+ le32_to_cpu(es->s_first_meta_bg), db_count);
+ return -EINVAL;
+ }
+ }
+ rcu_assign_pointer(sbi->s_group_desc,
+ kvmalloc_array(db_count,
sizeof(struct buffer_head *),
- GFP_KERNEL);
+ GFP_KERNEL));
if (sbi->s_group_desc == NULL) {
ext4_msg(sb, KERN_ERR, "not enough memory");
- ret = -ENOMEM;
- goto failed_mount;
+ return -ENOMEM;
}
- if (ext4_proc_root)
- sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
-
- if (sbi->s_proc)
- proc_create_data("options", S_IRUGO, sbi->s_proc,
- &ext4_seq_options_fops, sb);
-
bgl_lock_init(sbi->s_blockgroup_lock);
+ /* Pre-read the descriptors into the buffer cache */
for (i = 0; i < db_count; i++) {
block = descriptor_loc(sb, logical_sb_block, i);
- sbi->s_group_desc[i] = sb_bread(sb, block);
- if (!sbi->s_group_desc[i]) {
+ ext4_sb_breadahead_unmovable(sb, block);
+ }
+
+ for (i = 0; i < db_count; i++) {
+ struct buffer_head *bh;
+
+ block = descriptor_loc(sb, logical_sb_block, i);
+ bh = ext4_sb_bread_unmovable(sb, block);
+ if (IS_ERR(bh)) {
ext4_msg(sb, KERN_ERR,
"can't read group descriptor %d", i);
- db_count = i;
- goto failed_mount2;
+ sbi->s_gdb_count = i;
+ return PTR_ERR(bh);
}
+ rcu_read_lock();
+ rcu_dereference(sbi->s_group_desc)[i] = bh;
+ rcu_read_unlock();
}
- if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
+ sbi->s_gdb_count = db_count;
+ if (!ext4_check_descriptors(sb, logical_sb_block, first_not_zeroed)) {
ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
- goto failed_mount2;
+ return -EFSCORRUPTED;
}
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
- if (!ext4_fill_flex_info(sb)) {
- ext4_msg(sb, KERN_ERR,
- "unable to initialize "
- "flex_bg meta info!");
- goto failed_mount2;
+
+ return 0;
+}
+
+static int ext4_load_and_init_journal(struct super_block *sb,
+ struct ext4_super_block *es,
+ struct ext4_fs_context *ctx)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ int err;
+
+ err = ext4_load_journal(sb, es, ctx->journal_devnum);
+ if (err)
+ return err;
+
+ if (ext4_has_feature_64bit(sb) &&
+ !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
+ JBD2_FEATURE_INCOMPAT_64BIT)) {
+ ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
+ goto out;
+ }
+
+ if (!set_journal_csum_feature_set(sb)) {
+ ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
+ "feature set");
+ goto out;
+ }
+
+ if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
+ !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
+ JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) {
+ ext4_msg(sb, KERN_ERR,
+ "Failed to set fast commit journal feature");
+ goto out;
+ }
+
+ /* We have now updated the journal if required, so we can
+ * validate the data journaling mode. */
+ switch (test_opt(sb, DATA_FLAGS)) {
+ case 0:
+ /* No mode set, assume a default based on the journal
+ * capabilities: ORDERED_DATA if the journal can
+ * cope, else JOURNAL_DATA
+ */
+ if (jbd2_journal_check_available_features
+ (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
+ set_opt(sb, ORDERED_DATA);
+ sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
+ } else {
+ set_opt(sb, JOURNAL_DATA);
+ sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
}
+ break;
- sbi->s_gdb_count = db_count;
- get_random_bytes(&sbi->s_next_generation, sizeof(u32));
- spin_lock_init(&sbi->s_next_gen_lock);
+ case EXT4_MOUNT_ORDERED_DATA:
+ case EXT4_MOUNT_WRITEBACK_DATA:
+ if (!jbd2_journal_check_available_features
+ (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
+ ext4_msg(sb, KERN_ERR, "Journal does not support "
+ "requested data journaling mode");
+ goto out;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
+ test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "journal_async_commit in data=ordered mode");
+ goto out;
+ }
- init_timer(&sbi->s_err_report);
- sbi->s_err_report.function = print_daily_error_info;
- sbi->s_err_report.data = (unsigned long) sb;
+ set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio);
- /* Register extent status tree shrinker */
- ext4_es_register_shrinker(sbi);
+ sbi->s_journal->j_submit_inode_data_buffers =
+ ext4_journal_submit_inode_data_buffers;
+ sbi->s_journal->j_finish_inode_data_buffers =
+ ext4_journal_finish_inode_data_buffers;
- err = percpu_counter_init(&sbi->s_freeclusters_counter,
- ext4_count_free_clusters(sb));
- if (!err) {
- err = percpu_counter_init(&sbi->s_freeinodes_counter,
- ext4_count_free_inodes(sb));
+ return 0;
+
+out:
+ ext4_journal_destroy(sbi, sbi->s_journal);
+ return -EINVAL;
+}
+
+static int ext4_check_journal_data_mode(struct super_block *sb)
+{
+ if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
+ printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with "
+ "data=journal disables delayed allocation, "
+ "dioread_nolock, O_DIRECT and fast_commit support!\n");
+ /* can't mount with both data=journal and dioread_nolock. */
+ clear_opt(sb, DIOREAD_NOLOCK);
+ clear_opt2(sb, JOURNAL_FAST_COMMIT);
+ if (test_opt2(sb, EXPLICIT_DELALLOC)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "both data=journal and delalloc");
+ return -EINVAL;
+ }
+ if (test_opt(sb, DAX_ALWAYS)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "both data=journal and dax");
+ return -EINVAL;
+ }
+ if (ext4_has_feature_encrypt(sb)) {
+ ext4_msg(sb, KERN_WARNING,
+ "encrypted files will use data=ordered "
+ "instead of data journaling mode");
+ }
+ if (test_opt(sb, DELALLOC))
+ clear_opt(sb, DELALLOC);
+ } else {
+ sb->s_iflags |= SB_I_CGROUPWB;
}
- if (!err) {
- err = percpu_counter_init(&sbi->s_dirs_counter,
- ext4_count_dirs(sb));
+
+ return 0;
+}
+
+static const char *ext4_has_journal_option(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
+ return "journal_async_commit";
+ if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM))
+ return "journal_checksum";
+ if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
+ return "commit=";
+ if (EXT4_MOUNT_DATA_FLAGS &
+ (sbi->s_mount_opt ^ sbi->s_def_mount_opt))
+ return "data=";
+ if (test_opt(sb, DATA_ERR_ABORT))
+ return "data_err=abort";
+ return NULL;
+}
+
+/*
+ * Limit the maximum folio order to 2048 blocks to prevent overestimation
+ * of reserve handle credits during the folio writeback in environments
+ * where the PAGE_SIZE exceeds 4KB.
+ */
+#define EXT4_MAX_PAGECACHE_ORDER(sb) \
+ umin(MAX_PAGECACHE_ORDER, (11 + (sb)->s_blocksize_bits - PAGE_SHIFT))
+static void ext4_set_max_mapping_order(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
+ sbi->s_max_folio_order = sbi->s_min_folio_order;
+ else
+ sbi->s_max_folio_order = EXT4_MAX_PAGECACHE_ORDER(sb);
+}
+
+static int ext4_check_large_folio(struct super_block *sb)
+{
+ const char *err_str = NULL;
+
+ if (ext4_has_feature_encrypt(sb))
+ err_str = "encrypt";
+
+ if (!err_str) {
+ ext4_set_max_mapping_order(sb);
+ } else if (sb->s_blocksize > PAGE_SIZE) {
+ ext4_msg(sb, KERN_ERR, "bs(%lu) > ps(%lu) unsupported for %s",
+ sb->s_blocksize, PAGE_SIZE, err_str);
+ return -EINVAL;
}
- if (!err) {
- err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
+
+ return 0;
+}
+
+static int ext4_load_super(struct super_block *sb, ext4_fsblk_t *lsb,
+ int silent)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es;
+ ext4_fsblk_t logical_sb_block;
+ unsigned long offset = 0;
+ struct buffer_head *bh;
+ int ret = -EINVAL;
+ int blocksize;
+
+ blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
+ if (!blocksize) {
+ ext4_msg(sb, KERN_ERR, "unable to set blocksize");
+ return -EINVAL;
}
- if (!err) {
- err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0);
+
+ /*
+ * The ext4 superblock will not be buffer aligned for other than 1kB
+ * block sizes. We need to calculate the offset from buffer start.
+ */
+ if (blocksize != EXT4_MIN_BLOCK_SIZE) {
+ logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE;
+ offset = do_div(logical_sb_block, blocksize);
+ } else {
+ logical_sb_block = sbi->s_sb_block;
}
- if (err) {
- ext4_msg(sb, KERN_ERR, "insufficient memory");
- goto failed_mount3;
+
+ bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
+ if (IS_ERR(bh)) {
+ ext4_msg(sb, KERN_ERR, "unable to read superblock");
+ return PTR_ERR(bh);
+ }
+ /*
+ * Note: s_es must be initialized as soon as possible because
+ * some ext4 macro-instructions depend on its value
+ */
+ es = (struct ext4_super_block *) (bh->b_data + offset);
+ sbi->s_es = es;
+ sb->s_magic = le16_to_cpu(es->s_magic);
+ if (sb->s_magic != EXT4_SUPER_MAGIC) {
+ if (!silent)
+ ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
+ goto out;
}
+ if (le32_to_cpu(es->s_log_block_size) >
+ (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
+ ext4_msg(sb, KERN_ERR,
+ "Invalid log block size: %u",
+ le32_to_cpu(es->s_log_block_size));
+ goto out;
+ }
+ if (le32_to_cpu(es->s_log_cluster_size) >
+ (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
+ ext4_msg(sb, KERN_ERR,
+ "Invalid log cluster size: %u",
+ le32_to_cpu(es->s_log_cluster_size));
+ goto out;
+ }
+
+ blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
+
+ /*
+ * If the default block size is not the same as the real block size,
+ * we need to reload it.
+ */
+ if (sb->s_blocksize == blocksize)
+ goto success;
+
+ /*
+ * bh must be released before kill_bdev(), otherwise
+ * it won't be freed and its page also. kill_bdev()
+ * is called by sb_set_blocksize().
+ */
+ brelse(bh);
+ /* Validate the filesystem blocksize */
+ if (!sb_set_blocksize(sb, blocksize)) {
+ ext4_msg(sb, KERN_ERR, "bad block size %d",
+ blocksize);
+ bh = NULL;
+ goto out;
+ }
+
+ logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE;
+ offset = do_div(logical_sb_block, blocksize);
+ bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
+ if (IS_ERR(bh)) {
+ ext4_msg(sb, KERN_ERR, "Can't read superblock on 2nd try");
+ ret = PTR_ERR(bh);
+ bh = NULL;
+ goto out;
+ }
+ es = (struct ext4_super_block *)(bh->b_data + offset);
+ sbi->s_es = es;
+ if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
+ ext4_msg(sb, KERN_ERR, "Magic mismatch, very weird!");
+ goto out;
+ }
+
+success:
+ sbi->s_min_folio_order = get_order(blocksize);
+ *lsb = logical_sb_block;
+ sbi->s_sbh = bh;
+ return 0;
+out:
+ brelse(bh);
+ return ret;
+}
+
+static int ext4_hash_info_init(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+ unsigned int i;
+
+ sbi->s_def_hash_version = es->s_def_hash_version;
+
+ if (sbi->s_def_hash_version > DX_HASH_LAST) {
+ ext4_msg(sb, KERN_ERR,
+ "Invalid default hash set in the superblock");
+ return -EINVAL;
+ } else if (sbi->s_def_hash_version == DX_HASH_SIPHASH) {
+ ext4_msg(sb, KERN_ERR,
+ "SIPHASH is not a valid default hash value");
+ return -EINVAL;
+ }
+
+ for (i = 0; i < 4; i++)
+ sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
+
+ if (ext4_has_feature_dir_index(sb)) {
+ i = le32_to_cpu(es->s_flags);
+ if (i & EXT2_FLAGS_UNSIGNED_HASH)
+ sbi->s_hash_unsigned = 3;
+ else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
+#ifdef __CHAR_UNSIGNED__
+ if (!sb_rdonly(sb))
+ es->s_flags |=
+ cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
+ sbi->s_hash_unsigned = 3;
+#else
+ if (!sb_rdonly(sb))
+ es->s_flags |=
+ cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
+#endif
+ }
+ }
+ return 0;
+}
+
+static int ext4_block_group_meta_init(struct super_block *sb, int silent)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+ int has_huge_files;
+
+ has_huge_files = ext4_has_feature_huge_file(sb);
+ sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
+ has_huge_files);
+ sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
+
+ sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
+ if (ext4_has_feature_64bit(sb)) {
+ if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
+ sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
+ !is_power_of_2(sbi->s_desc_size)) {
+ ext4_msg(sb, KERN_ERR,
+ "unsupported descriptor size %lu",
+ sbi->s_desc_size);
+ return -EINVAL;
+ }
+ } else
+ sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
+
+ sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
+ sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
+
+ sbi->s_inodes_per_block = sb->s_blocksize / EXT4_INODE_SIZE(sb);
+ if (sbi->s_inodes_per_block == 0 || sbi->s_blocks_per_group == 0) {
+ if (!silent)
+ ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
+ return -EINVAL;
+ }
+ if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
+ sbi->s_inodes_per_group > sb->s_blocksize * 8) {
+ ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
+ sbi->s_inodes_per_group);
+ return -EINVAL;
+ }
+ sbi->s_itb_per_group = sbi->s_inodes_per_group /
+ sbi->s_inodes_per_block;
+ sbi->s_desc_per_block = sb->s_blocksize / EXT4_DESC_SIZE(sb);
+ sbi->s_mount_state = le16_to_cpu(es->s_state) & ~EXT4_FC_REPLAY;
+ sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
+ sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
+
+ return 0;
+}
+
+/*
+ * It's hard to get stripe aligned blocks if stripe is not aligned with
+ * cluster, just disable stripe and alert user to simplify code and avoid
+ * stripe aligned allocation which will rarely succeed.
+ */
+static bool ext4_is_stripe_incompatible(struct super_block *sb, unsigned long stripe)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ return (stripe > 0 && sbi->s_cluster_ratio > 1 &&
+ stripe % sbi->s_cluster_ratio != 0);
+}
+
+static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
+{
+ struct ext4_super_block *es = NULL;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ ext4_fsblk_t logical_sb_block;
+ struct inode *root;
+ int needs_recovery;
+ int err;
+ ext4_group_t first_not_zeroed;
+ struct ext4_fs_context *ctx = fc->fs_private;
+ int silent = fc->sb_flags & SB_SILENT;
+
+ /* Set defaults for the variables that will be set during parsing */
+ if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO))
+ ctx->journal_ioprio = EXT4_DEF_JOURNAL_IOPRIO;
+
+ sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
+ sbi->s_sectors_written_start =
+ part_stat_read(sb->s_bdev, sectors[STAT_WRITE]);
+
+ err = ext4_load_super(sb, &logical_sb_block, silent);
+ if (err)
+ goto out_fail;
+
+ es = sbi->s_es;
+ sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
+
+ err = ext4_init_metadata_csum(sb, es);
+ if (err)
+ goto failed_mount;
+
+ ext4_set_def_opts(sb, es);
+
+ sbi->s_resuid = make_kuid(&init_user_ns, ext4_get_resuid(es));
+ sbi->s_resgid = make_kgid(&init_user_ns, ext4_get_resuid(es));
+ sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
+ sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
+ sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
+ sbi->s_sb_update_kb = EXT4_DEF_SB_UPDATE_INTERVAL_KB;
+ sbi->s_sb_update_sec = EXT4_DEF_SB_UPDATE_INTERVAL_SEC;
+
+ /*
+ * set default s_li_wait_mult for lazyinit, for the case there is
+ * no mount option specified.
+ */
+ sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
+
+ err = ext4_inode_info_init(sb, es);
+ if (err)
+ goto failed_mount;
+
+ err = parse_apply_sb_mount_options(sb, ctx);
+ if (err < 0)
+ goto failed_mount;
+
+ sbi->s_def_mount_opt = sbi->s_mount_opt;
+ sbi->s_def_mount_opt2 = sbi->s_mount_opt2;
+
+ err = ext4_check_opt_consistency(fc, sb);
+ if (err < 0)
+ goto failed_mount;
+
+ ext4_apply_options(fc, sb);
+
+ err = ext4_check_large_folio(sb);
+ if (err < 0)
+ goto failed_mount;
+
+ err = ext4_encoding_init(sb, es);
+ if (err)
+ goto failed_mount;
+
+ err = ext4_check_journal_data_mode(sb);
+ if (err)
+ goto failed_mount;
+
+ sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
+ (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
+
+ /* HSM events are allowed by default. */
+ sb->s_iflags |= SB_I_ALLOW_HSM;
+
+ err = ext4_check_feature_compatibility(sb, es, silent);
+ if (err)
+ goto failed_mount;
+
+ err = ext4_block_group_meta_init(sb, silent);
+ if (err)
+ goto failed_mount;
+
+ err = ext4_hash_info_init(sb);
+ if (err)
+ goto failed_mount;
+
+ err = ext4_handle_clustersize(sb);
+ if (err)
+ goto failed_mount;
+
+ err = ext4_check_geometry(sb, es);
+ if (err)
+ goto failed_mount;
+
+ timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
+ spin_lock_init(&sbi->s_error_lock);
+ INIT_WORK(&sbi->s_sb_upd_work, update_super_work);
+
+ err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed);
+ if (err)
+ goto failed_mount3;
+
+ err = ext4_es_register_shrinker(sbi);
+ if (err)
+ goto failed_mount3;
+
sbi->s_stripe = ext4_get_stripe_size(sbi);
+ if (ext4_is_stripe_incompatible(sb, sbi->s_stripe)) {
+ ext4_msg(sb, KERN_WARNING,
+ "stripe (%lu) is not aligned with cluster size (%u), "
+ "stripe is disabled",
+ sbi->s_stripe, sbi->s_cluster_ratio);
+ sbi->s_stripe = 0;
+ }
sbi->s_extent_max_zeroout_kb = 32;
/*
* set up enough so that it can read an inode
*/
- if (!test_opt(sb, NOLOAD) &&
- EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
- sb->s_op = &ext4_sops;
- else
- sb->s_op = &ext4_nojournal_sops;
+ sb->s_op = &ext4_sops;
sb->s_export_op = &ext4_export_ops;
sb->s_xattr = ext4_xattr_handlers;
+#ifdef CONFIG_FS_ENCRYPTION
+ sb->s_cop = &ext4_cryptops;
+#endif
+#ifdef CONFIG_FS_VERITY
+ sb->s_vop = &ext4_verityops;
+#endif
#ifdef CONFIG_QUOTA
sb->dq_op = &ext4_quota_operations;
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
- sb->s_qcop = &ext4_qctl_sysfile_operations;
+ if (ext4_has_feature_quota(sb))
+ sb->s_qcop = &dquot_quotactl_sysfile_ops;
else
sb->s_qcop = &ext4_qctl_operations;
+ sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
- memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
+ super_set_uuid(sb, es->s_uuid, sizeof(es->s_uuid));
+ super_set_sysfs_name_bdev(sb);
INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
mutex_init(&sbi->s_orphan_lock);
+ spin_lock_init(&sbi->s_bdev_wb_lock);
+
+ ext4_atomic_write_init(sb);
+ ext4_fast_commit_init(sb);
+
sb->s_root = NULL;
needs_recovery = (es->s_last_orphan != 0 ||
- EXT4_HAS_INCOMPAT_FEATURE(sb,
- EXT4_FEATURE_INCOMPAT_RECOVER));
+ ext4_has_feature_orphan_present(sb) ||
+ ext4_has_feature_journal_needs_recovery(sb));
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) &&
- !(sb->s_flags & MS_RDONLY))
- if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
- goto failed_mount3;
+ if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb)) {
+ err = ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block));
+ if (err)
+ goto failed_mount3a;
+ }
+ err = -EINVAL;
/*
* The first inode we look at is the journal inode. Don't try
* root first: it may be modified in the journal!
*/
- if (!test_opt(sb, NOLOAD) &&
- EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
- if (ext4_load_journal(sb, es, journal_devnum))
- goto failed_mount3;
- } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
- EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
+ if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
+ err = ext4_load_and_init_journal(sb, es, ctx);
+ if (err)
+ goto failed_mount3a;
+ if (bdev_read_only(sb->s_bdev))
+ needs_recovery = 0;
+ } else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) &&
+ ext4_has_feature_journal_needs_recovery(sb)) {
ext4_msg(sb, KERN_ERR, "required journal recovery "
"suppressed and not mounted read-only");
- goto failed_mount_wq;
+ goto failed_mount3a;
} else {
+ const char *journal_option;
+
+ /* Nojournal mode, all journal mount options are illegal */
+ journal_option = ext4_has_journal_option(sb);
+ if (journal_option != NULL) {
+ ext4_msg(sb, KERN_ERR,
+ "can't mount with %s, fs mounted w/o journal",
+ journal_option);
+ goto failed_mount3a;
+ }
+
+ sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
+ clear_opt(sb, JOURNAL_CHECKSUM);
clear_opt(sb, DATA_FLAGS);
+ clear_opt2(sb, JOURNAL_FAST_COMMIT);
sbi->s_journal = NULL;
needs_recovery = 0;
- goto no_journal;
- }
-
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT) &&
- !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
- JBD2_FEATURE_INCOMPAT_64BIT)) {
- ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
- goto failed_mount_wq;
- }
-
- if (!set_journal_csum_feature_set(sb)) {
- ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
- "feature set");
- goto failed_mount_wq;
}
- /* We have now updated the journal if required, so we can
- * validate the data journaling mode. */
- switch (test_opt(sb, DATA_FLAGS)) {
- case 0:
- /* No mode set, assume a default based on the journal
- * capabilities: ORDERED_DATA if the journal can
- * cope, else JOURNAL_DATA
- */
- if (jbd2_journal_check_available_features
- (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
- set_opt(sb, ORDERED_DATA);
- else
- set_opt(sb, JOURNAL_DATA);
- break;
-
- case EXT4_MOUNT_ORDERED_DATA:
- case EXT4_MOUNT_WRITEBACK_DATA:
- if (!jbd2_journal_check_available_features
- (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
- ext4_msg(sb, KERN_ERR, "Journal does not support "
- "requested data journaling mode");
+ if (!test_opt(sb, NO_MBCACHE)) {
+ sbi->s_ea_block_cache = ext4_xattr_create_cache();
+ if (!sbi->s_ea_block_cache) {
+ ext4_msg(sb, KERN_ERR,
+ "Failed to create ea_block_cache");
+ err = -EINVAL;
goto failed_mount_wq;
}
- default:
- break;
- }
- set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
- sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
+ if (ext4_has_feature_ea_inode(sb)) {
+ sbi->s_ea_inode_cache = ext4_xattr_create_cache();
+ if (!sbi->s_ea_inode_cache) {
+ ext4_msg(sb, KERN_ERR,
+ "Failed to create ea_inode_cache");
+ err = -EINVAL;
+ goto failed_mount_wq;
+ }
+ }
+ }
/*
- * The journal may have updated the bg summary counts, so we
- * need to update the global counters.
- */
- percpu_counter_set(&sbi->s_freeclusters_counter,
- ext4_count_free_clusters(sb));
- percpu_counter_set(&sbi->s_freeinodes_counter,
- ext4_count_free_inodes(sb));
- percpu_counter_set(&sbi->s_dirs_counter,
- ext4_count_dirs(sb));
- percpu_counter_set(&sbi->s_dirtyclusters_counter, 0);
-
-no_journal:
- /*
* Get the # of file system overhead blocks from the
* superblock if present.
*/
- if (es->s_overhead_clusters)
- sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
- else {
+ sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
+ /* ignore the precalculated value if it is ridiculous */
+ if (sbi->s_overhead > ext4_blocks_count(es))
+ sbi->s_overhead = 0;
+ /*
+ * If the bigalloc feature is not enabled recalculating the
+ * overhead doesn't take long, so we might as well just redo
+ * it to make sure we are using the correct value.
+ */
+ if (!ext4_has_feature_bigalloc(sb))
+ sbi->s_overhead = 0;
+ if (sbi->s_overhead == 0) {
err = ext4_calculate_overhead(sb);
if (err)
goto failed_mount_wq;
@@ -3956,15 +5541,7 @@ no_journal:
alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
if (!EXT4_SB(sb)->rsv_conversion_wq) {
printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
- ret = -ENOMEM;
- goto failed_mount4;
- }
-
- EXT4_SB(sb)->unrsv_conversion_wq =
- alloc_workqueue("ext4-unrsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
- if (!EXT4_SB(sb)->unrsv_conversion_wq) {
- printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
- ret = -ENOMEM;
+ err = -ENOMEM;
goto failed_mount4;
}
@@ -3973,68 +5550,60 @@ no_journal:
* so we can safely mount the rest of the filesystem now.
*/
- root = ext4_iget(sb, EXT4_ROOT_INO);
+ root = ext4_iget(sb, EXT4_ROOT_INO, EXT4_IGET_SPECIAL);
if (IS_ERR(root)) {
ext4_msg(sb, KERN_ERR, "get root inode failed");
- ret = PTR_ERR(root);
+ err = PTR_ERR(root);
root = NULL;
goto failed_mount4;
}
if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
iput(root);
+ err = -EFSCORRUPTED;
goto failed_mount4;
}
+
+ generic_set_sb_d_ops(sb);
sb->s_root = d_make_root(root);
if (!sb->s_root) {
ext4_msg(sb, KERN_ERR, "get root dentry failed");
- ret = -ENOMEM;
+ err = -ENOMEM;
goto failed_mount4;
}
- if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY))
- sb->s_flags |= MS_RDONLY;
+ err = ext4_setup_super(sb, es, sb_rdonly(sb));
+ if (err == -EROFS) {
+ sb->s_flags |= SB_RDONLY;
+ } else if (err)
+ goto failed_mount4a;
+
+ ext4_set_resv_clusters(sb);
- /* determine the minimum size of new large inodes, if present */
- if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
- sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
- EXT4_GOOD_OLD_INODE_SIZE;
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
- if (sbi->s_want_extra_isize <
- le16_to_cpu(es->s_want_extra_isize))
- sbi->s_want_extra_isize =
- le16_to_cpu(es->s_want_extra_isize);
- if (sbi->s_want_extra_isize <
- le16_to_cpu(es->s_min_extra_isize))
- sbi->s_want_extra_isize =
- le16_to_cpu(es->s_min_extra_isize);
+ if (test_opt(sb, BLOCK_VALIDITY)) {
+ err = ext4_setup_system_zone(sb);
+ if (err) {
+ ext4_msg(sb, KERN_ERR, "failed to initialize system "
+ "zone (%d)", err);
+ goto failed_mount4a;
}
}
- /* Check if enough inode space is available */
- if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
- sbi->s_inode_size) {
- sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
- EXT4_GOOD_OLD_INODE_SIZE;
- ext4_msg(sb, KERN_INFO, "required extra inode space not"
- "available");
- }
+ ext4_fc_replay_cleanup(sb);
- err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sbi));
- if (err) {
- ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
- "reserved pool", ext4_calculate_resv_clusters(sbi));
- goto failed_mount4a;
- }
+ ext4_ext_init(sb);
- err = ext4_setup_system_zone(sb);
- if (err) {
- ext4_msg(sb, KERN_ERR, "failed to initialize system "
- "zone (%d)", err);
- goto failed_mount4a;
+ /*
+ * Enable optimize_scan if number of groups is > threshold. This can be
+ * turned off by passing "mb_optimize_scan=0". This can also be
+ * turned on forcefully by passing "mb_optimize_scan=1".
+ */
+ if (!(ctx->spec & EXT4_SPEC_mb_optimize_scan)) {
+ if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD)
+ set_opt2(sb, MB_OPTIMIZE_SCAN);
+ else
+ clear_opt2(sb, MB_OPTIMIZE_SCAN);
}
- ext4_ext_init(sb);
err = ext4_mb_init(sb);
if (err) {
ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
@@ -4042,75 +5611,100 @@ no_journal:
goto failed_mount5;
}
+ /*
+ * We can only set up the journal commit callback once
+ * mballoc is initialized
+ */
+ if (sbi->s_journal)
+ sbi->s_journal->j_commit_callback =
+ ext4_journal_commit_callback;
+
+ err = ext4_percpu_param_init(sbi);
+ if (err)
+ goto failed_mount6;
+
+ if (ext4_has_feature_flex_bg(sb))
+ if (!ext4_fill_flex_info(sb)) {
+ ext4_msg(sb, KERN_ERR,
+ "unable to initialize "
+ "flex_bg meta info!");
+ err = -ENOMEM;
+ goto failed_mount6;
+ }
+
err = ext4_register_li_request(sb, first_not_zeroed);
if (err)
goto failed_mount6;
- sbi->s_kobj.kset = ext4_kset;
- init_completion(&sbi->s_kobj_unregister);
- err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
- "%s", sb->s_id);
+ err = ext4_init_orphan_info(sb);
if (err)
goto failed_mount7;
-
#ifdef CONFIG_QUOTA
/* Enable quota usage during mount. */
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
- !(sb->s_flags & MS_RDONLY)) {
+ if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
err = ext4_enable_quotas(sb);
if (err)
goto failed_mount8;
}
#endif /* CONFIG_QUOTA */
+ /*
+ * Save the original bdev mapping's wb_err value which could be
+ * used to detect the metadata async write error.
+ */
+ errseq_check_and_advance(&sb->s_bdev->bd_mapping->wb_err,
+ &sbi->s_bdev_wb_err);
EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
ext4_orphan_cleanup(sb, es);
EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
+ /*
+ * Update the checksum after updating free space/inode counters and
+ * ext4_orphan_cleanup. Otherwise the superblock can have an incorrect
+ * checksum in the buffer cache until it is written out and
+ * e2fsprogs programs trying to open a file system immediately
+ * after it is mounted can fail.
+ */
+ ext4_superblock_csum_set(sb);
if (needs_recovery) {
ext4_msg(sb, KERN_INFO, "recovery complete");
- ext4_mark_recovery_complete(sb, es);
+ err = ext4_mark_recovery_complete(sb, es);
+ if (err)
+ goto failed_mount9;
}
- if (EXT4_SB(sb)->s_journal) {
- if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
- descr = " journalled data mode";
- else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
- descr = " ordered data mode";
- else
- descr = " writeback data mode";
- } else
- descr = "out journal";
- if (test_opt(sb, DISCARD)) {
- struct request_queue *q = bdev_get_queue(sb->s_bdev);
- if (!blk_queue_discard(q))
- ext4_msg(sb, KERN_WARNING,
- "mounting with \"discard\" option, but "
- "the device does not support discard");
+ if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev)) {
+ ext4_msg(sb, KERN_WARNING,
+ "mounting with \"discard\" option, but the device does not support discard");
+ clear_opt(sb, DISCARD);
}
- ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
- "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
- *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
-
if (es->s_error_count)
mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
- kfree(orig_data);
- return 0;
+ /* Enable message ratelimiting. Default is 10 messages per 5 secs. */
+ ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
+ ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
+ ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
+ atomic_set(&sbi->s_warning_count, 0);
+ atomic_set(&sbi->s_msg_count, 0);
-cantfind_ext4:
- if (!silent)
- ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
- goto failed_mount;
+ /* Register sysfs after all initializations are complete. */
+ err = ext4_register_sysfs(sb);
+ if (err)
+ goto failed_mount9;
-#ifdef CONFIG_QUOTA
-failed_mount8:
- kobject_del(&sbi->s_kobj);
-#endif
+ return 0;
+
+failed_mount9:
+ ext4_quotas_off(sb, EXT4_MAXQUOTAS);
+failed_mount8: __maybe_unused
+ ext4_release_orphan_info(sb);
failed_mount7:
ext4_unregister_li_request(sb);
failed_mount6:
ext4_mb_release(sb);
+ ext4_flex_groups_free(sbi);
+ ext4_percpu_param_destroy(sbi);
failed_mount5:
ext4_ext_release(sb);
ext4_release_system_zone(sb);
@@ -4121,49 +5715,98 @@ failed_mount4:
ext4_msg(sb, KERN_ERR, "mount failed");
if (EXT4_SB(sb)->rsv_conversion_wq)
destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
- if (EXT4_SB(sb)->unrsv_conversion_wq)
- destroy_workqueue(EXT4_SB(sb)->unrsv_conversion_wq);
failed_mount_wq:
+ ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
+ sbi->s_ea_inode_cache = NULL;
+
+ ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
+ sbi->s_ea_block_cache = NULL;
+
if (sbi->s_journal) {
- jbd2_journal_destroy(sbi->s_journal);
- sbi->s_journal = NULL;
+ ext4_journal_destroy(sbi, sbi->s_journal);
}
-failed_mount3:
+failed_mount3a:
ext4_es_unregister_shrinker(sbi);
- del_timer(&sbi->s_err_report);
- if (sbi->s_flex_groups)
- ext4_kvfree(sbi->s_flex_groups);
- percpu_counter_destroy(&sbi->s_freeclusters_counter);
- percpu_counter_destroy(&sbi->s_freeinodes_counter);
- percpu_counter_destroy(&sbi->s_dirs_counter);
- percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
- percpu_counter_destroy(&sbi->s_extent_cache_cnt);
- if (sbi->s_mmp_tsk)
- kthread_stop(sbi->s_mmp_tsk);
-failed_mount2:
- for (i = 0; i < db_count; i++)
- brelse(sbi->s_group_desc[i]);
- ext4_kvfree(sbi->s_group_desc);
+failed_mount3:
+ /* flush s_sb_upd_work before sbi destroy */
+ flush_work(&sbi->s_sb_upd_work);
+ ext4_stop_mmpd(sbi);
+ timer_delete_sync(&sbi->s_err_report);
+ ext4_group_desc_free(sbi);
failed_mount:
- if (sbi->s_chksum_driver)
- crypto_free_shash(sbi->s_chksum_driver);
- if (sbi->s_proc) {
- remove_proc_entry("options", sbi->s_proc);
- remove_proc_entry(sb->s_id, ext4_proc_root);
- }
+#if IS_ENABLED(CONFIG_UNICODE)
+ utf8_unload(sb->s_encoding);
+#endif
+
#ifdef CONFIG_QUOTA
- for (i = 0; i < MAXQUOTAS; i++)
- kfree(sbi->s_qf_names[i]);
+ for (unsigned int i = 0; i < EXT4_MAXQUOTAS; i++)
+ kfree(get_qf_name(sb, sbi, i));
#endif
- ext4_blkdev_remove(sbi);
- brelse(bh);
+ fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
+ brelse(sbi->s_sbh);
+ if (sbi->s_journal_bdev_file) {
+ invalidate_bdev(file_bdev(sbi->s_journal_bdev_file));
+ bdev_fput(sbi->s_journal_bdev_file);
+ }
out_fail:
+ invalidate_bdev(sb->s_bdev);
sb->s_fs_info = NULL;
- kfree(sbi->s_blockgroup_lock);
- kfree(sbi);
-out_free_orig:
- kfree(orig_data);
- return err ? err : ret;
+ return err;
+}
+
+static int ext4_fill_super(struct super_block *sb, struct fs_context *fc)
+{
+ struct ext4_fs_context *ctx = fc->fs_private;
+ struct ext4_sb_info *sbi;
+ const char *descr;
+ int ret;
+
+ sbi = ext4_alloc_sbi(sb);
+ if (!sbi)
+ return -ENOMEM;
+
+ fc->s_fs_info = sbi;
+
+ /* Cleanup superblock name */
+ strreplace(sb->s_id, '/', '!');
+
+ sbi->s_sb_block = 1; /* Default super block location */
+ if (ctx->spec & EXT4_SPEC_s_sb_block)
+ sbi->s_sb_block = ctx->s_sb_block;
+
+ ret = __ext4_fill_super(fc, sb);
+ if (ret < 0)
+ goto free_sbi;
+
+ if (sbi->s_journal) {
+ if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
+ descr = " journalled data mode";
+ else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
+ descr = " ordered data mode";
+ else
+ descr = " writeback data mode";
+ } else
+ descr = "out journal";
+
+ if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
+ ext4_msg(sb, KERN_INFO, "mounted filesystem %pU %s with%s. "
+ "Quota mode: %s.", &sb->s_uuid,
+ sb_rdonly(sb) ? "ro" : "r/w", descr,
+ ext4_quota_mode(sb));
+
+ /* Update the s_overhead_clusters if necessary */
+ ext4_update_overhead(sb, false);
+ return 0;
+
+free_sbi:
+ ext4_free_sbi(sbi);
+ fc->s_fs_info = NULL;
+ return ret;
+}
+
+static int ext4_get_tree(struct fs_context *fc)
+{
+ return get_tree_bdev(fc, ext4_fill_super);
}
/*
@@ -4178,95 +5821,138 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
journal->j_commit_interval = sbi->s_commit_interval;
journal->j_min_batch_time = sbi->s_min_batch_time;
journal->j_max_batch_time = sbi->s_max_batch_time;
+ ext4_fc_init(sb, journal);
write_lock(&journal->j_state_lock);
if (test_opt(sb, BARRIER))
journal->j_flags |= JBD2_BARRIER;
else
journal->j_flags &= ~JBD2_BARRIER;
- if (test_opt(sb, DATA_ERR_ABORT))
- journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
- else
- journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
+ /*
+ * Always enable journal cycle record option, letting the journal
+ * records log transactions continuously between each mount.
+ */
+ journal->j_flags |= JBD2_CYCLE_RECORD;
write_unlock(&journal->j_state_lock);
}
-static journal_t *ext4_get_journal(struct super_block *sb,
- unsigned int journal_inum)
+static struct inode *ext4_get_journal_inode(struct super_block *sb,
+ unsigned int journal_inum)
{
struct inode *journal_inode;
- journal_t *journal;
-
- BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
-
- /* First, test for the existence of a valid inode on disk. Bad
- * things happen if we iget() an unused inode, as the subsequent
- * iput() will try to delete it. */
- journal_inode = ext4_iget(sb, journal_inum);
+ /*
+ * Test for the existence of a valid inode on disk. Bad things
+ * happen if we iget() an unused inode, as the subsequent iput()
+ * will try to delete it.
+ */
+ journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL);
if (IS_ERR(journal_inode)) {
ext4_msg(sb, KERN_ERR, "no journal found");
- return NULL;
+ return ERR_CAST(journal_inode);
}
if (!journal_inode->i_nlink) {
make_bad_inode(journal_inode);
iput(journal_inode);
ext4_msg(sb, KERN_ERR, "journal inode is deleted");
- return NULL;
+ return ERR_PTR(-EFSCORRUPTED);
}
-
- jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
- journal_inode, journal_inode->i_size);
- if (!S_ISREG(journal_inode->i_mode)) {
+ if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) {
ext4_msg(sb, KERN_ERR, "invalid journal inode");
iput(journal_inode);
- return NULL;
+ return ERR_PTR(-EFSCORRUPTED);
}
+ ext4_debug("Journal inode found at %p: %lld bytes\n",
+ journal_inode, journal_inode->i_size);
+ return journal_inode;
+}
+
+static int ext4_journal_bmap(journal_t *journal, sector_t *block)
+{
+ struct ext4_map_blocks map;
+ int ret;
+
+ if (journal->j_inode == NULL)
+ return 0;
+
+ map.m_lblk = *block;
+ map.m_len = 1;
+ ret = ext4_map_blocks(NULL, journal->j_inode, &map, 0);
+ if (ret <= 0) {
+ ext4_msg(journal->j_inode->i_sb, KERN_CRIT,
+ "journal bmap failed: block %llu ret %d\n",
+ *block, ret);
+ jbd2_journal_abort(journal, ret ? ret : -EFSCORRUPTED);
+ return ret;
+ }
+ *block = map.m_pblk;
+ return 0;
+}
+
+static journal_t *ext4_open_inode_journal(struct super_block *sb,
+ unsigned int journal_inum)
+{
+ struct inode *journal_inode;
+ journal_t *journal;
+
+ journal_inode = ext4_get_journal_inode(sb, journal_inum);
+ if (IS_ERR(journal_inode))
+ return ERR_CAST(journal_inode);
+
journal = jbd2_journal_init_inode(journal_inode);
- if (!journal) {
+ if (IS_ERR(journal)) {
ext4_msg(sb, KERN_ERR, "Could not load journal inode");
iput(journal_inode);
- return NULL;
+ return ERR_CAST(journal);
}
journal->j_private = sb;
+ journal->j_bmap = ext4_journal_bmap;
ext4_init_journal_params(sb, journal);
return journal;
}
-static journal_t *ext4_get_dev_journal(struct super_block *sb,
- dev_t j_dev)
+static struct file *ext4_get_journal_blkdev(struct super_block *sb,
+ dev_t j_dev, ext4_fsblk_t *j_start,
+ ext4_fsblk_t *j_len)
{
struct buffer_head *bh;
- journal_t *journal;
- ext4_fsblk_t start;
- ext4_fsblk_t len;
+ struct block_device *bdev;
+ struct file *bdev_file;
int hblock, blocksize;
ext4_fsblk_t sb_block;
unsigned long offset;
struct ext4_super_block *es;
- struct block_device *bdev;
-
- BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
+ int errno;
- bdev = ext4_blkdev_get(j_dev, sb);
- if (bdev == NULL)
- return NULL;
+ bdev_file = bdev_file_open_by_dev(j_dev,
+ BLK_OPEN_READ | BLK_OPEN_WRITE | BLK_OPEN_RESTRICT_WRITES,
+ sb, &fs_holder_ops);
+ if (IS_ERR(bdev_file)) {
+ ext4_msg(sb, KERN_ERR,
+ "failed to open journal device unknown-block(%u,%u) %ld",
+ MAJOR(j_dev), MINOR(j_dev), PTR_ERR(bdev_file));
+ return bdev_file;
+ }
+ bdev = file_bdev(bdev_file);
blocksize = sb->s_blocksize;
hblock = bdev_logical_block_size(bdev);
if (blocksize < hblock) {
ext4_msg(sb, KERN_ERR,
"blocksize too small for journal device");
+ errno = -EINVAL;
goto out_bdev;
}
sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
offset = EXT4_MIN_BLOCK_SIZE % blocksize;
- set_blocksize(bdev, blocksize);
- if (!(bh = __bread(bdev, sb_block, blocksize))) {
+ set_blocksize(bdev_file, blocksize);
+ bh = __bread(bdev, sb_block, blocksize);
+ if (!bh) {
ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
"external journal");
+ errno = -EINVAL;
goto out_bdev;
}
@@ -4274,50 +5960,74 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
!(le32_to_cpu(es->s_feature_incompat) &
EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
- ext4_msg(sb, KERN_ERR, "external journal has "
- "bad superblock");
- brelse(bh);
- goto out_bdev;
+ ext4_msg(sb, KERN_ERR, "external journal has bad superblock");
+ errno = -EFSCORRUPTED;
+ goto out_bh;
+ }
+
+ if ((le32_to_cpu(es->s_feature_ro_compat) &
+ EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
+ es->s_checksum != ext4_superblock_csum(es)) {
+ ext4_msg(sb, KERN_ERR, "external journal has corrupt superblock");
+ errno = -EFSCORRUPTED;
+ goto out_bh;
}
if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
ext4_msg(sb, KERN_ERR, "journal UUID does not match");
- brelse(bh);
- goto out_bdev;
+ errno = -EFSCORRUPTED;
+ goto out_bh;
}
- len = ext4_blocks_count(es);
- start = sb_block + 1;
- brelse(bh); /* we're done with the superblock */
+ *j_start = sb_block + 1;
+ *j_len = ext4_blocks_count(es);
+ brelse(bh);
+ return bdev_file;
+
+out_bh:
+ brelse(bh);
+out_bdev:
+ bdev_fput(bdev_file);
+ return ERR_PTR(errno);
+}
- journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
- start, len, blocksize);
- if (!journal) {
+static journal_t *ext4_open_dev_journal(struct super_block *sb,
+ dev_t j_dev)
+{
+ journal_t *journal;
+ ext4_fsblk_t j_start;
+ ext4_fsblk_t j_len;
+ struct file *bdev_file;
+ int errno = 0;
+
+ bdev_file = ext4_get_journal_blkdev(sb, j_dev, &j_start, &j_len);
+ if (IS_ERR(bdev_file))
+ return ERR_CAST(bdev_file);
+
+ journal = jbd2_journal_init_dev(file_bdev(bdev_file), sb->s_bdev, j_start,
+ j_len, sb->s_blocksize);
+ if (IS_ERR(journal)) {
ext4_msg(sb, KERN_ERR, "failed to create device journal");
+ errno = PTR_ERR(journal);
goto out_bdev;
}
- journal->j_private = sb;
- ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer);
- wait_on_buffer(journal->j_sb_buffer);
- if (!buffer_uptodate(journal->j_sb_buffer)) {
- ext4_msg(sb, KERN_ERR, "I/O error on journal device");
- goto out_journal;
- }
if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
ext4_msg(sb, KERN_ERR, "External journal has more than one "
"user (unsupported) - %d",
be32_to_cpu(journal->j_superblock->s_nr_users));
+ errno = -EINVAL;
goto out_journal;
}
- EXT4_SB(sb)->journal_bdev = bdev;
+ journal->j_private = sb;
+ EXT4_SB(sb)->s_journal_bdev_file = bdev_file;
ext4_init_journal_params(sb, journal);
return journal;
out_journal:
- jbd2_journal_destroy(journal);
+ ext4_journal_destroy(EXT4_SB(sb), journal);
out_bdev:
- ext4_blkdev_put(bdev);
- return NULL;
+ bdev_fput(bdev_file);
+ return ERR_PTR(errno);
}
static int ext4_load_journal(struct super_block *sb,
@@ -4329,8 +6039,10 @@ static int ext4_load_journal(struct super_block *sb,
dev_t journal_dev;
int err = 0;
int really_read_only;
+ int journal_dev_ro;
- BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
+ if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
+ return -EFSCORRUPTED;
if (journal_devnum &&
journal_devnum != le32_to_cpu(es->s_journal_dev)) {
@@ -4340,87 +6052,209 @@ static int ext4_load_journal(struct super_block *sb,
} else
journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
- really_read_only = bdev_read_only(sb->s_bdev);
+ if (journal_inum && journal_dev) {
+ ext4_msg(sb, KERN_ERR,
+ "filesystem has both journal inode and journal device!");
+ return -EINVAL;
+ }
+
+ if (journal_inum) {
+ journal = ext4_open_inode_journal(sb, journal_inum);
+ if (IS_ERR(journal))
+ return PTR_ERR(journal);
+ } else {
+ journal = ext4_open_dev_journal(sb, journal_dev);
+ if (IS_ERR(journal))
+ return PTR_ERR(journal);
+ }
+
+ journal_dev_ro = bdev_read_only(journal->j_dev);
+ really_read_only = bdev_read_only(sb->s_bdev) | journal_dev_ro;
+
+ if (journal_dev_ro && !sb_rdonly(sb)) {
+ ext4_msg(sb, KERN_ERR,
+ "journal device read-only, try mounting with '-o ro'");
+ err = -EROFS;
+ goto err_out;
+ }
/*
* Are we loading a blank journal or performing recovery after a
* crash? For recovery, we need to check in advance whether we
* can get read-write access to the device.
*/
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
- if (sb->s_flags & MS_RDONLY) {
+ if (ext4_has_feature_journal_needs_recovery(sb)) {
+ if (sb_rdonly(sb)) {
ext4_msg(sb, KERN_INFO, "INFO: recovery "
"required on readonly filesystem");
if (really_read_only) {
ext4_msg(sb, KERN_ERR, "write access "
- "unavailable, cannot proceed");
- return -EROFS;
+ "unavailable, cannot proceed "
+ "(try mounting with noload)");
+ err = -EROFS;
+ goto err_out;
}
ext4_msg(sb, KERN_INFO, "write access will "
"be enabled during recovery");
}
}
- if (journal_inum && journal_dev) {
- ext4_msg(sb, KERN_ERR, "filesystem has both journal "
- "and inode journals!");
- return -EINVAL;
- }
-
- if (journal_inum) {
- if (!(journal = ext4_get_journal(sb, journal_inum)))
- return -EINVAL;
- } else {
- if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
- return -EINVAL;
- }
-
if (!(journal->j_flags & JBD2_BARRIER))
ext4_msg(sb, KERN_INFO, "barriers disabled");
- if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
+ if (!ext4_has_feature_journal_needs_recovery(sb))
err = jbd2_journal_wipe(journal, !really_read_only);
if (!err) {
char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
+ __le16 orig_state;
+ bool changed = false;
+
if (save)
memcpy(save, ((char *) es) +
EXT4_S_ERR_START, EXT4_S_ERR_LEN);
err = jbd2_journal_load(journal);
- if (save)
+ if (save && memcmp(((char *) es) + EXT4_S_ERR_START,
+ save, EXT4_S_ERR_LEN)) {
memcpy(((char *) es) + EXT4_S_ERR_START,
save, EXT4_S_ERR_LEN);
+ changed = true;
+ }
kfree(save);
+ orig_state = es->s_state;
+ es->s_state |= cpu_to_le16(EXT4_SB(sb)->s_mount_state &
+ EXT4_ERROR_FS);
+ if (orig_state != es->s_state)
+ changed = true;
+ /* Write out restored error information to the superblock */
+ if (changed && !really_read_only) {
+ int err2;
+ err2 = ext4_commit_super(sb);
+ err = err ? : err2;
+ }
}
if (err) {
ext4_msg(sb, KERN_ERR, "error loading journal");
- jbd2_journal_destroy(journal);
- return err;
+ goto err_out;
}
EXT4_SB(sb)->s_journal = journal;
- ext4_clear_journal_err(sb, es);
+ err = ext4_clear_journal_err(sb, es);
+ if (err) {
+ ext4_journal_destroy(EXT4_SB(sb), journal);
+ return err;
+ }
if (!really_read_only && journal_devnum &&
journal_devnum != le32_to_cpu(es->s_journal_dev)) {
es->s_journal_dev = cpu_to_le32(journal_devnum);
-
- /* Make sure we flush the recovery flag to disk. */
- ext4_commit_super(sb, 1);
+ ext4_commit_super(sb);
+ }
+ if (!really_read_only && journal_inum &&
+ journal_inum != le32_to_cpu(es->s_journal_inum)) {
+ es->s_journal_inum = cpu_to_le32(journal_inum);
+ ext4_commit_super(sb);
}
return 0;
+
+err_out:
+ ext4_journal_destroy(EXT4_SB(sb), journal);
+ return err;
}
-static int ext4_commit_super(struct super_block *sb, int sync)
+/* Copy state of EXT4_SB(sb) into buffer for on-disk superblock */
+static void ext4_update_super(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+ struct buffer_head *sbh = sbi->s_sbh;
+
+ lock_buffer(sbh);
+ /*
+ * If the file system is mounted read-only, don't update the
+ * superblock write time. This avoids updating the superblock
+ * write time when we are mounting the root file system
+ * read/only but we need to replay the journal; at that point,
+ * for people who are east of GMT and who make their clock
+ * tick in localtime for Windows bug-for-bug compatibility,
+ * the clock is set in the future, and this will cause e2fsck
+ * to complain and force a full file system check.
+ */
+ if (!sb_rdonly(sb))
+ ext4_update_tstamp(es, s_wtime);
+ es->s_kbytes_written =
+ cpu_to_le64(sbi->s_kbytes_written +
+ ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
+ sbi->s_sectors_written_start) >> 1));
+ if (percpu_counter_initialized(&sbi->s_freeclusters_counter))
+ ext4_free_blocks_count_set(es,
+ EXT4_C2B(sbi, percpu_counter_sum_positive(
+ &sbi->s_freeclusters_counter)));
+ if (percpu_counter_initialized(&sbi->s_freeinodes_counter))
+ es->s_free_inodes_count =
+ cpu_to_le32(percpu_counter_sum_positive(
+ &sbi->s_freeinodes_counter));
+ /* Copy error information to the on-disk superblock */
+ spin_lock(&sbi->s_error_lock);
+ if (sbi->s_add_error_count > 0) {
+ es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
+ if (!es->s_first_error_time && !es->s_first_error_time_hi) {
+ __ext4_update_tstamp(&es->s_first_error_time,
+ &es->s_first_error_time_hi,
+ sbi->s_first_error_time);
+ strtomem_pad(es->s_first_error_func,
+ sbi->s_first_error_func, 0);
+ es->s_first_error_line =
+ cpu_to_le32(sbi->s_first_error_line);
+ es->s_first_error_ino =
+ cpu_to_le32(sbi->s_first_error_ino);
+ es->s_first_error_block =
+ cpu_to_le64(sbi->s_first_error_block);
+ es->s_first_error_errcode =
+ ext4_errno_to_code(sbi->s_first_error_code);
+ }
+ __ext4_update_tstamp(&es->s_last_error_time,
+ &es->s_last_error_time_hi,
+ sbi->s_last_error_time);
+ strtomem_pad(es->s_last_error_func, sbi->s_last_error_func, 0);
+ es->s_last_error_line = cpu_to_le32(sbi->s_last_error_line);
+ es->s_last_error_ino = cpu_to_le32(sbi->s_last_error_ino);
+ es->s_last_error_block = cpu_to_le64(sbi->s_last_error_block);
+ es->s_last_error_errcode =
+ ext4_errno_to_code(sbi->s_last_error_code);
+ /*
+ * Start the daily error reporting function if it hasn't been
+ * started already
+ */
+ if (!es->s_error_count)
+ mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);
+ le32_add_cpu(&es->s_error_count, sbi->s_add_error_count);
+ sbi->s_add_error_count = 0;
+ }
+ spin_unlock(&sbi->s_error_lock);
+
+ ext4_superblock_csum_set(sb);
+ unlock_buffer(sbh);
+}
+
+static int ext4_commit_super(struct super_block *sb)
{
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
- int error = 0;
- if (!sbh || block_device_ejected(sb))
- return error;
- if (buffer_write_io_error(sbh)) {
+ if (!sbh)
+ return -EINVAL;
+
+ ext4_update_super(sb);
+
+ lock_buffer(sbh);
+ /* Buffer got discarded which means block device got invalidated */
+ if (!buffer_mapped(sbh)) {
+ unlock_buffer(sbh);
+ return -EIO;
+ }
+
+ if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
/*
* Oh, dear. A previous attempt to write the
* superblock failed. This could happen because the
@@ -4434,49 +6268,21 @@ static int ext4_commit_super(struct super_block *sb, int sync)
clear_buffer_write_io_error(sbh);
set_buffer_uptodate(sbh);
}
- /*
- * If the file system is mounted read-only, don't update the
- * superblock write time. This avoids updating the superblock
- * write time when we are mounting the root file system
- * read/only but we need to replay the journal; at that point,
- * for people who are east of GMT and who make their clock
- * tick in localtime for Windows bug-for-bug compatibility,
- * the clock is set in the future, and this will cause e2fsck
- * to complain and force a full file system check.
- */
- if (!(sb->s_flags & MS_RDONLY))
- es->s_wtime = cpu_to_le32(get_seconds());
- if (sb->s_bdev->bd_part)
- es->s_kbytes_written =
- cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
- ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
- EXT4_SB(sb)->s_sectors_written_start) >> 1));
- else
- es->s_kbytes_written =
- cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
- ext4_free_blocks_count_set(es,
- EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
- &EXT4_SB(sb)->s_freeclusters_counter)));
- es->s_free_inodes_count =
- cpu_to_le32(percpu_counter_sum_positive(
- &EXT4_SB(sb)->s_freeinodes_counter));
- BUFFER_TRACE(sbh, "marking dirty");
- ext4_superblock_csum_set(sb);
- mark_buffer_dirty(sbh);
- if (sync) {
- error = sync_dirty_buffer(sbh);
- if (error)
- return error;
-
- error = buffer_write_io_error(sbh);
- if (error) {
- ext4_msg(sb, KERN_ERR, "I/O error while writing "
- "superblock");
- clear_buffer_write_io_error(sbh);
- set_buffer_uptodate(sbh);
- }
+ get_bh(sbh);
+ /* Clear potential dirty bit if it was journalled update */
+ clear_buffer_dirty(sbh);
+ sbh->b_end_io = end_buffer_write_sync;
+ submit_bh(REQ_OP_WRITE | REQ_SYNC |
+ (test_opt(sb, BARRIER) ? REQ_FUA : 0), sbh);
+ wait_on_buffer(sbh);
+ if (buffer_write_io_error(sbh)) {
+ ext4_msg(sb, KERN_ERR, "I/O error while writing "
+ "superblock");
+ clear_buffer_write_io_error(sbh);
+ set_buffer_uptodate(sbh);
+ return -EIO;
}
- return error;
+ return 0;
}
/*
@@ -4484,27 +6290,39 @@ static int ext4_commit_super(struct super_block *sb, int sync)
* remounting) the filesystem readonly, then we will end up with a
* consistent fs on disk. Record that fact.
*/
-static void ext4_mark_recovery_complete(struct super_block *sb,
- struct ext4_super_block *es)
+static int ext4_mark_recovery_complete(struct super_block *sb,
+ struct ext4_super_block *es)
{
+ int err;
journal_t *journal = EXT4_SB(sb)->s_journal;
- if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
- BUG_ON(journal != NULL);
- return;
+ if (!ext4_has_feature_journal(sb)) {
+ if (journal != NULL) {
+ ext4_error(sb, "Journal got removed while the fs was "
+ "mounted!");
+ return -EFSCORRUPTED;
+ }
+ return 0;
}
jbd2_journal_lock_updates(journal);
- if (jbd2_journal_flush(journal) < 0)
+ err = jbd2_journal_flush(journal, 0);
+ if (err < 0)
goto out;
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
- sb->s_flags & MS_RDONLY) {
- EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
- ext4_commit_super(sb, 1);
+ if (sb_rdonly(sb) && (ext4_has_feature_journal_needs_recovery(sb) ||
+ ext4_has_feature_orphan_present(sb))) {
+ if (!ext4_orphan_file_empty(sb)) {
+ ext4_error(sb, "Orphan file not empty on read-only fs.");
+ err = -EFSCORRUPTED;
+ goto out;
+ }
+ ext4_clear_feature_journal_needs_recovery(sb);
+ ext4_clear_feature_orphan_present(sb);
+ ext4_commit_super(sb);
}
-
out:
jbd2_journal_unlock_updates(journal);
+ return err;
}
/*
@@ -4512,14 +6330,17 @@ out:
* has recorded an error from a previous lifetime, move that error to the
* main filesystem now.
*/
-static void ext4_clear_journal_err(struct super_block *sb,
+static int ext4_clear_journal_err(struct super_block *sb,
struct ext4_super_block *es)
{
journal_t *journal;
int j_errno;
const char *errstr;
- BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
+ if (!ext4_has_feature_journal(sb)) {
+ ext4_error(sb, "Journal got removed while the fs was mounted!");
+ return -EFSCORRUPTED;
+ }
journal = EXT4_SB(sb)->s_journal;
@@ -4535,15 +6356,18 @@ static void ext4_clear_journal_err(struct super_block *sb,
errstr = ext4_decode_error(sb, j_errno, nbuf);
ext4_warning(sb, "Filesystem error recorded "
"from previous mount: %s", errstr);
- ext4_warning(sb, "Marking fs in need of filesystem check.");
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
- ext4_commit_super(sb, 1);
+ j_errno = ext4_commit_super(sb);
+ if (j_errno)
+ return j_errno;
+ ext4_warning(sb, "Marked fs in need of filesystem check.");
jbd2_journal_clear_err(journal);
jbd2_journal_update_sb_errno(journal);
}
+ return 0;
}
/*
@@ -4552,13 +6376,7 @@ static void ext4_clear_journal_err(struct super_block *sb,
*/
int ext4_force_commit(struct super_block *sb)
{
- journal_t *journal;
-
- if (sb->s_flags & MS_RDONLY)
- return 0;
-
- journal = EXT4_SB(sb)->s_journal;
- return ext4_journal_force_commit(journal);
+ return ext4_journal_force_commit(EXT4_SB(sb)->s_journal);
}
static int ext4_sync_fs(struct super_block *sb, int wait)
@@ -4568,9 +6386,12 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
bool needs_barrier = false;
struct ext4_sb_info *sbi = EXT4_SB(sb);
+ ret = ext4_emergency_state(sb);
+ if (unlikely(ret))
+ return ret;
+
trace_ext4_sync_fs(sb, wait);
flush_workqueue(sbi->rsv_conversion_wq);
- flush_workqueue(sbi->unrsv_conversion_wq);
/*
* Writeback quota in non-journalled quota case - journalled quota has
* no dirty dquots
@@ -4581,18 +6402,22 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
* being sent at the end of the function. But we can skip it if
* transaction_commit will do it for us.
*/
- target = jbd2_get_latest_transaction(sbi->s_journal);
- if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
- !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
+ if (sbi->s_journal) {
+ target = jbd2_get_latest_transaction(sbi->s_journal);
+ if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
+ !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
+ needs_barrier = true;
+
+ if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
+ if (wait)
+ ret = jbd2_log_wait_commit(sbi->s_journal,
+ target);
+ }
+ } else if (wait && test_opt(sb, BARRIER))
needs_barrier = true;
-
- if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
- if (wait)
- ret = jbd2_log_wait_commit(sbi->s_journal, target);
- }
if (needs_barrier) {
int err;
- err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
+ err = blkdev_issue_flush(sb->s_bdev);
if (!ret)
ret = err;
}
@@ -4600,20 +6425,6 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
return ret;
}
-static int ext4_sync_fs_nojournal(struct super_block *sb, int wait)
-{
- int ret = 0;
-
- trace_ext4_sync_fs(sb, wait);
- flush_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
- flush_workqueue(EXT4_SB(sb)->unrsv_conversion_wq);
- dquot_writeback_dquots(sb, -1);
- if (wait && test_opt(sb, BARRIER))
- ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
-
- return ret;
-}
-
/*
* LVM calls this function before a (read-only) snapshot is created. This
* gives us a chance to flush the journal completely and mark the fs clean.
@@ -4625,30 +6436,31 @@ static int ext4_sync_fs_nojournal(struct super_block *sb, int wait)
static int ext4_freeze(struct super_block *sb)
{
int error = 0;
- journal_t *journal;
+ journal_t *journal = EXT4_SB(sb)->s_journal;
- if (sb->s_flags & MS_RDONLY)
- return 0;
+ if (journal) {
+ /* Now we set up the journal barrier. */
+ jbd2_journal_lock_updates(journal);
- journal = EXT4_SB(sb)->s_journal;
-
- /* Now we set up the journal barrier. */
- jbd2_journal_lock_updates(journal);
+ /*
+ * Don't clear the needs_recovery flag if we failed to
+ * flush the journal.
+ */
+ error = jbd2_journal_flush(journal, 0);
+ if (error < 0)
+ goto out;
- /*
- * Don't clear the needs_recovery flag if we failed to flush
- * the journal.
- */
- error = jbd2_journal_flush(journal);
- if (error < 0)
- goto out;
+ /* Journal blocked and flushed, clear needs_recovery flag. */
+ ext4_clear_feature_journal_needs_recovery(sb);
+ if (ext4_orphan_file_empty(sb))
+ ext4_clear_feature_orphan_present(sb);
+ }
- /* Journal blocked and flushed, clear needs_recovery flag. */
- EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
- error = ext4_commit_super(sb, 1);
+ error = ext4_commit_super(sb);
out:
- /* we rely on upper layer to stop further updates */
- jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+ if (journal)
+ /* we rely on upper layer to stop further updates */
+ jbd2_journal_unlock_updates(journal);
return error;
}
@@ -4658,12 +6470,17 @@ out:
*/
static int ext4_unfreeze(struct super_block *sb)
{
- if (sb->s_flags & MS_RDONLY)
+ if (ext4_emergency_state(sb))
return 0;
- /* Reset the needs_recovery flag before the fs is unlocked. */
- EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
- ext4_commit_super(sb, 1);
+ if (EXT4_SB(sb)->s_journal) {
+ /* Reset the needs_recovery flag before the fs is unlocked. */
+ ext4_set_feature_journal_needs_recovery(sb);
+ if (ext4_has_feature_orphan_file(sb))
+ ext4_set_feature_orphan_present(sb);
+ }
+
+ ext4_commit_super(sb);
return 0;
}
@@ -4679,24 +6496,26 @@ struct ext4_mount_options {
u32 s_min_batch_time, s_max_batch_time;
#ifdef CONFIG_QUOTA
int s_jquota_fmt;
- char *s_qf_names[MAXQUOTAS];
+ char *s_qf_names[EXT4_MAXQUOTAS];
#endif
};
-static int ext4_remount(struct super_block *sb, int *flags, char *data)
+static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
{
+ struct ext4_fs_context *ctx = fc->fs_private;
struct ext4_super_block *es;
struct ext4_sb_info *sbi = EXT4_SB(sb);
unsigned long old_sb_flags;
struct ext4_mount_options old_opts;
- int enable_quota = 0;
ext4_group_t g;
- unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
int err = 0;
+ int alloc_ctx;
#ifdef CONFIG_QUOTA
+ int enable_quota = 0;
int i, j;
+ char *to_free[EXT4_MAXQUOTAS];
#endif
- char *orig_data = kstrdup(data, GFP_KERNEL);
+
/* Store the original options */
old_sb_flags = sb->s_flags;
@@ -4709,50 +6528,113 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
old_opts.s_max_batch_time = sbi->s_max_batch_time;
#ifdef CONFIG_QUOTA
old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
- for (i = 0; i < MAXQUOTAS; i++)
+ for (i = 0; i < EXT4_MAXQUOTAS; i++)
if (sbi->s_qf_names[i]) {
- old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i],
- GFP_KERNEL);
+ char *qf_name = get_qf_name(sb, sbi, i);
+
+ old_opts.s_qf_names[i] = kstrdup(qf_name, GFP_KERNEL);
if (!old_opts.s_qf_names[i]) {
for (j = 0; j < i; j++)
kfree(old_opts.s_qf_names[j]);
- kfree(orig_data);
return -ENOMEM;
}
} else
old_opts.s_qf_names[i] = NULL;
#endif
- if (sbi->s_journal && sbi->s_journal->j_task->io_context)
- journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
+ if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)) {
+ if (sbi->s_journal && sbi->s_journal->j_task->io_context)
+ ctx->journal_ioprio =
+ sbi->s_journal->j_task->io_context->ioprio;
+ else
+ ctx->journal_ioprio = EXT4_DEF_JOURNAL_IOPRIO;
+
+ }
+
+ if ((ctx->spec & EXT4_SPEC_s_stripe) &&
+ ext4_is_stripe_incompatible(sb, ctx->s_stripe)) {
+ ext4_msg(sb, KERN_WARNING,
+ "stripe (%lu) is not aligned with cluster size (%u), "
+ "stripe is disabled",
+ ctx->s_stripe, sbi->s_cluster_ratio);
+ ctx->s_stripe = 0;
+ }
/*
- * Allow the "check" option to be passed as a remount option.
+ * Changing the DIOREAD_NOLOCK or DELALLOC mount options may cause
+ * two calls to ext4_should_dioread_nolock() to return inconsistent
+ * values, triggering WARN_ON in ext4_add_complete_io(). we grab
+ * here s_writepages_rwsem to avoid race between writepages ops and
+ * remount.
*/
- if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
+ alloc_ctx = ext4_writepages_down_write(sb);
+ ext4_apply_options(fc, sb);
+ ext4_writepages_up_write(sb, alloc_ctx);
+
+ if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
+ test_opt(sb, JOURNAL_CHECKSUM)) {
+ ext4_msg(sb, KERN_ERR, "changing journal_checksum "
+ "during remount not supported; ignoring");
+ sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM;
+ }
+
+ if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
+ if (test_opt2(sb, EXPLICIT_DELALLOC)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "both data=journal and delalloc");
+ err = -EINVAL;
+ goto restore_opts;
+ }
+ if (test_opt(sb, DIOREAD_NOLOCK)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "both data=journal and dioread_nolock");
+ err = -EINVAL;
+ goto restore_opts;
+ }
+ } else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) {
+ if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "journal_async_commit in data=ordered mode");
+ err = -EINVAL;
+ goto restore_opts;
+ }
+ }
+
+ if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) {
+ ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount");
err = -EINVAL;
goto restore_opts;
}
- if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
- ext4_abort(sb, "Abort forced by user");
+ if ((old_opts.s_mount_opt & EXT4_MOUNT_DELALLOC) &&
+ !test_opt(sb, DELALLOC)) {
+ ext4_msg(sb, KERN_ERR, "can't disable delalloc during remount");
+ err = -EINVAL;
+ goto restore_opts;
+ }
- sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
- (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
+ sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
+ (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
es = sbi->s_es;
if (sbi->s_journal) {
ext4_init_journal_params(sb, sbi->s_journal);
- set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
+ set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio);
}
- if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
- if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
+ /* Flush outstanding errors before changing fs state */
+ flush_work(&sbi->s_sb_upd_work);
+
+ if ((bool)(fc->sb_flags & SB_RDONLY) != sb_rdonly(sb)) {
+ if (ext4_emergency_state(sb)) {
err = -EROFS;
goto restore_opts;
}
- if (*flags & MS_RDONLY) {
+ if (fc->sb_flags & SB_RDONLY) {
+ err = sync_filesystem(sb);
+ if (err < 0)
+ goto restore_opts;
err = dquot_suspend(sb, -1);
if (err < 0)
goto restore_opts;
@@ -4761,7 +6643,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
* First of all, the unconditional stuff we have to do
* to disable replay of the journal when we next remount
*/
- sb->s_flags |= MS_RDONLY;
+ sb->s_flags |= SB_RDONLY;
/*
* OK, test if we are remounting a valid rw partition
@@ -4772,11 +6654,17 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
(sbi->s_mount_state & EXT4_VALID_FS))
es->s_state = cpu_to_le16(sbi->s_mount_state);
- if (sbi->s_journal)
+ if (sbi->s_journal) {
+ /*
+ * We let remount-ro finish even if marking fs
+ * as clean failed...
+ */
ext4_mark_recovery_complete(sb, es);
+ }
} else {
/* Make sure we can mount this feature set readwrite */
- if (!ext4_feature_set_ok(sb, 0)) {
+ if (ext4_has_feature_readonly(sb) ||
+ !ext4_feature_set_ok(sb, 0)) {
err = -EROFS;
goto restore_opts;
}
@@ -4791,9 +6679,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
ext4_msg(sb, KERN_ERR,
"ext4_remount: Checksum for group %u failed (%u!=%u)",
- g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
+ g, le16_to_cpu(ext4_group_desc_csum(sb, g, gdp)),
le16_to_cpu(gdp->bg_checksum));
- err = -EINVAL;
+ err = -EFSBADCRC;
goto restore_opts;
}
}
@@ -4803,7 +6691,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
* around from a previously readonly bdev mount,
* require a full umount/remount for now.
*/
- if (es->s_last_orphan) {
+ if (es->s_last_orphan || !ext4_orphan_file_empty(sb)) {
ext4_msg(sb, KERN_WARNING, "Couldn't "
"remount RDWR because of unprocessed "
"orphan inode list. Please "
@@ -4818,59 +6706,100 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
* been changed by e2fsck since we originally mounted
* the partition.)
*/
- if (sbi->s_journal)
- ext4_clear_journal_err(sb, es);
- sbi->s_mount_state = le16_to_cpu(es->s_state);
- if (!ext4_setup_super(sb, es, 0))
- sb->s_flags &= ~MS_RDONLY;
- if (EXT4_HAS_INCOMPAT_FEATURE(sb,
- EXT4_FEATURE_INCOMPAT_MMP))
- if (ext4_multi_mount_protect(sb,
- le64_to_cpu(es->s_mmp_block))) {
- err = -EROFS;
+ if (sbi->s_journal) {
+ err = ext4_clear_journal_err(sb, es);
+ if (err)
goto restore_opts;
- }
+ }
+ sbi->s_mount_state = (le16_to_cpu(es->s_state) &
+ ~EXT4_FC_REPLAY);
+
+ err = ext4_setup_super(sb, es, 0);
+ if (err)
+ goto restore_opts;
+
+ sb->s_flags &= ~SB_RDONLY;
+ if (ext4_has_feature_mmp(sb)) {
+ err = ext4_multi_mount_protect(sb,
+ le64_to_cpu(es->s_mmp_block));
+ if (err)
+ goto restore_opts;
+ }
+#ifdef CONFIG_QUOTA
enable_quota = 1;
+#endif
}
}
/*
- * Reinitialize lazy itable initialization thread based on
- * current settings
+ * Handle creation of system zone data early because it can fail.
+ * Releasing of existing data is done when we are sure remount will
+ * succeed.
*/
- if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE))
- ext4_unregister_li_request(sb);
- else {
- ext4_group_t first_not_zeroed;
- first_not_zeroed = ext4_has_uninit_itable(sb);
- ext4_register_li_request(sb, first_not_zeroed);
+ if (test_opt(sb, BLOCK_VALIDITY) && !sbi->s_system_blks) {
+ err = ext4_setup_system_zone(sb);
+ if (err)
+ goto restore_opts;
}
- ext4_setup_system_zone(sb);
- if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
- ext4_commit_super(sb, 1);
+ if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) {
+ err = ext4_commit_super(sb);
+ if (err)
+ goto restore_opts;
+ }
#ifdef CONFIG_QUOTA
- /* Release old quota file names */
- for (i = 0; i < MAXQUOTAS; i++)
- kfree(old_opts.s_qf_names[i]);
if (enable_quota) {
if (sb_any_quota_suspended(sb))
dquot_resume(sb, -1);
- else if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_QUOTA)) {
+ else if (ext4_has_feature_quota(sb)) {
err = ext4_enable_quotas(sb);
if (err)
goto restore_opts;
}
}
+ /* Release old quota file names */
+ for (i = 0; i < EXT4_MAXQUOTAS; i++)
+ kfree(old_opts.s_qf_names[i]);
#endif
+ if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
+ ext4_release_system_zone(sb);
+
+ /*
+ * Reinitialize lazy itable initialization thread based on
+ * current settings
+ */
+ if (sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE))
+ ext4_unregister_li_request(sb);
+ else {
+ ext4_group_t first_not_zeroed;
+ first_not_zeroed = ext4_has_uninit_itable(sb);
+ ext4_register_li_request(sb, first_not_zeroed);
+ }
+
+ if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
+ ext4_stop_mmpd(sbi);
+
+ /*
+ * Handle aborting the filesystem as the last thing during remount to
+ * avoid obsure errors during remount when some option changes fail to
+ * apply due to shutdown filesystem.
+ */
+ if (test_opt2(sb, ABORT))
+ ext4_abort(sb, ESHUTDOWN, "Abort forced by user");
- ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
- kfree(orig_data);
return 0;
restore_opts:
+ /*
+ * If there was a failing r/w to ro transition, we may need to
+ * re-enable quota
+ */
+ if (sb_rdonly(sb) && !(old_sb_flags & SB_RDONLY) &&
+ sb_any_quota_suspended(sb))
+ dquot_resume(sb, -1);
+
+ alloc_ctx = ext4_writepages_down_write(sb);
sb->s_flags = old_sb_flags;
sbi->s_mount_opt = old_opts.s_mount_opt;
sbi->s_mount_opt2 = old_opts.s_mount_opt2;
@@ -4879,24 +6808,104 @@ restore_opts:
sbi->s_commit_interval = old_opts.s_commit_interval;
sbi->s_min_batch_time = old_opts.s_min_batch_time;
sbi->s_max_batch_time = old_opts.s_max_batch_time;
+ ext4_writepages_up_write(sb, alloc_ctx);
+
+ if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
+ ext4_release_system_zone(sb);
#ifdef CONFIG_QUOTA
sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
- for (i = 0; i < MAXQUOTAS; i++) {
- kfree(sbi->s_qf_names[i]);
- sbi->s_qf_names[i] = old_opts.s_qf_names[i];
+ for (i = 0; i < EXT4_MAXQUOTAS; i++) {
+ to_free[i] = get_qf_name(sb, sbi, i);
+ rcu_assign_pointer(sbi->s_qf_names[i], old_opts.s_qf_names[i]);
}
+ synchronize_rcu();
+ for (i = 0; i < EXT4_MAXQUOTAS; i++)
+ kfree(to_free[i]);
#endif
- kfree(orig_data);
+ if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
+ ext4_stop_mmpd(sbi);
return err;
}
+static int ext4_reconfigure(struct fs_context *fc)
+{
+ struct super_block *sb = fc->root->d_sb;
+ int ret;
+ bool old_ro = sb_rdonly(sb);
+
+ fc->s_fs_info = EXT4_SB(sb);
+
+ ret = ext4_check_opt_consistency(fc, sb);
+ if (ret < 0)
+ return ret;
+
+ ret = __ext4_remount(fc, sb);
+ if (ret < 0)
+ return ret;
+
+ ext4_msg(sb, KERN_INFO, "re-mounted %pU%s.",
+ &sb->s_uuid,
+ (old_ro != sb_rdonly(sb)) ? (sb_rdonly(sb) ? " ro" : " r/w") : "");
+
+ return 0;
+}
+
+#ifdef CONFIG_QUOTA
+static int ext4_statfs_project(struct super_block *sb,
+ kprojid_t projid, struct kstatfs *buf)
+{
+ struct kqid qid;
+ struct dquot *dquot;
+ u64 limit;
+ u64 curblock;
+
+ qid = make_kqid_projid(projid);
+ dquot = dqget(sb, qid);
+ if (IS_ERR(dquot))
+ return PTR_ERR(dquot);
+ spin_lock(&dquot->dq_dqb_lock);
+
+ limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit,
+ dquot->dq_dqb.dqb_bhardlimit);
+ limit >>= sb->s_blocksize_bits;
+
+ if (limit) {
+ uint64_t remaining = 0;
+
+ curblock = (dquot->dq_dqb.dqb_curspace +
+ dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
+ if (limit > curblock)
+ remaining = limit - curblock;
+
+ buf->f_blocks = min(buf->f_blocks, limit);
+ buf->f_bfree = min(buf->f_bfree, remaining);
+ buf->f_bavail = min(buf->f_bavail, remaining);
+ }
+
+ limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit,
+ dquot->dq_dqb.dqb_ihardlimit);
+ if (limit) {
+ uint64_t remaining = 0;
+
+ if (limit > dquot->dq_dqb.dqb_curinodes)
+ remaining = limit - dquot->dq_dqb.dqb_curinodes;
+
+ buf->f_files = min(buf->f_files, limit);
+ buf->f_ffree = min(buf->f_ffree, remaining);
+ }
+
+ spin_unlock(&dquot->dq_dqb_lock);
+ dqput(dquot);
+ return 0;
+}
+#endif
+
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
ext4_fsblk_t overhead = 0, resv_blocks;
- u64 fsid;
s64 bfree;
resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
@@ -4917,26 +6926,23 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = le32_to_cpu(es->s_inodes_count);
buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
buf->f_namelen = EXT4_NAME_LEN;
- fsid = le64_to_cpup((void *)es->s_uuid) ^
- le64_to_cpup((void *)es->s_uuid + sizeof(u64));
- buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
- buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
+ buf->f_fsid = uuid_to_fsid(es->s_uuid);
+#ifdef CONFIG_QUOTA
+ if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
+ sb_has_quota_limits_enabled(sb, PRJQUOTA))
+ ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
+#endif
return 0;
}
-/* Helper function for writing quotas on sync - we need to start transaction
- * before quota file is locked for write. Otherwise the are possible deadlocks:
- * Process 1 Process 2
- * ext4_create() quota_sync()
- * jbd2_journal_start() write_dquot()
- * dquot_initialize() down(dqio_mutex)
- * down(dqio_mutex) jbd2_journal_start()
- *
- */
#ifdef CONFIG_QUOTA
+/*
+ * Helper functions so that transaction is started before we acquire dqio_sem
+ * to keep correct lock ordering of transaction > dqio_sem
+ */
static inline struct inode *dquot_to_inode(struct dquot *dquot)
{
return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
@@ -4954,6 +6960,10 @@ static int ext4_write_dquot(struct dquot *dquot)
if (IS_ERR(handle))
return PTR_ERR(handle);
ret = dquot_commit(dquot);
+ if (ret < 0)
+ ext4_error_err(dquot->dq_sb, -ret,
+ "Failed to commit dquot type %d",
+ dquot->dq_id.type);
err = ext4_journal_stop(handle);
if (!ret)
ret = err;
@@ -4970,6 +6980,10 @@ static int ext4_acquire_dquot(struct dquot *dquot)
if (IS_ERR(handle))
return PTR_ERR(handle);
ret = dquot_acquire(dquot);
+ if (ret < 0)
+ ext4_error_err(dquot->dq_sb, -ret,
+ "Failed to acquire dquot type %d",
+ dquot->dq_id.type);
err = ext4_journal_stop(handle);
if (!ret)
ret = err;
@@ -4980,29 +6994,47 @@ static int ext4_release_dquot(struct dquot *dquot)
{
int ret, err;
handle_t *handle;
+ bool freeze_protected = false;
+
+ /*
+ * Trying to sb_start_intwrite() in a running transaction
+ * can result in a deadlock. Further, running transactions
+ * are already protected from freezing.
+ */
+ if (!ext4_journal_current_handle()) {
+ sb_start_intwrite(dquot->dq_sb);
+ freeze_protected = true;
+ }
handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
if (IS_ERR(handle)) {
/* Release dquot anyway to avoid endless cycle in dqput() */
dquot_release(dquot);
+ if (freeze_protected)
+ sb_end_intwrite(dquot->dq_sb);
return PTR_ERR(handle);
}
ret = dquot_release(dquot);
+ if (ret < 0)
+ ext4_error_err(dquot->dq_sb, -ret,
+ "Failed to release dquot type %d",
+ dquot->dq_id.type);
err = ext4_journal_stop(handle);
if (!ret)
ret = err;
+
+ if (freeze_protected)
+ sb_end_intwrite(dquot->dq_sb);
+
return ret;
}
static int ext4_mark_dquot_dirty(struct dquot *dquot)
{
struct super_block *sb = dquot->dq_sb;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- /* Are we journaling quotas? */
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) ||
- sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
+ if (ext4_is_quota_journalled(sb)) {
dquot_mark_dquot_dirty(dquot);
return ext4_write_dquot(dquot);
} else {
@@ -5016,7 +7048,7 @@ static int ext4_write_info(struct super_block *sb, int type)
handle_t *handle;
/* Data block + inode block */
- handle = ext4_journal_start(sb->s_root->d_inode, EXT4_HT_QUOTA, 2);
+ handle = ext4_journal_start_sb(sb, EXT4_HT_QUOTA, 2);
if (IS_ERR(handle))
return PTR_ERR(handle);
ret = dquot_commit_info(sb, type);
@@ -5026,21 +7058,25 @@ static int ext4_write_info(struct super_block *sb, int type)
return ret;
}
-/*
- * Turn on quotas during mount time - we need to find
- * the quota file and such...
- */
-static int ext4_quota_on_mount(struct super_block *sb, int type)
+static void lockdep_set_quota_inode(struct inode *inode, int subclass)
{
- return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
- EXT4_SB(sb)->s_jquota_fmt, type);
+ struct ext4_inode_info *ei = EXT4_I(inode);
+
+ /* The first argument of lockdep_set_subclass has to be
+ * *exactly* the same as the argument to init_rwsem() --- in
+ * this case, in init_once() --- or lockdep gets unhappy
+ * because the name of the lock is set using the
+ * stringification of the argument to init_rwsem().
+ */
+ (void) ei; /* shut up clang warning if !CONFIG_LOCKDEP */
+ lockdep_set_subclass(&ei->i_data_sem, subclass);
}
/*
* Standard function to be called on quota_on
*/
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
- struct path *path)
+ const struct path *path)
{
int err;
@@ -5050,6 +7086,11 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
/* Quotafile not on the same filesystem? */
if (path->dentry->d_sb != sb)
return -EXDEV;
+
+ /* Quota already enabled for this file? */
+ if (IS_NOQUOTA(d_inode(path->dentry)))
+ return -EBUSY;
+
/* Journaling quota? */
if (EXT4_SB(sb)->s_qf_names[type]) {
/* Quotafile not in fs root? */
@@ -5057,26 +7098,58 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
ext4_msg(sb, KERN_WARNING,
"Quota file not on filesystem root. "
"Journaled quota will not work");
+ sb_dqopt(sb)->flags |= DQUOT_NOLIST_DIRTY;
+ } else {
+ /*
+ * Clear the flag just in case mount options changed since
+ * last time.
+ */
+ sb_dqopt(sb)->flags &= ~DQUOT_NOLIST_DIRTY;
}
- /*
- * When we journal data on quota file, we have to flush journal to see
- * all updates to the file when we bypass pagecache...
- */
- if (EXT4_SB(sb)->s_journal &&
- ext4_should_journal_data(path->dentry->d_inode)) {
+ lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
+ err = dquot_quota_on(sb, type, format_id, path);
+ if (!err) {
+ struct inode *inode = d_inode(path->dentry);
+ handle_t *handle;
+
/*
- * We don't need to lock updates but journal_flush() could
- * otherwise be livelocked...
+ * Set inode flags to prevent userspace from messing with quota
+ * files. If this fails, we return success anyway since quotas
+ * are already enabled and this is not a hard failure.
*/
- jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
- err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
- jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+ inode_lock(inode);
+ handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
+ if (IS_ERR(handle))
+ goto unlock_inode;
+ EXT4_I(inode)->i_flags |= EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL;
+ inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
+ S_NOATIME | S_IMMUTABLE);
+ err = ext4_mark_inode_dirty(handle, inode);
+ ext4_journal_stop(handle);
+ unlock_inode:
+ inode_unlock(inode);
if (err)
- return err;
+ dquot_quota_off(sb, type);
}
+ if (err)
+ lockdep_set_quota_inode(path->dentry->d_inode,
+ I_DATA_SEM_NORMAL);
+ return err;
+}
- return dquot_quota_on(sb, type, format_id, path);
+static inline bool ext4_check_quota_inum(int type, unsigned long qf_inum)
+{
+ switch (type) {
+ case USRQUOTA:
+ return qf_inum == EXT4_USR_QUOTA_INO;
+ case GRPQUOTA:
+ return qf_inum == EXT4_GRP_QUOTA_INO;
+ case PRJQUOTA:
+ return qf_inum >= EXT4_GOOD_OLD_FIRST_INO;
+ default:
+ BUG();
+ }
}
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
@@ -5084,49 +7157,70 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
{
int err;
struct inode *qf_inode;
- unsigned long qf_inums[MAXQUOTAS] = {
+ unsigned long qf_inums[EXT4_MAXQUOTAS] = {
le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
- le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
};
- BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA));
+ BUG_ON(!ext4_has_feature_quota(sb));
if (!qf_inums[type])
return -EPERM;
- qf_inode = ext4_iget(sb, qf_inums[type]);
+ if (!ext4_check_quota_inum(type, qf_inums[type])) {
+ ext4_error(sb, "Bad quota inum: %lu, type: %d",
+ qf_inums[type], type);
+ return -EUCLEAN;
+ }
+
+ qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
if (IS_ERR(qf_inode)) {
- ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
+ ext4_error(sb, "Bad quota inode: %lu, type: %d",
+ qf_inums[type], type);
return PTR_ERR(qf_inode);
}
/* Don't account quota for quota files to avoid recursion */
qf_inode->i_flags |= S_NOQUOTA;
- err = dquot_enable(qf_inode, type, format_id, flags);
+ lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
+ err = dquot_load_quota_inode(qf_inode, type, format_id, flags);
+ if (err)
+ lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
iput(qf_inode);
return err;
}
/* Enable usage tracking for all quota types. */
-static int ext4_enable_quotas(struct super_block *sb)
+int ext4_enable_quotas(struct super_block *sb)
{
int type, err = 0;
- unsigned long qf_inums[MAXQUOTAS] = {
+ unsigned long qf_inums[EXT4_MAXQUOTAS] = {
le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
- le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
+ };
+ bool quota_mopt[EXT4_MAXQUOTAS] = {
+ test_opt(sb, USRQUOTA),
+ test_opt(sb, GRPQUOTA),
+ test_opt(sb, PRJQUOTA),
};
- sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
- for (type = 0; type < MAXQUOTAS; type++) {
+ sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
+ for (type = 0; type < EXT4_MAXQUOTAS; type++) {
if (qf_inums[type]) {
err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
- DQUOT_USAGE_ENABLED);
+ DQUOT_USAGE_ENABLED |
+ (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
if (err) {
ext4_warning(sb,
"Failed to enable quota tracking "
- "(type=%d, err=%d). Please run "
- "e2fsck to fix.", type, err);
+ "(type=%d, err=%d, ino=%lu). "
+ "Please run e2fsck to fix.", type,
+ err, qf_inums[type]);
+
+ ext4_quotas_off(sb, type);
return err;
}
}
@@ -5134,59 +7228,57 @@ static int ext4_enable_quotas(struct super_block *sb)
return 0;
}
-/*
- * quota_on function that is used when QUOTA feature is set.
- */
-static int ext4_quota_on_sysfile(struct super_block *sb, int type,
- int format_id)
-{
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
- return -EINVAL;
-
- /*
- * USAGE was enabled at mount time. Only need to enable LIMITS now.
- */
- return ext4_quota_enable(sb, type, format_id, DQUOT_LIMITS_ENABLED);
-}
-
static int ext4_quota_off(struct super_block *sb, int type)
{
struct inode *inode = sb_dqopt(sb)->files[type];
handle_t *handle;
+ int err;
/* Force all delayed allocation blocks to be allocated.
* Caller already holds s_umount sem */
if (test_opt(sb, DELALLOC))
sync_filesystem(sb);
- if (!inode)
+ if (!inode || !igrab(inode))
goto out;
- /* Update modification times of quota files when userspace can
- * start looking at them */
+ err = dquot_quota_off(sb, type);
+ if (err || ext4_has_feature_quota(sb))
+ goto out_put;
+ /*
+ * When the filesystem was remounted read-only first, we cannot cleanup
+ * inode flags here. Bad luck but people should be using QUOTA feature
+ * these days anyway.
+ */
+ if (sb_rdonly(sb))
+ goto out_put;
+
+ inode_lock(inode);
+ /*
+ * Update modification times of quota files when userspace can
+ * start looking at them. If we fail, we return success anyway since
+ * this is not a hard failure and quotas are already disabled.
+ */
handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
- if (IS_ERR(handle))
- goto out;
- inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- ext4_mark_inode_dirty(handle, inode);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ goto out_unlock;
+ }
+ EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
+ inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
+ err = ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
-
+out_unlock:
+ inode_unlock(inode);
+out_put:
+ lockdep_set_quota_inode(inode, I_DATA_SEM_NORMAL);
+ iput(inode);
+ return err;
out:
return dquot_quota_off(sb, type);
}
-/*
- * quota_off function that is used when QUOTA feature is set.
- */
-static int ext4_quota_off_sysfile(struct super_block *sb, int type)
-{
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
- return -EINVAL;
-
- /* Disable only the limits. */
- return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED);
-}
-
/* Read data from quotafile - avoid pagecache and such because we cannot afford
* acquiring the locks... As quota files are never truncated and quota code
* itself serializes the operations (and no one else should touch the files)
@@ -5196,7 +7288,6 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
{
struct inode *inode = sb_dqopt(sb)->files[type];
ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
- int err = 0;
int offset = off & (sb->s_blocksize - 1);
int tocopy;
size_t toread;
@@ -5209,11 +7300,10 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
len = i_size-off;
toread = len;
while (toread > 0) {
- tocopy = sb->s_blocksize - offset < toread ?
- sb->s_blocksize - offset : toread;
- bh = ext4_bread(NULL, inode, blk, 0, &err);
- if (err)
- return err;
+ tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread);
+ bh = ext4_bread(NULL, inode, blk, 0);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
if (!bh) /* A hole? */
memset(data, 0, tocopy);
else
@@ -5234,12 +7324,12 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
{
struct inode *inode = sb_dqopt(sb)->files[type];
ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
- int err = 0;
- int offset = off & (sb->s_blocksize - 1);
+ int err = 0, err2 = 0, offset = off & (sb->s_blocksize - 1);
+ int retries = 0;
struct buffer_head *bh;
handle_t *handle = journal_current_handle();
- if (EXT4_SB(sb)->s_journal && !handle) {
+ if (!handle) {
ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
" cancelled because transaction is not started",
(unsigned long long)off, (unsigned long long)len);
@@ -5256,40 +7346,41 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
return -EIO;
}
- bh = ext4_bread(handle, inode, blk, 1, &err);
+ do {
+ bh = ext4_bread(handle, inode, blk,
+ EXT4_GET_BLOCKS_CREATE |
+ EXT4_GET_BLOCKS_METADATA_NOFAIL);
+ } while (PTR_ERR(bh) == -ENOSPC &&
+ ext4_should_retry_alloc(inode->i_sb, &retries));
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
if (!bh)
goto out;
- err = ext4_journal_get_write_access(handle, bh);
+ BUFFER_TRACE(bh, "get write access");
+ err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE);
if (err) {
brelse(bh);
- goto out;
+ return err;
}
lock_buffer(bh);
memcpy(bh->b_data+offset, data, len);
- flush_dcache_page(bh->b_page);
+ flush_dcache_folio(bh->b_folio);
unlock_buffer(bh);
err = ext4_handle_dirty_metadata(handle, NULL, bh);
brelse(bh);
out:
- if (err)
- return err;
if (inode->i_size < off + len) {
i_size_write(inode, off + len);
EXT4_I(inode)->i_disksize = inode->i_size;
- ext4_mark_inode_dirty(handle, inode);
+ err2 = ext4_mark_inode_dirty(handle, inode);
+ if (unlikely(err2 && !err))
+ err = err2;
}
- return len;
+ return err ? err : len;
}
-
#endif
-static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data)
-{
- return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
-}
-
-#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
+#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
static inline void register_as_ext2(void)
{
int err = register_filesystem(&ext2_fs_type);
@@ -5305,11 +7396,11 @@ static inline void unregister_as_ext2(void)
static inline int ext2_feature_set_ok(struct super_block *sb)
{
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP))
+ if (ext4_has_unknown_ext2_incompat_features(sb))
return 0;
- if (sb->s_flags & MS_RDONLY)
+ if (sb_rdonly(sb))
return 1;
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))
+ if (ext4_has_unknown_ext2_ro_compat_features(sb))
return 0;
return 1;
}
@@ -5319,7 +7410,6 @@ static inline void unregister_as_ext2(void) { }
static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; }
#endif
-#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static inline void register_as_ext3(void)
{
int err = register_filesystem(&ext3_fs_type);
@@ -5335,113 +7425,84 @@ static inline void unregister_as_ext3(void)
static inline int ext3_feature_set_ok(struct super_block *sb)
{
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP))
+ if (ext4_has_unknown_ext3_incompat_features(sb))
return 0;
- if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
+ if (!ext4_has_feature_journal(sb))
return 0;
- if (sb->s_flags & MS_RDONLY)
+ if (sb_rdonly(sb))
return 1;
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP))
+ if (ext4_has_unknown_ext3_ro_compat_features(sb))
return 0;
return 1;
}
-#else
-static inline void register_as_ext3(void) { }
-static inline void unregister_as_ext3(void) { }
-static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; }
-#endif
-static struct file_system_type ext4_fs_type = {
- .owner = THIS_MODULE,
- .name = "ext4",
- .mount = ext4_mount,
- .kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV,
-};
-MODULE_ALIAS_FS("ext4");
-
-static int __init ext4_init_feat_adverts(void)
+static void ext4_kill_sb(struct super_block *sb)
{
- struct ext4_features *ef;
- int ret = -ENOMEM;
-
- ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL);
- if (!ef)
- goto out;
-
- ef->f_kobj.kset = ext4_kset;
- init_completion(&ef->f_kobj_unregister);
- ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL,
- "features");
- if (ret) {
- kfree(ef);
- goto out;
- }
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct file *bdev_file = sbi ? sbi->s_journal_bdev_file : NULL;
- ext4_feat = ef;
- ret = 0;
-out:
- return ret;
-}
+ kill_block_super(sb);
-static void ext4_exit_feat_adverts(void)
-{
- kobject_put(&ext4_feat->f_kobj);
- wait_for_completion(&ext4_feat->f_kobj_unregister);
- kfree(ext4_feat);
+ if (bdev_file)
+ bdev_fput(bdev_file);
}
-/* Shared across all ext4 file systems */
-wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
-struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
+static struct file_system_type ext4_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "ext4",
+ .init_fs_context = ext4_init_fs_context,
+ .parameters = ext4_param_specs,
+ .kill_sb = ext4_kill_sb,
+ .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME |
+ FS_LBS,
+};
+MODULE_ALIAS_FS("ext4");
static int __init ext4_init_fs(void)
{
- int i, err;
+ int err;
+ ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
ext4_li_info = NULL;
- mutex_init(&ext4_li_mtx);
/* Build-time check for flags consistency */
ext4_check_flag_values();
- for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
- mutex_init(&ext4__aio_mutex[i]);
- init_waitqueue_head(&ext4__ioend_wq[i]);
- }
-
err = ext4_init_es();
if (err)
return err;
- err = ext4_init_pageio();
+ err = ext4_init_pending();
if (err)
goto out7;
- err = ext4_init_system_zone();
+ err = ext4_init_post_read_processing();
if (err)
goto out6;
- ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
- if (!ext4_kset) {
- err = -ENOMEM;
+
+ err = ext4_init_pageio();
+ if (err)
goto out5;
- }
- ext4_proc_root = proc_mkdir("fs/ext4", NULL);
- err = ext4_init_feat_adverts();
+ err = ext4_init_system_zone();
if (err)
goto out4;
- err = ext4_init_mballoc();
+ err = ext4_init_sysfs();
if (err)
goto out3;
- err = ext4_init_xattr();
+ err = ext4_init_mballoc();
if (err)
goto out2;
err = init_inodecache();
if (err)
goto out1;
+
+ err = ext4_fc_init_dentry_cache();
+ if (err)
+ goto out05;
+
register_as_ext3();
register_as_ext2();
err = register_filesystem(&ext4_fs_type);
@@ -5452,21 +7513,21 @@ static int __init ext4_init_fs(void)
out:
unregister_as_ext2();
unregister_as_ext3();
+ ext4_fc_destroy_dentry_cache();
+out05:
destroy_inodecache();
out1:
- ext4_exit_xattr();
-out2:
ext4_exit_mballoc();
+out2:
+ ext4_exit_sysfs();
out3:
- ext4_exit_feat_adverts();
+ ext4_exit_system_zone();
out4:
- if (ext4_proc_root)
- remove_proc_entry("fs/ext4", NULL);
- kset_unregister(ext4_kset);
+ ext4_exit_pageio();
out5:
- ext4_exit_system_zone();
+ ext4_exit_post_read_processing();
out6:
- ext4_exit_pageio();
+ ext4_exit_pending();
out7:
ext4_exit_es();
@@ -5479,14 +7540,15 @@ static void __exit ext4_exit_fs(void)
unregister_as_ext2();
unregister_as_ext3();
unregister_filesystem(&ext4_fs_type);
+ ext4_fc_destroy_dentry_cache();
destroy_inodecache();
- ext4_exit_xattr();
ext4_exit_mballoc();
- ext4_exit_feat_adverts();
- remove_proc_entry("fs/ext4", NULL);
- kset_unregister(ext4_kset);
+ ext4_exit_sysfs();
ext4_exit_system_zone();
ext4_exit_pageio();
+ ext4_exit_post_read_processing();
+ ext4_exit_es();
+ ext4_exit_pending();
}
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");