diff options
Diffstat (limited to 'fs/ext4/resize.c')
| -rw-r--r-- | fs/ext4/resize.c | 800 |
1 files changed, 497 insertions, 303 deletions
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index c5adbb318a90..050f26168d97 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext4/resize.c * @@ -9,52 +10,98 @@ */ -#define EXT4FS_DEBUG - #include <linux/errno.h> #include <linux/slab.h> +#include <linux/jiffies.h> #include "ext4_jbd2.h" +struct ext4_rcu_ptr { + struct rcu_head rcu; + void *ptr; +}; + +static void ext4_rcu_ptr_callback(struct rcu_head *head) +{ + struct ext4_rcu_ptr *ptr; + + ptr = container_of(head, struct ext4_rcu_ptr, rcu); + kvfree(ptr->ptr); + kfree(ptr); +} + +void ext4_kvfree_array_rcu(void *to_free) +{ + struct ext4_rcu_ptr *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); + + if (ptr) { + ptr->ptr = to_free; + call_rcu(&ptr->rcu, ext4_rcu_ptr_callback); + return; + } + synchronize_rcu(); + kvfree(to_free); +} + int ext4_resize_begin(struct super_block *sb) { + struct ext4_sb_info *sbi = EXT4_SB(sb); int ret = 0; if (!capable(CAP_SYS_RESOURCE)) return -EPERM; /* + * If the reserved GDT blocks is non-zero, the resize_inode feature + * should always be set. + */ + if (sbi->s_es->s_reserved_gdt_blocks && + !ext4_has_feature_resize_inode(sb)) { + ext4_error(sb, "resize_inode disabled but reserved GDT blocks non-zero"); + return -EFSCORRUPTED; + } + + /* + * If we are not using the primary superblock/GDT copy don't resize, + * because the user tools have no way of handling this. Probably a + * bad time to do it anyways. + */ + if (EXT4_B2C(sbi, sbi->s_sbh->b_blocknr) != + le32_to_cpu(sbi->s_es->s_first_data_block)) { + ext4_warning(sb, "won't resize using backup superblock at %llu", + (unsigned long long)sbi->s_sbh->b_blocknr); + return -EPERM; + } + + /* * We are not allowed to do online-resizing on a filesystem mounted * with error, because it can destroy the filesystem easily. */ - if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { + if (sbi->s_mount_state & EXT4_ERROR_FS) { ext4_warning(sb, "There are errors in the filesystem, " - "so online resizing is not allowed\n"); + "so online resizing is not allowed"); return -EPERM; } - if (test_and_set_bit_lock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags)) + if (ext4_has_feature_sparse_super2(sb)) { + ext4_msg(sb, KERN_ERR, "Online resizing not supported with sparse_super2"); + return -EOPNOTSUPP; + } + + if (test_and_set_bit_lock(EXT4_FLAGS_RESIZING, + &sbi->s_ext4_flags)) ret = -EBUSY; return ret; } -void ext4_resize_end(struct super_block *sb) +int ext4_resize_end(struct super_block *sb, bool update_backups) { - clear_bit_unlock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags); - smp_mb__after_clear_bit(); -} - -static ext4_group_t ext4_meta_bg_first_group(struct super_block *sb, - ext4_group_t group) { - return (group >> EXT4_DESC_PER_BLOCK_BITS(sb)) << - EXT4_DESC_PER_BLOCK_BITS(sb); -} - -static ext4_fsblk_t ext4_meta_bg_first_block_no(struct super_block *sb, - ext4_group_t group) { - group = ext4_meta_bg_first_group(sb, group); - return ext4_group_first_block_no(sb, group); + clear_bit_unlock(EXT4_FLAGS_RESIZING, &EXT4_SB(sb)->s_ext4_flags); + smp_mb__after_atomic(); + if (update_backups) + return ext4_update_overhead(sb, true); + return 0; } static ext4_grpblk_t ext4_group_overhead_blocks(struct super_block *sb, @@ -93,8 +140,9 @@ static int verify_group_input(struct super_block *sb, overhead = ext4_group_overhead_blocks(sb, group); metaend = start + overhead; - input->free_blocks_count = free_blocks_count = - input->blocks_count - 2 - overhead - sbi->s_itb_per_group; + free_blocks_count = input->blocks_count - 2 - overhead - + sbi->s_itb_per_group; + input->free_clusters_count = EXT4_B2C(sbi, free_blocks_count); if (test_opt(sb, DEBUG)) printk(KERN_DEBUG "EXT4-fs: adding %s group %u: %u blocks " @@ -112,10 +160,12 @@ static int verify_group_input(struct super_block *sb, else if (free_blocks_count < 0) ext4_warning(sb, "Bad blocks count %u", input->blocks_count); - else if (!(bh = sb_bread(sb, end - 1))) + else if (IS_ERR(bh = ext4_sb_bread(sb, end - 1, 0))) { + err = PTR_ERR(bh); + bh = NULL; ext4_warning(sb, "Cannot read last block (%llu)", end - 1); - else if (outside(input->block_bitmap, start, end)) + } else if (outside(input->block_bitmap, start, end)) ext4_warning(sb, "Block bitmap not in group (block %llu)", (unsigned long long)input->block_bitmap); else if (outside(input->inode_bitmap, start, end)) @@ -168,34 +218,56 @@ struct ext4_new_flex_group_data { in the flex group */ __u16 *bg_flags; /* block group flags of groups in @groups */ + ext4_group_t resize_bg; /* number of allocated + new_group_data */ ext4_group_t count; /* number of groups in @groups */ }; /* - * alloc_flex_gd() allocates a ext4_new_flex_group_data with size of - * @flexbg_size. + * Avoiding memory allocation failures due to too many groups added each time. + */ +#define MAX_RESIZE_BG 16384 + +/* + * alloc_flex_gd() allocates an ext4_new_flex_group_data that satisfies the + * resizing from @o_group to @n_group, its size is typically @flexbg_size. * * Returns NULL on failure otherwise address of the allocated structure. */ -static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size) +static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned int flexbg_size, + ext4_group_t o_group, ext4_group_t n_group) { + ext4_group_t last_group; + unsigned int max_resize_bg; struct ext4_new_flex_group_data *flex_gd; flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS); if (flex_gd == NULL) goto out3; - if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_flex_group_data)) - goto out2; - flex_gd->count = flexbg_size; + max_resize_bg = umin(flexbg_size, MAX_RESIZE_BG); + flex_gd->resize_bg = max_resize_bg; + + /* Avoid allocating large 'groups' array if not needed */ + last_group = o_group | (flex_gd->resize_bg - 1); + if (n_group <= last_group) + flex_gd->resize_bg = 1 << fls(n_group - o_group); + else if (n_group - last_group < flex_gd->resize_bg) + flex_gd->resize_bg = 1 << max(fls(last_group - o_group), + fls(n_group - last_group)); + + if (WARN_ON_ONCE(flex_gd->resize_bg > max_resize_bg)) + flex_gd->resize_bg = max_resize_bg; - flex_gd->groups = kmalloc(sizeof(struct ext4_new_group_data) * - flexbg_size, GFP_NOFS); + flex_gd->groups = kmalloc_array(flex_gd->resize_bg, + sizeof(struct ext4_new_group_data), + GFP_NOFS); if (flex_gd->groups == NULL) goto out2; - flex_gd->bg_flags = kmalloc(flexbg_size * sizeof(__u16), GFP_NOFS); + flex_gd->bg_flags = kmalloc_array(flex_gd->resize_bg, sizeof(__u16), + GFP_NOFS); if (flex_gd->bg_flags == NULL) goto out1; @@ -231,7 +303,7 @@ static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd) */ static int ext4_alloc_group_tables(struct super_block *sb, struct ext4_new_flex_group_data *flex_gd, - int flexbg_size) + unsigned int flexbg_size) { struct ext4_new_group_data *group_data = flex_gd->groups; ext4_fsblk_t start_blk; @@ -243,6 +315,8 @@ static int ext4_alloc_group_tables(struct super_block *sb, ext4_group_t group; ext4_group_t last_group; unsigned overhead; + __u16 uninit_mask = (flexbg_size > 1) ? ~EXT4_BG_BLOCK_UNINIT : ~0; + int i; BUG_ON(flex_gd->count == 0 || group_data == NULL); @@ -266,7 +340,7 @@ next_group: src_group++; for (; src_group <= last_group; src_group++) { overhead = ext4_group_overhead_blocks(sb, src_group); - if (overhead != 0) + if (overhead == 0) last_blk += group_data[src_group - group].blocks_count; else break; @@ -279,9 +353,8 @@ next_group: group_data[bb_index].block_bitmap = start_blk++; group = ext4_get_group_number(sb, start_blk - 1); group -= group_data[0].group; - group_data[group].free_blocks_count--; - if (flexbg_size > 1) - flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; + group_data[group].mdata_blocks++; + flex_gd->bg_flags[group] &= uninit_mask; } /* Allocate inode bitmaps */ @@ -291,41 +364,57 @@ next_group: group_data[ib_index].inode_bitmap = start_blk++; group = ext4_get_group_number(sb, start_blk - 1); group -= group_data[0].group; - group_data[group].free_blocks_count--; - if (flexbg_size > 1) - flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; + group_data[group].mdata_blocks++; + flex_gd->bg_flags[group] &= uninit_mask; } /* Allocate inode tables */ for (; it_index < flex_gd->count; it_index++) { - if (start_blk + EXT4_SB(sb)->s_itb_per_group > last_blk) + unsigned int itb = EXT4_SB(sb)->s_itb_per_group; + ext4_fsblk_t next_group_start; + + if (start_blk + itb > last_blk) goto next_group; group_data[it_index].inode_table = start_blk; - group = ext4_get_group_number(sb, start_blk - 1); + group = ext4_get_group_number(sb, start_blk); + next_group_start = ext4_group_first_block_no(sb, group + 1); group -= group_data[0].group; - group_data[group].free_blocks_count -= - EXT4_SB(sb)->s_itb_per_group; - if (flexbg_size > 1) - flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; + if (start_blk + itb > next_group_start) { + flex_gd->bg_flags[group + 1] &= uninit_mask; + overhead = start_blk + itb - next_group_start; + group_data[group + 1].mdata_blocks += overhead; + itb -= overhead; + } + + group_data[group].mdata_blocks += itb; + flex_gd->bg_flags[group] &= uninit_mask; start_blk += EXT4_SB(sb)->s_itb_per_group; } + /* Update free clusters count to exclude metadata blocks */ + for (i = 0; i < flex_gd->count; i++) { + group_data[i].free_clusters_count -= + EXT4_NUM_B2C(EXT4_SB(sb), + group_data[i].mdata_blocks); + } + if (test_opt(sb, DEBUG)) { int i; group = group_data[0].group; printk(KERN_DEBUG "EXT4-fs: adding a flex group with " - "%d groups, flexbg size is %d:\n", flex_gd->count, + "%u groups, flexbg size is %u:\n", flex_gd->count, flexbg_size); for (i = 0; i < flex_gd->count; i++) { - printk(KERN_DEBUG "adding %s group %u: %u " - "blocks (%d free)\n", + ext4_debug( + "adding %s group %u: %u blocks (%u free, %u mdata blocks)\n", ext4_bg_has_super(sb, group + i) ? "normal" : "no-super", group + i, group_data[i].blocks_count, - group_data[i].free_blocks_count); + group_data[i].free_clusters_count, + group_data[i].mdata_blocks); } } return 0; @@ -340,7 +429,9 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, bh = sb_getblk(sb, blk); if (unlikely(!bh)) return ERR_PTR(-ENOMEM); - if ((err = ext4_journal_get_write_access(handle, bh))) { + BUFFER_TRACE(bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE); + if (err) { brelse(bh); bh = ERR_PTR(err); } else { @@ -351,32 +442,14 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, return bh; } -/* - * If we have fewer than thresh credits, extend by EXT4_MAX_TRANS_DATA. - * If that fails, restart the transaction & regain write access for the - * buffer head which is used for block_bitmap modifications. - */ -static int extend_or_restart_transaction(handle_t *handle, int thresh) +static int ext4_resize_ensure_credits_batch(handle_t *handle, int credits) { - int err; - - if (ext4_handle_has_enough_credits(handle, thresh)) - return 0; - - err = ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA); - if (err < 0) - return err; - if (err) { - err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA); - if (err) - return err; - } - - return 0; + return ext4_journal_ensure_credits_fn(handle, credits, + EXT4_MAX_TRANS_DATA, 0, 0); } /* - * set_flexbg_block_bitmap() mark @count blocks starting from @block used. + * set_flexbg_block_bitmap() mark clusters [@first_cluster, @last_cluster] used. * * Helper function for ext4_setup_new_group_blocks() which set . * @@ -386,22 +459,25 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh) */ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, struct ext4_new_flex_group_data *flex_gd, - ext4_fsblk_t block, ext4_group_t count) + ext4_fsblk_t first_cluster, ext4_fsblk_t last_cluster) { + struct ext4_sb_info *sbi = EXT4_SB(sb); + ext4_group_t count = last_cluster - first_cluster + 1; ext4_group_t count2; - ext4_debug("mark blocks [%llu/%u] used\n", block, count); - for (count2 = count; count > 0; count -= count2, block += count2) { + ext4_debug("mark clusters [%llu-%llu] used\n", first_cluster, + last_cluster); + for (; count > 0; count -= count2, first_cluster += count2) { ext4_fsblk_t start; struct buffer_head *bh; ext4_group_t group; int err; - group = ext4_get_group_number(sb, block); - start = ext4_group_first_block_no(sb, group); + group = ext4_get_group_number(sb, EXT4_C2B(sbi, first_cluster)); + start = EXT4_B2C(sbi, ext4_group_first_block_no(sb, group)); group -= flex_gd->groups[0].group; - count2 = sb->s_blocksize * 8 - (block - start); + count2 = EXT4_CLUSTERS_PER_GROUP(sb) - (first_cluster - start); if (count2 > count) count2 = count; @@ -410,25 +486,29 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, continue; } - err = extend_or_restart_transaction(handle, 1); - if (err) + err = ext4_resize_ensure_credits_batch(handle, 1); + if (err < 0) return err; bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap); if (unlikely(!bh)) return -ENOMEM; - err = ext4_journal_get_write_access(handle, bh); - if (err) + BUFFER_TRACE(bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, sb, bh, + EXT4_JTR_NONE); + if (err) { + brelse(bh); return err; - ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", block, - block - start, count2); - ext4_set_bits(bh->b_data, block - start, count2); + } + ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", + first_cluster, first_cluster - start, count2); + mb_set_bits(bh->b_data, first_cluster - start, count2); err = ext4_handle_dirty_metadata(handle, NULL, bh); + brelse(bh); if (unlikely(err)) return err; - brelse(bh); } return 0; @@ -468,7 +548,7 @@ static int setup_new_flex_group_blocks(struct super_block *sb, group_data[0].group != sbi->s_groups_count); reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks); - meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); + meta_bg = ext4_has_feature_meta_bg(sb); /* This transaction may be extended/restarted along the way */ handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, EXT4_MAX_TRANS_DATA); @@ -486,13 +566,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb, if (meta_bg == 0 && !ext4_bg_has_super(sb, group)) goto handle_itb; - if (meta_bg == 1) { - ext4_group_t first_group; - first_group = ext4_meta_bg_first_group(sb, group); - if (first_group != group + 1 && - first_group != group + EXT4_DESC_PER_BLOCK(sb) - 1) - goto handle_itb; - } + if (meta_bg == 1) + goto handle_itb; block = start + ext4_bg_has_super(sb, group); /* Copy all of the GDT blocks into the backup in this group */ @@ -500,8 +575,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb, struct buffer_head *gdb; ext4_debug("update backup group %#04llx\n", block); - err = extend_or_restart_transaction(handle, 1); - if (err) + err = ext4_resize_ensure_credits_batch(handle, 1); + if (err < 0) goto out; gdb = sb_getblk(sb, block); @@ -510,13 +585,15 @@ static int setup_new_flex_group_blocks(struct super_block *sb, goto out; } - err = ext4_journal_get_write_access(handle, gdb); + BUFFER_TRACE(gdb, "get_write_access"); + err = ext4_journal_get_write_access(handle, sb, gdb, + EXT4_JTR_NONE); if (err) { brelse(gdb); goto out; } - memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data, - gdb->b_size); + memcpy(gdb->b_data, sbi_array_rcu_deref(sbi, + s_group_desc, j)->b_data, gdb->b_size); set_buffer_uptodate(gdb); err = ext4_handle_dirty_metadata(handle, NULL, gdb); @@ -538,7 +615,7 @@ static int setup_new_flex_group_blocks(struct super_block *sb, } handle_itb: - /* Initialize group tables of the grop @group */ + /* Initialize group tables of the group @group */ if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED)) goto handle_bb; @@ -557,8 +634,8 @@ handle_bb: /* Initialize block bitmap of the @group */ block = group_data[i].block_bitmap; - err = extend_or_restart_transaction(handle, 1); - if (err) + err = ext4_resize_ensure_credits_batch(handle, 1); + if (err < 0) goto out; bh = bclean(handle, sb, block); @@ -570,14 +647,15 @@ handle_bb: if (overhead != 0) { ext4_debug("mark backup superblock %#04llx (+0)\n", start); - ext4_set_bits(bh->b_data, 0, overhead); + mb_set_bits(bh->b_data, 0, + EXT4_NUM_B2C(sbi, overhead)); } - ext4_mark_bitmap_end(group_data[i].blocks_count, + ext4_mark_bitmap_end(EXT4_B2C(sbi, group_data[i].blocks_count), sb->s_blocksize * 8, bh->b_data); err = ext4_handle_dirty_metadata(handle, NULL, bh); + brelse(bh); if (err) goto out; - brelse(bh); handle_ib: if (bg_flags[i] & EXT4_BG_INODE_UNINIT) @@ -585,8 +663,8 @@ handle_ib: /* Initialize inode bitmap of the @group */ block = group_data[i].inode_bitmap; - err = extend_or_restart_transaction(handle, 1); - if (err) + err = ext4_resize_ensure_credits_batch(handle, 1); + if (err < 0) goto out; /* Mark unused entries in inode bitmap used */ bh = bclean(handle, sb, block); @@ -598,11 +676,10 @@ handle_ib: ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, bh->b_data); err = ext4_handle_dirty_metadata(handle, NULL, bh); + brelse(bh); if (err) goto out; - brelse(bh); } - bh = NULL; /* Mark group tables in block bitmap */ for (j = 0; j < GROUP_TABLE_COUNT; j++) { @@ -616,24 +693,29 @@ handle_ib: continue; } err = set_flexbg_block_bitmap(sb, handle, - flex_gd, start, count); + flex_gd, + EXT4_B2C(sbi, start), + EXT4_B2C(sbi, + start + count + - 1)); if (err) goto out; count = group_table_count[j]; - start = group_data[i].block_bitmap; + start = (&group_data[i].block_bitmap)[j]; block = start; } - if (count) { - err = set_flexbg_block_bitmap(sb, handle, - flex_gd, start, count); - if (err) - goto out; - } + err = set_flexbg_block_bitmap(sb, handle, + flex_gd, + EXT4_B2C(sbi, start), + EXT4_B2C(sbi, + start + count + - 1)); + if (err) + goto out; } out: - brelse(bh); err2 = ext4_journal_stop(handle); if (err2 && !err) err = err2; @@ -648,15 +730,25 @@ out: * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ... * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ... */ -static unsigned ext4_list_backups(struct super_block *sb, unsigned *three, - unsigned *five, unsigned *seven) +unsigned int ext4_list_backups(struct super_block *sb, unsigned int *three, + unsigned int *five, unsigned int *seven) { - unsigned *min = three; + struct ext4_super_block *es = EXT4_SB(sb)->s_es; + unsigned int *min = three; int mult = 3; - unsigned ret; + unsigned int ret; + + if (ext4_has_feature_sparse_super2(sb)) { + do { + if (*min > 2) + return UINT_MAX; + ret = le32_to_cpu(es->s_backup_bgs[*min - 1]); + *min += 1; + } while (!ret); + return ret; + } - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { + if (!ext4_has_feature_sparse_super(sb)) { ret = *min; *min += 1; return ret; @@ -732,11 +824,11 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, struct ext4_super_block *es = EXT4_SB(sb)->s_es; unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb); ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; - struct buffer_head **o_group_desc, **n_group_desc; - struct buffer_head *dind; - struct buffer_head *gdb_bh; + struct buffer_head **o_group_desc, **n_group_desc = NULL; + struct buffer_head *dind = NULL; + struct buffer_head *gdb_bh = NULL; int gdbackups; - struct ext4_iloc iloc; + struct ext4_iloc iloc = { .bh = NULL }; __le32 *data; int err; @@ -745,33 +837,22 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", gdb_num); - /* - * If we are not using the primary superblock/GDT copy don't resize, - * because the user tools have no way of handling this. Probably a - * bad time to do it anyways. - */ - if (EXT4_SB(sb)->s_sbh->b_blocknr != - le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { - ext4_warning(sb, "won't resize using backup superblock at %llu", - (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); - return -EPERM; - } - - gdb_bh = sb_bread(sb, gdblock); - if (!gdb_bh) - return -EIO; + gdb_bh = ext4_sb_bread(sb, gdblock, 0); + if (IS_ERR(gdb_bh)) + return PTR_ERR(gdb_bh); gdbackups = verify_reserved_gdb(sb, group, gdb_bh); if (gdbackups < 0) { err = gdbackups; - goto exit_bh; + goto errout; } data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK; - dind = sb_bread(sb, le32_to_cpu(*data)); - if (!dind) { - err = -EIO; - goto exit_bh; + dind = ext4_sb_bread(sb, le32_to_cpu(*data), 0); + if (IS_ERR(dind)) { + err = PTR_ERR(dind); + dind = NULL; + goto errout; } data = (__le32 *)dind->b_data; @@ -779,34 +860,39 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, ext4_warning(sb, "new group %u GDT block %llu not reserved", group, gdblock); err = -EINVAL; - goto exit_dind; + goto errout; } - err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); + BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); + err = ext4_journal_get_write_access(handle, sb, EXT4_SB(sb)->s_sbh, + EXT4_JTR_NONE); if (unlikely(err)) - goto exit_dind; + goto errout; - err = ext4_journal_get_write_access(handle, gdb_bh); + BUFFER_TRACE(gdb_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, sb, gdb_bh, EXT4_JTR_NONE); if (unlikely(err)) - goto exit_dind; + goto errout; - err = ext4_journal_get_write_access(handle, dind); - if (unlikely(err)) + BUFFER_TRACE(dind, "get_write_access"); + err = ext4_journal_get_write_access(handle, sb, dind, EXT4_JTR_NONE); + if (unlikely(err)) { ext4_std_error(sb, err); + goto errout; + } /* ext4_reserve_inode_write() gets a reference on the iloc */ err = ext4_reserve_inode_write(handle, inode, &iloc); if (unlikely(err)) - goto exit_dind; + goto errout; - n_group_desc = ext4_kvmalloc((gdb_num + 1) * - sizeof(struct buffer_head *), - GFP_NOFS); + n_group_desc = kvmalloc((gdb_num + 1) * sizeof(struct buffer_head *), + GFP_KERNEL); if (!n_group_desc) { err = -ENOMEM; ext4_warning(sb, "not enough memory for %lu groups", gdb_num + 1); - goto exit_inode; + goto errout; } /* @@ -822,39 +908,42 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, err = ext4_handle_dirty_metadata(handle, NULL, dind); if (unlikely(err)) { ext4_std_error(sb, err); - goto exit_inode; + goto errout; } - inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; + inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> + (9 - EXT4_SB(sb)->s_cluster_bits); ext4_mark_iloc_dirty(handle, inode, &iloc); memset(gdb_bh->b_data, 0, sb->s_blocksize); err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); if (unlikely(err)) { ext4_std_error(sb, err); - goto exit_inode; + iloc.bh = NULL; + goto errout; } brelse(dind); - o_group_desc = EXT4_SB(sb)->s_group_desc; + rcu_read_lock(); + o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc); memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); + rcu_read_unlock(); n_group_desc[gdb_num] = gdb_bh; - EXT4_SB(sb)->s_group_desc = n_group_desc; + rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc); EXT4_SB(sb)->s_gdb_count++; - ext4_kvfree(o_group_desc); + ext4_kvfree_array_rcu(o_group_desc); + lock_buffer(EXT4_SB(sb)->s_sbh); le16_add_cpu(&es->s_reserved_gdt_blocks, -1); - err = ext4_handle_dirty_super(handle, sb); + ext4_superblock_csum_set(sb); + unlock_buffer(EXT4_SB(sb)->s_sbh); + err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); if (err) ext4_std_error(sb, err); - return err; - -exit_inode: - ext4_kvfree(n_group_desc); +errout: + kvfree(n_group_desc); brelse(iloc.bh); -exit_dind: brelse(dind); -exit_bh: brelse(gdb_bh); ext4_debug("leaving with error %d\n", err); @@ -862,7 +951,13 @@ exit_bh: } /* - * add_new_gdb_meta_bg is the sister of add_new_gdb. + * If there is no available space in the existing block group descriptors for + * the new block group and there are no reserved block group descriptors, then + * the meta_bg feature will get enabled, and es->s_first_meta_bg will get set + * to the first block group that is managed using meta_bg and s_first_meta_bg + * must be a multiple of EXT4_DESC_PER_BLOCK(sb). + * This function will be called when first group of meta_bg is added to bring + * new group descriptors block of new added meta_bg. */ static int add_new_gdb_meta_bg(struct super_block *sb, handle_t *handle, ext4_group_t group) { @@ -872,31 +967,39 @@ static int add_new_gdb_meta_bg(struct super_block *sb, unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb); int err; - gdblock = ext4_meta_bg_first_block_no(sb, group) + - ext4_bg_has_super(sb, group); - gdb_bh = sb_bread(sb, gdblock); - if (!gdb_bh) - return -EIO; - n_group_desc = ext4_kvmalloc((gdb_num + 1) * - sizeof(struct buffer_head *), - GFP_NOFS); + gdblock = ext4_group_first_block_no(sb, group) + + ext4_bg_has_super(sb, group); + gdb_bh = ext4_sb_bread(sb, gdblock, 0); + if (IS_ERR(gdb_bh)) + return PTR_ERR(gdb_bh); + n_group_desc = kvmalloc((gdb_num + 1) * sizeof(struct buffer_head *), + GFP_KERNEL); if (!n_group_desc) { + brelse(gdb_bh); err = -ENOMEM; ext4_warning(sb, "not enough memory for %lu groups", gdb_num + 1); return err; } - o_group_desc = EXT4_SB(sb)->s_group_desc; + rcu_read_lock(); + o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc); memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); + rcu_read_unlock(); n_group_desc[gdb_num] = gdb_bh; - EXT4_SB(sb)->s_group_desc = n_group_desc; - EXT4_SB(sb)->s_gdb_count++; - ext4_kvfree(o_group_desc); - err = ext4_journal_get_write_access(handle, gdb_bh); - if (unlikely(err)) + + BUFFER_TRACE(gdb_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, sb, gdb_bh, EXT4_JTR_NONE); + if (err) { + kvfree(n_group_desc); brelse(gdb_bh); + return err; + } + + rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc); + EXT4_SB(sb)->s_gdb_count++; + ext4_kvfree_array_rcu(o_group_desc); return err; } @@ -918,6 +1021,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, { struct super_block *sb = inode->i_sb; int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks); + int cluster_bits = EXT4_SB(sb)->s_cluster_bits; struct buffer_head **primary; struct buffer_head *dind; struct ext4_iloc iloc; @@ -927,14 +1031,15 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, int res, i; int err; - primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS); + primary = kmalloc_array(reserved_gdb, sizeof(*primary), GFP_NOFS); if (!primary) return -ENOMEM; data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK; - dind = sb_bread(sb, le32_to_cpu(*data)); - if (!dind) { - err = -EIO; + dind = ext4_sb_bread(sb, le32_to_cpu(*data), 0); + if (IS_ERR(dind)) { + err = PTR_ERR(dind); + dind = NULL; goto exit_free; } @@ -953,9 +1058,10 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, err = -EINVAL; goto exit_bh; } - primary[res] = sb_bread(sb, blk); - if (!primary[res]) { - err = -EIO; + primary[res] = ext4_sb_bread(sb, blk, 0); + if (IS_ERR(primary[res])) { + err = PTR_ERR(primary[res]); + primary[res] = NULL; goto exit_bh; } gdbackups = verify_reserved_gdb(sb, group, primary[res]); @@ -969,7 +1075,9 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, } for (i = 0; i < reserved_gdb; i++) { - if ((err = ext4_journal_get_write_access(handle, primary[i]))) + BUFFER_TRACE(primary[i], "get_write_access"); + if ((err = ext4_journal_get_write_access(handle, sb, primary[i], + EXT4_JTR_NONE))) goto exit_bh; } @@ -984,15 +1092,13 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, for (i = 0; i < reserved_gdb; i++) { int err2; data = (__le32 *)primary[i]->b_data; - /* printk("reserving backup %lu[%u] = %lu\n", - primary[i]->b_blocknr, gdbackups, - blk + primary[i]->b_blocknr); */ data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr); err2 = ext4_handle_dirty_metadata(handle, NULL, primary[i]); if (!err) err = err2; } - inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9; + + inode->i_blocks += reserved_gdb * sb->s_blocksize >> (9 - cluster_bits); ext4_mark_iloc_dirty(handle, inode, &iloc); exit_bh: @@ -1006,6 +1112,16 @@ exit_free: return err; } +static inline void ext4_set_block_group_nr(struct super_block *sb, char *data, + ext4_group_t group) +{ + struct ext4_super_block *es = (struct ext4_super_block *) data; + + es->s_block_group_nr = cpu_to_le16(group); + if (ext4_has_feature_metadata_csum(sb)) + es->s_checksum = ext4_superblock_csum(es); +} + /* * Update the backup copies of the ext4 metadata. These don't need to be part * of the main resize transaction, because e2fsck will re-write them if there @@ -1022,7 +1138,7 @@ exit_free: * do not copy the full number of backups at this time. The resize * which changed s_groups_count will backup again. */ -static void update_backups(struct super_block *sb, int blk_off, char *data, +static void update_backups(struct super_block *sb, sector_t blk_off, char *data, int size, int meta_bg) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -1047,26 +1163,25 @@ static void update_backups(struct super_block *sb, int blk_off, char *data, group = ext4_list_backups(sb, &three, &five, &seven); last = sbi->s_groups_count; } else { - group = ext4_meta_bg_first_group(sb, group) + 1; + group = ext4_get_group_number(sb, blk_off) + 1; last = (ext4_group_t)(group + EXT4_DESC_PER_BLOCK(sb) - 2); } while (group < sbi->s_groups_count) { struct buffer_head *bh; ext4_fsblk_t backup_block; + int has_super = ext4_bg_has_super(sb, group); + ext4_fsblk_t first_block = ext4_group_first_block_no(sb, group); /* Out of journal space, and can't get more - abort - so sad */ - if (ext4_handle_valid(handle) && - handle->h_buffer_credits == 0 && - ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA) && - (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) + err = ext4_resize_ensure_credits_batch(handle, 1); + if (err < 0) break; if (meta_bg == 0) - backup_block = group * bpg + blk_off; + backup_block = ((ext4_fsblk_t)group) * bpg + blk_off; else - backup_block = (ext4_group_first_block_no(sb, group) + - ext4_bg_has_super(sb, group)); + backup_block = first_block + has_super; bh = sb_getblk(sb, backup_block); if (unlikely(!bh)) { @@ -1076,12 +1191,18 @@ static void update_backups(struct super_block *sb, int blk_off, char *data, ext4_debug("update metadata backup %llu(+%llu)\n", backup_block, backup_block - ext4_group_first_block_no(sb, group)); - if ((err = ext4_journal_get_write_access(handle, bh))) + BUFFER_TRACE(bh, "get_write_access"); + if ((err = ext4_journal_get_write_access(handle, sb, bh, + EXT4_JTR_NONE))) { + brelse(bh); break; + } lock_buffer(bh); memcpy(bh->b_data, data, size); if (rest) memset(bh->b_data + size, 0, rest); + if (has_super && (backup_block == first_block)) + ext4_set_block_group_nr(sb, bh->b_data, group); set_buffer_uptodate(bh); unlock_buffer(bh); err = ext4_handle_dirty_metadata(handle, NULL, bh); @@ -1139,7 +1260,7 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, int i, gdb_off, gdb_num, err = 0; int meta_bg; - meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); + meta_bg = ext4_has_feature_meta_bg(sb); for (i = 0; i < count; i++, group++) { int reserved_gdb = ext4_bg_has_super(sb, group) ? le16_to_cpu(es->s_reserved_gdt_blocks) : 0; @@ -1154,8 +1275,11 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, * use non-sparse filesystems anymore. This is already checked above. */ if (gdb_off) { - gdb_bh = sbi->s_group_desc[gdb_num]; - err = ext4_journal_get_write_access(handle, gdb_bh); + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, + gdb_num); + BUFFER_TRACE(gdb_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, sb, gdb_bh, + EXT4_JTR_NONE); if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group)) err = reserve_backup_gdb(handle, resize_inode, group); @@ -1176,7 +1300,7 @@ static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block) if (unlikely(!bh)) return NULL; if (!bh_uptodate_or_lock(bh)) { - if (bh_submit_read(bh) < 0) { + if (ext4_read_bh(bh, 0, NULL, false) < 0) { brelse(bh); return NULL; } @@ -1186,27 +1310,24 @@ static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block) } static int ext4_set_bitmap_checksums(struct super_block *sb, - ext4_group_t group, struct ext4_group_desc *gdp, struct ext4_new_group_data *group_data) { struct buffer_head *bh; - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) + if (!ext4_has_feature_metadata_csum(sb)) return 0; bh = ext4_get_bitmap(sb, group_data->inode_bitmap); if (!bh) return -EIO; - ext4_inode_bitmap_csum_set(sb, group, gdp, bh, - EXT4_INODES_PER_GROUP(sb) / 8); + ext4_inode_bitmap_csum_set(sb, gdp, bh); brelse(bh); bh = ext4_get_bitmap(sb, group_data->block_bitmap); if (!bh) return -EIO; - ext4_block_bitmap_csum_set(sb, group, gdp, bh); + ext4_block_bitmap_csum_set(sb, gdp, bh); brelse(bh); return 0; @@ -1225,7 +1346,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, ext4_group_t group; __u16 *bg_flags = flex_gd->bg_flags; int i, gdb_off, gdb_num, err = 0; - + for (i = 0; i < flex_gd->count; i++, group_data++, bg_flags++) { group = group_data->group; @@ -1236,7 +1357,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, /* * get_write_access() has been called on gdb_bh by ext4_add_new_desc(). */ - gdb_bh = sbi->s_group_desc[gdb_num]; + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, gdb_num); /* Update group descriptor block for new group */ gdp = (struct ext4_group_desc *)(gdb_bh->b_data + gdb_off * EXT4_DESC_SIZE(sb)); @@ -1244,7 +1365,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, memset(gdp, 0, EXT4_DESC_SIZE(sb)); ext4_block_bitmap_set(sb, gdp, group_data->block_bitmap); ext4_inode_bitmap_set(sb, gdp, group_data->inode_bitmap); - err = ext4_set_bitmap_checksums(sb, group, gdp, group_data); + err = ext4_set_bitmap_checksums(sb, gdp, group_data); if (err) { ext4_std_error(sb, err); break; @@ -1252,7 +1373,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, ext4_inode_table_set(sb, gdp, group_data->inode_table); ext4_free_group_clusters_set(sb, gdp, - EXT4_NUM_B2C(sbi, group_data->free_blocks_count)); + group_data->free_clusters_count); ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); if (ext4_has_group_desc_csum(sb)) ext4_itable_unused_set(sb, gdp, @@ -1277,6 +1398,17 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, return err; } +static void ext4_add_overhead(struct super_block *sb, + const ext4_fsblk_t overhead) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + + sbi->s_overhead += overhead; + es->s_overhead_clusters = cpu_to_le32(sbi->s_overhead); + smp_wmb(); +} + /* * ext4_update_super() updates the super block so that the newly added * groups can be seen by the filesystem. @@ -1308,7 +1440,7 @@ static void ext4_update_super(struct super_block *sb, */ for (i = 0; i < flex_gd->count; i++) { blocks_count += group_data[i].blocks_count; - free_blocks += group_data[i].free_blocks_count; + free_blocks += EXT4_C2B(sbi, group_data[i].free_clusters_count); } reserved_blocks = ext4_r_blocks_count(es) * 100; @@ -1316,6 +1448,7 @@ static void ext4_update_super(struct super_block *sb, reserved_blocks *= blocks_count; do_div(reserved_blocks, 100); + lock_buffer(sbi->s_sbh); ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count); ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + free_blocks); le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) * @@ -1362,22 +1495,34 @@ static void ext4_update_super(struct super_block *sb, ext4_debug("free blocks count %llu", percpu_counter_read(&sbi->s_freeclusters_counter)); - if (EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_FLEX_BG) && - sbi->s_log_groups_per_flex) { + if (ext4_has_feature_flex_bg(sb) && sbi->s_log_groups_per_flex) { ext4_group_t flex_group; + struct flex_groups *fg; + flex_group = ext4_flex_group(sbi, group_data[0].group); + fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group); atomic64_add(EXT4_NUM_B2C(sbi, free_blocks), - &sbi->s_flex_groups[flex_group].free_clusters); + &fg->free_clusters); atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, - &sbi->s_flex_groups[flex_group].free_inodes); + &fg->free_inodes); } /* - * Update the fs overhead information + * Update the fs overhead information. + * + * For bigalloc, if the superblock already has a properly calculated + * overhead, update it with a value based on numbers already computed + * above for the newly allocated capacity. */ - ext4_calculate_overhead(sb); + if (ext4_has_feature_bigalloc(sb) && (sbi->s_overhead != 0)) + ext4_add_overhead(sb, + EXT4_NUM_B2C(sbi, blocks_count - free_blocks)); + else + ext4_calculate_overhead(sb); + es->s_overhead_clusters = cpu_to_le32(sbi->s_overhead); + ext4_superblock_csum_set(sb); + unlock_buffer(sbi->s_sbh); if (test_opt(sb, DEBUG)) printk(KERN_DEBUG "EXT4-fs: added group %u:" "%llu blocks(%llu free %llu reserved)\n", flex_gd->count, @@ -1413,24 +1558,29 @@ static int ext4_flex_group_add(struct super_block *sb, goto exit; /* * We will always be modifying at least the superblock and GDT - * block. If we are adding a group past the last current GDT block, + * blocks. If we are adding a group past the last current GDT block, * we will also modify the inode and the dindirect block. If we * are adding a group with superblock/GDT backups we will also * modify each of the reserved GDT dindirect blocks. */ - credit = flex_gd->count * 4 + reserved_gdb; + credit = 3; /* sb, resize inode, resize inode dindirect */ + /* GDT blocks */ + credit += 1 + DIV_ROUND_UP(flex_gd->count, EXT4_DESC_PER_BLOCK(sb)); + credit += reserved_gdb; /* Reserved GDT dindirect blocks */ handle = ext4_journal_start_sb(sb, EXT4_HT_RESIZE, credit); if (IS_ERR(handle)) { err = PTR_ERR(handle); goto exit; } - err = ext4_journal_get_write_access(handle, sbi->s_sbh); + BUFFER_TRACE(sbi->s_sbh, "get_write_access"); + err = ext4_journal_get_write_access(handle, sb, sbi->s_sbh, + EXT4_JTR_NONE); if (err) goto exit_journal; group = flex_gd->groups[0].group; - BUG_ON(group != EXT4_SB(sb)->s_groups_count); + BUG_ON(group != sbi->s_groups_count); err = ext4_add_new_descs(handle, sb, group, resize_inode, flex_gd->count); if (err) @@ -1442,7 +1592,7 @@ static int ext4_flex_group_add(struct super_block *sb, ext4_update_super(sb, flex_gd); - err = ext4_handle_dirty_super(handle, sb); + err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); exit_journal: err2 = ext4_journal_stop(handle); @@ -1453,21 +1603,20 @@ exit_journal: int gdb_num = group / EXT4_DESC_PER_BLOCK(sb); int gdb_num_end = ((group + flex_gd->count - 1) / EXT4_DESC_PER_BLOCK(sb)); - int meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, - EXT4_FEATURE_INCOMPAT_META_BG); - sector_t old_gdb = 0; + int meta_bg = ext4_has_feature_meta_bg(sb) && + gdb_num >= le32_to_cpu(es->s_first_meta_bg); + sector_t padding_blocks = meta_bg ? 0 : sbi->s_sbh->b_blocknr - + ext4_group_first_block_no(sb, 0); - update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, - sizeof(struct ext4_super_block), 0); + update_backups(sb, ext4_group_first_block_no(sb, 0), + (char *)es, sizeof(struct ext4_super_block), 0); for (; gdb_num <= gdb_num_end; gdb_num++) { struct buffer_head *gdb_bh; - gdb_bh = sbi->s_group_desc[gdb_num]; - if (old_gdb == gdb_bh->b_blocknr) - continue; - update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, - gdb_bh->b_size, meta_bg); - old_gdb = gdb_bh->b_blocknr; + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, + gdb_num); + update_backups(sb, gdb_bh->b_blocknr - padding_blocks, + gdb_bh->b_data, gdb_bh->b_size, meta_bg); } } exit: @@ -1476,20 +1625,20 @@ exit: static int ext4_setup_next_flex_gd(struct super_block *sb, struct ext4_new_flex_group_data *flex_gd, - ext4_fsblk_t n_blocks_count, - unsigned long flexbg_size) + ext4_fsblk_t n_blocks_count) { - struct ext4_super_block *es = EXT4_SB(sb)->s_es; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; struct ext4_new_group_data *group_data = flex_gd->groups; ext4_fsblk_t o_blocks_count; ext4_group_t n_group; ext4_group_t group; ext4_group_t last_group; ext4_grpblk_t last; - ext4_grpblk_t blocks_per_group; + ext4_grpblk_t clusters_per_group; unsigned long i; - blocks_per_group = EXT4_BLOCKS_PER_GROUP(sb); + clusters_per_group = EXT4_CLUSTERS_PER_GROUP(sb); o_blocks_count = ext4_blocks_count(es); @@ -1500,7 +1649,7 @@ static int ext4_setup_next_flex_gd(struct super_block *sb, BUG_ON(last); ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &last); - last_group = group | (flexbg_size - 1); + last_group = group | (flex_gd->resize_bg - 1); if (last_group > n_group) last_group = n_group; @@ -1510,9 +1659,10 @@ static int ext4_setup_next_flex_gd(struct super_block *sb, int overhead; group_data[i].group = group + i; - group_data[i].blocks_count = blocks_per_group; + group_data[i].blocks_count = EXT4_BLOCKS_PER_GROUP(sb); overhead = ext4_group_overhead_blocks(sb, group + i); - group_data[i].free_blocks_count = blocks_per_group - overhead; + group_data[i].mdata_blocks = overhead; + group_data[i].free_clusters_count = EXT4_CLUSTERS_PER_GROUP(sb); if (ext4_has_group_desc_csum(sb)) { flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | EXT4_BG_INODE_UNINIT; @@ -1526,10 +1676,10 @@ static int ext4_setup_next_flex_gd(struct super_block *sb, /* We need to initialize block bitmap of last group. */ flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT; - if ((last_group == n_group) && (last != blocks_per_group - 1)) { - group_data[i - 1].blocks_count = last + 1; - group_data[i - 1].free_blocks_count -= blocks_per_group- - last - 1; + if ((last_group == n_group) && (last != clusters_per_group - 1)) { + group_data[i - 1].blocks_count = EXT4_C2B(sbi, last + 1); + group_data[i - 1].free_clusters_count -= clusters_per_group - + last - 1; } return 1; @@ -1562,8 +1712,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); - if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { + if (gdb_off == 0 && !ext4_has_feature_sparse_super(sb)) { ext4_warning(sb, "Can't resize non-sparse filesystem further"); return -EPERM; } @@ -1581,14 +1730,13 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) } if (reserved_gdb || gdb_off == 0) { - if (!EXT4_HAS_COMPAT_FEATURE(sb, - EXT4_FEATURE_COMPAT_RESIZE_INODE) - || !le16_to_cpu(es->s_reserved_gdt_blocks)) { + if (!ext4_has_feature_resize_inode(sb) || + !le16_to_cpu(es->s_reserved_gdt_blocks)) { ext4_warning(sb, "No reserved GDT blocks, can't resize"); return -EPERM; } - inode = ext4_iget(sb, EXT4_RESIZE_INO); + inode = ext4_iget(sb, EXT4_RESIZE_INO, EXT4_IGET_SPECIAL); if (IS_ERR(inode)) { ext4_warning(sb, "Error opening resize inode"); return PTR_ERR(inode); @@ -1637,21 +1785,26 @@ static int ext4_group_extend_no_check(struct super_block *sb, return err; } - err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); + BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); + err = ext4_journal_get_write_access(handle, sb, EXT4_SB(sb)->s_sbh, + EXT4_JTR_NONE); if (err) { ext4_warning(sb, "error %d on journal write access", err); goto errout; } + lock_buffer(EXT4_SB(sb)->s_sbh); ext4_blocks_count_set(es, o_blocks_count + add); ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + add); + ext4_superblock_csum_set(sb); + unlock_buffer(EXT4_SB(sb)->s_sbh); ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); /* We add the blocks to the bitmap and set the group need init bit */ err = ext4_group_add_blocks(handle, sb, o_blocks_count, add); if (err) goto errout; - ext4_handle_dirty_super(handle, sb); + ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); errout: @@ -1663,7 +1816,7 @@ errout: if (test_opt(sb, DEBUG)) printk(KERN_DEBUG "EXT4-fs: extended group to %llu " "blocks\n", ext4_blocks_count(es)); - update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, + update_backups(sb, ext4_group_first_block_no(sb, 0), (char *)es, sizeof(struct ext4_super_block), 0); } return err; @@ -1686,7 +1839,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_grpblk_t last; ext4_grpblk_t add; struct buffer_head *bh; - int err; ext4_group_t group; o_blocks_count = ext4_blocks_count(es); @@ -1703,8 +1855,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_msg(sb, KERN_ERR, "filesystem too large to resize to %llu blocks safely", n_blocks_count); - if (sizeof(sector_t) < 8) - ext4_warning(sb, "CONFIG_LBDAF not enabled"); return -EINVAL; } @@ -1736,15 +1886,14 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, o_blocks_count + add, add); /* See if the device is actually as big as what was requested */ - bh = sb_bread(sb, o_blocks_count + add - 1); - if (!bh) { + bh = ext4_sb_bread(sb, o_blocks_count + add - 1, 0); + if (IS_ERR(bh)) { ext4_warning(sb, "can't read last block, resize aborted"); return -ENOSPC; } brelse(bh); - err = ext4_group_extend_no_check(sb, o_blocks_count, add); - return err; + return ext4_group_extend_no_check(sb, o_blocks_count, add); } /* ext4_group_extend */ @@ -1777,7 +1926,8 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode) } /* Do a quick sanity check of the resize inode */ - if (inode->i_blocks != 1 << (inode->i_blkbits - 9)) + if (inode->i_blocks != 1 << (inode->i_blkbits - + (9 - sbi->s_cluster_bits))) goto invalid_resize_inode; for (i = 0; i < EXT4_N_BLOCKS; i++) { if (i == EXT4_DIND_BLOCK) { @@ -1796,16 +1946,21 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode) if (IS_ERR(handle)) return PTR_ERR(handle); - err = ext4_journal_get_write_access(handle, sbi->s_sbh); + BUFFER_TRACE(sbi->s_sbh, "get_write_access"); + err = ext4_journal_get_write_access(handle, sb, sbi->s_sbh, + EXT4_JTR_NONE); if (err) goto errout; - EXT4_CLEAR_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE); - EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); + lock_buffer(sbi->s_sbh); + ext4_clear_feature_resize_inode(sb); + ext4_set_feature_meta_bg(sb); sbi->s_es->s_first_meta_bg = cpu_to_le32(num_desc_blocks(sb, sbi->s_groups_count)); + ext4_superblock_csum_set(sb); + unlock_buffer(sbi->s_sbh); - err = ext4_handle_dirty_super(handle, sb); + err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); if (err) { ext4_std_error(sb, err); goto errout; @@ -1826,9 +1981,7 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode) errout: ret = ext4_journal_stop(handle); - if (!err) - err = ret; - return ret; + return err ? err : ret; invalid_resize_inode: ext4_error(sb, "corrupted/inconsistent resize inode"); @@ -1856,17 +2009,28 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) ext4_fsblk_t o_blocks_count; ext4_fsblk_t n_blocks_count_retry = 0; unsigned long last_update_time = 0; - int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex; + int err = 0; int meta_bg; + unsigned int flexbg_size = ext4_flex_bg_size(sbi); /* See if the device is actually as big as what was requested */ - bh = sb_bread(sb, n_blocks_count - 1); - if (!bh) { + bh = ext4_sb_bread(sb, n_blocks_count - 1, 0); + if (IS_ERR(bh)) { ext4_warning(sb, "can't read last block, resize aborted"); return -ENOSPC; } brelse(bh); + /* + * For bigalloc, trim the requested size to the nearest cluster + * boundary to avoid creating an unusable filesystem. We do this + * silently, instead of returning an error, to avoid breaking + * callers that blindly resize the filesystem to the full size of + * the underlying block device. + */ + if (ext4_has_feature_bigalloc(sb)) + n_blocks_count &= ~((1 << EXT4_CLUSTER_BITS(sb)) - 1); + retry: o_blocks_count = ext4_blocks_count(es); @@ -1884,7 +2048,7 @@ retry: return 0; n_group = ext4_get_group_number(sb, n_blocks_count - 1); - if (n_group > (0xFFFFFFFFUL / EXT4_INODES_PER_GROUP(sb))) { + if (n_group >= (0xFFFFFFFFUL / EXT4_INODES_PER_GROUP(sb))) { ext4_warning(sb, "resize would cause inodes_count overflow"); return -EINVAL; } @@ -1893,9 +2057,9 @@ retry: n_desc_blocks = num_desc_blocks(sb, n_group + 1); o_desc_blocks = num_desc_blocks(sb, sbi->s_groups_count); - meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG); + meta_bg = ext4_has_feature_meta_bg(sb); - if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE)) { + if (ext4_has_feature_resize_inode(sb)) { if (meta_bg) { ext4_error(sb, "resize_inode and meta_bg enabled " "simultaneously"); @@ -1907,19 +2071,22 @@ retry: n_desc_blocks = o_desc_blocks + le16_to_cpu(es->s_reserved_gdt_blocks); n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb); - n_blocks_count = n_group * EXT4_BLOCKS_PER_GROUP(sb); + n_blocks_count = (ext4_fsblk_t)n_group * + EXT4_BLOCKS_PER_GROUP(sb) + + le32_to_cpu(es->s_first_data_block); n_group--; /* set to last group number */ } if (!resize_inode) - resize_inode = ext4_iget(sb, EXT4_RESIZE_INO); + resize_inode = ext4_iget(sb, EXT4_RESIZE_INO, + EXT4_IGET_SPECIAL); if (IS_ERR(resize_inode)) { ext4_warning(sb, "Error opening resize inode"); return PTR_ERR(resize_inode); } } - if ((!resize_inode && !meta_bg) || n_blocks_count == o_blocks_count) { + if ((!resize_inode && !meta_bg && n_desc_blocks > o_desc_blocks) || n_blocks_count == o_blocks_count) { err = ext4_convert_meta_bg(sb, resize_inode); if (err) goto out; @@ -1934,29 +2101,49 @@ retry: } } + /* + * Make sure the last group has enough space so that it's + * guaranteed to have enough space for all metadata blocks + * that it might need to hold. (We might not need to store + * the inode table blocks in the last block group, but there + * will be cases where this might be needed.) + */ + if ((ext4_group_first_block_no(sb, n_group) + + ext4_group_overhead_blocks(sb, n_group) + 2 + + sbi->s_itb_per_group + sbi->s_cluster_ratio) >= n_blocks_count) { + n_blocks_count = ext4_group_first_block_no(sb, n_group); + n_group--; + n_blocks_count_retry = 0; + if (resize_inode) { + iput(resize_inode); + resize_inode = NULL; + } + goto retry; + } + /* extend the last group */ if (n_group == o_group) add = n_blocks_count - o_blocks_count; else - add = EXT4_BLOCKS_PER_GROUP(sb) - (offset + 1); + add = EXT4_C2B(sbi, EXT4_CLUSTERS_PER_GROUP(sb) - (offset + 1)); if (add > 0) { err = ext4_group_extend_no_check(sb, o_blocks_count, add); if (err) goto out; } - if (ext4_blocks_count(es) == n_blocks_count) + if (ext4_blocks_count(es) == n_blocks_count && n_blocks_count_retry == 0) goto out; err = ext4_alloc_flex_bg_array(sb, n_group + 1); if (err) - return err; + goto out; err = ext4_mb_alloc_groupinfo(sb, n_group + 1); if (err) goto out; - flex_gd = alloc_flex_gd(flexbg_size); + flex_gd = alloc_flex_gd(flexbg_size, o_group, n_group); if (flex_gd == NULL) { err = -ENOMEM; goto out; @@ -1965,9 +2152,8 @@ retry: /* Add flex groups. Note that a regular group is a * flex group with 1 group. */ - while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count, - flexbg_size)) { - if (jiffies - last_update_time > HZ * 10) { + while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count)) { + if (time_is_before_jiffies(last_update_time + HZ * 10)) { if (last_update_time) ext4_msg(sb, KERN_INFO, "resized to %llu blocks", @@ -1986,6 +2172,10 @@ retry: n_blocks_count_retry = 0; free_flex_gd(flex_gd); flex_gd = NULL; + if (resize_inode) { + iput(resize_inode); + resize_inode = NULL; + } goto retry; } @@ -1994,6 +2184,10 @@ out: free_flex_gd(flex_gd); if (resize_inode != NULL) iput(resize_inode); - ext4_msg(sb, KERN_INFO, "resized filesystem to %llu", n_blocks_count); + if (err) + ext4_warning(sb, "error (%d) occurred during " + "file system resize", err); + ext4_msg(sb, KERN_INFO, "resized filesystem to %llu", + ext4_blocks_count(es)); return err; } |
