diff options
Diffstat (limited to 'fs/ext4/indirect.c')
| -rw-r--r-- | fs/ext4/indirect.c | 784 |
1 files changed, 439 insertions, 345 deletions
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 87b30cd357e7..da76353b3a57 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/fs/ext4/indirect.c * @@ -20,10 +21,10 @@ * (sct@redhat.com), 1993, 1998 */ -#include <linux/aio.h> #include "ext4_jbd2.h" #include "truncate.h" -#include "ext4_extents.h" /* Needed for EXT_MAX_BLOCKS */ +#include <linux/dax.h> +#include <linux/uio.h> #include <trace/events/ext4.h> @@ -147,6 +148,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, struct super_block *sb = inode->i_sb; Indirect *p = chain; struct buffer_head *bh; + unsigned int key; int ret = -EIO; *err = 0; @@ -155,14 +157,20 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, if (!p->key) goto no_block; while (--depth) { - bh = sb_getblk(sb, le32_to_cpu(p->key)); + key = le32_to_cpu(p->key); + if (key > ext4_blocks_count(EXT4_SB(sb)->s_es)) { + /* the block was out of range */ + ret = -EFSCORRUPTED; + goto failure; + } + bh = sb_getblk(sb, key); if (unlikely(!bh)) { ret = -ENOMEM; goto failure; } if (!bh_uptodate_or_lock(bh)) { - if (bh_submit_read(bh) < 0) { + if (ext4_read_bh(bh, 0, NULL, false) < 0) { put_bh(bh); goto failure; } @@ -293,14 +301,12 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, } /** - * ext4_alloc_branch - allocate and set up a chain of blocks. - * @handle: handle for this transaction - * @inode: owner - * @indirect_blks: number of allocated indirect blocks - * @blks: number of allocated direct blocks - * @goal: preferred place for allocation - * @offsets: offsets (in the blocks) to store the pointers to next. - * @branch: place to store the chain in. + * ext4_alloc_branch() - allocate and set up a chain of blocks + * @handle: handle for this transaction + * @ar: structure describing the allocation request + * @indirect_blks: number of allocated indirect blocks + * @offsets: offsets (in the blocks) to store the pointers to next. + * @branch: place to store the chain in. * * This function allocates blocks, zeroes out all but the last one, * links them into chain and (if we are synchronous) writes them to disk. @@ -319,34 +325,27 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks, * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain * as described above and return 0. */ -static int ext4_alloc_branch(handle_t *handle, struct inode *inode, - ext4_lblk_t iblock, int indirect_blks, - int *blks, ext4_fsblk_t goal, - ext4_lblk_t *offsets, Indirect *branch) +static int ext4_alloc_branch(handle_t *handle, + struct ext4_allocation_request *ar, + int indirect_blks, ext4_lblk_t *offsets, + Indirect *branch) { - struct ext4_allocation_request ar; struct buffer_head * bh; ext4_fsblk_t b, new_blocks[4]; __le32 *p; int i, j, err, len = 1; - /* - * Set up for the direct block allocation - */ - memset(&ar, 0, sizeof(ar)); - ar.inode = inode; - ar.len = *blks; - ar.logical = iblock; - if (S_ISREG(inode->i_mode)) - ar.flags = EXT4_MB_HINT_DATA; - for (i = 0; i <= indirect_blks; i++) { if (i == indirect_blks) { - ar.goal = goal; - new_blocks[i] = ext4_mb_new_blocks(handle, &ar, &err); - } else - goal = new_blocks[i] = ext4_new_meta_blocks(handle, inode, - goal, 0, NULL, &err); + new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err); + } else { + ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle, + ar->inode, ar->goal, + ar->flags & EXT4_MB_DELALLOC_RESERVED, + NULL, &err); + /* Simplify error cleanup... */ + branch[i+1].bh = NULL; + } if (err) { i--; goto failed; @@ -355,14 +354,15 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, if (i == 0) continue; - bh = branch[i].bh = sb_getblk(inode->i_sb, new_blocks[i-1]); + bh = branch[i].bh = sb_getblk(ar->inode->i_sb, new_blocks[i-1]); if (unlikely(!bh)) { err = -ENOMEM; goto failed; } lock_buffer(bh); BUFFER_TRACE(bh, "call get_create_access"); - err = ext4_journal_get_create_access(handle, bh); + err = ext4_journal_get_create_access(handle, ar->inode->i_sb, + bh, EXT4_JTR_NONE); if (err) { unlock_buffer(bh); goto failed; @@ -373,7 +373,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, b = new_blocks[i]; if (i == indirect_blks) - len = ar.len; + len = ar->len; for (j = 0; j < len; j++) *p++ = cpu_to_le32(b++); @@ -382,41 +382,49 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, unlock_buffer(bh); BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_metadata(handle, inode, bh); + err = ext4_handle_dirty_metadata(handle, ar->inode, bh); if (err) goto failed; } - *blks = ar.len; return 0; failed: + if (i == indirect_blks) { + /* Free data blocks */ + ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i], + ar->len, 0); + i--; + } for (; i >= 0; i--) { - if (i != indirect_blks && branch[i].bh) - ext4_forget(handle, 1, inode, branch[i].bh, - branch[i].bh->b_blocknr); - ext4_free_blocks(handle, inode, NULL, new_blocks[i], - (i == indirect_blks) ? ar.len : 1, 0); + /* + * We want to ext4_forget() only freshly allocated indirect + * blocks. Buffer for new_blocks[i] is at branch[i+1].bh + * (buffer at branch[0].bh is indirect block / inode already + * existing before ext4_alloc_branch() was called). Also + * because blocks are freshly allocated, we don't need to + * revoke them which is why we don't set + * EXT4_FREE_BLOCKS_METADATA. + */ + ext4_free_blocks(handle, ar->inode, branch[i+1].bh, + new_blocks[i], 1, + branch[i+1].bh ? EXT4_FREE_BLOCKS_FORGET : 0); } return err; } /** - * ext4_splice_branch - splice the allocated branch onto inode. + * ext4_splice_branch() - splice the allocated branch onto inode. * @handle: handle for this transaction - * @inode: owner - * @block: (logical) number of block we are adding - * @chain: chain of indirect blocks (with a missing link - see - * ext4_alloc_branch) + * @ar: structure describing the allocation request * @where: location of missing link * @num: number of indirect blocks we are adding - * @blks: number of direct blocks we are adding * * This function fills the missing link and does all housekeeping needed in * inode (->i_blocks, etc.). In case of success we end up with the full * chain to new block and return 0. */ -static int ext4_splice_branch(handle_t *handle, struct inode *inode, - ext4_lblk_t block, Indirect *where, int num, - int blks) +static int ext4_splice_branch(handle_t *handle, + struct ext4_allocation_request *ar, + Indirect *where, int num) { int i; int err = 0; @@ -429,7 +437,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, */ if (where->bh) { BUFFER_TRACE(where->bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, where->bh); + err = ext4_journal_get_write_access(handle, ar->inode->i_sb, + where->bh, EXT4_JTR_NONE); if (err) goto err_out; } @@ -441,9 +450,9 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, * Update the host buffer_head or inode to point to more just allocated * direct blocks blocks */ - if (num == 0 && blks > 1) { + if (num == 0 && ar->len > 1) { current_block = le32_to_cpu(where->key) + 1; - for (i = 1; i < blks; i++) + for (i = 1; i < ar->len; i++) *(where->p + i) = cpu_to_le32(current_block++); } @@ -458,17 +467,19 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, * the new i_size. But that is not done here - it is done in * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode. */ - jbd_debug(5, "splicing indirect only\n"); + ext4_debug("splicing indirect only\n"); BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_metadata(handle, inode, where->bh); + err = ext4_handle_dirty_metadata(handle, ar->inode, where->bh); if (err) goto err_out; } else { /* * OK, we spliced it into the inode itself on a direct block. */ - ext4_mark_inode_dirty(handle, inode); - jbd_debug(5, "splicing direct\n"); + err = ext4_mark_inode_dirty(handle, ar->inode); + if (unlikely(err)) + goto err_out; + ext4_debug("splicing direct\n"); } return err; @@ -479,11 +490,11 @@ err_out: * need to revoke the block, which is why we don't * need to set EXT4_FREE_BLOCKS_METADATA. */ - ext4_free_blocks(handle, inode, where[i].bh, 0, 1, + ext4_free_blocks(handle, ar->inode, where[i].bh, 0, 1, EXT4_FREE_BLOCKS_FORGET); } - ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key), - blks, 0); + ext4_free_blocks(handle, ar->inode, NULL, le32_to_cpu(where[num].key), + ar->len, 0); return err; } @@ -520,20 +531,20 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags) { + struct ext4_allocation_request ar; int err = -EIO; ext4_lblk_t offsets[4]; Indirect chain[4]; Indirect *partial; - ext4_fsblk_t goal; int indirect_blks; int blocks_to_boundary = 0; int depth; - int count = 0; + u64 count = 0; ext4_fsblk_t first_block = 0; trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); - J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); - J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); + ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); + ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); depth = ext4_block_to_path(inode, map->m_lblk, offsets, &blocks_to_boundary); @@ -560,21 +571,53 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, goto got_it; } - /* Next simple case - plain lookup or failed read of indirect block */ - if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO) + /* Next simple case - plain lookup failed */ + if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { + unsigned epb = inode->i_sb->s_blocksize / sizeof(u32); + int i; + + /* + * Count number blocks in a subtree under 'partial'. At each + * level we count number of complete empty subtrees beyond + * current offset and then descend into the subtree only + * partially beyond current offset. + */ + count = 0; + for (i = partial - chain + 1; i < depth; i++) + count = count * epb + (epb - offsets[i] - 1); + count++; + /* Fill in size of a hole we found */ + map->m_pblk = 0; + map->m_len = umin(map->m_len, count); + goto cleanup; + } + + /* Failed read of indirect block */ + if (err == -EIO) goto cleanup; /* * Okay, we need to do block allocation. */ - if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { + if (ext4_has_feature_bigalloc(inode->i_sb)) { EXT4_ERROR_INODE(inode, "Can't allocate blocks for " "non-extent mapped inodes with bigalloc"); - return -ENOSPC; + err = -EFSCORRUPTED; + goto out; } - goal = ext4_find_goal(inode, map->m_lblk, partial); + /* Set up for the direct block allocation */ + memset(&ar, 0, sizeof(ar)); + ar.inode = inode; + ar.logical = map->m_lblk; + if (S_ISREG(inode->i_mode)) + ar.flags = EXT4_MB_HINT_DATA; + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) + ar.flags |= EXT4_MB_DELALLOC_RESERVED; + if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL) + ar.flags |= EXT4_MB_USE_RESERVED; + + ar.goal = ext4_find_goal(inode, map->m_lblk, partial); /* the number of blocks need to allocate for [d,t]indirect blocks */ indirect_blks = (chain + depth) - partial - 1; @@ -583,13 +626,13 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, * Next look up the indirect map to count the totoal number of * direct blocks to allocate for this branch. */ - count = ext4_blks_to_allocate(partial, indirect_blks, - map->m_len, blocks_to_boundary); + ar.len = ext4_blks_to_allocate(partial, indirect_blks, + map->m_len, blocks_to_boundary); + /* * Block out ext4_truncate while we alter the tree */ - err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks, - &count, goal, + err = ext4_alloc_branch(handle, &ar, indirect_blks, offsets + (partial - chain), partial); /* @@ -600,14 +643,15 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, * may need to return -EAGAIN upwards in the worst case. --sct */ if (!err) - err = ext4_splice_branch(handle, inode, map->m_lblk, - partial, indirect_blks, count); + err = ext4_splice_branch(handle, &ar, partial, indirect_blks); if (err) goto cleanup; map->m_flags |= EXT4_MAP_NEW; ext4_update_inode_fsync_trans(handle, inode, 1); + count = ar.len; + got_it: map->m_flags |= EXT4_MAP_MAPPED; map->m_pblk = le32_to_cpu(chain[depth-1].key); @@ -629,152 +673,6 @@ out: } /* - * O_DIRECT for ext3 (or indirect map) based files - * - * If the O_DIRECT write will extend the file then add this inode to the - * orphan list. So recovery will truncate it back to the original size - * if the machine crashes during the write. - * - * If the O_DIRECT write is intantiating holes inside i_size and the machine - * crashes then stale disk data _may_ be exposed inside the file. But current - * VFS code falls back into buffered path in that case so we are safe. - */ -ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - struct ext4_inode_info *ei = EXT4_I(inode); - handle_t *handle; - ssize_t ret; - int orphan = 0; - size_t count = iov_length(iov, nr_segs); - int retries = 0; - - if (rw == WRITE) { - loff_t final_size = offset + count; - - if (final_size > inode->i_size) { - /* Credits for sb + inode write */ - handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; - } - ret = ext4_orphan_add(handle, inode); - if (ret) { - ext4_journal_stop(handle); - goto out; - } - orphan = 1; - ei->i_disksize = inode->i_size; - ext4_journal_stop(handle); - } - } - -retry: - if (rw == READ && ext4_should_dioread_nolock(inode)) { - /* - * Nolock dioread optimization may be dynamically disabled - * via ext4_inode_block_unlocked_dio(). Check inode's state - * while holding extra i_dio_count ref. - */ - atomic_inc(&inode->i_dio_count); - smp_mb(); - if (unlikely(ext4_test_inode_state(inode, - EXT4_STATE_DIOREAD_LOCK))) { - inode_dio_done(inode); - goto locked; - } - ret = __blockdev_direct_IO(rw, iocb, inode, - inode->i_sb->s_bdev, iov, - offset, nr_segs, - ext4_get_block, NULL, NULL, 0); - inode_dio_done(inode); - } else { -locked: - ret = blockdev_direct_IO(rw, iocb, inode, iov, - offset, nr_segs, ext4_get_block); - - if (unlikely((rw & WRITE) && ret < 0)) { - loff_t isize = i_size_read(inode); - loff_t end = offset + iov_length(iov, nr_segs); - - if (end > isize) - ext4_truncate_failed_write(inode); - } - } - if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) - goto retry; - - if (orphan) { - int err; - - /* Credits for sb + inode write */ - handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); - if (IS_ERR(handle)) { - /* This is really bad luck. We've written the data - * but cannot extend i_size. Bail out and pretend - * the write failed... */ - ret = PTR_ERR(handle); - if (inode->i_nlink) - ext4_orphan_del(NULL, inode); - - goto out; - } - if (inode->i_nlink) - ext4_orphan_del(handle, inode); - if (ret > 0) { - loff_t end = offset + ret; - if (end > inode->i_size) { - ei->i_disksize = end; - i_size_write(inode, end); - /* - * We're going to return a positive `ret' - * here due to non-zero-length I/O, so there's - * no way of reporting error returns from - * ext4_mark_inode_dirty() to userspace. So - * ignore it. - */ - ext4_mark_inode_dirty(handle, inode); - } - } - err = ext4_journal_stop(handle); - if (ret == 0) - ret = err; - } -out: - return ret; -} - -/* - * Calculate the number of metadata blocks need to reserve - * to allocate a new block at @lblocks for non extent file based file - */ -int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1); - int blk_bits; - - if (lblock < EXT4_NDIR_BLOCKS) - return 0; - - lblock -= EXT4_NDIR_BLOCKS; - - if (ei->i_da_metadata_calc_len && - (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) { - ei->i_da_metadata_calc_len++; - return 0; - } - ei->i_da_metadata_calc_last_lblock = lblock & dind_mask; - ei->i_da_metadata_calc_len = 1; - blk_bits = order_base_2(lblock); - return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; -} - -/* * Calculate number of indirect blocks touched by mapping @nrblocks logically * contiguous blocks */ @@ -788,27 +686,64 @@ int ext4_ind_trans_blocks(struct inode *inode, int nrblocks) return DIV_ROUND_UP(nrblocks, EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; } +static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode, + struct buffer_head *bh, int *dropped) +{ + int err; + + if (bh) { + BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, inode, bh); + if (unlikely(err)) + return err; + } + err = ext4_mark_inode_dirty(handle, inode); + if (unlikely(err)) + return err; + /* + * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this + * moment, get_block can be called only for blocks inside i_size since + * page cache has been already dropped and writes are blocked by + * i_rwsem. So we can safely drop the i_data_sem here. + */ + BUG_ON(EXT4_JOURNAL(inode) == NULL); + ext4_discard_preallocations(inode); + up_write(&EXT4_I(inode)->i_data_sem); + *dropped = 1; + return 0; +} + /* * Truncate transactions can be complex and absolutely huge. So we need to - * be able to restart the transaction at a conventient checkpoint to make + * be able to restart the transaction at a convenient checkpoint to make * sure we don't overflow the journal. * * Try to extend this transaction for the purposes of truncation. If - * extend fails, we need to propagate the failure up and restart the - * transaction in the top-level truncate loop. --sct - * - * Returns 0 if we managed to create more room. If we can't create more - * room, and the transaction must be restarted we return 1. + * extend fails, we restart transaction. */ -static int try_to_extend_transaction(handle_t *handle, struct inode *inode) +static int ext4_ind_truncate_ensure_credits(handle_t *handle, + struct inode *inode, + struct buffer_head *bh, + int revoke_creds) { - if (!ext4_handle_valid(handle)) - return 0; - if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1)) - return 0; - if (!ext4_journal_extend(handle, ext4_blocks_for_truncate(inode))) - return 0; - return 1; + int ret; + int dropped = 0; + + ret = ext4_journal_ensure_credits_fn(handle, EXT4_RESERVE_TRANS_BLOCKS, + ext4_blocks_for_truncate(inode), revoke_creds, + ext4_ind_trunc_restart_fn(handle, inode, bh, &dropped)); + if (dropped) + down_write(&EXT4_I(inode)->i_data_sem); + if (ret <= 0) + return ret; + if (bh) { + BUFFER_TRACE(bh, "retaking write access"); + ret = ext4_journal_get_write_access(handle, inode->i_sb, bh, + EXT4_JTR_NONE); + if (unlikely(ret)) + return ret; + } + return 0; } /* @@ -929,40 +864,23 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, int flags = EXT4_FREE_BLOCKS_VALIDATED; int err; - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) || + ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE)) flags |= EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_METADATA; else if (ext4_should_journal_data(inode)) flags |= EXT4_FREE_BLOCKS_FORGET; - if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, - count)) { + if (!ext4_inode_block_valid(inode, block_to_free, count)) { EXT4_ERROR_INODE(inode, "attempt to clear invalid " "blocks %llu len %lu", (unsigned long long) block_to_free, count); return 1; } - if (try_to_extend_transaction(handle, inode)) { - if (bh) { - BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_metadata(handle, inode, bh); - if (unlikely(err)) - goto out_err; - } - err = ext4_mark_inode_dirty(handle, inode); - if (unlikely(err)) - goto out_err; - err = ext4_truncate_restart_trans(handle, inode, - ext4_blocks_for_truncate(inode)); - if (unlikely(err)) - goto out_err; - if (bh) { - BUFFER_TRACE(bh, "retaking write access"); - err = ext4_journal_get_write_access(handle, bh); - if (unlikely(err)) - goto out_err; - } - } + err = ext4_ind_truncate_ensure_credits(handle, inode, bh, + ext4_free_data_revoke_credits(inode, count)); + if (err < 0) + goto out_err; for (p = first; p < last; p++) *p = 0; @@ -1009,7 +927,8 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, if (this_bh) { /* For indirect block */ BUFFER_TRACE(this_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, this_bh); + err = ext4_journal_get_write_access(handle, inode->i_sb, + this_bh, EXT4_JTR_NONE); /* Important: if we can't update the indirect pointers * to the blocks, we can't free them. */ if (err) @@ -1097,8 +1016,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, if (!nr) continue; /* A hole */ - if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), - nr, 1)) { + if (!ext4_inode_block_valid(inode, nr, 1)) { EXT4_ERROR_INODE(inode, "invalid indirect mapped " "block %lu (level %d)", @@ -1107,14 +1025,14 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, } /* Go read the buffer for the next level down */ - bh = sb_bread(inode->i_sb, nr); + bh = ext4_sb_bread_nofail(inode->i_sb, nr); /* * A read failure? Report error and clear slot * (should be rare). */ - if (!bh) { - EXT4_ERROR_INODE_BLOCK(inode, nr, + if (IS_ERR(bh)) { + ext4_error_inode_block(inode, nr, -PTR_ERR(bh), "Read failure"); continue; } @@ -1128,7 +1046,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, brelse(bh); /* - * Everything below this this pointer has been + * Everything below this pointer has been * released. Now let this top-of-subtree go. * * We want the freeing of this indirect block to be @@ -1145,11 +1063,11 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, */ if (ext4_handle_is_aborted(handle)) return; - if (try_to_extend_transaction(handle, inode)) { - ext4_mark_inode_dirty(handle, inode); - ext4_truncate_restart_trans(handle, inode, - ext4_blocks_for_truncate(inode)); - } + if (ext4_ind_truncate_ensure_credits(handle, inode, + NULL, + ext4_free_metadata_revoke_credits( + inode->i_sb, 1)) < 0) + return; /* * The forget flag here is critical because if @@ -1173,7 +1091,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, */ BUFFER_TRACE(parent_bh, "get_write_access"); if (!ext4_journal_get_write_access(handle, - parent_bh)){ + inode->i_sb, parent_bh, + EXT4_JTR_NONE)) { *p = 0; BUFFER_TRACE(parent_bh, "call ext4_handle_dirty_metadata"); @@ -1275,106 +1194,281 @@ do_indirects: ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); i_data[EXT4_IND_BLOCK] = 0; } + fallthrough; case EXT4_IND_BLOCK: nr = i_data[EXT4_DIND_BLOCK]; if (nr) { ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); i_data[EXT4_DIND_BLOCK] = 0; } + fallthrough; case EXT4_DIND_BLOCK: nr = i_data[EXT4_TIND_BLOCK]; if (nr) { ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); i_data[EXT4_TIND_BLOCK] = 0; } + fallthrough; case EXT4_TIND_BLOCK: ; } } -static int free_hole_blocks(handle_t *handle, struct inode *inode, - struct buffer_head *parent_bh, __le32 *i_data, - int level, ext4_lblk_t first, - ext4_lblk_t count, int max) +/** + * ext4_ind_remove_space - remove space from the range + * @handle: JBD handle for this transaction + * @inode: inode we are dealing with + * @start: First block to remove + * @end: One block after the last block to remove (exclusive) + * + * Free the blocks in the defined range (end is exclusive endpoint of + * range). This is used by ext4_punch_hole(). + */ +int ext4_ind_remove_space(handle_t *handle, struct inode *inode, + ext4_lblk_t start, ext4_lblk_t end) { - struct buffer_head *bh = NULL; + struct ext4_inode_info *ei = EXT4_I(inode); + __le32 *i_data = ei->i_data; int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); - int ret = 0; - int i, inc; - ext4_lblk_t offset; - __le32 blk; - - inc = 1 << ((EXT4_BLOCK_SIZE_BITS(inode->i_sb) - 2) * level); - for (i = 0, offset = 0; i < max; i++, i_data++, offset += inc) { - if (offset >= count + first) - break; - if (*i_data == 0 || (offset + inc) <= first) - continue; - blk = *i_data; - if (level > 0) { - ext4_lblk_t first2; - bh = sb_bread(inode->i_sb, le32_to_cpu(blk)); - if (!bh) { - EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk), - "Read failure"); - return -EIO; + ext4_lblk_t offsets[4], offsets2[4]; + Indirect chain[4], chain2[4]; + Indirect *partial, *partial2; + Indirect *p = NULL, *p2 = NULL; + ext4_lblk_t max_block; + __le32 nr = 0, nr2 = 0; + int n = 0, n2 = 0; + unsigned blocksize = inode->i_sb->s_blocksize; + + max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) + >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); + if (end >= max_block) + end = max_block; + if ((start >= end) || (start > max_block)) + return 0; + + n = ext4_block_to_path(inode, start, offsets, NULL); + n2 = ext4_block_to_path(inode, end, offsets2, NULL); + + BUG_ON(n > n2); + + if ((n == 1) && (n == n2)) { + /* We're punching only within direct block range */ + ext4_free_data(handle, inode, NULL, i_data + offsets[0], + i_data + offsets2[0]); + return 0; + } else if (n2 > n) { + /* + * Start and end are on a different levels so we're going to + * free partial block at start, and partial block at end of + * the range. If there are some levels in between then + * do_indirects label will take care of that. + */ + + if (n == 1) { + /* + * Start is at the direct block level, free + * everything to the end of the level. + */ + ext4_free_data(handle, inode, NULL, i_data + offsets[0], + i_data + EXT4_NDIR_BLOCKS); + goto end_range; + } + + + partial = p = ext4_find_shared(inode, n, offsets, chain, &nr); + if (nr) { + if (partial == chain) { + /* Shared branch grows from the inode */ + ext4_free_branches(handle, inode, NULL, + &nr, &nr+1, (chain+n-1) - partial); + *partial->p = 0; + } else { + /* Shared branch grows from an indirect block */ + BUFFER_TRACE(partial->bh, "get_write_access"); + ext4_free_branches(handle, inode, partial->bh, + partial->p, + partial->p+1, (chain+n-1) - partial); } - first2 = (first > offset) ? first - offset : 0; - ret = free_hole_blocks(handle, inode, bh, - (__le32 *)bh->b_data, level - 1, - first2, count - offset, - inode->i_sb->s_blocksize >> 2); - if (ret) { - brelse(bh); - goto err; + } + + /* + * Clear the ends of indirect blocks on the shared branch + * at the start of the range + */ + while (partial > chain) { + ext4_free_branches(handle, inode, partial->bh, + partial->p + 1, + (__le32 *)partial->bh->b_data+addr_per_block, + (chain+n-1) - partial); + partial--; + } + +end_range: + partial2 = p2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2); + if (nr2) { + if (partial2 == chain2) { + /* + * Remember, end is exclusive so here we're at + * the start of the next level we're not going + * to free. Everything was covered by the start + * of the range. + */ + goto do_indirects; } + } else { + /* + * ext4_find_shared returns Indirect structure which + * points to the last element which should not be + * removed by truncate. But this is end of the range + * in punch_hole so we need to point to the next element + */ + partial2->p++; } - if (level == 0 || - (bh && all_zeroes((__le32 *)bh->b_data, - (__le32 *)bh->b_data + addr_per_block))) { - ext4_free_data(handle, inode, parent_bh, &blk, &blk+1); - *i_data = 0; + + /* + * Clear the ends of indirect blocks on the shared branch + * at the end of the range + */ + while (partial2 > chain2) { + ext4_free_branches(handle, inode, partial2->bh, + (__le32 *)partial2->bh->b_data, + partial2->p, + (chain2+n2-1) - partial2); + partial2--; } - brelse(bh); - bh = NULL; + goto do_indirects; } -err: - return ret; -} + /* Punch happened within the same level (n == n2) */ + partial = p = ext4_find_shared(inode, n, offsets, chain, &nr); + partial2 = p2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2); -int ext4_free_hole_blocks(handle_t *handle, struct inode *inode, - ext4_lblk_t first, ext4_lblk_t stop) -{ - int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); - int level, ret = 0; - int num = EXT4_NDIR_BLOCKS; - ext4_lblk_t count, max = EXT4_NDIR_BLOCKS; - __le32 *i_data = EXT4_I(inode)->i_data; - - count = stop - first; - for (level = 0; level < 4; level++, max *= addr_per_block) { - if (first < max) { - ret = free_hole_blocks(handle, inode, NULL, i_data, - level, first, count, num); - if (ret) - goto err; - if (count > max - first) - count -= max - first; - else + /* Free top, but only if partial2 isn't its subtree. */ + if (nr) { + int level = min(partial - chain, partial2 - chain2); + int i; + int subtree = 1; + + for (i = 0; i <= level; i++) { + if (offsets[i] != offsets2[i]) { + subtree = 0; break; - first = 0; - } else { - first -= max; + } } - i_data += num; - if (level == 0) { - num = 1; - max = 1; + + if (!subtree) { + if (partial == chain) { + /* Shared branch grows from the inode */ + ext4_free_branches(handle, inode, NULL, + &nr, &nr+1, + (chain+n-1) - partial); + *partial->p = 0; + } else { + /* Shared branch grows from an indirect block */ + BUFFER_TRACE(partial->bh, "get_write_access"); + ext4_free_branches(handle, inode, partial->bh, + partial->p, + partial->p+1, + (chain+n-1) - partial); + } } } -err: - return ret; -} + if (!nr2) { + /* + * ext4_find_shared returns Indirect structure which + * points to the last element which should not be + * removed by truncate. But this is end of the range + * in punch_hole so we need to point to the next element + */ + partial2->p++; + } + + while (partial > chain || partial2 > chain2) { + int depth = (chain+n-1) - partial; + int depth2 = (chain2+n2-1) - partial2; + + if (partial > chain && partial2 > chain2 && + partial->bh->b_blocknr == partial2->bh->b_blocknr) { + /* + * We've converged on the same block. Clear the range, + * then we're done. + */ + ext4_free_branches(handle, inode, partial->bh, + partial->p + 1, + partial2->p, + (chain+n-1) - partial); + goto cleanup; + } + + /* + * The start and end partial branches may not be at the same + * level even though the punch happened within one level. So, we + * give them a chance to arrive at the same level, then walk + * them in step with each other until we converge on the same + * block. + */ + if (partial > chain && depth <= depth2) { + ext4_free_branches(handle, inode, partial->bh, + partial->p + 1, + (__le32 *)partial->bh->b_data+addr_per_block, + (chain+n-1) - partial); + partial--; + } + if (partial2 > chain2 && depth2 <= depth) { + ext4_free_branches(handle, inode, partial2->bh, + (__le32 *)partial2->bh->b_data, + partial2->p, + (chain2+n2-1) - partial2); + partial2--; + } + } +cleanup: + while (p && p > chain) { + BUFFER_TRACE(p->bh, "call brelse"); + brelse(p->bh); + p--; + } + while (p2 && p2 > chain2) { + BUFFER_TRACE(p2->bh, "call brelse"); + brelse(p2->bh); + p2--; + } + return 0; + +do_indirects: + /* Kill the remaining (whole) subtrees */ + switch (offsets[0]) { + default: + if (++n >= n2) + break; + nr = i_data[EXT4_IND_BLOCK]; + if (nr) { + ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); + i_data[EXT4_IND_BLOCK] = 0; + } + fallthrough; + case EXT4_IND_BLOCK: + if (++n >= n2) + break; + nr = i_data[EXT4_DIND_BLOCK]; + if (nr) { + ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); + i_data[EXT4_DIND_BLOCK] = 0; + } + fallthrough; + case EXT4_DIND_BLOCK: + if (++n >= n2) + break; + nr = i_data[EXT4_TIND_BLOCK]; + if (nr) { + ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); + i_data[EXT4_TIND_BLOCK] = 0; + } + fallthrough; + case EXT4_TIND_BLOCK: + ; + } + goto cleanup; +} |
