summaryrefslogtreecommitdiff
path: root/fs/ext4/indirect.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/indirect.c')
-rw-r--r--fs/ext4/indirect.c282
1 files changed, 150 insertions, 132 deletions
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index bf7fa1507e81..da76353b3a57 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -148,6 +148,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
struct super_block *sb = inode->i_sb;
Indirect *p = chain;
struct buffer_head *bh;
+ unsigned int key;
int ret = -EIO;
*err = 0;
@@ -156,14 +157,20 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
if (!p->key)
goto no_block;
while (--depth) {
- bh = sb_getblk(sb, le32_to_cpu(p->key));
+ key = le32_to_cpu(p->key);
+ if (key > ext4_blocks_count(EXT4_SB(sb)->s_es)) {
+ /* the block was out of range */
+ ret = -EFSCORRUPTED;
+ goto failure;
+ }
+ bh = sb_getblk(sb, key);
if (unlikely(!bh)) {
ret = -ENOMEM;
goto failure;
}
if (!bh_uptodate_or_lock(bh)) {
- if (bh_submit_read(bh) < 0) {
+ if (ext4_read_bh(bh, 0, NULL, false) < 0) {
put_bh(bh);
goto failure;
}
@@ -294,14 +301,12 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
}
/**
- * ext4_alloc_branch - allocate and set up a chain of blocks.
- * @handle: handle for this transaction
- * @inode: owner
- * @indirect_blks: number of allocated indirect blocks
- * @blks: number of allocated direct blocks
- * @goal: preferred place for allocation
- * @offsets: offsets (in the blocks) to store the pointers to next.
- * @branch: place to store the chain in.
+ * ext4_alloc_branch() - allocate and set up a chain of blocks
+ * @handle: handle for this transaction
+ * @ar: structure describing the allocation request
+ * @indirect_blks: number of allocated indirect blocks
+ * @offsets: offsets (in the blocks) to store the pointers to next.
+ * @branch: place to store the chain in.
*
* This function allocates blocks, zeroes out all but the last one,
* links them into chain and (if we are synchronous) writes them to disk.
@@ -333,11 +338,14 @@ static int ext4_alloc_branch(handle_t *handle,
for (i = 0; i <= indirect_blks; i++) {
if (i == indirect_blks) {
new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err);
- } else
+ } else {
ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle,
ar->inode, ar->goal,
ar->flags & EXT4_MB_DELALLOC_RESERVED,
NULL, &err);
+ /* Simplify error cleanup... */
+ branch[i+1].bh = NULL;
+ }
if (err) {
i--;
goto failed;
@@ -353,7 +361,8 @@ static int ext4_alloc_branch(handle_t *handle,
}
lock_buffer(bh);
BUFFER_TRACE(bh, "call get_create_access");
- err = ext4_journal_get_create_access(handle, bh);
+ err = ext4_journal_get_create_access(handle, ar->inode->i_sb,
+ bh, EXT4_JTR_NONE);
if (err) {
unlock_buffer(bh);
goto failed;
@@ -379,32 +388,35 @@ static int ext4_alloc_branch(handle_t *handle,
}
return 0;
failed:
+ if (i == indirect_blks) {
+ /* Free data blocks */
+ ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i],
+ ar->len, 0);
+ i--;
+ }
for (; i >= 0; i--) {
/*
* We want to ext4_forget() only freshly allocated indirect
- * blocks. Buffer for new_blocks[i-1] is at branch[i].bh and
- * buffer at branch[0].bh is indirect block / inode already
- * existing before ext4_alloc_branch() was called.
+ * blocks. Buffer for new_blocks[i] is at branch[i+1].bh
+ * (buffer at branch[0].bh is indirect block / inode already
+ * existing before ext4_alloc_branch() was called). Also
+ * because blocks are freshly allocated, we don't need to
+ * revoke them which is why we don't set
+ * EXT4_FREE_BLOCKS_METADATA.
*/
- if (i > 0 && i != indirect_blks && branch[i].bh)
- ext4_forget(handle, 1, ar->inode, branch[i].bh,
- branch[i].bh->b_blocknr);
- ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i],
- (i == indirect_blks) ? ar->len : 1, 0);
+ ext4_free_blocks(handle, ar->inode, branch[i+1].bh,
+ new_blocks[i], 1,
+ branch[i+1].bh ? EXT4_FREE_BLOCKS_FORGET : 0);
}
return err;
}
/**
- * ext4_splice_branch - splice the allocated branch onto inode.
+ * ext4_splice_branch() - splice the allocated branch onto inode.
* @handle: handle for this transaction
- * @inode: owner
- * @block: (logical) number of block we are adding
- * @chain: chain of indirect blocks (with a missing link - see
- * ext4_alloc_branch)
+ * @ar: structure describing the allocation request
* @where: location of missing link
* @num: number of indirect blocks we are adding
- * @blks: number of direct blocks we are adding
*
* This function fills the missing link and does all housekeeping needed in
* inode (->i_blocks, etc.). In case of success we end up with the full
@@ -425,7 +437,8 @@ static int ext4_splice_branch(handle_t *handle,
*/
if (where->bh) {
BUFFER_TRACE(where->bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, where->bh);
+ err = ext4_journal_get_write_access(handle, ar->inode->i_sb,
+ where->bh, EXT4_JTR_NONE);
if (err)
goto err_out;
}
@@ -454,7 +467,7 @@ static int ext4_splice_branch(handle_t *handle,
* the new i_size. But that is not done here - it is done in
* generic_commit_write->__mark_inode_dirty->ext4_dirty_inode.
*/
- jbd_debug(5, "splicing indirect only\n");
+ ext4_debug("splicing indirect only\n");
BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
err = ext4_handle_dirty_metadata(handle, ar->inode, where->bh);
if (err)
@@ -463,8 +476,10 @@ static int ext4_splice_branch(handle_t *handle,
/*
* OK, we spliced it into the inode itself on a direct block.
*/
- ext4_mark_inode_dirty(handle, ar->inode);
- jbd_debug(5, "splicing direct\n");
+ err = ext4_mark_inode_dirty(handle, ar->inode);
+ if (unlikely(err))
+ goto err_out;
+ ext4_debug("splicing direct\n");
}
return err;
@@ -524,12 +539,12 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
int indirect_blks;
int blocks_to_boundary = 0;
int depth;
- int count = 0;
+ u64 count = 0;
ext4_fsblk_t first_block = 0;
trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
- J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
- J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
+ ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
+ ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
depth = ext4_block_to_path(inode, map->m_lblk, offsets,
&blocks_to_boundary);
@@ -573,7 +588,7 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
count++;
/* Fill in size of a hole we found */
map->m_pblk = 0;
- map->m_len = min_t(unsigned int, map->m_len, count);
+ map->m_len = umin(map->m_len, count);
goto cleanup;
}
@@ -587,7 +602,8 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
if (ext4_has_feature_bigalloc(inode->i_sb)) {
EXT4_ERROR_INODE(inode, "Can't allocate blocks for "
"non-extent mapped inodes with bigalloc");
- return -EFSCORRUPTED;
+ err = -EFSCORRUPTED;
+ goto out;
}
/* Set up for the direct block allocation */
@@ -635,6 +651,7 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
ext4_update_inode_fsync_trans(handle, inode, 1);
count = ar.len;
+
got_it:
map->m_flags |= EXT4_MAP_MAPPED;
map->m_pblk = le32_to_cpu(chain[depth-1].key);
@@ -656,32 +673,6 @@ out:
}
/*
- * Calculate the number of metadata blocks need to reserve
- * to allocate a new block at @lblocks for non extent file based file
- */
-int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock)
-{
- struct ext4_inode_info *ei = EXT4_I(inode);
- sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1);
- int blk_bits;
-
- if (lblock < EXT4_NDIR_BLOCKS)
- return 0;
-
- lblock -= EXT4_NDIR_BLOCKS;
-
- if (ei->i_da_metadata_calc_len &&
- (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
- ei->i_da_metadata_calc_len++;
- return 0;
- }
- ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
- ei->i_da_metadata_calc_len = 1;
- blk_bits = order_base_2(lblock);
- return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
-}
-
-/*
* Calculate number of indirect blocks touched by mapping @nrblocks logically
* contiguous blocks
*/
@@ -695,27 +686,64 @@ int ext4_ind_trans_blocks(struct inode *inode, int nrblocks)
return DIV_ROUND_UP(nrblocks, EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
}
+static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode,
+ struct buffer_head *bh, int *dropped)
+{
+ int err;
+
+ if (bh) {
+ BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, inode, bh);
+ if (unlikely(err))
+ return err;
+ }
+ err = ext4_mark_inode_dirty(handle, inode);
+ if (unlikely(err))
+ return err;
+ /*
+ * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this
+ * moment, get_block can be called only for blocks inside i_size since
+ * page cache has been already dropped and writes are blocked by
+ * i_rwsem. So we can safely drop the i_data_sem here.
+ */
+ BUG_ON(EXT4_JOURNAL(inode) == NULL);
+ ext4_discard_preallocations(inode);
+ up_write(&EXT4_I(inode)->i_data_sem);
+ *dropped = 1;
+ return 0;
+}
+
/*
* Truncate transactions can be complex and absolutely huge. So we need to
- * be able to restart the transaction at a conventient checkpoint to make
+ * be able to restart the transaction at a convenient checkpoint to make
* sure we don't overflow the journal.
*
* Try to extend this transaction for the purposes of truncation. If
- * extend fails, we need to propagate the failure up and restart the
- * transaction in the top-level truncate loop. --sct
- *
- * Returns 0 if we managed to create more room. If we can't create more
- * room, and the transaction must be restarted we return 1.
+ * extend fails, we restart transaction.
*/
-static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
+static int ext4_ind_truncate_ensure_credits(handle_t *handle,
+ struct inode *inode,
+ struct buffer_head *bh,
+ int revoke_creds)
{
- if (!ext4_handle_valid(handle))
- return 0;
- if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
- return 0;
- if (!ext4_journal_extend(handle, ext4_blocks_for_truncate(inode)))
- return 0;
- return 1;
+ int ret;
+ int dropped = 0;
+
+ ret = ext4_journal_ensure_credits_fn(handle, EXT4_RESERVE_TRANS_BLOCKS,
+ ext4_blocks_for_truncate(inode), revoke_creds,
+ ext4_ind_trunc_restart_fn(handle, inode, bh, &dropped));
+ if (dropped)
+ down_write(&EXT4_I(inode)->i_data_sem);
+ if (ret <= 0)
+ return ret;
+ if (bh) {
+ BUFFER_TRACE(bh, "retaking write access");
+ ret = ext4_journal_get_write_access(handle, inode->i_sb, bh,
+ EXT4_JTR_NONE);
+ if (unlikely(ret))
+ return ret;
+ }
+ return 0;
}
/*
@@ -842,35 +870,17 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
else if (ext4_should_journal_data(inode))
flags |= EXT4_FREE_BLOCKS_FORGET;
- if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
- count)) {
+ if (!ext4_inode_block_valid(inode, block_to_free, count)) {
EXT4_ERROR_INODE(inode, "attempt to clear invalid "
"blocks %llu len %lu",
(unsigned long long) block_to_free, count);
return 1;
}
- if (try_to_extend_transaction(handle, inode)) {
- if (bh) {
- BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_metadata(handle, inode, bh);
- if (unlikely(err))
- goto out_err;
- }
- err = ext4_mark_inode_dirty(handle, inode);
- if (unlikely(err))
- goto out_err;
- err = ext4_truncate_restart_trans(handle, inode,
- ext4_blocks_for_truncate(inode));
- if (unlikely(err))
- goto out_err;
- if (bh) {
- BUFFER_TRACE(bh, "retaking write access");
- err = ext4_journal_get_write_access(handle, bh);
- if (unlikely(err))
- goto out_err;
- }
- }
+ err = ext4_ind_truncate_ensure_credits(handle, inode, bh,
+ ext4_free_data_revoke_credits(inode, count));
+ if (err < 0)
+ goto out_err;
for (p = first; p < last; p++)
*p = 0;
@@ -917,7 +927,8 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
if (this_bh) { /* For indirect block */
BUFFER_TRACE(this_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, this_bh);
+ err = ext4_journal_get_write_access(handle, inode->i_sb,
+ this_bh, EXT4_JTR_NONE);
/* Important: if we can't update the indirect pointers
* to the blocks, we can't free them. */
if (err)
@@ -1005,8 +1016,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
if (!nr)
continue; /* A hole */
- if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
- nr, 1)) {
+ if (!ext4_inode_block_valid(inode, nr, 1)) {
EXT4_ERROR_INODE(inode,
"invalid indirect mapped "
"block %lu (level %d)",
@@ -1015,14 +1025,14 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
}
/* Go read the buffer for the next level down */
- bh = sb_bread(inode->i_sb, nr);
+ bh = ext4_sb_bread_nofail(inode->i_sb, nr);
/*
* A read failure? Report error and clear slot
* (should be rare).
*/
- if (!bh) {
- EXT4_ERROR_INODE_BLOCK(inode, nr,
+ if (IS_ERR(bh)) {
+ ext4_error_inode_block(inode, nr, -PTR_ERR(bh),
"Read failure");
continue;
}
@@ -1036,7 +1046,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
brelse(bh);
/*
- * Everything below this this pointer has been
+ * Everything below this pointer has been
* released. Now let this top-of-subtree go.
*
* We want the freeing of this indirect block to be
@@ -1053,11 +1063,11 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
*/
if (ext4_handle_is_aborted(handle))
return;
- if (try_to_extend_transaction(handle, inode)) {
- ext4_mark_inode_dirty(handle, inode);
- ext4_truncate_restart_trans(handle, inode,
- ext4_blocks_for_truncate(inode));
- }
+ if (ext4_ind_truncate_ensure_credits(handle, inode,
+ NULL,
+ ext4_free_metadata_revoke_credits(
+ inode->i_sb, 1)) < 0)
+ return;
/*
* The forget flag here is critical because if
@@ -1081,7 +1091,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
*/
BUFFER_TRACE(parent_bh, "get_write_access");
if (!ext4_journal_get_write_access(handle,
- parent_bh)){
+ inode->i_sb, parent_bh,
+ EXT4_JTR_NONE)) {
*p = 0;
BUFFER_TRACE(parent_bh,
"call ext4_handle_dirty_metadata");
@@ -1183,18 +1194,21 @@ do_indirects:
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
i_data[EXT4_IND_BLOCK] = 0;
}
+ fallthrough;
case EXT4_IND_BLOCK:
nr = i_data[EXT4_DIND_BLOCK];
if (nr) {
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
i_data[EXT4_DIND_BLOCK] = 0;
}
+ fallthrough;
case EXT4_DIND_BLOCK:
nr = i_data[EXT4_TIND_BLOCK];
if (nr) {
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
i_data[EXT4_TIND_BLOCK] = 0;
}
+ fallthrough;
case EXT4_TIND_BLOCK:
;
}
@@ -1219,6 +1233,7 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
ext4_lblk_t offsets[4], offsets2[4];
Indirect chain[4], chain2[4];
Indirect *partial, *partial2;
+ Indirect *p = NULL, *p2 = NULL;
ext4_lblk_t max_block;
__le32 nr = 0, nr2 = 0;
int n = 0, n2 = 0;
@@ -1260,7 +1275,7 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
}
- partial = ext4_find_shared(inode, n, offsets, chain, &nr);
+ partial = p = ext4_find_shared(inode, n, offsets, chain, &nr);
if (nr) {
if (partial == chain) {
/* Shared branch grows from the inode */
@@ -1285,13 +1300,11 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
partial->p + 1,
(__le32 *)partial->bh->b_data+addr_per_block,
(chain+n-1) - partial);
- BUFFER_TRACE(partial->bh, "call brelse");
- brelse(partial->bh);
partial--;
}
end_range:
- partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
+ partial2 = p2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
if (nr2) {
if (partial2 == chain2) {
/*
@@ -1321,16 +1334,14 @@ end_range:
(__le32 *)partial2->bh->b_data,
partial2->p,
(chain2+n2-1) - partial2);
- BUFFER_TRACE(partial2->bh, "call brelse");
- brelse(partial2->bh);
partial2--;
}
goto do_indirects;
}
/* Punch happened within the same level (n == n2) */
- partial = ext4_find_shared(inode, n, offsets, chain, &nr);
- partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
+ partial = p = ext4_find_shared(inode, n, offsets, chain, &nr);
+ partial2 = p2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
/* Free top, but only if partial2 isn't its subtree. */
if (nr) {
@@ -1387,11 +1398,7 @@ end_range:
partial->p + 1,
partial2->p,
(chain+n-1) - partial);
- BUFFER_TRACE(partial->bh, "call brelse");
- brelse(partial->bh);
- BUFFER_TRACE(partial2->bh, "call brelse");
- brelse(partial2->bh);
- return 0;
+ goto cleanup;
}
/*
@@ -1406,8 +1413,6 @@ end_range:
partial->p + 1,
(__le32 *)partial->bh->b_data+addr_per_block,
(chain+n-1) - partial);
- BUFFER_TRACE(partial->bh, "call brelse");
- brelse(partial->bh);
partial--;
}
if (partial2 > chain2 && depth2 <= depth) {
@@ -1415,11 +1420,21 @@ end_range:
(__le32 *)partial2->bh->b_data,
partial2->p,
(chain2+n2-1) - partial2);
- BUFFER_TRACE(partial2->bh, "call brelse");
- brelse(partial2->bh);
partial2--;
}
}
+
+cleanup:
+ while (p && p > chain) {
+ BUFFER_TRACE(p->bh, "call brelse");
+ brelse(p->bh);
+ p--;
+ }
+ while (p2 && p2 > chain2) {
+ BUFFER_TRACE(p2->bh, "call brelse");
+ brelse(p2->bh);
+ p2--;
+ }
return 0;
do_indirects:
@@ -1427,30 +1442,33 @@ do_indirects:
switch (offsets[0]) {
default:
if (++n >= n2)
- return 0;
+ break;
nr = i_data[EXT4_IND_BLOCK];
if (nr) {
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
i_data[EXT4_IND_BLOCK] = 0;
}
+ fallthrough;
case EXT4_IND_BLOCK:
if (++n >= n2)
- return 0;
+ break;
nr = i_data[EXT4_DIND_BLOCK];
if (nr) {
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
i_data[EXT4_DIND_BLOCK] = 0;
}
+ fallthrough;
case EXT4_DIND_BLOCK:
if (++n >= n2)
- return 0;
+ break;
nr = i_data[EXT4_TIND_BLOCK];
if (nr) {
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
i_data[EXT4_TIND_BLOCK] = 0;
}
+ fallthrough;
case EXT4_TIND_BLOCK:
;
}
- return 0;
+ goto cleanup;
}