summaryrefslogtreecommitdiff
path: root/fs/ext4/indirect.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/indirect.c')
-rw-r--r--fs/ext4/indirect.c784
1 files changed, 439 insertions, 345 deletions
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 87b30cd357e7..da76353b3a57 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/indirect.c
*
@@ -20,10 +21,10 @@
* (sct@redhat.com), 1993, 1998
*/
-#include <linux/aio.h>
#include "ext4_jbd2.h"
#include "truncate.h"
-#include "ext4_extents.h" /* Needed for EXT_MAX_BLOCKS */
+#include <linux/dax.h>
+#include <linux/uio.h>
#include <trace/events/ext4.h>
@@ -147,6 +148,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
struct super_block *sb = inode->i_sb;
Indirect *p = chain;
struct buffer_head *bh;
+ unsigned int key;
int ret = -EIO;
*err = 0;
@@ -155,14 +157,20 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
if (!p->key)
goto no_block;
while (--depth) {
- bh = sb_getblk(sb, le32_to_cpu(p->key));
+ key = le32_to_cpu(p->key);
+ if (key > ext4_blocks_count(EXT4_SB(sb)->s_es)) {
+ /* the block was out of range */
+ ret = -EFSCORRUPTED;
+ goto failure;
+ }
+ bh = sb_getblk(sb, key);
if (unlikely(!bh)) {
ret = -ENOMEM;
goto failure;
}
if (!bh_uptodate_or_lock(bh)) {
- if (bh_submit_read(bh) < 0) {
+ if (ext4_read_bh(bh, 0, NULL, false) < 0) {
put_bh(bh);
goto failure;
}
@@ -293,14 +301,12 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
}
/**
- * ext4_alloc_branch - allocate and set up a chain of blocks.
- * @handle: handle for this transaction
- * @inode: owner
- * @indirect_blks: number of allocated indirect blocks
- * @blks: number of allocated direct blocks
- * @goal: preferred place for allocation
- * @offsets: offsets (in the blocks) to store the pointers to next.
- * @branch: place to store the chain in.
+ * ext4_alloc_branch() - allocate and set up a chain of blocks
+ * @handle: handle for this transaction
+ * @ar: structure describing the allocation request
+ * @indirect_blks: number of allocated indirect blocks
+ * @offsets: offsets (in the blocks) to store the pointers to next.
+ * @branch: place to store the chain in.
*
* This function allocates blocks, zeroes out all but the last one,
* links them into chain and (if we are synchronous) writes them to disk.
@@ -319,34 +325,27 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
* ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain
* as described above and return 0.
*/
-static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
- ext4_lblk_t iblock, int indirect_blks,
- int *blks, ext4_fsblk_t goal,
- ext4_lblk_t *offsets, Indirect *branch)
+static int ext4_alloc_branch(handle_t *handle,
+ struct ext4_allocation_request *ar,
+ int indirect_blks, ext4_lblk_t *offsets,
+ Indirect *branch)
{
- struct ext4_allocation_request ar;
struct buffer_head * bh;
ext4_fsblk_t b, new_blocks[4];
__le32 *p;
int i, j, err, len = 1;
- /*
- * Set up for the direct block allocation
- */
- memset(&ar, 0, sizeof(ar));
- ar.inode = inode;
- ar.len = *blks;
- ar.logical = iblock;
- if (S_ISREG(inode->i_mode))
- ar.flags = EXT4_MB_HINT_DATA;
-
for (i = 0; i <= indirect_blks; i++) {
if (i == indirect_blks) {
- ar.goal = goal;
- new_blocks[i] = ext4_mb_new_blocks(handle, &ar, &err);
- } else
- goal = new_blocks[i] = ext4_new_meta_blocks(handle, inode,
- goal, 0, NULL, &err);
+ new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err);
+ } else {
+ ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle,
+ ar->inode, ar->goal,
+ ar->flags & EXT4_MB_DELALLOC_RESERVED,
+ NULL, &err);
+ /* Simplify error cleanup... */
+ branch[i+1].bh = NULL;
+ }
if (err) {
i--;
goto failed;
@@ -355,14 +354,15 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
if (i == 0)
continue;
- bh = branch[i].bh = sb_getblk(inode->i_sb, new_blocks[i-1]);
+ bh = branch[i].bh = sb_getblk(ar->inode->i_sb, new_blocks[i-1]);
if (unlikely(!bh)) {
err = -ENOMEM;
goto failed;
}
lock_buffer(bh);
BUFFER_TRACE(bh, "call get_create_access");
- err = ext4_journal_get_create_access(handle, bh);
+ err = ext4_journal_get_create_access(handle, ar->inode->i_sb,
+ bh, EXT4_JTR_NONE);
if (err) {
unlock_buffer(bh);
goto failed;
@@ -373,7 +373,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
b = new_blocks[i];
if (i == indirect_blks)
- len = ar.len;
+ len = ar->len;
for (j = 0; j < len; j++)
*p++ = cpu_to_le32(b++);
@@ -382,41 +382,49 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
unlock_buffer(bh);
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_metadata(handle, inode, bh);
+ err = ext4_handle_dirty_metadata(handle, ar->inode, bh);
if (err)
goto failed;
}
- *blks = ar.len;
return 0;
failed:
+ if (i == indirect_blks) {
+ /* Free data blocks */
+ ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i],
+ ar->len, 0);
+ i--;
+ }
for (; i >= 0; i--) {
- if (i != indirect_blks && branch[i].bh)
- ext4_forget(handle, 1, inode, branch[i].bh,
- branch[i].bh->b_blocknr);
- ext4_free_blocks(handle, inode, NULL, new_blocks[i],
- (i == indirect_blks) ? ar.len : 1, 0);
+ /*
+ * We want to ext4_forget() only freshly allocated indirect
+ * blocks. Buffer for new_blocks[i] is at branch[i+1].bh
+ * (buffer at branch[0].bh is indirect block / inode already
+ * existing before ext4_alloc_branch() was called). Also
+ * because blocks are freshly allocated, we don't need to
+ * revoke them which is why we don't set
+ * EXT4_FREE_BLOCKS_METADATA.
+ */
+ ext4_free_blocks(handle, ar->inode, branch[i+1].bh,
+ new_blocks[i], 1,
+ branch[i+1].bh ? EXT4_FREE_BLOCKS_FORGET : 0);
}
return err;
}
/**
- * ext4_splice_branch - splice the allocated branch onto inode.
+ * ext4_splice_branch() - splice the allocated branch onto inode.
* @handle: handle for this transaction
- * @inode: owner
- * @block: (logical) number of block we are adding
- * @chain: chain of indirect blocks (with a missing link - see
- * ext4_alloc_branch)
+ * @ar: structure describing the allocation request
* @where: location of missing link
* @num: number of indirect blocks we are adding
- * @blks: number of direct blocks we are adding
*
* This function fills the missing link and does all housekeeping needed in
* inode (->i_blocks, etc.). In case of success we end up with the full
* chain to new block and return 0.
*/
-static int ext4_splice_branch(handle_t *handle, struct inode *inode,
- ext4_lblk_t block, Indirect *where, int num,
- int blks)
+static int ext4_splice_branch(handle_t *handle,
+ struct ext4_allocation_request *ar,
+ Indirect *where, int num)
{
int i;
int err = 0;
@@ -429,7 +437,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
*/
if (where->bh) {
BUFFER_TRACE(where->bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, where->bh);
+ err = ext4_journal_get_write_access(handle, ar->inode->i_sb,
+ where->bh, EXT4_JTR_NONE);
if (err)
goto err_out;
}
@@ -441,9 +450,9 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
* Update the host buffer_head or inode to point to more just allocated
* direct blocks blocks
*/
- if (num == 0 && blks > 1) {
+ if (num == 0 && ar->len > 1) {
current_block = le32_to_cpu(where->key) + 1;
- for (i = 1; i < blks; i++)
+ for (i = 1; i < ar->len; i++)
*(where->p + i) = cpu_to_le32(current_block++);
}
@@ -458,17 +467,19 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
* the new i_size. But that is not done here - it is done in
* generic_commit_write->__mark_inode_dirty->ext4_dirty_inode.
*/
- jbd_debug(5, "splicing indirect only\n");
+ ext4_debug("splicing indirect only\n");
BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_metadata(handle, inode, where->bh);
+ err = ext4_handle_dirty_metadata(handle, ar->inode, where->bh);
if (err)
goto err_out;
} else {
/*
* OK, we spliced it into the inode itself on a direct block.
*/
- ext4_mark_inode_dirty(handle, inode);
- jbd_debug(5, "splicing direct\n");
+ err = ext4_mark_inode_dirty(handle, ar->inode);
+ if (unlikely(err))
+ goto err_out;
+ ext4_debug("splicing direct\n");
}
return err;
@@ -479,11 +490,11 @@ err_out:
* need to revoke the block, which is why we don't
* need to set EXT4_FREE_BLOCKS_METADATA.
*/
- ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
+ ext4_free_blocks(handle, ar->inode, where[i].bh, 0, 1,
EXT4_FREE_BLOCKS_FORGET);
}
- ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key),
- blks, 0);
+ ext4_free_blocks(handle, ar->inode, NULL, le32_to_cpu(where[num].key),
+ ar->len, 0);
return err;
}
@@ -520,20 +531,20 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map,
int flags)
{
+ struct ext4_allocation_request ar;
int err = -EIO;
ext4_lblk_t offsets[4];
Indirect chain[4];
Indirect *partial;
- ext4_fsblk_t goal;
int indirect_blks;
int blocks_to_boundary = 0;
int depth;
- int count = 0;
+ u64 count = 0;
ext4_fsblk_t first_block = 0;
trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
- J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
- J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
+ ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
+ ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
depth = ext4_block_to_path(inode, map->m_lblk, offsets,
&blocks_to_boundary);
@@ -560,21 +571,53 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
goto got_it;
}
- /* Next simple case - plain lookup or failed read of indirect block */
- if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO)
+ /* Next simple case - plain lookup failed */
+ if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
+ unsigned epb = inode->i_sb->s_blocksize / sizeof(u32);
+ int i;
+
+ /*
+ * Count number blocks in a subtree under 'partial'. At each
+ * level we count number of complete empty subtrees beyond
+ * current offset and then descend into the subtree only
+ * partially beyond current offset.
+ */
+ count = 0;
+ for (i = partial - chain + 1; i < depth; i++)
+ count = count * epb + (epb - offsets[i] - 1);
+ count++;
+ /* Fill in size of a hole we found */
+ map->m_pblk = 0;
+ map->m_len = umin(map->m_len, count);
+ goto cleanup;
+ }
+
+ /* Failed read of indirect block */
+ if (err == -EIO)
goto cleanup;
/*
* Okay, we need to do block allocation.
*/
- if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
+ if (ext4_has_feature_bigalloc(inode->i_sb)) {
EXT4_ERROR_INODE(inode, "Can't allocate blocks for "
"non-extent mapped inodes with bigalloc");
- return -ENOSPC;
+ err = -EFSCORRUPTED;
+ goto out;
}
- goal = ext4_find_goal(inode, map->m_lblk, partial);
+ /* Set up for the direct block allocation */
+ memset(&ar, 0, sizeof(ar));
+ ar.inode = inode;
+ ar.logical = map->m_lblk;
+ if (S_ISREG(inode->i_mode))
+ ar.flags = EXT4_MB_HINT_DATA;
+ if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+ ar.flags |= EXT4_MB_DELALLOC_RESERVED;
+ if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
+ ar.flags |= EXT4_MB_USE_RESERVED;
+
+ ar.goal = ext4_find_goal(inode, map->m_lblk, partial);
/* the number of blocks need to allocate for [d,t]indirect blocks */
indirect_blks = (chain + depth) - partial - 1;
@@ -583,13 +626,13 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
* Next look up the indirect map to count the totoal number of
* direct blocks to allocate for this branch.
*/
- count = ext4_blks_to_allocate(partial, indirect_blks,
- map->m_len, blocks_to_boundary);
+ ar.len = ext4_blks_to_allocate(partial, indirect_blks,
+ map->m_len, blocks_to_boundary);
+
/*
* Block out ext4_truncate while we alter the tree
*/
- err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks,
- &count, goal,
+ err = ext4_alloc_branch(handle, &ar, indirect_blks,
offsets + (partial - chain), partial);
/*
@@ -600,14 +643,15 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
* may need to return -EAGAIN upwards in the worst case. --sct
*/
if (!err)
- err = ext4_splice_branch(handle, inode, map->m_lblk,
- partial, indirect_blks, count);
+ err = ext4_splice_branch(handle, &ar, partial, indirect_blks);
if (err)
goto cleanup;
map->m_flags |= EXT4_MAP_NEW;
ext4_update_inode_fsync_trans(handle, inode, 1);
+ count = ar.len;
+
got_it:
map->m_flags |= EXT4_MAP_MAPPED;
map->m_pblk = le32_to_cpu(chain[depth-1].key);
@@ -629,152 +673,6 @@ out:
}
/*
- * O_DIRECT for ext3 (or indirect map) based files
- *
- * If the O_DIRECT write will extend the file then add this inode to the
- * orphan list. So recovery will truncate it back to the original size
- * if the machine crashes during the write.
- *
- * If the O_DIRECT write is intantiating holes inside i_size and the machine
- * crashes then stale disk data _may_ be exposed inside the file. But current
- * VFS code falls back into buffered path in that case so we are safe.
- */
-ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
- const struct iovec *iov, loff_t offset,
- unsigned long nr_segs)
-{
- struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
- struct ext4_inode_info *ei = EXT4_I(inode);
- handle_t *handle;
- ssize_t ret;
- int orphan = 0;
- size_t count = iov_length(iov, nr_segs);
- int retries = 0;
-
- if (rw == WRITE) {
- loff_t final_size = offset + count;
-
- if (final_size > inode->i_size) {
- /* Credits for sb + inode write */
- handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- goto out;
- }
- ret = ext4_orphan_add(handle, inode);
- if (ret) {
- ext4_journal_stop(handle);
- goto out;
- }
- orphan = 1;
- ei->i_disksize = inode->i_size;
- ext4_journal_stop(handle);
- }
- }
-
-retry:
- if (rw == READ && ext4_should_dioread_nolock(inode)) {
- /*
- * Nolock dioread optimization may be dynamically disabled
- * via ext4_inode_block_unlocked_dio(). Check inode's state
- * while holding extra i_dio_count ref.
- */
- atomic_inc(&inode->i_dio_count);
- smp_mb();
- if (unlikely(ext4_test_inode_state(inode,
- EXT4_STATE_DIOREAD_LOCK))) {
- inode_dio_done(inode);
- goto locked;
- }
- ret = __blockdev_direct_IO(rw, iocb, inode,
- inode->i_sb->s_bdev, iov,
- offset, nr_segs,
- ext4_get_block, NULL, NULL, 0);
- inode_dio_done(inode);
- } else {
-locked:
- ret = blockdev_direct_IO(rw, iocb, inode, iov,
- offset, nr_segs, ext4_get_block);
-
- if (unlikely((rw & WRITE) && ret < 0)) {
- loff_t isize = i_size_read(inode);
- loff_t end = offset + iov_length(iov, nr_segs);
-
- if (end > isize)
- ext4_truncate_failed_write(inode);
- }
- }
- if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
- goto retry;
-
- if (orphan) {
- int err;
-
- /* Credits for sb + inode write */
- handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
- if (IS_ERR(handle)) {
- /* This is really bad luck. We've written the data
- * but cannot extend i_size. Bail out and pretend
- * the write failed... */
- ret = PTR_ERR(handle);
- if (inode->i_nlink)
- ext4_orphan_del(NULL, inode);
-
- goto out;
- }
- if (inode->i_nlink)
- ext4_orphan_del(handle, inode);
- if (ret > 0) {
- loff_t end = offset + ret;
- if (end > inode->i_size) {
- ei->i_disksize = end;
- i_size_write(inode, end);
- /*
- * We're going to return a positive `ret'
- * here due to non-zero-length I/O, so there's
- * no way of reporting error returns from
- * ext4_mark_inode_dirty() to userspace. So
- * ignore it.
- */
- ext4_mark_inode_dirty(handle, inode);
- }
- }
- err = ext4_journal_stop(handle);
- if (ret == 0)
- ret = err;
- }
-out:
- return ret;
-}
-
-/*
- * Calculate the number of metadata blocks need to reserve
- * to allocate a new block at @lblocks for non extent file based file
- */
-int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock)
-{
- struct ext4_inode_info *ei = EXT4_I(inode);
- sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1);
- int blk_bits;
-
- if (lblock < EXT4_NDIR_BLOCKS)
- return 0;
-
- lblock -= EXT4_NDIR_BLOCKS;
-
- if (ei->i_da_metadata_calc_len &&
- (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
- ei->i_da_metadata_calc_len++;
- return 0;
- }
- ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
- ei->i_da_metadata_calc_len = 1;
- blk_bits = order_base_2(lblock);
- return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
-}
-
-/*
* Calculate number of indirect blocks touched by mapping @nrblocks logically
* contiguous blocks
*/
@@ -788,27 +686,64 @@ int ext4_ind_trans_blocks(struct inode *inode, int nrblocks)
return DIV_ROUND_UP(nrblocks, EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
}
+static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode,
+ struct buffer_head *bh, int *dropped)
+{
+ int err;
+
+ if (bh) {
+ BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, inode, bh);
+ if (unlikely(err))
+ return err;
+ }
+ err = ext4_mark_inode_dirty(handle, inode);
+ if (unlikely(err))
+ return err;
+ /*
+ * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this
+ * moment, get_block can be called only for blocks inside i_size since
+ * page cache has been already dropped and writes are blocked by
+ * i_rwsem. So we can safely drop the i_data_sem here.
+ */
+ BUG_ON(EXT4_JOURNAL(inode) == NULL);
+ ext4_discard_preallocations(inode);
+ up_write(&EXT4_I(inode)->i_data_sem);
+ *dropped = 1;
+ return 0;
+}
+
/*
* Truncate transactions can be complex and absolutely huge. So we need to
- * be able to restart the transaction at a conventient checkpoint to make
+ * be able to restart the transaction at a convenient checkpoint to make
* sure we don't overflow the journal.
*
* Try to extend this transaction for the purposes of truncation. If
- * extend fails, we need to propagate the failure up and restart the
- * transaction in the top-level truncate loop. --sct
- *
- * Returns 0 if we managed to create more room. If we can't create more
- * room, and the transaction must be restarted we return 1.
+ * extend fails, we restart transaction.
*/
-static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
+static int ext4_ind_truncate_ensure_credits(handle_t *handle,
+ struct inode *inode,
+ struct buffer_head *bh,
+ int revoke_creds)
{
- if (!ext4_handle_valid(handle))
- return 0;
- if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
- return 0;
- if (!ext4_journal_extend(handle, ext4_blocks_for_truncate(inode)))
- return 0;
- return 1;
+ int ret;
+ int dropped = 0;
+
+ ret = ext4_journal_ensure_credits_fn(handle, EXT4_RESERVE_TRANS_BLOCKS,
+ ext4_blocks_for_truncate(inode), revoke_creds,
+ ext4_ind_trunc_restart_fn(handle, inode, bh, &dropped));
+ if (dropped)
+ down_write(&EXT4_I(inode)->i_data_sem);
+ if (ret <= 0)
+ return ret;
+ if (bh) {
+ BUFFER_TRACE(bh, "retaking write access");
+ ret = ext4_journal_get_write_access(handle, inode->i_sb, bh,
+ EXT4_JTR_NONE);
+ if (unlikely(ret))
+ return ret;
+ }
+ return 0;
}
/*
@@ -929,40 +864,23 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
int flags = EXT4_FREE_BLOCKS_VALIDATED;
int err;
- if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
+ ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE))
flags |= EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_METADATA;
else if (ext4_should_journal_data(inode))
flags |= EXT4_FREE_BLOCKS_FORGET;
- if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
- count)) {
+ if (!ext4_inode_block_valid(inode, block_to_free, count)) {
EXT4_ERROR_INODE(inode, "attempt to clear invalid "
"blocks %llu len %lu",
(unsigned long long) block_to_free, count);
return 1;
}
- if (try_to_extend_transaction(handle, inode)) {
- if (bh) {
- BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_metadata(handle, inode, bh);
- if (unlikely(err))
- goto out_err;
- }
- err = ext4_mark_inode_dirty(handle, inode);
- if (unlikely(err))
- goto out_err;
- err = ext4_truncate_restart_trans(handle, inode,
- ext4_blocks_for_truncate(inode));
- if (unlikely(err))
- goto out_err;
- if (bh) {
- BUFFER_TRACE(bh, "retaking write access");
- err = ext4_journal_get_write_access(handle, bh);
- if (unlikely(err))
- goto out_err;
- }
- }
+ err = ext4_ind_truncate_ensure_credits(handle, inode, bh,
+ ext4_free_data_revoke_credits(inode, count));
+ if (err < 0)
+ goto out_err;
for (p = first; p < last; p++)
*p = 0;
@@ -1009,7 +927,8 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
if (this_bh) { /* For indirect block */
BUFFER_TRACE(this_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, this_bh);
+ err = ext4_journal_get_write_access(handle, inode->i_sb,
+ this_bh, EXT4_JTR_NONE);
/* Important: if we can't update the indirect pointers
* to the blocks, we can't free them. */
if (err)
@@ -1097,8 +1016,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
if (!nr)
continue; /* A hole */
- if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
- nr, 1)) {
+ if (!ext4_inode_block_valid(inode, nr, 1)) {
EXT4_ERROR_INODE(inode,
"invalid indirect mapped "
"block %lu (level %d)",
@@ -1107,14 +1025,14 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
}
/* Go read the buffer for the next level down */
- bh = sb_bread(inode->i_sb, nr);
+ bh = ext4_sb_bread_nofail(inode->i_sb, nr);
/*
* A read failure? Report error and clear slot
* (should be rare).
*/
- if (!bh) {
- EXT4_ERROR_INODE_BLOCK(inode, nr,
+ if (IS_ERR(bh)) {
+ ext4_error_inode_block(inode, nr, -PTR_ERR(bh),
"Read failure");
continue;
}
@@ -1128,7 +1046,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
brelse(bh);
/*
- * Everything below this this pointer has been
+ * Everything below this pointer has been
* released. Now let this top-of-subtree go.
*
* We want the freeing of this indirect block to be
@@ -1145,11 +1063,11 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
*/
if (ext4_handle_is_aborted(handle))
return;
- if (try_to_extend_transaction(handle, inode)) {
- ext4_mark_inode_dirty(handle, inode);
- ext4_truncate_restart_trans(handle, inode,
- ext4_blocks_for_truncate(inode));
- }
+ if (ext4_ind_truncate_ensure_credits(handle, inode,
+ NULL,
+ ext4_free_metadata_revoke_credits(
+ inode->i_sb, 1)) < 0)
+ return;
/*
* The forget flag here is critical because if
@@ -1173,7 +1091,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
*/
BUFFER_TRACE(parent_bh, "get_write_access");
if (!ext4_journal_get_write_access(handle,
- parent_bh)){
+ inode->i_sb, parent_bh,
+ EXT4_JTR_NONE)) {
*p = 0;
BUFFER_TRACE(parent_bh,
"call ext4_handle_dirty_metadata");
@@ -1275,106 +1194,281 @@ do_indirects:
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
i_data[EXT4_IND_BLOCK] = 0;
}
+ fallthrough;
case EXT4_IND_BLOCK:
nr = i_data[EXT4_DIND_BLOCK];
if (nr) {
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
i_data[EXT4_DIND_BLOCK] = 0;
}
+ fallthrough;
case EXT4_DIND_BLOCK:
nr = i_data[EXT4_TIND_BLOCK];
if (nr) {
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
i_data[EXT4_TIND_BLOCK] = 0;
}
+ fallthrough;
case EXT4_TIND_BLOCK:
;
}
}
-static int free_hole_blocks(handle_t *handle, struct inode *inode,
- struct buffer_head *parent_bh, __le32 *i_data,
- int level, ext4_lblk_t first,
- ext4_lblk_t count, int max)
+/**
+ * ext4_ind_remove_space - remove space from the range
+ * @handle: JBD handle for this transaction
+ * @inode: inode we are dealing with
+ * @start: First block to remove
+ * @end: One block after the last block to remove (exclusive)
+ *
+ * Free the blocks in the defined range (end is exclusive endpoint of
+ * range). This is used by ext4_punch_hole().
+ */
+int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
+ ext4_lblk_t start, ext4_lblk_t end)
{
- struct buffer_head *bh = NULL;
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ __le32 *i_data = ei->i_data;
int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
- int ret = 0;
- int i, inc;
- ext4_lblk_t offset;
- __le32 blk;
-
- inc = 1 << ((EXT4_BLOCK_SIZE_BITS(inode->i_sb) - 2) * level);
- for (i = 0, offset = 0; i < max; i++, i_data++, offset += inc) {
- if (offset >= count + first)
- break;
- if (*i_data == 0 || (offset + inc) <= first)
- continue;
- blk = *i_data;
- if (level > 0) {
- ext4_lblk_t first2;
- bh = sb_bread(inode->i_sb, le32_to_cpu(blk));
- if (!bh) {
- EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk),
- "Read failure");
- return -EIO;
+ ext4_lblk_t offsets[4], offsets2[4];
+ Indirect chain[4], chain2[4];
+ Indirect *partial, *partial2;
+ Indirect *p = NULL, *p2 = NULL;
+ ext4_lblk_t max_block;
+ __le32 nr = 0, nr2 = 0;
+ int n = 0, n2 = 0;
+ unsigned blocksize = inode->i_sb->s_blocksize;
+
+ max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
+ >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
+ if (end >= max_block)
+ end = max_block;
+ if ((start >= end) || (start > max_block))
+ return 0;
+
+ n = ext4_block_to_path(inode, start, offsets, NULL);
+ n2 = ext4_block_to_path(inode, end, offsets2, NULL);
+
+ BUG_ON(n > n2);
+
+ if ((n == 1) && (n == n2)) {
+ /* We're punching only within direct block range */
+ ext4_free_data(handle, inode, NULL, i_data + offsets[0],
+ i_data + offsets2[0]);
+ return 0;
+ } else if (n2 > n) {
+ /*
+ * Start and end are on a different levels so we're going to
+ * free partial block at start, and partial block at end of
+ * the range. If there are some levels in between then
+ * do_indirects label will take care of that.
+ */
+
+ if (n == 1) {
+ /*
+ * Start is at the direct block level, free
+ * everything to the end of the level.
+ */
+ ext4_free_data(handle, inode, NULL, i_data + offsets[0],
+ i_data + EXT4_NDIR_BLOCKS);
+ goto end_range;
+ }
+
+
+ partial = p = ext4_find_shared(inode, n, offsets, chain, &nr);
+ if (nr) {
+ if (partial == chain) {
+ /* Shared branch grows from the inode */
+ ext4_free_branches(handle, inode, NULL,
+ &nr, &nr+1, (chain+n-1) - partial);
+ *partial->p = 0;
+ } else {
+ /* Shared branch grows from an indirect block */
+ BUFFER_TRACE(partial->bh, "get_write_access");
+ ext4_free_branches(handle, inode, partial->bh,
+ partial->p,
+ partial->p+1, (chain+n-1) - partial);
}
- first2 = (first > offset) ? first - offset : 0;
- ret = free_hole_blocks(handle, inode, bh,
- (__le32 *)bh->b_data, level - 1,
- first2, count - offset,
- inode->i_sb->s_blocksize >> 2);
- if (ret) {
- brelse(bh);
- goto err;
+ }
+
+ /*
+ * Clear the ends of indirect blocks on the shared branch
+ * at the start of the range
+ */
+ while (partial > chain) {
+ ext4_free_branches(handle, inode, partial->bh,
+ partial->p + 1,
+ (__le32 *)partial->bh->b_data+addr_per_block,
+ (chain+n-1) - partial);
+ partial--;
+ }
+
+end_range:
+ partial2 = p2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
+ if (nr2) {
+ if (partial2 == chain2) {
+ /*
+ * Remember, end is exclusive so here we're at
+ * the start of the next level we're not going
+ * to free. Everything was covered by the start
+ * of the range.
+ */
+ goto do_indirects;
}
+ } else {
+ /*
+ * ext4_find_shared returns Indirect structure which
+ * points to the last element which should not be
+ * removed by truncate. But this is end of the range
+ * in punch_hole so we need to point to the next element
+ */
+ partial2->p++;
}
- if (level == 0 ||
- (bh && all_zeroes((__le32 *)bh->b_data,
- (__le32 *)bh->b_data + addr_per_block))) {
- ext4_free_data(handle, inode, parent_bh, &blk, &blk+1);
- *i_data = 0;
+
+ /*
+ * Clear the ends of indirect blocks on the shared branch
+ * at the end of the range
+ */
+ while (partial2 > chain2) {
+ ext4_free_branches(handle, inode, partial2->bh,
+ (__le32 *)partial2->bh->b_data,
+ partial2->p,
+ (chain2+n2-1) - partial2);
+ partial2--;
}
- brelse(bh);
- bh = NULL;
+ goto do_indirects;
}
-err:
- return ret;
-}
+ /* Punch happened within the same level (n == n2) */
+ partial = p = ext4_find_shared(inode, n, offsets, chain, &nr);
+ partial2 = p2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2);
-int ext4_free_hole_blocks(handle_t *handle, struct inode *inode,
- ext4_lblk_t first, ext4_lblk_t stop)
-{
- int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
- int level, ret = 0;
- int num = EXT4_NDIR_BLOCKS;
- ext4_lblk_t count, max = EXT4_NDIR_BLOCKS;
- __le32 *i_data = EXT4_I(inode)->i_data;
-
- count = stop - first;
- for (level = 0; level < 4; level++, max *= addr_per_block) {
- if (first < max) {
- ret = free_hole_blocks(handle, inode, NULL, i_data,
- level, first, count, num);
- if (ret)
- goto err;
- if (count > max - first)
- count -= max - first;
- else
+ /* Free top, but only if partial2 isn't its subtree. */
+ if (nr) {
+ int level = min(partial - chain, partial2 - chain2);
+ int i;
+ int subtree = 1;
+
+ for (i = 0; i <= level; i++) {
+ if (offsets[i] != offsets2[i]) {
+ subtree = 0;
break;
- first = 0;
- } else {
- first -= max;
+ }
}
- i_data += num;
- if (level == 0) {
- num = 1;
- max = 1;
+
+ if (!subtree) {
+ if (partial == chain) {
+ /* Shared branch grows from the inode */
+ ext4_free_branches(handle, inode, NULL,
+ &nr, &nr+1,
+ (chain+n-1) - partial);
+ *partial->p = 0;
+ } else {
+ /* Shared branch grows from an indirect block */
+ BUFFER_TRACE(partial->bh, "get_write_access");
+ ext4_free_branches(handle, inode, partial->bh,
+ partial->p,
+ partial->p+1,
+ (chain+n-1) - partial);
+ }
}
}
-err:
- return ret;
-}
+ if (!nr2) {
+ /*
+ * ext4_find_shared returns Indirect structure which
+ * points to the last element which should not be
+ * removed by truncate. But this is end of the range
+ * in punch_hole so we need to point to the next element
+ */
+ partial2->p++;
+ }
+
+ while (partial > chain || partial2 > chain2) {
+ int depth = (chain+n-1) - partial;
+ int depth2 = (chain2+n2-1) - partial2;
+
+ if (partial > chain && partial2 > chain2 &&
+ partial->bh->b_blocknr == partial2->bh->b_blocknr) {
+ /*
+ * We've converged on the same block. Clear the range,
+ * then we're done.
+ */
+ ext4_free_branches(handle, inode, partial->bh,
+ partial->p + 1,
+ partial2->p,
+ (chain+n-1) - partial);
+ goto cleanup;
+ }
+
+ /*
+ * The start and end partial branches may not be at the same
+ * level even though the punch happened within one level. So, we
+ * give them a chance to arrive at the same level, then walk
+ * them in step with each other until we converge on the same
+ * block.
+ */
+ if (partial > chain && depth <= depth2) {
+ ext4_free_branches(handle, inode, partial->bh,
+ partial->p + 1,
+ (__le32 *)partial->bh->b_data+addr_per_block,
+ (chain+n-1) - partial);
+ partial--;
+ }
+ if (partial2 > chain2 && depth2 <= depth) {
+ ext4_free_branches(handle, inode, partial2->bh,
+ (__le32 *)partial2->bh->b_data,
+ partial2->p,
+ (chain2+n2-1) - partial2);
+ partial2--;
+ }
+ }
+cleanup:
+ while (p && p > chain) {
+ BUFFER_TRACE(p->bh, "call brelse");
+ brelse(p->bh);
+ p--;
+ }
+ while (p2 && p2 > chain2) {
+ BUFFER_TRACE(p2->bh, "call brelse");
+ brelse(p2->bh);
+ p2--;
+ }
+ return 0;
+
+do_indirects:
+ /* Kill the remaining (whole) subtrees */
+ switch (offsets[0]) {
+ default:
+ if (++n >= n2)
+ break;
+ nr = i_data[EXT4_IND_BLOCK];
+ if (nr) {
+ ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
+ i_data[EXT4_IND_BLOCK] = 0;
+ }
+ fallthrough;
+ case EXT4_IND_BLOCK:
+ if (++n >= n2)
+ break;
+ nr = i_data[EXT4_DIND_BLOCK];
+ if (nr) {
+ ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
+ i_data[EXT4_DIND_BLOCK] = 0;
+ }
+ fallthrough;
+ case EXT4_DIND_BLOCK:
+ if (++n >= n2)
+ break;
+ nr = i_data[EXT4_TIND_BLOCK];
+ if (nr) {
+ ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
+ i_data[EXT4_TIND_BLOCK] = 0;
+ }
+ fallthrough;
+ case EXT4_TIND_BLOCK:
+ ;
+ }
+ goto cleanup;
+}