diff options
Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r-- | fs/ext4/extents.c | 1533 |
1 files changed, 773 insertions, 760 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e57054bdc5fd..b543a46fc809 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -50,10 +50,9 @@ static __le32 ext4_extent_block_csum(struct inode *inode, struct ext4_extent_header *eh) { struct ext4_inode_info *ei = EXT4_I(inode); - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); __u32 csum; - csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)eh, + csum = ext4_chksum(ei->i_csum_seed, (__u8 *)eh, EXT4_EXTENT_TAIL_OFFSET(eh)); return cpu_to_le32(csum); } @@ -63,7 +62,7 @@ static int ext4_extent_block_csum_verify(struct inode *inode, { struct ext4_extent_tail *et; - if (!ext4_has_metadata_csum(inode->i_sb)) + if (!ext4_has_feature_metadata_csum(inode->i_sb)) return 1; et = find_ext4_extent_tail(eh); @@ -77,19 +76,18 @@ static void ext4_extent_block_csum_set(struct inode *inode, { struct ext4_extent_tail *et; - if (!ext4_has_metadata_csum(inode->i_sb)) + if (!ext4_has_feature_metadata_csum(inode->i_sb)) return; et = find_ext4_extent_tail(eh); et->et_checksum = ext4_extent_block_csum(inode, eh); } -static int ext4_split_extent_at(handle_t *handle, - struct inode *inode, - struct ext4_ext_path **ppath, - ext4_lblk_t split, - int split_flag, - int flags); +static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, + struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t split, + int split_flag, int flags); static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped) { @@ -106,21 +104,27 @@ static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped) return 0; } +static inline void ext4_ext_path_brelse(struct ext4_ext_path *path) +{ + brelse(path->p_bh); + path->p_bh = NULL; +} + static void ext4_ext_drop_refs(struct ext4_ext_path *path) { int depth, i; - if (!path) + if (IS_ERR_OR_NULL(path)) return; depth = path->p_depth; - for (i = 0; i <= depth; i++, path++) { - brelse(path->p_bh); - path->p_bh = NULL; - } + for (i = 0; i <= depth; i++, path++) + ext4_ext_path_brelse(path); } void ext4_free_ext_path(struct ext4_ext_path *path) { + if (IS_ERR_OR_NULL(path)) + return; ext4_ext_drop_refs(path); kfree(path); } @@ -323,19 +327,18 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check) return size; } -static inline int +static inline struct ext4_ext_path * ext4_force_split_extent_at(handle_t *handle, struct inode *inode, - struct ext4_ext_path **ppath, ext4_lblk_t lblk, + struct ext4_ext_path *path, ext4_lblk_t lblk, int nofail) { - struct ext4_ext_path *path = *ppath; int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext); int flags = EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO; if (nofail) flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL | EXT4_EX_NOFAIL; - return ext4_split_extent_at(handle, inode, ppath, lblk, unwritten ? + return ext4_split_extent_at(handle, inode, path, lblk, unwritten ? EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0, flags); } @@ -564,7 +567,7 @@ __read_extent_tree_block(const char *function, unsigned int line, if (!bh_uptodate_or_lock(bh)) { trace_ext4_ext_load_extent(inode, pblk, _RET_IP_); - err = ext4_read_bh(bh, 0, NULL); + err = ext4_read_bh(bh, 0, NULL, false); if (err < 0) goto errout; } @@ -607,6 +610,8 @@ int ext4_ext_precache(struct inode *inode) if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) return 0; /* not an extent-mapped inode */ + ext4_check_map_extents_env(inode); + down_read(&ei->i_data_sem); depth = ext_depth(inode); @@ -635,8 +640,7 @@ int ext4_ext_precache(struct inode *inode) */ if ((i == depth) || path[i].p_idx > EXT_LAST_INDEX(path[i].p_hdr)) { - brelse(path[i].p_bh); - path[i].p_bh = NULL; + ext4_ext_path_brelse(path + i); i--; continue; } @@ -689,7 +693,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) struct ext4_extent *ex; int i; - if (!path) + if (IS_ERR_OR_NULL(path)) return; eh = path[depth].p_hdr; @@ -881,11 +885,10 @@ void ext4_ext_tree_init(handle_t *handle, struct inode *inode) struct ext4_ext_path * ext4_find_extent(struct inode *inode, ext4_lblk_t block, - struct ext4_ext_path **orig_path, int flags) + struct ext4_ext_path *path, int flags) { struct ext4_extent_header *eh; struct buffer_head *bh; - struct ext4_ext_path *path = orig_path ? *orig_path : NULL; short int depth, i, ppos = 0; int ret; gfp_t gfp_flags = GFP_NOFS; @@ -906,7 +909,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, ext4_ext_drop_refs(path); if (depth > path[0].p_maxdepth) { kfree(path); - *orig_path = path = NULL; + path = NULL; } } if (!path) { @@ -961,8 +964,6 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, err: ext4_free_ext_path(path); - if (orig_path) - *orig_path = NULL; return ERR_PTR(ret); } @@ -1395,15 +1396,15 @@ out: * finds empty index and adds new leaf. * if no free index is found, then it requests in-depth growing. */ -static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, - unsigned int mb_flags, - unsigned int gb_flags, - struct ext4_ext_path **ppath, - struct ext4_extent *newext) +static struct ext4_ext_path * +ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, + unsigned int mb_flags, unsigned int gb_flags, + struct ext4_ext_path *path, + struct ext4_extent *newext) { - struct ext4_ext_path *path = *ppath; struct ext4_ext_path *curp; int depth, i, err = 0; + ext4_lblk_t ee_block = le32_to_cpu(newext->ee_block); repeat: i = depth = ext_depth(inode); @@ -1422,42 +1423,38 @@ repeat: * entry: create all needed subtree and add new leaf */ err = ext4_ext_split(handle, inode, mb_flags, path, newext, i); if (err) - goto out; + goto errout; /* refill path */ - path = ext4_find_extent(inode, - (ext4_lblk_t)le32_to_cpu(newext->ee_block), - ppath, gb_flags); - if (IS_ERR(path)) - err = PTR_ERR(path); - } else { - /* tree is full, time to grow in depth */ - err = ext4_ext_grow_indepth(handle, inode, mb_flags); - if (err) - goto out; + path = ext4_find_extent(inode, ee_block, path, gb_flags); + return path; + } - /* refill path */ - path = ext4_find_extent(inode, - (ext4_lblk_t)le32_to_cpu(newext->ee_block), - ppath, gb_flags); - if (IS_ERR(path)) { - err = PTR_ERR(path); - goto out; - } + /* tree is full, time to grow in depth */ + err = ext4_ext_grow_indepth(handle, inode, mb_flags); + if (err) + goto errout; - /* - * only first (depth 0 -> 1) produces free space; - * in all other cases we have to split the grown tree - */ - depth = ext_depth(inode); - if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { - /* now we need to split */ - goto repeat; - } + /* refill path */ + path = ext4_find_extent(inode, ee_block, path, gb_flags); + if (IS_ERR(path)) + return path; + + /* + * only first (depth 0 -> 1) produces free space; + * in all other cases we have to split the grown tree + */ + depth = ext_depth(inode); + if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { + /* now we need to split */ + goto repeat; } -out: - return err; + return path; + +errout: + ext4_free_ext_path(path); + return ERR_PTR(err); } /* @@ -1534,7 +1531,7 @@ static int ext4_ext_search_left(struct inode *inode, static int ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, ext4_lblk_t *logical, ext4_fsblk_t *phys, - struct ext4_extent *ret_ex) + struct ext4_extent *ret_ex, int flags) { struct buffer_head *bh = NULL; struct ext4_extent_header *eh; @@ -1608,7 +1605,8 @@ got_index: ix++; while (++depth < path->p_depth) { /* subtract from p_depth to get proper eh_depth */ - bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0); + bh = read_extent_tree_block(inode, ix, path->p_depth - depth, + flags); if (IS_ERR(bh)) return PTR_ERR(bh); eh = ext_block_hdr(bh); @@ -1616,7 +1614,7 @@ got_index: put_bh(bh); } - bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0); + bh = read_extent_tree_block(inode, ix, path->p_depth - depth, flags); if (IS_ERR(bh)) return PTR_ERR(bh); eh = ext_block_hdr(bh); @@ -1749,12 +1747,23 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, break; err = ext4_ext_get_access(handle, inode, path + k); if (err) - break; + goto clean; path[k].p_idx->ei_block = border; err = ext4_ext_dirty(handle, inode, path + k); if (err) - break; + goto clean; } + return 0; + +clean: + /* + * The path[k].p_bh is either unmodified or with no verified bit + * set (see ext4_ext_get_access()). So just clear the verified bit + * of the successfully modified extents buffers, which will force + * these extents to be checked to avoid using inconsistent data. + */ + while (++k < depth) + clear_buffer_verified(path[k].p_bh); return err; } @@ -1876,7 +1885,7 @@ static void ext4_ext_try_to_merge_up(handle_t *handle, (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr)); path[0].p_hdr->eh_max = cpu_to_le16(max_root); - brelse(path[1].p_bh); + ext4_ext_path_brelse(path + 1); ext4_free_blocks(handle, inode, NULL, blk, 1, EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); } @@ -1964,16 +1973,15 @@ out: * inserts requested extent as new one into the tree, * creating new leaf in the no-space case. */ -int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, - struct ext4_ext_path **ppath, - struct ext4_extent *newext, int gb_flags) +struct ext4_ext_path * +ext4_ext_insert_extent(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *newext, int gb_flags) { - struct ext4_ext_path *path = *ppath; struct ext4_extent_header *eh; struct ext4_extent *ex, *fex; struct ext4_extent *nearex; /* nearest extent */ - struct ext4_ext_path *npath = NULL; - int depth, len, err; + int depth, len, err = 0; ext4_lblk_t next; int mb_flags = 0, unwritten; @@ -1981,14 +1989,16 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, mb_flags |= EXT4_MB_DELALLOC_RESERVED; if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); - return -EFSCORRUPTED; + err = -EFSCORRUPTED; + goto errout; } depth = ext_depth(inode); ex = path[depth].p_ext; eh = path[depth].p_hdr; if (unlikely(path[depth].p_hdr == NULL)) { EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); - return -EFSCORRUPTED; + err = -EFSCORRUPTED; + goto errout; } /* try to insert block into found extent and return */ @@ -2026,7 +2036,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, err = ext4_ext_get_access(handle, inode, path + depth); if (err) - return err; + goto errout; unwritten = ext4_ext_is_unwritten(ex); ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + ext4_ext_get_actual_len(newext)); @@ -2051,7 +2061,7 @@ prepend: err = ext4_ext_get_access(handle, inode, path + depth); if (err) - return err; + goto errout; unwritten = ext4_ext_is_unwritten(ex); ex->ee_block = newext->ee_block; @@ -2076,21 +2086,26 @@ prepend: if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)) next = ext4_ext_next_leaf_block(path); if (next != EXT_MAX_BLOCKS) { + struct ext4_ext_path *npath; + ext_debug(inode, "next leaf block - %u\n", next); - BUG_ON(npath != NULL); npath = ext4_find_extent(inode, next, NULL, gb_flags); - if (IS_ERR(npath)) - return PTR_ERR(npath); + if (IS_ERR(npath)) { + err = PTR_ERR(npath); + goto errout; + } BUG_ON(npath->p_depth != path->p_depth); eh = npath[depth].p_hdr; if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) { ext_debug(inode, "next leaf isn't full(%d)\n", le16_to_cpu(eh->eh_entries)); + ext4_free_ext_path(path); path = npath; goto has_space; } ext_debug(inode, "next leaf has no free space(%d,%d)\n", le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); + ext4_free_ext_path(npath); } /* @@ -2099,10 +2114,10 @@ prepend: */ if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL) mb_flags |= EXT4_MB_USE_RESERVED; - err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags, - ppath, newext); - if (err) - goto cleanup; + path = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags, + path, newext); + if (IS_ERR(path)) + return path; depth = ext_depth(inode); eh = path[depth].p_hdr; @@ -2111,7 +2126,7 @@ has_space: err = ext4_ext_get_access(handle, inode, path + depth); if (err) - goto cleanup; + goto errout; if (!nearex) { /* there is no extent in this leaf, create first one */ @@ -2169,17 +2184,20 @@ merge: if (!(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) ext4_ext_try_to_merge(handle, inode, path, nearex); - /* time to correct all indexes above */ err = ext4_ext_correct_indexes(handle, inode, path); if (err) - goto cleanup; + goto errout; err = ext4_ext_dirty(handle, inode, path + path->p_depth); + if (err) + goto errout; -cleanup: - ext4_free_ext_path(npath); - return err; + return path; + +errout: + ext4_free_ext_path(path); + return ERR_PTR(err); } static int ext4_fill_es_cache_info(struct inode *inode, @@ -2279,27 +2297,26 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, { int err; ext4_fsblk_t leaf; + int k = depth - 1; /* free index block */ - depth--; - path = path + depth; - leaf = ext4_idx_pblock(path->p_idx); - if (unlikely(path->p_hdr->eh_entries == 0)) { - EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); + leaf = ext4_idx_pblock(path[k].p_idx); + if (unlikely(path[k].p_hdr->eh_entries == 0)) { + EXT4_ERROR_INODE(inode, "path[%d].p_hdr->eh_entries == 0", k); return -EFSCORRUPTED; } - err = ext4_ext_get_access(handle, inode, path); + err = ext4_ext_get_access(handle, inode, path + k); if (err) return err; - if (path->p_idx != EXT_LAST_INDEX(path->p_hdr)) { - int len = EXT_LAST_INDEX(path->p_hdr) - path->p_idx; + if (path[k].p_idx != EXT_LAST_INDEX(path[k].p_hdr)) { + int len = EXT_LAST_INDEX(path[k].p_hdr) - path[k].p_idx; len *= sizeof(struct ext4_extent_idx); - memmove(path->p_idx, path->p_idx + 1, len); + memmove(path[k].p_idx, path[k].p_idx + 1, len); } - le16_add_cpu(&path->p_hdr->eh_entries, -1); - err = ext4_ext_dirty(handle, inode, path); + le16_add_cpu(&path[k].p_hdr->eh_entries, -1); + err = ext4_ext_dirty(handle, inode, path + k); if (err) return err; ext_debug(inode, "index is empty, remove it, free block %llu\n", leaf); @@ -2308,18 +2325,29 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, ext4_free_blocks(handle, inode, NULL, leaf, 1, EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); - while (--depth >= 0) { - if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) + while (--k >= 0) { + if (path[k + 1].p_idx != EXT_FIRST_INDEX(path[k + 1].p_hdr)) break; - path--; - err = ext4_ext_get_access(handle, inode, path); + err = ext4_ext_get_access(handle, inode, path + k); if (err) - break; - path->p_idx->ei_block = (path+1)->p_idx->ei_block; - err = ext4_ext_dirty(handle, inode, path); + goto clean; + path[k].p_idx->ei_block = path[k + 1].p_idx->ei_block; + err = ext4_ext_dirty(handle, inode, path + k); if (err) - break; + goto clean; } + return 0; + +clean: + /* + * The path[k].p_bh is either unmodified or with no verified bit + * set (see ext4_ext_get_access()). So just clear the verified bit + * of the successfully modified extents buffers, which will force + * these extents to be checked to avoid using inconsistent data. + */ + while (++k < depth) + clear_buffer_verified(path[k].p_bh); + return err; } @@ -2370,18 +2398,20 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks, int ext4_ext_index_trans_blocks(struct inode *inode, int extents) { int index; - int depth; /* If we are converting the inline data, only one is needed here. */ if (ext4_has_inline_data(inode)) return 1; - depth = ext_depth(inode); - + /* + * Extent tree can change between the time we estimate credits and + * the time we actually modify the tree. Assume the worst case. + */ if (extents <= 1) - index = depth * 2; + index = (EXT4_MAX_EXTENT_DEPTH * 2) + extents; else - index = depth * 3; + index = (EXT4_MAX_EXTENT_DEPTH * 3) + + DIV_ROUND_UP(extents, ext4_ext_space_block(inode, 0)); return index; } @@ -2795,6 +2825,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, struct partial_cluster partial; handle_t *handle; int i = 0, err = 0; + int flags = EXT4_EX_NOCACHE | EXT4_EX_NOFAIL; partial.pclu = 0; partial.lblk = 0; @@ -2825,8 +2856,7 @@ again: ext4_fsblk_t pblk; /* find extent for or closest extent to this block */ - path = ext4_find_extent(inode, end, NULL, - EXT4_EX_NOCACHE | EXT4_EX_NOFAIL); + path = ext4_find_extent(inode, end, NULL, flags); if (IS_ERR(path)) { ext4_journal_stop(handle); return PTR_ERR(path); @@ -2872,11 +2902,12 @@ again: * fail removing space due to ENOSPC so try to use * reserved block if that happens. */ - err = ext4_force_split_extent_at(handle, inode, &path, - end + 1, 1); - if (err < 0) + path = ext4_force_split_extent_at(handle, inode, path, + end + 1, 1); + if (IS_ERR(path)) { + err = PTR_ERR(path); goto out; - + } } else if (sbi->s_cluster_ratio > 1 && end >= ex_end && partial.state == initial) { /* @@ -2891,7 +2922,7 @@ again: */ lblk = ex_end + 1; err = ext4_ext_search_right(inode, path, &lblk, &pblk, - NULL); + NULL, flags); if (err < 0) goto out; if (pblk) { @@ -2934,8 +2965,7 @@ again: err = ext4_ext_rm_leaf(handle, inode, path, &partial, start, end); /* root level has p_bh == NULL, brelse() eats this */ - brelse(path[i].p_bh); - path[i].p_bh = NULL; + ext4_ext_path_brelse(path + i); i--; continue; } @@ -2968,8 +2998,7 @@ again: i + 1, ext4_idx_pblock(path[i].p_idx)); memset(path + i + 1, 0, sizeof(*path)); bh = read_extent_tree_block(inode, path[i].p_idx, - depth - i - 1, - EXT4_EX_NOCACHE); + depth - i - 1, flags); if (IS_ERR(bh)) { /* should we reset i_size? */ err = PTR_ERR(bh); @@ -2997,8 +3026,7 @@ again: err = ext4_ext_rm_idx(handle, inode, path, i); } /* root level has p_bh == NULL, brelse() eats this */ - brelse(path[i].p_bh); - path[i].p_bh = NULL; + ext4_ext_path_brelse(path + i); i--; ext_debug(inode, "return to level %d\n", i); } @@ -3113,7 +3141,7 @@ static void ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex) return; ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock, - EXTENT_STATUS_WRITTEN); + EXTENT_STATUS_WRITTEN, false); } /* FIXME!! we need to try to merge to left or right after zero-out */ @@ -3147,16 +3175,14 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) * a> the extent are splitted into two extent. * b> split is not needed, and just mark the extent. * - * return 0 on success. + * Return an extent path pointer on success, or an error pointer on failure. */ -static int ext4_split_extent_at(handle_t *handle, - struct inode *inode, - struct ext4_ext_path **ppath, - ext4_lblk_t split, - int split_flag, - int flags) +static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, + struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t split, + int split_flag, int flags) { - struct ext4_ext_path *path = *ppath; ext4_fsblk_t newblock; ext4_lblk_t ee_block; struct ext4_extent *ex, newex, orig_ex, zero_ex; @@ -3226,10 +3252,31 @@ static int ext4_split_extent_at(handle_t *handle, if (split_flag & EXT4_EXT_MARK_UNWRIT2) ext4_ext_mark_unwritten(ex2); - err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags); - if (err != -ENOSPC && err != -EDQUOT && err != -ENOMEM) + path = ext4_ext_insert_extent(handle, inode, path, &newex, flags); + if (!IS_ERR(path)) goto out; + err = PTR_ERR(path); + if (err != -ENOSPC && err != -EDQUOT && err != -ENOMEM) + return path; + + /* + * Get a new path to try to zeroout or fix the extent length. + * Using EXT4_EX_NOFAIL guarantees that ext4_find_extent() + * will not return -ENOMEM, otherwise -ENOMEM will cause a + * retry in do_writepages(), and a WARN_ON may be triggered + * in ext4_da_update_reserve_space() due to an incorrect + * ee_len causing the i_reserved_data_blocks exception. + */ + path = ext4_find_extent(inode, ee_block, NULL, flags | EXT4_EX_NOFAIL); + if (IS_ERR(path)) { + EXT4_ERROR_INODE(inode, "Failed split extent on %u, err %ld", + split, PTR_ERR(path)); + return path; + } + depth = ext_depth(inode); + ex = path[depth].p_ext; + if (EXT4_EXT_MAY_ZEROOUT & split_flag) { if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { if (split_flag & EXT4_EXT_DATA_VALID1) { @@ -3280,14 +3327,17 @@ fix_extent_len: * and err is a non-zero error code. */ ext4_ext_dirty(handle, inode, path + path->p_depth); - return err; out: + if (err) { + ext4_free_ext_path(path); + path = ERR_PTR(err); + } ext4_ext_show_leaf(inode, path); - return err; + return path; } /* - * ext4_split_extents() splits an extent and mark extent which is covered + * ext4_split_extent() splits an extent and mark extent which is covered * by @map as split_flags indicates * * It may result in splitting the extent into multiple extents (up to three) @@ -3297,21 +3347,18 @@ out: * c> Splits in three extents: Somone is splitting in middle of the extent * */ -static int ext4_split_extent(handle_t *handle, - struct inode *inode, - struct ext4_ext_path **ppath, - struct ext4_map_blocks *map, - int split_flag, - int flags) +static struct ext4_ext_path *ext4_split_extent(handle_t *handle, + struct inode *inode, + struct ext4_ext_path *path, + struct ext4_map_blocks *map, + int split_flag, int flags, + unsigned int *allocated) { - struct ext4_ext_path *path = *ppath; ext4_lblk_t ee_block; struct ext4_extent *ex; unsigned int ee_len, depth; - int err = 0; int unwritten; int split_flag1, flags1; - int allocated = map->m_len; depth = ext_depth(inode); ex = path[depth].p_ext; @@ -3327,28 +3374,27 @@ static int ext4_split_extent(handle_t *handle, EXT4_EXT_MARK_UNWRIT2; if (split_flag & EXT4_EXT_DATA_VALID2) split_flag1 |= EXT4_EXT_DATA_VALID1; - err = ext4_split_extent_at(handle, inode, ppath, + path = ext4_split_extent_at(handle, inode, path, map->m_lblk + map->m_len, split_flag1, flags1); - if (err) - goto out; - } else { - allocated = ee_len - (map->m_lblk - ee_block); - } - /* - * Update path is required because previous ext4_split_extent_at() may - * result in split of original leaf or extent zeroout. - */ - path = ext4_find_extent(inode, map->m_lblk, ppath, flags); - if (IS_ERR(path)) - return PTR_ERR(path); - depth = ext_depth(inode); - ex = path[depth].p_ext; - if (!ex) { - EXT4_ERROR_INODE(inode, "unexpected hole at %lu", - (unsigned long) map->m_lblk); - return -EFSCORRUPTED; + if (IS_ERR(path)) + return path; + /* + * Update path is required because previous ext4_split_extent_at + * may result in split of original leaf or extent zeroout. + */ + path = ext4_find_extent(inode, map->m_lblk, path, flags); + if (IS_ERR(path)) + return path; + depth = ext_depth(inode); + ex = path[depth].p_ext; + if (!ex) { + EXT4_ERROR_INODE(inode, "unexpected hole at %lu", + (unsigned long) map->m_lblk); + ext4_free_ext_path(path); + return ERR_PTR(-EFSCORRUPTED); + } + unwritten = ext4_ext_is_unwritten(ex); } - unwritten = ext4_ext_is_unwritten(ex); if (map->m_lblk >= ee_block) { split_flag1 = split_flag & EXT4_EXT_DATA_VALID2; @@ -3357,15 +3403,20 @@ static int ext4_split_extent(handle_t *handle, split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT | EXT4_EXT_MARK_UNWRIT2); } - err = ext4_split_extent_at(handle, inode, ppath, + path = ext4_split_extent_at(handle, inode, path, map->m_lblk, split_flag1, flags); - if (err) - goto out; + if (IS_ERR(path)) + return path; } + if (allocated) { + if (map->m_lblk + map->m_len > ee_block + ee_len) + *allocated = ee_len - (map->m_lblk - ee_block); + else + *allocated = map->m_len; + } ext4_ext_show_leaf(inode, path); -out: - return err ? err : allocated; + return path; } /* @@ -3388,13 +3439,11 @@ out: * that are allocated and initialized. * It is guaranteed to be >= map->m_len. */ -static int ext4_ext_convert_to_initialized(handle_t *handle, - struct inode *inode, - struct ext4_map_blocks *map, - struct ext4_ext_path **ppath, - int flags) +static struct ext4_ext_path * +ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, struct ext4_ext_path *path, + int flags, unsigned int *allocated) { - struct ext4_ext_path *path = *ppath; struct ext4_sb_info *sbi; struct ext4_extent_header *eh; struct ext4_map_blocks split_map; @@ -3402,9 +3451,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, struct ext4_extent *ex, *abut_ex; ext4_lblk_t ee_block, eof_block; unsigned int ee_len, depth, map_len = map->m_len; - int allocated = 0, max_zeroout = 0; int err = 0; int split_flag = EXT4_EXT_DATA_VALID2; + unsigned int max_zeroout = 0; ext_debug(inode, "logical block %llu, max_blocks %u\n", (unsigned long long)map->m_lblk, map_len); @@ -3444,6 +3493,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, * - L2: we only attempt to merge with an extent stored in the * same extent tree node. */ + *allocated = 0; if ((map->m_lblk == ee_block) && /* See if we can merge left */ (map_len < ee_len) && /*L1*/ @@ -3473,7 +3523,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, (prev_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/ err = ext4_ext_get_access(handle, inode, path + depth); if (err) - goto out; + goto errout; trace_ext4_ext_convert_to_initialized_fastpath(inode, map, ex, abut_ex); @@ -3488,7 +3538,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, abut_ex->ee_len = cpu_to_le16(prev_len + map_len); /* Result: number of initialized blocks past m_lblk */ - allocated = map_len; + *allocated = map_len; } } else if (((map->m_lblk + map_len) == (ee_block + ee_len)) && (map_len < ee_len) && /*L1*/ @@ -3519,7 +3569,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, (next_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/ err = ext4_ext_get_access(handle, inode, path + depth); if (err) - goto out; + goto errout; trace_ext4_ext_convert_to_initialized_fastpath(inode, map, ex, abut_ex); @@ -3534,18 +3584,20 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, abut_ex->ee_len = cpu_to_le16(next_len + map_len); /* Result: number of initialized blocks past m_lblk */ - allocated = map_len; + *allocated = map_len; } } - if (allocated) { + if (*allocated) { /* Mark the block containing both extents as dirty */ err = ext4_ext_dirty(handle, inode, path + depth); /* Update path to point to the right extent */ path[depth].p_ext = abut_ex; + if (err) + goto errout; goto out; } else - allocated = ee_len - (map->m_lblk - ee_block); + *allocated = ee_len - (map->m_lblk - ee_block); WARN_ON(map->m_lblk < ee_block); /* @@ -3572,21 +3624,21 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, split_map.m_lblk = map->m_lblk; split_map.m_len = map->m_len; - if (max_zeroout && (allocated > split_map.m_len)) { - if (allocated <= max_zeroout) { + if (max_zeroout && (*allocated > split_map.m_len)) { + if (*allocated <= max_zeroout) { /* case 3 or 5 */ zero_ex1.ee_block = cpu_to_le32(split_map.m_lblk + split_map.m_len); zero_ex1.ee_len = - cpu_to_le16(allocated - split_map.m_len); + cpu_to_le16(*allocated - split_map.m_len); ext4_ext_store_pblock(&zero_ex1, ext4_ext_pblock(ex) + split_map.m_lblk + split_map.m_len - ee_block); err = ext4_ext_zeroout(inode, &zero_ex1); if (err) goto fallback; - split_map.m_len = allocated; + split_map.m_len = *allocated; } if (split_map.m_lblk - ee_block + split_map.m_len < max_zeroout) { @@ -3604,22 +3656,24 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, split_map.m_len += split_map.m_lblk - ee_block; split_map.m_lblk = ee_block; - allocated = map->m_len; + *allocated = map->m_len; } } fallback: - err = ext4_split_extent(handle, inode, ppath, &split_map, split_flag, - flags); - if (err > 0) - err = 0; + path = ext4_split_extent(handle, inode, path, &split_map, split_flag, + flags, NULL); + if (IS_ERR(path)) + return path; out: /* If we have gotten a failure, don't zero out status tree */ - if (!err) { - ext4_zeroout_es(inode, &zero_ex1); - ext4_zeroout_es(inode, &zero_ex2); - } - return err ? err : allocated; + ext4_zeroout_es(inode, &zero_ex1); + ext4_zeroout_es(inode, &zero_ex2); + return path; + +errout: + ext4_free_ext_path(path); + return ERR_PTR(err); } /* @@ -3644,15 +3698,16 @@ out: * being filled will be convert to initialized by the end_io callback function * via ext4_convert_unwritten_extents(). * - * Returns the size of unwritten extent to be written on success. + * The size of unwritten extent to be written is passed to the caller via the + * allocated pointer. Return an extent path pointer on success, or an error + * pointer on failure. */ -static int ext4_split_convert_extents(handle_t *handle, +static struct ext4_ext_path *ext4_split_convert_extents(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, - struct ext4_ext_path **ppath, - int flags) + struct ext4_ext_path *path, + int flags, unsigned int *allocated) { - struct ext4_ext_path *path = *ppath; ext4_lblk_t eof_block; ext4_lblk_t ee_block; struct ext4_extent *ex; @@ -3685,15 +3740,15 @@ static int ext4_split_convert_extents(handle_t *handle, split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2); } flags |= EXT4_GET_BLOCKS_PRE_IO; - return ext4_split_extent(handle, inode, ppath, map, split_flag, flags); + return ext4_split_extent(handle, inode, path, map, split_flag, flags, + allocated); } -static int ext4_convert_unwritten_extents_endio(handle_t *handle, - struct inode *inode, - struct ext4_map_blocks *map, - struct ext4_ext_path **ppath) +static struct ext4_ext_path * +ext4_convert_unwritten_extents_endio(handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, + struct ext4_ext_path *path) { - struct ext4_ext_path *path = *ppath; struct ext4_extent *ex; ext4_lblk_t ee_block; unsigned int ee_len; @@ -3721,20 +3776,21 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, inode->i_ino, (unsigned long long)ee_block, ee_len, (unsigned long long)map->m_lblk, map->m_len); #endif - err = ext4_split_convert_extents(handle, inode, map, ppath, - EXT4_GET_BLOCKS_CONVERT); - if (err < 0) - return err; - path = ext4_find_extent(inode, map->m_lblk, ppath, 0); + path = ext4_split_convert_extents(handle, inode, map, path, + EXT4_GET_BLOCKS_CONVERT, NULL); if (IS_ERR(path)) - return PTR_ERR(path); + return path; + + path = ext4_find_extent(inode, map->m_lblk, path, 0); + if (IS_ERR(path)) + return path; depth = ext_depth(inode); ex = path[depth].p_ext; } err = ext4_ext_get_access(handle, inode, path + depth); if (err) - goto out; + goto errout; /* first mark the extent as initialized */ ext4_ext_mark_initialized(ex); @@ -3745,18 +3801,23 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, /* Mark modified extent as dirty */ err = ext4_ext_dirty(handle, inode, path + path->p_depth); -out: + if (err) + goto errout; + ext4_ext_show_leaf(inode, path); - return err; + return path; + +errout: + ext4_free_ext_path(path); + return ERR_PTR(err); } -static int +static struct ext4_ext_path * convert_initialized_extent(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, - struct ext4_ext_path **ppath, + struct ext4_ext_path *path, unsigned int *allocated) { - struct ext4_ext_path *path = *ppath; struct ext4_extent *ex; ext4_lblk_t ee_block; unsigned int ee_len; @@ -3779,25 +3840,27 @@ convert_initialized_extent(handle_t *handle, struct inode *inode, (unsigned long long)ee_block, ee_len); if (ee_block != map->m_lblk || ee_len > map->m_len) { - err = ext4_split_convert_extents(handle, inode, map, ppath, - EXT4_GET_BLOCKS_CONVERT_UNWRITTEN); - if (err < 0) - return err; - path = ext4_find_extent(inode, map->m_lblk, ppath, 0); + path = ext4_split_convert_extents(handle, inode, map, path, + EXT4_GET_BLOCKS_CONVERT_UNWRITTEN, NULL); if (IS_ERR(path)) - return PTR_ERR(path); + return path; + + path = ext4_find_extent(inode, map->m_lblk, path, 0); + if (IS_ERR(path)) + return path; depth = ext_depth(inode); ex = path[depth].p_ext; if (!ex) { EXT4_ERROR_INODE(inode, "unexpected hole at %lu", (unsigned long) map->m_lblk); - return -EFSCORRUPTED; + err = -EFSCORRUPTED; + goto errout; } } err = ext4_ext_get_access(handle, inode, path + depth); if (err) - return err; + goto errout; /* first mark the extent as unwritten */ ext4_ext_mark_unwritten(ex); @@ -3809,7 +3872,7 @@ convert_initialized_extent(handle_t *handle, struct inode *inode, /* Mark modified extent as dirty */ err = ext4_ext_dirty(handle, inode, path + path->p_depth); if (err) - return err; + goto errout; ext4_ext_show_leaf(inode, path); ext4_update_inode_fsync_trans(handle, inode, 1); @@ -3818,22 +3881,24 @@ convert_initialized_extent(handle_t *handle, struct inode *inode, if (*allocated > map->m_len) *allocated = map->m_len; map->m_len = *allocated; - return 0; + return path; + +errout: + ext4_free_ext_path(path); + return ERR_PTR(err); } -static int +static struct ext4_ext_path * ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, - struct ext4_ext_path **ppath, int flags, - unsigned int allocated, ext4_fsblk_t newblock) + struct ext4_ext_path *path, int flags, + unsigned int *allocated, ext4_fsblk_t newblock) { - struct ext4_ext_path __maybe_unused *path = *ppath; - int ret = 0; int err = 0; ext_debug(inode, "logical block %llu, max_blocks %u, flags 0x%x, allocated %u\n", (unsigned long long)map->m_lblk, map->m_len, flags, - allocated); + *allocated); ext4_ext_show_leaf(inode, path); /* @@ -3843,36 +3908,34 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL; trace_ext4_ext_handle_unwritten_extents(inode, map, flags, - allocated, newblock); + *allocated, newblock); /* get_block() before submitting IO, split the extent */ if (flags & EXT4_GET_BLOCKS_PRE_IO) { - ret = ext4_split_convert_extents(handle, inode, map, ppath, - flags | EXT4_GET_BLOCKS_CONVERT); - if (ret < 0) { - err = ret; - goto out2; - } + path = ext4_split_convert_extents(handle, inode, map, path, + flags | EXT4_GET_BLOCKS_CONVERT, allocated); + if (IS_ERR(path)) + return path; /* - * shouldn't get a 0 return when splitting an extent unless + * shouldn't get a 0 allocated when splitting an extent unless * m_len is 0 (bug) or extent has been corrupted */ - if (unlikely(ret == 0)) { + if (unlikely(*allocated == 0)) { EXT4_ERROR_INODE(inode, - "unexpected ret == 0, m_len = %u", + "unexpected allocated == 0, m_len = %u", map->m_len); err = -EFSCORRUPTED; - goto out2; + goto errout; } map->m_flags |= EXT4_MAP_UNWRITTEN; goto out; } /* IO end_io complete, convert the filled extent to written */ if (flags & EXT4_GET_BLOCKS_CONVERT) { - err = ext4_convert_unwritten_extents_endio(handle, inode, map, - ppath); - if (err < 0) - goto out2; + path = ext4_convert_unwritten_extents_endio(handle, inode, + map, path); + if (IS_ERR(path)) + return path; ext4_update_inode_fsync_trans(handle, inode, 1); goto map_out; } @@ -3904,36 +3967,37 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, * For buffered writes, at writepage time, etc. Convert a * discovered unwritten extent to written. */ - ret = ext4_ext_convert_to_initialized(handle, inode, map, ppath, flags); - if (ret < 0) { - err = ret; - goto out2; - } + path = ext4_ext_convert_to_initialized(handle, inode, map, path, + flags, allocated); + if (IS_ERR(path)) + return path; ext4_update_inode_fsync_trans(handle, inode, 1); /* - * shouldn't get a 0 return when converting an unwritten extent + * shouldn't get a 0 allocated when converting an unwritten extent * unless m_len is 0 (bug) or extent has been corrupted */ - if (unlikely(ret == 0)) { - EXT4_ERROR_INODE(inode, "unexpected ret == 0, m_len = %u", + if (unlikely(*allocated == 0)) { + EXT4_ERROR_INODE(inode, "unexpected allocated == 0, m_len = %u", map->m_len); err = -EFSCORRUPTED; - goto out2; + goto errout; } out: - allocated = ret; map->m_flags |= EXT4_MAP_NEW; map_out: map->m_flags |= EXT4_MAP_MAPPED; out1: map->m_pblk = newblock; - if (allocated > map->m_len) - allocated = map->m_len; - map->m_len = allocated; + if (*allocated > map->m_len) + *allocated = map->m_len; + map->m_len = *allocated; ext4_ext_show_leaf(inode, path); -out2: - return err ? err : allocated; + return path; + +errout: + ext4_free_ext_path(path); + return ERR_PTR(err); } /* @@ -4096,7 +4160,8 @@ again: insert_hole: /* Put just found gap into cache to speed up subsequent requests */ ext_debug(inode, " -> %u:%u\n", hole_start, len); - ext4_es_insert_extent(inode, hole_start, len, ~0, EXTENT_STATUS_HOLE); + ext4_es_insert_extent(inode, hole_start, len, ~0, + EXTENT_STATUS_HOLE, false); /* Update hole_len to reflect hole size after lblk */ if (hole_start != lblk) @@ -4130,7 +4195,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, struct ext4_extent newex, *ex, ex2; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); ext4_fsblk_t newblock = 0, pblk; - int err = 0, depth, ret; + int err = 0, depth; unsigned int allocated = 0, offset = 0; unsigned int allocated_clusters = 0; struct ext4_allocation_request ar; @@ -4140,10 +4205,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); /* find extent for this block */ - path = ext4_find_extent(inode, map->m_lblk, NULL, 0); + path = ext4_find_extent(inode, map->m_lblk, NULL, flags); if (IS_ERR(path)) { err = PTR_ERR(path); - path = NULL; goto out; } @@ -4192,8 +4256,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, */ if ((!ext4_ext_is_unwritten(ex)) && (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { - err = convert_initialized_extent(handle, - inode, map, &path, &allocated); + path = convert_initialized_extent(handle, + inode, map, path, &allocated); + if (IS_ERR(path)) + err = PTR_ERR(path); goto out; } else if (!ext4_ext_is_unwritten(ex)) { map->m_flags |= EXT4_MAP_MAPPED; @@ -4205,13 +4271,11 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, goto out; } - ret = ext4_ext_handle_unwritten_extents( - handle, inode, map, &path, flags, - allocated, newblock); - if (ret < 0) - err = ret; - else - allocated = ret; + path = ext4_ext_handle_unwritten_extents( + handle, inode, map, path, flags, + &allocated, newblock); + if (IS_ERR(path)) + err = PTR_ERR(path); goto out; } } @@ -4253,7 +4317,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, if (err) goto out; ar.lright = map->m_lblk; - err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, &ex2); + err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, + &ex2, flags); if (err < 0) goto out; @@ -4263,6 +4328,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, get_implied_cluster_alloc(inode->i_sb, map, &ex2, path)) { ar.len = allocated = map->m_len; newblock = map->m_pblk; + err = 0; goto got_allocated_blocks; } @@ -4335,8 +4401,9 @@ got_allocated_blocks: map->m_flags |= EXT4_MAP_UNWRITTEN; } - err = ext4_ext_insert_extent(handle, inode, &path, &newex, flags); - if (err) { + path = ext4_ext_insert_extent(handle, inode, path, &newex, flags); + if (IS_ERR(path)) { + err = PTR_ERR(path); if (allocated_clusters) { int fb_flags = 0; @@ -4356,43 +4423,6 @@ got_allocated_blocks: } /* - * Reduce the reserved cluster count to reflect successful deferred - * allocation of delayed allocated clusters or direct allocation of - * clusters discovered to be delayed allocated. Once allocated, a - * cluster is not included in the reserved count. - */ - if (test_opt(inode->i_sb, DELALLOC) && allocated_clusters) { - if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) { - /* - * When allocating delayed allocated clusters, simply - * reduce the reserved cluster count and claim quota - */ - ext4_da_update_reserve_space(inode, allocated_clusters, - 1); - } else { - ext4_lblk_t lblk, len; - unsigned int n; - - /* - * When allocating non-delayed allocated clusters - * (from fallocate, filemap, DIO, or clusters - * allocated when delalloc has been disabled by - * ext4_nonda_switch), reduce the reserved cluster - * count by the number of allocated clusters that - * have previously been delayed allocated. Quota - * has been claimed by ext4_mb_new_blocks() above, - * so release the quota reservations made for any - * previously delayed allocated clusters. - */ - lblk = EXT4_LBLK_CMASK(sbi, map->m_lblk); - len = allocated_clusters << sbi->s_cluster_bits; - n = ext4_es_delayed_clu(inode, lblk, len); - if (n > 0) - ext4_da_update_reserve_space(inode, (int) n, 0); - } - } - - /* * Cache the extent and update transaction to commit on fdatasync only * when it is _not_ an unwritten extent. */ @@ -4407,6 +4437,20 @@ got_allocated_blocks: allocated = map->m_len; ext4_ext_show_leaf(inode, path); out: + /* + * We never use EXT4_GET_BLOCKS_QUERY_LAST_IN_LEAF with CREATE flag. + * So we know that the depth used here is correct, since there was no + * block allocation done if EXT4_GET_BLOCKS_QUERY_LAST_IN_LEAF is set. + * If tomorrow we start using this QUERY flag with CREATE, then we will + * need to re-calculate the depth as it might have changed due to block + * allocation. + */ + if (flags & EXT4_GET_BLOCKS_QUERY_LAST_IN_LEAF) { + WARN_ON_ONCE(flags & EXT4_GET_BLOCKS_CREATE); + if (!err && ex && (ex == EXT_LAST_EXTENT(path[depth].p_hdr))) + map->m_flags |= EXT4_MAP_QUERY_LAST_IN_LEAF; + } + ext4_free_ext_path(path); trace_ext4_ext_map_blocks_exit(inode, flags, map, @@ -4456,7 +4500,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, int depth = 0; struct ext4_map_blocks map; unsigned int credits; - loff_t epos; + loff_t epos, old_size = i_size_read(inode); BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)); map.m_lblk = offset; @@ -4515,6 +4559,11 @@ retry: if (ext4_update_inode_size(inode, epos) & 0x1) inode_set_mtime_to_ts(inode, inode_get_ctime(inode)); + if (epos > old_size) { + pagecache_isize_extended(inode, old_size, epos); + ext4_zero_partial_blocks(handle, inode, + old_size, epos - old_size); + } } ret2 = ext4_mark_inode_dirty(handle, inode); ext4_update_inode_fsync_trans(handle, inode, 1); @@ -4537,131 +4586,65 @@ static long ext4_zero_range(struct file *file, loff_t offset, loff_t len, int mode) { struct inode *inode = file_inode(file); - struct address_space *mapping = file->f_mapping; handle_t *handle = NULL; - unsigned int max_blocks; loff_t new_size = 0; - int ret = 0; - int flags; - int credits; - int partial_begin, partial_end; - loff_t start, end; - ext4_lblk_t lblk; + loff_t end = offset + len; + ext4_lblk_t start_lblk, end_lblk; + unsigned int blocksize = i_blocksize(inode); unsigned int blkbits = inode->i_blkbits; + int ret, flags, credits; trace_ext4_zero_range(inode, offset, len, mode); + WARN_ON_ONCE(!inode_is_locked(inode)); - /* - * Round up offset. This is not fallocate, we need to zero out - * blocks, so convert interior block aligned part of the range to - * unwritten and possibly manually zero out unaligned parts of the - * range. Here, start and partial_begin are inclusive, end and - * partial_end are exclusive. - */ - start = round_up(offset, 1 << blkbits); - end = round_down((offset + len), 1 << blkbits); - - if (start < offset || end > offset + len) - return -EINVAL; - partial_begin = offset & ((1 << blkbits) - 1); - partial_end = (offset + len) & ((1 << blkbits) - 1); - - lblk = start >> blkbits; - max_blocks = (end >> blkbits); - if (max_blocks < lblk) - max_blocks = 0; - else - max_blocks -= lblk; - - inode_lock(inode); - - /* - * Indirect files do not support unwritten extents - */ - if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { - ret = -EOPNOTSUPP; - goto out_mutex; - } + /* Indirect files do not support unwritten extents */ + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) + return -EOPNOTSUPP; if (!(mode & FALLOC_FL_KEEP_SIZE) && - (offset + len > inode->i_size || - offset + len > EXT4_I(inode)->i_disksize)) { - new_size = offset + len; + (end > inode->i_size || end > EXT4_I(inode)->i_disksize)) { + new_size = end; ret = inode_newsize_ok(inode, new_size); if (ret) - goto out_mutex; + return ret; } flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT; - - /* Wait all existing dio workers, newcomers will block on i_rwsem */ - inode_dio_wait(inode); - - ret = file_modified(file); - if (ret) - goto out_mutex; - /* Preallocate the range including the unaligned edges */ - if (partial_begin || partial_end) { - ret = ext4_alloc_file_blocks(file, - round_down(offset, 1 << blkbits) >> blkbits, - (round_up((offset + len), 1 << blkbits) - - round_down(offset, 1 << blkbits)) >> blkbits, - new_size, flags); - if (ret) - goto out_mutex; + if (!IS_ALIGNED(offset | end, blocksize)) { + ext4_lblk_t alloc_lblk = offset >> blkbits; + ext4_lblk_t len_lblk = EXT4_MAX_BLOCKS(len, offset, blkbits); + ret = ext4_alloc_file_blocks(file, alloc_lblk, len_lblk, + new_size, flags); + if (ret) + return ret; } - /* Zero range excluding the unaligned edges */ - if (max_blocks > 0) { - flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | - EXT4_EX_NOCACHE); - - /* - * Prevent page faults from reinstantiating pages we have - * released from page cache. - */ - filemap_invalidate_lock(mapping); - - ret = ext4_break_layouts(inode); - if (ret) { - filemap_invalidate_unlock(mapping); - goto out_mutex; - } - - ret = ext4_update_disksize_before_punch(inode, offset, len); - if (ret) { - filemap_invalidate_unlock(mapping); - goto out_mutex; - } - - /* - * For journalled data we need to write (and checkpoint) pages - * before discarding page cache to avoid inconsitent data on - * disk in case of crash before zeroing trans is committed. - */ - if (ext4_should_journal_data(inode)) { - ret = filemap_write_and_wait_range(mapping, start, - end - 1); - if (ret) { - filemap_invalidate_unlock(mapping); - goto out_mutex; - } - } + ret = ext4_update_disksize_before_punch(inode, offset, len); + if (ret) + return ret; - /* Now release the pages and zero block aligned part of pages */ - truncate_pagecache_range(inode, start, end - 1); - inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); + /* Now release the pages and zero block aligned part of pages */ + ret = ext4_truncate_page_cache_block_range(inode, offset, end); + if (ret) + return ret; - ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, - flags); - filemap_invalidate_unlock(mapping); + /* Zero range excluding the unaligned edges */ + start_lblk = EXT4_B_TO_LBLK(inode, offset); + end_lblk = end >> blkbits; + if (end_lblk > start_lblk) { + ext4_lblk_t zero_blks = end_lblk - start_lblk; + + flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | EXT4_EX_NOCACHE); + ret = ext4_alloc_file_blocks(file, start_lblk, zero_blks, + new_size, flags); if (ret) - goto out_mutex; + return ret; } - if (!partial_begin && !partial_end) - goto out_mutex; + /* Finish zeroing out if it doesn't contain partial block */ + if (IS_ALIGNED(offset | end, blocksize)) + return ret; /* * In worst case we have to writeout two nonadjacent unwritten @@ -4674,27 +4657,69 @@ static long ext4_zero_range(struct file *file, loff_t offset, if (IS_ERR(handle)) { ret = PTR_ERR(handle); ext4_std_error(inode->i_sb, ret); - goto out_mutex; + return ret; } - inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); + /* Zero out partial block at the edges of the range */ + ret = ext4_zero_partial_blocks(handle, inode, offset, len); + if (ret) + goto out_handle; + if (new_size) ext4_update_inode_size(inode, new_size); ret = ext4_mark_inode_dirty(handle, inode); if (unlikely(ret)) goto out_handle; - /* Zero out partial block at the edges of the range */ - ret = ext4_zero_partial_blocks(handle, inode, offset, len); - if (ret >= 0) - ext4_update_inode_fsync_trans(handle, inode, 1); + ext4_update_inode_fsync_trans(handle, inode, 1); if (file->f_flags & O_SYNC) ext4_handle_sync(handle); out_handle: ext4_journal_stop(handle); -out_mutex: - inode_unlock(inode); + return ret; +} + +static long ext4_do_fallocate(struct file *file, loff_t offset, + loff_t len, int mode) +{ + struct inode *inode = file_inode(file); + loff_t end = offset + len; + loff_t new_size = 0; + ext4_lblk_t start_lblk, len_lblk; + int ret; + + trace_ext4_fallocate_enter(inode, offset, len, mode); + WARN_ON_ONCE(!inode_is_locked(inode)); + + start_lblk = offset >> inode->i_blkbits; + len_lblk = EXT4_MAX_BLOCKS(len, offset, inode->i_blkbits); + + /* We only support preallocation for extent-based files only. */ + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { + ret = -EOPNOTSUPP; + goto out; + } + + if (!(mode & FALLOC_FL_KEEP_SIZE) && + (end > inode->i_size || end > EXT4_I(inode)->i_disksize)) { + new_size = end; + ret = inode_newsize_ok(inode, new_size); + if (ret) + goto out; + } + + ret = ext4_alloc_file_blocks(file, start_lblk, len_lblk, new_size, + EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT); + if (ret) + goto out; + + if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) { + ret = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal, + EXT4_I(inode)->i_sync_tid); + } +out: + trace_ext4_fallocate_exit(inode, offset, len_lblk, ret); return ret; } @@ -4708,12 +4733,8 @@ out_mutex: long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); - loff_t new_size = 0; - unsigned int max_blocks; - int ret = 0; - int flags; - ext4_lblk_t lblk; - unsigned int blkbits = inode->i_blkbits; + struct address_space *mapping = file->f_mapping; + int ret; /* * Encrypted inodes can't handle collapse range or insert @@ -4733,74 +4754,135 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) inode_lock(inode); ret = ext4_convert_inline_data(inode); - inode_unlock(inode); if (ret) - goto exit; + goto out_inode_lock; - if (mode & FALLOC_FL_PUNCH_HOLE) { - ret = ext4_punch_hole(file, offset, len); - goto exit; - } + /* Wait all existing dio workers, newcomers will block on i_rwsem */ + inode_dio_wait(inode); - if (mode & FALLOC_FL_COLLAPSE_RANGE) { - ret = ext4_collapse_range(file, offset, len); - goto exit; - } + ret = file_modified(file); + if (ret) + goto out_inode_lock; - if (mode & FALLOC_FL_INSERT_RANGE) { - ret = ext4_insert_range(file, offset, len); - goto exit; + if ((mode & FALLOC_FL_MODE_MASK) == FALLOC_FL_ALLOCATE_RANGE) { + ret = ext4_do_fallocate(file, offset, len, mode); + goto out_inode_lock; } - if (mode & FALLOC_FL_ZERO_RANGE) { + /* + * Follow-up operations will drop page cache, hold invalidate lock + * to prevent page faults from reinstantiating pages we have + * released from page cache. + */ + filemap_invalidate_lock(mapping); + + ret = ext4_break_layouts(inode); + if (ret) + goto out_invalidate_lock; + + if (mode & FALLOC_FL_PUNCH_HOLE) + ret = ext4_punch_hole(file, offset, len); + else if (mode & FALLOC_FL_COLLAPSE_RANGE) + ret = ext4_collapse_range(file, offset, len); + else if (mode & FALLOC_FL_INSERT_RANGE) + ret = ext4_insert_range(file, offset, len); + else if (mode & FALLOC_FL_ZERO_RANGE) ret = ext4_zero_range(file, offset, len, mode); - goto exit; - } - trace_ext4_fallocate_enter(inode, offset, len, mode); - lblk = offset >> blkbits; + else + ret = -EOPNOTSUPP; + +out_invalidate_lock: + filemap_invalidate_unlock(mapping); +out_inode_lock: + inode_unlock(inode); + return ret; +} + +/* + * This function converts a range of blocks to written extents. The caller of + * this function will pass the start offset and the size. all unwritten extents + * within this range will be converted to written extents. + * + * This function is called from the direct IO end io call back function for + * atomic writes, to convert the unwritten extents after IO is completed. + * + * Note that the requirement for atomic writes is that all conversion should + * happen atomically in a single fs journal transaction. We mainly only allocate + * unwritten extents either on a hole on a pre-exiting unwritten extent range in + * ext4_map_blocks_atomic_write(). The only case where we can have multiple + * unwritten extents in a range [offset, offset+len) is when there is a split + * unwritten extent between two leaf nodes which was cached in extent status + * cache during ext4_iomap_alloc() time. That will allow + * ext4_map_blocks_atomic_write() to return the unwritten extent range w/o going + * into the slow path. That means we might need a loop for conversion of this + * unwritten extent split across leaf block within a single journal transaction. + * Split extents across leaf nodes is a rare case, but let's still handle that + * to meet the requirements of multi-fsblock atomic writes. + * + * Returns 0 on success. + */ +int ext4_convert_unwritten_extents_atomic(handle_t *handle, struct inode *inode, + loff_t offset, ssize_t len) +{ + unsigned int max_blocks; + int ret = 0, ret2 = 0, ret3 = 0; + struct ext4_map_blocks map; + unsigned int blkbits = inode->i_blkbits; + unsigned int credits = 0; + int flags = EXT4_GET_BLOCKS_IO_CONVERT_EXT | EXT4_EX_NOCACHE; + map.m_lblk = offset >> blkbits; max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits); - flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT; - inode_lock(inode); + if (!handle) { + /* + * TODO: An optimization can be added later by having an extent + * status flag e.g. EXTENT_STATUS_SPLIT_LEAF. If we query that + * it can tell if the extent in the cache is a split extent. + * But for now let's assume pextents as 2 always. + */ + credits = ext4_meta_trans_blocks(inode, max_blocks, 2); + } - /* - * We only support preallocation for extent-based files only - */ - if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { - ret = -EOPNOTSUPP; - goto out; + if (credits) { + handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + return ret; + } } - if (!(mode & FALLOC_FL_KEEP_SIZE) && - (offset + len > inode->i_size || - offset + len > EXT4_I(inode)->i_disksize)) { - new_size = offset + len; - ret = inode_newsize_ok(inode, new_size); - if (ret) - goto out; + while (ret >= 0 && ret < max_blocks) { + map.m_lblk += ret; + map.m_len = (max_blocks -= ret); + ret = ext4_map_blocks(handle, inode, &map, flags); + if (ret != max_blocks) + ext4_msg(inode->i_sb, KERN_INFO, + "inode #%lu: block %u: len %u: " + "split block mapping found for atomic write, " + "ret = %d", + inode->i_ino, map.m_lblk, + map.m_len, ret); + if (ret <= 0) + break; } - /* Wait all existing dio workers, newcomers will block on i_rwsem */ - inode_dio_wait(inode); + ret2 = ext4_mark_inode_dirty(handle, inode); - ret = file_modified(file); - if (ret) - goto out; + if (credits) { + ret3 = ext4_journal_stop(handle); + if (unlikely(ret3)) + ret2 = ret3; + } - ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags); - if (ret) - goto out; + if (ret <= 0 || ret2) + ext4_warning(inode->i_sb, + "inode #%lu: block %u: len %u: " + "returned %d or %d", + inode->i_ino, map.m_lblk, + map.m_len, ret, ret2); - if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) { - ret = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal, - EXT4_I(inode)->i_sync_tid); - } -out: - inode_unlock(inode); - trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); -exit: - return ret; + return ret > 0 ? ret2 : ret; } /* @@ -4842,8 +4924,14 @@ int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, break; } } + /* + * Do not cache any unrelated extents, as it does not hold the + * i_rwsem or invalidate_lock, which could corrupt the extent + * status tree. + */ ret = ext4_map_blocks(handle, inode, &map, - EXT4_GET_BLOCKS_IO_CONVERT_EXT); + EXT4_GET_BLOCKS_IO_CONVERT_EXT | + EXT4_EX_NOCACHE); if (ret <= 0) ext4_warning(inode->i_sb, "inode #%lu: block %u: len %u: " @@ -4954,12 +5042,7 @@ static const struct iomap_ops ext4_iomap_xattr_ops = { static int ext4_fiemap_check_ranges(struct inode *inode, u64 start, u64 *len) { - u64 maxbytes; - - if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) - maxbytes = inode->i_sb->s_maxbytes; - else - maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes; + u64 maxbytes = ext4_get_maxbytes(inode); if (*len == 0) return -EINVAL; @@ -4979,10 +5062,11 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, { int error = 0; + inode_lock_shared(inode); if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) { error = ext4_ext_precache(inode); if (error) - return error; + goto unlock; fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE; } @@ -4993,15 +5077,19 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, */ error = ext4_fiemap_check_ranges(inode, start, &len); if (error) - return error; + goto unlock; if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR; - return iomap_fiemap(inode, fieinfo, start, len, - &ext4_iomap_xattr_ops); + error = iomap_fiemap(inode, fieinfo, start, len, + &ext4_iomap_xattr_ops); + } else { + error = iomap_fiemap(inode, fieinfo, start, len, + &ext4_iomap_report_ops); } - - return iomap_fiemap(inode, fieinfo, start, len, &ext4_iomap_report_ops); +unlock: + inode_unlock_shared(inode); + return error; } int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo, @@ -5022,7 +5110,9 @@ int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo, } if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) { + inode_lock_shared(inode); error = ext4_ext_precache(inode); + inode_unlock_shared(inode); if (error) return error; fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE; @@ -5183,7 +5273,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, * won't be shifted beyond EXT_MAX_BLOCKS. */ if (SHIFT == SHIFT_LEFT) { - path = ext4_find_extent(inode, start - 1, &path, + path = ext4_find_extent(inode, start - 1, path, EXT4_EX_NOCACHE); if (IS_ERR(path)) return PTR_ERR(path); @@ -5232,7 +5322,7 @@ again: * becomes NULL to indicate the end of the loop. */ while (iterator && start <= stop) { - path = ext4_find_extent(inode, *iterator, &path, + path = ext4_find_extent(inode, *iterator, path, EXT4_EX_NOCACHE); if (IS_ERR(path)) return PTR_ERR(path); @@ -5301,109 +5391,74 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len) struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; struct address_space *mapping = inode->i_mapping; - ext4_lblk_t punch_start, punch_stop; + loff_t end = offset + len; + ext4_lblk_t start_lblk, end_lblk; handle_t *handle; unsigned int credits; - loff_t new_size, ioffset; + loff_t start, new_size; int ret; - /* - * We need to test this early because xfstests assumes that a - * collapse range of (0, 1) will return EOPNOTSUPP if the file - * system does not support collapse range. - */ + trace_ext4_collapse_range(inode, offset, len); + WARN_ON_ONCE(!inode_is_locked(inode)); + + /* Currently just for extent based files */ if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) return -EOPNOTSUPP; - /* Collapse range works only on fs cluster size aligned regions. */ if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb))) return -EINVAL; - - trace_ext4_collapse_range(inode, offset, len); - - punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb); - punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb); - - inode_lock(inode); /* * There is no need to overlap collapse range with EOF, in which case * it is effectively a truncate operation */ - if (offset + len >= inode->i_size) { - ret = -EINVAL; - goto out_mutex; - } - - /* Currently just for extent based files */ - if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { - ret = -EOPNOTSUPP; - goto out_mutex; - } - - /* Wait for existing dio to complete */ - inode_dio_wait(inode); - - ret = file_modified(file); - if (ret) - goto out_mutex; - - /* - * Prevent page faults from reinstantiating pages we have released from - * page cache. - */ - filemap_invalidate_lock(mapping); - - ret = ext4_break_layouts(inode); - if (ret) - goto out_mmap; + if (end >= inode->i_size) + return -EINVAL; /* + * Write tail of the last page before removed range and data that + * will be shifted since they will get removed from the page cache + * below. We are also protected from pages becoming dirty by + * i_rwsem and invalidate_lock. * Need to round down offset to be aligned with page size boundary * for page size > block size. */ - ioffset = round_down(offset, PAGE_SIZE); - /* - * Write tail of the last page before removed range since it will get - * removed from the page cache below. - */ - ret = filemap_write_and_wait_range(mapping, ioffset, offset); + start = round_down(offset, PAGE_SIZE); + ret = filemap_write_and_wait_range(mapping, start, offset); + if (!ret) + ret = filemap_write_and_wait_range(mapping, end, LLONG_MAX); if (ret) - goto out_mmap; - /* - * Write data that will be shifted to preserve them when discarding - * page cache below. We are also protected from pages becoming dirty - * by i_rwsem and invalidate_lock. - */ - ret = filemap_write_and_wait_range(mapping, offset + len, - LLONG_MAX); - if (ret) - goto out_mmap; - truncate_pagecache(inode, ioffset); + return ret; + + truncate_pagecache(inode, start); credits = ext4_writepage_trans_blocks(inode); handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out_mmap; - } + if (IS_ERR(handle)) + return PTR_ERR(handle); + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle); + start_lblk = offset >> inode->i_blkbits; + end_lblk = (offset + len) >> inode->i_blkbits; + + ext4_check_map_extents_env(inode); + down_write(&EXT4_I(inode)->i_data_sem); ext4_discard_preallocations(inode); - ext4_es_remove_extent(inode, punch_start, EXT_MAX_BLOCKS - punch_start); + ext4_es_remove_extent(inode, start_lblk, EXT_MAX_BLOCKS - start_lblk); - ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1); + ret = ext4_ext_remove_space(inode, start_lblk, end_lblk - 1); if (ret) { up_write(&EXT4_I(inode)->i_data_sem); - goto out_stop; + goto out_handle; } ext4_discard_preallocations(inode); - ret = ext4_ext_shift_extents(inode, handle, punch_stop, - punch_stop - punch_start, SHIFT_LEFT); + ret = ext4_ext_shift_extents(inode, handle, end_lblk, + end_lblk - start_lblk, SHIFT_LEFT); if (ret) { up_write(&EXT4_I(inode)->i_data_sem); - goto out_stop; + goto out_handle; } new_size = inode->i_size - len; @@ -5411,18 +5466,16 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len) EXT4_I(inode)->i_disksize = new_size; up_write(&EXT4_I(inode)->i_data_sem); - if (IS_SYNC(inode)) - ext4_handle_sync(handle); - inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); ret = ext4_mark_inode_dirty(handle, inode); + if (ret) + goto out_handle; + ext4_update_inode_fsync_trans(handle, inode, 1); + if (IS_SYNC(inode)) + ext4_handle_sync(handle); -out_stop: +out_handle: ext4_journal_stop(handle); -out_mmap: - filemap_invalidate_unlock(mapping); -out_mutex: - inode_unlock(inode); return ret; } @@ -5442,99 +5495,65 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len) handle_t *handle; struct ext4_ext_path *path; struct ext4_extent *extent; - ext4_lblk_t offset_lblk, len_lblk, ee_start_lblk = 0; + ext4_lblk_t start_lblk, len_lblk, ee_start_lblk = 0; unsigned int credits, ee_len; - int ret = 0, depth, split_flag = 0; - loff_t ioffset; + int ret, depth, split_flag = 0; + loff_t start; - /* - * We need to test this early because xfstests assumes that an - * insert range of (0, 1) will return EOPNOTSUPP if the file - * system does not support insert range. - */ + trace_ext4_insert_range(inode, offset, len); + WARN_ON_ONCE(!inode_is_locked(inode)); + + /* Currently just for extent based files */ if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) return -EOPNOTSUPP; - /* Insert range works only on fs cluster size aligned regions. */ if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb))) return -EINVAL; - - trace_ext4_insert_range(inode, offset, len); - - offset_lblk = offset >> EXT4_BLOCK_SIZE_BITS(sb); - len_lblk = len >> EXT4_BLOCK_SIZE_BITS(sb); - - inode_lock(inode); - /* Currently just for extent based files */ - if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { - ret = -EOPNOTSUPP; - goto out_mutex; - } - - /* Check whether the maximum file size would be exceeded */ - if (len > inode->i_sb->s_maxbytes - inode->i_size) { - ret = -EFBIG; - goto out_mutex; - } - /* Offset must be less than i_size */ - if (offset >= inode->i_size) { - ret = -EINVAL; - goto out_mutex; - } - - /* Wait for existing dio to complete */ - inode_dio_wait(inode); - - ret = file_modified(file); - if (ret) - goto out_mutex; + if (offset >= inode->i_size) + return -EINVAL; + /* Check whether the maximum file size would be exceeded */ + if (len > inode->i_sb->s_maxbytes - inode->i_size) + return -EFBIG; /* - * Prevent page faults from reinstantiating pages we have released from - * page cache. + * Write out all dirty pages. Need to round down to align start offset + * to page size boundary for page size > block size. */ - filemap_invalidate_lock(mapping); - - ret = ext4_break_layouts(inode); + start = round_down(offset, PAGE_SIZE); + ret = filemap_write_and_wait_range(mapping, start, LLONG_MAX); if (ret) - goto out_mmap; + return ret; - /* - * Need to round down to align start offset to page size boundary - * for page size > block size. - */ - ioffset = round_down(offset, PAGE_SIZE); - /* Write out all dirty pages */ - ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, - LLONG_MAX); - if (ret) - goto out_mmap; - truncate_pagecache(inode, ioffset); + truncate_pagecache(inode, start); credits = ext4_writepage_trans_blocks(inode); handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out_mmap; - } + if (IS_ERR(handle)) + return PTR_ERR(handle); + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle); /* Expand file to avoid data loss if there is error while shifting */ inode->i_size += len; EXT4_I(inode)->i_disksize += len; - inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); ret = ext4_mark_inode_dirty(handle, inode); if (ret) - goto out_stop; + goto out_handle; + + start_lblk = offset >> inode->i_blkbits; + len_lblk = len >> inode->i_blkbits; + + ext4_check_map_extents_env(inode); down_write(&EXT4_I(inode)->i_data_sem); ext4_discard_preallocations(inode); - path = ext4_find_extent(inode, offset_lblk, NULL, 0); + path = ext4_find_extent(inode, start_lblk, NULL, 0); if (IS_ERR(path)) { up_write(&EXT4_I(inode)->i_data_sem); - goto out_stop; + ret = PTR_ERR(path); + goto out_handle; } depth = ext_depth(inode); @@ -5544,51 +5563,47 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len) ee_len = ext4_ext_get_actual_len(extent); /* - * If offset_lblk is not the starting block of extent, split - * the extent @offset_lblk + * If start_lblk is not the starting block of extent, split + * the extent @start_lblk */ - if ((offset_lblk > ee_start_lblk) && - (offset_lblk < (ee_start_lblk + ee_len))) { + if ((start_lblk > ee_start_lblk) && + (start_lblk < (ee_start_lblk + ee_len))) { if (ext4_ext_is_unwritten(extent)) split_flag = EXT4_EXT_MARK_UNWRIT1 | EXT4_EXT_MARK_UNWRIT2; - ret = ext4_split_extent_at(handle, inode, &path, - offset_lblk, split_flag, + path = ext4_split_extent_at(handle, inode, path, + start_lblk, split_flag, EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_METADATA_NOFAIL); } - ext4_free_ext_path(path); - if (ret < 0) { + if (IS_ERR(path)) { up_write(&EXT4_I(inode)->i_data_sem); - goto out_stop; + ret = PTR_ERR(path); + goto out_handle; } - } else { - ext4_free_ext_path(path); } - ext4_es_remove_extent(inode, offset_lblk, EXT_MAX_BLOCKS - offset_lblk); + ext4_free_ext_path(path); + ext4_es_remove_extent(inode, start_lblk, EXT_MAX_BLOCKS - start_lblk); /* - * if offset_lblk lies in a hole which is at start of file, use + * if start_lblk lies in a hole which is at start of file, use * ee_start_lblk to shift extents */ ret = ext4_ext_shift_extents(inode, handle, - max(ee_start_lblk, offset_lblk), len_lblk, SHIFT_RIGHT); - + max(ee_start_lblk, start_lblk), len_lblk, SHIFT_RIGHT); up_write(&EXT4_I(inode)->i_data_sem); + if (ret) + goto out_handle; + + ext4_update_inode_fsync_trans(handle, inode, 1); if (IS_SYNC(inode)) ext4_handle_sync(handle); - if (ret >= 0) - ext4_update_inode_fsync_trans(handle, inode, 1); -out_stop: +out_handle: ext4_journal_stop(handle); -out_mmap: - filemap_invalidate_unlock(mapping); -out_mutex: - inode_unlock(inode); return ret; } @@ -5635,25 +5650,21 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1, int e1_len, e2_len, len; int split = 0; - path1 = ext4_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE); + path1 = ext4_find_extent(inode1, lblk1, path1, EXT4_EX_NOCACHE); if (IS_ERR(path1)) { *erp = PTR_ERR(path1); - path1 = NULL; - finish: - count = 0; - goto repeat; + goto errout; } - path2 = ext4_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE); + path2 = ext4_find_extent(inode2, lblk2, path2, EXT4_EX_NOCACHE); if (IS_ERR(path2)) { *erp = PTR_ERR(path2); - path2 = NULL; - goto finish; + goto errout; } ex1 = path1[path1->p_depth].p_ext; ex2 = path2[path2->p_depth].p_ext; /* Do we have something to swap ? */ if (unlikely(!ex2 || !ex1)) - goto finish; + goto errout; e1_blk = le32_to_cpu(ex1->ee_block); e2_blk = le32_to_cpu(ex2->ee_block); @@ -5675,7 +5686,7 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1, next2 = e2_blk; /* Do we have something to swap */ if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS) - goto finish; + goto errout; /* Move to the rightest boundary */ len = next1 - lblk1; if (len < next2 - lblk2) @@ -5685,28 +5696,32 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1, lblk1 += len; lblk2 += len; count -= len; - goto repeat; + continue; } /* Prepare left boundary */ if (e1_blk < lblk1) { split = 1; - *erp = ext4_force_split_extent_at(handle, inode1, - &path1, lblk1, 0); - if (unlikely(*erp)) - goto finish; + path1 = ext4_force_split_extent_at(handle, inode1, + path1, lblk1, 0); + if (IS_ERR(path1)) { + *erp = PTR_ERR(path1); + goto errout; + } } if (e2_blk < lblk2) { split = 1; - *erp = ext4_force_split_extent_at(handle, inode2, - &path2, lblk2, 0); - if (unlikely(*erp)) - goto finish; + path2 = ext4_force_split_extent_at(handle, inode2, + path2, lblk2, 0); + if (IS_ERR(path2)) { + *erp = PTR_ERR(path2); + goto errout; + } } /* ext4_split_extent_at() may result in leaf extent split, * path must to be revalidated. */ if (split) - goto repeat; + continue; /* Prepare right boundary */ len = count; @@ -5717,30 +5732,34 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1, if (len != e1_len) { split = 1; - *erp = ext4_force_split_extent_at(handle, inode1, - &path1, lblk1 + len, 0); - if (unlikely(*erp)) - goto finish; + path1 = ext4_force_split_extent_at(handle, inode1, + path1, lblk1 + len, 0); + if (IS_ERR(path1)) { + *erp = PTR_ERR(path1); + goto errout; + } } if (len != e2_len) { split = 1; - *erp = ext4_force_split_extent_at(handle, inode2, - &path2, lblk2 + len, 0); - if (*erp) - goto finish; + path2 = ext4_force_split_extent_at(handle, inode2, + path2, lblk2 + len, 0); + if (IS_ERR(path2)) { + *erp = PTR_ERR(path2); + goto errout; + } } /* ext4_split_extent_at() may result in leaf extent split, * path must to be revalidated. */ if (split) - goto repeat; + continue; BUG_ON(e2_len != e1_len); *erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth); if (unlikely(*erp)) - goto finish; + goto errout; *erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth); if (unlikely(*erp)) - goto finish; + goto errout; /* Both extents are fully inside boundaries. Swap it now */ tmp_ex = *ex1; @@ -5758,7 +5777,7 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1, *erp = ext4_ext_dirty(handle, inode2, path2 + path2->p_depth); if (unlikely(*erp)) - goto finish; + goto errout; *erp = ext4_ext_dirty(handle, inode1, path1 + path1->p_depth); /* @@ -5768,17 +5787,17 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1, * aborted anyway. */ if (unlikely(*erp)) - goto finish; + goto errout; + lblk1 += len; lblk2 += len; replaced_count += len; count -= len; - - repeat: - ext4_free_ext_path(path1); - ext4_free_ext_path(path2); - path1 = path2 = NULL; } + +errout: + ext4_free_ext_path(path1); + ext4_free_ext_path(path2); return replaced_count; } @@ -5813,11 +5832,8 @@ int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu) /* search for the extent closest to the first block in the cluster */ path = ext4_find_extent(inode, EXT4_C2B(sbi, lclu), NULL, 0); - if (IS_ERR(path)) { - err = PTR_ERR(path); - path = NULL; - goto out; - } + if (IS_ERR(path)) + return PTR_ERR(path); depth = ext_depth(inode); @@ -5879,7 +5895,7 @@ out: int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start, int len, int unwritten, ext4_fsblk_t pblk) { - struct ext4_ext_path *path = NULL, *ppath; + struct ext4_ext_path *path; struct ext4_extent *ex; int ret; @@ -5895,30 +5911,34 @@ int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start, if (le32_to_cpu(ex->ee_block) != start || ext4_ext_get_actual_len(ex) != len) { /* We need to split this extent to match our extent first */ - ppath = path; down_write(&EXT4_I(inode)->i_data_sem); - ret = ext4_force_split_extent_at(NULL, inode, &ppath, start, 1); + path = ext4_force_split_extent_at(NULL, inode, path, start, 1); up_write(&EXT4_I(inode)->i_data_sem); - if (ret) + if (IS_ERR(path)) { + ret = PTR_ERR(path); goto out; - kfree(path); - path = ext4_find_extent(inode, start, NULL, 0); + } + + path = ext4_find_extent(inode, start, path, 0); if (IS_ERR(path)) - return -1; - ppath = path; + return PTR_ERR(path); + ex = path[path->p_depth].p_ext; WARN_ON(le32_to_cpu(ex->ee_block) != start); + if (ext4_ext_get_actual_len(ex) != len) { down_write(&EXT4_I(inode)->i_data_sem); - ret = ext4_force_split_extent_at(NULL, inode, &ppath, - start + len, 1); + path = ext4_force_split_extent_at(NULL, inode, path, + start + len, 1); up_write(&EXT4_I(inode)->i_data_sem); - if (ret) + if (IS_ERR(path)) { + ret = PTR_ERR(path); goto out; - kfree(path); - path = ext4_find_extent(inode, start, NULL, 0); + } + + path = ext4_find_extent(inode, start, path, 0); if (IS_ERR(path)) - return -EINVAL; + return PTR_ERR(path); ex = path[path->p_depth].p_ext; } } @@ -6000,12 +6020,9 @@ int ext4_ext_replay_set_iblocks(struct inode *inode) if (IS_ERR(path)) return PTR_ERR(path); ex = path[path->p_depth].p_ext; - if (!ex) { - ext4_free_ext_path(path); + if (!ex) goto out; - } end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex); - ext4_free_ext_path(path); /* Count the number of data blocks */ cur = 0; @@ -6031,32 +6048,28 @@ int ext4_ext_replay_set_iblocks(struct inode *inode) ret = skip_hole(inode, &cur); if (ret < 0) goto out; - path = ext4_find_extent(inode, cur, NULL, 0); + path = ext4_find_extent(inode, cur, path, 0); if (IS_ERR(path)) goto out; numblks += path->p_depth; - ext4_free_ext_path(path); while (cur < end) { - path = ext4_find_extent(inode, cur, NULL, 0); + path = ext4_find_extent(inode, cur, path, 0); if (IS_ERR(path)) break; ex = path[path->p_depth].p_ext; - if (!ex) { - ext4_free_ext_path(path); - return 0; - } + if (!ex) + goto cleanup; + cur = max(cur + 1, le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex)); ret = skip_hole(inode, &cur); - if (ret < 0) { - ext4_free_ext_path(path); + if (ret < 0) break; - } - path2 = ext4_find_extent(inode, cur, NULL, 0); - if (IS_ERR(path2)) { - ext4_free_ext_path(path); + + path2 = ext4_find_extent(inode, cur, path2, 0); + if (IS_ERR(path2)) break; - } + for (i = 0; i <= max(path->p_depth, path2->p_depth); i++) { cmp1 = cmp2 = 0; if (i <= path->p_depth) @@ -6068,13 +6081,14 @@ int ext4_ext_replay_set_iblocks(struct inode *inode) if (cmp1 != cmp2 && cmp2 != 0) numblks++; } - ext4_free_ext_path(path); - ext4_free_ext_path(path2); } out: inode->i_blocks = numblks << (inode->i_sb->s_blocksize_bits - 9); ext4_mark_inode_dirty(NULL, inode); +cleanup: + ext4_free_ext_path(path); + ext4_free_ext_path(path2); return 0; } @@ -6095,12 +6109,9 @@ int ext4_ext_clear_bb(struct inode *inode) if (IS_ERR(path)) return PTR_ERR(path); ex = path[path->p_depth].p_ext; - if (!ex) { - ext4_free_ext_path(path); - return 0; - } + if (!ex) + goto out; end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex); - ext4_free_ext_path(path); cur = 0; while (cur < end) { @@ -6110,16 +6121,16 @@ int ext4_ext_clear_bb(struct inode *inode) if (ret < 0) break; if (ret > 0) { - path = ext4_find_extent(inode, map.m_lblk, NULL, 0); - if (!IS_ERR_OR_NULL(path)) { + path = ext4_find_extent(inode, map.m_lblk, path, 0); + if (!IS_ERR(path)) { for (j = 0; j < path->p_depth; j++) { - ext4_mb_mark_bb(inode->i_sb, path[j].p_block, 1, false); ext4_fc_record_regions(inode->i_sb, inode->i_ino, 0, path[j].p_block, 1, 1); } - ext4_free_ext_path(path); + } else { + path = NULL; } ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, false); ext4_fc_record_regions(inode->i_sb, inode->i_ino, @@ -6128,5 +6139,7 @@ int ext4_ext_clear_bb(struct inode *inode) cur = cur + map.m_len; } +out: + ext4_free_ext_path(path); return 0; } |