diff options
Diffstat (limited to 'fs/ocfs2/move_extents.c')
| -rw-r--r-- | fs/ocfs2/move_extents.c | 311 |
1 files changed, 126 insertions, 185 deletions
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index f1fc172175b6..ce978a2497d9 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -1,18 +1,8 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * +// SPDX-License-Identifier: GPL-2.0-only +/* * move_extents.c * * Copyright (C) 2011 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ #include <linux/fs.h> #include <linux/types.h> @@ -25,6 +15,7 @@ #include "ocfs2_ioctl.h" #include "alloc.h" +#include "localalloc.h" #include "aops.h" #include "dlmglue.h" #include "extent_map.h" @@ -69,7 +60,7 @@ static int __ocfs2_move_extent(handle_t *handle, u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci); u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos); - ret = ocfs2_duplicate_clusters_by_page(handle, context->file, cpos, + ret = ocfs2_duplicate_clusters_by_page(handle, inode, cpos, p_cpos, new_p_cpos, len); if (ret) { mlog_errno(ret); @@ -98,32 +89,28 @@ static int __ocfs2_move_extent(handle_t *handle, el = path_leaf_el(path); index = ocfs2_search_extent_list(el, cpos); - if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { - ocfs2_error(inode->i_sb, - "Inode %llu has an extent at cpos %u which can no " - "longer be found.\n", - (unsigned long long)ino, cpos); - ret = -EROFS; + if (index == -1) { + ret = ocfs2_error(inode->i_sb, + "Inode %llu has an extent at cpos %u which can no longer be found\n", + (unsigned long long)ino, cpos); goto out; } rec = &el->l_recs[index]; - BUG_ON(ext_flags != rec->e_flags); + if (ext_flags != rec->e_flags) { + ret = ocfs2_error(inode->i_sb, + "Inode %llu has corrupted extent %d with flags 0x%x at cpos %u\n", + (unsigned long long)ino, index, rec->e_flags, cpos); + goto out; + } + /* * after moving/defraging to new location, the extent is not going * to be refcounted anymore. */ replace_rec.e_flags = ext_flags & ~OCFS2_EXT_REFCOUNTED; - ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), - context->et.et_root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); - if (ret) { - mlog_errno(ret); - goto out; - } - ret = ocfs2_split_extent(handle, &context->et, path, index, &replace_rec, context->meta_ac, &context->dealloc); @@ -132,8 +119,6 @@ static int __ocfs2_move_extent(handle_t *handle, goto out; } - ocfs2_journal_dirty(handle, context->et.et_root_bh); - context->new_phys_cpos = new_p_cpos; /* @@ -151,23 +136,21 @@ static int __ocfs2_move_extent(handle_t *handle, old_blkno, len); } + ocfs2_update_inode_fsync_trans(handle, inode, 0); out: + ocfs2_free_path(path); return ret; } /* - * lock allocators, and reserving appropriate number of bits for - * meta blocks and data clusters. - * - * in some cases, we don't need to reserve clusters, just let data_ac - * be NULL. + * lock allocator, and reserve appropriate number of bits for + * meta blocks. */ -static int ocfs2_lock_allocators_move_extents(struct inode *inode, +static int ocfs2_lock_meta_allocator_move_extents(struct inode *inode, struct ocfs2_extent_tree *et, u32 clusters_to_move, u32 extents_to_split, struct ocfs2_alloc_context **meta_ac, - struct ocfs2_alloc_context **data_ac, int extra_blocks, int *credits) { @@ -175,7 +158,7 @@ static int ocfs2_lock_allocators_move_extents(struct inode *inode, unsigned int max_recs_needed = 2 * extents_to_split + clusters_to_move; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - num_free_extents = ocfs2_num_free_extents(osb, et); + num_free_extents = ocfs2_num_free_extents(et); if (num_free_extents < 0) { ret = num_free_extents; mlog_errno(ret); @@ -192,16 +175,8 @@ static int ocfs2_lock_allocators_move_extents(struct inode *inode, goto out; } - if (data_ac) { - ret = ocfs2_reserve_clusters(osb, clusters_to_move, data_ac); - if (ret) { - mlog_errno(ret); - goto out; - } - } - *credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el, - clusters_to_move + 2); + *credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el); mlog(0, "reserve metadata_blocks: %d, data_clusters: %u, credits: %d\n", extra_blocks, clusters_to_move, *credits); @@ -234,12 +209,10 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, struct ocfs2_refcount_tree *ref_tree = NULL; u32 new_phys_cpos, new_len; u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); + int need_free = 0; if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) { - - BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & - OCFS2_HAS_REFCOUNT_FL)); - + BUG_ON(!ocfs2_is_refcount_inode(inode)); BUG_ON(!context->refcount_loc); ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1, @@ -261,10 +234,10 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, } } - ret = ocfs2_lock_allocators_move_extents(inode, &context->et, *len, 1, - &context->meta_ac, - &context->data_ac, - extra_blocks, &credits); + ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et, + *len, 1, + &context->meta_ac, + extra_blocks, &credits); if (ret) { mlog_errno(ret); goto out; @@ -277,7 +250,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, * context->data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv; */ - mutex_lock(&tl_inode->i_mutex); + inode_lock(tl_inode); if (ocfs2_truncate_log_needs_flush(osb)) { ret = __ocfs2_flush_truncate_log(osb); @@ -287,6 +260,21 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, } } + /* + * Make sure ocfs2_reserve_cluster is called after + * __ocfs2_flush_truncate_log, otherwise, dead lock may happen. + * + * If ocfs2_reserve_cluster is called + * before __ocfs2_flush_truncate_log, dead lock on global bitmap + * may happen. + * + */ + ret = ocfs2_reserve_clusters(osb, *len, &context->data_ac); + if (ret) { + mlog_errno(ret); + goto out_unlock_mutex; + } + handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); @@ -312,6 +300,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, if (!partial) { context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE; ret = -ENOSPC; + need_free = 1; goto out_commit; } } @@ -336,10 +325,24 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, mlog_errno(ret); out_commit: + if (need_free && context->data_ac) { + struct ocfs2_alloc_context *data_ac = context->data_ac; + + if (context->data_ac->ac_which == OCFS2_AC_USE_LOCAL) + ocfs2_free_local_alloc_bits(osb, handle, data_ac, + new_phys_cpos, new_len); + else + ocfs2_free_clusters(handle, + data_ac->ac_inode, + data_ac->ac_bh, + ocfs2_clusters_to_blocks(osb->sb, new_phys_cpos), + new_len); + } + ocfs2_commit_trans(osb, handle); out_unlock_mutex: - mutex_unlock(&tl_inode->i_mutex); + inode_unlock(tl_inode); if (context->data_ac) { ocfs2_free_alloc_context(context->data_ac); @@ -367,7 +370,7 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode, int *vict_bit, struct buffer_head **ret_bh) { - int ret, i, bits_per_unit = 0; + int ret, i, len, bits_per_unit = 0; u64 blkno; char namebuf[40]; @@ -378,9 +381,9 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode, struct ocfs2_dinode *ac_dinode; struct ocfs2_group_desc *bg; - ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, slot); - ret = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf, - strlen(namebuf), &blkno); + len = ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, slot); + ret = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf, len, &blkno); + if (ret) { ret = -ENOENT; goto out; @@ -403,7 +406,7 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode, * 'vict_blkno' was out of the valid range. */ if ((vict_blkno < le64_to_cpu(rec->c_blkno)) || - (vict_blkno >= (le32_to_cpu(ac_dinode->id1.bitmap1.i_total) << + (vict_blkno >= ((u64)le32_to_cpu(ac_dinode->id1.bitmap1.i_total) << bits_per_unit))) { ret = -EINVAL; goto out; @@ -437,7 +440,7 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode, bg = (struct ocfs2_group_desc *)gd_bh->b_data; if (vict_blkno < (le64_to_cpu(bg->bg_blkno) + - le16_to_cpu(bg->bg_bits))) { + (le16_to_cpu(bg->bg_bits) << bits_per_unit))) { *ret_bh = gd_bh; *vict_bit = (vict_blkno - blkno) >> @@ -495,7 +498,7 @@ static int ocfs2_validate_and_adjust_move_goal(struct inode *inode, bg = (struct ocfs2_group_desc *)gd_bh->b_data; /* - * moving goal is not allowd to start with a group desc blok(#0 blk) + * moving goal is not allowed to start with a group desc blok(#0 blk) * let's compromise to the latter cluster. */ if (range->me_goal == le64_to_cpu(bg->bg_blkno)) @@ -552,6 +555,7 @@ static void ocfs2_probe_alloc_group(struct inode *inode, struct buffer_head *bh, last_free_bits++; if (last_free_bits == move_len) { + i -= move_len; *goal_bit = i; *phys_cpos = base_cpos + i; break; @@ -561,83 +565,6 @@ static void ocfs2_probe_alloc_group(struct inode *inode, struct buffer_head *bh, mlog(0, "found phys_cpos: %u to fit the wanted moving.\n", *phys_cpos); } -static int ocfs2_alloc_dinode_update_counts(struct inode *inode, - handle_t *handle, - struct buffer_head *di_bh, - u32 num_bits, - u16 chain) -{ - int ret; - u32 tmp_used; - struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; - struct ocfs2_chain_list *cl = - (struct ocfs2_chain_list *) &di->id2.i_chain; - - ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, - OCFS2_JOURNAL_ACCESS_WRITE); - if (ret < 0) { - mlog_errno(ret); - goto out; - } - - tmp_used = le32_to_cpu(di->id1.bitmap1.i_used); - di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used); - le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits); - ocfs2_journal_dirty(handle, di_bh); - -out: - return ret; -} - -static inline int ocfs2_block_group_set_bits(handle_t *handle, - struct inode *alloc_inode, - struct ocfs2_group_desc *bg, - struct buffer_head *group_bh, - unsigned int bit_off, - unsigned int num_bits) -{ - int status; - void *bitmap = bg->bg_bitmap; - int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; - - /* All callers get the descriptor via - * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ - BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); - BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits); - - mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off, - num_bits); - - if (ocfs2_is_cluster_bitmap(alloc_inode)) - journal_type = OCFS2_JOURNAL_ACCESS_UNDO; - - status = ocfs2_journal_access_gd(handle, - INODE_CACHE(alloc_inode), - group_bh, - journal_type); - if (status < 0) { - mlog_errno(status); - goto bail; - } - - le16_add_cpu(&bg->bg_free_bits_count, -num_bits); - if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { - ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit" - " count %u but claims %u are freed. num_bits %d", - (unsigned long long)le64_to_cpu(bg->bg_blkno), - le16_to_cpu(bg->bg_bits), - le16_to_cpu(bg->bg_free_bits_count), num_bits); - return -EROFS; - } - while (num_bits--) - ocfs2_set_bit(bit_off++, bitmap); - - ocfs2_journal_dirty(handle, group_bh); - -bail: - return status; -} - static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, u32 cpos, u32 phys_cpos, u32 *new_phys_cpos, u32 len, int ext_flags) @@ -659,10 +586,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); if ((ext_flags & OCFS2_EXT_REFCOUNTED) && len) { - - BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & - OCFS2_HAS_REFCOUNT_FL)); - + BUG_ON(!ocfs2_is_refcount_inode(inode)); BUG_ON(!context->refcount_loc); ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1, @@ -684,9 +608,10 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, } } - ret = ocfs2_lock_allocators_move_extents(inode, &context->et, len, 1, - &context->meta_ac, - NULL, extra_blocks, &credits); + ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et, + len, 1, + &context->meta_ac, + extra_blocks, &credits); if (ret) { mlog_errno(ret); goto out; @@ -698,6 +623,8 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, */ credits += OCFS2_INODE_UPDATE_CREDITS + 1; + inode_lock(tl_inode); + /* * ocfs2_move_extent() didn't reserve any clusters in lock_allocators() * logic, while we still need to lock the global_bitmap. @@ -707,24 +634,22 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, if (!gb_inode) { mlog(ML_ERROR, "unable to get global_bitmap inode\n"); ret = -EIO; - goto out; + goto out_unlock_tl_inode; } - mutex_lock(&gb_inode->i_mutex); + inode_lock(gb_inode); ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1); if (ret) { mlog_errno(ret); - goto out_unlock_gb_mutex; + goto out_unlock_gb_inode; } - mutex_lock(&tl_inode->i_mutex); - handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); - goto out_unlock_tl_inode; + goto out_unlock; } new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos); @@ -739,7 +664,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, /* * probe the victim cluster group to find a proper - * region to fit wanted movement, it even will perfrom + * region to fit wanted movement, it even will perform * a best-effort attempt by compromising to a threshold * around the goal. */ @@ -766,9 +691,12 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, } ret = ocfs2_block_group_set_bits(handle, gb_inode, gd, gd_bh, - goal_bit, len); - if (ret) + goal_bit, len, 0, 0); + if (ret) { + ocfs2_rollback_alloc_dinode_counts(gb_inode, gb_bh, len, + le16_to_cpu(gd->bg_chain)); mlog_errno(ret); + } /* * Here we should write the new page out first if we are @@ -781,15 +709,14 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, out_commit: ocfs2_commit_trans(osb, handle); brelse(gd_bh); - -out_unlock_tl_inode: - mutex_unlock(&tl_inode->i_mutex); - +out_unlock: ocfs2_inode_unlock(gb_inode, 1); -out_unlock_gb_mutex: - mutex_unlock(&gb_inode->i_mutex); +out_unlock_gb_inode: + inode_unlock(gb_inode); brelse(gb_bh); iput(gb_inode); +out_unlock_tl_inode: + inode_unlock(tl_inode); out: if (context->meta_ac) { @@ -845,7 +772,7 @@ static int __ocfs2_move_extents_range(struct buffer_head *di_bh, struct ocfs2_move_extents *range = context->range; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - if ((inode->i_size == 0) || (range->me_len == 0)) + if ((i_size_read(inode) == 0) || (range->me_len == 0)) return 0; if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) @@ -946,6 +873,11 @@ static int __ocfs2_move_extents_range(struct buffer_head *di_bh, mlog_errno(ret); goto out; } + /* + * Invalidate extent cache after moving/defragging to prevent + * stale cached data with outdated extent flags. + */ + ocfs2_extent_map_trunc(inode, cpos); context->clusters_moved += alloc_size; next: @@ -977,13 +909,10 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context) struct buffer_head *di_bh = NULL; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - if (!inode) - return -ENOENT; - if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) return -EROFS; - mutex_lock(&inode->i_mutex); + inode_lock(inode); /* * This prevents concurrent writes from other nodes @@ -1001,7 +930,7 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context) } /* - * rememer ip_xattr_sem also needs to be held if necessary + * remember ip_xattr_sem also needs to be held if necessary */ down_write(&OCFS2_I(inode)->ip_alloc_sem); @@ -1031,9 +960,10 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context) } di = (struct ocfs2_dinode *)di_bh->b_data; - inode->i_ctime = CURRENT_TIME; - di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); - di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); + inode_set_ctime_current(inode); + di->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode)); + di->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode)); + ocfs2_update_inode_fsync_trans(handle, inode, 0); ocfs2_journal_dirty(handle, di_bh); @@ -1046,7 +976,7 @@ out_inode_unlock: out_rw_unlock: ocfs2_rw_unlock(inode, 1); out: - mutex_unlock(&inode->i_mutex); + inode_unlock(inode); return status; } @@ -1066,8 +996,10 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) if (status) return status; - if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE)) + if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE)) { + status = -EPERM; goto out_drop; + } if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) { status = -EPERM; @@ -1089,26 +1021,35 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) goto out_free; } - if (range.me_start > i_size_read(inode)) + if (range.me_start > i_size_read(inode)) { + status = -EINVAL; goto out_free; + } if (range.me_start + range.me_len > i_size_read(inode)) range.me_len = i_size_read(inode) - range.me_start; context->range = ⦥ + /* + * ok, the default threshold for the defragmentation + * is 1M, since our maximum clustersize was 1M also. + * any thought? + */ + if (!range.me_threshold) + range.me_threshold = 1024 * 1024; + + if (range.me_threshold > i_size_read(inode)) + range.me_threshold = i_size_read(inode); + + if (range.me_flags & ~(OCFS2_MOVE_EXT_FL_AUTO_DEFRAG | + OCFS2_MOVE_EXT_FL_PART_DEFRAG)) { + status = -EINVAL; + goto out_free; + } + if (range.me_flags & OCFS2_MOVE_EXT_FL_AUTO_DEFRAG) { context->auto_defrag = 1; - /* - * ok, the default theshold for the defragmentation - * is 1M, since our maximum clustersize was 1M also. - * any thought? - */ - if (!range.me_threshold) - range.me_threshold = 1024 * 1024; - - if (range.me_threshold > i_size_read(inode)) - range.me_threshold = i_size_read(inode); if (range.me_flags & OCFS2_MOVE_EXT_FL_PART_DEFRAG) context->partial = 1; |
