diff options
Diffstat (limited to 'fs/ocfs2/move_extents.c')
| -rw-r--r-- | fs/ocfs2/move_extents.c | 174 |
1 files changed, 97 insertions, 77 deletions
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index e52a2852d50d..ce978a2497d9 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -1,18 +1,8 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * +// SPDX-License-Identifier: GPL-2.0-only +/* * move_extents.c * * Copyright (C) 2011 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ #include <linux/fs.h> #include <linux/types.h> @@ -25,6 +15,7 @@ #include "ocfs2_ioctl.h" #include "alloc.h" +#include "localalloc.h" #include "aops.h" #include "dlmglue.h" #include "extent_map.h" @@ -107,21 +98,19 @@ static int __ocfs2_move_extent(handle_t *handle, rec = &el->l_recs[index]; - BUG_ON(ext_flags != rec->e_flags); + if (ext_flags != rec->e_flags) { + ret = ocfs2_error(inode->i_sb, + "Inode %llu has corrupted extent %d with flags 0x%x at cpos %u\n", + (unsigned long long)ino, index, rec->e_flags, cpos); + goto out; + } + /* * after moving/defraging to new location, the extent is not going * to be refcounted anymore. */ replace_rec.e_flags = ext_flags & ~OCFS2_EXT_REFCOUNTED; - ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), - context->et.et_root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); - if (ret) { - mlog_errno(ret); - goto out; - } - ret = ocfs2_split_extent(handle, &context->et, path, index, &replace_rec, context->meta_ac, &context->dealloc); @@ -130,8 +119,6 @@ static int __ocfs2_move_extent(handle_t *handle, goto out; } - ocfs2_journal_dirty(handle, context->et.et_root_bh); - context->new_phys_cpos = new_p_cpos; /* @@ -156,18 +143,14 @@ out: } /* - * lock allocators, and reserving appropriate number of bits for - * meta blocks and data clusters. - * - * in some cases, we don't need to reserve clusters, just let data_ac - * be NULL. + * lock allocator, and reserve appropriate number of bits for + * meta blocks. */ -static int ocfs2_lock_allocators_move_extents(struct inode *inode, +static int ocfs2_lock_meta_allocator_move_extents(struct inode *inode, struct ocfs2_extent_tree *et, u32 clusters_to_move, u32 extents_to_split, struct ocfs2_alloc_context **meta_ac, - struct ocfs2_alloc_context **data_ac, int extra_blocks, int *credits) { @@ -175,7 +158,7 @@ static int ocfs2_lock_allocators_move_extents(struct inode *inode, unsigned int max_recs_needed = 2 * extents_to_split + clusters_to_move; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - num_free_extents = ocfs2_num_free_extents(osb, et); + num_free_extents = ocfs2_num_free_extents(et); if (num_free_extents < 0) { ret = num_free_extents; mlog_errno(ret); @@ -192,13 +175,6 @@ static int ocfs2_lock_allocators_move_extents(struct inode *inode, goto out; } - if (data_ac) { - ret = ocfs2_reserve_clusters(osb, clusters_to_move, data_ac); - if (ret) { - mlog_errno(ret); - goto out; - } - } *credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el); @@ -233,6 +209,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, struct ocfs2_refcount_tree *ref_tree = NULL; u32 new_phys_cpos, new_len; u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); + int need_free = 0; if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) { BUG_ON(!ocfs2_is_refcount_inode(inode)); @@ -257,10 +234,10 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, } } - ret = ocfs2_lock_allocators_move_extents(inode, &context->et, *len, 1, - &context->meta_ac, - &context->data_ac, - extra_blocks, &credits); + ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et, + *len, 1, + &context->meta_ac, + extra_blocks, &credits); if (ret) { mlog_errno(ret); goto out; @@ -283,6 +260,21 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, } } + /* + * Make sure ocfs2_reserve_cluster is called after + * __ocfs2_flush_truncate_log, otherwise, dead lock may happen. + * + * If ocfs2_reserve_cluster is called + * before __ocfs2_flush_truncate_log, dead lock on global bitmap + * may happen. + * + */ + ret = ocfs2_reserve_clusters(osb, *len, &context->data_ac); + if (ret) { + mlog_errno(ret); + goto out_unlock_mutex; + } + handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); @@ -308,6 +300,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, if (!partial) { context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE; ret = -ENOSPC; + need_free = 1; goto out_commit; } } @@ -332,6 +325,20 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, mlog_errno(ret); out_commit: + if (need_free && context->data_ac) { + struct ocfs2_alloc_context *data_ac = context->data_ac; + + if (context->data_ac->ac_which == OCFS2_AC_USE_LOCAL) + ocfs2_free_local_alloc_bits(osb, handle, data_ac, + new_phys_cpos, new_len); + else + ocfs2_free_clusters(handle, + data_ac->ac_inode, + data_ac->ac_bh, + ocfs2_clusters_to_blocks(osb->sb, new_phys_cpos), + new_len); + } + ocfs2_commit_trans(osb, handle); out_unlock_mutex: @@ -363,7 +370,7 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode, int *vict_bit, struct buffer_head **ret_bh) { - int ret, i, bits_per_unit = 0; + int ret, i, len, bits_per_unit = 0; u64 blkno; char namebuf[40]; @@ -374,9 +381,9 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode, struct ocfs2_dinode *ac_dinode; struct ocfs2_group_desc *bg; - ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, slot); - ret = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf, - strlen(namebuf), &blkno); + len = ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, slot); + ret = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf, len, &blkno); + if (ret) { ret = -ENOENT; goto out; @@ -433,7 +440,7 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode, bg = (struct ocfs2_group_desc *)gd_bh->b_data; if (vict_blkno < (le64_to_cpu(bg->bg_blkno) + - le16_to_cpu(bg->bg_bits))) { + (le16_to_cpu(bg->bg_bits) << bits_per_unit))) { *ret_bh = gd_bh; *vict_bit = (vict_blkno - blkno) >> @@ -491,7 +498,7 @@ static int ocfs2_validate_and_adjust_move_goal(struct inode *inode, bg = (struct ocfs2_group_desc *)gd_bh->b_data; /* - * moving goal is not allowd to start with a group desc blok(#0 blk) + * moving goal is not allowed to start with a group desc blok(#0 blk) * let's compromise to the latter cluster. */ if (range->me_goal == le64_to_cpu(bg->bg_blkno)) @@ -548,6 +555,7 @@ static void ocfs2_probe_alloc_group(struct inode *inode, struct buffer_head *bh, last_free_bits++; if (last_free_bits == move_len) { + i -= move_len; *goal_bit = i; *phys_cpos = base_cpos + i; break; @@ -600,9 +608,10 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, } } - ret = ocfs2_lock_allocators_move_extents(inode, &context->et, len, 1, - &context->meta_ac, - NULL, extra_blocks, &credits); + ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et, + len, 1, + &context->meta_ac, + extra_blocks, &credits); if (ret) { mlog_errno(ret); goto out; @@ -614,6 +623,8 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, */ credits += OCFS2_INODE_UPDATE_CREDITS + 1; + inode_lock(tl_inode); + /* * ocfs2_move_extent() didn't reserve any clusters in lock_allocators() * logic, while we still need to lock the global_bitmap. @@ -623,7 +634,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, if (!gb_inode) { mlog(ML_ERROR, "unable to get global_bitmap inode\n"); ret = -EIO; - goto out; + goto out_unlock_tl_inode; } inode_lock(gb_inode); @@ -631,16 +642,14 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1); if (ret) { mlog_errno(ret); - goto out_unlock_gb_mutex; + goto out_unlock_gb_inode; } - inode_lock(tl_inode); - handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); - goto out_unlock_tl_inode; + goto out_unlock; } new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos); @@ -655,7 +664,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, /* * probe the victim cluster group to find a proper - * region to fit wanted movement, it even will perfrom + * region to fit wanted movement, it even will perform * a best-effort attempt by compromising to a threshold * around the goal. */ @@ -682,7 +691,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, } ret = ocfs2_block_group_set_bits(handle, gb_inode, gd, gd_bh, - goal_bit, len); + goal_bit, len, 0, 0); if (ret) { ocfs2_rollback_alloc_dinode_counts(gb_inode, gb_bh, len, le16_to_cpu(gd->bg_chain)); @@ -700,15 +709,14 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, out_commit: ocfs2_commit_trans(osb, handle); brelse(gd_bh); - -out_unlock_tl_inode: - inode_unlock(tl_inode); - +out_unlock: ocfs2_inode_unlock(gb_inode, 1); -out_unlock_gb_mutex: +out_unlock_gb_inode: inode_unlock(gb_inode); brelse(gb_bh); iput(gb_inode); +out_unlock_tl_inode: + inode_unlock(tl_inode); out: if (context->meta_ac) { @@ -865,6 +873,11 @@ static int __ocfs2_move_extents_range(struct buffer_head *di_bh, mlog_errno(ret); goto out; } + /* + * Invalidate extent cache after moving/defragging to prevent + * stale cached data with outdated extent flags. + */ + ocfs2_extent_map_trunc(inode, cpos); context->clusters_moved += alloc_size; next: @@ -917,7 +930,7 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context) } /* - * rememer ip_xattr_sem also needs to be held if necessary + * remember ip_xattr_sem also needs to be held if necessary */ down_write(&OCFS2_I(inode)->ip_alloc_sem); @@ -947,9 +960,9 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context) } di = (struct ocfs2_dinode *)di_bh->b_data; - inode->i_ctime = current_time(inode); - di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); - di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); + inode_set_ctime_current(inode); + di->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode)); + di->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode)); ocfs2_update_inode_fsync_trans(handle, inode, 0); ocfs2_journal_dirty(handle, di_bh); @@ -1018,18 +1031,25 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) context->range = ⦥ + /* + * ok, the default threshold for the defragmentation + * is 1M, since our maximum clustersize was 1M also. + * any thought? + */ + if (!range.me_threshold) + range.me_threshold = 1024 * 1024; + + if (range.me_threshold > i_size_read(inode)) + range.me_threshold = i_size_read(inode); + + if (range.me_flags & ~(OCFS2_MOVE_EXT_FL_AUTO_DEFRAG | + OCFS2_MOVE_EXT_FL_PART_DEFRAG)) { + status = -EINVAL; + goto out_free; + } + if (range.me_flags & OCFS2_MOVE_EXT_FL_AUTO_DEFRAG) { context->auto_defrag = 1; - /* - * ok, the default theshold for the defragmentation - * is 1M, since our maximum clustersize was 1M also. - * any thought? - */ - if (!range.me_threshold) - range.me_threshold = 1024 * 1024; - - if (range.me_threshold > i_size_read(inode)) - range.me_threshold = i_size_read(inode); if (range.me_flags & OCFS2_MOVE_EXT_FL_PART_DEFRAG) context->partial = 1; |
