summaryrefslogtreecommitdiff
path: root/fs/ocfs2/move_extents.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/move_extents.c')
-rw-r--r--fs/ocfs2/move_extents.c311
1 files changed, 126 insertions, 185 deletions
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index f1fc172175b6..ce978a2497d9 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -1,18 +1,8 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+// SPDX-License-Identifier: GPL-2.0-only
+/*
* move_extents.c
*
* Copyright (C) 2011 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
*/
#include <linux/fs.h>
#include <linux/types.h>
@@ -25,6 +15,7 @@
#include "ocfs2_ioctl.h"
#include "alloc.h"
+#include "localalloc.h"
#include "aops.h"
#include "dlmglue.h"
#include "extent_map.h"
@@ -69,7 +60,7 @@ static int __ocfs2_move_extent(handle_t *handle,
u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci);
u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos);
- ret = ocfs2_duplicate_clusters_by_page(handle, context->file, cpos,
+ ret = ocfs2_duplicate_clusters_by_page(handle, inode, cpos,
p_cpos, new_p_cpos, len);
if (ret) {
mlog_errno(ret);
@@ -98,32 +89,28 @@ static int __ocfs2_move_extent(handle_t *handle,
el = path_leaf_el(path);
index = ocfs2_search_extent_list(el, cpos);
- if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
- ocfs2_error(inode->i_sb,
- "Inode %llu has an extent at cpos %u which can no "
- "longer be found.\n",
- (unsigned long long)ino, cpos);
- ret = -EROFS;
+ if (index == -1) {
+ ret = ocfs2_error(inode->i_sb,
+ "Inode %llu has an extent at cpos %u which can no longer be found\n",
+ (unsigned long long)ino, cpos);
goto out;
}
rec = &el->l_recs[index];
- BUG_ON(ext_flags != rec->e_flags);
+ if (ext_flags != rec->e_flags) {
+ ret = ocfs2_error(inode->i_sb,
+ "Inode %llu has corrupted extent %d with flags 0x%x at cpos %u\n",
+ (unsigned long long)ino, index, rec->e_flags, cpos);
+ goto out;
+ }
+
/*
* after moving/defraging to new location, the extent is not going
* to be refcounted anymore.
*/
replace_rec.e_flags = ext_flags & ~OCFS2_EXT_REFCOUNTED;
- ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
- context->et.et_root_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
-
ret = ocfs2_split_extent(handle, &context->et, path, index,
&replace_rec, context->meta_ac,
&context->dealloc);
@@ -132,8 +119,6 @@ static int __ocfs2_move_extent(handle_t *handle,
goto out;
}
- ocfs2_journal_dirty(handle, context->et.et_root_bh);
-
context->new_phys_cpos = new_p_cpos;
/*
@@ -151,23 +136,21 @@ static int __ocfs2_move_extent(handle_t *handle,
old_blkno, len);
}
+ ocfs2_update_inode_fsync_trans(handle, inode, 0);
out:
+ ocfs2_free_path(path);
return ret;
}
/*
- * lock allocators, and reserving appropriate number of bits for
- * meta blocks and data clusters.
- *
- * in some cases, we don't need to reserve clusters, just let data_ac
- * be NULL.
+ * lock allocator, and reserve appropriate number of bits for
+ * meta blocks.
*/
-static int ocfs2_lock_allocators_move_extents(struct inode *inode,
+static int ocfs2_lock_meta_allocator_move_extents(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 clusters_to_move,
u32 extents_to_split,
struct ocfs2_alloc_context **meta_ac,
- struct ocfs2_alloc_context **data_ac,
int extra_blocks,
int *credits)
{
@@ -175,7 +158,7 @@ static int ocfs2_lock_allocators_move_extents(struct inode *inode,
unsigned int max_recs_needed = 2 * extents_to_split + clusters_to_move;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- num_free_extents = ocfs2_num_free_extents(osb, et);
+ num_free_extents = ocfs2_num_free_extents(et);
if (num_free_extents < 0) {
ret = num_free_extents;
mlog_errno(ret);
@@ -192,16 +175,8 @@ static int ocfs2_lock_allocators_move_extents(struct inode *inode,
goto out;
}
- if (data_ac) {
- ret = ocfs2_reserve_clusters(osb, clusters_to_move, data_ac);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
- }
- *credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el,
- clusters_to_move + 2);
+ *credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el);
mlog(0, "reserve metadata_blocks: %d, data_clusters: %u, credits: %d\n",
extra_blocks, clusters_to_move, *credits);
@@ -234,12 +209,10 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
struct ocfs2_refcount_tree *ref_tree = NULL;
u32 new_phys_cpos, new_len;
u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
+ int need_free = 0;
if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) {
-
- BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
- OCFS2_HAS_REFCOUNT_FL));
-
+ BUG_ON(!ocfs2_is_refcount_inode(inode));
BUG_ON(!context->refcount_loc);
ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1,
@@ -261,10 +234,10 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
}
}
- ret = ocfs2_lock_allocators_move_extents(inode, &context->et, *len, 1,
- &context->meta_ac,
- &context->data_ac,
- extra_blocks, &credits);
+ ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et,
+ *len, 1,
+ &context->meta_ac,
+ extra_blocks, &credits);
if (ret) {
mlog_errno(ret);
goto out;
@@ -277,7 +250,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
* context->data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv;
*/
- mutex_lock(&tl_inode->i_mutex);
+ inode_lock(tl_inode);
if (ocfs2_truncate_log_needs_flush(osb)) {
ret = __ocfs2_flush_truncate_log(osb);
@@ -287,6 +260,21 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
}
}
+ /*
+ * Make sure ocfs2_reserve_cluster is called after
+ * __ocfs2_flush_truncate_log, otherwise, dead lock may happen.
+ *
+ * If ocfs2_reserve_cluster is called
+ * before __ocfs2_flush_truncate_log, dead lock on global bitmap
+ * may happen.
+ *
+ */
+ ret = ocfs2_reserve_clusters(osb, *len, &context->data_ac);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_unlock_mutex;
+ }
+
handle = ocfs2_start_trans(osb, credits);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
@@ -312,6 +300,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
if (!partial) {
context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE;
ret = -ENOSPC;
+ need_free = 1;
goto out_commit;
}
}
@@ -336,10 +325,24 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
mlog_errno(ret);
out_commit:
+ if (need_free && context->data_ac) {
+ struct ocfs2_alloc_context *data_ac = context->data_ac;
+
+ if (context->data_ac->ac_which == OCFS2_AC_USE_LOCAL)
+ ocfs2_free_local_alloc_bits(osb, handle, data_ac,
+ new_phys_cpos, new_len);
+ else
+ ocfs2_free_clusters(handle,
+ data_ac->ac_inode,
+ data_ac->ac_bh,
+ ocfs2_clusters_to_blocks(osb->sb, new_phys_cpos),
+ new_len);
+ }
+
ocfs2_commit_trans(osb, handle);
out_unlock_mutex:
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
if (context->data_ac) {
ocfs2_free_alloc_context(context->data_ac);
@@ -367,7 +370,7 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode,
int *vict_bit,
struct buffer_head **ret_bh)
{
- int ret, i, bits_per_unit = 0;
+ int ret, i, len, bits_per_unit = 0;
u64 blkno;
char namebuf[40];
@@ -378,9 +381,9 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode,
struct ocfs2_dinode *ac_dinode;
struct ocfs2_group_desc *bg;
- ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, slot);
- ret = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf,
- strlen(namebuf), &blkno);
+ len = ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, slot);
+ ret = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf, len, &blkno);
+
if (ret) {
ret = -ENOENT;
goto out;
@@ -403,7 +406,7 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode,
* 'vict_blkno' was out of the valid range.
*/
if ((vict_blkno < le64_to_cpu(rec->c_blkno)) ||
- (vict_blkno >= (le32_to_cpu(ac_dinode->id1.bitmap1.i_total) <<
+ (vict_blkno >= ((u64)le32_to_cpu(ac_dinode->id1.bitmap1.i_total) <<
bits_per_unit))) {
ret = -EINVAL;
goto out;
@@ -437,7 +440,7 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode,
bg = (struct ocfs2_group_desc *)gd_bh->b_data;
if (vict_blkno < (le64_to_cpu(bg->bg_blkno) +
- le16_to_cpu(bg->bg_bits))) {
+ (le16_to_cpu(bg->bg_bits) << bits_per_unit))) {
*ret_bh = gd_bh;
*vict_bit = (vict_blkno - blkno) >>
@@ -495,7 +498,7 @@ static int ocfs2_validate_and_adjust_move_goal(struct inode *inode,
bg = (struct ocfs2_group_desc *)gd_bh->b_data;
/*
- * moving goal is not allowd to start with a group desc blok(#0 blk)
+ * moving goal is not allowed to start with a group desc blok(#0 blk)
* let's compromise to the latter cluster.
*/
if (range->me_goal == le64_to_cpu(bg->bg_blkno))
@@ -552,6 +555,7 @@ static void ocfs2_probe_alloc_group(struct inode *inode, struct buffer_head *bh,
last_free_bits++;
if (last_free_bits == move_len) {
+ i -= move_len;
*goal_bit = i;
*phys_cpos = base_cpos + i;
break;
@@ -561,83 +565,6 @@ static void ocfs2_probe_alloc_group(struct inode *inode, struct buffer_head *bh,
mlog(0, "found phys_cpos: %u to fit the wanted moving.\n", *phys_cpos);
}
-static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
- handle_t *handle,
- struct buffer_head *di_bh,
- u32 num_bits,
- u16 chain)
-{
- int ret;
- u32 tmp_used;
- struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
- struct ocfs2_chain_list *cl =
- (struct ocfs2_chain_list *) &di->id2.i_chain;
-
- ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (ret < 0) {
- mlog_errno(ret);
- goto out;
- }
-
- tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
- di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used);
- le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
- ocfs2_journal_dirty(handle, di_bh);
-
-out:
- return ret;
-}
-
-static inline int ocfs2_block_group_set_bits(handle_t *handle,
- struct inode *alloc_inode,
- struct ocfs2_group_desc *bg,
- struct buffer_head *group_bh,
- unsigned int bit_off,
- unsigned int num_bits)
-{
- int status;
- void *bitmap = bg->bg_bitmap;
- int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
-
- /* All callers get the descriptor via
- * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
- BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
- BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);
-
- mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off,
- num_bits);
-
- if (ocfs2_is_cluster_bitmap(alloc_inode))
- journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
-
- status = ocfs2_journal_access_gd(handle,
- INODE_CACHE(alloc_inode),
- group_bh,
- journal_type);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
-
- le16_add_cpu(&bg->bg_free_bits_count, -num_bits);
- if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) {
- ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit"
- " count %u but claims %u are freed. num_bits %d",
- (unsigned long long)le64_to_cpu(bg->bg_blkno),
- le16_to_cpu(bg->bg_bits),
- le16_to_cpu(bg->bg_free_bits_count), num_bits);
- return -EROFS;
- }
- while (num_bits--)
- ocfs2_set_bit(bit_off++, bitmap);
-
- ocfs2_journal_dirty(handle, group_bh);
-
-bail:
- return status;
-}
-
static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
u32 cpos, u32 phys_cpos, u32 *new_phys_cpos,
u32 len, int ext_flags)
@@ -659,10 +586,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
if ((ext_flags & OCFS2_EXT_REFCOUNTED) && len) {
-
- BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
- OCFS2_HAS_REFCOUNT_FL));
-
+ BUG_ON(!ocfs2_is_refcount_inode(inode));
BUG_ON(!context->refcount_loc);
ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1,
@@ -684,9 +608,10 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
}
}
- ret = ocfs2_lock_allocators_move_extents(inode, &context->et, len, 1,
- &context->meta_ac,
- NULL, extra_blocks, &credits);
+ ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et,
+ len, 1,
+ &context->meta_ac,
+ extra_blocks, &credits);
if (ret) {
mlog_errno(ret);
goto out;
@@ -698,6 +623,8 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
*/
credits += OCFS2_INODE_UPDATE_CREDITS + 1;
+ inode_lock(tl_inode);
+
/*
* ocfs2_move_extent() didn't reserve any clusters in lock_allocators()
* logic, while we still need to lock the global_bitmap.
@@ -707,24 +634,22 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
if (!gb_inode) {
mlog(ML_ERROR, "unable to get global_bitmap inode\n");
ret = -EIO;
- goto out;
+ goto out_unlock_tl_inode;
}
- mutex_lock(&gb_inode->i_mutex);
+ inode_lock(gb_inode);
ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1);
if (ret) {
mlog_errno(ret);
- goto out_unlock_gb_mutex;
+ goto out_unlock_gb_inode;
}
- mutex_lock(&tl_inode->i_mutex);
-
handle = ocfs2_start_trans(osb, credits);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
mlog_errno(ret);
- goto out_unlock_tl_inode;
+ goto out_unlock;
}
new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos);
@@ -739,7 +664,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
/*
* probe the victim cluster group to find a proper
- * region to fit wanted movement, it even will perfrom
+ * region to fit wanted movement, it even will perform
* a best-effort attempt by compromising to a threshold
* around the goal.
*/
@@ -766,9 +691,12 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
}
ret = ocfs2_block_group_set_bits(handle, gb_inode, gd, gd_bh,
- goal_bit, len);
- if (ret)
+ goal_bit, len, 0, 0);
+ if (ret) {
+ ocfs2_rollback_alloc_dinode_counts(gb_inode, gb_bh, len,
+ le16_to_cpu(gd->bg_chain));
mlog_errno(ret);
+ }
/*
* Here we should write the new page out first if we are
@@ -781,15 +709,14 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
out_commit:
ocfs2_commit_trans(osb, handle);
brelse(gd_bh);
-
-out_unlock_tl_inode:
- mutex_unlock(&tl_inode->i_mutex);
-
+out_unlock:
ocfs2_inode_unlock(gb_inode, 1);
-out_unlock_gb_mutex:
- mutex_unlock(&gb_inode->i_mutex);
+out_unlock_gb_inode:
+ inode_unlock(gb_inode);
brelse(gb_bh);
iput(gb_inode);
+out_unlock_tl_inode:
+ inode_unlock(tl_inode);
out:
if (context->meta_ac) {
@@ -845,7 +772,7 @@ static int __ocfs2_move_extents_range(struct buffer_head *di_bh,
struct ocfs2_move_extents *range = context->range;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- if ((inode->i_size == 0) || (range->me_len == 0))
+ if ((i_size_read(inode) == 0) || (range->me_len == 0))
return 0;
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
@@ -946,6 +873,11 @@ static int __ocfs2_move_extents_range(struct buffer_head *di_bh,
mlog_errno(ret);
goto out;
}
+ /*
+ * Invalidate extent cache after moving/defragging to prevent
+ * stale cached data with outdated extent flags.
+ */
+ ocfs2_extent_map_trunc(inode, cpos);
context->clusters_moved += alloc_size;
next:
@@ -977,13 +909,10 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context)
struct buffer_head *di_bh = NULL;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- if (!inode)
- return -ENOENT;
-
if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
return -EROFS;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* This prevents concurrent writes from other nodes
@@ -1001,7 +930,7 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context)
}
/*
- * rememer ip_xattr_sem also needs to be held if necessary
+ * remember ip_xattr_sem also needs to be held if necessary
*/
down_write(&OCFS2_I(inode)->ip_alloc_sem);
@@ -1031,9 +960,10 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context)
}
di = (struct ocfs2_dinode *)di_bh->b_data;
- inode->i_ctime = CURRENT_TIME;
- di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
- di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+ inode_set_ctime_current(inode);
+ di->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode));
+ di->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
+ ocfs2_update_inode_fsync_trans(handle, inode, 0);
ocfs2_journal_dirty(handle, di_bh);
@@ -1046,7 +976,7 @@ out_inode_unlock:
out_rw_unlock:
ocfs2_rw_unlock(inode, 1);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return status;
}
@@ -1066,8 +996,10 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
if (status)
return status;
- if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE))
+ if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE)) {
+ status = -EPERM;
goto out_drop;
+ }
if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
status = -EPERM;
@@ -1089,26 +1021,35 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
goto out_free;
}
- if (range.me_start > i_size_read(inode))
+ if (range.me_start > i_size_read(inode)) {
+ status = -EINVAL;
goto out_free;
+ }
if (range.me_start + range.me_len > i_size_read(inode))
range.me_len = i_size_read(inode) - range.me_start;
context->range = &range;
+ /*
+ * ok, the default threshold for the defragmentation
+ * is 1M, since our maximum clustersize was 1M also.
+ * any thought?
+ */
+ if (!range.me_threshold)
+ range.me_threshold = 1024 * 1024;
+
+ if (range.me_threshold > i_size_read(inode))
+ range.me_threshold = i_size_read(inode);
+
+ if (range.me_flags & ~(OCFS2_MOVE_EXT_FL_AUTO_DEFRAG |
+ OCFS2_MOVE_EXT_FL_PART_DEFRAG)) {
+ status = -EINVAL;
+ goto out_free;
+ }
+
if (range.me_flags & OCFS2_MOVE_EXT_FL_AUTO_DEFRAG) {
context->auto_defrag = 1;
- /*
- * ok, the default theshold for the defragmentation
- * is 1M, since our maximum clustersize was 1M also.
- * any thought?
- */
- if (!range.me_threshold)
- range.me_threshold = 1024 * 1024;
-
- if (range.me_threshold > i_size_read(inode))
- range.me_threshold = i_size_read(inode);
if (range.me_flags & OCFS2_MOVE_EXT_FL_PART_DEFRAG)
context->partial = 1;