summaryrefslogtreecommitdiff
path: root/fs/gfs2/rgrp.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/gfs2/rgrp.c')
-rw-r--r--fs/gfs2/rgrp.c1383
1 files changed, 861 insertions, 522 deletions
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 69317435faa7..b14e54b38ee8 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1,12 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
@@ -32,19 +31,28 @@
#include "log.h"
#include "inode.h"
#include "trace_gfs2.h"
+#include "dir.h"
#define BFITNOENT ((u32)~0)
#define NO_BLOCK ((u64)~0)
-#if BITS_PER_LONG == 32
-#define LBITMASK (0x55555555UL)
-#define LBITSKIP55 (0x55555555UL)
-#define LBITSKIP00 (0x00000000UL)
-#else
-#define LBITMASK (0x5555555555555555UL)
-#define LBITSKIP55 (0x5555555555555555UL)
-#define LBITSKIP00 (0x0000000000000000UL)
-#endif
+struct gfs2_rbm {
+ struct gfs2_rgrpd *rgd;
+ u32 offset; /* The offset is bitmap relative */
+ int bii; /* Bitmap index */
+};
+
+static inline struct gfs2_bitmap *rbm_bi(const struct gfs2_rbm *rbm)
+{
+ return rbm->rgd->rd_bits + rbm->bii;
+}
+
+static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm)
+{
+ BUG_ON(rbm->offset >= rbm->rgd->rd_data);
+ return rbm->rgd->rd_data0 + (rbm_bi(rbm)->bi_start * GFS2_NBBY) +
+ rbm->offset;
+}
/*
* These routines are used by the resource group routines (rgrp.c)
@@ -57,6 +65,11 @@
* 3 = Used (metadata)
*/
+struct gfs2_extent {
+ struct gfs2_rbm rbm;
+ u32 len;
+};
+
static const char valid_change[16] = {
/* current */
/* n */ 0, 1, 1, 1,
@@ -65,8 +78,8 @@ static const char valid_change[16] = {
1, 0, 0, 0
};
-static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext,
- const struct gfs2_inode *ip, bool nowrap);
+static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
+ struct gfs2_blkreserv *rs, bool nowrap);
/**
@@ -81,32 +94,36 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
unsigned char new_state)
{
unsigned char *byte1, *byte2, *end, cur_state;
- unsigned int buflen = rbm->bi->bi_len;
+ struct gfs2_bitmap *bi = rbm_bi(rbm);
+ unsigned int buflen = bi->bi_bytes;
const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
- byte1 = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY);
- end = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + buflen;
+ byte1 = bi->bi_bh->b_data + bi->bi_offset + (rbm->offset / GFS2_NBBY);
+ end = bi->bi_bh->b_data + bi->bi_offset + buflen;
BUG_ON(byte1 >= end);
cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
if (unlikely(!valid_change[new_state * 4 + cur_state])) {
- printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, "
- "new_state=%d\n", rbm->offset, cur_state, new_state);
- printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n",
- (unsigned long long)rbm->rgd->rd_addr,
- rbm->bi->bi_start);
- printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n",
- rbm->bi->bi_offset, rbm->bi->bi_len);
+ struct gfs2_sbd *sdp = rbm->rgd->rd_sbd;
+
+ fs_warn(sdp, "buf_blk = 0x%x old_state=%d, new_state=%d\n",
+ rbm->offset, cur_state, new_state);
+ fs_warn(sdp, "rgrp=0x%llx bi_start=0x%x biblk: 0x%llx\n",
+ (unsigned long long)rbm->rgd->rd_addr, bi->bi_start,
+ (unsigned long long)bi->bi_bh->b_blocknr);
+ fs_warn(sdp, "bi_offset=0x%x bi_bytes=0x%x block=0x%llx\n",
+ bi->bi_offset, bi->bi_bytes,
+ (unsigned long long)gfs2_rbm_to_block(rbm));
dump_stack();
gfs2_consist_rgrpd(rbm->rgd);
return;
}
*byte1 ^= (cur_state ^ new_state) << bit;
- if (do_clone && rbm->bi->bi_clone) {
- byte2 = rbm->bi->bi_clone + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY);
+ if (do_clone && bi->bi_clone) {
+ byte2 = bi->bi_clone + bi->bi_offset + (rbm->offset / GFS2_NBBY);
cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
*byte2 ^= (cur_state ^ new_state) << bit;
}
@@ -115,16 +132,26 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
/**
* gfs2_testbit - test a bit in the bitmaps
* @rbm: The bit to test
+ * @use_clone: If true, test the clone bitmap, not the official bitmap.
+ *
+ * Some callers like gfs2_unaligned_extlen need to test the clone bitmaps,
+ * not the "real" bitmaps, to avoid allocating recently freed blocks.
*
* Returns: The two bit block state of the requested bit
*/
-static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm)
+static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm, bool use_clone)
{
- const u8 *buffer = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset;
+ struct gfs2_bitmap *bi = rbm_bi(rbm);
+ const u8 *buffer;
const u8 *byte;
unsigned int bit;
+ if (use_clone && bi->bi_clone)
+ buffer = bi->bi_clone;
+ else
+ buffer = bi->bi_bh->b_data;
+ buffer += bi->bi_offset;
byte = buffer + (rbm->offset / GFS2_NBBY);
bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
@@ -132,13 +159,13 @@ static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm)
}
/**
- * gfs2_bit_search
+ * gfs2_bit_search - search bitmap for a state
* @ptr: Pointer to bitmap data
* @mask: Mask to use (normally 0x55555.... but adjusted for search start)
* @state: The state we are searching for
*
- * We xor the bitmap data with a patter which is the bitwise opposite
- * of what we are looking for, this gives rise to a pattern of ones
+ * We xor the bitmap data with a pattern which is the bitwise opposite
+ * of what we are looking for. This gives rise to a pattern of ones
* wherever there is a match. Since we have two bits per entry, we
* take this pattern, shift it down by one place and then and it with
* the original. All the even bit positions (0,2,4, etc) then represent
@@ -166,7 +193,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
/**
* rs_cmp - multi-block reservation range compare
- * @blk: absolute file system block number of the new reservation
+ * @start: start of the new reservation
* @len: number of blocks in the new reservation
* @rs: existing reservation to compare against
*
@@ -174,13 +201,11 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
* -1 if the block range is before the start of the reservation
* 0 if the block range overlaps with the reservation
*/
-static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
+static inline int rs_cmp(u64 start, u32 len, struct gfs2_blkreserv *rs)
{
- u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm);
-
- if (blk >= startblk + rs->rs_free)
+ if (start >= rs->rs_start + rs->rs_requested)
return 1;
- if (blk + len - 1 < startblk)
+ if (rs->rs_start >= start + len)
return -1;
return 0;
}
@@ -251,53 +276,80 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
{
- u64 rblock = block - rbm->rgd->rd_data0;
- u32 x;
-
- if (WARN_ON_ONCE(rblock > UINT_MAX))
- return -EINVAL;
- if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
+ if (!rgrp_contains_block(rbm->rgd, block))
return -E2BIG;
-
- rbm->bi = rbm->rgd->rd_bits;
- rbm->offset = (u32)(rblock);
+ rbm->bii = 0;
+ rbm->offset = block - rbm->rgd->rd_data0;
/* Check if the block is within the first block */
- if (rbm->offset < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY)
+ if (rbm->offset < rbm_bi(rbm)->bi_blocks)
return 0;
/* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */
rbm->offset += (sizeof(struct gfs2_rgrp) -
sizeof(struct gfs2_meta_header)) * GFS2_NBBY;
- x = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
- rbm->offset -= x * rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
- rbm->bi += x;
+ rbm->bii = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
+ rbm->offset -= rbm->bii * rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
return 0;
}
/**
+ * gfs2_rbm_add - add a number of blocks to an rbm
+ * @rbm: The rbm with rgd already set correctly
+ * @blocks: The number of blocks to add to rpm
+ *
+ * This function takes an existing rbm structure and adds a number of blocks to
+ * it.
+ *
+ * Returns: True if the new rbm would point past the end of the rgrp.
+ */
+
+static bool gfs2_rbm_add(struct gfs2_rbm *rbm, u32 blocks)
+{
+ struct gfs2_rgrpd *rgd = rbm->rgd;
+ struct gfs2_bitmap *bi = rgd->rd_bits + rbm->bii;
+
+ if (rbm->offset + blocks < bi->bi_blocks) {
+ rbm->offset += blocks;
+ return false;
+ }
+ blocks -= bi->bi_blocks - rbm->offset;
+
+ for(;;) {
+ bi++;
+ if (bi == rgd->rd_bits + rgd->rd_length)
+ return true;
+ if (blocks < bi->bi_blocks) {
+ rbm->offset = blocks;
+ rbm->bii = bi - rgd->rd_bits;
+ return false;
+ }
+ blocks -= bi->bi_blocks;
+ }
+}
+
+/**
* gfs2_unaligned_extlen - Look for free blocks which are not byte aligned
* @rbm: Position to search (value/result)
* @n_unaligned: Number of unaligned blocks to check
* @len: Decremented for each block found (terminate on zero)
*
- * Returns: true if a non-free block is encountered
+ * Returns: true if a non-free block is encountered or the end of the resource
+ * group is reached.
*/
static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len)
{
- u64 block;
u32 n;
u8 res;
for (n = 0; n < n_unaligned; n++) {
- res = gfs2_testbit(rbm);
+ res = gfs2_testbit(rbm, true);
if (res != GFS2_BLKST_FREE)
return true;
(*len)--;
if (*len == 0)
return true;
- block = gfs2_rbm_to_block(rbm);
- if (gfs2_rbm_from_block(rbm, block + 1))
+ if (gfs2_rbm_add(rbm, 1))
return true;
}
@@ -306,7 +358,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le
/**
* gfs2_free_extlen - Return extent length of free blocks
- * @rbm: Starting position
+ * @rrbm: Starting position
* @len: Max length to check
*
* Starting at the block specified by the rbm, see how many free blocks
@@ -328,6 +380,7 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
u32 chunk_size;
u8 *ptr, *start, *end;
u64 block;
+ struct gfs2_bitmap *bi;
if (n_unaligned &&
gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len))
@@ -336,11 +389,12 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len)
n_unaligned = len & 3;
/* Start is now byte aligned */
while (len > 3) {
- start = rbm.bi->bi_bh->b_data;
- if (rbm.bi->bi_clone)
- start = rbm.bi->bi_clone;
- end = start + rbm.bi->bi_bh->b_size;
- start += rbm.bi->bi_offset;
+ bi = rbm_bi(&rbm);
+ start = bi->bi_bh->b_data;
+ if (bi->bi_clone)
+ start = bi->bi_clone;
+ start += bi->bi_offset;
+ end = start + bi->bi_bytes;
BUG_ON(rbm.offset & 3);
start += (rbm.offset / GFS2_NBBY);
bytes = min_t(u32, len / GFS2_NBBY, (end - start));
@@ -425,45 +479,45 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
count[x] += gfs2_bitcount(rgd,
bi->bi_bh->b_data +
bi->bi_offset,
- bi->bi_len, x);
+ bi->bi_bytes, x);
}
if (count[0] != rgd->rd_free) {
- if (gfs2_consist_rgrpd(rgd))
- fs_err(sdp, "free data mismatch: %u != %u\n",
- count[0], rgd->rd_free);
+ gfs2_lm(sdp, "free data mismatch: %u != %u\n",
+ count[0], rgd->rd_free);
+ gfs2_consist_rgrpd(rgd);
return;
}
tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
if (count[1] != tmp) {
- if (gfs2_consist_rgrpd(rgd))
- fs_err(sdp, "used data mismatch: %u != %u\n",
- count[1], tmp);
+ gfs2_lm(sdp, "used data mismatch: %u != %u\n",
+ count[1], tmp);
+ gfs2_consist_rgrpd(rgd);
return;
}
if (count[2] + count[3] != rgd->rd_dinodes) {
- if (gfs2_consist_rgrpd(rgd))
- fs_err(sdp, "used metadata mismatch: %u != %u\n",
- count[2] + count[3], rgd->rd_dinodes);
+ gfs2_lm(sdp, "used metadata mismatch: %u != %u\n",
+ count[2] + count[3], rgd->rd_dinodes);
+ gfs2_consist_rgrpd(rgd);
return;
}
}
-static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
-{
- u64 first = rgd->rd_data0;
- u64 last = first + rgd->rd_data;
- return first <= block && block < last;
-}
-
/**
* gfs2_blk2rgrpd - Find resource group for a given data/meta block number
* @sdp: The GFS2 superblock
* @blk: The data block number
* @exact: True if this needs to be an exact match
*
+ * The @exact argument should be set to true by most callers. The exception
+ * is when we need to match blocks which are not represented by the rgrp
+ * bitmap, but which are part of the rgrp (i.e. padding blocks) which are
+ * there for alignment purposes. Another way of looking at it is that @exact
+ * matches only valid data/metadata blocks, but with @exact false, it will
+ * match any block within the extent of the rgrp.
+ *
* Returns: The resource group, or NULL if not found
*/
@@ -544,6 +598,13 @@ struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
return rgd;
}
+void check_and_update_goal(struct gfs2_inode *ip)
+{
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+ if (!ip->i_goal || gfs2_blk2rgrpd(sdp, ip->i_goal, 1) == NULL)
+ ip->i_goal = ip->i_no_addr;
+}
+
void gfs2_free_clones(struct gfs2_rgrpd *rgd)
{
int x;
@@ -555,36 +616,16 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
}
}
-/**
- * gfs2_rs_alloc - make sure we have a reservation assigned to the inode
- * @ip: the inode for this reservation
- */
-int gfs2_rs_alloc(struct gfs2_inode *ip)
+static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs,
+ const char *fs_id_buf)
{
- int error = 0;
+ struct gfs2_inode *ip = container_of(rs, struct gfs2_inode, i_res);
- down_write(&ip->i_rw_mutex);
- if (ip->i_res)
- goto out;
-
- ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
- if (!ip->i_res) {
- error = -ENOMEM;
- goto out;
- }
-
- RB_CLEAR_NODE(&ip->i_res->rs_node);
-out:
- up_write(&ip->i_rw_mutex);
- return error;
-}
-
-static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
-{
- gfs2_print_dbg(seq, " B: n:%llu s:%llu b:%u f:%u\n",
- (unsigned long long)rs->rs_inum,
- (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
- rs->rs_rbm.offset, rs->rs_free);
+ gfs2_print_dbg(seq, "%s B: n:%llu s:%llu f:%u\n",
+ fs_id_buf,
+ (unsigned long long)ip->i_no_addr,
+ (unsigned long long)rs->rs_start,
+ rs->rs_requested);
}
/**
@@ -599,18 +640,22 @@ static void __rs_deltree(struct gfs2_blkreserv *rs)
if (!gfs2_rs_active(rs))
return;
- rgd = rs->rs_rbm.rgd;
+ rgd = rs->rs_rgd;
trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
rb_erase(&rs->rs_node, &rgd->rd_rstree);
RB_CLEAR_NODE(&rs->rs_node);
- if (rs->rs_free) {
- /* return reserved blocks to the rgrp */
- BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
- rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
- rs->rs_free = 0;
- clear_bit(GBF_FULL, &rs->rs_rbm.bi->bi_flags);
- smp_mb__after_clear_bit();
+ if (rs->rs_requested) {
+ /* return requested blocks to the rgrp */
+ BUG_ON(rs->rs_rgd->rd_requested < rs->rs_requested);
+ rs->rs_rgd->rd_requested -= rs->rs_requested;
+
+ /* The rgrp extent failure point is likely not to increase;
+ it will only do so if the freed blocks are somehow
+ contiguous with a span of free blocks that follows. Still,
+ it will force the number to be recalculated later. */
+ rgd->rd_extfail_pt += rs->rs_requested;
+ rs->rs_requested = 0;
}
}
@@ -623,10 +668,11 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
{
struct gfs2_rgrpd *rgd;
- rgd = rs->rs_rbm.rgd;
+ rgd = rs->rs_rgd;
if (rgd) {
spin_lock(&rgd->rd_rsspin);
__rs_deltree(rs);
+ BUG_ON(rs->rs_requested);
spin_unlock(&rgd->rd_rsspin);
}
}
@@ -641,12 +687,8 @@ void gfs2_rs_delete(struct gfs2_inode *ip)
struct inode *inode = &ip->i_inode;
down_write(&ip->i_rw_mutex);
- if (ip->i_res && atomic_read(&inode->i_writecount) <= 1) {
- gfs2_rs_deltree(ip->i_res);
- BUG_ON(ip->i_res->rs_free);
- kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
- ip->i_res = NULL;
- }
+ if (atomic_read(&inode->i_writecount) <= 1)
+ gfs2_rs_deltree(&ip->i_res);
up_write(&ip->i_rw_mutex);
}
@@ -684,31 +726,25 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
rb_erase(n, &sdp->sd_rindex_tree);
if (gl) {
- spin_lock(&gl->gl_spin);
- gl->gl_object = NULL;
- spin_unlock(&gl->gl_spin);
- gfs2_glock_add_to_lru(gl);
+ if (gl->gl_state != LM_ST_UNLOCKED) {
+ gfs2_glock_cb(gl, LM_ST_UNLOCKED);
+ flush_delayed_work(&gl->gl_work);
+ }
+ gfs2_rgrp_brelse(rgd);
+ glock_clear_object(gl, rgd);
gfs2_glock_put(gl);
}
gfs2_free_clones(rgd);
- kfree(rgd->rd_bits);
return_all_reservations(rgd);
+ kfree(rgd->rd_bits);
+ rgd->rd_bits = NULL;
kmem_cache_free(gfs2_rgrpd_cachep, rgd);
}
}
-static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
-{
- printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
- printk(KERN_INFO " ri_length = %u\n", rgd->rd_length);
- printk(KERN_INFO " ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
- printk(KERN_INFO " ri_data = %u\n", rgd->rd_data);
- printk(KERN_INFO " ri_bitbytes = %u\n", rgd->rd_bitbytes);
-}
-
/**
- * gfs2_compute_bitstructs - Compute the bitmap sizes
+ * compute_bitstructs - Compute the bitmap sizes
* @rgd: The resource group descriptor
*
* Calculates bitmap descriptors, one for each block that contains bitmap data
@@ -742,26 +778,30 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
bytes = bytes_left;
bi->bi_offset = sizeof(struct gfs2_rgrp);
bi->bi_start = 0;
- bi->bi_len = bytes;
+ bi->bi_bytes = bytes;
+ bi->bi_blocks = bytes * GFS2_NBBY;
/* header block */
} else if (x == 0) {
bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);
bi->bi_offset = sizeof(struct gfs2_rgrp);
bi->bi_start = 0;
- bi->bi_len = bytes;
+ bi->bi_bytes = bytes;
+ bi->bi_blocks = bytes * GFS2_NBBY;
/* last block */
} else if (x + 1 == length) {
bytes = bytes_left;
bi->bi_offset = sizeof(struct gfs2_meta_header);
bi->bi_start = rgd->rd_bitbytes - bytes_left;
- bi->bi_len = bytes;
+ bi->bi_bytes = bytes;
+ bi->bi_blocks = bytes * GFS2_NBBY;
/* other blocks */
} else {
bytes = sdp->sd_sb.sb_bsize -
sizeof(struct gfs2_meta_header);
bi->bi_offset = sizeof(struct gfs2_meta_header);
bi->bi_start = rgd->rd_bitbytes - bytes_left;
- bi->bi_len = bytes;
+ bi->bi_bytes = bytes;
+ bi->bi_blocks = bytes * GFS2_NBBY;
}
bytes_left -= bytes;
@@ -772,12 +812,21 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
return -EIO;
}
bi = rgd->rd_bits + (length - 1);
- if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
- if (gfs2_consist_rgrpd(rgd)) {
- gfs2_rindex_print(rgd);
- fs_err(sdp, "start=%u len=%u offset=%u\n",
- bi->bi_start, bi->bi_len, bi->bi_offset);
- }
+ if ((bi->bi_start + bi->bi_bytes) * GFS2_NBBY != rgd->rd_data) {
+ gfs2_lm(sdp,
+ "ri_addr=%llu "
+ "ri_length=%u "
+ "ri_data0=%llu "
+ "ri_data=%u "
+ "ri_bitbytes=%u "
+ "start=%u len=%u offset=%u\n",
+ (unsigned long long)rgd->rd_addr,
+ rgd->rd_length,
+ (unsigned long long)rgd->rd_data0,
+ rgd->rd_data,
+ rgd->rd_bitbytes,
+ bi->bi_start, bi->bi_bytes, bi->bi_offset);
+ gfs2_consist_rgrpd(rgd);
return -EIO;
}
@@ -872,37 +921,71 @@ static int read_rindex_entry(struct gfs2_inode *ip)
rgd->rd_data = be32_to_cpu(buf.ri_data);
rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
spin_lock_init(&rgd->rd_rsspin);
-
- error = compute_bitstructs(rgd);
- if (error)
- goto fail;
+ mutex_init(&rgd->rd_mutex);
error = gfs2_glock_get(sdp, rgd->rd_addr,
&gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
if (error)
goto fail;
- rgd->rd_gl->gl_object = rgd;
+ error = compute_bitstructs(rgd);
+ if (error)
+ goto fail_glock;
+
rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
- rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
+ rgd->rd_flags &= ~GFS2_RDF_PREFERRED;
if (rgd->rd_data > sdp->sd_max_rg_data)
sdp->sd_max_rg_data = rgd->rd_data;
spin_lock(&sdp->sd_rindex_spin);
error = rgd_insert(rgd);
spin_unlock(&sdp->sd_rindex_spin);
- if (!error)
+ if (!error) {
+ glock_set_object(rgd->rd_gl, rgd);
return 0;
+ }
error = 0; /* someone else read in the rgrp; free it and ignore it */
+fail_glock:
gfs2_glock_put(rgd->rd_gl);
fail:
kfree(rgd->rd_bits);
+ rgd->rd_bits = NULL;
kmem_cache_free(gfs2_rgrpd_cachep, rgd);
return error;
}
/**
+ * set_rgrp_preferences - Run all the rgrps, selecting some we prefer to use
+ * @sdp: the GFS2 superblock
+ *
+ * The purpose of this function is to select a subset of the resource groups
+ * and mark them as PREFERRED. We do it in such a way that each node prefers
+ * to use a unique set of rgrps to minimize glock contention.
+ */
+static void set_rgrp_preferences(struct gfs2_sbd *sdp)
+{
+ struct gfs2_rgrpd *rgd, *first;
+ int i;
+
+ /* Skip an initial number of rgrps, based on this node's journal ID.
+ That should start each node out on its own set. */
+ rgd = gfs2_rgrpd_get_first(sdp);
+ for (i = 0; i < sdp->sd_lockstruct.ls_jid; i++)
+ rgd = gfs2_rgrpd_get_next(rgd);
+ first = rgd;
+
+ do {
+ rgd->rd_flags |= GFS2_RDF_PREFERRED;
+ for (i = 0; i < sdp->sd_journals; i++) {
+ rgd = gfs2_rgrpd_get_next(rgd);
+ if (!rgd || rgd == first)
+ break;
+ }
+ } while (rgd && rgd != first);
+}
+
+/**
* gfs2_ri_update - Pull in a new resource index from the disk
* @ip: pointer to the rindex inode
*
@@ -921,6 +1004,12 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
if (error < 0)
return error;
+ if (RB_EMPTY_ROOT(&sdp->sd_rindex_tree)) {
+ fs_err(sdp, "no resource groups found in the file system.\n");
+ return -ENOENT;
+ }
+ set_rgrp_preferences(sdp);
+
sdp->sd_rindex_uptodate = 1;
return 0;
}
@@ -979,49 +1068,80 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf)
rgd->rd_free = be32_to_cpu(str->rg_free);
rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes);
rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration);
+ /* rd_data0, rd_data and rd_bitbytes already set from rindex */
+}
+
+static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
+{
+ const struct gfs2_rgrp *str = buf;
+
+ rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
+ rgl->rl_flags = str->rg_flags;
+ rgl->rl_free = str->rg_free;
+ rgl->rl_dinodes = str->rg_dinodes;
+ rgl->rl_igeneration = str->rg_igeneration;
+ rgl->__pad = 0UL;
}
static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
{
+ struct gfs2_rgrpd *next = gfs2_rgrpd_get_next(rgd);
struct gfs2_rgrp *str = buf;
+ u32 crc;
str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK);
str->rg_free = cpu_to_be32(rgd->rd_free);
str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes);
- str->__pad = cpu_to_be32(0);
+ if (next == NULL)
+ str->rg_skip = 0;
+ else if (next->rd_addr > rgd->rd_addr)
+ str->rg_skip = cpu_to_be32(next->rd_addr - rgd->rd_addr);
str->rg_igeneration = cpu_to_be64(rgd->rd_igeneration);
+ str->rg_data0 = cpu_to_be64(rgd->rd_data0);
+ str->rg_data = cpu_to_be32(rgd->rd_data);
+ str->rg_bitbytes = cpu_to_be32(rgd->rd_bitbytes);
+ str->rg_crc = 0;
+ crc = gfs2_disk_hash(buf, sizeof(struct gfs2_rgrp));
+ str->rg_crc = cpu_to_be32(crc);
+
memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
+ gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, buf);
}
static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
{
struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;
+ struct gfs2_sbd *sdp = rgd->rd_sbd;
+ int valid = 1;
- if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free ||
- rgl->rl_dinodes != str->rg_dinodes ||
- rgl->rl_igeneration != str->rg_igeneration)
- return 0;
- return 1;
-}
-
-static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
-{
- const struct gfs2_rgrp *str = buf;
-
- rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
- rgl->rl_flags = str->rg_flags;
- rgl->rl_free = str->rg_free;
- rgl->rl_dinodes = str->rg_dinodes;
- rgl->rl_igeneration = str->rg_igeneration;
- rgl->__pad = 0UL;
-}
-
-static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change)
-{
- struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
- u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change;
- rgl->rl_unlinked = cpu_to_be32(unlinked);
+ if (rgl->rl_flags != str->rg_flags) {
+ fs_warn(sdp, "GFS2: rgd: %llu lvb flag mismatch %u/%u",
+ (unsigned long long)rgd->rd_addr,
+ be32_to_cpu(rgl->rl_flags), be32_to_cpu(str->rg_flags));
+ valid = 0;
+ }
+ if (rgl->rl_free != str->rg_free) {
+ fs_warn(sdp, "GFS2: rgd: %llu lvb free mismatch %u/%u",
+ (unsigned long long)rgd->rd_addr,
+ be32_to_cpu(rgl->rl_free), be32_to_cpu(str->rg_free));
+ valid = 0;
+ }
+ if (rgl->rl_dinodes != str->rg_dinodes) {
+ fs_warn(sdp, "GFS2: rgd: %llu lvb dinode mismatch %u/%u",
+ (unsigned long long)rgd->rd_addr,
+ be32_to_cpu(rgl->rl_dinodes),
+ be32_to_cpu(str->rg_dinodes));
+ valid = 0;
+ }
+ if (rgl->rl_igeneration != str->rg_igeneration) {
+ fs_warn(sdp, "GFS2: rgd: %llu lvb igen mismatch %llu/%llu",
+ (unsigned long long)rgd->rd_addr,
+ (unsigned long long)be64_to_cpu(rgl->rl_igeneration),
+ (unsigned long long)be64_to_cpu(str->rg_igeneration));
+ valid = 0;
+ }
+ return valid;
}
static u32 count_unlinked(struct gfs2_rgrpd *rgd)
@@ -1035,8 +1155,8 @@ static u32 count_unlinked(struct gfs2_rgrpd *rgd)
goal = 0;
buffer = bi->bi_bh->b_data + bi->bi_offset;
WARN_ON(!buffer_uptodate(bi->bi_bh));
- while (goal < bi->bi_len * GFS2_NBBY) {
- goal = gfs2_bitfit(buffer, bi->bi_len, goal,
+ while (goal < bi->bi_blocks) {
+ goal = gfs2_bitfit(buffer, bi->bi_bytes, goal,
GFS2_BLKST_UNLINKED);
if (goal == BFITNOENT)
break;
@@ -1048,21 +1168,38 @@ static u32 count_unlinked(struct gfs2_rgrpd *rgd)
return count;
}
+static void rgrp_set_bitmap_flags(struct gfs2_rgrpd *rgd)
+{
+ struct gfs2_bitmap *bi;
+ int x;
+
+ if (rgd->rd_free) {
+ for (x = 0; x < rgd->rd_length; x++) {
+ bi = rgd->rd_bits + x;
+ clear_bit(GBF_FULL, &bi->bi_flags);
+ }
+ } else {
+ for (x = 0; x < rgd->rd_length; x++) {
+ bi = rgd->rd_bits + x;
+ set_bit(GBF_FULL, &bi->bi_flags);
+ }
+ }
+}
/**
- * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
- * @rgd: the struct gfs2_rgrpd describing the RG to read in
+ * gfs2_rgrp_go_instantiate - Read in a RG's header and bitmaps
+ * @gl: the glock representing the rgrpd to read in
*
* Read in all of a Resource Group's header and bitmap blocks.
- * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps.
+ * Caller must eventually call gfs2_rgrp_brelse() to free the bitmaps.
*
* Returns: errno
*/
-int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
+int gfs2_rgrp_go_instantiate(struct gfs2_glock *gl)
{
+ struct gfs2_rgrpd *rgd = gl->gl_object;
struct gfs2_sbd *sdp = rgd->rd_sbd;
- struct gfs2_glock *gl = rgd->rd_gl;
unsigned int length = rgd->rd_length;
struct gfs2_bitmap *bi;
unsigned int x, y;
@@ -1073,7 +1210,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
for (x = 0; x < length; x++) {
bi = rgd->rd_bits + x;
- error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
+ error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, 0, &bi->bi_bh);
if (error)
goto fail;
}
@@ -1090,19 +1227,18 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
}
}
- if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
- for (x = 0; x < length; x++)
- clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
- gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
- rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
- rgd->rd_free_clone = rgd->rd_free;
- }
- if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
+ gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
+ rgrp_set_bitmap_flags(rgd);
+ rgd->rd_flags |= GFS2_RDF_CHECK;
+ rgd->rd_free_clone = rgd->rd_free;
+ GLOCK_BUG_ON(rgd->rd_gl, rgd->rd_reserved);
+ /* max out the rgrp allocation failure point */
+ rgd->rd_extfail_pt = rgd->rd_free;
+ if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
rgd->rd_bits[0].bi_bh->b_data);
- }
- else if (sdp->sd_args.ar_rgrplvb) {
+ } else if (sdp->sd_args.ar_rgrplvb) {
if (!gfs2_rgrp_lvb_valid(rgd)){
gfs2_consist_rgrpd(rgd);
error = -EIO;
@@ -1120,52 +1256,44 @@ fail:
bi->bi_bh = NULL;
gfs2_assert_warn(sdp, !bi->bi_clone);
}
-
return error;
}
-int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
+static int update_rgrp_lvb(struct gfs2_rgrpd *rgd, struct gfs2_holder *gh)
{
u32 rl_flags;
- if (rgd->rd_flags & GFS2_RDF_UPTODATE)
+ if (!test_bit(GLF_INSTANTIATE_NEEDED, &gh->gh_gl->gl_flags))
return 0;
- if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
- return gfs2_rgrp_bh_get(rgd);
+ if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
+ return gfs2_instantiate(gh);
rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
rl_flags &= ~GFS2_RDF_MASK;
rgd->rd_flags &= GFS2_RDF_MASK;
- rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
+ rgd->rd_flags |= (rl_flags | GFS2_RDF_CHECK);
if (rgd->rd_rgl->rl_unlinked == 0)
rgd->rd_flags &= ~GFS2_RDF_CHECK;
rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
+ rgrp_set_bitmap_flags(rgd);
rgd->rd_free_clone = rgd->rd_free;
+ GLOCK_BUG_ON(rgd->rd_gl, rgd->rd_reserved);
+ /* max out the rgrp allocation failure point */
+ rgd->rd_extfail_pt = rgd->rd_free;
rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
return 0;
}
-int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
-{
- struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
- struct gfs2_sbd *sdp = rgd->rd_sbd;
-
- if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb)
- return 0;
- return gfs2_rgrp_bh_get((struct gfs2_rgrpd *)gh->gh_gl->gl_object);
-}
-
/**
- * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get()
- * @gh: The glock holder for the resource group
+ * gfs2_rgrp_brelse - Release RG bitmaps read in with gfs2_rgrp_bh_get()
+ * @rgd: The resource group
*
*/
-void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
+void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd)
{
- struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
int x, length = rgd->rd_length;
for (x = 0; x < length; x++) {
@@ -1175,7 +1303,7 @@ void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
bi->bi_bh = NULL;
}
}
-
+ set_bit(GLF_INSTANTIATE_NEEDED, &rgd->rd_gl->gl_flags);
}
int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
@@ -1186,12 +1314,12 @@ int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
u64 blk;
sector_t start = 0;
sector_t nr_blks = 0;
- int rv;
+ int rv = -EIO;
unsigned int x;
u32 trimmed = 0;
u8 diff;
- for (x = 0; x < bi->bi_len; x++) {
+ for (x = 0; x < bi->bi_bytes; x++) {
const u8 *clone = bi->bi_clone ? bi->bi_clone : bi->bi_bh->b_data;
clone += bi->bi_offset;
clone += x;
@@ -1240,9 +1368,9 @@ start_new_extent:
fail:
if (sdp->sd_args.ar_discard)
- fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);
+ fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem\n", rv);
sdp->sd_args.ar_discard = 0;
- return -EIO;
+ return rv;
}
/**
@@ -1257,7 +1385,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
{
struct inode *inode = file_inode(filp);
struct gfs2_sbd *sdp = GFS2_SB(inode);
- struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev);
+ struct block_device *bdev = sdp->sd_vfs->s_bdev;
struct buffer_head *bh;
struct gfs2_rgrpd *rgd;
struct gfs2_rgrpd *rgd_end;
@@ -1273,7 +1401,10 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!blk_queue_discard(q))
+ if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
+ return -EROFS;
+
+ if (!bdev_max_discard_sectors(bdev))
return -EOPNOTSUPP;
if (copy_from_user(&r, argp, sizeof(r)))
@@ -1285,8 +1416,8 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
start = r.start >> bs_shift;
end = start + (r.len >> bs_shift);
- minlen = max_t(u64, r.minlen,
- q->limits.discard_granularity) >> bs_shift;
+ minlen = max_t(u64, r.minlen, sdp->sd_sb.sb_bsize);
+ minlen = max_t(u64, minlen, bdev_discard_granularity(bdev)) >> bs_shift;
if (end <= start || minlen > sdp->sd_max_rg_data)
return -EINVAL;
@@ -1300,7 +1431,8 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
while (1) {
- ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
+ ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
+ LM_FLAG_NODE_SCOPE, &gh);
if (ret)
goto out;
@@ -1308,9 +1440,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
/* Trim each bitmap in the rgrp */
for (x = 0; x < rgd->rd_length; x++) {
struct gfs2_bitmap *bi = rgd->rd_bits + x;
+ rgrp_lock_local(rgd);
ret = gfs2_rgrp_send_discards(sdp,
rgd->rd_data0, NULL, bi, minlen,
&amt);
+ rgrp_unlock_local(rgd);
if (ret) {
gfs2_glock_dq_uninit(&gh);
goto out;
@@ -1322,10 +1456,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
if (ret == 0) {
bh = rgd->rd_bits[0].bi_bh;
+ rgrp_lock_local(rgd);
rgd->rd_flags |= GFS2_RGF_TRIMMED;
gfs2_trans_add_meta(rgd->rd_gl, bh);
gfs2_rgrp_out(rgd, bh->b_data);
- gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data);
+ rgrp_unlock_local(rgd);
gfs2_trans_end(sdp);
}
}
@@ -1354,9 +1489,8 @@ static void rs_insert(struct gfs2_inode *ip)
{
struct rb_node **newn, *parent = NULL;
int rc;
- struct gfs2_blkreserv *rs = ip->i_res;
- struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
- u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm);
+ struct gfs2_blkreserv *rs = &ip->i_res;
+ struct gfs2_rgrpd *rgd = rs->rs_rgd;
BUG_ON(gfs2_rs_active(rs));
@@ -1367,7 +1501,7 @@ static void rs_insert(struct gfs2_inode *ip)
rb_entry(*newn, struct gfs2_blkreserv, rs_node);
parent = *newn;
- rc = rs_cmp(fsblock, rs->rs_free, cur);
+ rc = rs_cmp(rs->rs_start, rs->rs_requested, cur);
if (rc > 0)
newn = &((*newn)->rb_right);
else if (rc < 0)
@@ -1383,37 +1517,75 @@ static void rs_insert(struct gfs2_inode *ip)
rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
/* Do our rgrp accounting for the reservation */
- rgd->rd_reserved += rs->rs_free; /* blocks reserved */
+ rgd->rd_requested += rs->rs_requested; /* blocks requested */
spin_unlock(&rgd->rd_rsspin);
trace_gfs2_rs(rs, TRACE_RS_INSERT);
}
/**
+ * rgd_free - return the number of free blocks we can allocate
+ * @rgd: the resource group
+ * @rs: The reservation to free
+ *
+ * This function returns the number of free blocks for an rgrp.
+ * That's the clone-free blocks (blocks that are free, not including those
+ * still being used for unlinked files that haven't been deleted.)
+ *
+ * It also subtracts any blocks reserved by someone else, but does not
+ * include free blocks that are still part of our current reservation,
+ * because obviously we can (and will) allocate them.
+ */
+static inline u32 rgd_free(struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *rs)
+{
+ u32 tot_reserved, tot_free;
+
+ if (WARN_ON_ONCE(rgd->rd_requested < rs->rs_requested))
+ return 0;
+ tot_reserved = rgd->rd_requested - rs->rs_requested;
+
+ if (rgd->rd_free_clone < tot_reserved)
+ tot_reserved = 0;
+
+ tot_free = rgd->rd_free_clone - tot_reserved;
+
+ return tot_free;
+}
+
+/**
* rg_mblk_search - find a group of multiple free blocks to form a reservation
* @rgd: the resource group descriptor
* @ip: pointer to the inode for which we're reserving blocks
- * @requested: number of blocks required for this allocation
+ * @ap: the allocation parameters
*
*/
static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
- unsigned requested)
+ const struct gfs2_alloc_parms *ap)
{
struct gfs2_rbm rbm = { .rgd = rgd, };
u64 goal;
- struct gfs2_blkreserv *rs = ip->i_res;
+ struct gfs2_blkreserv *rs = &ip->i_res;
u32 extlen;
- u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved;
+ u32 free_blocks, blocks_available;
int ret;
struct inode *inode = &ip->i_inode;
+ spin_lock(&rgd->rd_rsspin);
+ free_blocks = rgd_free(rgd, rs);
+ if (rgd->rd_free_clone < rgd->rd_requested)
+ free_blocks = 0;
+ blocks_available = rgd->rd_free_clone - rgd->rd_reserved;
+ if (rgd == rs->rs_rgd)
+ blocks_available += rs->rs_reserved;
+ spin_unlock(&rgd->rd_rsspin);
+
if (S_ISDIR(inode->i_mode))
extlen = 1;
else {
- extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested);
- extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);
+ extlen = max_t(u32, atomic_read(&ip->i_sizehint), ap->target);
+ extlen = clamp(extlen, (u32)RGRP_RSRV_MINBLKS, free_blocks);
}
- if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
+ if (free_blocks < extlen || blocks_available < extlen)
return;
/* Find bitmap block that contains bits for goal block */
@@ -1425,11 +1597,10 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
return;
- ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, extlen, ip, true);
+ ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, &ip->i_res, true);
if (ret == 0) {
- rs->rs_rbm = rbm;
- rs->rs_free = extlen;
- rs->rs_inum = ip->i_no_addr;
+ rs->rs_start = gfs2_rbm_to_block(&rbm);
+ rs->rs_requested = extlen;
rs_insert(ip);
} else {
if (goal == rgd->rd_last_alloc + rgd->rd_data0)
@@ -1442,7 +1613,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
* @rgd: The resource group
* @block: The starting block
* @length: The required length
- * @ip: Ignore any reservations for this inode
+ * @ignore_rs: Reservation to ignore
*
* If the block does not appear in any reservation, then return the
* block number unchanged. If it does appear in the reservation, then
@@ -1452,7 +1623,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
u32 length,
- const struct gfs2_inode *ip)
+ struct gfs2_blkreserv *ignore_rs)
{
struct gfs2_blkreserv *rs;
struct rb_node *n;
@@ -1472,8 +1643,8 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
}
if (n) {
- while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) {
- block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
+ while (rs_cmp(block, length, rs) == 0 && rs != ignore_rs) {
+ block = rs->rs_start + rs->rs_requested;
n = n->rb_right;
if (n == NULL)
break;
@@ -1488,8 +1659,9 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
/**
* gfs2_reservation_check_and_update - Check for reservations during block alloc
* @rbm: The current position in the resource group
- * @ip: The inode for which we are searching for blocks
+ * @rs: Our own reservation
* @minext: The minimum extent length
+ * @maxext: A pointer to the maximum extent structure
*
* This checks the current position in the rgrp to see whether there is
* a reservation covering this block. If not then this function is a
@@ -1501,22 +1673,21 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
*/
static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
- const struct gfs2_inode *ip,
- u32 minext)
+ struct gfs2_blkreserv *rs,
+ u32 minext,
+ struct gfs2_extent *maxext)
{
u64 block = gfs2_rbm_to_block(rbm);
u32 extlen = 1;
u64 nblock;
- int ret;
/*
* If we have a minimum extent length, then skip over any extent
* which is less than the min extent length in size.
*/
- if (minext) {
+ if (minext > 1) {
extlen = gfs2_free_extlen(rbm, minext);
- nblock = block + extlen;
- if (extlen < minext)
+ if (extlen <= maxext->len)
goto fail;
}
@@ -1524,13 +1695,24 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
* Check the extent which has been found against the reservations
* and skip if parts of it are already reserved
*/
- nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip);
- if (nblock == block)
- return 0;
+ nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, rs);
+ if (nblock == block) {
+ if (!minext || extlen >= minext)
+ return 0;
+
+ if (extlen > maxext->len) {
+ maxext->len = extlen;
+ maxext->rbm = *rbm;
+ }
+ } else {
+ u64 len = nblock - block;
+ if (len >= (u64)1 << 32)
+ return -E2BIG;
+ extlen = len;
+ }
fail:
- ret = gfs2_rbm_from_block(rbm, nblock);
- if (ret < 0)
- return ret;
+ if (gfs2_rbm_add(rbm, extlen))
+ return -E2BIG;
return 1;
}
@@ -1538,92 +1720,112 @@ fail:
* gfs2_rbm_find - Look for blocks of a particular state
* @rbm: Value/result starting position and final position
* @state: The state which we want to find
- * @minext: The requested extent length (0 for a single block)
- * @ip: If set, check for reservations
+ * @minext: Pointer to the requested extent length
+ * This is updated to be the actual reservation size.
+ * @rs: Our own reservation (NULL to skip checking for reservations)
* @nowrap: Stop looking at the end of the rgrp, rather than wrapping
* around until we've reached the starting point.
*
* Side effects:
* - If looking for free blocks, we set GBF_FULL on each bitmap which
* has no free blocks in it.
+ * - If looking for free blocks, we set rd_extfail_pt on each rgrp which
+ * has come up short on a free block search.
*
* Returns: 0 on success, -ENOSPC if there is no block of the requested state
*/
-static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext,
- const struct gfs2_inode *ip, bool nowrap)
+static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
+ struct gfs2_blkreserv *rs, bool nowrap)
{
+ bool scan_from_start = rbm->bii == 0 && rbm->offset == 0;
struct buffer_head *bh;
- struct gfs2_bitmap *initial_bi;
- u32 initial_offset;
+ int last_bii;
u32 offset;
u8 *buffer;
- int index;
- int n = 0;
- int iters = rbm->rgd->rd_length;
+ bool wrapped = false;
int ret;
+ struct gfs2_bitmap *bi;
+ struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, };
- /* If we are not starting at the beginning of a bitmap, then we
- * need to add one to the bitmap count to ensure that we search
- * the starting bitmap twice.
+ /*
+ * Determine the last bitmap to search. If we're not starting at the
+ * beginning of a bitmap, we need to search that bitmap twice to scan
+ * the entire resource group.
*/
- if (rbm->offset != 0)
- iters++;
+ last_bii = rbm->bii - (rbm->offset == 0);
while(1) {
- if (test_bit(GBF_FULL, &rbm->bi->bi_flags) &&
+ bi = rbm_bi(rbm);
+ if (test_bit(GBF_FULL, &bi->bi_flags) &&
(state == GFS2_BLKST_FREE))
goto next_bitmap;
- bh = rbm->bi->bi_bh;
- buffer = bh->b_data + rbm->bi->bi_offset;
+ bh = bi->bi_bh;
+ buffer = bh->b_data + bi->bi_offset;
WARN_ON(!buffer_uptodate(bh));
- if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone)
- buffer = rbm->bi->bi_clone + rbm->bi->bi_offset;
- initial_offset = rbm->offset;
- offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, state);
- if (offset == BFITNOENT)
- goto bitmap_full;
+ if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
+ buffer = bi->bi_clone + bi->bi_offset;
+ offset = gfs2_bitfit(buffer, bi->bi_bytes, rbm->offset, state);
+ if (offset == BFITNOENT) {
+ if (state == GFS2_BLKST_FREE && rbm->offset == 0)
+ set_bit(GBF_FULL, &bi->bi_flags);
+ goto next_bitmap;
+ }
rbm->offset = offset;
- if (ip == NULL)
+ if (!rs || !minext)
return 0;
- initial_bi = rbm->bi;
- ret = gfs2_reservation_check_and_update(rbm, ip, minext);
+ ret = gfs2_reservation_check_and_update(rbm, rs, *minext,
+ &maxext);
if (ret == 0)
return 0;
- if (ret > 0) {
- n += (rbm->bi - initial_bi);
+ if (ret > 0)
goto next_iter;
- }
if (ret == -E2BIG) {
- index = 0;
+ rbm->bii = 0;
rbm->offset = 0;
- n += (rbm->bi - initial_bi);
goto res_covered_end_of_rgrp;
}
return ret;
-bitmap_full: /* Mark bitmap as full and fall through */
- if ((state == GFS2_BLKST_FREE) && initial_offset == 0)
- set_bit(GBF_FULL, &rbm->bi->bi_flags);
-
next_bitmap: /* Find next bitmap in the rgrp */
rbm->offset = 0;
- index = rbm->bi - rbm->rgd->rd_bits;
- index++;
- if (index == rbm->rgd->rd_length)
- index = 0;
+ rbm->bii++;
+ if (rbm->bii == rbm->rgd->rd_length)
+ rbm->bii = 0;
res_covered_end_of_rgrp:
- rbm->bi = &rbm->rgd->rd_bits[index];
- if ((index == 0) && nowrap)
- break;
- n++;
+ if (rbm->bii == 0) {
+ if (wrapped)
+ break;
+ wrapped = true;
+ if (nowrap)
+ break;
+ }
next_iter:
- if (n >= iters)
+ /* Have we scanned the entire resource group? */
+ if (wrapped && rbm->bii > last_bii)
break;
}
+ if (state != GFS2_BLKST_FREE)
+ return -ENOSPC;
+
+ /* If the extent was too small, and it's smaller than the smallest
+ to have failed before, remember for future reference that it's
+ useless to search this rgrp again for this amount or more. */
+ if (wrapped && (scan_from_start || rbm->bii > last_bii) &&
+ *minext < rbm->rgd->rd_extfail_pt)
+ rbm->rgd->rd_extfail_pt = *minext - 1;
+
+ /* If the maximum extent we found is big enough to fulfill the
+ minimum requirements, use it anyway. */
+ if (maxext.len) {
+ *rbm = maxext.rbm;
+ *minext = maxext.len;
+ return 0;
+ }
+
return -ENOSPC;
}
@@ -1645,12 +1847,11 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
struct gfs2_inode *ip;
int error;
int found = 0;
- struct gfs2_rbm rbm = { .rgd = rgd, .bi = rgd->rd_bits, .offset = 0 };
+ struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 };
while (1) {
- down_write(&sdp->sd_log_flush_lock);
- error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, 0, NULL, true);
- up_write(&sdp->sd_log_flush_lock);
+ error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL,
+ true);
if (error == -ENOSPC)
break;
if (WARN_ON_ONCE(error))
@@ -1665,7 +1866,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
continue;
*last_unlinked = block;
- error = gfs2_glock_get(sdp, block, &gfs2_inode_glops, CREATE, &gl);
+ error = gfs2_glock_get(sdp, block, &gfs2_iopen_glops, CREATE, &gl);
if (error)
continue;
@@ -1678,7 +1879,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
*/
ip = gl->gl_object;
- if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
+ if (ip || !gfs2_queue_verify_delete(gl, false))
gfs2_glock_put(gl);
else
found++;
@@ -1722,17 +1923,26 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
{
const struct gfs2_glock *gl = rgd->rd_gl;
- const struct gfs2_sbd *sdp = gl->gl_sbd;
+ const struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct gfs2_lkstats *st;
- s64 r_dcount, l_dcount;
- s64 r_srttb, l_srttb;
+ u64 r_dcount, l_dcount;
+ u64 l_srttb, a_srttb = 0;
s64 srttb_diff;
- s64 sqr_diff;
- s64 var;
+ u64 sqr_diff;
+ u64 var;
+ int cpu, nonzero = 0;
preempt_disable();
+ for_each_present_cpu(cpu) {
+ st = &per_cpu_ptr(sdp->sd_lkstats, cpu)->lkstats[LM_TYPE_RGRP];
+ if (st->stats[GFS2_LKS_SRTTB]) {
+ a_srttb += st->stats[GFS2_LKS_SRTTB];
+ nonzero++;
+ }
+ }
st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP];
- r_srttb = st->stats[GFS2_LKS_SRTTB];
+ if (nonzero)
+ do_div(a_srttb, nonzero);
r_dcount = st->stats[GFS2_LKS_DCOUNT];
var = st->stats[GFS2_LKS_SRTTVARB] +
gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
@@ -1741,10 +1951,10 @@ static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];
- if ((l_dcount < 1) || (r_dcount < 1) || (r_srttb == 0))
+ if ((l_dcount < 1) || (r_dcount < 1) || (a_srttb == 0))
return false;
- srttb_diff = r_srttb - l_srttb;
+ srttb_diff = a_srttb - l_srttb;
sqr_diff = srttb_diff * srttb_diff;
var *= 2;
@@ -1757,7 +1967,7 @@ static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
}
/**
- * gfs2_rgrp_used_recently
+ * gfs2_rgrp_used_recently - test if an rgrp has been used recently
* @rs: The block reservation with the rgrp to test
* @msecs: The time limit in milliseconds
*
@@ -1769,7 +1979,7 @@ static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
u64 tdiff;
tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
- rs->rs_rbm.rgd->rd_gl->gl_dstamp));
+ rs->rs_rgd->rd_gl->gl_dstamp));
return tdiff > (msecs * 1000 * 1000);
}
@@ -1777,10 +1987,8 @@ static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
{
const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- u32 skip;
- get_random_bytes(&skip, sizeof(skip));
- return skip % sdp->sd_rgrps;
+ return get_random_u32() % sdp->sd_rgrps;
}
static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
@@ -1798,104 +2006,164 @@ static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *b
}
/**
+ * fast_to_acquire - determine if a resource group will be fast to acquire
+ * @rgd: The rgrp
+ *
+ * If this is one of our preferred rgrps, it should be quicker to acquire,
+ * because we tried to set ourselves up as dlm lock master.
+ */
+static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
+{
+ struct gfs2_glock *gl = rgd->rd_gl;
+
+ if (gl->gl_state != LM_ST_UNLOCKED && list_empty(&gl->gl_holders) &&
+ !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
+ !test_bit(GLF_DEMOTE, &gl->gl_flags))
+ return 1;
+ if (rgd->rd_flags & GFS2_RDF_PREFERRED)
+ return 1;
+ return 0;
+}
+
+/**
* gfs2_inplace_reserve - Reserve space in the filesystem
* @ip: the inode to reserve space for
- * @requested: the number of blocks to be reserved
+ * @ap: the allocation parameters
*
- * Returns: errno
+ * We try our best to find an rgrp that has at least ap->target blocks
+ * available. After a couple of passes (loops == 2), the prospects of finding
+ * such an rgrp diminish. At this stage, we return the first rgrp that has
+ * at least ap->min_target blocks available.
+ *
+ * Returns: 0 on success,
+ * -ENOMEM if a suitable rgrp can't be found
+ * errno otherwise
*/
-int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags)
+int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *begin = NULL;
- struct gfs2_blkreserv *rs = ip->i_res;
- int error = 0, rg_locked, flags = 0;
+ struct gfs2_blkreserv *rs = &ip->i_res;
+ int error = 0, flags = LM_FLAG_NODE_SCOPE;
+ bool rg_locked;
u64 last_unlinked = NO_BLOCK;
+ u32 target = ap->target;
int loops = 0;
- u32 skip = 0;
+ u32 free_blocks, blocks_available, skip = 0;
+
+ BUG_ON(rs->rs_reserved);
if (sdp->sd_args.ar_rgrplvb)
flags |= GL_SKIP;
- if (gfs2_assert_warn(sdp, requested))
+ if (gfs2_assert_warn(sdp, target))
return -EINVAL;
if (gfs2_rs_active(rs)) {
- begin = rs->rs_rbm.rgd;
- flags = 0; /* Yoda: Do or do not. There is no try */
- } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
- rs->rs_rbm.rgd = begin = ip->i_rgd;
+ begin = rs->rs_rgd;
+ } else if (rs->rs_rgd &&
+ rgrp_contains_block(rs->rs_rgd, ip->i_goal)) {
+ begin = rs->rs_rgd;
} else {
- rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
+ check_and_update_goal(ip);
+ rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
}
- if (S_ISDIR(ip->i_inode.i_mode) && (aflags & GFS2_AF_ORLOV))
+ if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV))
skip = gfs2_orlov_skip(ip);
- if (rs->rs_rbm.rgd == NULL)
+ if (rs->rs_rgd == NULL)
return -EBADSLT;
while (loops < 3) {
- rg_locked = 1;
+ struct gfs2_rgrpd *rgd;
- if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
- rg_locked = 0;
+ rg_locked = gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl);
+ if (rg_locked) {
+ rgrp_lock_local(rs->rs_rgd);
+ } else {
if (skip && skip--)
goto next_rgrp;
- if (!gfs2_rs_active(rs) && (loops < 2) &&
- gfs2_rgrp_used_recently(rs, 1000) &&
- gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
- goto next_rgrp;
- error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
+ if (!gfs2_rs_active(rs)) {
+ if (loops == 0 &&
+ !fast_to_acquire(rs->rs_rgd))
+ goto next_rgrp;
+ if ((loops < 2) &&
+ gfs2_rgrp_used_recently(rs, 1000) &&
+ gfs2_rgrp_congested(rs->rs_rgd, loops))
+ goto next_rgrp;
+ }
+ error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl,
LM_ST_EXCLUSIVE, flags,
- &rs->rs_rgd_gh);
+ &ip->i_rgd_gh);
if (unlikely(error))
return error;
+ rgrp_lock_local(rs->rs_rgd);
if (!gfs2_rs_active(rs) && (loops < 2) &&
- gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
+ gfs2_rgrp_congested(rs->rs_rgd, loops))
goto skip_rgrp;
if (sdp->sd_args.ar_rgrplvb) {
- error = update_rgrp_lvb(rs->rs_rbm.rgd);
+ error = update_rgrp_lvb(rs->rs_rgd,
+ &ip->i_rgd_gh);
if (unlikely(error)) {
- gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
+ rgrp_unlock_local(rs->rs_rgd);
+ gfs2_glock_dq_uninit(&ip->i_rgd_gh);
return error;
}
}
}
- /* Skip unuseable resource groups */
- if (rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
+ /* Skip unusable resource groups */
+ if ((rs->rs_rgd->rd_flags & (GFS2_RGF_NOALLOC |
+ GFS2_RDF_ERROR)) ||
+ (loops == 0 && target > rs->rs_rgd->rd_extfail_pt))
goto skip_rgrp;
- if (sdp->sd_args.ar_rgrplvb)
- gfs2_rgrp_bh_get(rs->rs_rbm.rgd);
+ if (sdp->sd_args.ar_rgrplvb) {
+ error = gfs2_instantiate(&ip->i_rgd_gh);
+ if (error)
+ goto skip_rgrp;
+ }
/* Get a reservation if we don't already have one */
if (!gfs2_rs_active(rs))
- rg_mblk_search(rs->rs_rbm.rgd, ip, requested);
+ rg_mblk_search(rs->rs_rgd, ip, ap);
/* Skip rgrps when we can't get a reservation on first pass */
if (!gfs2_rs_active(rs) && (loops < 1))
goto check_rgrp;
/* If rgrp has enough free space, use it */
- if (rs->rs_rbm.rgd->rd_free_clone >= requested) {
- ip->i_rgd = rs->rs_rbm.rgd;
- return 0;
+ rgd = rs->rs_rgd;
+ spin_lock(&rgd->rd_rsspin);
+ free_blocks = rgd_free(rgd, rs);
+ blocks_available = rgd->rd_free_clone - rgd->rd_reserved;
+ if (free_blocks < target || blocks_available < target) {
+ spin_unlock(&rgd->rd_rsspin);
+ goto check_rgrp;
}
-
- /* Drop reservation, if we couldn't use reserved rgrp */
- if (gfs2_rs_active(rs))
- gfs2_rs_deltree(rs);
+ rs->rs_reserved = ap->target;
+ if (rs->rs_reserved > blocks_available)
+ rs->rs_reserved = blocks_available;
+ rgd->rd_reserved += rs->rs_reserved;
+ spin_unlock(&rgd->rd_rsspin);
+ rgrp_unlock_local(rs->rs_rgd);
+ return 0;
check_rgrp:
/* Check for unlinked inodes which can be reclaimed */
- if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
- try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
+ if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK)
+ try_rgrp_unlink(rs->rs_rgd, &last_unlinked,
ip->i_no_addr);
skip_rgrp:
+ rgrp_unlock_local(rs->rs_rgd);
+
+ /* Drop reservation, if we couldn't use reserved rgrp */
+ if (gfs2_rs_active(rs))
+ gfs2_rs_deltree(rs);
+
/* Unlock rgrp if required */
if (!rg_locked)
- gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
+ gfs2_glock_dq_uninit(&ip->i_rgd_gh);
next_rgrp:
/* Find the next rgrp, and continue looking */
- if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
+ if (gfs2_select_rgrp(&rs->rs_rgd, begin))
continue;
if (skip)
continue;
@@ -1912,8 +2180,12 @@ next_rgrp:
return error;
}
/* Flushing the log may release space */
- if (loops == 2)
- gfs2_log_flush(sdp, NULL);
+ if (loops == 2) {
+ if (ap->min_target)
+ target = ap->min_target;
+ gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
+ GFS2_LFC_INPLACE_RESERVE);
+ }
}
return -ENOSPC;
@@ -1928,32 +2200,21 @@ next_rgrp:
void gfs2_inplace_release(struct gfs2_inode *ip)
{
- struct gfs2_blkreserv *rs = ip->i_res;
-
- if (rs->rs_rgd_gh.gh_gl)
- gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
-}
-
-/**
- * gfs2_get_block_type - Check a block in a RG is of given type
- * @rgd: the resource group holding the block
- * @block: the block number
- *
- * Returns: The block type (GFS2_BLKST_*)
- */
-
-static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
-{
- struct gfs2_rbm rbm = { .rgd = rgd, };
- int ret;
+ struct gfs2_blkreserv *rs = &ip->i_res;
- ret = gfs2_rbm_from_block(&rbm, block);
- WARN_ON_ONCE(ret != 0);
+ if (rs->rs_reserved) {
+ struct gfs2_rgrpd *rgd = rs->rs_rgd;
- return gfs2_testbit(&rbm);
+ spin_lock(&rgd->rd_rsspin);
+ GLOCK_BUG_ON(rgd->rd_gl, rgd->rd_reserved < rs->rs_reserved);
+ rgd->rd_reserved -= rs->rs_reserved;
+ spin_unlock(&rgd->rd_rsspin);
+ rs->rs_reserved = 0;
+ }
+ if (gfs2_holder_initialized(&ip->i_rgd_gh))
+ gfs2_glock_dq_uninit(&ip->i_rgd_gh);
}
-
/**
* gfs2_alloc_extent - allocate an extent from a given bitmap
* @rbm: the resource group information
@@ -1973,14 +2234,14 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
*n = 1;
block = gfs2_rbm_to_block(rbm);
- gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm->bi->bi_bh);
+ gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm_bi(rbm)->bi_bh);
gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
block++;
while (*n < elen) {
ret = gfs2_rbm_from_block(&pos, block);
- if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
+ if (ret || gfs2_testbit(&pos, true) != GFS2_BLKST_FREE)
break;
- gfs2_trans_add_meta(pos.rgd->rd_gl, pos.bi->bi_bh);
+ gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh);
gfs2_setbit(&pos, true, GFS2_BLKST_USED);
(*n)++;
block++;
@@ -1990,77 +2251,84 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
/**
* rgblk_free - Change alloc state of given block(s)
* @sdp: the filesystem
+ * @rgd: the resource group the blocks are in
* @bstart: the start of a run of blocks to free
* @blen: the length of the block run (all must lie within ONE RG!)
* @new_state: GFS2_BLKST_XXX the after-allocation block state
- *
- * Returns: Resource group containing the block(s)
*/
-static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
- u32 blen, unsigned char new_state)
+static void rgblk_free(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd,
+ u64 bstart, u32 blen, unsigned char new_state)
{
struct gfs2_rbm rbm;
+ struct gfs2_bitmap *bi, *bi_prev = NULL;
- rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
- if (!rbm.rgd) {
- if (gfs2_consist(sdp))
- fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
- return NULL;
- }
-
+ rbm.rgd = rgd;
+ if (WARN_ON_ONCE(gfs2_rbm_from_block(&rbm, bstart)))
+ return;
while (blen--) {
- gfs2_rbm_from_block(&rbm, bstart);
- bstart++;
- if (!rbm.bi->bi_clone) {
- rbm.bi->bi_clone = kmalloc(rbm.bi->bi_bh->b_size,
- GFP_NOFS | __GFP_NOFAIL);
- memcpy(rbm.bi->bi_clone + rbm.bi->bi_offset,
- rbm.bi->bi_bh->b_data + rbm.bi->bi_offset,
- rbm.bi->bi_len);
+ bi = rbm_bi(&rbm);
+ if (bi != bi_prev) {
+ if (!bi->bi_clone) {
+ bi->bi_clone = kmalloc(bi->bi_bh->b_size,
+ GFP_NOFS | __GFP_NOFAIL);
+ memcpy(bi->bi_clone + bi->bi_offset,
+ bi->bi_bh->b_data + bi->bi_offset,
+ bi->bi_bytes);
+ }
+ gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh);
+ bi_prev = bi;
}
- gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.bi->bi_bh);
gfs2_setbit(&rbm, false, new_state);
+ gfs2_rbm_add(&rbm, 1);
}
-
- return rbm.rgd;
}
/**
* gfs2_rgrp_dump - print out an rgrp
* @seq: The iterator
- * @gl: The glock in question
+ * @rgd: The rgrp in question
+ * @fs_id_buf: pointer to file system id (if requested)
*
*/
-int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
+void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd,
+ const char *fs_id_buf)
{
- struct gfs2_rgrpd *rgd = gl->gl_object;
struct gfs2_blkreserv *trs;
const struct rb_node *n;
- if (rgd == NULL)
- return 0;
- gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n",
+ spin_lock(&rgd->rd_rsspin);
+ gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u q:%u r:%u e:%u\n",
+ fs_id_buf,
(unsigned long long)rgd->rd_addr, rgd->rd_flags,
rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
- rgd->rd_reserved);
- spin_lock(&rgd->rd_rsspin);
+ rgd->rd_requested, rgd->rd_reserved, rgd->rd_extfail_pt);
+ if (rgd->rd_sbd->sd_args.ar_rgrplvb && rgd->rd_rgl) {
+ struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
+
+ gfs2_print_dbg(seq, "%s L: f:%02x b:%u i:%u\n", fs_id_buf,
+ be32_to_cpu(rgl->rl_flags),
+ be32_to_cpu(rgl->rl_free),
+ be32_to_cpu(rgl->rl_dinodes));
+ }
for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
- dump_rs(seq, trs);
+ dump_rs(seq, trs, fs_id_buf);
}
spin_unlock(&rgd->rd_rsspin);
- return 0;
}
static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
{
struct gfs2_sbd *sdp = rgd->rd_sbd;
+ char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
+
fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
(unsigned long long)rgd->rd_addr);
fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
- gfs2_rgrp_dump(NULL, rgd->rd_gl);
+ sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
+ gfs2_rgrp_dump(NULL, rgd, fs_id_buf);
rgd->rd_flags |= GFS2_RDF_ERROR;
}
@@ -2078,28 +2346,61 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
static void gfs2_adjust_reservation(struct gfs2_inode *ip,
const struct gfs2_rbm *rbm, unsigned len)
{
- struct gfs2_blkreserv *rs = ip->i_res;
+ struct gfs2_blkreserv *rs = &ip->i_res;
struct gfs2_rgrpd *rgd = rbm->rgd;
- unsigned rlen;
- u64 block;
- int ret;
- spin_lock(&rgd->rd_rsspin);
+ BUG_ON(rs->rs_reserved < len);
+ rs->rs_reserved -= len;
if (gfs2_rs_active(rs)) {
- if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) {
- block = gfs2_rbm_to_block(rbm);
- ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len);
- rlen = min(rs->rs_free, len);
- rs->rs_free -= rlen;
- rgd->rd_reserved -= rlen;
+ u64 start = gfs2_rbm_to_block(rbm);
+
+ if (rs->rs_start == start) {
+ unsigned int rlen;
+
+ rs->rs_start += len;
+ rlen = min(rs->rs_requested, len);
+ rs->rs_requested -= rlen;
+ rgd->rd_requested -= rlen;
trace_gfs2_rs(rs, TRACE_RS_CLAIM);
- if (rs->rs_free && !ret)
- goto out;
+ if (rs->rs_start < rgd->rd_data0 + rgd->rd_data &&
+ rs->rs_requested)
+ return;
+ /* We used up our block reservation, so we should
+ reserve more blocks next time. */
+ atomic_add(RGRP_RSRV_ADDBLKS, &ip->i_sizehint);
}
__rs_deltree(rs);
}
-out:
- spin_unlock(&rgd->rd_rsspin);
+}
+
+/**
+ * gfs2_set_alloc_start - Set starting point for block allocation
+ * @rbm: The rbm which will be set to the required location
+ * @ip: The gfs2 inode
+ * @dinode: Flag to say if allocation includes a new inode
+ *
+ * This sets the starting point from the reservation if one is active
+ * otherwise it falls back to guessing a start point based on the
+ * inode's goal block or the last allocation point in the rgrp.
+ */
+
+static void gfs2_set_alloc_start(struct gfs2_rbm *rbm,
+ const struct gfs2_inode *ip, bool dinode)
+{
+ u64 goal;
+
+ if (gfs2_rs_active(&ip->i_res)) {
+ goal = ip->i_res.rs_start;
+ } else {
+ if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal))
+ goal = ip->i_goal;
+ else
+ goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0;
+ }
+ if (WARN_ON_ONCE(gfs2_rbm_from_block(rbm, goal))) {
+ rbm->bii = 0;
+ rbm->offset = 0;
+ }
}
/**
@@ -2108,56 +2409,46 @@ out:
* @bn: Used to return the starting block number
* @nblocks: requested number of blocks/extent length (value/result)
* @dinode: 1 if we're allocating a dinode block, else 0
- * @generation: the generation number of the inode
*
* Returns: 0 or error
*/
int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
- bool dinode, u64 *generation)
+ bool dinode)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct buffer_head *dibh;
- struct gfs2_rbm rbm = { .rgd = ip->i_rgd, };
- unsigned int ndata;
- u64 goal;
+ struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rgd, };
u64 block; /* block, within the file system scope */
- int error;
-
- if (gfs2_rs_active(ip->i_res))
- goal = gfs2_rbm_to_block(&ip->i_res->rs_rbm);
- else if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal))
- goal = ip->i_goal;
- else
- goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0;
+ u32 minext = 1;
+ int error = -ENOSPC;
- gfs2_rbm_from_block(&rbm, goal);
- error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, ip, false);
+ BUG_ON(ip->i_res.rs_reserved < *nblocks);
+ rgrp_lock_local(rbm.rgd);
+ if (gfs2_rs_active(&ip->i_res)) {
+ gfs2_set_alloc_start(&rbm, ip, dinode);
+ error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, &ip->i_res, false);
+ }
if (error == -ENOSPC) {
- gfs2_rbm_from_block(&rbm, goal);
- error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, NULL, false);
+ gfs2_set_alloc_start(&rbm, ip, dinode);
+ error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, NULL, false);
}
/* Since all blocks are reserved in advance, this shouldn't happen */
if (error) {
- fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d\n",
+ fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d fail_pt=%d\n",
(unsigned long long)ip->i_no_addr, error, *nblocks,
- test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags));
+ test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags),
+ rbm.rgd->rd_extfail_pt);
goto rgrp_error;
}
gfs2_alloc_extent(&rbm, dinode, nblocks);
block = gfs2_rbm_to_block(&rbm);
rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
- if (gfs2_rs_active(ip->i_res))
- gfs2_adjust_reservation(ip, &rbm, *nblocks);
- ndata = *nblocks;
- if (dinode)
- ndata--;
-
if (!dinode) {
- ip->i_goal = block + ndata - 1;
+ ip->i_goal = block + *nblocks - 1;
error = gfs2_meta_inode_buffer(ip, &dibh);
if (error == 0) {
struct gfs2_dinode *di =
@@ -2168,36 +2459,47 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
brelse(dibh);
}
}
- if (rbm.rgd->rd_free < *nblocks) {
- printk(KERN_WARNING "nblocks=%u\n", *nblocks);
+ spin_lock(&rbm.rgd->rd_rsspin);
+ gfs2_adjust_reservation(ip, &rbm, *nblocks);
+ if (rbm.rgd->rd_free < *nblocks || rbm.rgd->rd_reserved < *nblocks) {
+ fs_warn(sdp, "nblocks=%u\n", *nblocks);
+ spin_unlock(&rbm.rgd->rd_rsspin);
goto rgrp_error;
}
-
+ GLOCK_BUG_ON(rbm.rgd->rd_gl, rbm.rgd->rd_reserved < *nblocks);
+ GLOCK_BUG_ON(rbm.rgd->rd_gl, rbm.rgd->rd_free_clone < *nblocks);
+ GLOCK_BUG_ON(rbm.rgd->rd_gl, rbm.rgd->rd_free < *nblocks);
+ rbm.rgd->rd_reserved -= *nblocks;
+ rbm.rgd->rd_free_clone -= *nblocks;
rbm.rgd->rd_free -= *nblocks;
+ spin_unlock(&rbm.rgd->rd_rsspin);
if (dinode) {
+ u64 generation;
+
rbm.rgd->rd_dinodes++;
- *generation = rbm.rgd->rd_igeneration++;
- if (*generation == 0)
- *generation = rbm.rgd->rd_igeneration++;
+ generation = rbm.rgd->rd_igeneration++;
+ if (generation == 0)
+ generation = rbm.rgd->rd_igeneration++;
+ ip->i_generation = generation;
}
gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
- gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data);
+ rgrp_unlock_local(rbm.rgd);
gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
if (dinode)
- gfs2_trans_add_unrevoke(sdp, block, 1);
+ gfs2_trans_remove_revoke(sdp, block, *nblocks);
gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);
- rbm.rgd->rd_free_clone -= *nblocks;
trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
*bn = block;
return 0;
rgrp_error:
+ rgrp_unlock_local(rbm.rgd);
gfs2_rgrp_error(rbm.rgd);
return -EIO;
}
@@ -2205,45 +2507,47 @@ rgrp_error:
/**
* __gfs2_free_blocks - free a contiguous run of block(s)
* @ip: the inode these blocks are being freed from
+ * @rgd: the resource group the blocks are in
* @bstart: first block of a run of contiguous blocks
* @blen: the length of the block run
* @meta: 1 if the blocks represent metadata
*
*/
-void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
+void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
+ u64 bstart, u32 blen, int meta)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- struct gfs2_rgrpd *rgd;
- rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
- if (!rgd)
- return;
+ rgrp_lock_local(rgd);
+ rgblk_free(sdp, rgd, bstart, blen, GFS2_BLKST_FREE);
trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
rgd->rd_free += blen;
rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
- gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
+ rgrp_unlock_local(rgd);
/* Directories keep their data in the metadata address space */
- if (meta || ip->i_depth)
- gfs2_meta_wipe(ip, bstart, blen);
+ if (meta || ip->i_depth || gfs2_is_jdata(ip))
+ gfs2_journal_wipe(ip, bstart, blen);
}
/**
* gfs2_free_meta - free a contiguous run of data block(s)
* @ip: the inode these blocks are being freed from
+ * @rgd: the resource group the blocks are in
* @bstart: first block of a run of contiguous blocks
* @blen: the length of the block run
*
*/
-void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
+void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
+ u64 bstart, u32 blen)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- __gfs2_free_blocks(ip, bstart, blen, 1);
+ __gfs2_free_blocks(ip, rgd, bstart, blen, 1);
gfs2_statfs_change(sdp, 0, +blen, 0);
gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
}
@@ -2255,26 +2559,24 @@ void gfs2_unlink_di(struct inode *inode)
struct gfs2_rgrpd *rgd;
u64 blkno = ip->i_no_addr;
- rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
+ rgd = gfs2_blk2rgrpd(sdp, blkno, true);
if (!rgd)
return;
+ rgrp_lock_local(rgd);
+ rgblk_free(sdp, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
- gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
- update_rgrp_lvb_unlinked(rgd, 1);
+ be32_add_cpu(&rgd->rd_rgl->rl_unlinked, 1);
+ rgrp_unlock_local(rgd);
}
-static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
+void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
{
struct gfs2_sbd *sdp = rgd->rd_sbd;
- struct gfs2_rgrpd *tmp_rgd;
-
- tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE);
- if (!tmp_rgd)
- return;
- gfs2_assert_withdraw(sdp, rgd == tmp_rgd);
+ rgrp_lock_local(rgd);
+ rgblk_free(sdp, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
if (!rgd->rd_dinodes)
gfs2_consist_rgrpd(rgd);
rgd->rd_dinodes--;
@@ -2282,19 +2584,13 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
- gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
- update_rgrp_lvb_unlinked(rgd, -1);
+ be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1);
+ rgrp_unlock_local(rgd);
gfs2_statfs_change(sdp, 0, +1, -1);
-}
-
-
-void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
-{
- gfs2_free_uninit_di(rgd, ip->i_no_addr);
trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
- gfs2_meta_wipe(ip, ip->i_no_addr, 1);
+ gfs2_journal_wipe(ip, ip->i_no_addr, 1);
}
/**
@@ -2303,6 +2599,10 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
* @no_addr: The block number to check
* @type: The block type we are looking for
*
+ * The inode glock of @no_addr must be held. The @type to check for is either
+ * GFS2_BLKST_DINODE or GFS2_BLKST_UNLINKED; checking for type GFS2_BLKST_FREE
+ * or GFS2_BLKST_USED would make no sense.
+ *
* Returns: 0 if the block type matches the expected type
* -ESTALE if it doesn't match
* or -ve errno if something went wrong while checking
@@ -2312,6 +2612,7 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
{
struct gfs2_rgrpd *rgd;
struct gfs2_holder rgd_gh;
+ struct gfs2_rbm rbm;
int error = -EINVAL;
rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
@@ -2322,10 +2623,22 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
if (error)
goto fail;
- if (gfs2_get_block_type(rgd, no_addr) != type)
- error = -ESTALE;
+ rbm.rgd = rgd;
+ error = gfs2_rbm_from_block(&rbm, no_addr);
+ if (!WARN_ON_ONCE(error)) {
+ /*
+ * No need to take the local resource group lock here; the
+ * inode glock of @no_addr provides the necessary
+ * synchronization in case the block is an inode. (In case
+ * the block is not an inode, the block type will not match
+ * the @type we are looking for.)
+ */
+ if (gfs2_testbit(&rbm, false) != type)
+ error = -ESTALE;
+ }
gfs2_glock_dq_uninit(&rgd_gh);
+
fail:
return error;
}
@@ -2354,19 +2667,34 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
return;
- if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block))
- rgd = ip->i_rgd;
- else
+ /*
+ * The resource group last accessed is kept in the last position.
+ */
+
+ if (rlist->rl_rgrps) {
+ rgd = rlist->rl_rgd[rlist->rl_rgrps - 1];
+ if (rgrp_contains_block(rgd, block))
+ return;
rgd = gfs2_blk2rgrpd(sdp, block, 1);
+ } else {
+ rgd = ip->i_res.rs_rgd;
+ if (!rgd || !rgrp_contains_block(rgd, block))
+ rgd = gfs2_blk2rgrpd(sdp, block, 1);
+ }
+
if (!rgd) {
- fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block);
+ fs_err(sdp, "rlist_add: no rgrp for block %llu\n",
+ (unsigned long long)block);
return;
}
- ip->i_rgd = rgd;
- for (x = 0; x < rlist->rl_rgrps; x++)
- if (rlist->rl_rgd[x] == rgd)
+ for (x = 0; x < rlist->rl_rgrps; x++) {
+ if (rlist->rl_rgd[x] == rgd) {
+ swap(rlist->rl_rgd[x],
+ rlist->rl_rgd[rlist->rl_rgrps - 1]);
return;
+ }
+ }
if (rlist->rl_rgrps == rlist->rl_space) {
new_space = rlist->rl_space + 10;
@@ -2391,27 +2719,29 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
* gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate
* and initialize an array of glock holders for them
* @rlist: the list of resource groups
- * @state: the lock state to acquire the RG lock in
+ * @state: the state we're requesting
+ * @flags: the modifier flags
*
* FIXME: Don't use NOFAIL
*
*/
-void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
+void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist,
+ unsigned int state, u16 flags)
{
unsigned int x;
- rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder),
- GFP_NOFS | __GFP_NOFAIL);
+ rlist->rl_ghs = kmalloc_array(rlist->rl_rgrps,
+ sizeof(struct gfs2_holder),
+ GFP_NOFS | __GFP_NOFAIL);
for (x = 0; x < rlist->rl_rgrps; x++)
- gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
- state, 0,
- &rlist->rl_ghs[x]);
+ gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, state, flags,
+ &rlist->rl_ghs[x]);
}
/**
* gfs2_rlist_free - free a resource group list
- * @list: the list of resource groups
+ * @rlist: the list of resource groups
*
*/
@@ -2429,3 +2759,12 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
}
}
+void rgrp_lock_local(struct gfs2_rgrpd *rgd)
+{
+ mutex_lock(&rgd->rd_mutex);
+}
+
+void rgrp_unlock_local(struct gfs2_rgrpd *rgd)
+{
+ mutex_unlock(&rgd->rd_mutex);
+}