diff options
Diffstat (limited to 'fs/gfs2/rgrp.c')
| -rw-r--r-- | fs/gfs2/rgrp.c | 789 |
1 files changed, 431 insertions, 358 deletions
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 831d7cb5a49c..b14e54b38ee8 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -39,15 +36,23 @@ #define BFITNOENT ((u32)~0) #define NO_BLOCK ((u64)~0) -#if BITS_PER_LONG == 32 -#define LBITMASK (0x55555555UL) -#define LBITSKIP55 (0x55555555UL) -#define LBITSKIP00 (0x00000000UL) -#else -#define LBITMASK (0x5555555555555555UL) -#define LBITSKIP55 (0x5555555555555555UL) -#define LBITSKIP00 (0x0000000000000000UL) -#endif +struct gfs2_rbm { + struct gfs2_rgrpd *rgd; + u32 offset; /* The offset is bitmap relative */ + int bii; /* Bitmap index */ +}; + +static inline struct gfs2_bitmap *rbm_bi(const struct gfs2_rbm *rbm) +{ + return rbm->rgd->rd_bits + rbm->bii; +} + +static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm) +{ + BUG_ON(rbm->offset >= rbm->rgd->rd_data); + return rbm->rgd->rd_data0 + (rbm_bi(rbm)->bi_start * GFS2_NBBY) + + rbm->offset; +} /* * These routines are used by the resource group routines (rgrp.c) @@ -74,7 +79,7 @@ static const char valid_change[16] = { }; static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, - const struct gfs2_inode *ip, bool nowrap); + struct gfs2_blkreserv *rs, bool nowrap); /** @@ -154,13 +159,13 @@ static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm, bool use_clone) } /** - * gfs2_bit_search + * gfs2_bit_search - search bitmap for a state * @ptr: Pointer to bitmap data * @mask: Mask to use (normally 0x55555.... but adjusted for search start) * @state: The state we are searching for * - * We xor the bitmap data with a patter which is the bitwise opposite - * of what we are looking for, this gives rise to a pattern of ones + * We xor the bitmap data with a pattern which is the bitwise opposite + * of what we are looking for. This gives rise to a pattern of ones * wherever there is a match. Since we have two bits per entry, we * take this pattern, shift it down by one place and then and it with * the original. All the even bit positions (0,2,4, etc) then represent @@ -188,7 +193,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) /** * rs_cmp - multi-block reservation range compare - * @blk: absolute file system block number of the new reservation + * @start: start of the new reservation * @len: number of blocks in the new reservation * @rs: existing reservation to compare against * @@ -196,13 +201,11 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) * -1 if the block range is before the start of the reservation * 0 if the block range overlaps with the reservation */ -static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) +static inline int rs_cmp(u64 start, u32 len, struct gfs2_blkreserv *rs) { - u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm); - - if (blk >= startblk + rs->rs_free) + if (start >= rs->rs_start + rs->rs_requested) return 1; - if (blk + len - 1 < startblk) + if (rs->rs_start >= start + len) return -1; return 0; } @@ -290,29 +293,38 @@ static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) } /** - * gfs2_rbm_incr - increment an rbm structure + * gfs2_rbm_add - add a number of blocks to an rbm * @rbm: The rbm with rgd already set correctly + * @blocks: The number of blocks to add to rpm * - * This function takes an existing rbm structure and increments it to the next - * viable block offset. - * - * Returns: If incrementing the offset would cause the rbm to go past the - * end of the rgrp, true is returned, otherwise false. + * This function takes an existing rbm structure and adds a number of blocks to + * it. * + * Returns: True if the new rbm would point past the end of the rgrp. */ -static bool gfs2_rbm_incr(struct gfs2_rbm *rbm) +static bool gfs2_rbm_add(struct gfs2_rbm *rbm, u32 blocks) { - if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */ - rbm->offset++; + struct gfs2_rgrpd *rgd = rbm->rgd; + struct gfs2_bitmap *bi = rgd->rd_bits + rbm->bii; + + if (rbm->offset + blocks < bi->bi_blocks) { + rbm->offset += blocks; return false; } - if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */ - return true; + blocks -= bi->bi_blocks - rbm->offset; - rbm->offset = 0; - rbm->bii++; - return false; + for(;;) { + bi++; + if (bi == rgd->rd_bits + rgd->rd_length) + return true; + if (blocks < bi->bi_blocks) { + rbm->offset = blocks; + rbm->bii = bi - rgd->rd_bits; + return false; + } + blocks -= bi->bi_blocks; + } } /** @@ -321,7 +333,8 @@ static bool gfs2_rbm_incr(struct gfs2_rbm *rbm) * @n_unaligned: Number of unaligned blocks to check * @len: Decremented for each block found (terminate on zero) * - * Returns: true if a non-free block is encountered + * Returns: true if a non-free block is encountered or the end of the resource + * group is reached. */ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len) @@ -336,7 +349,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le (*len)--; if (*len == 0) return true; - if (gfs2_rbm_incr(rbm)) + if (gfs2_rbm_add(rbm, 1)) return true; } @@ -470,24 +483,24 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd) } if (count[0] != rgd->rd_free) { - if (gfs2_consist_rgrpd(rgd)) - fs_err(sdp, "free data mismatch: %u != %u\n", - count[0], rgd->rd_free); + gfs2_lm(sdp, "free data mismatch: %u != %u\n", + count[0], rgd->rd_free); + gfs2_consist_rgrpd(rgd); return; } tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes; if (count[1] != tmp) { - if (gfs2_consist_rgrpd(rgd)) - fs_err(sdp, "used data mismatch: %u != %u\n", - count[1], tmp); + gfs2_lm(sdp, "used data mismatch: %u != %u\n", + count[1], tmp); + gfs2_consist_rgrpd(rgd); return; } if (count[2] + count[3] != rgd->rd_dinodes) { - if (gfs2_consist_rgrpd(rgd)) - fs_err(sdp, "used metadata mismatch: %u != %u\n", - count[2] + count[3], rgd->rd_dinodes); + gfs2_lm(sdp, "used metadata mismatch: %u != %u\n", + count[2] + count[3], rgd->rd_dinodes); + gfs2_consist_rgrpd(rgd); return; } } @@ -603,24 +616,16 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd) } } -/** - * gfs2_rsqa_alloc - make sure we have a reservation assigned to the inode - * plus a quota allocations data structure, if necessary - * @ip: the inode for this reservation - */ -int gfs2_rsqa_alloc(struct gfs2_inode *ip) -{ - return gfs2_qa_alloc(ip); -} - -static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) +static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs, + const char *fs_id_buf) { struct gfs2_inode *ip = container_of(rs, struct gfs2_inode, i_res); - gfs2_print_dbg(seq, " B: n:%llu s:%llu b:%u f:%u\n", + gfs2_print_dbg(seq, "%s B: n:%llu s:%llu f:%u\n", + fs_id_buf, (unsigned long long)ip->i_no_addr, - (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm), - rs->rs_rbm.offset, rs->rs_free); + (unsigned long long)rs->rs_start, + rs->rs_requested); } /** @@ -635,33 +640,22 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) if (!gfs2_rs_active(rs)) return; - rgd = rs->rs_rbm.rgd; + rgd = rs->rs_rgd; trace_gfs2_rs(rs, TRACE_RS_TREEDEL); rb_erase(&rs->rs_node, &rgd->rd_rstree); RB_CLEAR_NODE(&rs->rs_node); - if (rs->rs_free) { - u64 last_block = gfs2_rbm_to_block(&rs->rs_rbm) + - rs->rs_free - 1; - struct gfs2_rbm last_rbm = { .rgd = rs->rs_rbm.rgd, }; - struct gfs2_bitmap *start, *last; + if (rs->rs_requested) { + /* return requested blocks to the rgrp */ + BUG_ON(rs->rs_rgd->rd_requested < rs->rs_requested); + rs->rs_rgd->rd_requested -= rs->rs_requested; - /* return reserved blocks to the rgrp */ - BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free); - rs->rs_rbm.rgd->rd_reserved -= rs->rs_free; /* The rgrp extent failure point is likely not to increase; it will only do so if the freed blocks are somehow contiguous with a span of free blocks that follows. Still, it will force the number to be recalculated later. */ - rgd->rd_extfail_pt += rs->rs_free; - rs->rs_free = 0; - if (gfs2_rbm_from_block(&last_rbm, last_block)) - return; - start = rbm_bi(&rs->rs_rbm); - last = rbm_bi(&last_rbm); - do - clear_bit(GBF_FULL, &start->bi_flags); - while (start++ != last); + rgd->rd_extfail_pt += rs->rs_requested; + rs->rs_requested = 0; } } @@ -674,28 +668,28 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) { struct gfs2_rgrpd *rgd; - rgd = rs->rs_rbm.rgd; + rgd = rs->rs_rgd; if (rgd) { spin_lock(&rgd->rd_rsspin); __rs_deltree(rs); - BUG_ON(rs->rs_free); + BUG_ON(rs->rs_requested); spin_unlock(&rgd->rd_rsspin); } } /** - * gfs2_rsqa_delete - delete a multi-block reservation and quota allocation + * gfs2_rs_delete - delete a multi-block reservation * @ip: The inode for this reservation - * @wcount: The inode's write count, or NULL * */ -void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount) +void gfs2_rs_delete(struct gfs2_inode *ip) { + struct inode *inode = &ip->i_inode; + down_write(&ip->i_rw_mutex); - if ((wcount == NULL) || (atomic_read(wcount) <= 1)) + if (atomic_read(&inode->i_writecount) <= 1) gfs2_rs_deltree(&ip->i_res); up_write(&ip->i_rw_mutex); - gfs2_qa_delete(ip, wcount); } /** @@ -732,32 +726,25 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) rb_erase(n, &sdp->sd_rindex_tree); if (gl) { - glock_clear_object(gl, rgd); + if (gl->gl_state != LM_ST_UNLOCKED) { + gfs2_glock_cb(gl, LM_ST_UNLOCKED); + flush_delayed_work(&gl->gl_work); + } gfs2_rgrp_brelse(rgd); + glock_clear_object(gl, rgd); gfs2_glock_put(gl); } gfs2_free_clones(rgd); + return_all_reservations(rgd); kfree(rgd->rd_bits); rgd->rd_bits = NULL; - return_all_reservations(rgd); kmem_cache_free(gfs2_rgrpd_cachep, rgd); } } -static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd) -{ - struct gfs2_sbd *sdp = rgd->rd_sbd; - - fs_info(sdp, "ri_addr = %llu\n", (unsigned long long)rgd->rd_addr); - fs_info(sdp, "ri_length = %u\n", rgd->rd_length); - fs_info(sdp, "ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0); - fs_info(sdp, "ri_data = %u\n", rgd->rd_data); - fs_info(sdp, "ri_bitbytes = %u\n", rgd->rd_bitbytes); -} - /** - * gfs2_compute_bitstructs - Compute the bitmap sizes + * compute_bitstructs - Compute the bitmap sizes * @rgd: The resource group descriptor * * Calculates bitmap descriptors, one for each block that contains bitmap data @@ -826,11 +813,20 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd) } bi = rgd->rd_bits + (length - 1); if ((bi->bi_start + bi->bi_bytes) * GFS2_NBBY != rgd->rd_data) { - if (gfs2_consist_rgrpd(rgd)) { - gfs2_rindex_print(rgd); - fs_err(sdp, "start=%u len=%u offset=%u\n", - bi->bi_start, bi->bi_bytes, bi->bi_offset); - } + gfs2_lm(sdp, + "ri_addr=%llu " + "ri_length=%u " + "ri_data0=%llu " + "ri_data=%u " + "ri_bitbytes=%u " + "start=%u len=%u offset=%u\n", + (unsigned long long)rgd->rd_addr, + rgd->rd_length, + (unsigned long long)rgd->rd_data0, + rgd->rd_data, + rgd->rd_bitbytes, + bi->bi_start, bi->bi_bytes, bi->bi_offset); + gfs2_consist_rgrpd(rgd); return -EIO; } @@ -899,7 +895,6 @@ static int rgd_insert(struct gfs2_rgrpd *rgd) static int read_rindex_entry(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - const unsigned bsize = sdp->sd_sb.sb_bsize; loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex); struct gfs2_rindex buf; int error; @@ -926,18 +921,19 @@ static int read_rindex_entry(struct gfs2_inode *ip) rgd->rd_data = be32_to_cpu(buf.ri_data); rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes); spin_lock_init(&rgd->rd_rsspin); - - error = compute_bitstructs(rgd); - if (error) - goto fail; + mutex_init(&rgd->rd_mutex); error = gfs2_glock_get(sdp, rgd->rd_addr, &gfs2_rgrp_glops, CREATE, &rgd->rd_gl); if (error) goto fail; + error = compute_bitstructs(rgd); + if (error) + goto fail_glock; + rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; - rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED); + rgd->rd_flags &= ~GFS2_RDF_PREFERRED; if (rgd->rd_data > sdp->sd_max_rg_data) sdp->sd_max_rg_data = rgd->rd_data; spin_lock(&sdp->sd_rindex_spin); @@ -945,13 +941,11 @@ static int read_rindex_entry(struct gfs2_inode *ip) spin_unlock(&sdp->sd_rindex_spin); if (!error) { glock_set_object(rgd->rd_gl, rgd); - rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_MASK; - rgd->rd_gl->gl_vm.end = PAGE_ALIGN((rgd->rd_addr + - rgd->rd_length) * bsize) - 1; return 0; } error = 0; /* someone else read in the rgrp; free it and ignore it */ +fail_glock: gfs2_glock_put(rgd->rd_gl); fail: @@ -1010,6 +1004,10 @@ static int gfs2_ri_update(struct gfs2_inode *ip) if (error < 0) return error; + if (RB_EMPTY_ROOT(&sdp->sd_rindex_tree)) { + fs_err(sdp, "no resource groups found in the file system.\n"); + return -ENOENT; + } set_rgrp_preferences(sdp); sdp->sd_rindex_uptodate = 1; @@ -1114,32 +1112,33 @@ static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd) { struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data; + struct gfs2_sbd *sdp = rgd->rd_sbd; int valid = 1; if (rgl->rl_flags != str->rg_flags) { - printk(KERN_WARNING "GFS2: rgd: %llu lvb flag mismatch %u/%u", - (unsigned long long)rgd->rd_addr, + fs_warn(sdp, "GFS2: rgd: %llu lvb flag mismatch %u/%u", + (unsigned long long)rgd->rd_addr, be32_to_cpu(rgl->rl_flags), be32_to_cpu(str->rg_flags)); valid = 0; } if (rgl->rl_free != str->rg_free) { - printk(KERN_WARNING "GFS2: rgd: %llu lvb free mismatch %u/%u", - (unsigned long long)rgd->rd_addr, - be32_to_cpu(rgl->rl_free), be32_to_cpu(str->rg_free)); + fs_warn(sdp, "GFS2: rgd: %llu lvb free mismatch %u/%u", + (unsigned long long)rgd->rd_addr, + be32_to_cpu(rgl->rl_free), be32_to_cpu(str->rg_free)); valid = 0; } if (rgl->rl_dinodes != str->rg_dinodes) { - printk(KERN_WARNING "GFS2: rgd: %llu lvb dinode mismatch %u/%u", - (unsigned long long)rgd->rd_addr, - be32_to_cpu(rgl->rl_dinodes), - be32_to_cpu(str->rg_dinodes)); + fs_warn(sdp, "GFS2: rgd: %llu lvb dinode mismatch %u/%u", + (unsigned long long)rgd->rd_addr, + be32_to_cpu(rgl->rl_dinodes), + be32_to_cpu(str->rg_dinodes)); valid = 0; } if (rgl->rl_igeneration != str->rg_igeneration) { - printk(KERN_WARNING "GFS2: rgd: %llu lvb igen mismatch " - "%llu/%llu", (unsigned long long)rgd->rd_addr, - (unsigned long long)be64_to_cpu(rgl->rl_igeneration), - (unsigned long long)be64_to_cpu(str->rg_igeneration)); + fs_warn(sdp, "GFS2: rgd: %llu lvb igen mismatch %llu/%llu", + (unsigned long long)rgd->rd_addr, + (unsigned long long)be64_to_cpu(rgl->rl_igeneration), + (unsigned long long)be64_to_cpu(str->rg_igeneration)); valid = 0; } return valid; @@ -1169,10 +1168,27 @@ static u32 count_unlinked(struct gfs2_rgrpd *rgd) return count; } +static void rgrp_set_bitmap_flags(struct gfs2_rgrpd *rgd) +{ + struct gfs2_bitmap *bi; + int x; + + if (rgd->rd_free) { + for (x = 0; x < rgd->rd_length; x++) { + bi = rgd->rd_bits + x; + clear_bit(GBF_FULL, &bi->bi_flags); + } + } else { + for (x = 0; x < rgd->rd_length; x++) { + bi = rgd->rd_bits + x; + set_bit(GBF_FULL, &bi->bi_flags); + } + } +} /** - * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps - * @rgd: the struct gfs2_rgrpd describing the RG to read in + * gfs2_rgrp_go_instantiate - Read in a RG's header and bitmaps + * @gl: the glock representing the rgrpd to read in * * Read in all of a Resource Group's header and bitmap blocks. * Caller must eventually call gfs2_rgrp_brelse() to free the bitmaps. @@ -1180,10 +1196,10 @@ static u32 count_unlinked(struct gfs2_rgrpd *rgd) * Returns: errno */ -static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) +int gfs2_rgrp_go_instantiate(struct gfs2_glock *gl) { + struct gfs2_rgrpd *rgd = gl->gl_object; struct gfs2_sbd *sdp = rgd->rd_sbd; - struct gfs2_glock *gl = rgd->rd_gl; unsigned int length = rgd->rd_length; struct gfs2_bitmap *bi; unsigned int x, y; @@ -1211,21 +1227,18 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) } } - if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) { - for (x = 0; x < length; x++) - clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags); - gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); - rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); - rgd->rd_free_clone = rgd->rd_free; - /* max out the rgrp allocation failure point */ - rgd->rd_extfail_pt = rgd->rd_free; - } + gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); + rgrp_set_bitmap_flags(rgd); + rgd->rd_flags |= GFS2_RDF_CHECK; + rgd->rd_free_clone = rgd->rd_free; + GLOCK_BUG_ON(rgd->rd_gl, rgd->rd_reserved); + /* max out the rgrp allocation failure point */ + rgd->rd_extfail_pt = rgd->rd_free; if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); - } - else if (sdp->sd_args.ar_rgrplvb) { + } else if (sdp->sd_args.ar_rgrplvb) { if (!gfs2_rgrp_lvb_valid(rgd)){ gfs2_consist_rgrpd(rgd); error = -EIO; @@ -1243,19 +1256,18 @@ fail: bi->bi_bh = NULL; gfs2_assert_warn(sdp, !bi->bi_clone); } - return error; } -static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) +static int update_rgrp_lvb(struct gfs2_rgrpd *rgd, struct gfs2_holder *gh) { u32 rl_flags; - if (rgd->rd_flags & GFS2_RDF_UPTODATE) + if (!test_bit(GLF_INSTANTIATE_NEEDED, &gh->gh_gl->gl_flags)) return 0; if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) - return gfs2_rgrp_bh_get(rgd); + return gfs2_instantiate(gh); rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); rl_flags &= ~GFS2_RDF_MASK; @@ -1264,22 +1276,16 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) if (rgd->rd_rgl->rl_unlinked == 0) rgd->rd_flags &= ~GFS2_RDF_CHECK; rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free); + rgrp_set_bitmap_flags(rgd); rgd->rd_free_clone = rgd->rd_free; + GLOCK_BUG_ON(rgd->rd_gl, rgd->rd_reserved); + /* max out the rgrp allocation failure point */ + rgd->rd_extfail_pt = rgd->rd_free; rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes); rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration); return 0; } -int gfs2_rgrp_go_lock(struct gfs2_holder *gh) -{ - struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; - struct gfs2_sbd *sdp = rgd->rd_sbd; - - if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb) - return 0; - return gfs2_rgrp_bh_get(rgd); -} - /** * gfs2_rgrp_brelse - Release RG bitmaps read in with gfs2_rgrp_bh_get() * @rgd: The resource group @@ -1297,23 +1303,7 @@ void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd) bi->bi_bh = NULL; } } - -} - -/** - * gfs2_rgrp_go_unlock - Unlock a rgrp glock - * @gh: The glock holder for the resource group - * - */ - -void gfs2_rgrp_go_unlock(struct gfs2_holder *gh) -{ - struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; - int demote_requested = test_bit(GLF_DEMOTE, &gh->gh_gl->gl_flags) | - test_bit(GLF_PENDING_DEMOTE, &gh->gh_gl->gl_flags); - - if (rgd && demote_requested) - gfs2_rgrp_brelse(rgd); + set_bit(GLF_INSTANTIATE_NEEDED, &rgd->rd_gl->gl_flags); } int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, @@ -1324,7 +1314,7 @@ int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, u64 blk; sector_t start = 0; sector_t nr_blks = 0; - int rv; + int rv = -EIO; unsigned int x; u32 trimmed = 0; u8 diff; @@ -1380,7 +1370,7 @@ fail: if (sdp->sd_args.ar_discard) fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem\n", rv); sdp->sd_args.ar_discard = 0; - return -EIO; + return rv; } /** @@ -1395,7 +1385,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp) { struct inode *inode = file_inode(filp); struct gfs2_sbd *sdp = GFS2_SB(inode); - struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev); + struct block_device *bdev = sdp->sd_vfs->s_bdev; struct buffer_head *bh; struct gfs2_rgrpd *rgd; struct gfs2_rgrpd *rgd_end; @@ -1411,7 +1401,10 @@ int gfs2_fitrim(struct file *filp, void __user *argp) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!blk_queue_discard(q)) + if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) + return -EROFS; + + if (!bdev_max_discard_sectors(bdev)) return -EOPNOTSUPP; if (copy_from_user(&r, argp, sizeof(r))) @@ -1423,8 +1416,8 @@ int gfs2_fitrim(struct file *filp, void __user *argp) start = r.start >> bs_shift; end = start + (r.len >> bs_shift); - minlen = max_t(u64, r.minlen, - q->limits.discard_granularity) >> bs_shift; + minlen = max_t(u64, r.minlen, sdp->sd_sb.sb_bsize); + minlen = max_t(u64, minlen, bdev_discard_granularity(bdev)) >> bs_shift; if (end <= start || minlen > sdp->sd_max_rg_data) return -EINVAL; @@ -1438,7 +1431,8 @@ int gfs2_fitrim(struct file *filp, void __user *argp) while (1) { - ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh); + ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, + LM_FLAG_NODE_SCOPE, &gh); if (ret) goto out; @@ -1446,9 +1440,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp) /* Trim each bitmap in the rgrp */ for (x = 0; x < rgd->rd_length; x++) { struct gfs2_bitmap *bi = rgd->rd_bits + x; + rgrp_lock_local(rgd); ret = gfs2_rgrp_send_discards(sdp, rgd->rd_data0, NULL, bi, minlen, &amt); + rgrp_unlock_local(rgd); if (ret) { gfs2_glock_dq_uninit(&gh); goto out; @@ -1460,9 +1456,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp) ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0); if (ret == 0) { bh = rgd->rd_bits[0].bi_bh; + rgrp_lock_local(rgd); rgd->rd_flags |= GFS2_RGF_TRIMMED; gfs2_trans_add_meta(rgd->rd_gl, bh); gfs2_rgrp_out(rgd, bh->b_data); + rgrp_unlock_local(rgd); gfs2_trans_end(sdp); } } @@ -1492,8 +1490,7 @@ static void rs_insert(struct gfs2_inode *ip) struct rb_node **newn, *parent = NULL; int rc; struct gfs2_blkreserv *rs = &ip->i_res; - struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; - u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm); + struct gfs2_rgrpd *rgd = rs->rs_rgd; BUG_ON(gfs2_rs_active(rs)); @@ -1504,7 +1501,7 @@ static void rs_insert(struct gfs2_inode *ip) rb_entry(*newn, struct gfs2_blkreserv, rs_node); parent = *newn; - rc = rs_cmp(fsblock, rs->rs_free, cur); + rc = rs_cmp(rs->rs_start, rs->rs_requested, cur); if (rc > 0) newn = &((*newn)->rb_right); else if (rc < 0) @@ -1520,14 +1517,15 @@ static void rs_insert(struct gfs2_inode *ip) rb_insert_color(&rs->rs_node, &rgd->rd_rstree); /* Do our rgrp accounting for the reservation */ - rgd->rd_reserved += rs->rs_free; /* blocks reserved */ + rgd->rd_requested += rs->rs_requested; /* blocks requested */ spin_unlock(&rgd->rd_rsspin); trace_gfs2_rs(rs, TRACE_RS_INSERT); } /** - * rgd_free - return the number of free blocks we can allocate. + * rgd_free - return the number of free blocks we can allocate * @rgd: the resource group + * @rs: The reservation to free * * This function returns the number of free blocks for an rgrp. * That's the clone-free blocks (blocks that are free, not including those @@ -1541,9 +1539,9 @@ static inline u32 rgd_free(struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *rs) { u32 tot_reserved, tot_free; - if (WARN_ON_ONCE(rgd->rd_reserved < rs->rs_free)) + if (WARN_ON_ONCE(rgd->rd_requested < rs->rs_requested)) return 0; - tot_reserved = rgd->rd_reserved - rs->rs_free; + tot_reserved = rgd->rd_requested - rs->rs_requested; if (rgd->rd_free_clone < tot_reserved) tot_reserved = 0; @@ -1568,17 +1566,26 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, u64 goal; struct gfs2_blkreserv *rs = &ip->i_res; u32 extlen; - u32 free_blocks = rgd_free(rgd, rs); + u32 free_blocks, blocks_available; int ret; struct inode *inode = &ip->i_inode; + spin_lock(&rgd->rd_rsspin); + free_blocks = rgd_free(rgd, rs); + if (rgd->rd_free_clone < rgd->rd_requested) + free_blocks = 0; + blocks_available = rgd->rd_free_clone - rgd->rd_reserved; + if (rgd == rs->rs_rgd) + blocks_available += rs->rs_reserved; + spin_unlock(&rgd->rd_rsspin); + if (S_ISDIR(inode->i_mode)) extlen = 1; else { extlen = max_t(u32, atomic_read(&ip->i_sizehint), ap->target); extlen = clamp(extlen, (u32)RGRP_RSRV_MINBLKS, free_blocks); } - if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen)) + if (free_blocks < extlen || blocks_available < extlen) return; /* Find bitmap block that contains bits for goal block */ @@ -1590,10 +1597,10 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, if (WARN_ON(gfs2_rbm_from_block(&rbm, goal))) return; - ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true); + ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, &ip->i_res, true); if (ret == 0) { - rs->rs_rbm = rbm; - rs->rs_free = extlen; + rs->rs_start = gfs2_rbm_to_block(&rbm); + rs->rs_requested = extlen; rs_insert(ip); } else { if (goal == rgd->rd_last_alloc + rgd->rd_data0) @@ -1606,7 +1613,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, * @rgd: The resource group * @block: The starting block * @length: The required length - * @ip: Ignore any reservations for this inode + * @ignore_rs: Reservation to ignore * * If the block does not appear in any reservation, then return the * block number unchanged. If it does appear in the reservation, then @@ -1616,7 +1623,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, u32 length, - const struct gfs2_inode *ip) + struct gfs2_blkreserv *ignore_rs) { struct gfs2_blkreserv *rs; struct rb_node *n; @@ -1636,8 +1643,8 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, } if (n) { - while ((rs_cmp(block, length, rs) == 0) && (&ip->i_res != rs)) { - block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free; + while (rs_cmp(block, length, rs) == 0 && rs != ignore_rs) { + block = rs->rs_start + rs->rs_requested; n = n->rb_right; if (n == NULL) break; @@ -1652,7 +1659,7 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, /** * gfs2_reservation_check_and_update - Check for reservations during block alloc * @rbm: The current position in the resource group - * @ip: The inode for which we are searching for blocks + * @rs: Our own reservation * @minext: The minimum extent length * @maxext: A pointer to the maximum extent structure * @@ -1666,20 +1673,19 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, */ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, - const struct gfs2_inode *ip, + struct gfs2_blkreserv *rs, u32 minext, struct gfs2_extent *maxext) { u64 block = gfs2_rbm_to_block(rbm); u32 extlen = 1; u64 nblock; - int ret; /* * If we have a minimum extent length, then skip over any extent * which is less than the min extent length in size. */ - if (minext) { + if (minext > 1) { extlen = gfs2_free_extlen(rbm, minext); if (extlen <= maxext->len) goto fail; @@ -1689,7 +1695,7 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, * Check the extent which has been found against the reservations * and skip if parts of it are already reserved */ - nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip); + nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, rs); if (nblock == block) { if (!minext || extlen >= minext) return 0; @@ -1698,12 +1704,15 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, maxext->len = extlen; maxext->rbm = *rbm; } -fail: - nblock = block + extlen; + } else { + u64 len = nblock - block; + if (len >= (u64)1 << 32) + return -E2BIG; + extlen = len; } - ret = gfs2_rbm_from_block(rbm, nblock); - if (ret < 0) - return ret; +fail: + if (gfs2_rbm_add(rbm, extlen)) + return -E2BIG; return 1; } @@ -1711,9 +1720,9 @@ fail: * gfs2_rbm_find - Look for blocks of a particular state * @rbm: Value/result starting position and final position * @state: The state which we want to find - * @minext: Pointer to the requested extent length (NULL for a single block) + * @minext: Pointer to the requested extent length * This is updated to be the actual reservation size. - * @ip: If set, check for reservations + * @rs: Our own reservation (NULL to skip checking for reservations) * @nowrap: Stop looking at the end of the rgrp, rather than wrapping * around until we've reached the starting point. * @@ -1727,32 +1736,28 @@ fail: */ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, - const struct gfs2_inode *ip, bool nowrap) + struct gfs2_blkreserv *rs, bool nowrap) { + bool scan_from_start = rbm->bii == 0 && rbm->offset == 0; struct buffer_head *bh; - int initial_bii; - u32 initial_offset; - int first_bii = rbm->bii; - u32 first_offset = rbm->offset; + int last_bii; u32 offset; u8 *buffer; - int n = 0; - int iters = rbm->rgd->rd_length; + bool wrapped = false; int ret; struct gfs2_bitmap *bi; struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, }; - /* If we are not starting at the beginning of a bitmap, then we - * need to add one to the bitmap count to ensure that we search - * the starting bitmap twice. + /* + * Determine the last bitmap to search. If we're not starting at the + * beginning of a bitmap, we need to search that bitmap twice to scan + * the entire resource group. */ - if (rbm->offset != 0) - iters++; + last_bii = rbm->bii - (rbm->offset == 0); while(1) { bi = rbm_bi(rbm); - if ((ip == NULL || !gfs2_rs_active(&ip->i_res)) && - test_bit(GBF_FULL, &bi->bi_flags) && + if (test_bit(GBF_FULL, &bi->bi_flags) && (state == GFS2_BLKST_FREE)) goto next_bitmap; @@ -1761,59 +1766,57 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, WARN_ON(!buffer_uptodate(bh)); if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) buffer = bi->bi_clone + bi->bi_offset; - initial_offset = rbm->offset; offset = gfs2_bitfit(buffer, bi->bi_bytes, rbm->offset, state); - if (offset == BFITNOENT) - goto bitmap_full; + if (offset == BFITNOENT) { + if (state == GFS2_BLKST_FREE && rbm->offset == 0) + set_bit(GBF_FULL, &bi->bi_flags); + goto next_bitmap; + } rbm->offset = offset; - if (ip == NULL) + if (!rs || !minext) return 0; - initial_bii = rbm->bii; - ret = gfs2_reservation_check_and_update(rbm, ip, - minext ? *minext : 0, + ret = gfs2_reservation_check_and_update(rbm, rs, *minext, &maxext); if (ret == 0) return 0; - if (ret > 0) { - n += (rbm->bii - initial_bii); + if (ret > 0) goto next_iter; - } if (ret == -E2BIG) { - n += rbm->bii - initial_bii; rbm->bii = 0; rbm->offset = 0; goto res_covered_end_of_rgrp; } return ret; -bitmap_full: /* Mark bitmap as full and fall through */ - if ((state == GFS2_BLKST_FREE) && initial_offset == 0) - set_bit(GBF_FULL, &bi->bi_flags); - next_bitmap: /* Find next bitmap in the rgrp */ rbm->offset = 0; rbm->bii++; if (rbm->bii == rbm->rgd->rd_length) rbm->bii = 0; res_covered_end_of_rgrp: - if ((rbm->bii == 0) && nowrap) - break; - n++; + if (rbm->bii == 0) { + if (wrapped) + break; + wrapped = true; + if (nowrap) + break; + } next_iter: - if (n >= iters) + /* Have we scanned the entire resource group? */ + if (wrapped && rbm->bii > last_bii) break; } - if (minext == NULL || state != GFS2_BLKST_FREE) + if (state != GFS2_BLKST_FREE) return -ENOSPC; /* If the extent was too small, and it's smaller than the smallest to have failed before, remember for future reference that it's useless to search this rgrp again for this amount or more. */ - if ((first_offset == 0) && (first_bii == 0) && - (*minext < rbm->rgd->rd_extfail_pt)) - rbm->rgd->rd_extfail_pt = *minext; + if (wrapped && (scan_from_start || rbm->bii > last_bii) && + *minext < rbm->rgd->rd_extfail_pt) + rbm->rgd->rd_extfail_pt = *minext - 1; /* If the maximum extent we found is big enough to fulfill the minimum requirements, use it anyway. */ @@ -1847,10 +1850,8 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 }; while (1) { - down_write(&sdp->sd_log_flush_lock); error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL, true); - up_write(&sdp->sd_log_flush_lock); if (error == -ENOSPC) break; if (WARN_ON_ONCE(error)) @@ -1878,7 +1879,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip */ ip = gl->gl_object; - if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0) + if (ip || !gfs2_queue_verify_delete(gl, false)) gfs2_glock_put(gl); else found++; @@ -1966,7 +1967,7 @@ static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops) } /** - * gfs2_rgrp_used_recently + * gfs2_rgrp_used_recently - test if an rgrp has been used recently * @rs: The block reservation with the rgrp to test * @msecs: The time limit in milliseconds * @@ -1978,7 +1979,7 @@ static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs, u64 tdiff; tdiff = ktime_to_ns(ktime_sub(ktime_get_real(), - rs->rs_rbm.rgd->rd_gl->gl_dstamp)); + rs->rs_rgd->rd_gl->gl_dstamp)); return tdiff > (msecs * 1000 * 1000); } @@ -1986,10 +1987,8 @@ static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs, static u32 gfs2_orlov_skip(const struct gfs2_inode *ip) { const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - u32 skip; - get_random_bytes(&skip, sizeof(skip)); - return skip % sdp->sd_rgrps; + return get_random_u32() % sdp->sd_rgrps; } static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) @@ -2008,6 +2007,7 @@ static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *b /** * fast_to_acquire - determine if a resource group will be fast to acquire + * @rgd: The rgrp * * If this is one of our preferred rgrps, it should be quicker to acquire, * because we tried to set ourselves up as dlm lock master. @@ -2033,8 +2033,7 @@ static inline int fast_to_acquire(struct gfs2_rgrpd *rgd) * We try our best to find an rgrp that has at least ap->target blocks * available. After a couple of passes (loops == 2), the prospects of finding * such an rgrp diminish. At this stage, we return the first rgrp that has - * at least ap->min_target blocks available. Either way, we set ap->allowed to - * the number of blocks available in the chosen rgrp. + * at least ap->min_target blocks available. * * Returns: 0 on success, * -ENOMEM if a suitable rgrp can't be found @@ -2046,56 +2045,65 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *begin = NULL; struct gfs2_blkreserv *rs = &ip->i_res; - int error = 0, rg_locked, flags = 0; + int error = 0, flags = LM_FLAG_NODE_SCOPE; + bool rg_locked; u64 last_unlinked = NO_BLOCK; + u32 target = ap->target; int loops = 0; - u32 free_blocks, skip = 0; + u32 free_blocks, blocks_available, skip = 0; + + BUG_ON(rs->rs_reserved); if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; - if (gfs2_assert_warn(sdp, ap->target)) + if (gfs2_assert_warn(sdp, target)) return -EINVAL; if (gfs2_rs_active(rs)) { - begin = rs->rs_rbm.rgd; - } else if (rs->rs_rbm.rgd && - rgrp_contains_block(rs->rs_rbm.rgd, ip->i_goal)) { - begin = rs->rs_rbm.rgd; + begin = rs->rs_rgd; + } else if (rs->rs_rgd && + rgrp_contains_block(rs->rs_rgd, ip->i_goal)) { + begin = rs->rs_rgd; } else { check_and_update_goal(ip); - rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); + rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); } if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV)) skip = gfs2_orlov_skip(ip); - if (rs->rs_rbm.rgd == NULL) + if (rs->rs_rgd == NULL) return -EBADSLT; while (loops < 3) { - rg_locked = 1; + struct gfs2_rgrpd *rgd; - if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { - rg_locked = 0; + rg_locked = gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl); + if (rg_locked) { + rgrp_lock_local(rs->rs_rgd); + } else { if (skip && skip--) goto next_rgrp; if (!gfs2_rs_active(rs)) { if (loops == 0 && - !fast_to_acquire(rs->rs_rbm.rgd)) + !fast_to_acquire(rs->rs_rgd)) goto next_rgrp; if ((loops < 2) && gfs2_rgrp_used_recently(rs, 1000) && - gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) + gfs2_rgrp_congested(rs->rs_rgd, loops)) goto next_rgrp; } - error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, + error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl, LM_ST_EXCLUSIVE, flags, &ip->i_rgd_gh); if (unlikely(error)) return error; + rgrp_lock_local(rs->rs_rgd); if (!gfs2_rs_active(rs) && (loops < 2) && - gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) + gfs2_rgrp_congested(rs->rs_rgd, loops)) goto skip_rgrp; if (sdp->sd_args.ar_rgrplvb) { - error = update_rgrp_lvb(rs->rs_rbm.rgd); + error = update_rgrp_lvb(rs->rs_rgd, + &ip->i_rgd_gh); if (unlikely(error)) { + rgrp_unlock_local(rs->rs_rgd); gfs2_glock_dq_uninit(&ip->i_rgd_gh); return error; } @@ -2103,36 +2111,49 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) } /* Skip unusable resource groups */ - if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | + if ((rs->rs_rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) || - (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt)) + (loops == 0 && target > rs->rs_rgd->rd_extfail_pt)) goto skip_rgrp; - if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rs->rs_rbm.rgd); + if (sdp->sd_args.ar_rgrplvb) { + error = gfs2_instantiate(&ip->i_rgd_gh); + if (error) + goto skip_rgrp; + } /* Get a reservation if we don't already have one */ if (!gfs2_rs_active(rs)) - rg_mblk_search(rs->rs_rbm.rgd, ip, ap); + rg_mblk_search(rs->rs_rgd, ip, ap); /* Skip rgrps when we can't get a reservation on first pass */ if (!gfs2_rs_active(rs) && (loops < 1)) goto check_rgrp; /* If rgrp has enough free space, use it */ - free_blocks = rgd_free(rs->rs_rbm.rgd, rs); - if (free_blocks >= ap->target || - (loops == 2 && ap->min_target && - free_blocks >= ap->min_target)) { - ap->allowed = free_blocks; - return 0; + rgd = rs->rs_rgd; + spin_lock(&rgd->rd_rsspin); + free_blocks = rgd_free(rgd, rs); + blocks_available = rgd->rd_free_clone - rgd->rd_reserved; + if (free_blocks < target || blocks_available < target) { + spin_unlock(&rgd->rd_rsspin); + goto check_rgrp; } + rs->rs_reserved = ap->target; + if (rs->rs_reserved > blocks_available) + rs->rs_reserved = blocks_available; + rgd->rd_reserved += rs->rs_reserved; + spin_unlock(&rgd->rd_rsspin); + rgrp_unlock_local(rs->rs_rgd); + return 0; check_rgrp: /* Check for unlinked inodes which can be reclaimed */ - if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) - try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked, + if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) + try_rgrp_unlink(rs->rs_rgd, &last_unlinked, ip->i_no_addr); skip_rgrp: + rgrp_unlock_local(rs->rs_rgd); + /* Drop reservation, if we couldn't use reserved rgrp */ if (gfs2_rs_active(rs)) gfs2_rs_deltree(rs); @@ -2142,7 +2163,7 @@ skip_rgrp: gfs2_glock_dq_uninit(&ip->i_rgd_gh); next_rgrp: /* Find the next rgrp, and continue looking */ - if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) + if (gfs2_select_rgrp(&rs->rs_rgd, begin)) continue; if (skip) continue; @@ -2159,9 +2180,12 @@ next_rgrp: return error; } /* Flushing the log may release space */ - if (loops == 2) + if (loops == 2) { + if (ap->min_target) + target = ap->min_target; gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_INPLACE_RESERVE); + } } return -ENOSPC; @@ -2176,6 +2200,17 @@ next_rgrp: void gfs2_inplace_release(struct gfs2_inode *ip) { + struct gfs2_blkreserv *rs = &ip->i_res; + + if (rs->rs_reserved) { + struct gfs2_rgrpd *rgd = rs->rs_rgd; + + spin_lock(&rgd->rd_rsspin); + GLOCK_BUG_ON(rgd->rd_gl, rgd->rd_reserved < rs->rs_reserved); + rgd->rd_reserved -= rs->rs_reserved; + spin_unlock(&rgd->rd_rsspin); + rs->rs_reserved = 0; + } if (gfs2_holder_initialized(&ip->i_rgd_gh)) gfs2_glock_dq_uninit(&ip->i_rgd_gh); } @@ -2245,41 +2280,41 @@ static void rgblk_free(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd, bi_prev = bi; } gfs2_setbit(&rbm, false, new_state); - gfs2_rbm_incr(&rbm); + gfs2_rbm_add(&rbm, 1); } } /** * gfs2_rgrp_dump - print out an rgrp * @seq: The iterator - * @gl: The glock in question + * @rgd: The rgrp in question + * @fs_id_buf: pointer to file system id (if requested) * */ -void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl) +void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd, + const char *fs_id_buf) { - struct gfs2_rgrpd *rgd = gl->gl_object; struct gfs2_blkreserv *trs; const struct rb_node *n; - if (rgd == NULL) - return; - gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n", + spin_lock(&rgd->rd_rsspin); + gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u q:%u r:%u e:%u\n", + fs_id_buf, (unsigned long long)rgd->rd_addr, rgd->rd_flags, rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, - rgd->rd_reserved, rgd->rd_extfail_pt); - if (rgd->rd_sbd->sd_args.ar_rgrplvb) { + rgd->rd_requested, rgd->rd_reserved, rgd->rd_extfail_pt); + if (rgd->rd_sbd->sd_args.ar_rgrplvb && rgd->rd_rgl) { struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; - gfs2_print_dbg(seq, " L: f:%02x b:%u i:%u\n", + gfs2_print_dbg(seq, "%s L: f:%02x b:%u i:%u\n", fs_id_buf, be32_to_cpu(rgl->rl_flags), be32_to_cpu(rgl->rl_free), be32_to_cpu(rgl->rl_dinodes)); } - spin_lock(&rgd->rd_rsspin); for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) { trs = rb_entry(n, struct gfs2_blkreserv, rs_node); - dump_rs(seq, trs); + dump_rs(seq, trs, fs_id_buf); } spin_unlock(&rgd->rd_rsspin); } @@ -2287,10 +2322,13 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_glock *gl) static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) { struct gfs2_sbd *sdp = rgd->rd_sbd; + char fs_id_buf[sizeof(sdp->sd_fsname) + 7]; + fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n", (unsigned long long)rgd->rd_addr); fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n"); - gfs2_rgrp_dump(NULL, rgd->rd_gl); + sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname); + gfs2_rgrp_dump(NULL, rgd, fs_id_buf); rgd->rd_flags |= GFS2_RDF_ERROR; } @@ -2310,29 +2348,29 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip, { struct gfs2_blkreserv *rs = &ip->i_res; struct gfs2_rgrpd *rgd = rbm->rgd; - unsigned rlen; - u64 block; - int ret; - spin_lock(&rgd->rd_rsspin); + BUG_ON(rs->rs_reserved < len); + rs->rs_reserved -= len; if (gfs2_rs_active(rs)) { - if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) { - block = gfs2_rbm_to_block(rbm); - ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len); - rlen = min(rs->rs_free, len); - rs->rs_free -= rlen; - rgd->rd_reserved -= rlen; + u64 start = gfs2_rbm_to_block(rbm); + + if (rs->rs_start == start) { + unsigned int rlen; + + rs->rs_start += len; + rlen = min(rs->rs_requested, len); + rs->rs_requested -= rlen; + rgd->rd_requested -= rlen; trace_gfs2_rs(rs, TRACE_RS_CLAIM); - if (rs->rs_free && !ret) - goto out; + if (rs->rs_start < rgd->rd_data0 + rgd->rd_data && + rs->rs_requested) + return; /* We used up our block reservation, so we should reserve more blocks next time. */ atomic_add(RGRP_RSRV_ADDBLKS, &ip->i_sizehint); } __rs_deltree(rs); } -out: - spin_unlock(&rgd->rd_rsspin); } /** @@ -2352,15 +2390,13 @@ static void gfs2_set_alloc_start(struct gfs2_rbm *rbm, u64 goal; if (gfs2_rs_active(&ip->i_res)) { - *rbm = ip->i_res.rs_rbm; - return; + goal = ip->i_res.rs_start; + } else { + if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal)) + goal = ip->i_goal; + else + goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0; } - - if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal)) - goal = ip->i_goal; - else - goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0; - if (WARN_ON_ONCE(gfs2_rbm_from_block(rbm, goal))) { rbm->bii = 0; rbm->offset = 0; @@ -2373,27 +2409,30 @@ static void gfs2_set_alloc_start(struct gfs2_rbm *rbm, * @bn: Used to return the starting block number * @nblocks: requested number of blocks/extent length (value/result) * @dinode: 1 if we're allocating a dinode block, else 0 - * @generation: the generation number of the inode * * Returns: 0 or error */ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, - bool dinode, u64 *generation) + bool dinode) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; - struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rbm.rgd, }; - unsigned int ndata; + struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rgd, }; u64 block; /* block, within the file system scope */ - int error; + u32 minext = 1; + int error = -ENOSPC; - gfs2_set_alloc_start(&rbm, ip, dinode); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false); + BUG_ON(ip->i_res.rs_reserved < *nblocks); + rgrp_lock_local(rbm.rgd); + if (gfs2_rs_active(&ip->i_res)) { + gfs2_set_alloc_start(&rbm, ip, dinode); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, &ip->i_res, false); + } if (error == -ENOSPC) { gfs2_set_alloc_start(&rbm, ip, dinode); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, NULL, false); } /* Since all blocks are reserved in advance, this shouldn't happen */ @@ -2408,14 +2447,8 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_alloc_extent(&rbm, dinode, nblocks); block = gfs2_rbm_to_block(&rbm); rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0; - if (gfs2_rs_active(&ip->i_res)) - gfs2_adjust_reservation(ip, &rbm, *nblocks); - ndata = *nblocks; - if (dinode) - ndata--; - if (!dinode) { - ip->i_goal = block + ndata - 1; + ip->i_goal = block + *nblocks - 1; error = gfs2_meta_inode_buffer(ip, &dibh); if (error == 0) { struct gfs2_dinode *di = @@ -2426,35 +2459,47 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, brelse(dibh); } } - if (rbm.rgd->rd_free < *nblocks) { + spin_lock(&rbm.rgd->rd_rsspin); + gfs2_adjust_reservation(ip, &rbm, *nblocks); + if (rbm.rgd->rd_free < *nblocks || rbm.rgd->rd_reserved < *nblocks) { fs_warn(sdp, "nblocks=%u\n", *nblocks); + spin_unlock(&rbm.rgd->rd_rsspin); goto rgrp_error; } - + GLOCK_BUG_ON(rbm.rgd->rd_gl, rbm.rgd->rd_reserved < *nblocks); + GLOCK_BUG_ON(rbm.rgd->rd_gl, rbm.rgd->rd_free_clone < *nblocks); + GLOCK_BUG_ON(rbm.rgd->rd_gl, rbm.rgd->rd_free < *nblocks); + rbm.rgd->rd_reserved -= *nblocks; + rbm.rgd->rd_free_clone -= *nblocks; rbm.rgd->rd_free -= *nblocks; + spin_unlock(&rbm.rgd->rd_rsspin); if (dinode) { + u64 generation; + rbm.rgd->rd_dinodes++; - *generation = rbm.rgd->rd_igeneration++; - if (*generation == 0) - *generation = rbm.rgd->rd_igeneration++; + generation = rbm.rgd->rd_igeneration++; + if (generation == 0) + generation = rbm.rgd->rd_igeneration++; + ip->i_generation = generation; } gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data); + rgrp_unlock_local(rbm.rgd); gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); if (dinode) - gfs2_trans_add_unrevoke(sdp, block, *nblocks); + gfs2_trans_remove_revoke(sdp, block, *nblocks); gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid); - rbm.rgd->rd_free_clone -= *nblocks; trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); *bn = block; return 0; rgrp_error: + rgrp_unlock_local(rbm.rgd); gfs2_rgrp_error(rbm.rgd); return -EIO; } @@ -2474,16 +2519,18 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + rgrp_lock_local(rgd); rgblk_free(sdp, rgd, bstart, blen, GFS2_BLKST_FREE); trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE); rgd->rd_free += blen; rgd->rd_flags &= ~GFS2_RGF_TRIMMED; gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + rgrp_unlock_local(rgd); /* Directories keep their data in the metadata address space */ - if (meta || ip->i_depth) - gfs2_meta_wipe(ip, bstart, blen); + if (meta || ip->i_depth || gfs2_is_jdata(ip)) + gfs2_journal_wipe(ip, bstart, blen); } /** @@ -2515,17 +2562,20 @@ void gfs2_unlink_di(struct inode *inode) rgd = gfs2_blk2rgrpd(sdp, blkno, true); if (!rgd) return; + rgrp_lock_local(rgd); rgblk_free(sdp, rgd, blkno, 1, GFS2_BLKST_UNLINKED); trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); be32_add_cpu(&rgd->rd_rgl->rl_unlinked, 1); + rgrp_unlock_local(rgd); } void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) { struct gfs2_sbd *sdp = rgd->rd_sbd; + rgrp_lock_local(rgd); rgblk_free(sdp, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE); if (!rgd->rd_dinodes) gfs2_consist_rgrpd(rgd); @@ -2535,11 +2585,12 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1); + rgrp_unlock_local(rgd); gfs2_statfs_change(sdp, 0, +1, -1); trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE); gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid); - gfs2_meta_wipe(ip, ip->i_no_addr, 1); + gfs2_journal_wipe(ip, ip->i_no_addr, 1); } /** @@ -2548,6 +2599,10 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) * @no_addr: The block number to check * @type: The block type we are looking for * + * The inode glock of @no_addr must be held. The @type to check for is either + * GFS2_BLKST_DINODE or GFS2_BLKST_UNLINKED; checking for type GFS2_BLKST_FREE + * or GFS2_BLKST_USED would make no sense. + * * Returns: 0 if the block type matches the expected type * -ESTALE if it doesn't match * or -ve errno if something went wrong while checking @@ -2570,13 +2625,20 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) rbm.rgd = rgd; error = gfs2_rbm_from_block(&rbm, no_addr); - if (WARN_ON_ONCE(error)) - goto fail; - - if (gfs2_testbit(&rbm, false) != type) - error = -ESTALE; + if (!WARN_ON_ONCE(error)) { + /* + * No need to take the local resource group lock here; the + * inode glock of @no_addr provides the necessary + * synchronization in case the block is an inode. (In case + * the block is not an inode, the block type will not match + * the @type we are looking for.) + */ + if (gfs2_testbit(&rbm, false) != type) + error = -ESTALE; + } gfs2_glock_dq_uninit(&rgd_gh); + fail: return error; } @@ -2615,7 +2677,7 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, return; rgd = gfs2_blk2rgrpd(sdp, block, 1); } else { - rgd = ip->i_res.rs_rbm.rgd; + rgd = ip->i_res.rs_rgd; if (!rgd || !rgrp_contains_block(rgd, block)) rgd = gfs2_blk2rgrpd(sdp, block, 1); } @@ -2657,12 +2719,15 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate * and initialize an array of glock holders for them * @rlist: the list of resource groups + * @state: the state we're requesting + * @flags: the modifier flags * * FIXME: Don't use NOFAIL * */ -void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist) +void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, + unsigned int state, u16 flags) { unsigned int x; @@ -2670,9 +2735,8 @@ void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist) sizeof(struct gfs2_holder), GFP_NOFS | __GFP_NOFAIL); for (x = 0; x < rlist->rl_rgrps; x++) - gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, - LM_ST_EXCLUSIVE, 0, - &rlist->rl_ghs[x]); + gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, state, flags, + &rlist->rl_ghs[x]); } /** @@ -2695,3 +2759,12 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) } } +void rgrp_lock_local(struct gfs2_rgrpd *rgd) +{ + mutex_lock(&rgd->rd_mutex); +} + +void rgrp_unlock_local(struct gfs2_rgrpd *rgd) +{ + mutex_unlock(&rgd->rd_mutex); +} |
